MLECO-3183: Refactoring application sources Platform agnostic application sources are moved into application api module with their own independent CMake projects. Changes for MLECO-3080 also included - they create CMake projects for individual APIs (again, platform agnostic) that depend on the common logic. The API for KWS_API "joint" API has been removed and now the use case relies on individual KWS, and ASR API libraries. Change-Id: I1f7748dc767abb3904634a04e0991b74ac7b756d Signed-off-by: Kshitij Sisodia <kshitij.sisodia@arm.com>

commit: aa4bcb14d0cbee910331545dd2fc086b58c37170 [log] [tgz]
author: Kshitij Sisodia <kshitij.sisodia@arm.com> Fri May 06 09:13:03 2022 +0100
committer: Kshitij Sisodia <kshitij.sisodia@arm.com> Fri May 06 17:11:41 2022 +0100
tree: e67a43a43f61c6f8b6aad19018b0827baf7e31a6
parent: fcca863bafd5f33522bc14c23dde4540e264ec94 [diff]
diff --git a/source/application/api/use_case/noise_reduction/include/RNNoiseFeatureProcessor.hpp b/source/application/api/use_case/noise_reduction/include/RNNoiseFeatureProcessor.hpp
new file mode 100644
index 0000000..cbf0e4e
--- /dev/null
+++ b/source/application/api/use_case/noise_reduction/include/RNNoiseFeatureProcessor.hpp

@@ -0,0 +1,341 @@
+/*
+ * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RNNOISE_FEATURE_PROCESSOR_HPP
+#define RNNOISE_FEATURE_PROCESSOR_HPP
+
+#include "PlatformMath.hpp"
+#include <cstdint>
+#include <vector>
+#include <array>
+#include <tuple>
+
+namespace arm {
+namespace app {
+namespace rnn {
+
+    using vec1D32F = std::vector<float>;      /* 1D vector of floats. */
+    using vec2D32F = std::vector<vec1D32F>;   /* Vector of vec1D32F, i.e. 2D. */
+    using arrHp = std::array<float, 2>;       /* Two-element float array. */
+    using math::FftInstance;
+    using math::FftType;
+
+    class FrameFeatures { /* Per-frame data filled by pre-processing and read by post-processing. */
+    public:
+        bool m_silence{false};        /* If frame contains silence or not. */
+        vec1D32F m_featuresVec{};     /* Calculated feature vector to feed to model. */
+        vec1D32F m_fftX{};            /* Vector of floats arranged to represent complex numbers. */
+        vec1D32F m_fftP{};            /* Vector of floats arranged to represent complex numbers. */
+        vec1D32F m_Ex{};              /* Spectral band energy for audio x. */
+        vec1D32F m_Ep{};              /* Spectral band energy for pitch p. */
+        vec1D32F m_Exp{};             /* Correlated spectral energy between x and p. */
+    };
+
+    /**
+     * @brief   RNNoise pre and post processing class based on the 2018 paper from
+     *          Jean-Marc Valin. Recommended reading:
+     *          - https://jmvalin.ca/demo/rnnoise/
+     *          - https://arxiv.org/abs/1709.08243
+     **/
+    class RNNoiseFeatureProcessor {
+    /* Public interface */
+    public:
+        RNNoiseFeatureProcessor();
+        ~RNNoiseFeatureProcessor() = default;
+
+        /**
+         * @brief        Calculates the features from a given audio buffer ready to be sent to RNNoise model.
+         * @param[in]    audioData   Pointer to the floating point vector
+         *                           with audio data (within the numerical
+         *                           limits of int16_t type).
+         * @param[in]    audioLen    Number of elements in the audio window.
+         * @param[out]   features    FrameFeatures object reference.
+         **/
+        void PreprocessFrame(const float*   audioData,
+                             size_t   audioLen,
+                             FrameFeatures& features);
+
+        /**
+         * @brief        Use the RNNoise model output gain values with pre-processing features
+         *               to generate audio with noise suppressed.
+         * @param[in]    modelOutput   Output gain values from model.
+         * @param[in]    features      Calculated features from pre-processing step.
+         * @param[out]   outFrame      Output frame to be populated.
+         **/
+        void PostProcessFrame(vec1D32F& modelOutput, FrameFeatures& features,  vec1D32F& outFrame);
+
+
+    /* Public constants */
+    public:
+        static constexpr uint32_t FRAME_SIZE_SHIFT{2};
+        static constexpr uint32_t FRAME_SIZE{512};
+        static constexpr uint32_t WINDOW_SIZE{2 * FRAME_SIZE};      /* = 1024 */
+        static constexpr uint32_t FREQ_SIZE{FRAME_SIZE + 1};        /* = 513 */
+
+        static constexpr uint32_t PITCH_MIN_PERIOD{64};
+        static constexpr uint32_t PITCH_MAX_PERIOD{820};
+        static constexpr uint32_t PITCH_FRAME_SIZE{1024};
+        static constexpr uint32_t PITCH_BUF_SIZE{PITCH_MAX_PERIOD + PITCH_FRAME_SIZE};   /* = 1844 */
+
+        static constexpr uint32_t NB_BANDS{22};
+        static constexpr uint32_t CEPS_MEM{8};
+        static constexpr uint32_t NB_DELTA_CEPS{6};
+
+        static constexpr uint32_t NB_FEATURES{NB_BANDS + 3*NB_DELTA_CEPS + 2};   /* = 22 + 18 + 2 = 42 */
+
+    /* Private functions */
+    private:
+
+        /**
+         * @brief   Initialises the half window and DCT tables.
+         */
+        void InitTables();
+
+        /**
+         * @brief           Applies a bi-quadratic filter over the audio window.
+         * @param[in]       bHp           Constant coefficient set b (arrHp type).
+         * @param[in]       aHp           Constant coefficient set a (arrHp type).
+         * @param[in,out]   memHpX        Coefficients populated by this function.
+         * @param[in,out]   audioWindow   Floating point vector with audio data.
+         **/
+        void BiQuad(
+            const arrHp& bHp,
+            const arrHp& aHp,
+            arrHp& memHpX,
+            vec1D32F& audioWindow);
+
+        /**
+         * @brief        Computes features from the "filtered" audio window.
+         * @param[in]    audioWindow   Floating point vector with audio data.
+         * @param[out]   features      FrameFeatures object reference.
+         **/
+        void ComputeFrameFeatures(vec1D32F& audioWindow, FrameFeatures& features);
+
+        /**
+         * @brief        Runs analysis on the audio buffer.
+         * @param[in]    audioWindow   Floating point vector with audio data.
+         * @param[out]   fft           Floating point FFT vector containing real and
+         *                             imaginary pairs of elements. NOTE: this vector
+         *                             does not contain the mirror image (conjugates)
+         *                             part of the spectrum.
+         * @param[out]   energy        Computed energy for each band in the Bark scale.
+         * @param[out]   analysisMem   Buffer sequentially, but partially,
+         *                             populated with new audio data.
+         **/
+        void FrameAnalysis(
+            const vec1D32F& audioWindow,
+            vec1D32F& fft,
+            vec1D32F& energy,
+            vec1D32F& analysisMem);
+
+        /**
+         * @brief               Applies the window function, in-place, over the given
+         *                      floating point buffer.
+         * @param[in,out]   x   Buffer the window will be applied to.
+         **/
+        void ApplyWindow(vec1D32F& x);
+
+        /**
+         * @brief        Computes the FFT for a given vector.
+         * @param[in]    x     Vector to compute the FFT from.
+         * @param[out]   fft   Floating point FFT vector containing real and
+         *                     imaginary pairs of elements. NOTE: this vector
+         *                     does not contain the mirror image (conjugates)
+         *                     part of the spectrum.
+         **/
+        void ForwardTransform(
+            vec1D32F& x,
+            vec1D32F& fft);
+
+        /**
+         * @brief        Computes band energy for each of the 22 Bark scale bands.
+         * @param[in]    fft_X   FFT spectrum (as computed by ForwardTransform).
+         * @param[out]   bandE   Vector with 22 elements populated with energy for
+         *                       each band.
+         **/
+        void ComputeBandEnergy(const vec1D32F& fft_X, vec1D32F& bandE);
+
+        /**
+         * @brief        Computes band energy correlation.
+         * @param[in]    X       FFT vector X.
+         * @param[in]    P       FFT vector P.
+         * @param[out]   bandC   Vector with 22 elements populated with band energy
+         *                       correlation for the two input FFT vectors.
+         **/
+        void ComputeBandCorr(const vec1D32F& X, const vec1D32F& P, vec1D32F& bandC);
+
+        /**
+         * @brief        Performs pitch auto-correlation for a given vector for
+         *               given lag.
+         * @param[in]    x     Input vector.
+         * @param[out]   ac    Auto-correlation output vector.
+         * @param[in]    lag   Lag value.
+         * @param[in]    n     Number of elements to consider for correlation
+         *                     computation.
+         **/
+        void AutoCorr(const vec1D32F &x,
+                     vec1D32F &ac,
+                     size_t lag,
+                     size_t n);
+
+        /**
+         * @brief       Computes pitch cross-correlation.
+         * @param[in]   x          Input vector 1.
+         * @param[in]   y          Input vector 2.
+         * @param[out]  xCorr      Cross-correlation output vector.
+         * @param[in]   len        Number of elements to consider for correlation
+         *                         computation.
+         * @param[in]   maxPitch   Maximum pitch.
+         **/
+        void PitchXCorr(
+            const vec1D32F& x,
+            const vec1D32F& y,
+            vec1D32F& xCorr,
+            size_t len,
+            size_t maxPitch);
+
+        /**
+         * @brief        Computes "Linear Predictor Coefficients".
+         * @param[in]    ac    Correlation vector.
+         * @param[in]    p     Number of elements of input vector to consider.
+         * @param[out]   lpc   Output coefficients vector.
+         **/
+        void LPC(const vec1D32F& ac, int32_t p, vec1D32F& lpc);
+
+        /**
+         * @brief        Custom FIR implementation.
+         * @param[in]    num   FIR coefficient vector.
+         * @param[in]    N     Number of elements.
+         * @param[out]   x     Vector to be processed.
+         **/
+        void Fir5(const vec1D32F& num, uint32_t N, vec1D32F& x);
+
+        /**
+         * @brief           Down-sample the pitch buffer.
+         * @param[in,out]   pitchBuf     Pitch buffer.
+         * @param[in]       pitchBufSz   Buffer size.
+         **/
+        void PitchDownsample(vec1D32F& pitchBuf, size_t pitchBufSz);
+
+        /**
+         * @brief       Pitch search function.
+         * @param[in]   xLp        Shifted pitch buffer input.
+         * @param[in]   y          Pitch buffer input.
+         * @param[in]   len        Length to search for.
+         * @param[in]   maxPitch   Maximum pitch.
+         * @return      pitch index.
+         **/
+        int PitchSearch(vec1D32F& xLp, vec1D32F& y, uint32_t len, uint32_t maxPitch);
+
+        /**
+         * @brief       Finds the "best" pitch from the buffer.
+         * @param[in]   xCorr      Pitch correlation vector.
+         * @param[in]   y          Pitch buffer input.
+         * @param[in]   len        Length to search for.
+         * @param[in]   maxPitch   Maximum pitch.
+         * @return      pitch array (2 elements).
+         **/
+        arrHp FindBestPitch(vec1D32F& xCorr, vec1D32F& y, uint32_t len, uint32_t maxPitch);
+
+        /**
+         * @brief           Remove pitch period doubling errors.
+         * @param[in,out]   pitchBuf     Pitch buffer vector.
+         * @param[in]       maxPeriod    Maximum period.
+         * @param[in]       minPeriod    Minimum period.
+         * @param[in]       frameSize    Frame size.
+         * @param[in]       pitchIdx0_   Pitch index 0.
+         * @return          pitch index.
+         **/
+        int RemoveDoubling(
+                vec1D32F& pitchBuf,
+                uint32_t maxPeriod,
+                uint32_t minPeriod,
+                uint32_t frameSize,
+                size_t pitchIdx0_);
+
+        /**
+         * @brief       Computes pitch gain.
+         * @param[in]   xy   Single xy cross correlation value.
+         * @param[in]   xx   Single xx auto correlation value.
+         * @param[in]   yy   Single yy auto correlation value.
+         * @return      Calculated pitch gain.
+         **/
+        float ComputePitchGain(float xy, float xx, float yy);
+
+        /**
+         * @brief        Computes DCT vector from the given input.
+         * @param[in]    input    Input vector.
+         * @param[out]   output   Output vector with DCT coefficients.
+         **/
+        void DCT(vec1D32F& input, vec1D32F& output);
+
+        /**
+         * @brief        Perform inverse fourier transform on complex spectral vector.
+         * @param[out]   out      Output vector.
+         * @param[in]    fftXIn   Vector of floats arranged to represent complex numbers interleaved.
+         **/
+        void InverseTransform(vec1D32F& out, vec1D32F& fftXIn);
+
+        /**
+         * @brief       Perform pitch filtering.
+         * @param[in]   features   Object with pre-processing calculated frame features.
+         * @param[in]   g          Gain values.
+         **/
+        void PitchFilter(FrameFeatures& features, vec1D32F& g);
+
+        /**
+         * @brief        Interpolate the band gain values.
+         * @param[out]   g       Gain values.
+         * @param[in]    bandE   Vector with 22 elements populated with energy for
+         *                       each band.
+         **/
+        void InterpBandGain(vec1D32F& g, vec1D32F& bandE);
+
+        /**
+         * @brief        Create de-noised frame.
+         * @param[out]   outFrame   Output vector for storing the created audio frame.
+         * @param[in]    fftY       Gain adjusted complex spectral vector.
+         */
+        void FrameSynthesis(vec1D32F& outFrame, vec1D32F& fftY);
+
+    /* Private objects */
+    private:
+        FftInstance m_fftInstReal;  /* FFT instance for real numbers */
+        FftInstance m_fftInstCmplx; /* FFT instance for complex numbers */
+        vec1D32F m_halfWindow;      /* Window coefficients */
+        vec1D32F m_dctTable;        /* DCT table */
+        vec1D32F m_analysisMem;     /* Buffer used for frame analysis */
+        vec2D32F m_cepstralMem;     /* Cepstral coefficients */
+        size_t m_memId;             /* memory ID */
+        vec1D32F m_synthesisMem;    /* Synthesis mem (used by post-processing) */
+        vec1D32F m_pitchBuf;        /* Pitch buffer */
+        float m_lastGain;           /* Last gain calculated */
+        int m_lastPeriod;           /* Last period calculated */
+        arrHp m_memHpX;             /* HpX coefficients. */
+        vec1D32F m_lastGVec;        /* Last gain vector (used by post-processing) */
+
+        /* Constants */
+        const std::array <uint32_t, NB_BANDS> m_eband5ms {
+            0,  1,  2,  3,  4,  5,  6,  7,  8, 10,  12,
+            14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100};
+    };
+
+
+} /* namespace rnn */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* RNNOISE_FEATURE_PROCESSOR_HPP */

diff --git a/source/application/api/use_case/noise_reduction/include/RNNoiseModel.hpp b/source/application/api/use_case/noise_reduction/include/RNNoiseModel.hpp
new file mode 100644
index 0000000..3d2f23c
--- /dev/null
+++ b/source/application/api/use_case/noise_reduction/include/RNNoiseModel.hpp

@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RNNOISE_MODEL_HPP
+#define RNNOISE_MODEL_HPP
+
+#include "Model.hpp"
+
+extern const uint32_t g_NumInputFeatures;   /* Declaration only; no definition in this header. */
+extern const uint32_t g_FrameLength;        /* Declaration only; no definition in this header. */
+extern const uint32_t g_FrameStride;        /* Declaration only; no definition in this header. */
+
+namespace arm {
+namespace app {
+
+    class RNNoiseModel : public Model { /* Model specialisation for RNNoise with helpers to reset/copy GRU states. */
+    public:
+        /**
+         * @brief Runs inference for RNNoise model.
+         *
+         * Call CopyGruStates so GRU state outputs are copied to GRU state inputs before the inference run.
+         * Run ResetGruState() method to set states to zero before starting processing logically related data.
+         * @return True if inference succeeded, False - otherwise
+         */
+        bool RunInference() override;
+
+        /**
+         * @brief Sets GRU input states to zeros.
+         * Call this method before starting processing the new sequence of logically related data.
+         */
+        void ResetGruState();
+
+        /** @brief Copy current GRU output states to input states.
+         * Call this method before starting processing the next sequence of logically related data.
+         * @return NOTE(review): meaning of the bool result is not stated here - presumably true on success; confirm.
+         */
+        bool CopyGruStates();
+
+        /* Which index of model outputs does the main output (gains) come from. */
+        const size_t m_indexForModelOutput = 1;
+
+    protected:
+        /** @brief   Gets the reference to op resolver interface class. */
+        const tflite::MicroOpResolver& GetOpResolver() override;
+
+        /** @brief   Adds operations to the op resolver instance. */
+        bool EnlistOperations() override;
+
+        /*
+        Each inference after the first needs to copy 3 GRU states from an output index to an input index (model dependent):
+        0 -> 3, 2 -> 2, 3 -> 1
+        */
+        const std::vector<std::pair<size_t, size_t>> m_gruStateMap = {{0,3}, {2, 2}, {3, 1}}; /* {output index, input index} pairs. */
+    private:
+        /* Maximum number of individual operations that can be enlisted. */
+        static constexpr int ms_maxOpCnt = 15;
+
+        /* A mutable op resolver instance. */
+        tflite::MicroMutableOpResolver<ms_maxOpCnt> m_opResolver;
+    };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* RNNOISE_MODEL_HPP */

diff --git a/source/application/api/use_case/noise_reduction/include/RNNoiseProcessing.hpp b/source/application/api/use_case/noise_reduction/include/RNNoiseProcessing.hpp
new file mode 100644
index 0000000..15e62d9
--- /dev/null
+++ b/source/application/api/use_case/noise_reduction/include/RNNoiseProcessing.hpp

@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RNNOISE_PROCESSING_HPP
+#define RNNOISE_PROCESSING_HPP
+
+#include "BaseProcessing.hpp"
+#include "Model.hpp"
+#include "RNNoiseFeatureProcessor.hpp"
+
+namespace arm {
+namespace app {
+
+    /**
+     * @brief   Pre-processing class for Noise Reduction use case.
+     *          Implements methods declared by BasePreProcess and anything else needed
+     *          to populate input tensors ready for inference.
+     */
+    class RNNoisePreProcess : public BasePreProcess {
+
+    public:
+        /**
+         * @brief           Constructor
+         * @param[in]       inputTensor        Pointer to the TFLite Micro input Tensor.
+         * @param[in,out]   featureProcessor   RNNoise specific feature extractor object.
+         * @param[in,out]   frameFeatures      RNNoise specific features shared between pre & post-processing.
+         *
+         **/
+        explicit RNNoisePreProcess(TfLiteTensor* inputTensor,
+                                   std::shared_ptr<rnn::RNNoiseFeatureProcessor> featureProcessor,
+                                   std::shared_ptr<rnn::FrameFeatures> frameFeatures);
+
+        /**
+         * @brief       Should perform pre-processing of 'raw' input audio data and load it into
+         *              TFLite Micro input tensors ready for inference
+         * @param[in]   input      Pointer to the data that pre-processing will work on.
+         * @param[in]   inputSize  Size of the input data.
+         * @return      true if successful, false otherwise.
+         **/
+        bool DoPreProcess(const void* input, size_t inputSize) override;
+
+    private:
+        TfLiteTensor* m_inputTensor;                        /* Model input tensor. */
+        std::shared_ptr<rnn::RNNoiseFeatureProcessor> m_featureProcessor;   /* RNNoise feature processor shared between pre & post-processing. */
+        std::shared_ptr<rnn::FrameFeatures> m_frameFeatures;                /* RNNoise features shared between pre & post-processing. */
+        rnn::vec1D32F m_audioFrame;                         /* Audio frame cast to FP32 */
+
+        /**
+         * @brief            Quantize the given features and populate the input Tensor.
+         * @param[in]        inputFeatures   Vector of floating point features to quantize.
+         * @param[in]        quantScale      Quantization scale for the inputTensor.
+         * @param[in]        quantOffset     Quantization offset for the inputTensor.
+         * @param[in,out]    inputTensor     TFLite micro tensor to populate.
+         **/
+        static void QuantizeAndPopulateInput(rnn::vec1D32F& inputFeatures,
+                float quantScale, int quantOffset,
+                TfLiteTensor* inputTensor);
+    };
+
+    /**
+     * @brief   Post-processing class for Noise Reduction use case.
+     *          Implements methods declared by BasePostProcess and anything else needed
+     *          to populate result vector.
+     */
+    class RNNoisePostProcess : public BasePostProcess {
+
+    public:
+        /**
+         * @brief           Constructor
+         * @param[in]       outputTensor         Pointer to the TFLite Micro output Tensor.
+         * @param[out]      denoisedAudioFrame   Vector to store the final denoised audio frame.
+         * @param[in,out]   featureProcessor     RNNoise specific feature extractor object.
+         * @param[in,out]   frameFeatures        RNNoise specific features shared between pre & post-processing.
+         **/
+        RNNoisePostProcess(TfLiteTensor* outputTensor,
+                           std::vector<int16_t>& denoisedAudioFrame,
+                           std::shared_ptr<rnn::RNNoiseFeatureProcessor> featureProcessor,
+                           std::shared_ptr<rnn::FrameFeatures> frameFeatures);
+
+        /**
+         * @brief       Should perform post-processing of the result of inference then
+         *              populate result data for any later use.
+         * @return      true if successful, false otherwise.
+         **/
+        bool DoPostProcess() override;
+
+    private:
+        TfLiteTensor* m_outputTensor;                       /* Model output tensor. */
+        std::vector<int16_t>& m_denoisedAudioFrame;         /* Reference (not a copy) to the vector that stores the final denoised frame. */
+        rnn::vec1D32F m_denoisedAudioFrameFloat;            /* Internal vector to store the final denoised frame (FP32). */
+        std::shared_ptr<rnn::RNNoiseFeatureProcessor> m_featureProcessor;   /* RNNoise feature processor shared between pre & post-processing. */
+        std::shared_ptr<rnn::FrameFeatures> m_frameFeatures;                /* RNNoise features shared between pre & post-processing. */
+        std::vector<float> m_modelOutputFloat;              /* Internal vector to store de-quantized model output. */
+
+    };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* RNNOISE_PROCESSING_HPP */
\ No newline at end of file
commit	aa4bcb14d0cbee910331545dd2fc086b58c37170	[log] [tgz]
author	Kshitij Sisodia <kshitij.sisodia@arm.com>	Fri May 06 09:13:03 2022 +0100
committer	Kshitij Sisodia <kshitij.sisodia@arm.com>	Fri May 06 17:11:41 2022 +0100
tree	e67a43a43f61c6f8b6aad19018b0827baf7e31a6
parent	fcca863bafd5f33522bc14c23dde4540e264ec94 [diff]