MLECO-3173: Add AD, KWS_ASR and Noise reduction use case API's Signed-off-by: Richard Burton <richard.burton@arm.com> Change-Id: I36f61ce74bf17f7b327cdae9704a22ca54144f37

commit: 4e002791bc6781b549c6951cfe44f918289d7e82 [log] [tgz]
author: Richard Burton <richard.burton@arm.com> Wed May 04 09:45:02 2022 +0100
committer: Richard Burton <richard.burton@arm.com> Wed May 04 09:45:02 2022 +0100
tree: b639243b5fa433657c207783a384bad1ed248536
parent: dd6d07b24bbf9023ebe8e8927be8aac3291d0f58 [diff] [blame]
diff --git a/source/use_case/kws_asr/include/Wav2LetterPreprocess.hpp b/source/use_case/kws_asr/include/Wav2LetterPreprocess.hpp
index 3609c49..1224c23 100644
--- a/source/use_case/kws_asr/include/Wav2LetterPreprocess.hpp
+++ b/source/use_case/kws_asr/include/Wav2LetterPreprocess.hpp

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,56 +14,51 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef KWS_ASR_WAV2LET_PREPROC_HPP
-#define KWS_ASR_WAV2LET_PREPROC_HPP
+#ifndef KWS_ASR_WAV2LETTER_PREPROCESS_HPP
+#define KWS_ASR_WAV2LETTER_PREPROCESS_HPP
 
 #include "Wav2LetterModel.hpp"
 #include "Wav2LetterMfcc.hpp"
 #include "AudioUtils.hpp"
 #include "DataStructures.hpp"
+#include "BaseProcessing.hpp"
 #include "log_macros.h"
 
 namespace arm {
 namespace app {
-namespace audio {
-namespace asr {
 
     /* Class to facilitate pre-processing calculation for Wav2Letter model
      * for ASR. */
-    using AudioWindow = SlidingWindow <const int16_t>;
+    using AudioWindow = audio::SlidingWindow<const int16_t>;
 
-    class Preprocess {
+    class AsrPreProcess : public BasePreProcess {
     public:
         /**
-         * @brief       Constructor
-         * @param[in]   numMfccFeatures   Number of MFCC features per window.
-         * @param[in]   windowLen         Number of elements in a window.
-         * @param[in]   windowStride      Stride (in number of elements) for
-         *                                moving the window.
-         * @param[in]   numMfccVectors    Number of MFCC vectors per window.
-        */
-        Preprocess(
-            uint32_t  numMfccFeatures,
-            uint32_t  windowLen,
-            uint32_t  windowStride,
-            uint32_t  numMfccVectors);
-        Preprocess() = delete;
-        ~Preprocess() = default;
+         * @brief       Constructor.
+         * @param[in]   inputTensor        Pointer to the TFLite Micro input Tensor.
+         * @param[in]   numMfccFeatures    Number of MFCC features per window.
+         * @param[in]   numFeatureFrames   Number of MFCC vectors that need to be calculated
+         *                                 for an inference.
+         * @param[in]   mfccWindowLen      Number of audio elements to calculate MFCC features per window.
+         * @param[in]   mfccWindowStride   Stride (in number of elements) for moving the MFCC window.
+         */
+        AsrPreProcess(TfLiteTensor* inputTensor,
+                      uint32_t  numMfccFeatures,
+                      uint32_t  numFeatureFrames,
+                      uint32_t  mfccWindowLen,
+                      uint32_t  mfccWindowStride);
 
         /**
          * @brief       Calculates the features required from audio data. This
          *              includes MFCC, first and second order deltas,
          *              normalisation and finally, quantisation. The tensor is
-         *              populated with feature from a given window placed along
+         *              populated with features from a given window placed along
          *              in a single row.
          * @param[in]   audioData      Pointer to the first element of audio data.
          * @param[in]   audioDataLen   Number of elements in the audio data.
-         * @param[in]   tensor         Tensor to be populated.
          * @return      true if successful, false in case of error.
          */
-        bool Invoke(const int16_t * audioData,
-                    uint32_t  audioDataLen,
-                    TfLiteTensor *  tensor);
+        bool DoPreProcess(const void* audioData, size_t audioDataLen) override;
 
     protected:
          /**
@@ -73,49 +68,32 @@
           * @param[in]  mfcc     MFCC buffers.
           * @param[out] delta1   Result of the first diff computation.
           * @param[out] delta2   Result of the second diff computation.
-          *
-          * @return true if successful, false otherwise.
+          * @return     true if successful, false otherwise.
           */
          static bool ComputeDeltas(Array2d<float>& mfcc,
                                    Array2d<float>& delta1,
                                    Array2d<float>& delta2);
 
         /**
-         * @brief       Given a 2D vector of floats, computes the mean.
-         * @param[in]   vec   Vector of vector of floats.
-         * @return      Mean value.
-         */
-        static float GetMean(Array2d<float>& vec);
-
-        /**
-         * @brief       Given a 2D vector of floats, computes the stddev.
-         * @param[in]   vec    Vector of vector of floats.
-         * @param[in]   mean   Mean value of the vector passed in.
-         * @return      stddev value.
-         */
-        static float GetStdDev(Array2d<float>& vec,
-                               const float mean);
-
-        /**
-         * @brief           Given a 2D vector of floats, normalises it using
-         *                  the mean and the stddev
+         * @brief           Given a 2D vector of floats, rescale it to have mean of 0 and
+        *                   standard deviation of 1.
          * @param[in,out]   vec   Vector of vector of floats.
          */
-        static void NormaliseVec(Array2d<float>& vec);
+        static void StandardizeVecF32(Array2d<float>& vec);
 
         /**
-         * @brief       Normalises the MFCC and delta buffers.
+         * @brief   Standardizes all the MFCC and delta buffers to have mean 0 and std. dev 1.
          */
-        void Normalise();
+        void Standarize();
 
         /**
          * @brief       Given the quantisation and data type limits, computes
          *              the quantised values of a floating point input data.
-         * @param[in]   elem            Element to be quantised.
-         * @param[in]   quantScale      Scale.
-         * @param[in]   quantOffset     Offset.
-         * @param[in]   minVal          Numerical limit - minimum.
-         * @param[in]   maxVal          Numerical limit - maximum.
+         * @param[in]   elem          Element to be quantised.
+         * @param[in]   quantScale    Scale.
+         * @param[in]   quantOffset   Offset.
+         * @param[in]   minVal        Numerical limit - minimum.
+         * @param[in]   maxVal        Numerical limit - maximum.
          * @return      Floating point quantised value.
          */
         static float GetQuantElem(
@@ -133,44 +111,43 @@
          *              this being the convolution speed up (as we can use
          *              contiguous memory). The output, however, requires the
          *              time axis to be in column major arrangement.
-         * @param[in]   outputBuf       Pointer to the output buffer.
-         * @param[in]   outputBufSz     Output buffer's size.
-         * @param[in]   quantScale      Quantisation scale.
-         * @param[in]   quantOffset     Quantisation offset.
+         * @param[in]   outputBuf     Pointer to the output buffer.
+         * @param[in]   outputBufSz   Output buffer's size.
+         * @param[in]   quantScale    Quantisation scale.
+         * @param[in]   quantOffset   Quantisation offset.
          */
         template <typename T>
         bool Quantise(
-                T *             outputBuf,
+                T*              outputBuf,
                 const uint32_t  outputBufSz,
                 const float     quantScale,
                 const int       quantOffset)
         {
-            /* Check the output size will for everything. */
+            /* Check the output size will fit everything. */
             if (outputBufSz < (this->m_mfccBuf.size(0) * 3 * sizeof(T))) {
                 printf_err("Tensor size too small for features\n");
                 return false;
             }
 
             /* Populate. */
-            T * outputBufMfcc = outputBuf;
-            T * outputBufD1 = outputBuf + this->m_numMfccFeats;
-            T * outputBufD2 = outputBufD1 + this->m_numMfccFeats;
+            T* outputBufMfcc = outputBuf;
+            T* outputBufD1 = outputBuf + this->m_numMfccFeats;
+            T* outputBufD2 = outputBufD1 + this->m_numMfccFeats;
             const uint32_t ptrIncr = this->m_numMfccFeats * 2;  /* (3 vectors - 1 vector) */
 
             const float minVal = std::numeric_limits<T>::min();
             const float maxVal = std::numeric_limits<T>::max();
 
-            /* We need to do a transpose while copying and concatenating
-             * the tensor. */
-            for (uint32_t j = 0; j < this->m_numFeatVectors; ++j) {
+            /* Need to transpose while copying and concatenating the tensor. */
+            for (uint32_t j = 0; j < this->m_numFeatureFrames; ++j) {
                 for (uint32_t i = 0; i < this->m_numMfccFeats; ++i) {
-                    *outputBufMfcc++ = static_cast<T>(this->GetQuantElem(
+                    *outputBufMfcc++ = static_cast<T>(AsrPreProcess::GetQuantElem(
                             this->m_mfccBuf(i, j), quantScale,
                             quantOffset, minVal, maxVal));
-                    *outputBufD1++ = static_cast<T>(this->GetQuantElem(
+                    *outputBufD1++ = static_cast<T>(AsrPreProcess::GetQuantElem(
                             this->m_delta1Buf(i, j), quantScale,
                             quantOffset, minVal, maxVal));
-                    *outputBufD2++ = static_cast<T>(this->GetQuantElem(
+                    *outputBufD2++ = static_cast<T>(AsrPreProcess::GetQuantElem(
                             this->m_delta2Buf(i, j), quantScale,
                             quantOffset, minVal, maxVal));
                 }
@@ -183,24 +160,23 @@
         }
 
     private:
-        Wav2LetterMFCC      m_mfcc;            /* MFCC instance. */
+        audio::Wav2LetterMFCC   m_mfcc;          /* MFCC instance. */
+        TfLiteTensor*           m_inputTensor;   /* Model input tensor. */
 
         /* Actual buffers to be populated. */
-        Array2d<float>      m_mfccBuf;         /* Contiguous buffer 1D: MFCC */
-        Array2d<float>      m_delta1Buf;       /* Contiguous buffer 1D: Delta 1 */
-        Array2d<float>      m_delta2Buf;       /* Contiguous buffer 1D: Delta 2 */
+        Array2d<float>   m_mfccBuf;              /* Contiguous buffer 1D: MFCC */
+        Array2d<float>   m_delta1Buf;            /* Contiguous buffer 1D: Delta 1 */
+        Array2d<float>   m_delta2Buf;            /* Contiguous buffer 1D: Delta 2 */
 
-        uint32_t            m_windowLen;       /* Window length for MFCC. */
-        uint32_t            m_windowStride;    /* Window stride len for MFCC. */
-        uint32_t            m_numMfccFeats;    /* Number of MFCC features per window. */
-        uint32_t            m_numFeatVectors;  /* Number of m_numMfccFeats. */
-        AudioWindow         m_window;          /* Sliding window. */
+        uint32_t         m_mfccWindowLen;        /* Window length for MFCC. */
+        uint32_t         m_mfccWindowStride;     /* Window stride len for MFCC. */
+        uint32_t         m_numMfccFeats;         /* Number of MFCC features per window. */
+        uint32_t         m_numFeatureFrames;     /* How many sets of m_numMfccFeats. */
+        AudioWindow      m_mfccSlidingWindow;    /* Sliding window to calculate MFCCs. */
 
     };
 
-} /* namespace asr */
-} /* namespace audio */
 } /* namespace app */
 } /* namespace arm */
 
-#endif /* KWS_ASR_WAV2LET_PREPROC_HPP */
\ No newline at end of file
+#endif /* KWS_ASR_WAV2LETTER_PREPROCESS_HPP */
\ No newline at end of file
commit	4e002791bc6781b549c6951cfe44f918289d7e82	[log] [tgz]
author	Richard Burton <richard.burton@arm.com>	Wed May 04 09:45:02 2022 +0100
committer	Richard Burton <richard.burton@arm.com>	Wed May 04 09:45:02 2022 +0100
tree	b639243b5fa433657c207783a384bad1ed248536
parent	dd6d07b24bbf9023ebe8e8927be8aac3291d0f58 [diff] [blame]