MLECO-3173: Add AD, KWS_ASR and Noise reduction use case API's Signed-off-by: Richard Burton <richard.burton@arm.com> Change-Id: I36f61ce74bf17f7b327cdae9704a22ca54144f37

commit: 4e002791bc6781b549c6951cfe44f918289d7e82 [log] [tgz]
author: Richard Burton <richard.burton@arm.com> Wed May 04 09:45:02 2022 +0100
committer: Richard Burton <richard.burton@arm.com> Wed May 04 09:45:02 2022 +0100
tree: b639243b5fa433657c207783a384bad1ed248536
parent: dd6d07b24bbf9023ebe8e8927be8aac3291d0f58 [diff] [blame]
diff --git a/source/use_case/ad/src/AdProcessing.cc b/source/use_case/ad/src/AdProcessing.cc
new file mode 100644
index 0000000..a33131c
--- /dev/null
+++ b/source/use_case/ad/src/AdProcessing.cc

@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "AdProcessing.hpp"
+
+#include "AdModel.hpp"
+
+namespace arm {
+namespace app {
+
+AdPreProcess::AdPreProcess(TfLiteTensor* inputTensor,
+                           uint32_t melSpectrogramFrameLen,
+                           uint32_t melSpectrogramFrameStride,
+                           float adModelTrainingMean):
+       m_validInstance{false},
+       m_melSpectrogramFrameLen{melSpectrogramFrameLen},
+       m_melSpectrogramFrameStride{melSpectrogramFrameStride},
+        /**< Model is trained on features downsampled 2x */
+       m_inputResizeScale{2},
+        /**< We are choosing to move by 20 frames across the audio for each inference. */
+       m_numMelSpecVectorsInAudioStride{20},
+       m_audioDataStride{m_numMelSpecVectorsInAudioStride * melSpectrogramFrameStride},
+       m_melSpec{melSpectrogramFrameLen}
+{
+    if (!inputTensor) {
+        printf_err("Invalid input tensor provided to pre-process\n");
+        return;
+    }
+
+    TfLiteIntArray* inputShape = inputTensor->dims;
+
+    if (!inputShape) {
+        printf_err("Invalid input tensor dims\n");
+        return;
+    }
+
+    const uint32_t kNumRows = inputShape->data[AdModel::ms_inputRowsIdx];
+    const uint32_t kNumCols = inputShape->data[AdModel::ms_inputColsIdx];
+
+    /* Deduce the data length required for 1 inference from the network parameters. */
+    this->m_audioDataWindowSize = (((this->m_inputResizeScale * kNumCols) - 1) *
+                                    melSpectrogramFrameStride) +
+                                    melSpectrogramFrameLen;
+    this->m_numReusedFeatureVectors = kNumRows -
+                                      (this->m_numMelSpecVectorsInAudioStride /
+                                       this->m_inputResizeScale);
+    this->m_melSpec.Init();
+
+    /* Creating a Mel Spectrogram sliding window for the data required for 1 inference.
+     * "resizing" done here by multiplying stride by resize scale. */
+    this->m_melWindowSlider = audio::SlidingWindow<const int16_t>(
+            nullptr, /* to be populated later. */
+            this->m_audioDataWindowSize,
+            melSpectrogramFrameLen,
+            melSpectrogramFrameStride * this->m_inputResizeScale);
+
+    /* Construct feature calculation function. */
+    this->m_featureCalc = GetFeatureCalculator(this->m_melSpec, inputTensor,
+                                               this->m_numReusedFeatureVectors,
+                                               adModelTrainingMean);
+    this->m_validInstance = true;
+}
+
+bool AdPreProcess::DoPreProcess(const void* input, size_t inputSize)
+{
+    /* Check that we have a valid instance. */
+    if (!this->m_validInstance) {
+        printf_err("Invalid pre-processor instance\n");
+        return false;
+    }
+
+    /* We expect that we can traverse the size with which the MEL spectrogram
+     * sliding window was initialised with. */
+    if (!input || inputSize < this->m_audioDataWindowSize) {
+        printf_err("Invalid input provided for pre-processing\n");
+        return false;
+    }
+
+    /* We moved to the next window - set the features sliding to the new address. */
+    this->m_melWindowSlider.Reset(static_cast<const int16_t*>(input));
+
+    /* The first window does not have cache ready. */
+    const bool useCache = this->m_audioWindowIndex > 0 && this->m_numReusedFeatureVectors > 0;
+
+    /* Start calculating features inside one audio sliding window. */
+    while (this->m_melWindowSlider.HasNext()) {
+        const int16_t* melSpecWindow = this->m_melWindowSlider.Next();
+        std::vector<int16_t> melSpecAudioData = std::vector<int16_t>(
+                melSpecWindow,
+                melSpecWindow + this->m_melSpectrogramFrameLen);
+
+        /* Compute features for this window and write them to input tensor. */
+        this->m_featureCalc(melSpecAudioData,
+                            this->m_melWindowSlider.Index(),
+                            useCache,
+                            this->m_numMelSpecVectorsInAudioStride,
+                            this->m_inputResizeScale);
+    }
+
+    return true;
+}
+
+uint32_t AdPreProcess::GetAudioWindowSize()
+{
+    return this->m_audioDataWindowSize;
+}
+
+uint32_t AdPreProcess::GetAudioDataStride()
+{
+    return this->m_audioDataStride;
+}
+
+void AdPreProcess::SetAudioWindowIndex(uint32_t idx)
+{
+    this->m_audioWindowIndex = idx;
+}
+
+AdPostProcess::AdPostProcess(TfLiteTensor* outputTensor) :
+    m_outputTensor {outputTensor}
+{}
+
+bool AdPostProcess::DoPostProcess()
+{
+    switch (this->m_outputTensor->type) {
+        case kTfLiteInt8:
+            this->Dequantize<int8_t>();
+            break;
+        default:
+            printf_err("Unsupported tensor type");
+            return false;
+    }
+
+    math::MathUtils::SoftmaxF32(this->m_dequantizedOutputVec);
+    return true;
+}
+
+float AdPostProcess::GetOutputValue(uint32_t index)
+{
+    if (index < this->m_dequantizedOutputVec.size()) {
+        return this->m_dequantizedOutputVec[index];
+    }
+    printf_err("Invalid index for output\n");
+    return 0.0;
+}
+
+std::function<void (std::vector<int16_t>&, int, bool, size_t, size_t)>
+GetFeatureCalculator(audio::AdMelSpectrogram& melSpec,
+                     TfLiteTensor* inputTensor,
+                     size_t cacheSize,
+                     float trainingMean)
+{
+    std::function<void (std::vector<int16_t>&, size_t, bool, size_t, size_t)> melSpecFeatureCalc;
+
+    TfLiteQuantization quant = inputTensor->quantization;
+
+    if (kTfLiteAffineQuantization == quant.type) {
+
+        auto* quantParams = static_cast<TfLiteAffineQuantization*>(quant.params);
+        const float quantScale = quantParams->scale->data[0];
+        const int quantOffset = quantParams->zero_point->data[0];
+
+        switch (inputTensor->type) {
+            case kTfLiteInt8: {
+                melSpecFeatureCalc = FeatureCalc<int8_t>(
+                        inputTensor,
+                        cacheSize,
+                        [=, &melSpec](std::vector<int16_t>& audioDataWindow) {
+                            return melSpec.MelSpecComputeQuant<int8_t>(
+                                    audioDataWindow,
+                                    quantScale,
+                                    quantOffset,
+                                    trainingMean);
+                        }
+                );
+                break;
+            }
+            default:
+            printf_err("Tensor type %s not supported\n", TfLiteTypeGetName(inputTensor->type));
+        }
+    } else {
+        melSpecFeatureCalc = FeatureCalc<float>(
+                inputTensor,
+                cacheSize,
+                [=, &melSpec](
+                        std::vector<int16_t>& audioDataWindow) {
+                    return melSpec.ComputeMelSpec(
+                            audioDataWindow,
+                            trainingMean);
+                });
+    }
+    return melSpecFeatureCalc;
+}
+
+} /* namespace app */
+} /* namespace arm */
commit	4e002791bc6781b549c6951cfe44f918289d7e82	[log] [tgz]
author	Richard Burton <richard.burton@arm.com>	Wed May 04 09:45:02 2022 +0100
committer	Richard Burton <richard.burton@arm.com>	Wed May 04 09:45:02 2022 +0100
tree	b639243b5fa433657c207783a384bad1ed248536
parent	dd6d07b24bbf9023ebe8e8927be8aac3291d0f58 [diff] [blame]