Opensource ML embedded evaluation kit

Change-Id: I12e807f19f5cacad7cef82572b6dd48252fd61fd
diff --git a/source/use_case/kws_asr/src/AsrClassifier.cc b/source/use_case/kws_asr/src/AsrClassifier.cc
new file mode 100644
index 0000000..bc86e09
--- /dev/null
+++ b/source/use_case/kws_asr/src/AsrClassifier.cc
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "AsrClassifier.hpp"
+
+#include "hal.h"
+#include "TensorFlowLiteMicro.hpp"
+#include "Wav2LetterModel.hpp"
+
+/* For every output row of the tensor, finds the column holding the largest raw
+ * value and records its label, index and de-quantised score in vecResults. */
+template<typename T>
+bool arm::app::AsrClassifier::_GetTopResults(TfLiteTensor* tensor,
+                            std::vector<ClassificationResult>& vecResults,
+                            const std::vector <std::string>& labels, double scale, double zeroPoint)
+{
+    const uint32_t numRows = tensor->dims->data[arm::app::Wav2LetterModel::ms_outputRowsIdx];
+    const uint32_t numCols = tensor->dims->data[arm::app::Wav2LetterModel::ms_outputColsIdx];
+
+    /* The tensor is not checked against the labels here (left to the public
+     * caller); only a tensor without any column is rejected. */
+    if (numCols < 1) {
+        return false;
+    }
+
+    /* One result slot per output row. */
+    vecResults = std::vector<ClassificationResult>(numRows);
+
+    T* tensorData = tflite::GetTensorData<T>(tensor);
+    for (uint32_t row = 0, offset = 0; row < numRows; ++row, offset += numCols) {
+        /* Linear scan; on ties the lowest column index wins. */
+        uint32_t bestIdx = 0;
+        T bestVal = tensorData[offset];
+        for (uint32_t col = 1; col < numCols; ++col) {
+            if (bestVal < tensorData[offset + col]) {
+                bestVal = tensorData[offset + col];
+                bestIdx = col;
+            }
+        }
+
+        const double rawScore = static_cast<int>(bestVal);
+        vecResults[row].m_normalisedVal = scale * (rawScore - zeroPoint);
+        vecResults[row].m_label = labels[bestIdx];
+        vecResults[row].m_labelIdx = bestIdx;
+    }
+
+    return true;
+}
+template bool arm::app::AsrClassifier::_GetTopResults<uint8_t>(TfLiteTensor* tensor,
+                            std::vector<ClassificationResult>& vecResults,
+                            const std::vector <std::string>& labels, double scale, double zeroPoint);
+template bool arm::app::AsrClassifier::_GetTopResults<int8_t>(TfLiteTensor* tensor,
+                            std::vector<ClassificationResult>& vecResults,
+                            const std::vector <std::string>& labels, double scale, double zeroPoint);
+
+/* Validates the output tensor against the labels, then fills vecResults with the
+ * top result of every output row. topNCount values other than 1 only trigger a warning. */
+bool arm::app::AsrClassifier::GetClassificationResults(
+            TfLiteTensor* outputTensor,
+            std::vector<ClassificationResult>& vecResults,
+            const std::vector <std::string>& labels, uint32_t topNCount)
+{
+        vecResults.clear();
+
+        /* Highest dims index read from the tensor: it needs MORE dims than this. */
+        constexpr int maxDimIdx = static_cast<int>(
+            (arm::app::Wav2LetterModel::ms_outputRowsIdx > arm::app::Wav2LetterModel::ms_outputColsIdx)?
+             arm::app::Wav2LetterModel::ms_outputRowsIdx : arm::app::Wav2LetterModel::ms_outputColsIdx);
+
+        constexpr uint32_t outColsIdx = arm::app::Wav2LetterModel::ms_outputColsIdx;
+
+        /* Sanity checks. */
+        if (outputTensor == nullptr) {
+            printf_err("Output vector is null pointer.\n");
+            return false;
+        } else if (outputTensor->dims == nullptr || outputTensor->dims->size <= maxDimIdx) {
+            printf_err("Output tensor expected to have at least %d dimensions\n", maxDimIdx + 1);
+            return false;
+        } else if (static_cast<uint32_t>(outputTensor->dims->data[outColsIdx]) < topNCount) {
+            printf_err("Output vectors are smaller than %u\n", topNCount);
+            return false;
+        } else if (static_cast<uint32_t>(outputTensor->dims->data[outColsIdx]) != labels.size()) {
+            printf_err("Output size doesn't match the labels' size\n");
+            return false;
+        }
+
+        if (topNCount != 1) {
+            warn("TopNCount value ignored in this implementation\n");
+        }
+
+        /* To return the floating point values, we need quantization parameters. */
+        QuantParams quantParams = GetTensorQuantParams(outputTensor);
+
+        bool resultState = false;
+        switch (outputTensor->type) {
+            case kTfLiteUInt8:
+                resultState = this->_GetTopResults<uint8_t>(
+                                        outputTensor, vecResults,
+                                        labels, quantParams.scale,
+                                        quantParams.offset);
+                break;
+            case kTfLiteInt8:
+                resultState = this->_GetTopResults<int8_t>(
+                                        outputTensor, vecResults,
+                                        labels, quantParams.scale,
+                                        quantParams.offset);
+                break;
+            default:
+                printf_err("Tensor type %s not supported by classifier\n",
+                    TfLiteTypeGetName(outputTensor->type));
+                return false;
+        }
+
+        if (!resultState) {
+            printf_err("Failed to get sorted set\n");
+        }
+        return resultState;
+}
\ No newline at end of file
diff --git a/source/use_case/kws_asr/src/DsCnnModel.cc b/source/use_case/kws_asr/src/DsCnnModel.cc
new file mode 100644
index 0000000..b573a12
--- /dev/null
+++ b/source/use_case/kws_asr/src/DsCnnModel.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "DsCnnModel.hpp"
+
+#include "hal.h"
+
+namespace arm {
+namespace app {
+namespace kws {
+    extern uint8_t* GetModelPointer();  /* Not defined in this file; returned by DsCnnModel::ModelPointer(). */
+    extern size_t GetModelLen();        /* Not defined in this file; returned by DsCnnModel::ModelSize(). */
+} /* namespace kws */
+} /* namespace app */
+} /* namespace arm */
+
+/* Gives read-only access to this model's op resolver member. */
+const tflite::MicroOpResolver& arm::app::DsCnnModel::GetOpResolver() {
+    return _m_opResolver;
+}
+
+/* Registers the operators listed below with the op resolver; with ARM_NPU
+ * defined, a failure to add the Arm NPU operator makes this return false. */
+bool arm::app::DsCnnModel::EnlistOperations()
+{
+    this->_m_opResolver.AddAveragePool2D();
+    this->_m_opResolver.AddConv2D();
+    this->_m_opResolver.AddDepthwiseConv2D();
+    this->_m_opResolver.AddFullyConnected();
+    this->_m_opResolver.AddRelu();
+    this->_m_opResolver.AddSoftmax();
+    this->_m_opResolver.AddQuantize();
+    this->_m_opResolver.AddDequantize();
+    this->_m_opResolver.AddReshape();
+
+#if defined(ARM_NPU)
+    if (kTfLiteOk != this->_m_opResolver.AddEthosU()) {
+        printf_err("Failed to add Arm NPU support to op resolver.\n");
+        return false;
+    }
+    info("Added %s support to op resolver\n", tflite::GetString_ETHOSU());
+#endif /* ARM_NPU */
+    return true;
+}
+
+/* Forwards the model data pointer reported by arm::app::kws::GetModelPointer(). */
+const uint8_t* arm::app::DsCnnModel::ModelPointer() {
+    return arm::app::kws::GetModelPointer();
+}
+
+/* Forwards the model length reported by arm::app::kws::GetModelLen(). */
+size_t arm::app::DsCnnModel::ModelSize() {
+    return arm::app::kws::GetModelLen();
+}
\ No newline at end of file
diff --git a/source/use_case/kws_asr/src/MainLoop.cc b/source/use_case/kws_asr/src/MainLoop.cc
new file mode 100644
index 0000000..37146c9
--- /dev/null
+++ b/source/use_case/kws_asr/src/MainLoop.cc
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "hal.h"                    /* Brings in platform definitions. */
+#include "InputFiles.hpp"           /* For input images. */
+#include "Labels_dscnn.hpp"         /* For DS-CNN label strings. */
+#include "Labels_wav2letter.hpp"    /* For Wav2Letter label strings. */
+#include "Classifier.hpp"           /* KWS classifier. */
+#include "AsrClassifier.hpp"        /* ASR classifier. */
+#include "DsCnnModel.hpp"           /* KWS model class for running inference. */
+#include "Wav2LetterModel.hpp"      /* ASR model class for running inference. */
+#include "UseCaseCommonUtils.hpp"   /* Utils functions. */
+#include "UseCaseHandler.hpp"       /* Handlers for different user options. */
+#include "Wav2LetterPreprocess.hpp" /* ASR pre-processing class. */
+#include "Wav2LetterPostprocess.hpp"/* ASR post-processing class. */
+
+using KwsClassifier = arm::app::Classifier;  /* Alias used below for the KWS classifier object. */
+
+enum opcodes  /* Menu option numbers printed by DisplayMenu() and dispatched in main_loop(). */
+{
+    MENU_OPT_RUN_INF_NEXT = 1,       /* Run on the clip at the context's current "clipIndex". */
+    MENU_OPT_RUN_INF_CHOSEN,         /* Run on a user provided clip index. */
+    MENU_OPT_RUN_INF_ALL,            /* Run inference on all (handler called with its last argument true). */
+    MENU_OPT_SHOW_MODEL_INFO,        /* Show model info for both the KWS and the ASR model. */
+    MENU_OPT_LIST_AUDIO_CLIPS        /* List the current baked audio clips. */
+};
+
+/* Prints the option menu; the numbers shown are the opcodes enumerator values. */
+static void DisplayMenu() {
+    printf("\n\nUser input required\n");
+    printf("Enter option number from:\n\n");
+    printf("  %u. Classify next audio clip\n", static_cast<unsigned int>(MENU_OPT_RUN_INF_NEXT));
+    printf("  %u. Classify audio clip at chosen index\n", static_cast<unsigned int>(MENU_OPT_RUN_INF_CHOSEN));
+    printf("  %u. Run classification on all audio clips\n", static_cast<unsigned int>(MENU_OPT_RUN_INF_ALL));
+    printf("  %u. Show NN model info\n", static_cast<unsigned int>(MENU_OPT_SHOW_MODEL_INFO));
+    printf("  %u. List audio clips\n\n", static_cast<unsigned int>(MENU_OPT_LIST_AUDIO_CLIPS));
+    printf("  Choice: ");
+}
+
+/** @brief Gets the number of MFCC features for a single window (a third of the input tensor's columns). */
+static uint32_t GetNumMfccFeatures(const arm::app::Model& model);
+
+/** @brief Gets the number of MFCC feature vectors to be computed (the input tensor's rows). */
+static uint32_t GetNumMfccFeatureVectors(const arm::app::Model& model);
+
+/** @brief Gets the output context length (left and right) for post-processing; 0 on error. */
+static uint32_t GetOutputContextLen(const arm::app::Model& model,
+                                    uint32_t inputCtxLen);
+
+/** @brief Gets the output inner length (output rows minus both contexts) for post-processing. */
+static uint32_t GetOutputInnerLen(const arm::app::Model& model,
+                                  uint32_t outputCtxLen);
+
+/* Sets up the KWS and ASR models, the ASR pre/post-processing and the application
+ * context, then runs the menu loop until a handler reports failure (a single pass
+ * is made when the menu is not in use, i.e. NUMBER_OF_FILES is not greater than 1). */
+void main_loop(hal_platform& platform)
+{
+    /* Model wrapper objects. */
+    arm::app::DsCnnModel kwsModel;
+    arm::app::Wav2LetterModel asrModel;
+
+    /* Load the models. */
+    if (!kwsModel.Init()) {
+        printf_err("Failed to initialise KWS model\n");
+        return;
+    }
+
+    /* Initialise the asr model using the same allocator from KWS
+     * to re-use the tensor arena. */
+    if (!asrModel.Init(kwsModel.GetAllocator())) {
+        printf_err("Failed to initalise ASR model\n");
+        return;
+    }
+
+    /* Initialise ASR pre-processing. */
+    arm::app::audio::asr::Preprocess prep(
+            GetNumMfccFeatures(asrModel),
+            arm::app::asr::g_FrameLength,
+            arm::app::asr::g_FrameStride,
+            GetNumMfccFeatureVectors(asrModel));
+
+    /* Initialise ASR post-processing. */
+    const uint32_t outputCtxLen = GetOutputContextLen(asrModel, arm::app::asr::g_ctxLen);
+    const uint32_t blankTokenIdx = 28;
+    arm::app::audio::asr::Postprocess postp(
+            outputCtxLen,
+            GetOutputInnerLen(asrModel, outputCtxLen),
+            blankTokenIdx);
+
+    /* Instantiate application context. */
+    arm::app::ApplicationContext caseContext;
+
+    caseContext.Set<hal_platform&>("platform", platform);
+    caseContext.Set<arm::app::Model&>("kwsmodel", kwsModel);
+    caseContext.Set<arm::app::Model&>("asrmodel", asrModel);
+    caseContext.Set<uint32_t>("clipIndex", 0);
+    caseContext.Set<uint32_t>("ctxLen", arm::app::asr::g_ctxLen);  /* Left and right context length (MFCC feat vectors). */
+    caseContext.Set<int>("kwsframeLength", arm::app::kws::g_FrameLength);
+    caseContext.Set<int>("kwsframeStride", arm::app::kws::g_FrameStride);
+    caseContext.Set<float>("kwsscoreThreshold", arm::app::kws::g_ScoreThreshold);  /* Normalised score threshold. */
+    caseContext.Set<uint32_t >("kwsNumMfcc", arm::app::kws::g_NumMfcc);
+    caseContext.Set<uint32_t >("kwsNumAudioWins", arm::app::kws::g_NumAudioWins);
+
+    caseContext.Set<int>("asrframeLength", arm::app::asr::g_FrameLength);
+    caseContext.Set<int>("asrframeStride", arm::app::asr::g_FrameStride);
+    caseContext.Set<float>("asrscoreThreshold", arm::app::asr::g_ScoreThreshold);  /* Normalised score threshold. */
+
+    KwsClassifier kwsClassifier;  /* Classifier wrapper object. */
+    arm::app::AsrClassifier asrClassifier;  /* Classifier wrapper object. */
+    caseContext.Set<arm::app::Classifier&>("kwsclassifier", kwsClassifier);
+    caseContext.Set<arm::app::AsrClassifier&>("asrclassifier", asrClassifier);
+
+    caseContext.Set<arm::app::audio::asr::Preprocess&>("preprocess", prep);
+    caseContext.Set<arm::app::audio::asr::Postprocess&>("postprocess", postp);
+
+    std::vector<std::string> asrLabels;
+    arm::app::asr::GetLabelsVector(asrLabels);
+    std::vector<std::string> kwsLabels;
+    arm::app::kws::GetLabelsVector(kwsLabels);
+    caseContext.Set<const std::vector <std::string>&>("asrlabels", asrLabels);
+    caseContext.Set<const std::vector <std::string>&>("kwslabels", kwsLabels);
+
+    /* Index of the kws outputs we trigger ASR on. */
+    caseContext.Set<uint32_t>("keywordindex", 2);
+
+    /* Status of the last handler run; the loop below stops once it is false. */
+    bool executionSuccessful = true;
+    constexpr bool bUseMenu = NUMBER_OF_FILES > 1 ? true : false;
+
+    /* Loop. */
+    do {
+        int menuOption = MENU_OPT_RUN_INF_NEXT;
+        if (bUseMenu) {
+            DisplayMenu();
+            menuOption = arm::app::ReadUserInputAsInt(platform);
+            printf("\n");
+        }
+        switch (menuOption) {
+            case MENU_OPT_RUN_INF_NEXT:
+                executionSuccessful = ClassifyAudioHandler(
+                        caseContext, caseContext.Get<uint32_t>("clipIndex"), false);
+                break;
+            case MENU_OPT_RUN_INF_CHOSEN: {
+                printf("    Enter the audio clip index [0, %d]: ",
+                       NUMBER_OF_FILES-1);
+                auto clipIndex = static_cast<uint32_t>(
+                        arm::app::ReadUserInputAsInt(platform));
+                executionSuccessful = ClassifyAudioHandler(caseContext,
+                                                           clipIndex,
+                                                           false);
+                break;
+            }
+            case MENU_OPT_RUN_INF_ALL:
+                executionSuccessful = ClassifyAudioHandler(
+                        caseContext, caseContext.Get<uint32_t>("clipIndex"), true);
+                break;
+            case MENU_OPT_SHOW_MODEL_INFO:
+                /* Both handlers always run; a failure of either one is reported. */
+                executionSuccessful = kwsModel.ShowModelInfoHandler();
+                executionSuccessful = asrModel.ShowModelInfoHandler() && executionSuccessful;
+                break;
+            case MENU_OPT_LIST_AUDIO_CLIPS:
+                executionSuccessful = ListFilesHandler(caseContext);
+                break;
+            default:
+                printf("Incorrect choice, try again.");
+                break;
+        }
+    } while (executionSuccessful && bUseMenu);
+    info("Main loop terminated.\n");
+}
+
+static uint32_t GetNumMfccFeatures(const arm::app::Model& model)
+{
+    TfLiteTensor* inputTensor = model.GetInputTensor(0);
+    const int inputCols = inputTensor->dims->data[arm::app::Wav2LetterModel::ms_inputColsIdx];
+    if (0 != inputCols % 3) {
+        printf_err("Number of input columns is not a multiple of 3\n");
+    }
+    return std::max(inputCols/3, 0);
+}
+
+/* The input tensor's row count, with negative values mapped to 0. */
+static uint32_t GetNumMfccFeatureVectors(const arm::app::Model& model) {
+    const TfLiteTensor* tensor = model.GetInputTensor(0);
+    const int numRows = tensor->dims->data[arm::app::Wav2LetterModel::ms_inputRowsIdx];
+    return (numRows > 0) ? numRows : 0;
+}
+
+static uint32_t GetOutputContextLen(const arm::app::Model& model, const uint32_t inputCtxLen)
+{
+    const uint32_t inputRows = GetNumMfccFeatureVectors(model);
+    const uint32_t inputInnerLen = inputRows - (2 * inputCtxLen);
+    constexpr uint32_t ms_outputRowsIdx = arm::app::Wav2LetterModel::ms_outputRowsIdx;
+
+    /* Check to make sure that the input tensor supports the above context and inner lengths. */
+    if (inputRows <= 2 * inputCtxLen || inputRows <= inputInnerLen) {
+        printf_err("Input rows not compatible with ctx of %u\n",
+                   inputCtxLen);
+        return 0;
+    }
+
+    TfLiteTensor* outputTensor = model.GetOutputTensor(0);
+    const uint32_t outputRows = std::max(outputTensor->dims->data[ms_outputRowsIdx], 0);
+
+    const float tensorColRatio = static_cast<float>(inputRows)/
+                                 static_cast<float>(outputRows);
+
+    return std::round(static_cast<float>(inputCtxLen)/tensorColRatio);
+}
+
+/* Output rows left once both context blocks are removed; clamped to 0 when the
+ * contexts do not fit (instead of letting the unsigned subtraction wrap). */
+static uint32_t GetOutputInnerLen(const arm::app::Model& model, const uint32_t outputCtxLen) {
+    const int rawRows = model.GetOutputTensor(0)->dims->data[arm::app::Wav2LetterModel::ms_outputRowsIdx];
+    const uint32_t outputRows = (rawRows > 0) ? static_cast<uint32_t>(rawRows) : 0;
+    const bool ctxFits = outputCtxLen <= outputRows / 2;  /* Overflow-safe form of 2*ctx <= rows. */
+    return ctxFits ? (outputRows - (2 * outputCtxLen)) : 0;
+}
diff --git a/source/use_case/kws_asr/src/OutputDecode.cc b/source/use_case/kws_asr/src/OutputDecode.cc
new file mode 100644
index 0000000..41fbe07
--- /dev/null
+++ b/source/use_case/kws_asr/src/OutputDecode.cc
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "OutputDecode.hpp"
+
+namespace arm {
+namespace app {
+namespace audio {
+namespace asr {
+
+    /* Builds the output string from the per-element labels: each run of identical
+     * consecutive labels contributes once, and "$" labels are left out. */
+    std::string DecodeOutput(const std::vector<ClassificationResult>& vecResults)
+    {
+        std::string decoded;
+        const size_t numResults = vecResults.size();
+        for (size_t idx = 0; idx < numResults; ++idx) {
+            const auto& label = vecResults[idx].m_label;
+            /* Only the last element of a run of equal labels is considered. */
+            const bool runContinues = (idx + 1 < numResults) &&
+                                      (label == vecResults[idx + 1].m_label);
+            if (runContinues || label == "$") {
+                continue;
+            }
+            decoded += label;
+        }
+
+        return decoded;
+    }
+
+} /* namespace asr */
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
diff --git a/source/use_case/kws_asr/src/UseCaseHandler.cc b/source/use_case/kws_asr/src/UseCaseHandler.cc
new file mode 100644
index 0000000..c50796f
--- /dev/null
+++ b/source/use_case/kws_asr/src/UseCaseHandler.cc
@@ -0,0 +1,707 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "UseCaseHandler.hpp"
+
+#include "hal.h"
+#include "InputFiles.hpp"
+#include "AudioUtils.hpp"
+#include "UseCaseCommonUtils.hpp"
+#include "DsCnnModel.hpp"
+#include "DsCnnMfcc.hpp"
+#include "Classifier.hpp"
+#include "KwsResult.hpp"
+#include "Wav2LetterMfcc.hpp"
+#include "Wav2LetterPreprocess.hpp"
+#include "Wav2LetterPostprocess.hpp"
+#include "AsrResult.hpp"
+#include "AsrClassifier.hpp"
+#include "OutputDecode.hpp"
+
+
+using KwsClassifier = arm::app::Classifier;  /* Type fetched from the context as "kwsclassifier" in doKws(). */
+
+namespace arm {
+namespace app {
+
+    enum AsrOutputReductionAxis {  /* NOTE(review): presumably axes of the ASR output tensor - confirm at the use site. */
+        AxisRow = 1,
+        AxisCol = 2
+    };
+
+    struct KWSOutput {  /* Returned by doKws() and passed on to doAsr(). */
+        bool executionSuccess = false;           /* Set to true by doKws() after the results have been presented. */
+        const int16_t* asrAudioStart = nullptr;  /* Set on keyword detection: detecting window start + KWS window size. */
+        int32_t asrAudioSamples = 0;             /* Set on keyword detection: clip size minus the end offset of that window. */
+    };
+
+    /* Forward declarations of the file-local (static) helpers. */
+    /**
+    * @brief           Helper function to increment current audio clip index
+    * @param[in,out]   ctx     reference to the application context object
+    **/
+    static void _IncrementAppCtxClipIdx(ApplicationContext& ctx);
+
+    /**
+     * @brief           Helper function to set the audio clip index
+     * @param[in,out]   ctx     reference to the application context object
+     * @param[in]       idx     value to be set
+     * @return          true if index is set, false otherwise
+     **/
+    static bool _SetAppCtxClipIdx(ApplicationContext& ctx, uint32_t idx);
+
+    /**
+     * @brief           Presents kws inference results using the data presentation
+     *                  object.
+     * @param[in]       platform    reference to the hal platform object
+     * @param[in]       results     vector of KWS results to be displayed
+     * @return          true if successful, false otherwise
+     **/
+    static bool _PresentInferenceResult(hal_platform& platform, std::vector<arm::app::kws::KwsResult>& results);
+
+    /**
+     * @brief           Presents asr inference results using the data presentation
+     *                  object.
+     * @param[in]       platform    reference to the hal platform object
+     * @param[in]       results     vector of ASR results to be displayed
+     * @return          true if successful, false otherwise
+     **/
+    static bool _PresentInferenceResult(hal_platform& platform, std::vector<arm::app::asr::AsrResult>& results);
+
+    /**
+     * @brief Returns a function to perform feature calculation and populates input tensor data with
+     * MFCC data.
+     *
+     * Input tensor data type check is performed to choose correct MFCC feature data type.
+     * If tensor has an integer data type then original features are quantised.
+     *
+     * Warning: mfcc calculator provided as input must have the same life scope as returned function.
+     *
+     * The returned callable is invoked by doKws() once per MFCC window with, in order:
+     *   - std::vector<int16_t>&  audio samples of the current MFCC window;
+     *   - int                    index of the MFCC window slider for that window;
+     *   - bool                   whether cached feature vectors should be used (false
+     *                            for the first audio window, whose cache is not ready);
+     *   - size_t                 number of MFCC feature vectors in one audio data stride.
+     * doKws() treats an empty returned function as a failure.
+     *
+     * @param[in]           mfcc            MFCC feature calculator.
+     * @param[in,out]       inputTensor     Input tensor pointer to store calculated features.
+     * @param[in]           cacheSize       Size of the feature vectors cache (number of feature vectors).
+     *
+     * @return function     function to be called providing the audio samples, sliding window
+     *                      index, cache flag and stride size described above.
+     **/
+    static std::function<void (std::vector<int16_t>&, int, bool, size_t)>
+    GetFeatureCalculator(audio::DsCnnMFCC&  mfcc,
+                         TfLiteTensor*      inputTensor,
+                         size_t             cacheSize);
+
+    /**
+     * @brief Performs the KWS pipeline.
+     * @param[in,out]   ctx reference to the application context object
+     *
+     * @return KWSOutput    struct containing pointer to audio data where ASR should begin
+     *                      and how much data to process; executionSuccess stays false on failure.
+     */
+    static KWSOutput doKws(ApplicationContext& ctx) {
+        constexpr uint32_t dataPsnTxtInfStartX = 20;
+        constexpr uint32_t dataPsnTxtInfStartY = 40;
+
+        constexpr int minTensorDims = static_cast<int>(
+            (arm::app::DsCnnModel::ms_inputRowsIdx > arm::app::DsCnnModel::ms_inputColsIdx)?
+             arm::app::DsCnnModel::ms_inputRowsIdx : arm::app::DsCnnModel::ms_inputColsIdx);
+
+        KWSOutput output;
+
+        auto& kwsModel = ctx.Get<Model&>("kwsmodel");
+        if (!kwsModel.IsInited()) {
+            printf_err("KWS model has not been initialised\n");
+            return output;
+        }
+
+        const int kwsFrameLength = ctx.Get<int>("kwsframeLength");
+        const int kwsFrameStride = ctx.Get<int>("kwsframeStride");
+        const float kwsScoreThreshold = ctx.Get<float>("kwsscoreThreshold");
+
+        TfLiteTensor* kwsOutputTensor = kwsModel.GetOutputTensor(0);
+        TfLiteTensor* kwsInputTensor = kwsModel.GetInputTensor(0);
+
+        /* NOTE(review): minTensorDims is the larger rows/cols index, so a size equal to it passes - confirm '<' vs '<='. */
+        if (!kwsInputTensor->dims) {
+            printf_err("Invalid input tensor dims\n");
+            return output;
+        } else if (kwsInputTensor->dims->size < minTensorDims) {
+            printf_err("Input tensor dimension should be >= %d\n", minTensorDims);
+            return output;
+        }
+
+        const uint32_t kwsNumMfccFeats = ctx.Get<uint32_t>("kwsNumMfcc");
+        const uint32_t kwsNumAudioWindows = ctx.Get<uint32_t>("kwsNumAudioWins");
+
+        audio::DsCnnMFCC kwsMfcc = audio::DsCnnMFCC(kwsNumMfccFeats, kwsFrameLength);
+        kwsMfcc.Init();
+
+        /* Deduce the data length required for 1 KWS inference from the network parameters. */
+        auto kwsAudioDataWindowSize = kwsNumAudioWindows * kwsFrameStride +
+                                        (kwsFrameLength - kwsFrameStride);
+        auto kwsMfccWindowSize = kwsFrameLength;
+        auto kwsMfccWindowStride = kwsFrameStride;
+
+        /* We are choosing to move by half the window size => for a 1 second window size,
+         * this means an overlap of 0.5 seconds. */
+        auto kwsAudioDataStride = kwsAudioDataWindowSize / 2;
+
+        info("KWS audio data window size %u\n", kwsAudioDataWindowSize);
+
+        /* Stride must be multiple of mfcc features window stride to re-use features. */
+        if (0 != kwsAudioDataStride % kwsMfccWindowStride) {
+            kwsAudioDataStride -= kwsAudioDataStride % kwsMfccWindowStride;
+        }
+
+        auto kwsMfccVectorsInAudioStride = kwsAudioDataStride/kwsMfccWindowStride;
+
+        /* We expect to be sampling 1 second worth of data at a time
+         * NOTE: This is only used for time stamp calculation. */
+        const float kwsAudioParamsSecondsPerSample = 1.0/audio::DsCnnMFCC::ms_defaultSamplingFreq;
+
+        auto currentIndex = ctx.Get<uint32_t>("clipIndex");
+
+        /* Creating a mfcc features sliding window for the data required for 1 inference. */
+        auto kwsAudioMFCCWindowSlider = audio::SlidingWindow<const int16_t>(
+                get_audio_array(currentIndex),
+                kwsAudioDataWindowSize, kwsMfccWindowSize, kwsMfccWindowStride);
+
+        /* Creating a sliding window through the whole audio clip. */
+        auto audioDataSlider = audio::SlidingWindow<const int16_t>(
+                get_audio_array(currentIndex),
+                get_audio_array_size(currentIndex),
+                kwsAudioDataWindowSize, kwsAudioDataStride);
+
+        /* Calculate number of the feature vectors in the window overlap region.
+         * These feature vectors will be reused.*/
+        size_t numberOfReusedFeatureVectors = kwsAudioMFCCWindowSlider.TotalStrides() + 1
+                                              - kwsMfccVectorsInAudioStride;
+
+        auto kwsMfccFeatureCalc = GetFeatureCalculator(kwsMfcc, kwsInputTensor,
+                                                       numberOfReusedFeatureVectors);
+
+        if (!kwsMfccFeatureCalc){
+            return output;
+        }
+
+        /* Container for KWS results. */
+        std::vector<arm::app::kws::KwsResult> kwsResults;
+
+        /* Display message on the LCD - inference running. */
+        auto& platform = ctx.Get<hal_platform&>("platform");
+        std::string str_inf{"Running KWS inference... "};
+        platform.data_psn->present_data_text(
+                            str_inf.c_str(), str_inf.size(),
+                            dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
+
+        info("Running KWS inference on audio clip %u => %s\n",
+             currentIndex, get_filename(currentIndex));
+
+        /* Start sliding through audio clip. */
+        while (audioDataSlider.HasNext()) {
+            const int16_t* inferenceWindow = audioDataSlider.Next();
+
+            /* We moved to the next window - set the features sliding to the new address. */
+            kwsAudioMFCCWindowSlider.Reset(inferenceWindow);
+
+            /* The first window does not have cache ready. */
+            bool useCache = audioDataSlider.Index() > 0 && numberOfReusedFeatureVectors > 0;
+
+            /* Start calculating features inside one audio sliding window. */
+            while (kwsAudioMFCCWindowSlider.HasNext()) {
+                const int16_t* kwsMfccWindow = kwsAudioMFCCWindowSlider.Next();
+                std::vector<int16_t> kwsMfccAudioData(kwsMfccWindow, kwsMfccWindow + kwsMfccWindowSize);
+
+                /* Compute features for this window and write them to input tensor. */
+                kwsMfccFeatureCalc(kwsMfccAudioData, kwsAudioMFCCWindowSlider.Index(),
+                                   useCache, kwsMfccVectorsInAudioStride);
+            }
+
+            info("Inference %zu/%zu\n", audioDataSlider.Index() + 1,
+                 audioDataSlider.TotalStrides() + 1);
+
+            /* Run inference over this audio clip sliding window. */
+            arm::app::RunInference(platform, kwsModel);
+
+            std::vector<ClassificationResult> kwsClassificationResult;
+            auto& kwsClassifier = ctx.Get<KwsClassifier&>("kwsclassifier");
+
+            kwsClassifier.GetClassificationResults(
+                            kwsOutputTensor, kwsClassificationResult,
+                            ctx.Get<std::vector<std::string>&>("kwslabels"), 1);
+
+            if (kwsClassificationResult.empty()) {  /* No result produced: element 0 must not be read below. */
+                printf_err("Failed to get KWS classification results\n");
+                return output;
+            }
+
+            kwsResults.emplace_back(kws::KwsResult(
+                    kwsClassificationResult,
+                    audioDataSlider.Index() * kwsAudioParamsSecondsPerSample * kwsAudioDataStride,
+                    audioDataSlider.Index(), kwsScoreThreshold));
+
+            /* Keyword detected. */
+            if (kwsClassificationResult[0].m_labelIdx == ctx.Get<uint32_t>("keywordindex")) {
+                output.asrAudioStart = inferenceWindow + kwsAudioDataWindowSize;
+                output.asrAudioSamples = get_audio_array_size(currentIndex) -
+                                        (audioDataSlider.NextWindowStartIndex() -
+                                        kwsAudioDataStride + kwsAudioDataWindowSize);
+                break;
+            }
+
+#if VERIFY_TEST_OUTPUT
+            arm::app::DumpTensor(kwsOutputTensor);
+#endif /* VERIFY_TEST_OUTPUT */
+
+        } /* while (audioDataSlider.HasNext()) */
+
+        /* Erase. */
+        str_inf = std::string(str_inf.size(), ' ');
+        platform.data_psn->present_data_text(
+                            str_inf.c_str(), str_inf.size(),
+                            dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
+
+        if (!_PresentInferenceResult(platform, kwsResults)) {
+            return output;
+        }
+
+        output.executionSuccess = true;
+        return output;
+    }
+
+    /**
+     * @brief Performs the ASR pipeline.
+     *
+     * Slides an inference window over the audio handed over by the KWS stage.
+     * For every window it runs pre-processing, inference and post-processing,
+     * stores the classification result and, once all windows are done,
+     * presents the decoded text.
+     *
+     * @param ctx[in/out]   reference to the application context object
+     * @param kwsOutput[in] struct containing pointer to audio data where ASR should begin
+     *                      and how much data to process
+     * @return bool         true if pipeline executed without failure; false if the
+     *                      model is not initialised, the context length does not fit
+     *                      the input tensor, there is not enough audio, or a
+     *                      pre/post-processing step reports failure
+     */
+    static bool doAsr(ApplicationContext& ctx, const KWSOutput& kwsOutput) {
+        constexpr uint32_t dataPsnTxtInfStartX = 20;
+        constexpr uint32_t dataPsnTxtInfStartY = 40;
+
+        auto& platform = ctx.Get<hal_platform&>("platform");
+        platform.data_psn->clear(COLOR_BLACK);
+
+        /* Get model reference. */
+        auto& asrModel = ctx.Get<Model&>("asrmodel");
+        if (!asrModel.IsInited()) {
+            printf_err("ASR model has not been initialised\n");
+            return false;
+        }
+
+        /* Get score threshold to be applied for the classifier (post-inference). */
+        auto asrScoreThreshold = ctx.Get<float>("asrscoreThreshold");
+
+        /* Dimensions of the tensor should have been verified by the caller. */
+        TfLiteTensor* asrInputTensor = asrModel.GetInputTensor(0);
+        TfLiteTensor* asrOutputTensor = asrModel.GetOutputTensor(0);
+        const uint32_t asrInputRows = asrInputTensor->dims->data[arm::app::Wav2LetterModel::ms_inputRowsIdx];
+
+        /* Populate ASR MFCC related parameters. */
+        auto asrMfccParamsWinLen = ctx.Get<uint32_t>("asrframeLength");
+        auto asrMfccParamsWinStride = ctx.Get<uint32_t>("asrframeStride");
+
+        /* Populate ASR inference context and inner lengths for input.
+         * The subtraction is unsigned; a wrapped value is rejected by the
+         * check that follows. */
+        auto asrInputCtxLen = ctx.Get<uint32_t>("ctxLen");
+        const uint32_t asrInputInnerLen = asrInputRows - (2 * asrInputCtxLen);
+
+        /* Make sure the input tensor supports the above context and inner lengths. */
+        if (asrInputRows <= 2 * asrInputCtxLen || asrInputRows <= asrInputInnerLen) {
+            printf_err("ASR input rows not compatible with ctx length %u\n", asrInputCtxLen);
+            return false;
+        }
+
+        /* Audio data stride corresponds to inputInnerLen feature vectors. */
+        const uint32_t asrAudioParamsWinLen = (asrInputRows - 1) *
+                                              asrMfccParamsWinStride + (asrMfccParamsWinLen);
+        const uint32_t asrAudioParamsWinStride = asrInputInnerLen * asrMfccParamsWinStride;
+        const float asrAudioParamsSecondsPerSample =
+                                        (1.0/audio::Wav2LetterMFCC::ms_defaultSamplingFreq);
+
+        /* Get pre/post-processing objects */
+        auto& asrPrep = ctx.Get<audio::asr::Preprocess&>("preprocess");
+        auto& asrPostp = ctx.Get<audio::asr::Postprocess&>("postprocess");
+
+        /* Set default reduction axis for post-processing. */
+        const uint32_t reductionAxis = arm::app::Wav2LetterModel::ms_outputRowsIdx;
+
+        /* Get the remaining audio buffer and respective size from KWS results. */
+        const int16_t* audioArr = kwsOutput.asrAudioStart;
+        const uint32_t audioArrSize = kwsOutput.asrAudioSamples;
+
+        /* Validate the hand-over from KWS before any of the data is touched. */
+        if (audioArr == nullptr) {
+            printf_err("No audio data available for ASR\n");
+            return false;
+        }
+
+        /* Audio clip must have enough samples to produce 1 MFCC feature. */
+        if (audioArrSize < asrMfccParamsWinLen) {
+            printf_err("Not enough audio samples, minimum needed is %u\n", asrMfccParamsWinLen);
+            return false;
+        }
+
+        /* Take a private copy of the audio only once it is known to be usable. */
+        std::vector<int16_t> audioBuffer(audioArr, audioArr + audioArrSize);
+
+        /* Initialise an audio slider. */
+        auto audioDataSlider = audio::ASRSlidingWindow<const int16_t>(
+                audioBuffer.data(),
+                audioBuffer.size(),
+                asrAudioParamsWinLen,
+                asrAudioParamsWinStride);
+
+        /* Declare a container for results. */
+        std::vector<arm::app::asr::AsrResult> asrResults;
+
+        /* Display message on the LCD - inference running. */
+        std::string str_inf{"Running ASR inference... "};
+        platform.data_psn->present_data_text(
+                str_inf.c_str(), str_inf.size(),
+                dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
+
+        size_t asrInferenceWindowLen = asrAudioParamsWinLen;
+
+        /* Start sliding through audio clip. */
+        while (audioDataSlider.HasNext()) {
+
+            /* If not enough audio see how much can be sent for processing. */
+            size_t nextStartIndex = audioDataSlider.NextWindowStartIndex();
+            if (nextStartIndex + asrAudioParamsWinLen > audioBuffer.size()) {
+                asrInferenceWindowLen = audioBuffer.size() - nextStartIndex;
+            }
+
+            const int16_t* asrInferenceWindow = audioDataSlider.Next();
+
+            info("Inference %zu/%zu\n", audioDataSlider.Index() + 1,
+                static_cast<size_t>(ceilf(audioDataSlider.FractionalTotalStrides() + 1)));
+
+            Profiler prepProfiler{&platform, "pre-processing"};
+            prepProfiler.StartProfiling();
+
+            /* Calculate MFCCs, deltas and populate the input tensor. */
+            const bool prepOk = asrPrep.Invoke(asrInferenceWindow,
+                                               asrInferenceWindowLen,
+                                               asrInputTensor);
+
+            prepProfiler.StopProfiling();
+            std::string prepProfileResults = prepProfiler.GetResultsAndReset();
+            info("%s\n", prepProfileResults.c_str());
+
+            /* Do not run inference on an input tensor that was not populated. */
+            if (!prepOk) {
+                printf_err("Pre-processing failed\n");
+                return false;
+            }
+
+            /* Run inference over this audio clip sliding window. */
+            arm::app::RunInference(platform, asrModel);
+
+            /* Post-process; the last window keeps its right context. */
+            if (!asrPostp.Invoke(asrOutputTensor, reductionAxis, !audioDataSlider.HasNext())) {
+                printf_err("Post-processing failed\n");
+                return false;
+            }
+
+            /* Get results. */
+            std::vector<ClassificationResult> asrClassificationResult;
+            auto& asrClassifier = ctx.Get<AsrClassifier&>("asrclassifier");
+            asrClassifier.GetClassificationResults(
+                    asrOutputTensor, asrClassificationResult,
+                    ctx.Get<std::vector<std::string>&>("asrlabels"), 1);
+
+            asrResults.emplace_back(asr::AsrResult(asrClassificationResult,
+                                                (audioDataSlider.Index() *
+                                                 asrAudioParamsSecondsPerSample *
+                                                 asrAudioParamsWinStride),
+                                                 audioDataSlider.Index(), asrScoreThreshold));
+
+#if VERIFY_TEST_OUTPUT
+            arm::app::DumpTensor(asrOutputTensor, asrOutputTensor->dims->data[arm::app::Wav2LetterModel::ms_outputColsIdx]);
+#endif /* VERIFY_TEST_OUTPUT */
+
+            /* Erase */
+            str_inf = std::string(str_inf.size(), ' ');
+            platform.data_psn->present_data_text(
+                        str_inf.c_str(), str_inf.size(),
+                        dataPsnTxtInfStartX, dataPsnTxtInfStartY, false);
+        }
+        if (!_PresentInferenceResult(platform, asrResults)) {
+            return false;
+        }
+
+        return true;
+    }
+
+    /* Audio inference classification handler: runs KWS on the selected clip
+     * and, when the keyword was spotted, ASR on the audio that follows it.
+     * With runAll set, keeps going until the clip index wraps back to the
+     * clip it started from. Returns false as soon as any stage fails. */
+    bool ClassifyAudioHandler(ApplicationContext& ctx, uint32_t clipIndex, bool runAll)
+    {
+        auto& platform = ctx.Get<hal_platform&>("platform");
+        platform.data_psn->clear(COLOR_BLACK);
+
+        /* Only a request with a valid index overrides the stored clip index. */
+        if (clipIndex < NUMBER_OF_FILES && !_SetAppCtxClipIdx(ctx, clipIndex)) {
+            return false;
+        }
+
+        const auto firstClipIdx = ctx.Get<uint32_t>("clipIndex");
+
+        for (;;) {
+            KWSOutput kwsOutput = doKws(ctx);
+            if (!kwsOutput.executionSuccess) {
+                return false;
+            }
+
+            const bool keywordSpotted = (kwsOutput.asrAudioStart != nullptr) &&
+                                        (kwsOutput.asrAudioSamples > 0);
+            if (keywordSpotted) {
+                info("Keyword spotted\n");
+                if (!doAsr(ctx, kwsOutput)) {
+                    printf_err("ASR failed");
+                    return false;
+                }
+            }
+
+            _IncrementAppCtxClipIdx(ctx);
+
+            /* Stop after one clip, or once every clip has been visited. */
+            if (!runAll || ctx.Get<uint32_t>("clipIndex") == firstClipIdx) {
+                break;
+            }
+        }
+
+        return true;
+    }
+
+    /* Advances the "clipIndex" entry of the application context by one,
+     * wrapping back to 0 once the last file has been passed. */
+    static void _IncrementAppCtxClipIdx(ApplicationContext& ctx)
+    {
+        const auto currentIdx = ctx.Get<uint32_t>("clipIndex");
+        const bool wrapAround = (currentIdx + 1 >= NUMBER_OF_FILES);
+
+        ctx.Set<uint32_t>("clipIndex", wrapAround ? 0 : currentIdx + 1);
+    }
+
+    /* Stores idx as the "clipIndex" entry of the application context.
+     * Returns false (and leaves the context untouched) when idx does not
+     * address one of the NUMBER_OF_FILES clips. */
+    static bool _SetAppCtxClipIdx(ApplicationContext& ctx, const uint32_t idx)
+    {
+        if (idx < NUMBER_OF_FILES) {
+            ctx.Set<uint32_t>("clipIndex", idx);
+            return true;
+        }
+
+        printf_err("Invalid idx %u (expected less than %u)\n",
+            idx, NUMBER_OF_FILES);
+        return false;
+    }
+
+    /* Presents the KWS results: one text row per inference showing the time
+     * stamp, the top keyword (or "<none>") and its score as a percentage.
+     * Every candidate of every inference is also written to the log.
+     * Always returns true. */
+    static bool _PresentInferenceResult(hal_platform& platform,
+                std::vector<arm::app::kws::KwsResult>& results)
+    {
+        constexpr uint32_t textStartX = 20;
+        constexpr uint32_t textStartY = 30;
+        constexpr uint32_t rowHeight  = 16;  /* Row index increment. */
+
+        platform.data_psn->set_text_color(COLOR_GREEN);
+
+        /* First result row sits two rows below the start position. */
+        uint32_t currentRow = textStartY + 2 * rowHeight;
+
+        for (const auto& result : results) {
+            const auto& candidates = result.m_resultVec;
+            const bool hasCandidate = !candidates.empty();
+
+            const std::string topKeyword =
+                    hasCandidate ? candidates[0].m_label : std::string{"<none>"};
+            const float score = hasCandidate ? candidates[0].m_normalisedVal : 0.f;
+
+            std::string resultStr{"@"};
+            resultStr += std::to_string(result.m_timeStamp);
+            resultStr += "s: ";
+            resultStr += topKeyword;
+            resultStr += " (";
+            resultStr += std::to_string(static_cast<int>(score * 100));
+            resultStr += "%)";
+
+            platform.data_psn->present_data_text(
+                        resultStr.c_str(), resultStr.size(),
+                        textStartX, currentRow, 0);
+            currentRow += rowHeight;
+
+            info("For timestamp: %f (inference #: %u); threshold: %f\n",
+                 result.m_timeStamp, result.m_inferenceNumber,
+                 result.m_threshold);
+
+            uint32_t rank = 0;
+            while (rank < candidates.size()) {
+                info("\t\tlabel @ %u: %s, score: %f\n", rank,
+                     candidates[rank].m_label.c_str(),
+                     candidates[rank].m_normalisedVal);
+                ++rank;
+            }
+        }
+
+        return true;
+    }
+
+    /* Presents the ASR results: logs the decoded text of each inference,
+     * then decodes the concatenation of all per-inference results and shows
+     * that final string on the display. Always returns true. */
+    static bool _PresentInferenceResult(hal_platform& platform, std::vector<arm::app::asr::AsrResult>& results)
+    {
+        constexpr uint32_t textStartX = 20;
+        constexpr uint32_t textStartY = 80;
+        constexpr bool allow_multiple_lines = true;
+
+        platform.data_psn->set_text_color(COLOR_GREEN);
+
+        /* Results from multiple inferences are combined before the final decode. */
+        std::vector<arm::app::ClassificationResult> allResults;
+
+        for (auto& inference : results) {
+            /* Per-inference string, for the log only. */
+            const std::string partialStr = audio::asr::DecodeOutput(inference.m_resultVec);
+            info("Result for inf %u: %s\n", inference.m_inferenceNumber,
+                 partialStr.c_str());
+
+            allResults.insert(allResults.end(),
+                              inference.m_resultVec.begin(),
+                              inference.m_resultVec.end());
+        }
+
+        const std::string finalResultStr = audio::asr::DecodeOutput(allResults);
+
+        platform.data_psn->present_data_text(
+                    finalResultStr.c_str(), finalResultStr.size(),
+                    textStartX, textStartY, allow_multiple_lines);
+
+        info("Final result: %s\n", finalResultStr.c_str());
+        return true;
+    }
+
+    /**
+     * @brief Generic feature calculator factory.
+     *
+     * Returns lambda function to compute features using features cache.
+     * Real features math is done by a lambda function provided as a parameter.
+     * Features are written to input tensor memory.
+     *
+     * The cache is a function-local static, i.e. there is one cache per
+     * feature type T and it is shared by every calculator created for that
+     * type. It is resized to cacheSize on every call of this factory so that
+     * a calculator never works with a cache sized for an earlier request.
+     *
+     * @tparam T            feature vector type.
+     * @param inputTensor   model input tensor pointer.
+     * @param cacheSize     number of feature vectors to cache. Defined by the sliding window overlap.
+     * @param compute       features calculator function.
+     * @return              lambda function to compute features. Its arguments are
+     *                      the audio window, the index of the feature vector inside
+     *                      the input tensor, whether the cache may be used, and the
+     *                      index from which computed features start refilling the cache.
+     **/
+    template<class T>
+    std::function<void (std::vector<int16_t>&, size_t, bool, size_t)>
+    _FeatureCalc(TfLiteTensor* inputTensor, size_t cacheSize,
+                 std::function<std::vector<T> (std::vector<int16_t>& )> compute)
+    {
+        /* Feature cache referenced by the lambda function (statics are not captured). */
+        static std::vector<std::vector<T>> featureCache;
+
+        /* Keeps existing entries when the size is unchanged; otherwise adapts
+         * the cache to the size requested by this call. */
+        featureCache.resize(cacheSize);
+
+        return [=](std::vector<int16_t>& audioDataWindow,
+                   size_t index,
+                   bool useCache,
+                   size_t featuresOverlapIndex)
+        {
+            T* tensorData = tflite::GetTensorData<T>(inputTensor);
+            std::vector<T> features;
+
+            /* Reuse features from cache if cache is ready and sliding windows overlap.
+             * Overlap is in the beginning of sliding window with a size of a feature cache.
+             */
+            if (useCache && index < featureCache.size()) {
+                features = std::move(featureCache[index]);
+            } else {
+                features = compute(audioDataWindow);
+            }
+            auto size = features.size();
+            auto sizeBytes = sizeof(T) * size;
+            std::memcpy(tensorData + (index * size), features.data(), sizeBytes);
+
+            /* Start renewing cache as soon iteration goes out of the windows overlap.
+             * The slot index is checked so that a mismatch between cache size and
+             * overlap index cannot write outside the cache. */
+            if (index >= featuresOverlapIndex) {
+                const size_t cacheSlot = index - featuresOverlapIndex;
+                if (cacheSlot < featureCache.size()) {
+                    featureCache[cacheSlot] = std::move(features);
+                }
+            }
+        };
+    }
+
+    /* Explicit instantiations of _FeatureCalc for the feature types used by
+     * GetFeatureCalculator: int8_t, uint8_t, int16_t and float. */
+    template std::function<void (std::vector<int16_t>&, size_t , bool, size_t)>
+    _FeatureCalc<int8_t>(TfLiteTensor* inputTensor,
+                         size_t cacheSize,
+                         std::function<std::vector<int8_t> (std::vector<int16_t>& )> compute);
+
+    template std::function<void (std::vector<int16_t>&, size_t , bool, size_t)>
+    _FeatureCalc<uint8_t>(TfLiteTensor* inputTensor,
+                          size_t cacheSize,
+                          std::function<std::vector<uint8_t> (std::vector<int16_t>& )> compute);
+
+    template std::function<void (std::vector<int16_t>&, size_t , bool, size_t)>
+    _FeatureCalc<int16_t>(TfLiteTensor* inputTensor,
+                          size_t cacheSize,
+                          std::function<std::vector<int16_t> (std::vector<int16_t>& )> compute);
+
+    template std::function<void(std::vector<int16_t>&, size_t, bool, size_t)>
+    _FeatureCalc<float>(TfLiteTensor* inputTensor,
+                        size_t cacheSize,
+                        std::function<std::vector<float>(std::vector<int16_t>&)> compute);
+
+
+    /**
+     * @brief Builds the MFCC feature calculator that matches the input tensor.
+     *
+     * For an affine-quantised tensor the calculator produces quantised
+     * features (int8, uint8 or int16, chosen by the tensor type) using the
+     * first scale/zero-point pair of the tensor. For any other quantisation
+     * type it produces float features.
+     *
+     * @param mfcc          MFCC calculator; captured by reference, so it must
+     *                      outlive the returned function.
+     * @param inputTensor   model input tensor pointer.
+     * @param cacheSize     number of feature vectors to cache.
+     * @return              function computing features for one audio window and
+     *                      writing them to the input tensor. Empty if the
+     *                      tensor is affine-quantised with an unsupported type.
+     **/
+    static std::function<void (std::vector<int16_t>&, int, bool, size_t)>
+    GetFeatureCalculator(audio::DsCnnMFCC& mfcc, TfLiteTensor* inputTensor, size_t cacheSize)
+    {
+        std::function<void (std::vector<int16_t>&, size_t, bool, size_t)> mfccFeatureCalc;
+
+        const TfLiteQuantization quant = inputTensor->quantization;
+
+        if (kTfLiteAffineQuantization != quant.type) {
+            /* Not affine-quantised: compute plain float features. */
+            mfccFeatureCalc = _FeatureCalc<float>(
+                inputTensor, cacheSize,
+                [&mfcc](std::vector<int16_t>& audioDataWindow) {
+                    return mfcc.MfccCompute(audioDataWindow);
+                });
+            return mfccFeatureCalc;
+        }
+
+        const auto* quantParams = static_cast<TfLiteAffineQuantization*>(quant.params);
+        const float quantScale = quantParams->scale->data[0];
+        const int quantOffset = quantParams->zero_point->data[0];
+
+        switch (inputTensor->type) {
+            case kTfLiteInt8: {
+                mfccFeatureCalc = _FeatureCalc<int8_t>(
+                    inputTensor, cacheSize,
+                    [=, &mfcc](std::vector<int16_t>& audioDataWindow) {
+                        return mfcc.MfccComputeQuant<int8_t>(audioDataWindow,
+                                                             quantScale,
+                                                             quantOffset);
+                    });
+                break;
+            }
+            case kTfLiteUInt8: {
+                mfccFeatureCalc = _FeatureCalc<uint8_t>(
+                    inputTensor, cacheSize,
+                    [=, &mfcc](std::vector<int16_t>& audioDataWindow) {
+                        return mfcc.MfccComputeQuant<uint8_t>(audioDataWindow,
+                                                              quantScale,
+                                                              quantOffset);
+                    });
+                break;
+            }
+            case kTfLiteInt16: {
+                mfccFeatureCalc = _FeatureCalc<int16_t>(
+                    inputTensor, cacheSize,
+                    [=, &mfcc](std::vector<int16_t>& audioDataWindow) {
+                        return mfcc.MfccComputeQuant<int16_t>(audioDataWindow,
+                                                              quantScale,
+                                                              quantOffset);
+                    });
+                break;
+            }
+            default: {
+                /* mfccFeatureCalc stays empty for unsupported tensor types. */
+                printf_err("Tensor type %s not supported\n", TfLiteTypeGetName(inputTensor->type));
+                break;
+            }
+        }
+
+        return mfccFeatureCalc;
+    }
+} /* namespace app */
+} /* namespace arm */
\ No newline at end of file
diff --git a/source/use_case/kws_asr/src/Wav2LetterMfcc.cc b/source/use_case/kws_asr/src/Wav2LetterMfcc.cc
new file mode 100644
index 0000000..80e4a26
--- /dev/null
+++ b/source/use_case/kws_asr/src/Wav2LetterMfcc.cc
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "Wav2LetterMfcc.hpp"
+
+#include "PlatformMath.hpp"
+
+#include <cfloat>
+
+namespace arm {
+namespace app {
+namespace audio {
+
+    /**
+     * @brief Applies the mel filter bank to an FFT vector.
+     *
+     * For every bank, accumulates the products of the filter weights with the
+     * FFT values in the index range [first, last] of that bank, on top of a
+     * small floor value.
+     *
+     * @param fftVec[in]                FFT values.
+     * @param melFilterBank[in]         filter weights, one row per bank; row
+     *                                  element 0 pairs with the bank's first index.
+     * @param filterBankFilterFirst[in] first FFT index used by each bank.
+     * @param filterBankFilterLast[in]  last FFT index used by each bank.
+     * @param melEnergies[out]          one energy per bank; its size defines
+     *                                  the number of banks processed.
+     * @return  false if the filter bank containers do not cover every bank,
+     *          true otherwise.
+     */
+    bool Wav2LetterMFCC::ApplyMelFilterBank(
+            std::vector<float>&                 fftVec,
+            std::vector<std::vector<float>>&    melFilterBank,
+            std::vector<int32_t>&               filterBankFilterFirst,
+            std::vector<int32_t>&               filterBankFilterLast,
+            std::vector<float>&                 melEnergies)
+    {
+        const size_t numBanks = melEnergies.size();
+
+        /* melFilterBank is indexed per bank below, so it needs a row for each. */
+        if (numBanks != filterBankFilterFirst.size() ||
+                numBanks != filterBankFilterLast.size() ||
+                numBanks > melFilterBank.size()) {
+            printf_err("unexpected filter bank lengths\n");
+            return false;
+        }
+
+        const int32_t fftVecSize = static_cast<int32_t>(fftVec.size());
+
+        for (size_t bin = 0; bin < numBanks; ++bin) {
+            const std::vector<float>& filter = melFilterBank[bin];
+            auto filterBankIter = filter.begin();
+            float melEnergy = 1e-10f;  /* Avoid log of zero at later stages, same value used in librosa. */
+            const int32_t firstIndex = filterBankFilterFirst[bin];
+            const int32_t lastIndex = filterBankFilterLast[bin];
+
+            /* A negative start index cannot address fftVec; such a bank only
+             * gets the floor value. The loop also stops at the end of the FFT
+             * vector or of the filter row, whichever comes first. */
+            if (firstIndex >= 0) {
+                for (int32_t i = firstIndex;
+                     i <= lastIndex && i < fftVecSize && filterBankIter != filter.end();
+                     ++i) {
+                    melEnergy += (*filterBankIter++ * fftVec[i]);
+                }
+            }
+
+            melEnergies[bin] = melEnergy;
+        }
+
+        return true;
+    }
+
+    /* Converts the mel energies, in place, to a scaled log10 representation
+     * and clamps every value to at most 80 below the largest one. */
+    void Wav2LetterMFCC::ConvertToLogarithmicScale(
+                            std::vector<float>& melEnergies)
+    {
+        /* Natural logs are turned into log10 by multiplying with log10(e);
+         * for the wav2letter model the log10 values are additionally scaled by 10. */
+        constexpr float logScale = 10.0 *  /* Default scalar. */
+                                   0.4342944819032518;  /* log10f(std::exp(1.0))*/
+        constexpr float dynamicRange = 80.0;
+
+        /* Natural logarithm of the whole vector. */
+        std::vector<float> naturalLogs(melEnergies.size(), 0.f);
+        math::MathUtils::VecLogarithmF32(melEnergies, naturalLogs);
+
+        /* Scale the logs, overwrite the energies and track the largest value. */
+        float peak = -FLT_MAX;
+        for (size_t idx = 0; idx < melEnergies.size(); ++idx) {
+            melEnergies[idx] = naturalLogs[idx] * logScale;
+            if (melEnergies[idx] > peak) {
+                peak = melEnergies[idx];
+            }
+        }
+
+        /* Nothing may fall further than dynamicRange below the peak. */
+        const float lowerLimit = peak - dynamicRange;
+        for (float& energy : melEnergies) {
+            energy = std::max(energy, lowerLimit);
+        }
+    }
+
+    /* Builds a row-major DCT matrix with coefficientCount rows of
+     * inputLength values each. Row 0 is constant; row k holds scaled
+     * cosines of (n + 0.5) times an angle that grows by pi/inputLength
+     * per row. */
+    std::vector<float> Wav2LetterMFCC::CreateDCTMatrix(
+                                        const int32_t inputLength,
+                                        const int32_t coefficientCount)
+    {
+        std::vector<float> dctMatrix(inputLength * coefficientCount);
+
+        /* Orthonormal normalization: one factor for row 0, another for the rest. */
+        const float firstRowScale = 2 * math::MathUtils::SqrtF32(1.0f /
+                                        static_cast<float>(4*inputLength));
+        const float otherRowScale = 2 * math::MathUtils::SqrtF32(1.0f /
+                                        static_cast<float>(2*inputLength));
+
+        const float angleStep = M_PI/inputLength;
+
+        /* Row 0: cos(0) = 1, so only the normalizer remains. */
+        int32_t col = 0;
+        while (col < inputLength) {
+            dctMatrix[col] = firstRowScale;
+            ++col;
+        }
+
+        /* Rows 1 onwards; the angle is accumulated row by row. */
+        float rowAngle = angleStep;
+        for (int32_t row = 1; row < coefficientCount; ++row) {
+            const int32_t rowOffset = row * inputLength;
+            for (int32_t n = 0; n < inputLength; ++n) {
+                dctMatrix[rowOffset + n] = otherRowScale *
+                    math::MathUtils::CosineF32((n + 0.5f) * rowAngle);
+            }
+            rowAngle += angleStep;
+        }
+
+        return dctMatrix;
+    }
+
+    /* Returns the Slaney normalization factor for mel weights: 2 divided by
+     * the difference of the inverse-mel-scaled right and left values. */
+    float Wav2LetterMFCC::GetMelFilterBankNormaliser(
+                                    const float&    leftMel,
+                                    const float&    rightMel,
+                                    const bool      useHTKMethod)
+    {
+        const auto rightInverse = MFCC::InverseMelScale(rightMel, useHTKMethod);
+        const auto leftInverse = MFCC::InverseMelScale(leftMel, useHTKMethod);
+
+        return (2.0f / (rightInverse - leftInverse));
+    }
+
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
diff --git a/source/use_case/kws_asr/src/Wav2LetterModel.cc b/source/use_case/kws_asr/src/Wav2LetterModel.cc
new file mode 100644
index 0000000..2114a3f
--- /dev/null
+++ b/source/use_case/kws_asr/src/Wav2LetterModel.cc
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "Wav2LetterModel.hpp"
+
+#include "hal.h"
+
+/* Accessors for the ASR model data. They are only declared here; no
+ * definition appears in this file. ModelPointer() and ModelSize() below
+ * forward to them. */
+namespace arm {
+namespace app {
+namespace asr {
+    extern uint8_t* GetModelPointer();  /* Pointer to the model data. */
+    extern size_t GetModelLen();        /* Length of the model data. */
+}
+} /* namespace app */
+} /* namespace arm */
+
+/* Gives read-only access to the operator resolver held by this model. */
+const tflite::MicroOpResolver& arm::app::Wav2LetterModel::GetOpResolver()
+{
+    const tflite::MicroOpResolver& resolver = this->_m_opResolver;
+    return resolver;
+}
+
+/* Registers the operators used by the model with the op resolver: Conv2D,
+ * Mul, Maximum and Reshape, plus the Ethos-U operator when built with
+ * ARM_NPU. Returns false only if adding the Ethos-U operator fails. */
+bool arm::app::Wav2LetterModel::EnlistOperations()
+{
+    auto& resolver = this->_m_opResolver;
+
+    resolver.AddConv2D();
+    resolver.AddMul();
+    resolver.AddMaximum();
+    resolver.AddReshape();
+
+#if defined(ARM_NPU)
+    if (kTfLiteOk != resolver.AddEthosU()) {
+        printf_err("Failed to add Arm NPU support to op resolver.");
+        return false;
+    }
+    info("Added %s support to op resolver\n",
+        tflite::GetString_ETHOSU());
+#endif /* ARM_NPU */
+    return true;
+}
+
+/* Returns the pointer reported by arm::app::asr::GetModelPointer(). */
+const uint8_t* arm::app::Wav2LetterModel::ModelPointer()
+{
+    const uint8_t* modelData = arm::app::asr::GetModelPointer();
+    return modelData;
+}
+
+/* Returns the length reported by arm::app::asr::GetModelLen(). */
+size_t arm::app::Wav2LetterModel::ModelSize()
+{
+    const size_t modelLen = arm::app::asr::GetModelLen();
+    return modelLen;
+}
\ No newline at end of file
diff --git a/source/use_case/kws_asr/src/Wav2LetterPostprocess.cc b/source/use_case/kws_asr/src/Wav2LetterPostprocess.cc
new file mode 100644
index 0000000..b173968
--- /dev/null
+++ b/source/use_case/kws_asr/src/Wav2LetterPostprocess.cc
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "Wav2LetterPostprocess.hpp"
+
+#include "Wav2LetterModel.hpp"
+
+namespace arm {
+namespace app {
+namespace audio {
+namespace asr {
+
+    /* Stores the context length, inner length and blank token index, derives
+     * the total length (two contexts plus the inner section) and starts the
+     * iteration counter at zero. */
+    Postprocess::Postprocess(const uint32_t contextLen,
+                             const uint32_t innerLen,
+                             const uint32_t blankTokenIdx)
+        :   _m_contextLen(contextLen),
+            _m_innerLen(innerLen),
+            _m_totalLen(2 * contextLen + innerLen),
+            _m_countIterations(0),
+            _m_blankTokenIdx(blankTokenIdx)
+    {
+    }
+
+    /* Post-processes the output tensor along axisIdx by erasing the context
+     * sections. Returns false if the tensor fails validation, has an
+     * unsupported element type, is too small for the configured total
+     * length, or if axisIdx is not the output rows axis. */
+    bool Postprocess::Invoke(TfLiteTensor*  tensor,
+                            const uint32_t  axisIdx,
+                            const bool      lastIteration)
+    {
+        /* Basic checks. */
+        if (!this->_IsInputValid(tensor, axisIdx)) {
+            return false;
+        }
+
+        /* Element size decides how many bytes one row of the tensor spans. */
+        const uint32_t elemSz = this->_GetTensorElementSize(tensor);
+        if (0 == elemSz) {
+            printf_err("Tensor type not supported for post processing\n");
+            return false;
+        }
+
+        if (elemSz * this->_m_totalLen > tensor->bytes) {
+            printf_err("Insufficient number of tensor bytes\n");
+            return false;
+        }
+
+        /* Irrespective of tensor type, the data is handled as unsigned bytes. */
+        uint8_t* ptrData = tflite::GetTensorData<uint8_t>(tensor);
+
+        /* Only row-wise processing is available. */
+        switch (axisIdx) {
+            case arm::app::Wav2LetterModel::ms_outputRowsIdx: {
+                const uint32_t rowSzBytes =
+                    elemSz * tensor->dims->data[arm::app::Wav2LetterModel::ms_outputColsIdx];
+                return this->_EraseSectionsRowWise(ptrData, rowSzBytes, lastIteration);
+            }
+            default: {
+                printf_err("Unsupported axis index: %u\n", axisIdx);
+                break;
+            }
+        }
+
+        return false;
+    }
+
+    /* Checks that the tensor exists, that axisIdx addresses one of its
+     * dimensions and that this dimension equals the configured total length. */
+    bool Postprocess::_IsInputValid(TfLiteTensor*  tensor,
+                                    const uint32_t axisIdx) const
+    {
+        if (nullptr == tensor) {
+            return false;
+        }
+
+        const int numDims = tensor->dims->size;
+        if (static_cast<int>(axisIdx) >= numDims) {
+            printf_err("Invalid axis index: %u; Max: %d\n",
+                axisIdx, numDims);
+            return false;
+        }
+
+        const int axisLen = tensor->dims->data[axisIdx];
+        if (static_cast<int>(this->_m_totalLen) != axisLen) {
+            printf_err("Unexpected tensor dimension for axis %d, \n",
+                axisLen);
+            return false;
+        }
+
+        return true;
+    }
+
+    /* Returns the size in bytes of one tensor element: 1 for (u)int8, 2 for
+     * int16, 4 for int32 and float32. Any other type is reported as an
+     * error and yields 0. */
+    uint32_t Postprocess::_GetTensorElementSize(TfLiteTensor*  tensor)
+    {
+        switch(tensor->type) {
+            case kTfLiteUInt8:
+            case kTfLiteInt8:
+                return 1;
+            case kTfLiteInt16:
+                return 2;
+            case kTfLiteInt32:
+            case kTfLiteFloat32:
+                return 4;
+            default:
+                break;
+        }
+
+        printf_err("Unsupported tensor type %s\n",
+            TfLiteTypeGetName(tensor->type));
+        return 0;
+    }
+
+    /* Erases the context rows of the output: the left context on every call
+     * but the first of a sequence, the right context on every call but the
+     * last. An erased row is zeroed and gets a 1 written at the blank token
+     * position. The iteration counter restarts after the last iteration.
+     * Always returns true.
+     * NOTE(review): the blank token is written at a byte offset inside the
+     * row - looks correct for 1-byte elements only; confirm for wider types. */
+    bool Postprocess::_EraseSectionsRowWise(
+                        uint8_t*         ptrData,
+                        const uint32_t   strideSzBytes,
+                        const bool       lastIteration)
+    {
+        /* The region to be erased sits in contiguous memory (row-major),
+         * so one memset per context section is enough. */
+        const uint32_t eraseLen = strideSzBytes * this->_m_contextLen;
+
+        /* Zeroes one context section, then marks every row in it as blank. */
+        auto blankOutSection = [&](uint8_t* section) {
+            std::memset(section, 0, eraseLen);
+            for (size_t row = 0; row < this->_m_contextLen; row++) {
+                section[row*strideSzBytes + this->_m_blankTokenIdx] = 1;
+            }
+        };
+
+        /* Left context is kept for the very first iteration only. */
+        const bool eraseLeft = (this->_m_countIterations > 0);
+        if (eraseLeft) {
+            blankOutSection(ptrData);
+        }
+
+        /* Right context is kept for the last iteration only. */
+        if (!lastIteration) {
+            uint8_t* rightCtxPtr = ptrData +
+                (strideSzBytes * (this->_m_contextLen + this->_m_innerLen));
+            blankOutSection(rightCtxPtr);
+        }
+
+        this->_m_countIterations = lastIteration ? 0 : this->_m_countIterations + 1;
+
+        return true;
+    }
+
+} /* namespace asr */
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
\ No newline at end of file
diff --git a/source/use_case/kws_asr/src/Wav2LetterPreprocess.cc b/source/use_case/kws_asr/src/Wav2LetterPreprocess.cc
new file mode 100644
index 0000000..613ddb0
--- /dev/null
+++ b/source/use_case/kws_asr/src/Wav2LetterPreprocess.cc
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "Wav2LetterPreprocess.hpp"
+
+#include "PlatformMath.hpp"
+#include "TensorFlowLiteMicro.hpp"
+
+#include <algorithm>
+#include <math.h>
+
+namespace arm {
+namespace app {
+namespace audio {
+namespace asr {
+
+    /* Builds the MFCC calculator and the MFCC, delta1 and delta2 buffers. */
+    Preprocess::Preprocess(const uint32_t numMfccFeatures, const uint32_t windowLen,
+                           const uint32_t windowStride, const uint32_t numMfccVectors)
+        : _m_mfcc(numMfccFeatures, windowLen),
+          _m_mfccBuf(numMfccFeatures, numMfccVectors),
+          _m_delta1Buf(numMfccFeatures, numMfccVectors),
+          _m_delta2Buf(numMfccFeatures, numMfccVectors),
+          _m_windowLen(windowLen),
+          _m_windowStride(windowStride),
+          _m_numMfccFeats(numMfccFeatures),
+          _m_numFeatVectors(numMfccVectors),
+          _m_window()
+    {
+        /* The MFCC calculator is only initialised for non-empty dimensions. */
+        if (0 == numMfccFeatures || 0 == windowLen) {
+            return;
+        }
+        this->_m_mfcc.Init();
+    }
+
+    /* Extracts MFCC, delta and delta-delta features from audioData, normalises
+     * them and quantises the result into tensor (uint8 or int8 only). Windows
+     * that do not fit in the MFCC buffer are ignored; missing ones are padded.
+     * Returns the outcome of _Quantise, or false if the deltas cannot be computed,
+     * the quantisation scale is 0 or the tensor type is unsupported. */
+    bool Preprocess::Invoke(const int16_t* audioData, const uint32_t audioDataLen,
+                            TfLiteTensor* tensor)
+    {
+        this->_m_window = SlidingWindow<const int16_t>(audioData, audioDataLen,
+                            this->_m_windowLen, this->_m_windowStride);
+
+        uint32_t mfccBufIdx = 0;
+        std::fill(_m_mfccBuf.begin(), _m_mfccBuf.end(), 0.f);
+        std::fill(_m_delta1Buf.begin(), _m_delta1Buf.end(), 0.f);
+        std::fill(_m_delta2Buf.begin(), _m_delta2Buf.end(), 0.f);
+
+        /* While we can slide over the window and a column is still free; further
+         * windows would be written past the end of the MFCC buffer. */
+        while (mfccBufIdx < this->_m_numFeatVectors && this->_m_window.HasNext()) {
+            const int16_t*  mfccWindow = this->_m_window.Next();
+            std::vector<int16_t> mfccAudioData(mfccWindow, mfccWindow + this->_m_windowLen);
+            auto mfcc = this->_m_mfcc.MfccCompute(mfccAudioData);
+            for (size_t i = 0; i < this->_m_mfccBuf.size(0); ++i) {
+                this->_m_mfccBuf(i, mfccBufIdx) = mfcc[i];
+            }
+            ++mfccBufIdx;
+        }
+
+        /* Pad MFCC if needed by adding MFCC for zeros. */
+        if (mfccBufIdx < this->_m_numFeatVectors) {
+            std::vector<int16_t> zerosWindow = std::vector<int16_t>(this->_m_windowLen, 0);
+            std::vector<float> mfccZeros = this->_m_mfcc.MfccCompute(zerosWindow);
+            /* Written through operator() like the loop above: a block copy from
+             * (0, mfccBufIdx) is only right if one vector's features are contiguous. */
+            for (; mfccBufIdx < this->_m_numFeatVectors; ++mfccBufIdx) {
+                for (size_t i = 0; i < this->_m_numMfccFeats; ++i) {
+                    this->_m_mfccBuf(i, mfccBufIdx) = mfccZeros[i];
+                }
+            }
+        }
+
+        /* Compute first and second order deltas from MFCCs. */
+        if (!this->_ComputeDeltas(this->_m_mfccBuf, this->_m_delta1Buf, this->_m_delta2Buf)) {
+            printf_err("Failed to compute MFCC deltas\n");
+            return false;
+        }
+
+        /* Normalise. */
+        this->_Normalise();
+
+        /* Quantise. */
+        QuantParams quantParams = GetTensorQuantParams(tensor);
+        if (0 == quantParams.scale) {
+            printf_err("Quantisation scale can't be 0\n");
+            return false;
+        }
+
+        switch(tensor->type) {
+            case kTfLiteUInt8:
+                return this->_Quantise<uint8_t>(
+                        tflite::GetTensorData<uint8_t>(tensor), tensor->bytes,
+                        quantParams.scale, quantParams.offset);
+            case kTfLiteInt8:
+                return this->_Quantise<int8_t>(
+                        tflite::GetTensorData<int8_t>(tensor), tensor->bytes,
+                        quantParams.scale, quantParams.offset);
+            default:
+                printf_err("Unsupported tensor type %s\n", TfLiteTypeGetName(tensor->type));
+        }
+
+        return false;
+    }
+
+    /* Fills delta1 and delta2 with the first and second order time derivatives
+     * of every MFCC feature. Entries closer than half a kernel to either end are
+     * left untouched (Invoke zero-fills both buffers beforehand). Returns false
+     * if delta1 has no rows, the deltas differ in row count or mfcc is empty. */
+    bool Preprocess::_ComputeDeltas(Array2d<float>& mfcc,
+                                    Array2d<float>& delta1,
+                                    Array2d<float>& delta2)
+    {
+        /* Fixed kernels: static arrays avoid a heap allocation on every call. */
+        static const float delta1Coeffs[] =
+            {6.66666667e-02,  5.00000000e-02,  3.33333333e-02,
+             1.66666667e-02, -3.46944695e-18, -1.66666667e-02,
+            -3.33333333e-02, -5.00000000e-02, -6.66666667e-02};
+
+        static const float delta2Coeffs[] =
+            {0.06060606,      0.01515152,     -0.01731602,
+            -0.03679654,     -0.04329004,     -0.03679654,
+            -0.01731602,      0.01515152,      0.06060606};
+
+        if (delta1.size(0) == 0 || delta2.size(0) != delta1.size(0) ||
+            mfcc.size(0) == 0 || mfcc.size(1) == 0) {
+            return false;
+        }
+
+        /* Get the middle index; coeff vec len should always be odd. */
+        const size_t coeffLen = sizeof(delta1Coeffs)/sizeof(delta1Coeffs[0]);
+        const size_t fMidIdx = (coeffLen - 1)/2;
+        const size_t numFeatures = mfcc.size(0);
+        const size_t numFeatVectors = mfcc.size(1);
+
+        /* For each feature, convolve its evolution over time with the differential
+         * kernels (1d, padding = valid) to get d/dt and d^2/dt^2; the result has
+         * `time length - kernel length + 1` entries. A plain loop is efficient enough
+         * for a small filter; larger ones would call for CMSIS-DSP, e.g. arm_fir_f32. */
+        for (size_t i = 0; i < numFeatures; ++i) {
+            /* `j + fMidIdx < n`, not `j < n - fMidIdx`: the unsigned subtraction
+             * wraps when n < fMidIdx and the loop would run far out of bounds. */
+            for (size_t j = fMidIdx; j + fMidIdx < numFeatVectors; ++j) {
+                float d1 = 0;
+                float d2 = 0;
+                const size_t mfccStIdx = j - fMidIdx;
+                /* Kernels are applied back to front, as a convolution requires. */
+                for (size_t k = 0; k < coeffLen; ++k) {
+                    const float sample = mfcc(i, mfccStIdx + k);
+                    d1 += sample * delta1Coeffs[coeffLen - 1 - k];
+                    d2 += sample * delta2Coeffs[coeffLen - 1 - k];
+                }
+                delta1(i,j) = d1;
+                delta2(i,j) = d2;
+            }
+        }
+
+        return true;
+    }
+
+    float Preprocess::_GetMean(Array2d<float>& vec)
+    {   /* Delegates to MathUtils::MeanF32 over all totalSize() elements of vec. */
+        return math::MathUtils::MeanF32(vec.begin(), vec.totalSize());
+    }
+
+    float Preprocess::_GetStdDev(Array2d<float>& vec, const float mean)
+    {   /* Delegates to MathUtils::StdDevF32 over all elements, reusing the given mean. */
+        return math::MathUtils::StdDevF32(vec.begin(), vec.totalSize(), mean);
+    }
+
+    void Preprocess::_NormaliseVec(Array2d<float>& vec)
+    {
+        const auto mean = Preprocess::_GetMean(vec);
+        const auto stddev = Preprocess::_GetStdDev(vec, mean);
+        debug("Mean: %f, Stddev: %f\n", mean, stddev);
+
+        /* A zero spread cannot be divided by: clear the whole buffer instead. */
+        if (stddev == 0) {
+            std::fill(vec.begin(), vec.end(), 0);
+            return;
+        }
+        /* (x - mean)/stddev, expanded so each element needs no division. */
+        const float scale = 1.f/stddev;
+        const float shift = mean/stddev;
+        for (auto it = vec.begin(); it != vec.end(); ++it) {
+            *it = (*it) * scale - shift;
+        }
+    }
+
+    void Preprocess::_Normalise()
+    {   /* Each buffer is normalised separately, in the order MFCC, delta1, delta2. */
+        for (auto* buf : {&this->_m_mfccBuf, &this->_m_delta1Buf, &this->_m_delta2Buf}) {
+            Preprocess::_NormaliseVec(*buf);
+        }
+    }
+
+    float Preprocess::_GetQuantElem(const float elem, const float quantScale,
+                                    const int quantOffset,
+                                    const float minVal, const float maxVal)
+    {
+        /* Scale, shift by the offset and round to the nearest whole value. */
+        const float rounded = std::round((elem/quantScale) + quantOffset);
+        /* Clamp into [minVal, maxVal]; the lower bound is applied first. */
+        const float raised = (rounded < minVal) ? minVal : rounded;
+        return (maxVal < raised) ? maxVal : raised;
+    }
+
+} /* namespace asr */
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
\ No newline at end of file