MLECO-1252 ASR sample application using the public ArmNN C++ API.

Change-Id: I98cd505b8772a8c8fa88308121bc94135bb45068
Signed-off-by: Éanna Ó Catháin <eanna.ocathain@arm.com>
diff --git a/samples/SpeechRecognition/src/AudioCapture.cpp b/samples/SpeechRecognition/src/AudioCapture.cpp
new file mode 100644
index 0000000..f3b9092
--- /dev/null
+++ b/samples/SpeechRecognition/src/AudioCapture.cpp
@@ -0,0 +1,104 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "AudioCapture.hpp"
+#include <alsa/asoundlib.h>
+#include <sndfile.h>
+#include <samplerate.h>
+
+namespace asr
+{
+    /// Loads the audio file at filePath with libsndfile, averages its channels down to mono and
+    /// resamples the result to 16 kHz with libsamplerate (SRC_SINC_BEST_QUALITY).
+    /// @param filePath path of the audio file to read.
+    /// @return the resampled mono samples.
+    /// @throws std::runtime_error if the file cannot be opened, yields no audio, or resampling fails.
+    std::vector<float> AudioCapture::LoadAudioFile(std::string filePath)
+    {
+        SF_INFO inputSoundFileInfo = {};   // libsndfile wants the format field zeroed before a read-mode open
+        SNDFILE* infile = sf_open(filePath.c_str(), SFM_READ, &inputSoundFileInfo);
+        if (infile == NULL)
+        {
+            throw std::runtime_error("Error, unable to open audio file: " + filePath);
+        }
+        const size_t channels = inputSoundFileInfo.channels > 0 ? inputSoundFileInfo.channels : 0;
+        const size_t frames = inputSoundFileInfo.frames > 0 ? inputSoundFileInfo.frames : 0;
+        std::vector<float> audioIn(channels * frames);   // heap, not a stack VLA: size follows the file length
+        const sf_count_t itemsRead = sf_read_float(infile, audioIn.data(), static_cast<sf_count_t>(audioIn.size()));
+        sf_close(infile);   // samples are in memory now; closing here means no error path leaks the handle
+        const size_t framesRead = (channels > 0 && itemsRead > 0) ? static_cast<size_t>(itemsRead) / channels : 0;
+        if (framesRead == 0 || inputSoundFileInfo.samplerate <= 0)
+        {
+            throw std::runtime_error("Error, no audio data in file: " + filePath);
+        }
+        // Convert to mono: average the interleaved channel samples of each frame.
+        std::vector<float> monoData(framesRead);
+        for (size_t i = 0; i < framesRead; ++i)
+        {
+            float sum = 0.0f;   // accumulate locally so no uninitialised memory is read
+            for (size_t j = 0; j < channels; ++j)
+                sum += audioIn[i * channels + j];
+            monoData[i] = sum / channels;
+        }
+        // Resample to 16 kHz; the output buffer is sized from the ratio and trimmed afterwards.
+        const float srcRatio = 16000.0f / static_cast<float>(inputSoundFileInfo.samplerate);
+        std::vector<float> processedInput(static_cast<size_t>(ceil(framesRead * srcRatio)));
+        SRC_DATA srcData = {};
+        srcData.data_in = monoData.data();
+        srcData.input_frames = static_cast<long>(framesRead);
+        srcData.data_out = processedInput.data();
+        srcData.output_frames = static_cast<long>(processedInput.size());
+        srcData.src_ratio = srcRatio;
+        if (src_simple(&srcData, SRC_SINC_BEST_QUALITY, 1) != 0)
+            throw std::runtime_error("Error, unable to resample audio file: " + filePath);
+        processedInput.resize(static_cast<size_t>(srcData.output_frames_gen));
+        return processedInput;
+    }
+
+    void AudioCapture::InitSlidingWindow(float* data, size_t dataSize, int minSamples, size_t stride)
+    {   // Replaces m_window with a SlidingWindow<const float> constructed from the four arguments.
+        m_window = SlidingWindow<const float>(data, dataSize, minSamples, stride);
+    }
+
+    bool AudioCapture::HasNext()
+    {   // Forwards the query to the sliding window's own HasNext().
+        return this->m_window.HasNext();
+    }
+
+    /// Returns the next window of samples from m_window as a vector of GetWindowSize() floats.
+    /// If RemainingData() reports fewer samples than the window size, only that many samples are
+    /// copied and the remaining elements are 0.0f.
+    /// @return the window contents, zero-padded at the end when the data runs short.
+    /// @throws std::out_of_range when m_window.HasNext() is false.
+    std::vector<float> AudioCapture::Next()
+    {
+        // Guard: refuse to continue once the window reports nothing further.
+        if (!m_window.HasNext())
+        {
+            throw std::out_of_range("Error, end of audio data reached.");
+        }
+
+        // RemainingData() is queried before Next(); the order is kept because Next()
+        // presumably advances the window (confirm against SlidingWindow).
+        int remainingData = m_window.RemainingData();
+        const float* windowStart = m_window.Next();
+        size_t windowSize = m_window.GetWindowSize();
+
+        // Explicit form of the int -> size_t conversion that comparing against windowSize implies.
+        const size_t available = static_cast<size_t>(remainingData);
+
+        if (available >= windowSize)
+        {
+            // A complete window is present: copy it as-is.
+            return std::vector<float>(windowStart, windowStart + windowSize);
+        }
+
+        // Short window: copy what is available, then let resize() pad with zeros.
+        std::vector<float> mfccAudioData(windowStart, windowStart + available);
+        mfccAudioData.resize(windowSize, 0.0f);
+        return mfccAudioData;
+    }
+} //namespace asr
+