blob: f3b909221816c853345e28c241b611e1fa508184 [file] [log] [blame]
Éanna Ó Catháinc6ab02a2021-04-07 14:35:25 +01001//
2// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5
#include "AudioCapture.hpp"

#include <alsa/asoundlib.h>
#include <sndfile.h>
#include <samplerate.h>

#include <cmath>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
10
11namespace asr
12{
13 std::vector<float> AudioCapture::LoadAudioFile(std::string filePath)
14 {
15 SF_INFO inputSoundFileInfo;
16 SNDFILE* infile = NULL;
17 infile = sf_open(filePath.c_str(), SFM_READ, &inputSoundFileInfo);
18
19 float audioIn[inputSoundFileInfo.channels * inputSoundFileInfo.frames];
20 sf_read_float(infile, audioIn, inputSoundFileInfo.channels * inputSoundFileInfo.frames);
21
22 float sampleRate = 16000.0f;
23 float srcRatio = sampleRate / (float)inputSoundFileInfo.samplerate;
24 int outputFrames = ceil(inputSoundFileInfo.frames * srcRatio);
25 float dataOut[outputFrames];
26
27 // Convert to mono
28 float monoData[inputSoundFileInfo.frames];
29 for(int i = 0; i < inputSoundFileInfo.frames; i++)
30 {
31 float val = 0.0f;
32 for(int j = 0; j < inputSoundFileInfo.channels; j++)
33 monoData[i] += audioIn[i * inputSoundFileInfo.channels + j];
34 monoData[i] /= inputSoundFileInfo.channels;
35 }
36
37 // Resample
38 SRC_DATA srcData;
39 srcData.data_in = monoData;
40 srcData.input_frames = inputSoundFileInfo.frames;
41 srcData.data_out = dataOut;
42 srcData.output_frames = outputFrames;
43 srcData.src_ratio = srcRatio;
44
45 src_simple(&srcData, SRC_SINC_BEST_QUALITY, 1);
46
47 // Convert to Vector
48 std::vector<float> processedInput;
49
50 for(int i = 0; i < srcData.output_frames_gen; ++i)
51 {
52 processedInput.push_back(srcData.data_out[i]);
53 }
54
55 sf_close(infile);
56
57 return processedInput;
58 }
59
60 void AudioCapture::InitSlidingWindow(float* data, size_t dataSize, int minSamples, size_t stride)
61 {
62 this->m_window = SlidingWindow<const float>(data, dataSize, minSamples, stride);
63 }
64
65 bool AudioCapture::HasNext()
66 {
67 return m_window.HasNext();
68 }
69
70 std::vector<float> AudioCapture::Next()
71 {
72 if (this->m_window.HasNext())
73 {
74 int remainingData = this->m_window.RemainingData();
75 const float* windowData = this->m_window.Next();
76
77 size_t windowSize = this->m_window.GetWindowSize();
78
79 if(remainingData < windowSize)
80 {
81 std::vector<float> mfccAudioData(windowSize, 0.0f);
82 for(int i = 0; i < remainingData; ++i)
83 {
84 mfccAudioData[i] = *windowData;
85 if(i < remainingData - 1)
86 {
87 ++windowData;
88 }
89 }
90 return mfccAudioData;
91 }
92 else
93 {
94 std::vector<float> mfccAudioData(windowData, windowData + windowSize);
95 return mfccAudioData;
96 }
97 }
98 else
99 {
100 throw std::out_of_range("Error, end of audio data reached.");
101 }
102 }
103} //namespace asr
104