//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
| 5 | |
#include "AudioCapture.hpp"

#include <alsa/asoundlib.h>
#include <sndfile.h>
#include <samplerate.h>

#include <cmath>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
| 10 | |
| 11 | namespace asr |
| 12 | { |
| 13 | std::vector<float> AudioCapture::LoadAudioFile(std::string filePath) |
| 14 | { |
| 15 | SF_INFO inputSoundFileInfo; |
| 16 | SNDFILE* infile = NULL; |
| 17 | infile = sf_open(filePath.c_str(), SFM_READ, &inputSoundFileInfo); |
| 18 | |
| 19 | float audioIn[inputSoundFileInfo.channels * inputSoundFileInfo.frames]; |
| 20 | sf_read_float(infile, audioIn, inputSoundFileInfo.channels * inputSoundFileInfo.frames); |
| 21 | |
| 22 | float sampleRate = 16000.0f; |
| 23 | float srcRatio = sampleRate / (float)inputSoundFileInfo.samplerate; |
| 24 | int outputFrames = ceil(inputSoundFileInfo.frames * srcRatio); |
| 25 | float dataOut[outputFrames]; |
| 26 | |
| 27 | // Convert to mono |
| 28 | float monoData[inputSoundFileInfo.frames]; |
| 29 | for(int i = 0; i < inputSoundFileInfo.frames; i++) |
| 30 | { |
| 31 | float val = 0.0f; |
| 32 | for(int j = 0; j < inputSoundFileInfo.channels; j++) |
| 33 | monoData[i] += audioIn[i * inputSoundFileInfo.channels + j]; |
| 34 | monoData[i] /= inputSoundFileInfo.channels; |
| 35 | } |
| 36 | |
| 37 | // Resample |
| 38 | SRC_DATA srcData; |
| 39 | srcData.data_in = monoData; |
| 40 | srcData.input_frames = inputSoundFileInfo.frames; |
| 41 | srcData.data_out = dataOut; |
| 42 | srcData.output_frames = outputFrames; |
| 43 | srcData.src_ratio = srcRatio; |
| 44 | |
| 45 | src_simple(&srcData, SRC_SINC_BEST_QUALITY, 1); |
| 46 | |
| 47 | // Convert to Vector |
| 48 | std::vector<float> processedInput; |
| 49 | |
| 50 | for(int i = 0; i < srcData.output_frames_gen; ++i) |
| 51 | { |
| 52 | processedInput.push_back(srcData.data_out[i]); |
| 53 | } |
| 54 | |
| 55 | sf_close(infile); |
| 56 | |
| 57 | return processedInput; |
| 58 | } |
| 59 | |
| 60 | void AudioCapture::InitSlidingWindow(float* data, size_t dataSize, int minSamples, size_t stride) |
| 61 | { |
| 62 | this->m_window = SlidingWindow<const float>(data, dataSize, minSamples, stride); |
| 63 | } |
| 64 | |
| 65 | bool AudioCapture::HasNext() |
| 66 | { |
| 67 | return m_window.HasNext(); |
| 68 | } |
| 69 | |
| 70 | std::vector<float> AudioCapture::Next() |
| 71 | { |
| 72 | if (this->m_window.HasNext()) |
| 73 | { |
| 74 | int remainingData = this->m_window.RemainingData(); |
| 75 | const float* windowData = this->m_window.Next(); |
| 76 | |
| 77 | size_t windowSize = this->m_window.GetWindowSize(); |
| 78 | |
| 79 | if(remainingData < windowSize) |
| 80 | { |
| 81 | std::vector<float> mfccAudioData(windowSize, 0.0f); |
| 82 | for(int i = 0; i < remainingData; ++i) |
| 83 | { |
| 84 | mfccAudioData[i] = *windowData; |
| 85 | if(i < remainingData - 1) |
| 86 | { |
| 87 | ++windowData; |
| 88 | } |
| 89 | } |
| 90 | return mfccAudioData; |
| 91 | } |
| 92 | else |
| 93 | { |
| 94 | std::vector<float> mfccAudioData(windowData, windowData + windowSize); |
| 95 | return mfccAudioData; |
| 96 | } |
| 97 | } |
| 98 | else |
| 99 | { |
| 100 | throw std::out_of_range("Error, end of audio data reached."); |
| 101 | } |
| 102 | } |
| 103 | } //namespace asr |
| 104 | |