blob: 9329d5e4d5fff2cf91053d3ca956e9ad5938a814 [file] [log] [blame]
George Gekov23c26272021-08-16 11:32:10 +01001//
2// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5#include "MathUtils.hpp"
6#include <cstring>
7#include <cmath>
8#include <numeric>
9#include <algorithm>
10#include <memory>
11#include "Wav2LetterPreprocessor.hpp"
12#include "Wav2LetterMFCC.hpp"
13
14float Wav2LetterPreprocessor::GetMean(Array2d<float>& vec)
15{
16 return MathUtils::MeanF32(vec.begin(), vec.totalSize());
17}
18
19float Wav2LetterPreprocessor::GetStdDev(Array2d<float>& vec, const float mean)
20{
21 return MathUtils::StdDevF32(vec.begin(), vec.totalSize(), mean);
22}
23
24void Wav2LetterPreprocessor::NormaliseVec(Array2d<float>& vec)
25{
26 auto mean = Wav2LetterPreprocessor::GetMean(vec);
27 auto stddev = Wav2LetterPreprocessor::GetStdDev(vec, mean);
28
29 if (stddev == 0)
30 {
31 std::fill(vec.begin(), vec.end(), 0);
32 }
33 else
34 {
35 const float stddevInv = 1.f/stddev;
36 const float normalisedMean = mean/stddev;
37
38 auto NormalisingFunction = [=](float &value) {
39 value = value * stddevInv - normalisedMean;
40 };
41 std::for_each(vec.begin(), vec.end(), NormalisingFunction);
42 }
43}
44
45void Wav2LetterPreprocessor::Normalise()
46{
47 Wav2LetterPreprocessor::NormaliseVec(this->m_mfccBuf);
48 Wav2LetterPreprocessor::NormaliseVec(this->m_delta1Buf);
49 Wav2LetterPreprocessor::NormaliseVec(this->m_delta2Buf);
50}
51
52float Wav2LetterPreprocessor::GetQuantElem(
53 const float elem,
54 const float quantScale,
55 const int quantOffset,
56 const float minVal,
57 const float maxVal)
58{
59 float val = std::round((elem/quantScale) + quantOffset);
60 float returnVal = std::min<float>(std::max<float>(val, minVal), maxVal);
61 return returnVal;
62}
63
64bool Wav2LetterPreprocessor::Invoke(const float* audioData, const uint32_t audioDataLen, std::vector<int8_t>& output,
65 int quantOffset, float quantScale)
66{
67 this->m_window = SlidingWindow<const float>(
68 audioData, audioDataLen,
69 this->m_windowLen, this->m_windowStride);
70
71 uint32_t mfccBufIdx = 0;
72
73 // Init buffers with 0
74 std::fill(m_mfccBuf.begin(), m_mfccBuf.end(), 0.f);
75 std::fill(m_delta1Buf.begin(), m_delta1Buf.end(), 0.f);
76 std::fill(m_delta2Buf.begin(), m_delta2Buf.end(), 0.f);
77
78 // While we can slide over the window
79 while (this->m_window.HasNext())
80 {
81 const float* mfccWindow = this->m_window.Next();
82 auto mfccAudioData = std::vector<float>(
83 mfccWindow,
84 mfccWindow + this->m_windowLen);
85
86 auto mfcc = this->m_mfcc->MfccCompute(mfccAudioData);
87 for (size_t i = 0; i < this->m_mfccBuf.size(0); ++i)
88 {
89 this->m_mfccBuf(i, mfccBufIdx) = mfcc[i];
90 }
91 ++mfccBufIdx;
92 }
93
94 // Pad MFCC if needed by repeating last feature vector
95 while (mfccBufIdx != this->m_mfcc->m_params.m_numMfccVectors)
96 {
97 memcpy(&this->m_mfccBuf(0, mfccBufIdx),
98 &this->m_mfccBuf(0, mfccBufIdx - 1), sizeof(float) * this->m_mfcc->m_params.m_numMfccFeatures);
99 ++mfccBufIdx;
100 }
101
102 // Compute first and second order deltas from MFCCs
103 Wav2LetterPreprocessor::ComputeDeltas(this->m_mfccBuf,
104 this->m_delta1Buf,
105 this->m_delta2Buf);
106
107 // Normalise
108 this->Normalise();
109
110 return this->Quantise<int8_t>(output.data(), quantOffset, quantScale);
111}
112
113bool Wav2LetterPreprocessor::ComputeDeltas(Array2d<float>& mfcc,
114 Array2d<float>& delta1,
115 Array2d<float>& delta2)
116{
117 const std::vector <float> delta1Coeffs =
118 {6.66666667e-02, 5.00000000e-02, 3.33333333e-02,
119 1.66666667e-02, -3.46944695e-18, -1.66666667e-02,
120 -3.33333333e-02, -5.00000000e-02, -6.66666667e-02};
121
122 const std::vector <float> delta2Coeffs =
123 {0.06060606, 0.01515152, -0.01731602,
124 -0.03679654, -0.04329004, -0.03679654,
125 -0.01731602, 0.01515152, 0.06060606};
126
127 if (delta1.size(0) == 0 || delta2.size(0) != delta1.size(0) ||
128 mfcc.size(0) == 0 || mfcc.size(1) == 0)
129 {
130 return false;
131 }
132
133 // Get the middle index; coeff vec len should always be odd
134 const size_t coeffLen = delta1Coeffs.size();
135 const size_t fMidIdx = (coeffLen - 1)/2;
136 const size_t numFeatures = mfcc.size(0);
137 const size_t numFeatVectors = mfcc.size(1);
138
139 // iterate through features in MFCC vector
140 for (size_t i = 0; i < numFeatures; ++i)
141 {
142 /* for each feature, iterate through time (t) samples representing feature evolution and
143 * calculate d/dt and d^2/dt^2, using 1d convolution with differential kernels.
144 * Convolution padding = valid, result size is `time length - kernel length + 1`.
145 * The result is padded with 0 from both sides to match the size of initial time samples data.
146 *
147 * For the small filter, conv1d implementation as a simple loop is efficient enough.
148 * Filters of a greater size would need CMSIS-DSP functions to be used, like arm_fir_f32.
149 */
150
151 for (size_t j = fMidIdx; j < numFeatVectors - fMidIdx; ++j)
152 {
153 float d1 = 0;
154 float d2 = 0;
155 const size_t mfccStIdx = j - fMidIdx;
156
157 for (size_t k = 0, m = coeffLen - 1; k < coeffLen; ++k, --m)
158 {
159
160 d1 += mfcc(i,mfccStIdx + k) * delta1Coeffs[m];
161 d2 += mfcc(i,mfccStIdx + k) * delta2Coeffs[m];
162 }
163
164 delta1(i,j) = d1;
165 delta2(i,j) = d2;
166 }
167 }
168
169 return true;
170}
171
172Wav2LetterPreprocessor::Wav2LetterPreprocessor(const uint32_t windowLen,
173 const uint32_t windowStride,
174 std::unique_ptr<Wav2LetterMFCC> mfccInst):
175 m_mfcc(std::move(mfccInst)),
176 m_mfccBuf(m_mfcc->m_params.m_numMfccFeatures, m_mfcc->m_params.m_numMfccVectors),
177 m_delta1Buf(m_mfcc->m_params.m_numMfccFeatures, m_mfcc->m_params.m_numMfccVectors),
178 m_delta2Buf(m_mfcc->m_params.m_numMfccFeatures, m_mfcc->m_params.m_numMfccVectors),
179 m_windowLen(windowLen),
180 m_windowStride(windowStride)
181{
182 if (m_mfcc->m_params.m_numMfccFeatures > 0 && windowLen > 0)
183 {
184 this->m_mfcc->Init();
185 }
186 std::fill(m_mfccBuf.begin(), m_mfccBuf.end(), 0.f);
187}