/*
 * Copyright (c) 2021 Arm Limited. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17#include "Wav2LetterPreprocess.hpp"
18
19#include "PlatformMath.hpp"
20#include "TensorFlowLiteMicro.hpp"
21
22#include <algorithm>
23#include <math.h>
24
25namespace arm {
26namespace app {
27namespace audio {
28namespace asr {
29
30 Preprocess::Preprocess(
31 const uint32_t numMfccFeatures,
32 const uint32_t windowLen,
33 const uint32_t windowStride,
34 const uint32_t numMfccVectors):
35 _m_mfcc(numMfccFeatures, windowLen),
36 _m_mfccBuf(numMfccFeatures, numMfccVectors),
37 _m_delta1Buf(numMfccFeatures, numMfccVectors),
38 _m_delta2Buf(numMfccFeatures, numMfccVectors),
39 _m_windowLen(windowLen),
40 _m_windowStride(windowStride),
41 _m_numMfccFeats(numMfccFeatures),
42 _m_numFeatVectors(numMfccVectors),
43 _m_window()
44 {
45 if (numMfccFeatures > 0 && windowLen > 0) {
46 this->_m_mfcc.Init();
47 }
48 }
49
50 bool Preprocess::Invoke(
51 const int16_t* audioData,
52 const uint32_t audioDataLen,
53 TfLiteTensor* tensor)
54 {
55 this->_m_window = SlidingWindow<const int16_t>(
56 audioData, audioDataLen,
57 this->_m_windowLen, this->_m_windowStride);
58
59 uint32_t mfccBufIdx = 0;
60
61 std::fill(_m_mfccBuf.begin(), _m_mfccBuf.end(), 0.f);
62 std::fill(_m_delta1Buf.begin(), _m_delta1Buf.end(), 0.f);
63 std::fill(_m_delta2Buf.begin(), _m_delta2Buf.end(), 0.f);
64
65 /* While we can slide over the window. */
66 while (this->_m_window.HasNext()) {
67 const int16_t* mfccWindow = this->_m_window.Next();
68 auto mfccAudioData = std::vector<int16_t>(
69 mfccWindow,
70 mfccWindow + this->_m_windowLen);
71 auto mfcc = this->_m_mfcc.MfccCompute(mfccAudioData);
72 for (size_t i = 0; i < this->_m_mfccBuf.size(0); ++i) {
73 this->_m_mfccBuf(i, mfccBufIdx) = mfcc[i];
74 }
75 ++mfccBufIdx;
76 }
77
78 /* Pad MFCC if needed by adding MFCC for zeros. */
79 if (mfccBufIdx != this->_m_numFeatVectors) {
80 std::vector<int16_t> zerosWindow = std::vector<int16_t>(this->_m_windowLen, 0);
81 std::vector<float> mfccZeros = this->_m_mfcc.MfccCompute(zerosWindow);
82
83 while (mfccBufIdx != this->_m_numFeatVectors) {
84 memcpy(&this->_m_mfccBuf(0, mfccBufIdx),
85 mfccZeros.data(), sizeof(float) * _m_numMfccFeats);
86 ++mfccBufIdx;
87 }
88 }
89
90 /* Compute first and second order deltas from MFCCs. */
91 this->_ComputeDeltas(this->_m_mfccBuf,
92 this->_m_delta1Buf,
93 this->_m_delta2Buf);
94
95 /* Normalise. */
96 this->_Normalise();
97
98 /* Quantise. */
99 QuantParams quantParams = GetTensorQuantParams(tensor);
100
101 if (0 == quantParams.scale) {
102 printf_err("Quantisation scale can't be 0\n");
103 return false;
104 }
105
106 switch(tensor->type) {
107 case kTfLiteUInt8:
108 return this->_Quantise<uint8_t>(
109 tflite::GetTensorData<uint8_t>(tensor), tensor->bytes,
110 quantParams.scale, quantParams.offset);
111 case kTfLiteInt8:
112 return this->_Quantise<int8_t>(
113 tflite::GetTensorData<int8_t>(tensor), tensor->bytes,
114 quantParams.scale, quantParams.offset);
115 default:
116 printf_err("Unsupported tensor type %s\n",
117 TfLiteTypeGetName(tensor->type));
118 }
119
120 return false;
121 }
122
123 bool Preprocess::_ComputeDeltas(Array2d<float>& mfcc,
124 Array2d<float>& delta1,
125 Array2d<float>& delta2)
126 {
127 const std::vector <float> delta1Coeffs =
128 {6.66666667e-02, 5.00000000e-02, 3.33333333e-02,
129 1.66666667e-02, -3.46944695e-18, -1.66666667e-02,
130 -3.33333333e-02, -5.00000000e-02, -6.66666667e-02};
131
132 const std::vector <float> delta2Coeffs =
133 {0.06060606, 0.01515152, -0.01731602,
134 -0.03679654, -0.04329004, -0.03679654,
135 -0.01731602, 0.01515152, 0.06060606};
136
137 if (delta1.size(0) == 0 || delta2.size(0) != delta1.size(0) ||
138 mfcc.size(0) == 0 || mfcc.size(1) == 0) {
139 return false;
140 }
141
142 /* Get the middle index; coeff vec len should always be odd. */
143 const size_t coeffLen = delta1Coeffs.size();
144 const size_t fMidIdx = (coeffLen - 1)/2;
145 const size_t numFeatures = mfcc.size(0);
146 const size_t numFeatVectors = mfcc.size(1);
147
148 /* Iterate through features in MFCC vector. */
149 for (size_t i = 0; i < numFeatures; ++i) {
150 /* For each feature, iterate through time (t) samples representing feature evolution and
151 * calculate d/dt and d^2/dt^2, using 1d convolution with differential kernels.
152 * Convolution padding = valid, result size is `time length - kernel length + 1`.
153 * The result is padded with 0 from both sides to match the size of initial time samples data.
154 *
155 * For the small filter, conv1d implementation as a simple loop is efficient enough.
156 * Filters of a greater size would need CMSIS-DSP functions to be used, like arm_fir_f32.
157 */
158
159 for (size_t j = fMidIdx; j < numFeatVectors - fMidIdx; ++j) {
160 float d1 = 0;
161 float d2 = 0;
162 const size_t mfccStIdx = j - fMidIdx;
163
164 for (size_t k = 0, m = coeffLen - 1; k < coeffLen; ++k, --m) {
165
166 d1 += mfcc(i,mfccStIdx + k) * delta1Coeffs[m];
167 d2 += mfcc(i,mfccStIdx + k) * delta2Coeffs[m];
168 }
169
170 delta1(i,j) = d1;
171 delta2(i,j) = d2;
172 }
173 }
174
175 return true;
176 }
177
178 float Preprocess::_GetMean(Array2d<float>& vec)
179 {
180 return math::MathUtils::MeanF32(vec.begin(), vec.totalSize());
181 }
182
183 float Preprocess::_GetStdDev(Array2d<float>& vec, const float mean)
184 {
185 return math::MathUtils::StdDevF32(vec.begin(), vec.totalSize(), mean);
186 }
187
188 void Preprocess::_NormaliseVec(Array2d<float>& vec)
189 {
190 auto mean = Preprocess::_GetMean(vec);
191 auto stddev = Preprocess::_GetStdDev(vec, mean);
192
193 debug("Mean: %f, Stddev: %f\n", mean, stddev);
194 if (stddev == 0) {
195 std::fill(vec.begin(), vec.end(), 0);
196 } else {
197 const float stddevInv = 1.f/stddev;
198 const float normalisedMean = mean/stddev;
199
200 auto NormalisingFunction = [=](float& value) {
201 value = value * stddevInv - normalisedMean;
202 };
203 std::for_each(vec.begin(), vec.end(), NormalisingFunction);
204 }
205 }
206
207 void Preprocess::_Normalise()
208 {
209 Preprocess::_NormaliseVec(this->_m_mfccBuf);
210 Preprocess::_NormaliseVec(this->_m_delta1Buf);
211 Preprocess::_NormaliseVec(this->_m_delta2Buf);
212 }
213
214 float Preprocess::_GetQuantElem(
215 const float elem,
216 const float quantScale,
217 const int quantOffset,
218 const float minVal,
219 const float maxVal)
220 {
221 float val = std::round((elem/quantScale) + quantOffset);
222 return std::min<float>(std::max<float>(val, minVal), maxVal);
223 }
224
225} /* namespace asr */
226} /* namespace audio */
227} /* namespace app */
228} /* namespace arm */