blob: 99e769c6d1d2600507fd5a2f7ae4528a929dbbcc [file] [log] [blame]
alexander3c798932021-03-26 21:42:19 +00001/*
Richard Burtonf32a86a2022-11-15 11:46:11 +00002 * SPDX-FileCopyrightText: Copyright 2021-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
alexander3c798932021-03-26 21:42:19 +00003 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17#include "Wav2LetterPreprocess.hpp"
18
19#include "PlatformMath.hpp"
20#include "TensorFlowLiteMicro.hpp"
21
22#include <algorithm>
23#include <cmath>
24
25namespace arm {
26namespace app {
alexander3c798932021-03-26 21:42:19 +000027
Richard Burtonb40ecf82022-04-22 16:14:57 +010028 AsrPreProcess::AsrPreProcess(TfLiteTensor* inputTensor, const uint32_t numMfccFeatures,
29 const uint32_t numFeatureFrames, const uint32_t mfccWindowLen,
30 const uint32_t mfccWindowStride
Richard Burtonc2911442022-04-22 09:08:21 +010031 ):
32 m_mfcc(numMfccFeatures, mfccWindowLen),
33 m_inputTensor(inputTensor),
34 m_mfccBuf(numMfccFeatures, numFeatureFrames),
35 m_delta1Buf(numMfccFeatures, numFeatureFrames),
36 m_delta2Buf(numMfccFeatures, numFeatureFrames),
37 m_mfccWindowLen(mfccWindowLen),
38 m_mfccWindowStride(mfccWindowStride),
Isabella Gottardi56ee6202021-05-12 08:27:15 +010039 m_numMfccFeats(numMfccFeatures),
Richard Burtonc2911442022-04-22 09:08:21 +010040 m_numFeatureFrames(numFeatureFrames)
alexander3c798932021-03-26 21:42:19 +000041 {
Richard Burtonc2911442022-04-22 09:08:21 +010042 if (numMfccFeatures > 0 && mfccWindowLen > 0) {
Isabella Gottardi56ee6202021-05-12 08:27:15 +010043 this->m_mfcc.Init();
alexander3c798932021-03-26 21:42:19 +000044 }
45 }
46
Richard Burtonb40ecf82022-04-22 16:14:57 +010047 bool AsrPreProcess::DoPreProcess(const void* audioData, const size_t audioDataLen)
alexander3c798932021-03-26 21:42:19 +000048 {
Richard Burtonc2911442022-04-22 09:08:21 +010049 this->m_mfccSlidingWindow = audio::SlidingWindow<const int16_t>(
50 static_cast<const int16_t*>(audioData), audioDataLen,
51 this->m_mfccWindowLen, this->m_mfccWindowStride);
alexander3c798932021-03-26 21:42:19 +000052
53 uint32_t mfccBufIdx = 0;
54
Isabella Gottardi56ee6202021-05-12 08:27:15 +010055 std::fill(m_mfccBuf.begin(), m_mfccBuf.end(), 0.f);
56 std::fill(m_delta1Buf.begin(), m_delta1Buf.end(), 0.f);
57 std::fill(m_delta2Buf.begin(), m_delta2Buf.end(), 0.f);
alexander3c798932021-03-26 21:42:19 +000058
Richard Burtonc2911442022-04-22 09:08:21 +010059 /* While we can slide over the audio. */
60 while (this->m_mfccSlidingWindow.HasNext()) {
61 const int16_t* mfccWindow = this->m_mfccSlidingWindow.Next();
alexander3c798932021-03-26 21:42:19 +000062 auto mfccAudioData = std::vector<int16_t>(
63 mfccWindow,
Richard Burtonc2911442022-04-22 09:08:21 +010064 mfccWindow + this->m_mfccWindowLen);
Isabella Gottardi56ee6202021-05-12 08:27:15 +010065 auto mfcc = this->m_mfcc.MfccCompute(mfccAudioData);
66 for (size_t i = 0; i < this->m_mfccBuf.size(0); ++i) {
67 this->m_mfccBuf(i, mfccBufIdx) = mfcc[i];
alexander3c798932021-03-26 21:42:19 +000068 }
69 ++mfccBufIdx;
70 }
71
72 /* Pad MFCC if needed by adding MFCC for zeros. */
Richard Burtonc2911442022-04-22 09:08:21 +010073 if (mfccBufIdx != this->m_numFeatureFrames) {
74 std::vector<int16_t> zerosWindow = std::vector<int16_t>(this->m_mfccWindowLen, 0);
Isabella Gottardi56ee6202021-05-12 08:27:15 +010075 std::vector<float> mfccZeros = this->m_mfcc.MfccCompute(zerosWindow);
alexander3c798932021-03-26 21:42:19 +000076
Richard Burtonc2911442022-04-22 09:08:21 +010077 while (mfccBufIdx != this->m_numFeatureFrames) {
Isabella Gottardi56ee6202021-05-12 08:27:15 +010078 memcpy(&this->m_mfccBuf(0, mfccBufIdx),
79 mfccZeros.data(), sizeof(float) * m_numMfccFeats);
alexander3c798932021-03-26 21:42:19 +000080 ++mfccBufIdx;
81 }
82 }
83
84 /* Compute first and second order deltas from MFCCs. */
Richard Burtonb40ecf82022-04-22 16:14:57 +010085 AsrPreProcess::ComputeDeltas(this->m_mfccBuf, this->m_delta1Buf, this->m_delta2Buf);
alexander3c798932021-03-26 21:42:19 +000086
Richard Burtonc2911442022-04-22 09:08:21 +010087 /* Standardize calculated features. */
88 this->Standarize();
alexander3c798932021-03-26 21:42:19 +000089
90 /* Quantise. */
Richard Burtonc2911442022-04-22 09:08:21 +010091 QuantParams quantParams = GetTensorQuantParams(this->m_inputTensor);
alexander3c798932021-03-26 21:42:19 +000092
93 if (0 == quantParams.scale) {
94 printf_err("Quantisation scale can't be 0\n");
95 return false;
96 }
97
Richard Burtonc2911442022-04-22 09:08:21 +010098 switch(this->m_inputTensor->type) {
alexander3c798932021-03-26 21:42:19 +000099 case kTfLiteUInt8:
alexanderc350cdc2021-04-29 20:36:09 +0100100 return this->Quantise<uint8_t>(
Richard Burtonc2911442022-04-22 09:08:21 +0100101 tflite::GetTensorData<uint8_t>(this->m_inputTensor), this->m_inputTensor->bytes,
alexander3c798932021-03-26 21:42:19 +0000102 quantParams.scale, quantParams.offset);
103 case kTfLiteInt8:
alexanderc350cdc2021-04-29 20:36:09 +0100104 return this->Quantise<int8_t>(
Richard Burtonc2911442022-04-22 09:08:21 +0100105 tflite::GetTensorData<int8_t>(this->m_inputTensor), this->m_inputTensor->bytes,
alexander3c798932021-03-26 21:42:19 +0000106 quantParams.scale, quantParams.offset);
107 default:
108 printf_err("Unsupported tensor type %s\n",
Richard Burtonc2911442022-04-22 09:08:21 +0100109 TfLiteTypeGetName(this->m_inputTensor->type));
alexander3c798932021-03-26 21:42:19 +0000110 }
111
112 return false;
113 }
114
Richard Burtonb40ecf82022-04-22 16:14:57 +0100115 bool AsrPreProcess::ComputeDeltas(Array2d<float>& mfcc,
116 Array2d<float>& delta1,
117 Array2d<float>& delta2)
alexander3c798932021-03-26 21:42:19 +0000118 {
119 const std::vector <float> delta1Coeffs =
120 {6.66666667e-02, 5.00000000e-02, 3.33333333e-02,
121 1.66666667e-02, -3.46944695e-18, -1.66666667e-02,
122 -3.33333333e-02, -5.00000000e-02, -6.66666667e-02};
123
124 const std::vector <float> delta2Coeffs =
125 {0.06060606, 0.01515152, -0.01731602,
126 -0.03679654, -0.04329004, -0.03679654,
127 -0.01731602, 0.01515152, 0.06060606};
128
129 if (delta1.size(0) == 0 || delta2.size(0) != delta1.size(0) ||
130 mfcc.size(0) == 0 || mfcc.size(1) == 0) {
131 return false;
132 }
133
134 /* Get the middle index; coeff vec len should always be odd. */
135 const size_t coeffLen = delta1Coeffs.size();
136 const size_t fMidIdx = (coeffLen - 1)/2;
137 const size_t numFeatures = mfcc.size(0);
138 const size_t numFeatVectors = mfcc.size(1);
139
140 /* Iterate through features in MFCC vector. */
141 for (size_t i = 0; i < numFeatures; ++i) {
142 /* For each feature, iterate through time (t) samples representing feature evolution and
143 * calculate d/dt and d^2/dt^2, using 1D convolution with differential kernels.
144 * Convolution padding = valid, result size is `time length - kernel length + 1`.
145 * The result is padded with 0 from both sides to match the size of initial time samples data.
146 *
147 * For the small filter, conv1D implementation as a simple loop is efficient enough.
148 * Filters of a greater size would need CMSIS-DSP functions to be used, like arm_fir_f32.
149 */
150
151 for (size_t j = fMidIdx; j < numFeatVectors - fMidIdx; ++j) {
152 float d1 = 0;
153 float d2 = 0;
154 const size_t mfccStIdx = j - fMidIdx;
155
156 for (size_t k = 0, m = coeffLen - 1; k < coeffLen; ++k, --m) {
157
158 d1 += mfcc(i,mfccStIdx + k) * delta1Coeffs[m];
159 d2 += mfcc(i,mfccStIdx + k) * delta2Coeffs[m];
160 }
161
162 delta1(i,j) = d1;
163 delta2(i,j) = d2;
164 }
165 }
166
167 return true;
168 }
169
Richard Burtonb40ecf82022-04-22 16:14:57 +0100170 void AsrPreProcess::StandardizeVecF32(Array2d<float>& vec)
alexander3c798932021-03-26 21:42:19 +0000171 {
Richard Burtonc2911442022-04-22 09:08:21 +0100172 auto mean = math::MathUtils::MeanF32(vec.begin(), vec.totalSize());
173 auto stddev = math::MathUtils::StdDevF32(vec.begin(), vec.totalSize(), mean);
alexander3c798932021-03-26 21:42:19 +0000174
175 debug("Mean: %f, Stddev: %f\n", mean, stddev);
176 if (stddev == 0) {
177 std::fill(vec.begin(), vec.end(), 0);
178 } else {
179 const float stddevInv = 1.f/stddev;
180 const float normalisedMean = mean/stddev;
181
182 auto NormalisingFunction = [=](float& value) {
183 value = value * stddevInv - normalisedMean;
184 };
185 std::for_each(vec.begin(), vec.end(), NormalisingFunction);
186 }
187 }
188
Richard Burtonb40ecf82022-04-22 16:14:57 +0100189 void AsrPreProcess::Standarize()
alexander3c798932021-03-26 21:42:19 +0000190 {
Richard Burtonb40ecf82022-04-22 16:14:57 +0100191 AsrPreProcess::StandardizeVecF32(this->m_mfccBuf);
192 AsrPreProcess::StandardizeVecF32(this->m_delta1Buf);
193 AsrPreProcess::StandardizeVecF32(this->m_delta2Buf);
alexander3c798932021-03-26 21:42:19 +0000194 }
195
Richard Burtonb40ecf82022-04-22 16:14:57 +0100196 float AsrPreProcess::GetQuantElem(
alexander3c798932021-03-26 21:42:19 +0000197 const float elem,
198 const float quantScale,
199 const int quantOffset,
200 const float minVal,
201 const float maxVal)
202 {
203 float val = std::round((elem/quantScale) + quantOffset);
204 return std::min<float>(std::max<float>(val, minVal), maxVal);
205 }
206
alexander3c798932021-03-26 21:42:19 +0000207} /* namespace app */
208} /* namespace arm */