| /* |
| * Copyright (c) 2021-2022 Arm Limited. All rights reserved. |
| * SPDX-License-Identifier: Apache-2.0 |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| #include "Wav2LetterPreprocess.hpp" |
| |
| #include "PlatformMath.hpp" |
| #include "TensorFlowLiteMicro.hpp" |
| |
| #include <algorithm> |
| #include <cmath> |
| |
| namespace arm { |
| namespace app { |
| |
| AsrPreProcess::AsrPreProcess(TfLiteTensor* inputTensor, const uint32_t numMfccFeatures, |
| const uint32_t numFeatureFrames, const uint32_t mfccWindowLen, |
| const uint32_t mfccWindowStride |
| ): |
| m_mfcc(numMfccFeatures, mfccWindowLen), |
| m_inputTensor(inputTensor), |
| m_mfccBuf(numMfccFeatures, numFeatureFrames), |
| m_delta1Buf(numMfccFeatures, numFeatureFrames), |
| m_delta2Buf(numMfccFeatures, numFeatureFrames), |
| m_mfccWindowLen(mfccWindowLen), |
| m_mfccWindowStride(mfccWindowStride), |
| m_numMfccFeats(numMfccFeatures), |
| m_numFeatureFrames(numFeatureFrames) |
| { |
| if (numMfccFeatures > 0 && mfccWindowLen > 0) { |
| this->m_mfcc.Init(); |
| } |
| } |
| |
| bool AsrPreProcess::DoPreProcess(const void* audioData, const size_t audioDataLen) |
| { |
| this->m_mfccSlidingWindow = audio::SlidingWindow<const int16_t>( |
| static_cast<const int16_t*>(audioData), audioDataLen, |
| this->m_mfccWindowLen, this->m_mfccWindowStride); |
| |
| uint32_t mfccBufIdx = 0; |
| |
| std::fill(m_mfccBuf.begin(), m_mfccBuf.end(), 0.f); |
| std::fill(m_delta1Buf.begin(), m_delta1Buf.end(), 0.f); |
| std::fill(m_delta2Buf.begin(), m_delta2Buf.end(), 0.f); |
| |
| /* While we can slide over the audio. */ |
| while (this->m_mfccSlidingWindow.HasNext()) { |
| const int16_t* mfccWindow = this->m_mfccSlidingWindow.Next(); |
| auto mfccAudioData = std::vector<int16_t>( |
| mfccWindow, |
| mfccWindow + this->m_mfccWindowLen); |
| auto mfcc = this->m_mfcc.MfccCompute(mfccAudioData); |
| for (size_t i = 0; i < this->m_mfccBuf.size(0); ++i) { |
| this->m_mfccBuf(i, mfccBufIdx) = mfcc[i]; |
| } |
| ++mfccBufIdx; |
| } |
| |
| /* Pad MFCC if needed by adding MFCC for zeros. */ |
| if (mfccBufIdx != this->m_numFeatureFrames) { |
| std::vector<int16_t> zerosWindow = std::vector<int16_t>(this->m_mfccWindowLen, 0); |
| std::vector<float> mfccZeros = this->m_mfcc.MfccCompute(zerosWindow); |
| |
| while (mfccBufIdx != this->m_numFeatureFrames) { |
| memcpy(&this->m_mfccBuf(0, mfccBufIdx), |
| mfccZeros.data(), sizeof(float) * m_numMfccFeats); |
| ++mfccBufIdx; |
| } |
| } |
| |
| /* Compute first and second order deltas from MFCCs. */ |
| AsrPreProcess::ComputeDeltas(this->m_mfccBuf, this->m_delta1Buf, this->m_delta2Buf); |
| |
| /* Standardize calculated features. */ |
| this->Standarize(); |
| |
| /* Quantise. */ |
| QuantParams quantParams = GetTensorQuantParams(this->m_inputTensor); |
| |
| if (0 == quantParams.scale) { |
| printf_err("Quantisation scale can't be 0\n"); |
| return false; |
| } |
| |
| switch(this->m_inputTensor->type) { |
| case kTfLiteUInt8: |
| return this->Quantise<uint8_t>( |
| tflite::GetTensorData<uint8_t>(this->m_inputTensor), this->m_inputTensor->bytes, |
| quantParams.scale, quantParams.offset); |
| case kTfLiteInt8: |
| return this->Quantise<int8_t>( |
| tflite::GetTensorData<int8_t>(this->m_inputTensor), this->m_inputTensor->bytes, |
| quantParams.scale, quantParams.offset); |
| default: |
| printf_err("Unsupported tensor type %s\n", |
| TfLiteTypeGetName(this->m_inputTensor->type)); |
| } |
| |
| return false; |
| } |
| |
| bool AsrPreProcess::ComputeDeltas(Array2d<float>& mfcc, |
| Array2d<float>& delta1, |
| Array2d<float>& delta2) |
| { |
| const std::vector <float> delta1Coeffs = |
| {6.66666667e-02, 5.00000000e-02, 3.33333333e-02, |
| 1.66666667e-02, -3.46944695e-18, -1.66666667e-02, |
| -3.33333333e-02, -5.00000000e-02, -6.66666667e-02}; |
| |
| const std::vector <float> delta2Coeffs = |
| {0.06060606, 0.01515152, -0.01731602, |
| -0.03679654, -0.04329004, -0.03679654, |
| -0.01731602, 0.01515152, 0.06060606}; |
| |
| if (delta1.size(0) == 0 || delta2.size(0) != delta1.size(0) || |
| mfcc.size(0) == 0 || mfcc.size(1) == 0) { |
| return false; |
| } |
| |
| /* Get the middle index; coeff vec len should always be odd. */ |
| const size_t coeffLen = delta1Coeffs.size(); |
| const size_t fMidIdx = (coeffLen - 1)/2; |
| const size_t numFeatures = mfcc.size(0); |
| const size_t numFeatVectors = mfcc.size(1); |
| |
| /* Iterate through features in MFCC vector. */ |
| for (size_t i = 0; i < numFeatures; ++i) { |
| /* For each feature, iterate through time (t) samples representing feature evolution and |
| * calculate d/dt and d^2/dt^2, using 1D convolution with differential kernels. |
| * Convolution padding = valid, result size is `time length - kernel length + 1`. |
| * The result is padded with 0 from both sides to match the size of initial time samples data. |
| * |
| * For the small filter, conv1D implementation as a simple loop is efficient enough. |
| * Filters of a greater size would need CMSIS-DSP functions to be used, like arm_fir_f32. |
| */ |
| |
| for (size_t j = fMidIdx; j < numFeatVectors - fMidIdx; ++j) { |
| float d1 = 0; |
| float d2 = 0; |
| const size_t mfccStIdx = j - fMidIdx; |
| |
| for (size_t k = 0, m = coeffLen - 1; k < coeffLen; ++k, --m) { |
| |
| d1 += mfcc(i,mfccStIdx + k) * delta1Coeffs[m]; |
| d2 += mfcc(i,mfccStIdx + k) * delta2Coeffs[m]; |
| } |
| |
| delta1(i,j) = d1; |
| delta2(i,j) = d2; |
| } |
| } |
| |
| return true; |
| } |
| |
| void AsrPreProcess::StandardizeVecF32(Array2d<float>& vec) |
| { |
| auto mean = math::MathUtils::MeanF32(vec.begin(), vec.totalSize()); |
| auto stddev = math::MathUtils::StdDevF32(vec.begin(), vec.totalSize(), mean); |
| |
| debug("Mean: %f, Stddev: %f\n", mean, stddev); |
| if (stddev == 0) { |
| std::fill(vec.begin(), vec.end(), 0); |
| } else { |
| const float stddevInv = 1.f/stddev; |
| const float normalisedMean = mean/stddev; |
| |
| auto NormalisingFunction = [=](float& value) { |
| value = value * stddevInv - normalisedMean; |
| }; |
| std::for_each(vec.begin(), vec.end(), NormalisingFunction); |
| } |
| } |
| |
| void AsrPreProcess::Standarize() |
| { |
| AsrPreProcess::StandardizeVecF32(this->m_mfccBuf); |
| AsrPreProcess::StandardizeVecF32(this->m_delta1Buf); |
| AsrPreProcess::StandardizeVecF32(this->m_delta2Buf); |
| } |
| |
| float AsrPreProcess::GetQuantElem( |
| const float elem, |
| const float quantScale, |
| const int quantOffset, |
| const float minVal, |
| const float maxVal) |
| { |
| float val = std::round((elem/quantScale) + quantOffset); |
| return std::min<float>(std::max<float>(val, minVal), maxVal); |
| } |
| |
| } /* namespace app */ |
| } /* namespace arm */ |