Blame - source/use_case/kws_asr/src/Wav2LetterPreprocess.cc - ml/ethos-u/ml-embedded-evaluation-kit

blob: 613ddb0c89d0a7264628665cc1234f0354b7703a [file] [log] [blame]

alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	1	/*
				2	* Copyright (c) 2021 Arm Limited. All rights reserved.
				3	* SPDX-License-Identifier: Apache-2.0
				4	*
				5	* Licensed under the Apache License, Version 2.0 (the "License");
				6	* you may not use this file except in compliance with the License.
				7	* You may obtain a copy of the License at
				8	*
				9	* http://www.apache.org/licenses/LICENSE-2.0
				10	*
				11	* Unless required by applicable law or agreed to in writing, software
				12	* distributed under the License is distributed on an "AS IS" BASIS,
				13	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	* See the License for the specific language governing permissions and
				15	* limitations under the License.
				16	*/
				17	#include "Wav2LetterPreprocess.hpp"
				18
				19	#include "PlatformMath.hpp"
				20	#include "TensorFlowLiteMicro.hpp"
				21
				22	#include <algorithm>
				23	#include <math.h>
				24
				25	namespace arm {
				26	namespace app {
				27	namespace audio {
				28	namespace asr {
				29
				30	Preprocess::Preprocess(
				31	const uint32_t numMfccFeatures,
				32	const uint32_t windowLen,
				33	const uint32_t windowStride,
				34	const uint32_t numMfccVectors):
				35	_m_mfcc(numMfccFeatures, windowLen),
				36	_m_mfccBuf(numMfccFeatures, numMfccVectors),
				37	_m_delta1Buf(numMfccFeatures, numMfccVectors),
				38	_m_delta2Buf(numMfccFeatures, numMfccVectors),
				39	_m_windowLen(windowLen),
				40	_m_windowStride(windowStride),
				41	_m_numMfccFeats(numMfccFeatures),
				42	_m_numFeatVectors(numMfccVectors),
				43	_m_window()
				44	{
				45	if (numMfccFeatures > 0 && windowLen > 0) {
				46	this->_m_mfcc.Init();
				47	}
				48	}
				49
				50	bool Preprocess::Invoke(
				51	const int16_t* audioData,
				52	const uint32_t audioDataLen,
				53	TfLiteTensor* tensor)
				54	{
				55	this->_m_window = SlidingWindow<const int16_t>(
				56	audioData, audioDataLen,
				57	this->_m_windowLen, this->_m_windowStride);
				58
				59	uint32_t mfccBufIdx = 0;
				60
				61	std::fill(_m_mfccBuf.begin(), _m_mfccBuf.end(), 0.f);
				62	std::fill(_m_delta1Buf.begin(), _m_delta1Buf.end(), 0.f);
				63	std::fill(_m_delta2Buf.begin(), _m_delta2Buf.end(), 0.f);
				64
				65	/* While we can slide over the window. */
				66	while (this->_m_window.HasNext()) {
				67	const int16_t* mfccWindow = this->_m_window.Next();
				68	auto mfccAudioData = std::vector<int16_t>(
				69	mfccWindow,
				70	mfccWindow + this->_m_windowLen);
				71	auto mfcc = this->_m_mfcc.MfccCompute(mfccAudioData);
				72	for (size_t i = 0; i < this->_m_mfccBuf.size(0); ++i) {
				73	this->_m_mfccBuf(i, mfccBufIdx) = mfcc[i];
				74	}
				75	++mfccBufIdx;
				76	}
				77
				78	/* Pad MFCC if needed by adding MFCC for zeros. */
				79	if (mfccBufIdx != this->_m_numFeatVectors) {
				80	std::vector<int16_t> zerosWindow = std::vector<int16_t>(this->_m_windowLen, 0);
				81	std::vector<float> mfccZeros = this->_m_mfcc.MfccCompute(zerosWindow);
				82
				83	while (mfccBufIdx != this->_m_numFeatVectors) {
				84	memcpy(&this->_m_mfccBuf(0, mfccBufIdx),
				85	mfccZeros.data(), sizeof(float) * _m_numMfccFeats);
				86	++mfccBufIdx;
				87	}
				88	}
				89
				90	/* Compute first and second order deltas from MFCCs. */
				91	this->_ComputeDeltas(this->_m_mfccBuf,
				92	this->_m_delta1Buf,
				93	this->_m_delta2Buf);
				94
				95	/* Normalise. */
				96	this->_Normalise();
				97
				98	/* Quantise. */
				99	QuantParams quantParams = GetTensorQuantParams(tensor);
				100
				101	if (0 == quantParams.scale) {
				102	printf_err("Quantisation scale can't be 0\n");
				103	return false;
				104	}
				105
				106	switch(tensor->type) {
				107	case kTfLiteUInt8:
				108	return this->_Quantise<uint8_t>(
				109	tflite::GetTensorData<uint8_t>(tensor), tensor->bytes,
				110	quantParams.scale, quantParams.offset);
				111	case kTfLiteInt8:
				112	return this->_Quantise<int8_t>(
				113	tflite::GetTensorData<int8_t>(tensor), tensor->bytes,
				114	quantParams.scale, quantParams.offset);
				115	default:
				116	printf_err("Unsupported tensor type %s\n",
				117	TfLiteTypeGetName(tensor->type));
				118	}
				119
				120	return false;
				121	}
				122
				123	bool Preprocess::_ComputeDeltas(Array2d<float>& mfcc,
				124	Array2d<float>& delta1,
				125	Array2d<float>& delta2)
				126	{
				127	const std::vector <float> delta1Coeffs =
				128	{6.66666667e-02, 5.00000000e-02, 3.33333333e-02,
				129	1.66666667e-02, -3.46944695e-18, -1.66666667e-02,
				130	-3.33333333e-02, -5.00000000e-02, -6.66666667e-02};
				131
				132	const std::vector <float> delta2Coeffs =
				133	{0.06060606, 0.01515152, -0.01731602,
				134	-0.03679654, -0.04329004, -0.03679654,
				135	-0.01731602, 0.01515152, 0.06060606};
				136
				137	if (delta1.size(0) == 0 \|\| delta2.size(0) != delta1.size(0) \|\|
				138	mfcc.size(0) == 0 \|\| mfcc.size(1) == 0) {
				139	return false;
				140	}
				141
				142	/* Get the middle index; coeff vec len should always be odd. */
				143	const size_t coeffLen = delta1Coeffs.size();
				144	const size_t fMidIdx = (coeffLen - 1)/2;
				145	const size_t numFeatures = mfcc.size(0);
				146	const size_t numFeatVectors = mfcc.size(1);
				147
				148	/* Iterate through features in MFCC vector. */
				149	for (size_t i = 0; i < numFeatures; ++i) {
				150	/* For each feature, iterate through time (t) samples representing feature evolution and
				151	* calculate d/dt and d^2/dt^2, using 1d convolution with differential kernels.
				152	* Convolution padding = valid, result size is `time length - kernel length + 1`.
				153	* The result is padded with 0 from both sides to match the size of initial time samples data.
				154	*
				155	* For the small filter, conv1d implementation as a simple loop is efficient enough.
				156	* Filters of a greater size would need CMSIS-DSP functions to be used, like arm_fir_f32.
				157	*/
				158
				159	for (size_t j = fMidIdx; j < numFeatVectors - fMidIdx; ++j) {
				160	float d1 = 0;
				161	float d2 = 0;
				162	const size_t mfccStIdx = j - fMidIdx;
				163
				164	for (size_t k = 0, m = coeffLen - 1; k < coeffLen; ++k, --m) {
				165
				166	d1 += mfcc(i,mfccStIdx + k) * delta1Coeffs[m];
				167	d2 += mfcc(i,mfccStIdx + k) * delta2Coeffs[m];
				168	}
				169
				170	delta1(i,j) = d1;
				171	delta2(i,j) = d2;
				172	}
				173	}
				174
				175	return true;
				176	}
				177
				178	float Preprocess::_GetMean(Array2d<float>& vec)
				179	{
				180	return math::MathUtils::MeanF32(vec.begin(), vec.totalSize());
				181	}
				182
				183	float Preprocess::_GetStdDev(Array2d<float>& vec, const float mean)
				184	{
				185	return math::MathUtils::StdDevF32(vec.begin(), vec.totalSize(), mean);
				186	}
				187
				188	void Preprocess::_NormaliseVec(Array2d<float>& vec)
				189	{
				190	auto mean = Preprocess::_GetMean(vec);
				191	auto stddev = Preprocess::_GetStdDev(vec, mean);
				192
				193	debug("Mean: %f, Stddev: %f\n", mean, stddev);
				194	if (stddev == 0) {
				195	std::fill(vec.begin(), vec.end(), 0);
				196	} else {
				197	const float stddevInv = 1.f/stddev;
				198	const float normalisedMean = mean/stddev;
				199
				200	auto NormalisingFunction = [=](float& value) {
				201	value = value * stddevInv - normalisedMean;
				202	};
				203	std::for_each(vec.begin(), vec.end(), NormalisingFunction);
				204	}
				205	}
				206
				207	void Preprocess::_Normalise()
				208	{
				209	Preprocess::_NormaliseVec(this->_m_mfccBuf);
				210	Preprocess::_NormaliseVec(this->_m_delta1Buf);
				211	Preprocess::_NormaliseVec(this->_m_delta2Buf);
				212	}
				213
				214	float Preprocess::_GetQuantElem(
				215	const float elem,
				216	const float quantScale,
				217	const int quantOffset,
				218	const float minVal,
				219	const float maxVal)
				220	{
				221	float val = std::round((elem/quantScale) + quantOffset);
				222	return std::min<float>(std::max<float>(val, minVal), maxVal);
				223	}
				224
				225	} /* namespace asr */
				226	} /* namespace audio */
				227	} /* namespace app */
				228	} /* namespace arm */