Blame - samples/SpeechRecognition/src/Wav2LetterPreprocessor.cpp - ml/armnn

blob: 9329d5e4d5fff2cf91053d3ca956e9ad5938a814 [file] [log] [blame]

George Gekov	23c2627	2021-08-16 11:32:10 +0100	[diff] [blame]	1	//
				2	// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
				3	// SPDX-License-Identifier: MIT
				4	//
				5	#include "MathUtils.hpp"
				6	#include <cstring>
				7	#include <cmath>
				8	#include <numeric>
				9	#include <algorithm>
				10	#include <memory>
				11	#include "Wav2LetterPreprocessor.hpp"
				12	#include "Wav2LetterMFCC.hpp"
				13
				14	float Wav2LetterPreprocessor::GetMean(Array2d<float>& vec)
				15	{
				16	return MathUtils::MeanF32(vec.begin(), vec.totalSize());
				17	}
				18
				19	float Wav2LetterPreprocessor::GetStdDev(Array2d<float>& vec, const float mean)
				20	{
				21	return MathUtils::StdDevF32(vec.begin(), vec.totalSize(), mean);
				22	}
				23
				24	void Wav2LetterPreprocessor::NormaliseVec(Array2d<float>& vec)
				25	{
				26	auto mean = Wav2LetterPreprocessor::GetMean(vec);
				27	auto stddev = Wav2LetterPreprocessor::GetStdDev(vec, mean);
				28
				29	if (stddev == 0)
				30	{
				31	std::fill(vec.begin(), vec.end(), 0);
				32	}
				33	else
				34	{
				35	const float stddevInv = 1.f/stddev;
				36	const float normalisedMean = mean/stddev;
				37
				38	auto NormalisingFunction = [=](float &value) {
				39	value = value * stddevInv - normalisedMean;
				40	};
				41	std::for_each(vec.begin(), vec.end(), NormalisingFunction);
				42	}
				43	}
				44
				45	void Wav2LetterPreprocessor::Normalise()
				46	{
				47	Wav2LetterPreprocessor::NormaliseVec(this->m_mfccBuf);
				48	Wav2LetterPreprocessor::NormaliseVec(this->m_delta1Buf);
				49	Wav2LetterPreprocessor::NormaliseVec(this->m_delta2Buf);
				50	}
				51
				52	float Wav2LetterPreprocessor::GetQuantElem(
				53	const float elem,
				54	const float quantScale,
				55	const int quantOffset,
				56	const float minVal,
				57	const float maxVal)
				58	{
				59	float val = std::round((elem/quantScale) + quantOffset);
				60	float returnVal = std::min<float>(std::max<float>(val, minVal), maxVal);
				61	return returnVal;
				62	}
				63
				64	bool Wav2LetterPreprocessor::Invoke(const float* audioData, const uint32_t audioDataLen, std::vector<int8_t>& output,
				65	int quantOffset, float quantScale)
				66	{
				67	this->m_window = SlidingWindow<const float>(
				68	audioData, audioDataLen,
				69	this->m_windowLen, this->m_windowStride);
				70
				71	uint32_t mfccBufIdx = 0;
				72
				73	// Init buffers with 0
				74	std::fill(m_mfccBuf.begin(), m_mfccBuf.end(), 0.f);
				75	std::fill(m_delta1Buf.begin(), m_delta1Buf.end(), 0.f);
				76	std::fill(m_delta2Buf.begin(), m_delta2Buf.end(), 0.f);
				77
				78	// While we can slide over the window
				79	while (this->m_window.HasNext())
				80	{
				81	const float* mfccWindow = this->m_window.Next();
				82	auto mfccAudioData = std::vector<float>(
				83	mfccWindow,
				84	mfccWindow + this->m_windowLen);
				85
				86	auto mfcc = this->m_mfcc->MfccCompute(mfccAudioData);
				87	for (size_t i = 0; i < this->m_mfccBuf.size(0); ++i)
				88	{
				89	this->m_mfccBuf(i, mfccBufIdx) = mfcc[i];
				90	}
				91	++mfccBufIdx;
				92	}
				93
				94	// Pad MFCC if needed by repeating last feature vector
				95	while (mfccBufIdx != this->m_mfcc->m_params.m_numMfccVectors)
				96	{
				97	memcpy(&this->m_mfccBuf(0, mfccBufIdx),
				98	&this->m_mfccBuf(0, mfccBufIdx - 1), sizeof(float) * this->m_mfcc->m_params.m_numMfccFeatures);
				99	++mfccBufIdx;
				100	}
				101
				102	// Compute first and second order deltas from MFCCs
				103	Wav2LetterPreprocessor::ComputeDeltas(this->m_mfccBuf,
				104	this->m_delta1Buf,
				105	this->m_delta2Buf);
				106
				107	// Normalise
				108	this->Normalise();
				109
				110	return this->Quantise<int8_t>(output.data(), quantOffset, quantScale);
				111	}
				112
				113	bool Wav2LetterPreprocessor::ComputeDeltas(Array2d<float>& mfcc,
				114	Array2d<float>& delta1,
				115	Array2d<float>& delta2)
				116	{
				117	const std::vector <float> delta1Coeffs =
				118	{6.66666667e-02, 5.00000000e-02, 3.33333333e-02,
				119	1.66666667e-02, -3.46944695e-18, -1.66666667e-02,
				120	-3.33333333e-02, -5.00000000e-02, -6.66666667e-02};
				121
				122	const std::vector <float> delta2Coeffs =
				123	{0.06060606, 0.01515152, -0.01731602,
				124	-0.03679654, -0.04329004, -0.03679654,
				125	-0.01731602, 0.01515152, 0.06060606};
				126
				127	if (delta1.size(0) == 0 \|\| delta2.size(0) != delta1.size(0) \|\|
				128	mfcc.size(0) == 0 \|\| mfcc.size(1) == 0)
				129	{
				130	return false;
				131	}
				132
				133	// Get the middle index; coeff vec len should always be odd
				134	const size_t coeffLen = delta1Coeffs.size();
				135	const size_t fMidIdx = (coeffLen - 1)/2;
				136	const size_t numFeatures = mfcc.size(0);
				137	const size_t numFeatVectors = mfcc.size(1);
				138
				139	// iterate through features in MFCC vector
				140	for (size_t i = 0; i < numFeatures; ++i)
				141	{
				142	/* for each feature, iterate through time (t) samples representing feature evolution and
				143	* calculate d/dt and d^2/dt^2, using 1d convolution with differential kernels.
				144	* Convolution padding = valid, result size is `time length - kernel length + 1`.
				145	* The result is padded with 0 from both sides to match the size of initial time samples data.
				146	*
				147	* For the small filter, conv1d implementation as a simple loop is efficient enough.
				148	* Filters of a greater size would need CMSIS-DSP functions to be used, like arm_fir_f32.
				149	*/
				150
				151	for (size_t j = fMidIdx; j < numFeatVectors - fMidIdx; ++j)
				152	{
				153	float d1 = 0;
				154	float d2 = 0;
				155	const size_t mfccStIdx = j - fMidIdx;
				156
				157	for (size_t k = 0, m = coeffLen - 1; k < coeffLen; ++k, --m)
				158	{
				159
				160	d1 += mfcc(i,mfccStIdx + k) * delta1Coeffs[m];
				161	d2 += mfcc(i,mfccStIdx + k) * delta2Coeffs[m];
				162	}
				163
				164	delta1(i,j) = d1;
				165	delta2(i,j) = d2;
				166	}
				167	}
				168
				169	return true;
				170	}
				171
				172	Wav2LetterPreprocessor::Wav2LetterPreprocessor(const uint32_t windowLen,
				173	const uint32_t windowStride,
				174	std::unique_ptr<Wav2LetterMFCC> mfccInst):
				175	m_mfcc(std::move(mfccInst)),
				176	m_mfccBuf(m_mfcc->m_params.m_numMfccFeatures, m_mfcc->m_params.m_numMfccVectors),
				177	m_delta1Buf(m_mfcc->m_params.m_numMfccFeatures, m_mfcc->m_params.m_numMfccVectors),
				178	m_delta2Buf(m_mfcc->m_params.m_numMfccFeatures, m_mfcc->m_params.m_numMfccVectors),
				179	m_windowLen(windowLen),
				180	m_windowStride(windowStride)
				181	{
				182	if (m_mfcc->m_params.m_numMfccFeatures > 0 && windowLen > 0)
				183	{
				184	this->m_mfcc->Init();
				185	}
				186	std::fill(m_mfccBuf.begin(), m_mfccBuf.end(), 0.f);
				187	}