Blame - samples/SpeechRecognition/include/Preprocess.hpp - ml/armnn

blob: 80c568439bf5a598bd171ceb987a5c08a790ce6a [file] [log] [blame]

Éanna Ó Catháin	c6ab02a	2021-04-07 14:35:25 +0100	[diff] [blame^]	1	//
				2	// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
				3	// SPDX-License-Identifier: MIT
				4	//
				5
				6	#pragma once
				7
				8	#include "DataStructures.hpp"
				9	#include "SlidingWindow.hpp"
				10	#include <numeric>
				11	#include "MFCC.hpp"
				12
				13	/* Class to facilitate pre-processing calculation for Wav2Letter model
				14	* for ASR */
				15	using AudioWindow = SlidingWindow <const float>;
				16
				17	class Preprocess
				18	{
				19	public:
				20
				21	MFCC _m_mfcc; /* MFCC instance */
				22
				23	/* Actual buffers to be populated */
				24	Array2d<float> _m_mfccBuf; /* Contiguous buffer 1D: MFCC */
				25	Array2d<float> _m_delta1Buf; /* Contiguous buffer 1D: Delta 1 */
				26	Array2d<float> _m_delta2Buf; /* Contiguous buffer 1D: Delta 2 */
				27
				28	uint32_t _m_windowLen; /* Window length for MFCC */
				29	uint32_t _m_windowStride; /* Window stride len for MFCC */
				30	AudioWindow _m_window; /* Sliding window */
				31
				32	/**
				33	* @brief Constructor
				34	* @param[in] numMfccFeatures number of MFCC features per window
				35	* @param[in] windowLen number of elements in a window
				36	* @param[in] windowStride stride (in number of elements) for
				37	* moving the window
				38	* @param[in] numMfccVectors number of MFCC vectors per window
				39	*/
				40	Preprocess(
				41	const uint32_t windowLen,
				42	const uint32_t windowStride,
				43	const MFCC mfccInst);
				44	Preprocess() = delete;
				45	~Preprocess();
				46
				47	/**
				48	* @brief Calculates the features required from audio data. This
				49	* includes MFCC, first and second order deltas,
				50	* normalisation and finally, quantisation. The tensor is
				51	* populated with feature from a given window placed along
				52	* in a single row.
				53	* @param[in] audioData pointer to the first element of audio data
				54	* @param[in] audioDataLen number of elements in the audio data
				55	* @param[in] tensor tensor to be populated
				56	* @return true if successful, false in case of error.
				57	*/
				58	bool Invoke(const float* audioData,
				59	const uint32_t audioDataLen,
				60	std::vector<int8_t>& output,
				61	int quantOffset,
				62	float quantScale);
				63
				64
				65	protected:
				66	/**
				67	* @brief Computes the first and second order deltas for the
				68	* MFCC buffers - they are assumed to be populated.
				69	*
				70	* @param[in] mfcc MFCC buffers
				71	* @param[out] delta1 result of the first diff computation
				72	* @param[out] delta2 result of the second diff computation
				73	*
				74	* @return true if successful, false otherwise
				75	*/
				76	static bool _ComputeDeltas(Array2d<float>& mfcc,
				77	Array2d<float>& delta1,
				78	Array2d<float>& delta2);
				79
				80	/**
				81	* @brief Given a 2D vector of floats, computes the mean
				82	* @param[in] vec vector of vector of floats
				83	* @return mean value
				84	*/
				85	static float _GetMean(Array2d<float>& vec);
				86
				87	/**
				88	* @brief Given a 2D vector of floats, computes the stddev
				89	* @param[in] vec vector of vector of floats
				90	* @param[in] mean mean value of the vector passed in
				91	* @return stddev value
				92	*/
				93	static float _GetStdDev(Array2d<float>& vec,
				94	const float mean);
				95
				96	/**
				97	* @brief Given a 2D vector of floats, normalises it using
				98	* the mean and the stddev
				99	* @param[in/out] vec vector of vector of floats
				100	* @return
				101	*/
				102	static void _NormaliseVec(Array2d<float>& vec);
				103
				104	/**
				105	* @brief Normalises the MFCC and delta buffers
				106	* @return
				107	*/
				108	void _Normalise();
				109
				110	/**
				111	* @brief Given the quantisation and data type limits, computes
				112	* the quantised values of a floating point input data.
				113	* @param[in] elem Element to be quantised
				114	* @param[in] quantScale Scale
				115	* @param[in] quantOffset Offset
				116	* @param[in] minVal Numerical limit - minimum
				117	* @param[in] maxVal Numerical limit - maximum
				118	* @return floating point quantised value
				119	*/
				120	static float _GetQuantElem(
				121	const float elem,
				122	const float quantScale,
				123	const int quantOffset,
				124	const float minVal,
				125	const float maxVal);
				126
				127	/**
				128	* @brief Quantises the MFCC and delta buffers, and places them
				129	* in the output buffer. While doing so, it transposes
				130	* the data. Reason: Buffers in this class are arranged
				131	* for "time" axis to be row major. Primary reason for
				132	* this being the convolution speed up (as we can use
				133	* contiguous memory). The output, however, requires the
				134	* time axis to be in column major arrangement.
				135	* @param[in] outputBuf pointer to the output buffer
				136	* @param[in] outputBufSz output buffer's size
				137	* @param[in] quantScale quantisation scale
				138	* @param[in] quantOffset quantisation offset
				139	*/
				140	template <typename T>
				141	bool _Quantise(T* outputBuf, int quantOffset, float quantScale)
				142	{
				143	/* Populate */
				144	T* outputBufMfcc = outputBuf;
				145	T* outputBufD1 = outputBuf + this->_m_mfcc._m_params.m_numMfccFeatures;
				146	T* outputBufD2 = outputBufD1 + this->_m_mfcc._m_params.m_numMfccFeatures;
				147	const uint32_t ptrIncr = this->_m_mfcc._m_params.m_numMfccFeatures * 2; /* (3 vectors - 1 vector) */
				148
				149	const float minVal = std::numeric_limits<T>::min();
				150	const float maxVal = std::numeric_limits<T>::max();
				151
				152	/* We need to do a transpose while copying and concatenating
				153	* the tensor*/
				154	for (uint32_t j = 0; j < this->_m_mfcc._m_params.m_numMfccVectors; ++j) {
				155	for (uint32_t i = 0; i < this->_m_mfcc._m_params.m_numMfccFeatures; ++i)
				156	{
				157	*outputBufMfcc++ = static_cast<T>(this->_GetQuantElem(
				158	this->_m_mfccBuf(i, j), quantScale,
				159	quantOffset, minVal, maxVal));
				160	*outputBufD1++ = static_cast<T>(this->_GetQuantElem(
				161	this->_m_delta1Buf(i, j), quantScale,
				162	quantOffset, minVal, maxVal));
				163	*outputBufD2++ = static_cast<T>(this->_GetQuantElem(
				164	this->_m_delta2Buf(i, j), quantScale,
				165	quantOffset, minVal, maxVal));
				166	}
				167	outputBufMfcc += ptrIncr;
				168	outputBufD1 += ptrIncr;
				169	outputBufD2 += ptrIncr;
				170	}
				171
				172	return true;
				173	}
				174	};
				175