Blame - samples/SpeechRecognition/include/MFCC.hpp - ml/armnn

blob: 14b6d9fe79296fe7bde06039cee8ac3b8d53a043 [file] [log] [blame]

Éanna Ó Catháin	c6ab02a	2021-04-07 14:35:25 +0100	[diff] [blame]	1	//
				2	// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
				3	// SPDX-License-Identifier: MIT
				4	//
				5
				6	#pragma once
				7
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <string>
#include <vector>
				13
				14	/* MFCC's consolidated parameters */
				15	class MfccParams
				16	{
				17	public:
				18	float m_samplingFreq;
				19	int m_numFbankBins;
				20	float m_melLoFreq;
				21	float m_melHiFreq;
				22	int m_numMfccFeatures;
				23	int m_frameLen;
				24	int m_frameLenPadded;
				25	bool m_useHtkMethod;
				26	int m_numMfccVectors;
				27
				28	/** @brief Constructor */
				29	MfccParams(const float samplingFreq, const int numFbankBins,
				30	const float melLoFreq, const float melHiFreq,
				31	const int numMfccFeats, const int frameLen,
				32	const bool useHtkMethod, const int numMfccVectors);
				33
				34	/* Delete the default constructor */
				35	MfccParams() = delete;
				36
				37	/* Default destructor */
				38	~MfccParams() = default;
				39
				40	/** @brief String representation of parameters */
				41	std::string Str();
				42	};
				43
				44	/**
				45	* @brief Class for MFCC feature extraction.
				46	* Based on https://github.com/ARM-software/ML-KWS-for-MCU/blob/master/Deployment/Source/MFCC/mfcc.cpp
				47	* This class is designed to be generic and self-sufficient but
				48	* certain calculation routines can be overridden to accommodate
				49	* use-case specific requirements.
				50	*/
				51	class MFCC
				52	{
				53
				54	public:
				55
				56	/**
				57	* @brief Extract MFCC features for one single small frame of
				58	* audio data e.g. 640 samples.
				59	* @param[in] audioData - Vector of audio samples to calculate
				60	* features for.
				61	* @return Vector of extracted MFCC features.
				62	**/
				63	std::vector<float> MfccCompute(const std::vector<float>& audioData);
				64
				65	MfccParams _m_params;
				66
				67	/**
				68	* @brief Constructor
				69	* @param[in] params - MFCC parameters
				70	*/
				71	MFCC(const MfccParams& params);
				72
				73	/* Delete the default constructor */
				74	MFCC() = delete;
				75
				76	/** @brief Default destructor */
				77	~MFCC() = default;
				78
				79	/** @brief Initialise */
				80	void Init();
				81
				82	/**
				83	* @brief Extract MFCC features and quantise for one single small
				84	* frame of audio data e.g. 640 samples.
				85	* @param[in] audioData - Vector of audio samples to calculate
				86	* features for.
				87	* @param[in] quantScale - quantisation scale.
				88	* @param[in] quantOffset - quantisation offset
				89	* @return Vector of extracted quantised MFCC features.
				90	**/
				91	template<typename T>
				92	std::vector<T> MfccComputeQuant(const std::vector<float>& audioData,
				93	const float quantScale,
				94	const int quantOffset)
				95	{
				96	this->_MfccComputePreFeature(audioData);
				97	float minVal = std::numeric_limits<T>::min();
				98	float maxVal = std::numeric_limits<T>::max();
				99
				100	std::vector<T> mfccOut(this->_m_params.m_numMfccFeatures);
				101	const size_t numFbankBins = this->_m_params.m_numFbankBins;
				102
				103	/* Take DCT. Uses matrix mul. */
				104	for (size_t i = 0, j = 0; i < mfccOut.size(); ++i, j += numFbankBins)
				105	{
				106	float sum = 0;
				107	for (size_t k = 0; k < numFbankBins; ++k)
				108	{
				109	sum += this->_m_dctMatrix[j + k] * this->_m_melEnergies[k];
				110	}
				111	/* Quantize to T. */
				112	sum = std::round((sum / quantScale) + quantOffset);
				113	mfccOut[i] = static_cast<T>(std::min<float>(std::max<float>(sum, minVal), maxVal));
				114	}
				115
				116	return mfccOut;
				117	}
				118
				119	/* Constants */
				120	static constexpr float logStep = 1.8562979903656 / 27.0;
				121	static constexpr float freqStep = 200.0 / 3;
				122	static constexpr float minLogHz = 1000.0;
				123	static constexpr float minLogMel = minLogHz / freqStep;
				124
				125	protected:
				126	/**
				127	* @brief Project input frequency to Mel Scale.
				128	* @param[in] freq - input frequency in floating point
				129	* @param[in] useHTKmethod - bool to signal if HTK method is to be
				130	* used for calculation
				131	* @return Mel transformed frequency in floating point
				132	**/
				133	static float MelScale(const float freq,
				134	const bool useHTKMethod = true);
				135
				136	/**
				137	* @brief Inverse Mel transform - convert MEL warped frequency
				138	* back to normal frequency
				139	* @param[in] freq - Mel frequency in floating point
				140	* @param[in] useHTKmethod - bool to signal if HTK method is to be
				141	* used for calculation
				142	* @return Real world frequency in floating point
				143	**/
				144	static float InverseMelScale(const float melFreq,
				145	const bool useHTKMethod = true);
				146
				147	/**
				148	* @brief Populates MEL energies after applying the MEL filter
				149	* bank weights and adding them up to be placed into
				150	* bins, according to the filter bank's first and last
				151	* indices (pre-computed for each filter bank element
				152	* by _CreateMelFilterBank function).
				153	* @param[in] fftVec Vector populated with FFT magnitudes
				154	* @param[in] melFilterBank 2D Vector with filter bank weights
				155	* @param[in] filterBankFilterFirst Vector containing the first indices of filter bank
				156	* to be used for each bin.
				157	* @param[in] filterBankFilterLast Vector containing the last indices of filter bank
				158	* to be used for each bin.
				159	* @param[out] melEnergies Pre-allocated vector of MEL energies to be
				160	* populated.
				161	* @return true if successful, false otherwise
				162	*/
				163	virtual bool ApplyMelFilterBank(
				164	std::vector<float>& fftVec,
				165	std::vector<std::vector<float>>& melFilterBank,
				166	std::vector<int32_t>& filterBankFilterFirst,
				167	std::vector<int32_t>& filterBankFilterLast,
				168	std::vector<float>& melEnergies);
				169
				170	/**
				171	* @brief Converts the Mel energies for logarithmic scale
				172	* @param[in/out] melEnergies - 1D vector of Mel energies
				173	**/
				174	virtual void ConvertToLogarithmicScale(std::vector<float>& melEnergies);
				175
				176	/**
				177	* @brief Create a matrix used to calculate Discrete Cosine
				178	* Transform.
				179	* @param[in] inputLength - input length of the buffer on which
				180	* DCT will be performed
				181	* @param[in] coefficientCount - Total coefficients per input
				182	* length
				183	* @return 1D vector with inputLength x coefficientCount elements
				184	* populated with DCT coefficients.
				185	*/
				186	virtual std::vector<float> CreateDCTMatrix(
				187	const int32_t inputLength,
				188	const int32_t coefficientCount);
				189
				190	/**
				191	* @brief Given the low and high Mel values, get the normaliser
				192	* for weights to be applied when populating the filter
				193	* bank.
				194	* @param[in] leftMel - low Mel frequency value
				195	* @param[in] rightMel - high Mel frequency value
				196	* @param[in] useHTKMethod - bool to signal if HTK method is to be
				197	* used for calculation
				198	*/
				199	virtual float GetMelFilterBankNormaliser(
				200	const float& leftMel,
				201	const float& rightMel,
				202	const bool useHTKMethod);
				203
				204	private:
				205
				206	std::vector<float> _m_frame;
				207	std::vector<float> _m_buffer;
				208	std::vector<float> _m_melEnergies;
				209	std::vector<float> _m_windowFunc;
				210	std::vector<std::vector<float>> _m_melFilterBank;
				211	std::vector<float> _m_dctMatrix;
				212	std::vector<int32_t> _m_filterBankFilterFirst;
				213	std::vector<int32_t> _m_filterBankFilterLast;
				214	bool _m_filterBankInitialised;
				215
				216	/**
				217	* @brief Initialises the filter banks and the DCT matrix **/
				218	void _InitMelFilterBank();
				219
				220	/**
				221	* @brief Signals whether the instance of MFCC has had its
				222	* required buffers initialised
				223	* @return True if initialised, false otherwise
				224	**/
				225	bool _IsMelFilterBankInited();
				226
				227	/**
				228	* @brief Create mel filter banks for MFCC calculation.
				229	* @return 2D vector of floats
				230	**/
				231	std::vector<std::vector<float>> _CreateMelFilterBank();
				232
				233	/**
				234	* @brief Computes and populates internal memeber buffers used
				235	* in MFCC feature calculation
				236	* @param[in] audioData - 1D vector of 16-bit audio data
				237	*/
				238	void _MfccComputePreFeature(const std::vector<float>& audioData);
				239
				240	/** @brief Computes the magnitude from an interleaved complex array */
				241	void _ConvertToPowerSpectrum();
				242
				243	};
				244