//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <string>
#include <vector>

/* Consolidated parameters for MFCC computation. */
class MfccParams
{
public:
    /* Parameter values. All fields are public and read directly by users of this class. */
    float m_samplingFreq;
    int   m_numFbankBins;
    float m_melLoFreq;
    float m_melHiFreq;
    int   m_numMfccFeatures;
    int   m_frameLen;
    int   m_frameLenPadded;   /* Not a constructor argument - presumably derived from frameLen; confirm in the .cpp. */
    bool  m_useHtkMethod;
    int   m_numMfccVectors;

    /* A parameter set must always be built from explicit values. */
    MfccParams() = delete;

    /**
     * @brief       Constructor
     * @param[in]   samplingFreq    Sampling frequency (presumably stored in m_samplingFreq).
     * @param[in]   numFbankBins    Number of filter bank bins.
     * @param[in]   melLoFreq       Low Mel frequency bound.
     * @param[in]   melHiFreq       High Mel frequency bound.
     * @param[in]   numMfccFeats    Number of MFCC features.
     * @param[in]   frameLen        Frame length.
     * @param[in]   useHtkMethod    Whether the HTK method is to be used.
     * @param[in]   numMfccVectors  Number of MFCC vectors.
     */
    MfccParams(float samplingFreq,
               int   numFbankBins,
               float melLoFreq,
               float melHiFreq,
               int   numMfccFeats,
               int   frameLen,
               bool  useHtkMethod,
               int   numMfccVectors);

    /* Compiler-generated destructor. */
    ~MfccParams() = default;

    /** @brief  String representation of parameters */
    std::string Str();
};

/**
 * @brief   Class for MFCC feature extraction.
 *          Based on https://github.com/ARM-software/ML-KWS-for-MCU/blob/master/Deployment/Source/MFCC/mfcc.cpp
 *          This class is designed to be generic and self-sufficient, but
 *          certain calculation routines can be overridden to accommodate
 *          use-case specific requirements.
 */
47class MFCC {
48public:
49 /**
50 * @brief Constructor
51 * @param[in] params MFCC parameters
52 */
53 explicit MFCC(const MfccParams& params);
54
55 MFCC() = delete;
56
57 ~MFCC() = default;
58
59 /**
60 * @brief Extract MFCC features for one single small frame of
61 * audio data e.g. 640 samples.
62 * @param[in] audioData Vector of audio samples to calculate
63 * features for.
64 * @return Vector of extracted MFCC features.
65 **/
66 std::vector<float> MfccCompute(const std::vector<float>& audioData);
67
68 /** @brief Initialise. */
69 void Init();
70
71 /**
72 * @brief Extract MFCC features and quantise for one single small
73 * frame of audio data e.g. 640 samples.
74 * @param[in] audioData Vector of audio samples to calculate
75 * features for.
76 * @param[in] quantScale Quantisation scale.
77 * @param[in] quantOffset Quantisation offset.
78 * @return Vector of extracted quantised MFCC features.
79 **/
80 template<typename T>
81 std::vector<T> MfccComputeQuant(const std::vector<float>& audioData,
82 const float quantScale,
83 const int quantOffset)
84 {
85 this->MfccComputePreFeature(audioData);
86 float minVal = std::numeric_limits<T>::min();
87 float maxVal = std::numeric_limits<T>::max();
88
89 std::vector<T> mfccOut(this->m_params.m_numMfccFeatures);
90 const size_t numFbankBins = this->m_params.m_numFbankBins;
91
92 /* Take DCT. Uses matrix mul. */
93 for (size_t i = 0, j = 0; i < mfccOut.size(); ++i, j += numFbankBins)
94 {
95 float sum = 0;
96 for (size_t k = 0; k < numFbankBins; ++k)
97 {
98 sum += this->m_dctMatrix[j + k] * this->m_melEnergies[k];
99 }
100 /* Quantize to T. */
101 sum = std::round((sum / quantScale) + quantOffset);
102 mfccOut[i] = static_cast<T>(std::min<float>(std::max<float>(sum, minVal), maxVal));
103 }
104
105 return mfccOut;
106 }
107
108 MfccParams m_params;
109
110 /* Constants */
111 static constexpr float ms_logStep = /*logf(6.4)*/ 1.8562979903656 / 27.0;
112 static constexpr float ms_freqStep = 200.0 / 3;
113 static constexpr float ms_minLogHz = 1000.0;
114 static constexpr float ms_minLogMel = ms_minLogHz / ms_freqStep;
115
116protected:
117 /**
118 * @brief Project input frequency to Mel Scale.
119 * @param[in] freq Input frequency in floating point.
120 * @param[in] useHTKMethod bool to signal if HTK method is to be
121 * used for calculation.
122 * @return Mel transformed frequency in floating point.
123 **/
124 static float MelScale(float freq,
125 bool useHTKMethod = true);
126
127 /**
128 * @brief Inverse Mel transform - convert MEL warped frequency
129 * back to normal frequency.
130 * @param[in] melFreq Mel frequency in floating point.
131 * @param[in] useHTKMethod bool to signal if HTK method is to be
132 * used for calculation.
133 * @return Real world frequency in floating point.
134 **/
135 static float InverseMelScale(float melFreq,
136 bool useHTKMethod = true);
137
138 /**
139 * @brief Populates MEL energies after applying the MEL filter
140 * bank weights and adding them up to be placed into
141 * bins, according to the filter bank's first and last
142 * indices (pre-computed for each filter bank element
143 * by CreateMelFilterBank function).
144 * @param[in] fftVec Vector populated with FFT magnitudes.
145 * @param[in] melFilterBank 2D Vector with filter bank weights.
146 * @param[in] filterBankFilterFirst Vector containing the first indices of filter bank
147 * to be used for each bin.
148 * @param[in] filterBankFilterLast Vector containing the last indices of filter bank
149 * to be used for each bin.
150 * @param[out] melEnergies Pre-allocated vector of MEL energies to be
151 * populated.
152 * @return true if successful, false otherwise.
153 */
154 virtual bool ApplyMelFilterBank(
155 std::vector<float>& fftVec,
156 std::vector<std::vector<float>>& melFilterBank,
157 std::vector<uint32_t>& filterBankFilterFirst,
158 std::vector<uint32_t>& filterBankFilterLast,
159 std::vector<float>& melEnergies);
160
161 /**
162 * @brief Converts the Mel energies for logarithmic scale.
163 * @param[in,out] melEnergies 1D vector of Mel energies.
164 **/
165 virtual void ConvertToLogarithmicScale(std::vector<float>& melEnergies);
166
167 /**
168 * @brief Create a matrix used to calculate Discrete Cosine
169 * Transform.
170 * @param[in] inputLength Input length of the buffer on which
171 * DCT will be performed.
172 * @param[in] coefficientCount Total coefficients per input length.
173 * @return 1D vector with inputLength x coefficientCount elements
174 * populated with DCT coefficients.
175 */
176 virtual std::vector<float> CreateDCTMatrix(
177 int32_t inputLength,
178 int32_t coefficientCount);
179
180 /**
181 * @brief Given the low and high Mel values, get the normaliser
182 * for weights to be applied when populating the filter
183 * bank.
184 * @param[in] leftMel Low Mel frequency value.
185 * @param[in] rightMel High Mel frequency value.
186 * @param[in] useHTKMethod bool to signal if HTK method is to be
187 * used for calculation.
188 * @return Value to use for normalizing.
189 */
190 virtual float GetMelFilterBankNormaliser(
191 const float& leftMel,
192 const float& rightMel,
193 bool useHTKMethod);
194
195private:
196
197 std::vector<float> m_frame;
198 std::vector<float> m_buffer;
199 std::vector<float> m_melEnergies;
200 std::vector<float> m_windowFunc;
201 std::vector<std::vector<float>> m_melFilterBank;
202 std::vector<float> m_dctMatrix;
203 std::vector<uint32_t> m_filterBankFilterFirst;
204 std::vector<uint32_t> m_filterBankFilterLast;
205 bool m_filterBankInitialised;
206
207 /**
208 * @brief Initialises the filter banks and the DCT matrix. **/
209 void InitMelFilterBank();
210
211 /**
212 * @brief Signals whether the instance of MFCC has had its
213 * required buffers initialised.
214 * @return true if initialised, false otherwise.
215 **/
216 bool IsMelFilterBankInited() const;
217
218 /**
219 * @brief Create mel filter banks for MFCC calculation.
220 * @return 2D vector of floats.
221 **/
222 std::vector<std::vector<float>> CreateMelFilterBank();
223
224 /**
225 * @brief Computes and populates internal memeber buffers used
226 * in MFCC feature calculation
227 * @param[in] audioData 1D vector of 16-bit audio data.
228 */
229 void MfccComputePreFeature(const std::vector<float>& audioData);
230
231 /** @brief Computes the magnitude from an interleaved complex array. */
232 void ConvertToPowerSpectrum();
233
234};