blob: 74e021c83b7cb647de31f813cebf5f485db5c441 [file] [log] [blame]
/*
 * SPDX-FileCopyrightText: Copyright 2021 Arm Limited and/or its affiliates <open-source-office@arm.com>
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17#ifndef MFCC_HPP
18#define MFCC_HPP
19
20#include "PlatformMath.hpp"
21
22#include <vector>
23#include <cstdint>
24#include <cmath>
25#include <limits>
26#include <string>
27
28namespace arm {
29namespace app {
30namespace audio {
31
/**
 * @brief MFCC's consolidated parameters.
 *
 * Parameter bundle handed to the MFCC feature extractor (the MFCC
 * constructor takes a const reference to it). All fields are public.
 * The class cannot be default-constructed: every instance must go
 * through the parameterised constructor.
 */
class MfccParams {
public:
    float    m_samplingFreq;    /* Sampling frequency (units not shown here; presumably Hz - confirm). */
    uint32_t m_numFbankBins;    /* Number of filter bank bins. */
    float    m_melLoFreq;       /* Lower frequency bound for the Mel filter bank (presumably Hz - confirm). */
    float    m_melHiFreq;       /* Upper frequency bound for the Mel filter bank (presumably Hz - confirm). */
    uint32_t m_numMfccFeatures; /* Number of MFCC features produced per frame. */
    uint32_t m_frameLen;        /* Frame length (presumably in samples - confirm). */
    uint32_t m_frameLenPadded;  /* Padded frame length. Not a constructor argument; presumably
                                 * derived from frameLen in the constructor definition, which
                                 * is not part of this header. */
    bool     m_useHtkMethod;    /* Whether the HTK method is to be used for Mel calculations. */

    /**
     * @brief Constructor
     * @param[in] samplingFreq  Sampling frequency.
     * @param[in] numFbankBins  Number of filter bank bins.
     * @param[in] melLoFreq     Lower Mel filter bank frequency bound.
     * @param[in] melHiFreq     Upper Mel filter bank frequency bound.
     * @param[in] numMfccFeats  Number of MFCC features.
     * @param[in] frameLen      Frame length.
     * @param[in] useHtkMethod  true to use the HTK method.
     */
    MfccParams(float samplingFreq, uint32_t numFbankBins,
               float melLoFreq, float melHiFreq,
               uint32_t numMfccFeats, uint32_t frameLen,
               bool useHtkMethod);

    /* A parameter set without values is meaningless; forbid it. */
    MfccParams() = delete;

    ~MfccParams() = default;

    /** @brief Log parameters */
    void Log() const;
};
57
58 /**
59 * @brief Class for MFCC feature extraction.
60 * Based on https://github.com/ARM-software/ML-KWS-for-MCU/blob/master/Deployment/Source/MFCC/mfcc.cpp
61 * This class is designed to be generic and self-sufficient but
62 * certain calculation routines can be overridden to accommodate
63 * use-case specific requirements.
64 */
65 class MFCC {
66 public:
67 /**
68 * @brief Constructor
69 * @param[in] params MFCC parameters
70 */
71 explicit MFCC(const MfccParams& params);
72
73 MFCC() = delete;
74
75 ~MFCC() = default;
76
77 /**
78 * @brief Extract MFCC features for one single small frame of
79 * audio data e.g. 640 samples.
80 * @param[in] audioData Vector of audio samples to calculate
81 * features for.
82 * @return Vector of extracted MFCC features.
83 **/
84 std::vector<float> MfccCompute(const std::vector<int16_t>& audioData);
85
86 /** @brief Initialise. */
87 void Init();
88
89 /**
90 * @brief Extract MFCC features and quantise for one single small
91 * frame of audio data e.g. 640 samples.
92 * @param[in] audioData Vector of audio samples to calculate
93 * features for.
94 * @param[in] quantScale Quantisation scale.
95 * @param[in] quantOffset Quantisation offset.
96 * @return Vector of extracted quantised MFCC features.
97 **/
98 template<typename T>
99 std::vector<T> MfccComputeQuant(const std::vector<int16_t>& audioData,
100 const float quantScale,
101 const int quantOffset)
102 {
alexanderc350cdc2021-04-29 20:36:09 +0100103 this->MfccComputePreFeature(audioData);
alexander3c798932021-03-26 21:42:19 +0000104 float minVal = std::numeric_limits<T>::min();
105 float maxVal = std::numeric_limits<T>::max();
106
Isabella Gottardi56ee6202021-05-12 08:27:15 +0100107 std::vector<T> mfccOut(this->m_params.m_numMfccFeatures);
108 const size_t numFbankBins = this->m_params.m_numFbankBins;
alexander3c798932021-03-26 21:42:19 +0000109
110 /* Take DCT. Uses matrix mul. */
111 for (size_t i = 0, j = 0; i < mfccOut.size(); ++i, j += numFbankBins) {
112 float sum = 0;
113 for (size_t k = 0; k < numFbankBins; ++k) {
Isabella Gottardi56ee6202021-05-12 08:27:15 +0100114 sum += this->m_dctMatrix[j + k] * this->m_melEnergies[k];
alexander3c798932021-03-26 21:42:19 +0000115 }
116 /* Quantize to T. */
117 sum = std::round((sum / quantScale) + quantOffset);
118 mfccOut[i] = static_cast<T>(std::min<float>(std::max<float>(sum, minVal), maxVal));
119 }
120
121 return mfccOut;
122 }
123
124 /* Constants */
125 static constexpr float ms_logStep = /*logf(6.4)*/ 1.8562979903656 / 27.0;
126 static constexpr float ms_freqStep = 200.0 / 3;
127 static constexpr float ms_minLogHz = 1000.0;
128 static constexpr float ms_minLogMel = ms_minLogHz / ms_freqStep;
129
130 protected:
131 /**
132 * @brief Project input frequency to Mel Scale.
133 * @param[in] freq Input frequency in floating point.
Isabella Gottardi56ee6202021-05-12 08:27:15 +0100134 * @param[in] useHTKMethod bool to signal if HTK method is to be
alexander3c798932021-03-26 21:42:19 +0000135 * used for calculation.
136 * @return Mel transformed frequency in floating point.
137 **/
138 static float MelScale(float freq,
139 bool useHTKMethod = true);
140
141 /**
142 * @brief Inverse Mel transform - convert MEL warped frequency
143 * back to normal frequency.
Isabella Gottardi56ee6202021-05-12 08:27:15 +0100144 * @param[in] melFreq Mel frequency in floating point.
145 * @param[in] useHTKMethod bool to signal if HTK method is to be
alexander3c798932021-03-26 21:42:19 +0000146 * used for calculation.
147 * @return Real world frequency in floating point.
148 **/
149 static float InverseMelScale(float melFreq,
150 bool useHTKMethod = true);
151
152 /**
153 * @brief Populates MEL energies after applying the MEL filter
154 * bank weights and adding them up to be placed into
155 * bins, according to the filter bank's first and last
156 * indices (pre-computed for each filter bank element
alexanderc350cdc2021-04-29 20:36:09 +0100157 * by CreateMelFilterBank function).
alexander3c798932021-03-26 21:42:19 +0000158 * @param[in] fftVec Vector populated with FFT magnitudes.
159 * @param[in] melFilterBank 2D Vector with filter bank weights.
160 * @param[in] filterBankFilterFirst Vector containing the first indices of filter bank
161 * to be used for each bin.
162 * @param[in] filterBankFilterLast Vector containing the last indices of filter bank
163 * to be used for each bin.
164 * @param[out] melEnergies Pre-allocated vector of MEL energies to be
165 * populated.
166 * @return true if successful, false otherwise.
167 */
168 virtual bool ApplyMelFilterBank(
169 std::vector<float>& fftVec,
170 std::vector<std::vector<float>>& melFilterBank,
alexanderc350cdc2021-04-29 20:36:09 +0100171 std::vector<uint32_t>& filterBankFilterFirst,
172 std::vector<uint32_t>& filterBankFilterLast,
alexander3c798932021-03-26 21:42:19 +0000173 std::vector<float>& melEnergies);
174
175 /**
176 * @brief Converts the Mel energies for logarithmic scale.
177 * @param[in,out] melEnergies 1D vector of Mel energies.
178 **/
179 virtual void ConvertToLogarithmicScale(std::vector<float>& melEnergies);
180
181 /**
182 * @brief Create a matrix used to calculate Discrete Cosine
183 * Transform.
184 * @param[in] inputLength Input length of the buffer on which
185 * DCT will be performed.
186 * @param[in] coefficientCount Total coefficients per input length.
187 * @return 1D vector with inputLength x coefficientCount elements
188 * populated with DCT coefficients.
189 */
190 virtual std::vector<float> CreateDCTMatrix(
191 int32_t inputLength,
192 int32_t coefficientCount);
193
194 /**
195 * @brief Given the low and high Mel values, get the normaliser
196 * for weights to be applied when populating the filter
197 * bank.
198 * @param[in] leftMel Low Mel frequency value.
199 * @param[in] rightMel High Mel frequency value.
200 * @param[in] useHTKMethod bool to signal if HTK method is to be
201 * used for calculation.
202 * @return Value to use for normalizing.
203 */
204 virtual float GetMelFilterBankNormaliser(
205 const float& leftMel,
206 const float& rightMel,
207 bool useHTKMethod);
208
209 private:
Isabella Gottardi56ee6202021-05-12 08:27:15 +0100210 MfccParams m_params;
211 std::vector<float> m_frame;
212 std::vector<float> m_buffer;
213 std::vector<float> m_melEnergies;
214 std::vector<float> m_windowFunc;
215 std::vector<std::vector<float>> m_melFilterBank;
216 std::vector<float> m_dctMatrix;
217 std::vector<uint32_t> m_filterBankFilterFirst;
218 std::vector<uint32_t> m_filterBankFilterLast;
219 bool m_filterBankInitialised;
220 arm::app::math::FftInstance m_fftInstance;
alexander3c798932021-03-26 21:42:19 +0000221
222 /**
223 * @brief Initialises the filter banks and the DCT matrix. **/
alexanderc350cdc2021-04-29 20:36:09 +0100224 void InitMelFilterBank();
alexander3c798932021-03-26 21:42:19 +0000225
226 /**
227 * @brief Signals whether the instance of MFCC has had its
228 * required buffers initialised.
229 * @return true if initialised, false otherwise.
230 **/
alexanderc350cdc2021-04-29 20:36:09 +0100231 bool IsMelFilterBankInited() const;
alexander3c798932021-03-26 21:42:19 +0000232
233 /**
234 * @brief Create mel filter banks for MFCC calculation.
235 * @return 2D vector of floats.
236 **/
alexanderc350cdc2021-04-29 20:36:09 +0100237 std::vector<std::vector<float>> CreateMelFilterBank();
alexander3c798932021-03-26 21:42:19 +0000238
239 /**
240 * @brief Computes and populates internal memeber buffers used
241 * in MFCC feature calculation
242 * @param[in] audioData 1D vector of 16-bit audio data.
243 */
alexanderc350cdc2021-04-29 20:36:09 +0100244 void MfccComputePreFeature(const std::vector<int16_t>& audioData);
alexander3c798932021-03-26 21:42:19 +0000245
246 /** @brief Computes the magnitude from an interleaved complex array. */
alexanderc350cdc2021-04-29 20:36:09 +0100247 void ConvertToPowerSpectrum();
alexander3c798932021-03-26 21:42:19 +0000248
249 };
250
251} /* namespace audio */
252} /* namespace app */
253} /* namespace arm */
254
255#endif /* MFCC_HPP */