/*
 * Copyright (c) 2021 Arm Limited. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17#ifndef MELSPECTROGRAM_HPP
18#define MELSPECTROGRAM_HPP
19
#include "PlatformMath.hpp"

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <string>
#include <vector>
27
28namespace arm {
29namespace app {
30namespace audio {
31
/**
 * @brief Consolidated set of parameters used for Mel Spectrogram
 *        computation. All fields are public and are populated by the
 *        constructor, which is defined outside this header.
 */
class MelSpecParams {
public:
    float    m_samplingFreq;     /* Sampling frequency. */
    uint32_t m_numFbankBins;     /* Number of filter bank bins. */
    float    m_melLoFreq;        /* Low end of the Mel frequency range. */
    float    m_melHiFreq;        /* High end of the Mel frequency range. */
    uint32_t m_frameLen;         /* Frame length. */
    uint32_t m_frameLenPadded;   /* Not a constructor argument.
                                  * NOTE(review): presumably derived from
                                  * frameLen by the constructor - confirm. */
    bool     m_useHtkMethod;     /* Whether the HTK method is to be used. */

    /* Parameters must always be supplied explicitly. */
    MelSpecParams() = delete;

    /**
     * @brief Constructor
     * @param[in] samplingFreq  value for the sampling frequency
     * @param[in] numFbankBins  value for the number of filter bank bins
     * @param[in] melLoFreq     value for the low Mel frequency
     * @param[in] melHiFreq     value for the high Mel frequency
     * @param[in] frameLen      value for the frame length
     * @param[in] useHtkMethod  value for the HTK method flag
     */
    MelSpecParams(float samplingFreq,
                  uint32_t numFbankBins,
                  float melLoFreq,
                  float melHiFreq,
                  uint32_t frameLen,
                  bool useHtkMethod);

    ~MelSpecParams() = default;

    /**
     * @brief  String representation of the parameters
     * @return std::string describing this object
     */
    std::string Str();
};
54
55 /**
56 * @brief Class for Mel Spectrogram feature extraction.
57 * Based on https://github.com/ARM-software/ML-KWS-for-MCU/blob/master/Deployment/Source/MFCC/mfcc.cpp
58 * This class is designed to be generic and self-sufficient but
59 * certain calculation routines can be overridden to accommodate
60 * use-case specific requirements.
61 */
62 class MelSpectrogram {
63
64 public:
65 /**
66 * @brief Extract Mel Spectrogram for one single small frame of
67 * audio data e.g. 640 samples.
68 * @param[in] audioData - Vector of audio samples to calculate
69 * features for.
70 * @param[in] trainingMean - Value to subtract from the the computed mel spectrogram, default 0.
71 * @return Vector of extracted Mel Spectrogram features.
72 **/
73 std::vector<float> ComputeMelSpec(const std::vector<int16_t>& audioData, float trainingMean = 0);
74
75 /**
76 * @brief Constructor
77 * @param[in] params - Mel Spectrogram parameters
78 */
79 MelSpectrogram(const MelSpecParams& params);
80
81 MelSpectrogram() = delete;
82 ~MelSpectrogram() = default;
83
84 /** @brief Initialise */
85 void Init();
86
87 /**
88 * @brief Extract Mel Spectrogram features and quantise for one single small
89 * frame of audio data e.g. 640 samples.
90 * @param[in] audioData - Vector of audio samples to calculate
91 * features for.
92 * @param[in] quantScale - quantisation scale.
93 * @param[in] quantOffset - quantisation offset
94 * @return Vector of extracted quantised Mel Spectrogram features.
95 **/
96 template<typename T>
97 std::vector<T> MelSpecComputeQuant(const std::vector<int16_t>& audioData,
98 const float quantScale,
99 const int quantOffset,
100 float trainingMean = 0)
101 {
102 this->ComputeMelSpec(audioData, trainingMean);
103 float minVal = std::numeric_limits<T>::min();
104 float maxVal = std::numeric_limits<T>::max();
105
106 std::vector<T> melSpecOut(this->_m_params.m_numFbankBins);
107 const size_t numFbankBins = this->_m_params.m_numFbankBins;
108
109 /* Quantize to T. */
110 for (size_t k = 0; k < numFbankBins; ++k) {
111 auto quantizedEnergy = std::round(((this->_m_melEnergies[k]) / quantScale) + quantOffset);
112 melSpecOut[k] = static_cast<T>(std::min<float>(std::max<float>(quantizedEnergy, minVal), maxVal));
113 }
114
115 return melSpecOut;
116 }
117
118 /* Constants */
119 static constexpr float ms_logStep = /*logf(6.4)*/ 1.8562979903656 / 27.0;
120 static constexpr float ms_freqStep = 200.0 / 3;
121 static constexpr float ms_minLogHz = 1000.0;
122 static constexpr float ms_minLogMel = ms_minLogHz / ms_freqStep;
123
124 protected:
125 /**
126 * @brief Project input frequency to Mel Scale.
127 * @param[in] freq - input frequency in floating point
128 * @param[in] useHTKmethod - bool to signal if HTK method is to be
129 * used for calculation
130 * @return Mel transformed frequency in floating point
131 **/
132 static float MelScale(const float freq,
133 const bool useHTKMethod = true);
134
135 /**
136 * @brief Inverse Mel transform - convert MEL warped frequency
137 * back to normal frequency
138 * @param[in] freq - Mel frequency in floating point
139 * @param[in] useHTKmethod - bool to signal if HTK method is to be
140 * used for calculation
141 * @return Real world frequency in floating point
142 **/
143 static float InverseMelScale(const float melFreq,
144 const bool useHTKMethod = true);
145
146 /**
147 * @brief Populates MEL energies after applying the MEL filter
148 * bank weights and adding them up to be placed into
149 * bins, according to the filter bank's first and last
150 * indices (pre-computed for each filter bank element
151 * by _CreateMelFilterBank function).
152 * @param[in] fftVec Vector populated with FFT magnitudes
153 * @param[in] melFilterBank 2D Vector with filter bank weights
154 * @param[in] filterBankFilterFirst Vector containing the first indices of filter bank
155 * to be used for each bin.
156 * @param[in] filterBankFilterLast Vector containing the last indices of filter bank
157 * to be used for each bin.
158 * @param[out] melEnergies Pre-allocated vector of MEL energies to be
159 * populated.
160 * @return true if successful, false otherwise
161 */
162 virtual bool ApplyMelFilterBank(
163 std::vector<float>& fftVec,
164 std::vector<std::vector<float>>& melFilterBank,
165 std::vector<int32_t>& filterBankFilterFirst,
166 std::vector<int32_t>& filterBankFilterLast,
167 std::vector<float>& melEnergies);
168
169 /**
170 * @brief Converts the Mel energies for logarithmic scale
171 * @param[in/out] melEnergies - 1D vector of Mel energies
172 **/
173 virtual void ConvertToLogarithmicScale(std::vector<float>& melEnergies);
174
175 /**
176 * @brief Given the low and high Mel values, get the normaliser
177 * for weights to be applied when populating the filter
178 * bank.
179 * @param[in] leftMel - low Mel frequency value
180 * @param[in] rightMel - high Mel frequency value
181 * @param[in] useHTKMethod - bool to signal if HTK method is to be
182 * used for calculation
183 * @return Return float value to be applied
184 * when populating the filter bank.
185 */
186 virtual float GetMelFilterBankNormaliser(
187 const float& leftMel,
188 const float& rightMel,
189 const bool useHTKMethod);
190
191 private:
192 MelSpecParams _m_params;
193 std::vector<float> _m_frame;
194 std::vector<float> _m_buffer;
195 std::vector<float> _m_melEnergies;
196 std::vector<float> _m_windowFunc;
197 std::vector<std::vector<float>> _m_melFilterBank;
198 std::vector<int32_t> _m_filterBankFilterFirst;
199 std::vector<int32_t> _m_filterBankFilterLast;
200 bool _m_filterBankInitialised;
201 arm::app::math::FftInstance _m_fftInstance;
202
203 /**
204 * @brief Initialises the filter banks.
205 **/
206 void _InitMelFilterBank();
207
208 /**
209 * @brief Signals whether the instance of MelSpectrogram has had its
210 * required buffers initialised
211 * @return True if initialised, false otherwise
212 **/
213 bool _IsMelFilterBankInited();
214
215 /**
216 * @brief Create mel filter banks for Mel Spectrogram calculation.
217 * @return 2D vector of floats
218 **/
219 std::vector<std::vector<float>> _CreateMelFilterBank();
220
221 /**
222 * @brief Computes the magnitude from an interleaved complex array
223 **/
224 void _ConvertToPowerSpectrum();
225
226 };
227
228} /* namespace audio */
229} /* namespace app */
230} /* namespace arm */
231
232
233#endif /* MELSPECTROGRAM_HPP */