blob: 2e738fca8a54fe88f5137950379a3b5d04e9ca6b [file] [log] [blame]
alexander3c798932021-03-26 21:42:19 +00001/*
Richard Burtonf32a86a2022-11-15 11:46:11 +00002 * SPDX-FileCopyrightText: Copyright 2021 Arm Limited and/or its affiliates <open-source-office@arm.com>
alexander3c798932021-03-26 21:42:19 +00003 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17#ifndef MELSPECTROGRAM_HPP
18#define MELSPECTROGRAM_HPP
19
#include "PlatformMath.hpp"

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <string>
#include <vector>
27
28namespace arm {
29namespace app {
30namespace audio {
31
32 /* Mel Spectrogram consolidated parameters */
33 class MelSpecParams {
34 public:
35 float m_samplingFreq;
36 uint32_t m_numFbankBins;
37 float m_melLoFreq;
38 float m_melHiFreq;
39 uint32_t m_frameLen;
40 uint32_t m_frameLenPadded;
41 bool m_useHtkMethod;
42
43 /** @brief Constructor */
44 MelSpecParams(const float samplingFreq, const uint32_t numFbankBins,
45 const float melLoFreq, const float melHiFreq,
46 const uint32_t frameLen, const bool useHtkMethod);
47
48 MelSpecParams() = delete;
49 ~MelSpecParams() = default;
50
51 /** @brief String representation of parameters */
alexanderc350cdc2021-04-29 20:36:09 +010052 std::string Str() const;
alexander3c798932021-03-26 21:42:19 +000053 };
54
55 /**
56 * @brief Class for Mel Spectrogram feature extraction.
57 * Based on https://github.com/ARM-software/ML-KWS-for-MCU/blob/master/Deployment/Source/MFCC/mfcc.cpp
58 * This class is designed to be generic and self-sufficient but
59 * certain calculation routines can be overridden to accommodate
60 * use-case specific requirements.
61 */
62 class MelSpectrogram {
63
64 public:
65 /**
66 * @brief Extract Mel Spectrogram for one single small frame of
67 * audio data e.g. 640 samples.
Isabella Gottardi56ee6202021-05-12 08:27:15 +010068 * @param[in] audioData Vector of audio samples to calculate
alexander3c798932021-03-26 21:42:19 +000069 * features for.
Isabella Gottardi56ee6202021-05-12 08:27:15 +010070 * @param[in] trainingMean Value to subtract from the the computed mel spectrogram, default 0.
alexander3c798932021-03-26 21:42:19 +000071 * @return Vector of extracted Mel Spectrogram features.
72 **/
73 std::vector<float> ComputeMelSpec(const std::vector<int16_t>& audioData, float trainingMean = 0);
74
75 /**
76 * @brief Constructor
Isabella Gottardi56ee6202021-05-12 08:27:15 +010077 * @param[in] params Mel Spectrogram parameters
alexander3c798932021-03-26 21:42:19 +000078 */
alexanderc350cdc2021-04-29 20:36:09 +010079 explicit MelSpectrogram(const MelSpecParams& params);
alexander3c798932021-03-26 21:42:19 +000080
81 MelSpectrogram() = delete;
82 ~MelSpectrogram() = default;
83
84 /** @brief Initialise */
85 void Init();
86
87 /**
88 * @brief Extract Mel Spectrogram features and quantise for one single small
89 * frame of audio data e.g. 640 samples.
Isabella Gottardi56ee6202021-05-12 08:27:15 +010090 * @param[in] audioData Vector of audio samples to calculate
alexander3c798932021-03-26 21:42:19 +000091 * features for.
Isabella Gottardi56ee6202021-05-12 08:27:15 +010092 * @param[in] quantScale quantisation scale.
93 * @param[in] quantOffset quantisation offset.
94 * @param[in] trainingMean training mean.
alexander3c798932021-03-26 21:42:19 +000095 * @return Vector of extracted quantised Mel Spectrogram features.
96 **/
97 template<typename T>
98 std::vector<T> MelSpecComputeQuant(const std::vector<int16_t>& audioData,
99 const float quantScale,
100 const int quantOffset,
101 float trainingMean = 0)
102 {
103 this->ComputeMelSpec(audioData, trainingMean);
104 float minVal = std::numeric_limits<T>::min();
105 float maxVal = std::numeric_limits<T>::max();
106
Isabella Gottardi56ee6202021-05-12 08:27:15 +0100107 std::vector<T> melSpecOut(this->m_params.m_numFbankBins);
108 const size_t numFbankBins = this->m_params.m_numFbankBins;
alexander3c798932021-03-26 21:42:19 +0000109
110 /* Quantize to T. */
111 for (size_t k = 0; k < numFbankBins; ++k) {
Isabella Gottardi56ee6202021-05-12 08:27:15 +0100112 auto quantizedEnergy = std::round(((this->m_melEnergies[k]) / quantScale) + quantOffset);
alexander3c798932021-03-26 21:42:19 +0000113 melSpecOut[k] = static_cast<T>(std::min<float>(std::max<float>(quantizedEnergy, minVal), maxVal));
114 }
115
116 return melSpecOut;
117 }
118
119 /* Constants */
120 static constexpr float ms_logStep = /*logf(6.4)*/ 1.8562979903656 / 27.0;
121 static constexpr float ms_freqStep = 200.0 / 3;
122 static constexpr float ms_minLogHz = 1000.0;
123 static constexpr float ms_minLogMel = ms_minLogHz / ms_freqStep;
124
125 protected:
126 /**
127 * @brief Project input frequency to Mel Scale.
Isabella Gottardi56ee6202021-05-12 08:27:15 +0100128 * @param[in] freq input frequency in floating point
129 * @param[in] useHTKMethod bool to signal if HTK method is to be
130 * used for calculation
alexander3c798932021-03-26 21:42:19 +0000131 * @return Mel transformed frequency in floating point
132 **/
133 static float MelScale(const float freq,
134 const bool useHTKMethod = true);
135
136 /**
137 * @brief Inverse Mel transform - convert MEL warped frequency
138 * back to normal frequency
Isabella Gottardi56ee6202021-05-12 08:27:15 +0100139 * @param[in] melFreq Mel frequency in floating point
140 * @param[in] useHTKMethod bool to signal if HTK method is to be
141 * used for calculation
alexander3c798932021-03-26 21:42:19 +0000142 * @return Real world frequency in floating point
143 **/
144 static float InverseMelScale(const float melFreq,
145 const bool useHTKMethod = true);
146
147 /**
148 * @brief Populates MEL energies after applying the MEL filter
149 * bank weights and adding them up to be placed into
150 * bins, according to the filter bank's first and last
151 * indices (pre-computed for each filter bank element
alexanderc350cdc2021-04-29 20:36:09 +0100152 * by CreateMelFilterBank function).
alexander3c798932021-03-26 21:42:19 +0000153 * @param[in] fftVec Vector populated with FFT magnitudes
154 * @param[in] melFilterBank 2D Vector with filter bank weights
155 * @param[in] filterBankFilterFirst Vector containing the first indices of filter bank
156 * to be used for each bin.
157 * @param[in] filterBankFilterLast Vector containing the last indices of filter bank
158 * to be used for each bin.
159 * @param[out] melEnergies Pre-allocated vector of MEL energies to be
160 * populated.
161 * @return true if successful, false otherwise
162 */
163 virtual bool ApplyMelFilterBank(
164 std::vector<float>& fftVec,
165 std::vector<std::vector<float>>& melFilterBank,
alexanderc350cdc2021-04-29 20:36:09 +0100166 std::vector<uint32_t>& filterBankFilterFirst,
167 std::vector<uint32_t>& filterBankFilterLast,
alexander3c798932021-03-26 21:42:19 +0000168 std::vector<float>& melEnergies);
169
170 /**
171 * @brief Converts the Mel energies for logarithmic scale
Isabella Gottardi56ee6202021-05-12 08:27:15 +0100172 * @param[in,out] melEnergies 1D vector of Mel energies
alexander3c798932021-03-26 21:42:19 +0000173 **/
174 virtual void ConvertToLogarithmicScale(std::vector<float>& melEnergies);
175
176 /**
177 * @brief Given the low and high Mel values, get the normaliser
178 * for weights to be applied when populating the filter
179 * bank.
Isabella Gottardi56ee6202021-05-12 08:27:15 +0100180 * @param[in] leftMel low Mel frequency value
181 * @param[in] rightMel high Mel frequency value
182 * @param[in] useHTKMethod bool to signal if HTK method is to be
183 * used for calculation
alexander3c798932021-03-26 21:42:19 +0000184 * @return Return float value to be applied
185 * when populating the filter bank.
186 */
187 virtual float GetMelFilterBankNormaliser(
188 const float& leftMel,
189 const float& rightMel,
190 const bool useHTKMethod);
191
192 private:
Isabella Gottardi56ee6202021-05-12 08:27:15 +0100193 MelSpecParams m_params;
194 std::vector<float> m_frame;
195 std::vector<float> m_buffer;
196 std::vector<float> m_melEnergies;
197 std::vector<float> m_windowFunc;
198 std::vector<std::vector<float>> m_melFilterBank;
199 std::vector<uint32_t> m_filterBankFilterFirst;
200 std::vector<uint32_t> m_filterBankFilterLast;
201 bool m_filterBankInitialised;
202 arm::app::math::FftInstance m_fftInstance;
alexander3c798932021-03-26 21:42:19 +0000203
204 /**
205 * @brief Initialises the filter banks.
206 **/
alexanderc350cdc2021-04-29 20:36:09 +0100207 void InitMelFilterBank();
alexander3c798932021-03-26 21:42:19 +0000208
209 /**
210 * @brief Signals whether the instance of MelSpectrogram has had its
211 * required buffers initialised
212 * @return True if initialised, false otherwise
213 **/
alexanderc350cdc2021-04-29 20:36:09 +0100214 bool IsMelFilterBankInited() const;
alexander3c798932021-03-26 21:42:19 +0000215
216 /**
217 * @brief Create mel filter banks for Mel Spectrogram calculation.
218 * @return 2D vector of floats
219 **/
alexanderc350cdc2021-04-29 20:36:09 +0100220 std::vector<std::vector<float>> CreateMelFilterBank();
alexander3c798932021-03-26 21:42:19 +0000221
222 /**
223 * @brief Computes the magnitude from an interleaved complex array
224 **/
alexanderc350cdc2021-04-29 20:36:09 +0100225 void ConvertToPowerSpectrum();
alexander3c798932021-03-26 21:42:19 +0000226
227 };
228
229} /* namespace audio */
230} /* namespace app */
231} /* namespace arm */
232
233
234#endif /* MELSPECTROGRAM_HPP */