blob: 80c568439bf5a598bd171ceb987a5c08a790ce6a [file] [log] [blame]
Éanna Ó Catháinc6ab02a2021-04-07 14:35:25 +01001//
2// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5
6#pragma once
7
8#include "DataStructures.hpp"
9#include "SlidingWindow.hpp"
10#include <numeric>
11#include "MFCC.hpp"
12
13/* Class to facilitate pre-processing calculation for Wav2Letter model
14 * for ASR */
15using AudioWindow = SlidingWindow <const float>;
16
17class Preprocess
18{
19public:
20
21 MFCC _m_mfcc; /* MFCC instance */
22
23 /* Actual buffers to be populated */
24 Array2d<float> _m_mfccBuf; /* Contiguous buffer 1D: MFCC */
25 Array2d<float> _m_delta1Buf; /* Contiguous buffer 1D: Delta 1 */
26 Array2d<float> _m_delta2Buf; /* Contiguous buffer 1D: Delta 2 */
27
28 uint32_t _m_windowLen; /* Window length for MFCC */
29 uint32_t _m_windowStride; /* Window stride len for MFCC */
30 AudioWindow _m_window; /* Sliding window */
31
32 /**
33 * @brief Constructor
34 * @param[in] numMfccFeatures number of MFCC features per window
35 * @param[in] windowLen number of elements in a window
36 * @param[in] windowStride stride (in number of elements) for
37 * moving the window
38 * @param[in] numMfccVectors number of MFCC vectors per window
39 */
40 Preprocess(
41 const uint32_t windowLen,
42 const uint32_t windowStride,
43 const MFCC mfccInst);
44 Preprocess() = delete;
45 ~Preprocess();
46
47 /**
48 * @brief Calculates the features required from audio data. This
49 * includes MFCC, first and second order deltas,
50 * normalisation and finally, quantisation. The tensor is
51 * populated with feature from a given window placed along
52 * in a single row.
53 * @param[in] audioData pointer to the first element of audio data
54 * @param[in] audioDataLen number of elements in the audio data
55 * @param[in] tensor tensor to be populated
56 * @return true if successful, false in case of error.
57 */
58 bool Invoke(const float* audioData,
59 const uint32_t audioDataLen,
60 std::vector<int8_t>& output,
61 int quantOffset,
62 float quantScale);
63
64
65protected:
66 /**
67 * @brief Computes the first and second order deltas for the
68 * MFCC buffers - they are assumed to be populated.
69 *
70 * @param[in] mfcc MFCC buffers
71 * @param[out] delta1 result of the first diff computation
72 * @param[out] delta2 result of the second diff computation
73 *
74 * @return true if successful, false otherwise
75 */
76 static bool _ComputeDeltas(Array2d<float>& mfcc,
77 Array2d<float>& delta1,
78 Array2d<float>& delta2);
79
80 /**
81 * @brief Given a 2D vector of floats, computes the mean
82 * @param[in] vec vector of vector of floats
83 * @return mean value
84 */
85 static float _GetMean(Array2d<float>& vec);
86
87 /**
88 * @brief Given a 2D vector of floats, computes the stddev
89 * @param[in] vec vector of vector of floats
90 * @param[in] mean mean value of the vector passed in
91 * @return stddev value
92 */
93 static float _GetStdDev(Array2d<float>& vec,
94 const float mean);
95
96 /**
97 * @brief Given a 2D vector of floats, normalises it using
98 * the mean and the stddev
99 * @param[in/out] vec vector of vector of floats
100 * @return
101 */
102 static void _NormaliseVec(Array2d<float>& vec);
103
104 /**
105 * @brief Normalises the MFCC and delta buffers
106 * @return
107 */
108 void _Normalise();
109
110 /**
111 * @brief Given the quantisation and data type limits, computes
112 * the quantised values of a floating point input data.
113 * @param[in] elem Element to be quantised
114 * @param[in] quantScale Scale
115 * @param[in] quantOffset Offset
116 * @param[in] minVal Numerical limit - minimum
117 * @param[in] maxVal Numerical limit - maximum
118 * @return floating point quantised value
119 */
120 static float _GetQuantElem(
121 const float elem,
122 const float quantScale,
123 const int quantOffset,
124 const float minVal,
125 const float maxVal);
126
127 /**
128 * @brief Quantises the MFCC and delta buffers, and places them
129 * in the output buffer. While doing so, it transposes
130 * the data. Reason: Buffers in this class are arranged
131 * for "time" axis to be row major. Primary reason for
132 * this being the convolution speed up (as we can use
133 * contiguous memory). The output, however, requires the
134 * time axis to be in column major arrangement.
135 * @param[in] outputBuf pointer to the output buffer
136 * @param[in] outputBufSz output buffer's size
137 * @param[in] quantScale quantisation scale
138 * @param[in] quantOffset quantisation offset
139 */
140 template <typename T>
141 bool _Quantise(T* outputBuf, int quantOffset, float quantScale)
142 {
143 /* Populate */
144 T* outputBufMfcc = outputBuf;
145 T* outputBufD1 = outputBuf + this->_m_mfcc._m_params.m_numMfccFeatures;
146 T* outputBufD2 = outputBufD1 + this->_m_mfcc._m_params.m_numMfccFeatures;
147 const uint32_t ptrIncr = this->_m_mfcc._m_params.m_numMfccFeatures * 2; /* (3 vectors - 1 vector) */
148
149 const float minVal = std::numeric_limits<T>::min();
150 const float maxVal = std::numeric_limits<T>::max();
151
152 /* We need to do a transpose while copying and concatenating
153 * the tensor*/
154 for (uint32_t j = 0; j < this->_m_mfcc._m_params.m_numMfccVectors; ++j) {
155 for (uint32_t i = 0; i < this->_m_mfcc._m_params.m_numMfccFeatures; ++i)
156 {
157 *outputBufMfcc++ = static_cast<T>(this->_GetQuantElem(
158 this->_m_mfccBuf(i, j), quantScale,
159 quantOffset, minVal, maxVal));
160 *outputBufD1++ = static_cast<T>(this->_GetQuantElem(
161 this->_m_delta1Buf(i, j), quantScale,
162 quantOffset, minVal, maxVal));
163 *outputBufD2++ = static_cast<T>(this->_GetQuantElem(
164 this->_m_delta2Buf(i, j), quantScale,
165 quantOffset, minVal, maxVal));
166 }
167 outputBufMfcc += ptrIncr;
168 outputBufD1 += ptrIncr;
169 outputBufD2 += ptrIncr;
170 }
171
172 return true;
173 }
174};
175