Blame - source/use_case/asr/include/Wav2LetterPreprocess.hpp - ml/ethos-u/ml-embedded-evaluation-kit

2021-03-26 21:42:19 +0000

[diff] [blame]

1

/*

2

3

* SPDX-License-Identifier: Apache-2.0

4

*

5

* Licensed under the Apache License, Version 2.0 (the "License");

6

* you may not use this file except in compliance with the License.

7

* You may obtain a copy of the License at

8

*

9

* http://www.apache.org/licenses/LICENSE-2.0

10

*

11

* Unless required by applicable law or agreed to in writing, software

12

* distributed under the License is distributed on an "AS IS" BASIS,

13

* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

* See the License for the specific language governing permissions and

15

* limitations under the License.

16

*/

17

#ifndef ASR_WAV2LETTER_PREPROCESS_HPP

18

#define ASR_WAV2LETTER_PREPROCESS_HPP

19

20

#include "Wav2LetterModel.hpp"

21

#include "Wav2LetterMfcc.hpp"

22

#include "AudioUtils.hpp"

23

#include "DataStructures.hpp"

namespace arm {

namespace app {

namespace audio {

namespace asr {

/* Class to facilitate pre-processing calculation for Wav2Letter model

31

* for ASR. */

32

using AudioWindow = SlidingWindow <const int16_t>;

class Preprocess {

public:

/**

* @brief Constructor.

38

* @param[in] numMfccFeatures Number of MFCC features per window.

39

* @param[in] windowLen Number of elements in a window.

40

* @param[in] windowStride Stride (in number of elements) for

41

* moving the window.

42

* @param[in] numMfccVectors Number of MFCC vectors per window.

43

*/

44

Preprocess(

45

uint32_t numMfccFeatures,

46

uint32_t windowLen,

47

uint32_t windowStride,

48

uint32_t numMfccVectors);

49

Preprocess() = delete;

50

~Preprocess() = default;

51

52

/**

53

* @brief Calculates the features required from audio data. This

54

* includes MFCC, first and second order deltas,

55

* normalisation and finally, quantisation. The tensor is

56

* populated with features from a given window placed along

57

* a single row.

58

* @param[in] audioData Pointer to the first element of audio data.

59

* @param[in] audioDataLen Number of elements in the audio data.

60

* @param[out] tensor Tensor to be populated.

61

* @return true if successful, false in case of error.

62

*/

63

bool Invoke(const int16_t * audioData,

64

uint32_t audioDataLen,

65

TfLiteTensor * tensor);

protected:

/**

* @brief Computes the first and second order deltas for the

70

* MFCC buffers - they are assumed to be populated.

71

*

72

* @param[in] mfcc MFCC buffers.

73

* @param[out] delta1 Result of the first diff computation.

74

* @param[out] delta2 Result of the second diff computation.

75

* @return true if successful, false otherwise.

76

*/

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

77

static bool ComputeDeltas(Array2d<float>& mfcc,

78

Array2d<float>& delta1,

79

Array2d<float>& delta2);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

80

81

/**

82

* @brief Given a 2D vector of floats, computes the mean.

83

* @param[in] vec Vector of vector of floats.

84

* @return Mean value.

85

*/

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

86

static float GetMean(Array2d<float>& vec);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

87

88

/**

89

* @brief Given a 2D vector of floats, computes the stddev.

90

* @param[in] vec Vector of vector of floats.

91

* @param[in] mean Mean value of the vector passed in.

92

* @return stddev value.

93

*/

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

94

static float GetStdDev(Array2d<float>& vec,

95

const float mean);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

96

97

/**

98

* @brief Given a 2D vector of floats, normalises it using

99

* the mean and the stddev.

100

* @param[in,out] vec Vector of vector of floats.

101

*/

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

102

static void NormaliseVec(Array2d<float>& vec);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

103

104

/**

105

* @brief Normalises the MFCC and delta buffers.

106

*/

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

107

void Normalise();

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

108

109

/**

110

* @brief Given the quantisation and data type limits, computes

111

* the quantised values of a floating point input data.

112

* @param[in] elem Element to be quantised.

113

* @param[in] quantScale Scale.

114

* @param[in] quantOffset Offset.

115

* @param[in] minVal Numerical limit - minimum.

116

* @param[in] maxVal Numerical limit - maximum.

117

* @return Floating point quantised value.

118

*/

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

119

static float GetQuantElem(

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

float elem,

float quantScale,

int quantOffset,

float minVal,

float maxVal);

/**

* @brief Quantises the MFCC and delta buffers, and places them

128

* in the output buffer. While doing so, it transposes

129

* the data. Reason: Buffers in this class are arranged

130

* for "time" axis to be row major. Primary reason for

131

* this being the convolution speed up (as we can use

132

* contiguous memory). The output, however, requires the

133

* time axis to be in column major arrangement.

134

* @param[in] outputBuf Pointer to the output buffer.

135

* @param[in] outputBufSz Output buffer's size.

136

* @param[in] quantScale Quantisation scale.

137

* @param[in] quantOffset Quantisation offset.

138

*/

139

template <typename T>

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

140

bool Quantise(

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

141

T * outputBuf,

142

const uint32_t outputBufSz,

143

const float quantScale,

144

const int quantOffset)

145

{

146

/* Check the output size will fit everything. */

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

147

if (outputBufSz < (this->m_mfccBuf.size(0) * 3 * sizeof(T))) {

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

148

printf_err("Tensor size too small for features\n");

return false;

}

/* Populate. */

T * outputBufMfcc = outputBuf;

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

154

T * outputBufD1 = outputBuf + this->m_numMfccFeats;

155

T * outputBufD2 = outputBufD1 + this->m_numMfccFeats;

156

const uint32_t ptrIncr = this->m_numMfccFeats * 2; /* (3 vectors - 1 vector) */

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

157

158

const float minVal = std::numeric_limits<T>::min();

159

const float maxVal = std::numeric_limits<T>::max();

160

161

/* Need to transpose while copying and concatenating the tensor. */

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

162

for (uint32_t j = 0; j < this->m_numFeatVectors; ++j) {

163

for (uint32_t i = 0; i < this->m_numMfccFeats; ++i) {

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

164

*outputBufMfcc++ = static_cast<T>(Preprocess::GetQuantElem(

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

165

this->m_mfccBuf(i, j), quantScale,

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

166

quantOffset, minVal, maxVal));

167

*outputBufD1++ = static_cast<T>(Preprocess::GetQuantElem(

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

168

this->m_delta1Buf(i, j), quantScale,

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

169

quantOffset, minVal, maxVal));

170

*outputBufD2++ = static_cast<T>(Preprocess::GetQuantElem(

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

171

this->m_delta2Buf(i, j), quantScale,

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

172

quantOffset, minVal, maxVal));

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

173

}

174

outputBufMfcc += ptrIncr;

175

outputBufD1 += ptrIncr;

176

outputBufD2 += ptrIncr;

}

return true;

}

private:

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

183

Wav2LetterMFCC m_mfcc; /* MFCC instance. */

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

184

185

/* Actual buffers to be populated. */

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

186

Array2d<float> m_mfccBuf; /* Contiguous buffer 1D: MFCC */

187

Array2d<float> m_delta1Buf; /* Contiguous buffer 1D: Delta 1 */

188

Array2d<float> m_delta2Buf; /* Contiguous buffer 1D: Delta 2 */

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

189

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

190

uint32_t m_windowLen; /* Window length for MFCC. */

191

uint32_t m_windowStride; /* Window stride len for MFCC. */

192

uint32_t m_numMfccFeats; /* Number of MFCC features per window. */

193

uint32_t m_numFeatVectors; /* Number of m_numMfccFeats. */

194

AudioWindow m_window; /* Sliding window. */

alexander