Blame - source/use_case/kws_asr/include/Wav2LetterPreprocess.hpp - ml/ethos-u/ml-embedded-evaluation-kit

2021-03-26 21:42:19 +0000

[diff] [blame]

1

/*

2

3

* SPDX-License-Identifier: Apache-2.0

4

*

5

* Licensed under the Apache License, Version 2.0 (the "License");

6

* you may not use this file except in compliance with the License.

7

* You may obtain a copy of the License at

8

*

9

* http://www.apache.org/licenses/LICENSE-2.0

10

*

11

* Unless required by applicable law or agreed to in writing, software

12

* distributed under the License is distributed on an "AS IS" BASIS,

13

* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

* See the License for the specific language governing permissions and

15

* limitations under the License.

16

*/

17

#ifndef KWS_ASR_WAV2LET_PREPROC_HPP

18

#define KWS_ASR_WAV2LET_PREPROC_HPP

19

20

#include "Wav2LetterModel.hpp"

21

#include "Wav2LetterMfcc.hpp"

22

#include "AudioUtils.hpp"

23

#include "DataStructures.hpp"

alexander

31ae9f0

2022-02-10 16:15:54 +0000

[diff] [blame^]

24

#include "log_macros.h"

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

namespace arm {

namespace app {

namespace audio {

namespace asr {

/* Class to facilitate pre-processing calculation for Wav2Letter model

32

* for ASR. */

33

using AudioWindow = SlidingWindow <const int16_t>;

class Preprocess {

public:

/**

* @brief Constructor

* @param[in] numMfccFeatures Number of MFCC features per window.

40

* @param[in] windowLen Number of elements in a window.

41

* @param[in] windowStride Stride (in number of elements) for

42

* moving the window.

43

* @param[in] numMfccVectors Number of MFCC vectors per window.

44

*/

45

Preprocess(

46

uint32_t numMfccFeatures,

47

uint32_t windowLen,

48

uint32_t windowStride,

49

uint32_t numMfccVectors);

50

Preprocess() = delete;

51

~Preprocess() = default;

52

53

/**
 * @brief       Calculates the features required from audio data. This
 *              includes MFCC, first and second order deltas,
 *              normalisation and finally, quantisation. The tensor is
 *              populated with the features from a given window, placed
 *              along a single row.
 * @param[in]   audioData      Pointer to the first element of audio data.
 * @param[in]   audioDataLen   Number of elements in the audio data.
 * @param[out]  tensor         Tensor to be populated.
 * @return      true if successful, false in case of error.
 */

64

bool Invoke(const int16_t * audioData,

65

uint32_t audioDataLen,

66

TfLiteTensor * tensor);

protected:

/**

* @brief Computes the first and second order deltas for the

71

* MFCC buffers - they are assumed to be populated.

72

*

73

* @param[in] mfcc MFCC buffers.

74

* @param[out] delta1 Result of the first diff computation.

75

* @param[out] delta2 Result of the second diff computation.

76

*

77

* @return true if successful, false otherwise.

78

*/

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

79

static bool ComputeDeltas(Array2d<float>& mfcc,

80

Array2d<float>& delta1,

81

Array2d<float>& delta2);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

82

83

/**

84

* @brief Given a 2D vector of floats, computes the mean.

85

* @param[in] vec Vector of vector of floats.

86

* @return Mean value.

87

*/

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

88

static float GetMean(Array2d<float>& vec);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

89

90

/**

91

* @brief Given a 2D vector of floats, computes the stddev.

92

* @param[in] vec Vector of vector of floats.

93

* @param[in] mean Mean value of the vector passed in.

94

* @return stddev value.

95

*/

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

96

static float GetStdDev(Array2d<float>& vec,

97

const float mean);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

98

99

/**

100

* @brief Given a 2D vector of floats, normalises it using

101

* the mean and the stddev

102

* @param[in,out] vec Vector of vector of floats.

103

*/

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

104

static void NormaliseVec(Array2d<float>& vec);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

105

106

/**

107

* @brief Normalises the MFCC and delta buffers.

108

*/

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

109

void Normalise();

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

110

111

/**
 * @brief       Given the quantisation parameters and the data type
 *              limits, computes the quantised value of a floating
 *              point input element.
 * @param[in]   elem          Element to be quantised.
 * @param[in]   quantScale    Scale.
 * @param[in]   quantOffset   Offset.
 * @param[in]   minVal        Numerical limit - minimum.
 * @param[in]   maxVal        Numerical limit - maximum.
 * @return      Floating point quantised value.
 */

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

121

static float GetQuantElem(

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

float elem,

float quantScale,

int quantOffset,

float minVal,

float maxVal);

/**

* @brief Quantises the MFCC and delta buffers, and places them

130

* in the output buffer. While doing so, it transposes

131

* the data. Reason: Buffers in this class are arranged

132

* for "time" axis to be row major. Primary reason for

133

* this being the convolution speed up (as we can use

134

* contiguous memory). The output, however, requires the

135

* time axis to be in column major arrangement.

136

* @param[in] outputBuf Pointer to the output buffer.

137

* @param[in] outputBufSz Output buffer's size.

138

* @param[in] quantScale Quantisation scale.

139

* @param[in] quantOffset Quantisation offset.

140

*/

141

template <typename T>

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

142

bool Quantise(

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

143

T * outputBuf,

144

const uint32_t outputBufSz,

145

const float quantScale,

146

const int quantOffset)

147

{

148

/* Check the output size will for everything. */

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

149

if (outputBufSz < (this->m_mfccBuf.size(0) * 3 * sizeof(T))) {

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

150

printf_err("Tensor size too small for features\n");

return false;

}

/* Populate. */

T * outputBufMfcc = outputBuf;

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

156

T * outputBufD1 = outputBuf + this->m_numMfccFeats;

157

T * outputBufD2 = outputBufD1 + this->m_numMfccFeats;

158

const uint32_t ptrIncr = this->m_numMfccFeats * 2; /* (3 vectors - 1 vector) */

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

159

160

const float minVal = std::numeric_limits<T>::min();

161

const float maxVal = std::numeric_limits<T>::max();

162

163

/* We need to do a transpose while copying and concatenating

164

* the tensor. */

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

165

for (uint32_t j = 0; j < this->m_numFeatVectors; ++j) {

166

for (uint32_t i = 0; i < this->m_numMfccFeats; ++i) {

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

167

*outputBufMfcc++ = static_cast<T>(this->GetQuantElem(

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

168

this->m_mfccBuf(i, j), quantScale,

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

169

quantOffset, minVal, maxVal));

170

*outputBufD1++ = static_cast<T>(this->GetQuantElem(

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

171

this->m_delta1Buf(i, j), quantScale,

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

172

quantOffset, minVal, maxVal));

173

*outputBufD2++ = static_cast<T>(this->GetQuantElem(

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

174

this->m_delta2Buf(i, j), quantScale,

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

175

quantOffset, minVal, maxVal));

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

176

}

177

outputBufMfcc += ptrIncr;

178

outputBufD1 += ptrIncr;

179

outputBufD2 += ptrIncr;

}

return true;

}

private:

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

186

Wav2LetterMFCC m_mfcc; /* MFCC instance. */

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

187

188

/* Actual buffers to be populated. */

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

189

Array2d<float> m_mfccBuf; /* Contiguous buffer 1D: MFCC */

190

Array2d<float> m_delta1Buf; /* Contiguous buffer 1D: Delta 1 */

191

Array2d<float> m_delta2Buf; /* Contiguous buffer 1D: Delta 2 */

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

192

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

193

uint32_t m_windowLen; /* Window length for MFCC. */

194

uint32_t m_windowStride; /* Window stride len for MFCC. */

195

uint32_t m_numMfccFeats; /* Number of MFCC features per window. */

196

uint32_t m_numFeatVectors; /* Number of MFCC feature vectors (each of m_numMfccFeats features). */

197

AudioWindow m_window; /* Sliding window. */

alexander