Blame - source/use_case/asr/include/Wav2LetterPreprocess.hpp - ml/ethos-u/ml-embedded-evaluation-kit

2021-03-26 21:42:19 +0000

[diff] [blame]

1

/*

2

3

* SPDX-License-Identifier: Apache-2.0

4

*

5

* Licensed under the Apache License, Version 2.0 (the "License");

6

* you may not use this file except in compliance with the License.

7

* You may obtain a copy of the License at

8

*

9

* http://www.apache.org/licenses/LICENSE-2.0

10

*

11

* Unless required by applicable law or agreed to in writing, software

12

* distributed under the License is distributed on an "AS IS" BASIS,

13

* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

* See the License for the specific language governing permissions and

15

* limitations under the License.

16

*/

17

#ifndef ASR_WAV2LETTER_PREPROCESS_HPP

18

#define ASR_WAV2LETTER_PREPROCESS_HPP

19

20

#include "Wav2LetterModel.hpp"

21

#include "Wav2LetterMfcc.hpp"

22

#include "AudioUtils.hpp"

23

#include "DataStructures.hpp"

alexander

31ae9f0

2022-02-10 16:15:54 +0000

[diff] [blame^]

24

#include "log_macros.h"

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

namespace arm {

namespace app {

namespace audio {

namespace asr {

/* Class to facilitate pre-processing calculation for Wav2Letter model

32

* for ASR. */

33

using AudioWindow = SlidingWindow <const int16_t>;

class Preprocess {

public:

/**

* @brief Constructor.

39

* @param[in] numMfccFeatures Number of MFCC features per window.

40

* @param[in] windowLen Number of elements in a window.

41

* @param[in] windowStride Stride (in number of elements) for

42

* moving the window.

43

* @param[in] numMfccVectors Number of MFCC vectors per window.

44

*/

45

Preprocess(

46

uint32_t numMfccFeatures,

47

uint32_t windowLen,

48

uint32_t windowStride,

49

uint32_t numMfccVectors);

50

Preprocess() = delete;

51

~Preprocess() = default;

52

53

/**

54

* @brief Calculates the features required from audio data. This

55

* includes MFCC, first and second order deltas,

56

* normalisation and finally, quantisation. The tensor is

57

* populated with features from a given window placed along

58

* a single row.

59

* @param[in] audioData Pointer to the first element of audio data.

60

* @param[in] audioDataLen Number of elements in the audio data.

61

* @param[in] tensor Tensor to be populated.

62

* @return true if successful, false in case of error.

63

*/

64

bool Invoke(const int16_t * audioData,

65

uint32_t audioDataLen,

66

TfLiteTensor * tensor);

protected:

/**

* @brief Computes the first and second order deltas for the

71

* MFCC buffers - they are assumed to be populated.

72

*

73

* @param[in] mfcc MFCC buffers.

74

* @param[out] delta1 Result of the first diff computation.

75

* @param[out] delta2 Result of the second diff computation.

76

* @return true if successful, false otherwise.

77

*/

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

78

static bool ComputeDeltas(Array2d<float>& mfcc,

79

Array2d<float>& delta1,

80

Array2d<float>& delta2);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

81

82

/**

83

* @brief Given a 2D vector of floats, computes the mean.

84

* @param[in] vec Vector of vector of floats.

85

* @return Mean value.

86

*/

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

87

static float GetMean(Array2d<float>& vec);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

88

89

/**

90

* @brief Given a 2D vector of floats, computes the stddev.

91

* @param[in] vec Vector of vector of floats.

92

* @param[in] mean Mean value of the vector passed in.

93

* @return stddev value.

94

*/

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

95

static float GetStdDev(Array2d<float>& vec,

96

const float mean);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

97

98

/**

99

* @brief Given a 2D vector of floats, normalises it using

100

* the mean and the stddev.

101

* @param[in,out] vec Vector of vector of floats.

102

*/

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

103

static void NormaliseVec(Array2d<float>& vec);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

104

105

/**

106

* @brief Normalises the MFCC and delta buffers.

107

*/

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

108

void Normalise();

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

109

110

/**

111

* @brief Given the quantisation and data type limits, computes

112

* the quantised values of a floating point input data.

113

* @param[in] elem Element to be quantised.

114

* @param[in] quantScale Scale.

115

* @param[in] quantOffset Offset.

116

* @param[in] minVal Numerical limit - minimum.

117

* @param[in] maxVal Numerical limit - maximum.

118

* @return Floating point quantised value.

119

*/

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

120

static float GetQuantElem(

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

float elem,

float quantScale,

int quantOffset,

float minVal,

float maxVal);

/**

* @brief Quantises the MFCC and delta buffers, and places them

129

* in the output buffer. While doing so, it transposes

130

* the data. Reason: Buffers in this class are arranged

131

* for "time" axis to be row major. Primary reason for

132

* this being the convolution speed up (as we can use

133

* contiguous memory). The output, however, requires the

134

* time axis to be in column major arrangement.

135

* @param[in] outputBuf Pointer to the output buffer.

136

* @param[in] outputBufSz Output buffer's size.

137

* @param[in] quantScale Quantisation scale.

138

* @param[in] quantOffset Quantisation offset.

139

*/

140

template <typename T>

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

141

bool Quantise(

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

142

T * outputBuf,

143

const uint32_t outputBufSz,

144

const float quantScale,

145

const int quantOffset)

146

{

147

/* Check the output size will fit everything. */

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

148

if (outputBufSz < (this->m_mfccBuf.size(0) * 3 * sizeof(T))) {

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

149

printf_err("Tensor size too small for features\n");

return false;

}

/* Populate. */

T * outputBufMfcc = outputBuf;

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

155

T * outputBufD1 = outputBuf + this->m_numMfccFeats;

156

T * outputBufD2 = outputBufD1 + this->m_numMfccFeats;

157

const uint32_t ptrIncr = this->m_numMfccFeats * 2; /* (3 vectors - 1 vector) */

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

158

159

const float minVal = std::numeric_limits<T>::min();

160

const float maxVal = std::numeric_limits<T>::max();

161

162

/* Need to transpose while copying and concatenating the tensor. */

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

163

for (uint32_t j = 0; j < this->m_numFeatVectors; ++j) {

164

for (uint32_t i = 0; i < this->m_numMfccFeats; ++i) {

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

165

*outputBufMfcc++ = static_cast<T>(Preprocess::GetQuantElem(

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

166

this->m_mfccBuf(i, j), quantScale,

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

167

quantOffset, minVal, maxVal));

168

*outputBufD1++ = static_cast<T>(Preprocess::GetQuantElem(

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

169

this->m_delta1Buf(i, j), quantScale,

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

170

quantOffset, minVal, maxVal));

171

*outputBufD2++ = static_cast<T>(Preprocess::GetQuantElem(

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

172

this->m_delta2Buf(i, j), quantScale,

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

173

quantOffset, minVal, maxVal));

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

174

}

175

outputBufMfcc += ptrIncr;

176

outputBufD1 += ptrIncr;

177

outputBufD2 += ptrIncr;

}

return true;

}

private:

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

184

Wav2LetterMFCC m_mfcc; /* MFCC instance. */

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

185

186

/* Actual buffers to be populated. */

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

187

Array2d<float> m_mfccBuf; /* Contiguous buffer 1D: MFCC */

188

Array2d<float> m_delta1Buf; /* Contiguous buffer 1D: Delta 1 */

189

Array2d<float> m_delta2Buf; /* Contiguous buffer 1D: Delta 2 */

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

190

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

191

uint32_t m_windowLen; /* Window length for MFCC. */

192

uint32_t m_windowStride; /* Window stride len for MFCC. */

193

uint32_t m_numMfccFeats; /* Number of MFCC features per window. */

194

uint32_t m_numFeatVectors; /* Number of m_numMfccFeats. */

195

AudioWindow m_window; /* Sliding window. */

alexander