Blame - source/application/api/use_case/asr/include/Wav2LetterPreprocess.hpp - ml/ethos-u/ml-embedded-evaluation-kit

2021-03-26 21:42:19 +0000

[diff] [blame]

1

/*

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

2

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

3

* SPDX-License-Identifier: Apache-2.0

4

*

5

* Licensed under the Apache License, Version 2.0 (the "License");

6

* you may not use this file except in compliance with the License.

7

* You may obtain a copy of the License at

8

*

9

* http://www.apache.org/licenses/LICENSE-2.0

10

*

11

* Unless required by applicable law or agreed to in writing, software

12

* distributed under the License is distributed on an "AS IS" BASIS,

13

* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

* See the License for the specific language governing permissions and

15

* limitations under the License.

16

*/

17

#ifndef ASR_WAV2LETTER_PREPROCESS_HPP

18

#define ASR_WAV2LETTER_PREPROCESS_HPP

19

Kshitij Sisodia

aa4bcb1

2022-05-06 09:13:03 +0100

[diff] [blame]

20

#include "TensorFlowLiteMicro.hpp"

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

21

#include "Wav2LetterMfcc.hpp"

22

#include "AudioUtils.hpp"

23

#include "DataStructures.hpp"

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

24

#include "BaseProcessing.hpp"

alexander

31ae9f0

2022-02-10 16:15:54 +0000

[diff] [blame]

25

#include "log_macros.h"

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

26

27

namespace arm {

28

namespace app {

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

29

30

/* Class to facilitate pre-processing calculation for Wav2Letter model

31

* for ASR. */

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

32

using AudioWindow = audio::SlidingWindow<const int16_t>;

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

33

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

34

class AsrPreProcess : public BasePreProcess {

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

35

public:

36

/**

37

* @brief Constructor.

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

38

* @param[in] inputTensor Pointer to the TFLite Micro input Tensor.

39

* @param[in] numMfccFeatures Number of MFCC features per window.

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

40

* @param[in] numFeatureFrames Number of MFCC vectors that need to be calculated

41

* for an inference.

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

42

* @param[in] mfccWindowLen Number of audio elements to calculate MFCC features per window.

43

* @param[in] mfccWindowStride Stride (in number of elements) for moving the MFCC window.

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

44

*/

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

45

AsrPreProcess(TfLiteTensor* inputTensor,

46

uint32_t numMfccFeatures,

47

uint32_t numFeatureFrames,

48

uint32_t mfccWindowLen,

49

uint32_t mfccWindowStride);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

50

51

/**

52

* @brief Calculates the features required from audio data. This

53

* includes MFCC, first and second order deltas,

54

* normalisation and finally, quantisation. The tensor is

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

55

* populated with features from a given window placed

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

56

* in a single row.

57

* @param[in] audioData Pointer to the first element of audio data.

58

* @param[in] audioDataLen Number of elements in the audio data.

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

59

* @return true if successful, false in case of error.

60

*/

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

61

bool DoPreProcess(const void* audioData, size_t audioDataLen) override;

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

protected:

/**

* @brief Computes the first and second order deltas for the

66

* MFCC buffers - they are assumed to be populated.

67

*

68

* @param[in] mfcc MFCC buffers.

69

* @param[out] delta1 Result of the first diff computation.

70

* @param[out] delta2 Result of the second diff computation.

71

* @return true if successful, false otherwise.

72

*/

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

73

static bool ComputeDeltas(Array2d<float>& mfcc,

74

Array2d<float>& delta1,

75

Array2d<float>& delta2);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

76

77

/**

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

78

* @brief Given a 2D vector of floats, rescale it to have mean of 0 and

79

* standard deviation of 1.

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

80

* @param[in,out] vec 2D array of floats, rescaled in place.

81

*/

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

82

static void StandardizeVecF32(Array2d<float>& vec);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

83

84

/**

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

85

* @brief Standardizes all the MFCC and delta buffers to have mean 0 and std. dev 1.

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

86

*/

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

87

void Standarize();

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

88

89

/**

90

* @brief Given the quantisation and data type limits, computes

91

* the quantised values of a floating point input data.

92

* @param[in] elem Element to be quantised.

93

* @param[in] quantScale Scale.

94

* @param[in] quantOffset Offset.

95

* @param[in] minVal Numerical limit - minimum.

96

* @param[in] maxVal Numerical limit - maximum.

97

* @return Floating point quantised value.

98

*/

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

99

static float GetQuantElem(

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

float elem,

float quantScale,

int quantOffset,

float minVal,

float maxVal);

/**

* @brief Quantises the MFCC and delta buffers, and places them

108

* in the output buffer. While doing so, it transposes

109

* the data. Reason: Buffers in this class are arranged

110

* for "time" axis to be row major. Primary reason for

111

* this being the convolution speed up (as we can use

112

* contiguous memory). The output, however, requires the

113

* time axis to be in column major arrangement.

114

* @param[in] outputBuf Pointer to the output buffer.

115

* @param[in] outputBufSz Output buffer's size.

116

* @param[in] quantScale Quantisation scale.

117

* @param[in] quantOffset Quantisation offset.

118

*/

119

template <typename T>

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

120

bool Quantise(

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

121

T* outputBuf,

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

122

const uint32_t outputBufSz,

123

const float quantScale,

124

const int quantOffset)

125

{

126

/* Check the output size will fit everything. */

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

127

if (outputBufSz < (this->m_mfccBuf.size(0) * 3 * sizeof(T))) {

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

128

printf_err("Tensor size too small for features\n");

return false;

}

/* Populate. */

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

133

T* outputBufMfcc = outputBuf;

134

T* outputBufD1 = outputBuf + this->m_numMfccFeats;

135

T* outputBufD2 = outputBufD1 + this->m_numMfccFeats;

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

136

const uint32_t ptrIncr = this->m_numMfccFeats * 2; /* (3 vectors - 1 vector) */

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

137

138

const float minVal = std::numeric_limits<T>::min();

139

const float maxVal = std::numeric_limits<T>::max();

140

141

/* Need to transpose while copying and concatenating the tensor. */

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

142

for (uint32_t j = 0; j < this->m_numFeatureFrames; ++j) {

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

143

for (uint32_t i = 0; i < this->m_numMfccFeats; ++i) {

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

144

*outputBufMfcc++ = static_cast<T>(AsrPreProcess::GetQuantElem(

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

145

this->m_mfccBuf(i, j), quantScale,

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

146

quantOffset, minVal, maxVal));

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

147

*outputBufD1++ = static_cast<T>(AsrPreProcess::GetQuantElem(

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

148

this->m_delta1Buf(i, j), quantScale,

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

149

quantOffset, minVal, maxVal));

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

150

*outputBufD2++ = static_cast<T>(AsrPreProcess::GetQuantElem(

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

151

this->m_delta2Buf(i, j), quantScale,

alexander

2021-04-29 20:36:09 +0100

[diff] [blame]

152

quantOffset, minVal, maxVal));

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

153

}

154

outputBufMfcc += ptrIncr;

155

outputBufD1 += ptrIncr;

156

outputBufD2 += ptrIncr;

}

return true;

}

private:

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

163

audio::Wav2LetterMFCC m_mfcc; /* MFCC instance. */

164

TfLiteTensor* m_inputTensor; /* Model input tensor. */

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

165

166

/* Actual buffers to be populated. */

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

167

Array2d<float> m_mfccBuf; /* Contiguous buffer 1D: MFCC */

168

Array2d<float> m_delta1Buf; /* Contiguous buffer 1D: Delta 1 */

169

Array2d<float> m_delta2Buf; /* Contiguous buffer 1D: Delta 2 */

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

170

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

171

uint32_t m_mfccWindowLen; /* Window length for MFCC. */

172

uint32_t m_mfccWindowStride; /* Window stride len for MFCC. */

173

uint32_t m_numMfccFeats; /* Number of MFCC features per window. */

174

uint32_t m_numFeatureFrames; /* How many sets of m_numMfccFeats. */

175

AudioWindow m_mfccSlidingWindow; /* Sliding window to calculate MFCCs. */

alexander