Blame - source/application/api/use_case/asr/src/Wav2LetterPostprocess.cc - ml/ethos-u/ml-embedded-evaluation-kit

2021-03-26 21:42:19 +0000

[diff] [blame]

1

/*

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

2

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

3

* SPDX-License-Identifier: Apache-2.0

4

*

5

* Licensed under the Apache License, Version 2.0 (the "License");

6

* you may not use this file except in compliance with the License.

7

* You may obtain a copy of the License at

8

*

9

* http://www.apache.org/licenses/LICENSE-2.0

10

*

11

* Unless required by applicable law or agreed to in writing, software

12

* distributed under the License is distributed on an "AS IS" BASIS,

13

* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

* See the License for the specific language governing permissions and

15

* limitations under the License.

16

*/

17

#include "Wav2LetterPostprocess.hpp"

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

18

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

19

#include "Wav2LetterModel.hpp"

alexander

31ae9f0

2022-02-10 16:15:54 +0000

[diff] [blame]

20

#include "log_macros.h"

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

21

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

22

#include <cmath>

23

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

24

namespace arm {

25

namespace app {

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

26

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

27

AsrPostProcess::AsrPostProcess(TfLiteTensor* outputTensor, AsrClassifier& classifier,

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

28

const std::vector<std::string>& labels, std::vector<ClassificationResult>& results,

29

const uint32_t outputContextLen,

30

const uint32_t blankTokenIdx, const uint32_t reductionAxisIdx

31

):

32

m_classifier(classifier),

33

m_outputTensor(outputTensor),

34

m_labels{labels},

35

m_results(results),

36

m_outputContextLen(outputContextLen),

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

37

m_countIterations(0),

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

38

m_blankTokenIdx(blankTokenIdx),

39

m_reductionAxisIdx(reductionAxisIdx)

40

{

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

41

this->m_outputInnerLen = AsrPostProcess::GetOutputInnerLen(this->m_outputTensor, this->m_outputContextLen);

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

42

this->m_totalLen = (2 * this->m_outputContextLen + this->m_outputInnerLen);

43

}

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

44

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

45

bool AsrPostProcess::DoPostProcess()

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

46

{

47

/* Basic checks. */

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

48

if (!this->IsInputValid(this->m_outputTensor, this->m_reductionAxisIdx)) {

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

return false;

}

/* Irrespective of tensor type, we use unsigned "byte" */

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

53

auto* ptrData = tflite::GetTensorData<uint8_t>(this->m_outputTensor);

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

54

const uint32_t elemSz = AsrPostProcess::GetTensorElementSize(this->m_outputTensor);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

55

56

/* Other sanity checks. */

57

if (0 == elemSz) {

58

printf_err("Tensor type not supported for post processing\n");

59

return false;

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

60

} else if (elemSz * this->m_totalLen > this->m_outputTensor->bytes) {

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

61

printf_err("Insufficient number of tensor bytes\n");

return false;

}

/* Which axis do we need to process? */

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

66

switch (this->m_reductionAxisIdx) {

67

case Wav2LetterModel::ms_outputRowsIdx:

68

this->EraseSectionsRowWise(

69

ptrData, elemSz * this->m_outputTensor->dims->data[Wav2LetterModel::ms_outputColsIdx],

70

this->m_lastIteration);

71

break;

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

72

default:

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

73

printf_err("Unsupported axis index: %" PRIu32 "\n", this->m_reductionAxisIdx);

74

return false;

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

75

}

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

76

this->m_classifier.GetClassificationResults(this->m_outputTensor,

77

this->m_results, this->m_labels, 1);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

78

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

79

return true;

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

80

}

81

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

82

/**
 * @brief       Checks that a tensor can be post-processed along the given axis.
 * @param[in]   tensor    Tensor to validate; a null pointer is rejected.
 * @param[in]   axisIdx   Index of the axis that will be processed.
 * @return      true if the tensor is non-null, the axis index is within the
 *              tensor's dimensions, and the size of that axis equals the
 *              expected total length; false otherwise.
 */
bool AsrPostProcess::IsInputValid(TfLiteTensor* tensor, const uint32_t axisIdx) const
{
    if (tensor == nullptr) {
        return false;
    }

    /* The axis must exist in this tensor. */
    const auto numDims = tensor->dims->size;
    if (static_cast<int>(axisIdx) >= numDims) {
        printf_err("Invalid axis index: %" PRIu32 "; Max: %d\n",
            axisIdx, numDims);
        return false;
    }

    /* The axis must span left context + inner section + right context. */
    const auto axisLen = tensor->dims->data[axisIdx];
    if (static_cast<int>(this->m_totalLen) != axisLen) {
        printf_err("Unexpected tensor dimension for axis %" PRIu32", got %d.\n",
            axisIdx, axisLen);
        return false;
    }

    return true;
}

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

104

/**
 * @brief       Gets the size in bytes of one element of the given tensor.
 * @param[in]   tensor   Tensor whose element type is inspected.
 * @return      1 for uint8/int8, 2 for int16, 4 for int32/float32, and 0
 *              (after logging an error) for any other tensor type.
 */
uint32_t AsrPostProcess::GetTensorElementSize(TfLiteTensor* tensor)
{
    uint32_t numBytes = 0;

    switch (tensor->type) {
        case kTfLiteUInt8:
        case kTfLiteInt8:
            numBytes = 1;
            break;
        case kTfLiteInt16:
            numBytes = 2;
            break;
        case kTfLiteInt32:
        case kTfLiteFloat32:
            numBytes = 4;
            break;
        default:
            /* Unknown type: report it and leave the size at zero. */
            printf_err("Unsupported tensor type %s\n",
                TfLiteTypeGetName(tensor->type));
            break;
    }

    return numBytes;
}

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

123

bool AsrPostProcess::EraseSectionsRowWise(

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

124

uint8_t* ptrData,

125

const uint32_t strideSzBytes,

126

const bool lastIteration)

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

127

{

128

/* In this case, the "zero-ing" is quite simple as the region

129

* to be zeroed sits in contiguous memory (row-major). */

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

130

const uint32_t eraseLen = strideSzBytes * this->m_outputContextLen;

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

131

132

/* Erase left context? */

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

133

if (this->m_countIterations > 0) {

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

134

/* Set output of each classification window to the blank token. */

135

std::memset(ptrData, 0, eraseLen);

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

136

for (size_t windowIdx = 0; windowIdx < this->m_outputContextLen; windowIdx++) {

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

137

ptrData[windowIdx*strideSzBytes + this->m_blankTokenIdx] = 1;

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

}

}

/* Erase right context? */

142

if (false == lastIteration) {

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

143

uint8_t* rightCtxPtr = ptrData + (strideSzBytes * (this->m_outputContextLen + this->m_outputInnerLen));

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

144

/* Set output of each classification window to the blank token. */

145

std::memset(rightCtxPtr, 0, eraseLen);

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

146

for (size_t windowIdx = 0; windowIdx < this->m_outputContextLen; windowIdx++) {

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

147

rightCtxPtr[windowIdx*strideSzBytes + this->m_blankTokenIdx] = 1;

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

}

}

if (lastIteration) {

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

152

this->m_countIterations = 0;

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

153

} else {

Isabella Gottardi

2021-05-12 08:27:15 +0100

[diff] [blame]

154

++this->m_countIterations;

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

}

return true;

}

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

160

uint32_t AsrPostProcess::GetNumFeatureVectors(const Model& model)

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

161

{

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

162

TfLiteTensor* inputTensor = model.GetInputTensor(0);

163

const int inputRows = std::max(inputTensor->dims->data[Wav2LetterModel::ms_inputRowsIdx], 0);

164

if (inputRows == 0) {

165

printf_err("Error getting number of input rows for axis: %" PRIu32 "\n",

166

Wav2LetterModel::ms_inputRowsIdx);

167

}

168

return inputRows;

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

169

}

170

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

171

uint32_t AsrPostProcess::GetOutputInnerLen(const TfLiteTensor* outputTensor, const uint32_t outputCtxLen)

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

172

{

173

const uint32_t outputRows = std::max(outputTensor->dims->data[Wav2LetterModel::ms_outputRowsIdx], 0);

174

if (outputRows == 0) {

175

printf_err("Error getting number of output rows for axis: %" PRIu32 "\n",

176

Wav2LetterModel::ms_outputRowsIdx);

177

}

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

178

179

/* Watching for underflow. */

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

180

int innerLen = (outputRows - (2 * outputCtxLen));

181

182

return std::max(innerLen, 0);

183

}

184

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

185

uint32_t AsrPostProcess::GetOutputContextLen(const Model& model, const uint32_t inputCtxLen)

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

186

{

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

187

const uint32_t inputRows = AsrPostProcess::GetNumFeatureVectors(model);

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

188

const uint32_t inputInnerLen = inputRows - (2 * inputCtxLen);

189

constexpr uint32_t ms_outputRowsIdx = Wav2LetterModel::ms_outputRowsIdx;

190

191

/* Check to make sure that the input tensor supports the above

192

* context and inner lengths. */

193

if (inputRows <= 2 * inputCtxLen || inputRows <= inputInnerLen) {

194

printf_err("Input rows not compatible with ctx of %" PRIu32 "\n",

inputCtxLen);

return 0;

}

TfLiteTensor* outputTensor = model.GetOutputTensor(0);

200

const uint32_t outputRows = std::max(outputTensor->dims->data[ms_outputRowsIdx], 0);

201

if (outputRows == 0) {

202

printf_err("Error getting number of output rows for axis: %" PRIu32 "\n",

203

Wav2LetterModel::ms_outputRowsIdx);

return 0;

}

const float inOutRowRatio = static_cast<float>(inputRows) /

208

static_cast<float>(outputRows);

209

210

return std::round(static_cast<float>(inputCtxLen) / inOutRowRatio);

211

}

212

alexander