Blame - source/use_case/asr/src/UseCaseHandler.cc - ml/ethos-u/ml-embedded-evaluation-kit

2021-03-26 21:42:19 +0000

[diff] [blame]

1

/*

Richard Burton

f32a86a

2022-11-15 11:46:11 +0000

[diff] [blame^]

2

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

3

* SPDX-License-Identifier: Apache-2.0

4

*

5

* Licensed under the Apache License, Version 2.0 (the "License");

6

* you may not use this file except in compliance with the License.

7

* You may obtain a copy of the License at

8

*

9

* http://www.apache.org/licenses/LICENSE-2.0

10

*

11

* Unless required by applicable law or agreed to in writing, software

12

* distributed under the License is distributed on an "AS IS" BASIS,

13

* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

* See the License for the specific language governing permissions and

15

* limitations under the License.

16

*/

17

#include "UseCaseHandler.hpp"

18

19

#include "InputFiles.hpp"

20

#include "AsrClassifier.hpp"

21

#include "Wav2LetterModel.hpp"

22

#include "hal.h"

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

23

#include "AudioUtils.hpp"

Richard Burton

ed35a6f

2022-02-14 11:55:35 +0000

[diff] [blame]

24

#include "ImageUtils.hpp"

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

25

#include "UseCaseCommonUtils.hpp"

26

#include "AsrResult.hpp"

27

#include "Wav2LetterPreprocess.hpp"

28

#include "Wav2LetterPostprocess.hpp"

29

#include "OutputDecode.hpp"

alexander

31ae9f0

2022-02-10 16:15:54 +0000

[diff] [blame]

30

#include "log_macros.h"

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

namespace arm {

namespace app {

/**

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

36

* @brief Presents ASR inference results.

37

* @param[in] results Vector of ASR classification results to be displayed.

38

* @return true if successful, false otherwise.

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

39

**/

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

40

static bool PresentInferenceResult(const std::vector<asr::AsrResult>& results);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

41

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

42

/* ASR inference handler. */

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

43

bool ClassifyAudioHandler(ApplicationContext& ctx, uint32_t clipIndex, bool runAll)

44

{

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

45

auto& model = ctx.Get<Model&>("model");

Isabella Gottardi

2021-04-07 17:15:31 +0100

[diff] [blame]

46

auto& profiler = ctx.Get<Profiler&>("profiler");

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

47

auto mfccFrameLen = ctx.Get<uint32_t>("frameLength");

48

auto mfccFrameStride = ctx.Get<uint32_t>("frameStride");

49

auto scoreThreshold = ctx.Get<float>("scoreThreshold");

50

auto inputCtxLen = ctx.Get<uint32_t>("ctxLen");

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

51

/* If the request has a valid size, set the audio index. */

52

if (clipIndex < NUMBER_OF_FILES) {

Éanna Ó Catháin

8f95887

2021-09-15 09:32:30 +0100

[diff] [blame]

53

if (!SetAppCtxIfmIdx(ctx, clipIndex,"clipIndex")) {

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

54

return false;

55

}

56

}

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

57

auto initialClipIdx = ctx.Get<uint32_t>("clipIndex");

58

constexpr uint32_t dataPsnTxtInfStartX = 20;

59

constexpr uint32_t dataPsnTxtInfStartY = 40;

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

60

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

61

if (!model.IsInited()) {

62

printf_err("Model is not initialised! Terminating processing.\n");

return false;

}

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

66

TfLiteTensor* inputTensor = model.GetInputTensor(0);

67

TfLiteTensor* outputTensor = model.GetOutputTensor(0);

68

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

69

/* Get input shape. Dimensions of the tensor should have been verified by

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

70

* the callee. */

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

71

TfLiteIntArray* inputShape = model.GetInputShape(0);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

72

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

73

const uint32_t inputRowsSize = inputShape->data[Wav2LetterModel::ms_inputRowsIdx];

74

const uint32_t inputInnerLen = inputRowsSize - (2 * inputCtxLen);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

75

76

/* Audio data stride corresponds to inputInnerLen feature vectors. */

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

77

const uint32_t audioDataWindowLen = (inputRowsSize - 1) * mfccFrameStride + (mfccFrameLen);

78

const uint32_t audioDataWindowStride = inputInnerLen * mfccFrameStride;

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

79

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

80

/* NOTE: This is only used for time stamp calculation. */

81

const float secondsPerSample = (1.0 / audio::Wav2LetterMFCC::ms_defaultSamplingFreq);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

82

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

83

/* Set up pre and post-processing objects. */

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

84

AsrPreProcess preProcess = AsrPreProcess(inputTensor, Wav2LetterModel::ms_numMfccFeatures,

85

inputShape->data[Wav2LetterModel::ms_inputRowsIdx],

86

mfccFrameLen, mfccFrameStride);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

87

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

88

std::vector<ClassificationResult> singleInfResult;

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

89

const uint32_t outputCtxLen = AsrPostProcess::GetOutputContextLen(model, inputCtxLen);

90

AsrPostProcess postProcess = AsrPostProcess(

91

outputTensor, ctx.Get<AsrClassifier&>("classifier"),

92

ctx.Get<std::vector<std::string>&>("labels"),

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

93

singleInfResult, outputCtxLen,

94

Wav2LetterModel::ms_blankTokenIdx, Wav2LetterModel::ms_outputRowsIdx

95

);

96

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

97

/* Loop to process audio clips. */

98

do {

Kshitij Sisodia

68fdd11

2022-04-06 13:03:20 +0100

[diff] [blame]

99

hal_lcd_clear(COLOR_BLACK);

Richard Burton

9b8d67a

2021-12-10 12:32:51 +0000

[diff] [blame]

100

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

101

/* Get current audio clip index. */

102

auto currentIndex = ctx.Get<uint32_t>("clipIndex");

103

104

/* Get the current audio buffer and respective size. */

105

const int16_t* audioArr = get_audio_array(currentIndex);

106

const uint32_t audioArrSize = get_audio_array_size(currentIndex);

107

108

if (!audioArr) {

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

109

printf_err("Invalid audio array pointer.\n");

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

return false;

}

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

113

/* Audio clip needs enough samples to produce at least 1 MFCC feature. */

114

if (audioArrSize < mfccFrameLen) {

Kshitij Sisodia

f9c19ea

2021-05-07 16:08:14 +0100

[diff] [blame]

115

printf_err("Not enough audio samples, minimum needed is %" PRIu32 "\n",

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

116

mfccFrameLen);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

return false;

}

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

120

/* Creating a sliding window through the whole audio clip. */

alexander

80eecfb

2021-07-06 19:47:59 +0100

[diff] [blame]

121

auto audioDataSlider = audio::FractionalSlidingWindow<const int16_t>(

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

122

audioArr, audioArrSize,

123

audioDataWindowLen, audioDataWindowStride);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

124

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

125

/* Declare a container for final results. */

126

std::vector<asr::AsrResult> finalResults;

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

127

128

/* Display message on the LCD - inference running. */

129

std::string str_inf{"Running inference... "};

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

130

hal_lcd_display_text(str_inf.c_str(), str_inf.size(),

131

dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

132

Kshitij Sisodia

f9c19ea

2021-05-07 16:08:14 +0100

[diff] [blame]

133

info("Running inference on audio clip %" PRIu32 " => %s\n", currentIndex,

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

134

get_filename(currentIndex));

135

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

136

size_t inferenceWindowLen = audioDataWindowLen;

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

137

138

/* Start sliding through audio clip. */

139

while (audioDataSlider.HasNext()) {

140

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

141

/* If not enough audio, see how much can be sent for processing. */

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

142

size_t nextStartIndex = audioDataSlider.NextWindowStartIndex();

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

143

if (nextStartIndex + audioDataWindowLen > audioArrSize) {

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

144

inferenceWindowLen = audioArrSize - nextStartIndex;

145

}

146

147

const int16_t* inferenceWindow = audioDataSlider.Next();

148

149

info("Inference %zu/%zu\n", audioDataSlider.Index() + 1,

150

static_cast<size_t>(ceilf(audioDataSlider.FractionalTotalStrides() + 1)));

151

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

152

/* Run the pre-processing, inference and post-processing. */

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

153

if (!preProcess.DoPreProcess(inferenceWindow, inferenceWindowLen)) {

154

printf_err("Pre-processing failed.");

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

155

return false;

156

}

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

157

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

158

if (!RunInference(model, profiler)) {

159

printf_err("Inference failed.");

return false;

}

/* Post processing needs to know if we are on the last audio window. */

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

164

postProcess.m_lastIteration = !audioDataSlider.HasNext();

Richard Burton

2022-04-22 16:14:57 +0100

[diff] [blame]

165

if (!postProcess.DoPostProcess()) {

166

printf_err("Post-processing failed.");

alexander

27b62d9

2021-05-04 20:46:08 +0100

[diff] [blame]

167

return false;

168

}

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

169

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

170

/* Add results from this window to our final results vector. */

171

finalResults.emplace_back(asr::AsrResult(singleInfResult,

172

(audioDataSlider.Index() * secondsPerSample * audioDataWindowStride),

173

audioDataSlider.Index(), scoreThreshold));

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

174

175

#if VERIFY_TEST_OUTPUT

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

176

armDumpTensor(outputTensor,

177

outputTensor->dims->data[Wav2LetterModel::ms_outputColsIdx]);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

178

#endif /* VERIFY_TEST_OUTPUT */

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

179

} /* while (audioDataSlider.HasNext()) */

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

180

181

/* Erase. */

182

str_inf = std::string(str_inf.size(), ' ');

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

183

hal_lcd_display_text(str_inf.c_str(), str_inf.size(),

184

dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

185

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

186

ctx.Set<std::vector<asr::AsrResult>>("results", finalResults);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

187

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

188

if (!PresentInferenceResult(finalResults)) {

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

return false;

}

Isabella Gottardi

2021-04-07 17:15:31 +0100

[diff] [blame]

192

profiler.PrintProfilingResult();

193

Éanna Ó Catháin

8f95887

2021-09-15 09:32:30 +0100

[diff] [blame]

194

IncrementAppCtxIfmIdx(ctx,"clipIndex");

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

195

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

196

} while (runAll && ctx.Get<uint32_t>("clipIndex") != initialClipIdx);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

return true;

}

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

201

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

202

static bool PresentInferenceResult(const std::vector<asr::AsrResult>& results)

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

203

{

204

constexpr uint32_t dataPsnTxtStartX1 = 20;

205

constexpr uint32_t dataPsnTxtStartY1 = 60;

206

constexpr bool allow_multiple_lines = true;

207

Kshitij Sisodia

68fdd11

2022-04-06 13:03:20 +0100

[diff] [blame]

208

hal_lcd_set_text_color(COLOR_GREEN);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

209

Isabella Gottardi

2021-04-07 17:15:31 +0100

[diff] [blame]

210

info("Final results:\n");

211

info("Total number of inferences: %zu\n", results.size());

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

212

/* Results from multiple inferences should be combined before processing. */

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

213

std::vector<ClassificationResult> combinedResults;

214

for (const auto& result : results) {

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

215

combinedResults.insert(combinedResults.end(),

216

result.m_resultVec.begin(),

217

result.m_resultVec.end());

218

}

219

220

/* Get each inference result string using the decoder. */

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

221

for (const auto& result : results) {

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

222

std::string infResultStr = audio::asr::DecodeOutput(result.m_resultVec);

223

Kshitij Sisodia

f9c19ea

2021-05-07 16:08:14 +0100

[diff] [blame]

224

info("For timestamp: %f (inference #: %" PRIu32 "); label: %s\n",

Isabella Gottardi

2021-04-07 17:15:31 +0100

[diff] [blame]

225

result.m_timeStamp, result.m_inferenceNumber,

226

infResultStr.c_str());

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

227

}

228

229

/* Get the decoded result for the combined result. */

230

std::string finalResultStr = audio::asr::DecodeOutput(combinedResults);

231

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame]

232

hal_lcd_display_text(finalResultStr.c_str(), finalResultStr.size(),

233

dataPsnTxtStartX1, dataPsnTxtStartY1,

234

allow_multiple_lines);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

235

Isabella Gottardi

2021-04-07 17:15:31 +0100

[diff] [blame]

236

info("Complete recognition: %s\n", finalResultStr.c_str());

alexander