Blame - source/use_case/asr/src/UseCaseHandler.cc - ml/ethos-u/ml-embedded-evaluation-kit

2021-03-26 21:42:19 +0000

[diff] [blame]

1

/*

Richard Burton

ed35a6f

2022-02-14 11:55:35 +0000

[diff] [blame]

2

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

3

* SPDX-License-Identifier: Apache-2.0

4

*

5

* Licensed under the Apache License, Version 2.0 (the "License");

6

* you may not use this file except in compliance with the License.

7

* You may obtain a copy of the License at

8

*

9

* http://www.apache.org/licenses/LICENSE-2.0

10

*

11

* Unless required by applicable law or agreed to in writing, software

12

* distributed under the License is distributed on an "AS IS" BASIS,

13

* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

* See the License for the specific language governing permissions and

15

* limitations under the License.

16

*/

17

#include "UseCaseHandler.hpp"

18

19

#include "InputFiles.hpp"

20

#include "AsrClassifier.hpp"

21

#include "Wav2LetterModel.hpp"

22

#include "hal.h"

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

23

#include "AudioUtils.hpp"

Richard Burton

ed35a6f

2022-02-14 11:55:35 +0000

[diff] [blame]

24

#include "ImageUtils.hpp"

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

25

#include "UseCaseCommonUtils.hpp"

26

#include "AsrResult.hpp"

27

#include "Wav2LetterPreprocess.hpp"

28

#include "Wav2LetterPostprocess.hpp"

29

#include "OutputDecode.hpp"

alexander

31ae9f0

2022-02-10 16:15:54 +0000

[diff] [blame]

30

#include "log_macros.h"

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

namespace arm {

namespace app {

/**

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

36

* @brief Presents ASR inference results.

37

* @param[in] results Vector of ASR classification results to be displayed.

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

38

* @return true if successful, false otherwise.

39

**/

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

40

static bool PresentInferenceResult(const std::vector<asr::AsrResult>& results);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

41

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

42

/* ASR inference handler. */

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

43

bool ClassifyAudioHandler(ApplicationContext& ctx, uint32_t clipIndex, bool runAll)

44

{

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

45

auto& model = ctx.Get<Model&>("model");

Isabella Gottardi

2021-04-07 17:15:31 +0100

[diff] [blame]

46

auto& profiler = ctx.Get<Profiler&>("profiler");

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

47

auto mfccFrameLen = ctx.Get<uint32_t>("frameLength");

48

auto mfccFrameStride = ctx.Get<uint32_t>("frameStride");

49

auto scoreThreshold = ctx.Get<float>("scoreThreshold");

50

auto inputCtxLen = ctx.Get<uint32_t>("ctxLen");

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

51

/* If the request has a valid size, set the audio index. */

52

if (clipIndex < NUMBER_OF_FILES) {

Éanna Ó Catháin

8f95887

2021-09-15 09:32:30 +0100

[diff] [blame]

53

if (!SetAppCtxIfmIdx(ctx, clipIndex,"clipIndex")) {

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

54

return false;

55

}

56

}

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

57

auto initialClipIdx = ctx.Get<uint32_t>("clipIndex");

58

constexpr uint32_t dataPsnTxtInfStartX = 20;

59

constexpr uint32_t dataPsnTxtInfStartY = 40;

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

60

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

61

if (!model.IsInited()) {

62

printf_err("Model is not initialised! Terminating processing.\n");

return false;

}

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

66

/* Get input shape. Dimensions of the tensor should have been verified by

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

67

* the callee. */

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

68

TfLiteIntArray* inputShape = model.GetInputShape(0);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

69

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

70

const uint32_t inputRowsSize = inputShape->data[Wav2LetterModel::ms_inputRowsIdx];

71

const uint32_t inputInnerLen = inputRowsSize - (2 * inputCtxLen);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

72

73

/* Audio data stride corresponds to inputInnerLen feature vectors. */

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

74

const uint32_t audioDataWindowLen = (inputRowsSize - 1) * mfccFrameStride + (mfccFrameLen);

75

const uint32_t audioDataWindowStride = inputInnerLen * mfccFrameStride;

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

76

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

77

/* NOTE: This is only used for time stamp calculation. */

78

const float secondsPerSample = (1.0 / audio::Wav2LetterMFCC::ms_defaultSamplingFreq);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

79

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

80

/* Set up pre and post-processing objects. */

81

ASRPreProcess preProcess = ASRPreProcess(model.GetInputTensor(0), Wav2LetterModel::ms_numMfccFeatures,

82

inputShape->data[Wav2LetterModel::ms_inputRowsIdx], mfccFrameLen, mfccFrameStride);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

83

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

84

std::vector<ClassificationResult> singleInfResult;

85

const uint32_t outputCtxLen = ASRPostProcess::GetOutputContextLen(model, inputCtxLen);

86

ASRPostProcess postProcess = ASRPostProcess(ctx.Get<AsrClassifier&>("classifier"),

87

model.GetOutputTensor(0), ctx.Get<std::vector<std::string>&>("labels"),

88

singleInfResult, outputCtxLen,

89

Wav2LetterModel::ms_blankTokenIdx, Wav2LetterModel::ms_outputRowsIdx

90

);

91

92

UseCaseRunner runner = UseCaseRunner(&preProcess, &postProcess, &model);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

93

94

/* Loop to process audio clips. */

95

do {

Kshitij Sisodia

68fdd11

2022-04-06 13:03:20 +0100

[diff] [blame]

96

hal_lcd_clear(COLOR_BLACK);

Richard Burton

9b8d67a

2021-12-10 12:32:51 +0000

[diff] [blame]

97

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

98

/* Get current audio clip index. */

99

auto currentIndex = ctx.Get<uint32_t>("clipIndex");

100

101

/* Get the current audio buffer and respective size. */

102

const int16_t* audioArr = get_audio_array(currentIndex);

103

const uint32_t audioArrSize = get_audio_array_size(currentIndex);

104

105

if (!audioArr) {

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

106

printf_err("Invalid audio array pointer.\n");

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

return false;

}

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

110

/* Audio clip needs enough samples to produce at least 1 MFCC feature. */

111

if (audioArrSize < mfccFrameLen) {

Kshitij Sisodia

f9c19ea

2021-05-07 16:08:14 +0100

[diff] [blame]

112

printf_err("Not enough audio samples, minimum needed is %" PRIu32 "\n",

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

113

mfccFrameLen);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

return false;

}

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

117

/* Creating a sliding window through the whole audio clip. */

alexander

80eecfb

2021-07-06 19:47:59 +0100

[diff] [blame]

118

auto audioDataSlider = audio::FractionalSlidingWindow<const int16_t>(

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

119

audioArr, audioArrSize,

120

audioDataWindowLen, audioDataWindowStride);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

121

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

122

/* Declare a container for final results. */

123

std::vector<asr::AsrResult> finalResults;

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

124

125

/* Display message on the LCD - inference running. */

126

std::string str_inf{"Running inference... "};

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

127

hal_lcd_display_text(str_inf.c_str(), str_inf.size(),

128

dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

129

Kshitij Sisodia

f9c19ea

2021-05-07 16:08:14 +0100

[diff] [blame]

130

info("Running inference on audio clip %" PRIu32 " => %s\n", currentIndex,

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

131

get_filename(currentIndex));

132

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

133

size_t inferenceWindowLen = audioDataWindowLen;

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

134

135

/* Start sliding through audio clip. */

136

while (audioDataSlider.HasNext()) {

137

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

138

/* If not enough audio, see how much can be sent for processing. */

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

139

size_t nextStartIndex = audioDataSlider.NextWindowStartIndex();

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

140

if (nextStartIndex + audioDataWindowLen > audioArrSize) {

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

141

inferenceWindowLen = audioArrSize - nextStartIndex;

142

}

143

144

const int16_t* inferenceWindow = audioDataSlider.Next();

145

146

info("Inference %zu/%zu\n", audioDataSlider.Index() + 1,

147

static_cast<size_t>(ceilf(audioDataSlider.FractionalTotalStrides() + 1)));

148

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

149

/* Run the pre-processing, inference and post-processing. */

150

runner.PreProcess(inferenceWindow, inferenceWindowLen);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

151

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

152

profiler.StartProfiling("Inference");

153

if (!runner.RunInference()) {

154

return false;

155

}

156

profiler.StopProfiling();

157

158

postProcess.m_lastIteration = !audioDataSlider.HasNext();

159

if (!runner.PostProcess()) {

alexander

27b62d9

2021-05-04 20:46:08 +0100

[diff] [blame]

160

return false;

161

}

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

162

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

163

/* Add results from this window to our final results vector. */

164

finalResults.emplace_back(asr::AsrResult(singleInfResult,

165

(audioDataSlider.Index() * secondsPerSample * audioDataWindowStride),

166

audioDataSlider.Index(), scoreThreshold));

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

167

168

#if VERIFY_TEST_OUTPUT

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

169

TfLiteTensor* outputTensor = model.GetOutputTensor(0);

170

armDumpTensor(outputTensor,

171

outputTensor->dims->data[Wav2LetterModel::ms_outputColsIdx]);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

172

#endif /* VERIFY_TEST_OUTPUT */

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

173

} /* while (audioDataSlider.HasNext()) */

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

174

175

/* Erase. */

176

str_inf = std::string(str_inf.size(), ' ');

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

177

hal_lcd_display_text(str_inf.c_str(), str_inf.size(),

178

dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

179

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

180

ctx.Set<std::vector<asr::AsrResult>>("results", finalResults);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

181

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

182

if (!PresentInferenceResult(finalResults)) {

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

return false;

}

Isabella Gottardi

2021-04-07 17:15:31 +0100

[diff] [blame]

186

profiler.PrintProfilingResult();

187

Éanna Ó Catháin

8f95887

2021-09-15 09:32:30 +0100

[diff] [blame]

188

IncrementAppCtxIfmIdx(ctx,"clipIndex");

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

189

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

190

} while (runAll && ctx.Get<uint32_t>("clipIndex") != initialClipIdx);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

return true;

}

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

195

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

196

static bool PresentInferenceResult(const std::vector<asr::AsrResult>& results)

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

197

{

198

constexpr uint32_t dataPsnTxtStartX1 = 20;

199

constexpr uint32_t dataPsnTxtStartY1 = 60;

200

constexpr bool allow_multiple_lines = true;

201

Kshitij Sisodia

68fdd11

2022-04-06 13:03:20 +0100

[diff] [blame]

202

hal_lcd_set_text_color(COLOR_GREEN);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

203

Isabella Gottardi

2021-04-07 17:15:31 +0100

[diff] [blame]

204

info("Final results:\n");

205

info("Total number of inferences: %zu\n", results.size());

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

206

/* Results from multiple inferences should be combined before processing. */

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

207

std::vector<ClassificationResult> combinedResults;

208

for (const auto& result : results) {

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

209

combinedResults.insert(combinedResults.end(),

210

result.m_resultVec.begin(),

211

result.m_resultVec.end());

212

}

213

214

/* Get each inference result string using the decoder. */

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

215

for (const auto& result : results) {

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

216

std::string infResultStr = audio::asr::DecodeOutput(result.m_resultVec);

217

Kshitij Sisodia

f9c19ea

2021-05-07 16:08:14 +0100

[diff] [blame]

218

info("For timestamp: %f (inference #: %" PRIu32 "); label: %s\n",

Isabella Gottardi

2021-04-07 17:15:31 +0100

[diff] [blame]

219

result.m_timeStamp, result.m_inferenceNumber,

220

infResultStr.c_str());

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

221

}

222

223

/* Get the decoded result for the combined result. */

224

std::string finalResultStr = audio::asr::DecodeOutput(combinedResults);

225

Richard Burton

2022-04-22 09:08:21 +0100

[diff] [blame^]

226

hal_lcd_display_text(finalResultStr.c_str(), finalResultStr.size(),

227

dataPsnTxtStartX1, dataPsnTxtStartY1,

228

allow_multiple_lines);

alexander

2021-03-26 21:42:19 +0000

[diff] [blame]

229

Isabella Gottardi

2021-04-07 17:15:31 +0100

[diff] [blame]

230

info("Complete recognition: %s\n", finalResultStr.c_str());

alexander