Blame - source/use_case/kws_asr/src/MainLoop.cc - ml/ethos-u/ml-embedded-evaluation-kit

alexander

3c79893

2021-03-26 21:42:19 +0000

[diff] [blame]

1

/*

2

3

* SPDX-License-Identifier: Apache-2.0

4

*

5

* Licensed under the Apache License, Version 2.0 (the "License");

6

* you may not use this file except in compliance with the License.

7

* You may obtain a copy of the License at

8

*

9

* http://www.apache.org/licenses/LICENSE-2.0

10

*

11

* Unless required by applicable law or agreed to in writing, software

12

* distributed under the License is distributed on an "AS IS" BASIS,

13

* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

* See the License for the specific language governing permissions and

15

* limitations under the License.

16

*/

17

#include "hal.h" /* Brings in platform definitions. */

18

#include "InputFiles.hpp" /* For input images. */

Kshitij Sisodia

76a1580

2021-12-24 11:05:11 +0000

[diff] [blame]

19

#include "Labels_micronetkws.hpp" /* For MicroNetKws label strings. */

alexander

3c79893

2021-03-26 21:42:19 +0000

[diff] [blame]

20

#include "Labels_wav2letter.hpp" /* For Wav2Letter label strings. */

21

#include "Classifier.hpp" /* KWS classifier. */

22

#include "AsrClassifier.hpp" /* ASR classifier. */

Kshitij Sisodia

76a1580

2021-12-24 11:05:11 +0000

[diff] [blame]

23

#include "MicroNetKwsModel.hpp" /* KWS model class for running inference. */

alexander

3c79893

2021-03-26 21:42:19 +0000

[diff] [blame]

24

#include "Wav2LetterModel.hpp" /* ASR model class for running inference. */

25

#include "UseCaseCommonUtils.hpp" /* Utils functions. */

26

#include "UseCaseHandler.hpp" /* Handlers for different user options. */

27

#include "Wav2LetterPreprocess.hpp" /* ASR pre-processing class. */

28

#include "Wav2LetterPostprocess.hpp"/* ASR post-processing class. */

alexander

31ae9f0

2022-02-10 16:15:54 +0000

[diff] [blame^]

29

#include "log_macros.h"

alexander

3c79893

2021-03-26 21:42:19 +0000

[diff] [blame]

30

31

using KwsClassifier = arm::app::Classifier;

/* Menu option codes; the numeric value is what the user types at the prompt. */
enum opcodes
{
    MENU_OPT_RUN_INF_NEXT     = 1, /* Run inference on the next vector. */
    MENU_OPT_RUN_INF_CHOSEN   = 2, /* Run inference on a user provided vector index. */
    MENU_OPT_RUN_INF_ALL      = 3, /* Run inference on all vectors. */
    MENU_OPT_SHOW_MODEL_INFO  = 4, /* Show model info. */
    MENU_OPT_LIST_AUDIO_CLIPS = 5  /* List the currently baked-in audio clips. */
};

41

42

static void DisplayMenu()

43

{

Kshitij Sisodia

3c8256d

2021-05-24 16:12:40 +0100

[diff] [blame]

44

printf("\n\n");

45

printf("User input required\n");

alexander

3c79893

2021-03-26 21:42:19 +0000

[diff] [blame]

46

printf("Enter option number from:\n\n");

47

printf(" %u. Classify next audio clip\n", MENU_OPT_RUN_INF_NEXT);

48

printf(" %u. Classify audio clip at chosen index\n", MENU_OPT_RUN_INF_CHOSEN);

49

printf(" %u. Run classification on all audio clips\n", MENU_OPT_RUN_INF_ALL);

50

printf(" %u. Show NN model info\n", MENU_OPT_SHOW_MODEL_INFO);

51

printf(" %u. List audio clips\n\n", MENU_OPT_LIST_AUDIO_CLIPS);

52

printf(" Choice: ");

George Gekov

93e5951

2021-08-03 11:18:41 +0100

[diff] [blame]

53

fflush(stdout);

alexander

3c79893

2021-03-26 21:42:19 +0000

[diff] [blame]

54

}

55

56

/** @brief Gets the number of MFCC features for a single window. */

57

static uint32_t GetNumMfccFeatures(const arm::app::Model& model);

58

59

/** @brief Gets the number of MFCC feature vectors to be computed. */

60

static uint32_t GetNumMfccFeatureVectors(const arm::app::Model& model);

61

62

/** @brief Gets the output context length (left and right) for post-processing. */

63

static uint32_t GetOutputContextLen(const arm::app::Model& model,

64

uint32_t inputCtxLen);

65

66

/** @brief Gets the output inner length for post-processing. */

67

static uint32_t GetOutputInnerLen(const arm::app::Model& model,

68

uint32_t outputCtxLen);

69

70

/**
 * @brief       Main loop of the KWS + ASR use case.
 *
 * Initialises both models (the ASR model re-uses the KWS model's allocator),
 * builds the ASR pre/post-processing objects, populates the application
 * context and then services menu options. When NUMBER_OF_FILES is not
 * greater than 1 no menu is shown and a single "run next" inference is
 * executed. Otherwise the menu is shown repeatedly until a handler reports
 * failure.
 *
 * @param[in]   platform    Platform handle, used for the profiler, for
 *                          reading user input and stored in the context.
 */
void main_loop(hal_platform& platform)
{
    /* Model wrapper objects. */
    arm::app::MicroNetKwsModel kwsModel;
    arm::app::Wav2LetterModel asrModel;

    /* Load the models. */
    if (!kwsModel.Init()) {
        printf_err("Failed to initialise KWS model\n");
        return;
    }

    /* Initialise the ASR model using the same allocator from KWS
     * to re-use the tensor arena. */
    if (!asrModel.Init(kwsModel.GetAllocator())) {
        printf_err("Failed to initialise ASR model\n");
        return;
    }

    /* Initialise ASR pre-processing. */
    arm::app::audio::asr::Preprocess prep(
            GetNumMfccFeatures(asrModel),
            arm::app::asr::g_FrameLength,
            arm::app::asr::g_FrameStride,
            GetNumMfccFeatureVectors(asrModel));

    /* Initialise ASR post-processing. */
    const uint32_t outputCtxLen = GetOutputContextLen(asrModel, arm::app::asr::g_ctxLen);
    const uint32_t blankTokenIdx = 28;
    arm::app::audio::asr::Postprocess postp(
            outputCtxLen,
            GetOutputInnerLen(asrModel, outputCtxLen),
            blankTokenIdx);

    /* Instantiate application context. */
    arm::app::ApplicationContext caseContext;

    arm::app::Profiler profiler{&platform, "kws_asr"};
    caseContext.Set<arm::app::Profiler&>("profiler", profiler);

    caseContext.Set<hal_platform&>("platform", platform);
    caseContext.Set<arm::app::Model&>("kwsmodel", kwsModel);
    caseContext.Set<arm::app::Model&>("asrmodel", asrModel);
    caseContext.Set<uint32_t>("clipIndex", 0);
    caseContext.Set<uint32_t>("ctxLen", arm::app::asr::g_ctxLen); /* Left and right context length (MFCC feat vectors). */
    caseContext.Set<int>("kwsframeLength", arm::app::kws::g_FrameLength);
    caseContext.Set<int>("kwsframeStride", arm::app::kws::g_FrameStride);
    caseContext.Set<float>("kwsscoreThreshold", arm::app::kws::g_ScoreThreshold); /* Normalised score threshold. */
    caseContext.Set<uint32_t >("kwsNumMfcc", arm::app::kws::g_NumMfcc);
    caseContext.Set<uint32_t >("kwsNumAudioWins", arm::app::kws::g_NumAudioWins);

    caseContext.Set<int>("asrframeLength", arm::app::asr::g_FrameLength);
    caseContext.Set<int>("asrframeStride", arm::app::asr::g_FrameStride);
    caseContext.Set<float>("asrscoreThreshold", arm::app::asr::g_ScoreThreshold); /* Normalised score threshold. */

    KwsClassifier kwsClassifier;            /* Classifier wrapper object. */
    arm::app::AsrClassifier asrClassifier;  /* Classifier wrapper object. */
    caseContext.Set<arm::app::Classifier&>("kwsclassifier", kwsClassifier);
    caseContext.Set<arm::app::AsrClassifier&>("asrclassifier", asrClassifier);

    caseContext.Set<arm::app::audio::asr::Preprocess&>("preprocess", prep);
    caseContext.Set<arm::app::audio::asr::Postprocess&>("postprocess", postp);

    std::vector<std::string> asrLabels;
    arm::app::asr::GetLabelsVector(asrLabels);
    std::vector<std::string> kwsLabels;
    arm::app::kws::GetLabelsVector(kwsLabels);
    caseContext.Set<const std::vector <std::string>&>("asrlabels", asrLabels);
    caseContext.Set<const std::vector <std::string>&>("kwslabels", kwsLabels);

    /* KWS keyword that triggers ASR; it must be one of the KWS labels. */
    const std::string triggerKeyword{"yes"};
    if (std::find(kwsLabels.begin(), kwsLabels.end(), triggerKeyword) == kwsLabels.end()) {
        printf_err("Selected trigger keyword not found in labels file\n");
        return;
    }
    caseContext.Set<const std::string &>("triggerkeyword", triggerKeyword);

    /* Loop. */
    bool executionSuccessful = true;
    constexpr bool bUseMenu = NUMBER_OF_FILES > 1;

    do {
        /* Without a menu, default to running on the next clip. */
        int menuOption = MENU_OPT_RUN_INF_NEXT;
        if (bUseMenu) {
            DisplayMenu();
            menuOption = arm::app::ReadUserInputAsInt(platform);
            printf("\n");
        }
        switch (menuOption) {
            case MENU_OPT_RUN_INF_NEXT:
                executionSuccessful = ClassifyAudioHandler(
                        caseContext,
                        caseContext.Get<uint32_t>("clipIndex"),
                        false);
                break;
            case MENU_OPT_RUN_INF_CHOSEN: {
                printf(" Enter the audio clip index [0, %d]: ",
                       NUMBER_OF_FILES-1);
                fflush(stdout);
                auto clipIndex = static_cast<uint32_t>(
                        arm::app::ReadUserInputAsInt(platform));
                executionSuccessful = ClassifyAudioHandler(caseContext,
                                                           clipIndex,
                                                           false);
                break;
            }
            case MENU_OPT_RUN_INF_ALL:
                executionSuccessful = ClassifyAudioHandler(
                        caseContext,
                        caseContext.Get<uint32_t>("clipIndex"),
                        true);
                break;
            case MENU_OPT_SHOW_MODEL_INFO: {
                /* Always show both models, and report failure if either
                 * handler failed (the KWS result must not be overwritten
                 * by the ASR one). */
                const bool kwsInfoShown = kwsModel.ShowModelInfoHandler();
                const bool asrInfoShown = asrModel.ShowModelInfoHandler();
                executionSuccessful = kwsInfoShown && asrInfoShown;
                break;
            }
            case MENU_OPT_LIST_AUDIO_CLIPS:
                executionSuccessful = ListFilesHandler(caseContext);
                break;
            default:
                /* Unknown option: leave the success flag untouched so the
                 * menu is shown again. */
                printf("Incorrect choice, try again.");
                break;
        }
    } while (executionSuccessful && bUseMenu);
    info("Main loop terminated.\n");
}

200

201

/**
 * @brief       Gets the number of MFCC features for a single window.
 *
 * Reads the column dimension of the model's first input tensor and returns
 * a third of it. A column count that is not a multiple of 3 is reported as
 * an error, but the (truncated) quotient is still returned.
 * NOTE(review): the factor of 3 presumably accounts for the features plus
 * two sets of derived values per window - confirm against the pre-processing.
 *
 * @param[in]   model   Model whose first input tensor is inspected.
 * @return      Columns divided by 3, clamped so it is never negative.
 */
static uint32_t GetNumMfccFeatures(const arm::app::Model& model)
{
    const TfLiteTensor* const tensor = model.GetInputTensor(0);
    const int numCols = tensor->dims->data[arm::app::Wav2LetterModel::ms_inputColsIdx];

    const bool isMultipleOfThree = (numCols % 3 == 0);
    if (!isMultipleOfThree) {
        printf_err("Number of input columns is not a multiple of 3\n");
    }

    /* Clamp: a negative dimension must not wrap when converted to unsigned. */
    const int featuresPerWindow = numCols / 3;
    return (featuresPerWindow > 0) ? featuresPerWindow : 0;
}

210

211

/**
 * @brief       Gets the number of MFCC feature vectors to be computed.
 *
 * @param[in]   model   Model whose first input tensor is inspected.
 * @return      Row dimension of the first input tensor, clamped so it is
 *              never negative.
 */
static uint32_t GetNumMfccFeatureVectors(const arm::app::Model& model)
{
    const TfLiteTensor* const tensor = model.GetInputTensor(0);
    const int numRows = tensor->dims->data[arm::app::Wav2LetterModel::ms_inputRowsIdx];

    /* Clamp: a negative dimension must not wrap when converted to unsigned. */
    return (numRows > 0) ? numRows : 0;
}

217

218

/**
 * @brief       Gets the output context length (left and right) for post-processing.
 *
 * The input context length is scaled by the ratio of output rows to input
 * rows and rounded to the nearest integer.
 *
 * @param[in]   model         Model whose first input and output tensors are inspected.
 * @param[in]   inputCtxLen   Context length on the input side, in rows.
 *                            Must be non-zero and leave at least one inner
 *                            row, i.e. input rows > 2 * inputCtxLen.
 * @return      Context length in output rows; 0 (with an error logged) when
 *              the input rows cannot accommodate the requested context or
 *              the output tensor reports no rows.
 */
static uint32_t GetOutputContextLen(const arm::app::Model& model, const uint32_t inputCtxLen)
{
    const uint32_t inputRows = GetNumMfccFeatureVectors(model);

    /* Widen before doubling so a very large context length cannot wrap
     * around and slip through the check below. */
    const uint64_t totalInputCtxLen = 2 * static_cast<uint64_t>(inputCtxLen);

    /* Check to make sure that the input tensor supports the above context
     * and a non-empty inner length. A zero context is rejected as well. */
    if (0 == inputCtxLen || inputRows <= totalInputCtxLen) {
        printf_err("Input rows not compatible with ctx of %" PRIu32 "\n",
                   inputCtxLen);
        return 0;
    }

    constexpr uint32_t ms_outputRowsIdx = arm::app::Wav2LetterModel::ms_outputRowsIdx;
    TfLiteTensor* outputTensor = model.GetOutputTensor(0);
    const uint32_t outputRows = std::max(outputTensor->dims->data[ms_outputRowsIdx], 0);

    /* Avoid dividing by zero when computing the row ratio. */
    if (0 == outputRows) {
        printf_err("Output tensor reports no rows\n");
        return 0;
    }

    const float tensorColRatio = static_cast<float>(inputRows)/
                                 static_cast<float>(outputRows);

    return static_cast<uint32_t>(
            std::round(static_cast<float>(inputCtxLen)/tensorColRatio));
}

239

240

/**
 * @brief       Gets the output inner length for post-processing.
 *
 * @param[in]   model          Model whose first output tensor is inspected.
 * @param[in]   outputCtxLen   Context length on the output side, in rows
 *                             (applied on both the left and the right).
 * @return      Output rows minus twice the context length; 0 (with an
 *              error logged) when the output tensor has fewer rows than
 *              the two contexts require.
 */
static uint32_t GetOutputInnerLen(const arm::app::Model& model,
                                  const uint32_t outputCtxLen)
{
    constexpr uint32_t ms_outputRowsIdx = arm::app::Wav2LetterModel::ms_outputRowsIdx;
    TfLiteTensor* outputTensor = model.GetOutputTensor(0);
    const uint32_t outputRows = std::max(outputTensor->dims->data[ms_outputRowsIdx], 0);

    /* Widen before doubling so the comparison itself cannot wrap. */
    const uint64_t totalCtxLen = 2 * static_cast<uint64_t>(outputCtxLen);

    /* Without this guard the unsigned subtraction would wrap to a huge
     * value and be handed to post-processing as a valid length. */
    if (outputRows < totalCtxLen) {
        printf_err("Output rows not compatible with ctx of %" PRIu32 "\n",
                   outputCtxLen);
        return 0;
    }

    return static_cast<uint32_t>(outputRows - totalCtxLen);
}