/*
 * Copyright (c) 2021 Arm Limited. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "hal.h"                     /* Brings in platform definitions. */
#include "InputFiles.hpp"            /* For input audio clips. */
#include "Labels_micronetkws.hpp"    /* For MicroNetKws label strings. */
#include "Labels_wav2letter.hpp"     /* For Wav2Letter label strings. */
#include "Classifier.hpp"            /* KWS classifier. */
#include "AsrClassifier.hpp"         /* ASR classifier. */
#include "MicroNetKwsModel.hpp"      /* KWS model class for running inference. */
#include "Wav2LetterModel.hpp"       /* ASR model class for running inference. */
#include "UseCaseCommonUtils.hpp"    /* Utils functions. */
#include "UseCaseHandler.hpp"        /* Handlers for different user options. */
#include "Wav2LetterPreprocess.hpp"  /* ASR pre-processing class. */
#include "Wav2LetterPostprocess.hpp" /* ASR post-processing class. */
#include "log_macros.h"

#include <algorithm>                 /* For std::find and std::max. */
#include <cinttypes>                 /* For PRIu32. */
#include <cmath>                     /* For std::round. */
#include <string>
#include <vector>

using KwsClassifier = arm::app::Classifier;

enum opcodes
{
    MENU_OPT_RUN_INF_NEXT = 1,  /* Run on next vector. */
    MENU_OPT_RUN_INF_CHOSEN,    /* Run on a user provided vector index. */
    MENU_OPT_RUN_INF_ALL,       /* Run inference on all. */
    MENU_OPT_SHOW_MODEL_INFO,   /* Show model info. */
    MENU_OPT_LIST_AUDIO_CLIPS   /* List the current baked audio clips. */
};

static void DisplayMenu()
{
    printf("\n\n");
    printf("User input required\n");
    printf("Enter option number from:\n\n");
    printf("  %u. Classify next audio clip\n", MENU_OPT_RUN_INF_NEXT);
    printf("  %u. Classify audio clip at chosen index\n", MENU_OPT_RUN_INF_CHOSEN);
    printf("  %u. Run classification on all audio clips\n", MENU_OPT_RUN_INF_ALL);
    printf("  %u. Show NN model info\n", MENU_OPT_SHOW_MODEL_INFO);
    printf("  %u. List audio clips\n\n", MENU_OPT_LIST_AUDIO_CLIPS);
    printf("  Choice: ");
    fflush(stdout);
}

/** @brief Gets the number of MFCC features for a single window. */
static uint32_t GetNumMfccFeatures(const arm::app::Model& model);

/** @brief Gets the number of MFCC feature vectors to be computed. */
static uint32_t GetNumMfccFeatureVectors(const arm::app::Model& model);

/** @brief Gets the output context length (left and right) for post-processing. */
static uint32_t GetOutputContextLen(const arm::app::Model& model,
                                    uint32_t inputCtxLen);

/** @brief Gets the output inner length for post-processing. */
static uint32_t GetOutputInnerLen(const arm::app::Model& model,
                                  uint32_t outputCtxLen);

void main_loop()
{
    /* Model wrapper objects. */
    arm::app::MicroNetKwsModel kwsModel;
    arm::app::Wav2LetterModel asrModel;

    /* Load the models. */
    if (!kwsModel.Init()) {
        printf_err("Failed to initialise KWS model\n");
        return;
    }

    /* Initialise the ASR model using the same allocator from KWS
     * to re-use the tensor arena. */
    if (!asrModel.Init(kwsModel.GetAllocator())) {
        printf_err("Failed to initialise ASR model\n");
        return;
    }

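    /* Note: the pre-processing below converts raw audio into MFCC feature vectors
     * sized to match the ASR model input. The frame length and stride are given in
     * audio samples, while the feature and window counts are derived from the
     * model's input tensor shape via the helpers defined at the end of this file. */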
    /* Initialise ASR pre-processing. */
    arm::app::audio::asr::Preprocess prep(
            GetNumMfccFeatures(asrModel),
            arm::app::asr::g_FrameLength,
            arm::app::asr::g_FrameStride,
            GetNumMfccFeatureVectors(asrModel));

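    /* Note: for the default Wav2Letter model the last of its 29 output classes
     * (a-z, space, apostrophe, blank) is the CTC blank token, hence index 28. The
     * context lengths tell the post-processing which leading and trailing
     * time-steps of each inference window overlap with neighbouring windows. */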
    /* Initialise ASR post-processing. */
    const uint32_t outputCtxLen = GetOutputContextLen(asrModel, arm::app::asr::g_ctxLen);
    const uint32_t blankTokenIdx = 28;
    arm::app::audio::asr::Postprocess postp(
            outputCtxLen,
            GetOutputInnerLen(asrModel, outputCtxLen),
            blankTokenIdx);

    /* Instantiate application context. */
    arm::app::ApplicationContext caseContext;

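    /* The context acts as a named key-value store: the use-case handlers retrieve
     * the models, classifiers, labels and tuning parameters below by these names. */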
    arm::app::Profiler profiler{"kws_asr"};
    caseContext.Set<arm::app::Profiler&>("profiler", profiler);
    caseContext.Set<arm::app::Model&>("kwsmodel", kwsModel);
    caseContext.Set<arm::app::Model&>("asrmodel", asrModel);
    caseContext.Set<uint32_t>("clipIndex", 0);
    caseContext.Set<uint32_t>("ctxLen", arm::app::asr::g_ctxLen);  /* Left and right context length (MFCC feat vectors). */
    caseContext.Set<int>("kwsframeLength", arm::app::kws::g_FrameLength);
    caseContext.Set<int>("kwsframeStride", arm::app::kws::g_FrameStride);
    caseContext.Set<float>("kwsscoreThreshold", arm::app::kws::g_ScoreThreshold);  /* Normalised score threshold. */
    caseContext.Set<uint32_t>("kwsNumMfcc", arm::app::kws::g_NumMfcc);
    caseContext.Set<uint32_t>("kwsNumAudioWins", arm::app::kws::g_NumAudioWins);

    caseContext.Set<int>("asrframeLength", arm::app::asr::g_FrameLength);
    caseContext.Set<int>("asrframeStride", arm::app::asr::g_FrameStride);
    caseContext.Set<float>("asrscoreThreshold", arm::app::asr::g_ScoreThreshold);  /* Normalised score threshold. */

    KwsClassifier kwsClassifier;            /* Classifier wrapper object. */
    arm::app::AsrClassifier asrClassifier;  /* Classifier wrapper object. */
    caseContext.Set<arm::app::Classifier&>("kwsclassifier", kwsClassifier);
    caseContext.Set<arm::app::AsrClassifier&>("asrclassifier", asrClassifier);

    caseContext.Set<arm::app::audio::asr::Preprocess&>("preprocess", prep);
    caseContext.Set<arm::app::audio::asr::Postprocess&>("postprocess", postp);

    std::vector<std::string> asrLabels;
    arm::app::asr::GetLabelsVector(asrLabels);
    std::vector<std::string> kwsLabels;
    arm::app::kws::GetLabelsVector(kwsLabels);
    caseContext.Set<const std::vector<std::string>&>("asrlabels", asrLabels);
    caseContext.Set<const std::vector<std::string>&>("kwslabels", kwsLabels);

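    /* Note: in this use case the KWS model runs first; only when the trigger
     * keyword is spotted does the handler run ASR on the audio that follows, so
     * the chosen keyword must exist in the KWS label set. */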
    /* KWS keyword that triggers ASR and associated checks. */
    std::string triggerKeyword = std::string("yes");
    if (std::find(kwsLabels.begin(), kwsLabels.end(), triggerKeyword) != kwsLabels.end()) {
        caseContext.Set<const std::string&>("triggerkeyword", triggerKeyword);
    }
    else {
        printf_err("Selected trigger keyword not found in labels file\n");
        return;
    }

    /* Main loop. */
    bool executionSuccessful = true;
    constexpr bool bUseMenu = NUMBER_OF_FILES > 1;

    do {
        int menuOption = MENU_OPT_RUN_INF_NEXT;
        if (bUseMenu) {
            DisplayMenu();
            menuOption = arm::app::ReadUserInputAsInt();
            printf("\n");
        }
        switch (menuOption) {
            case MENU_OPT_RUN_INF_NEXT:
                executionSuccessful = ClassifyAudioHandler(
                        caseContext,
                        caseContext.Get<uint32_t>("clipIndex"),
                        false);
                break;
            case MENU_OPT_RUN_INF_CHOSEN: {
                printf("    Enter the audio clip index [0, %d]: ", NUMBER_OF_FILES - 1);
                fflush(stdout);
                auto clipIndex = static_cast<uint32_t>(arm::app::ReadUserInputAsInt());
                executionSuccessful = ClassifyAudioHandler(caseContext,
                                                           clipIndex,
                                                           false);
                break;
            }
            case MENU_OPT_RUN_INF_ALL:
                executionSuccessful = ClassifyAudioHandler(
                        caseContext,
                        caseContext.Get<uint32_t>("clipIndex"),
                        true);
                break;
            case MENU_OPT_SHOW_MODEL_INFO:
                /* Combine the results so a failure from either model is not masked. */
                executionSuccessful = kwsModel.ShowModelInfoHandler();
                executionSuccessful &= asrModel.ShowModelInfoHandler();
                break;
            case MENU_OPT_LIST_AUDIO_CLIPS:
                executionSuccessful = ListFilesHandler(caseContext);
                break;
            default:
                printf("Incorrect choice, try again.");
                break;
        }
    } while (executionSuccessful && bUseMenu);
    info("Main loop terminated.\n");
}

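/* Note: the ASR input tensor packs each MFCC vector together with its first and
 * second derivatives (deltas), so the column count is expected to be a multiple of
 * three; dividing by three recovers the number of base MFCC features per window. */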
static uint32_t GetNumMfccFeatures(const arm::app::Model& model)
{
    TfLiteTensor* inputTensor = model.GetInputTensor(0);
    const int inputCols = inputTensor->dims->data[arm::app::Wav2LetterModel::ms_inputColsIdx];
    if (0 != inputCols % 3) {
        printf_err("Number of input columns is not a multiple of 3\n");
    }
    return std::max(inputCols / 3, 0);
}

static uint32_t GetNumMfccFeatureVectors(const arm::app::Model& model)
{
    TfLiteTensor* inputTensor = model.GetInputTensor(0);
    const int inputRows = inputTensor->dims->data[arm::app::Wav2LetterModel::ms_inputRowsIdx];
    return std::max(inputRows, 0);
}

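/* Note: the model's output typically has fewer time-steps (rows) than its input, so
 * a context length expressed in input feature-vectors is scaled by the input/output
 * row ratio to get the equivalent number of output time-steps. */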
static uint32_t GetOutputContextLen(const arm::app::Model& model, const uint32_t inputCtxLen)
{
    const uint32_t inputRows = GetNumMfccFeatureVectors(model);
    const uint32_t inputInnerLen = inputRows - (2 * inputCtxLen);
    constexpr uint32_t ms_outputRowsIdx = arm::app::Wav2LetterModel::ms_outputRowsIdx;

    /* Check to make sure that the input tensor supports the above context and inner lengths. */
    if (inputRows <= 2 * inputCtxLen || inputRows <= inputInnerLen) {
        printf_err("Input rows not compatible with ctx of %" PRIu32 "\n",
                   inputCtxLen);
        return 0;
    }

    TfLiteTensor* outputTensor = model.GetOutputTensor(0);
    const uint32_t outputRows = std::max(outputTensor->dims->data[ms_outputRowsIdx], 0);

    const float tensorColRatio = static_cast<float>(inputRows) /
                                 static_cast<float>(outputRows);

    return std::round(static_cast<float>(inputCtxLen) / tensorColRatio);
}

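/* Note: the inner length is simply what remains of the output rows once the left
 * and right context sections have been accounted for. */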
static uint32_t GetOutputInnerLen(const arm::app::Model& model,
                                  const uint32_t outputCtxLen)
{
    constexpr uint32_t ms_outputRowsIdx = arm::app::Wav2LetterModel::ms_outputRowsIdx;
    TfLiteTensor* outputTensor = model.GetOutputTensor(0);
    const uint32_t outputRows = std::max(outputTensor->dims->data[ms_outputRowsIdx], 0);
    return (outputRows - (2 * outputCtxLen));
}