/*
 * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "UseCaseHandler.hpp"

#include "hal.h"
#include "InputFiles.hpp"
#include "AudioUtils.hpp"
#include "ImageUtils.hpp"
#include "UseCaseCommonUtils.hpp"
#include "MicroNetKwsModel.hpp"
#include "MicroNetKwsMfcc.hpp"
#include "Classifier.hpp"
#include "KwsResult.hpp"
#include "Wav2LetterMfcc.hpp"
#include "Wav2LetterPreprocess.hpp"
#include "Wav2LetterPostprocess.hpp"
#include "AsrResult.hpp"
#include "AsrClassifier.hpp"
#include "OutputDecode.hpp"
#include "log_macros.h"


using KwsClassifier = arm::app::Classifier;

namespace arm {
namespace app {

    enum AsrOutputReductionAxis {
        AxisRow = 1,
        AxisCol = 2
    };

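    /**
     * @brief   Output of the KWS stage: a success flag, plus the region of
     *          the audio clip (start pointer and number of samples) that the
     *          ASR stage should process once a keyword has been spotted.
     **/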
    struct KWSOutput {
        bool executionSuccess = false;
        const int16_t* asrAudioStart = nullptr;
        int32_t asrAudioSamples = 0;
    };

    /**
     * @brief           Presents KWS inference results using the data presentation
     *                  object.
     * @param[in]       platform    Reference to the hal platform object.
     * @param[in]       results     Vector of classification results to be displayed.
     * @return          true if successful, false otherwise.
     **/
    static bool PresentInferenceResult(hal_platform& platform, std::vector<arm::app::kws::KwsResult>& results);

    /**
     * @brief           Presents ASR inference results using the data presentation
     *                  object.
     * @param[in]       platform    Reference to the hal platform object.
     * @param[in]       results     Vector of classification results to be displayed.
     * @return          true if successful, false otherwise.
     **/
    static bool PresentInferenceResult(hal_platform& platform, std::vector<arm::app::asr::AsrResult>& results);

    /**
     * @brief Returns a function to perform feature calculation and populates input tensor data with
     * MFCC data.
     *
     * Input tensor data type check is performed to choose the correct MFCC feature data type.
     * If the tensor has an integer data type then the original features are quantised.
     *
     * Warning: the MFCC calculator provided as input must outlive the returned function.
     *
     * @param[in]       mfcc          MFCC feature calculator.
     * @param[in,out]   inputTensor   Input tensor pointer to store calculated features.
     * @param[in]       cacheSize     Size of the feature vectors cache (number of feature vectors).
     *
     * @return          Function to be called providing audio sample and sliding window index.
     **/
    static std::function<void (std::vector<int16_t>&, int, bool, size_t)>
            GetFeatureCalculator(audio::MicroNetMFCC&  mfcc,
                                 TfLiteTensor*         inputTensor,
                                 size_t                cacheSize);

    /**
     * @brief Performs the KWS pipeline.
     * @param[in,out]   ctx   Reference to the application context object.
     *
     * @return KWSOutput struct containing a pointer to the audio data where ASR should begin
     *         and how much data to process.
     */
    static KWSOutput doKws(ApplicationContext& ctx) {
        constexpr uint32_t dataPsnTxtInfStartX = 20;
        constexpr uint32_t dataPsnTxtInfStartY = 40;

        constexpr int minTensorDims = static_cast<int>(
            (arm::app::MicroNetKwsModel::ms_inputRowsIdx > arm::app::MicroNetKwsModel::ms_inputColsIdx)?
             arm::app::MicroNetKwsModel::ms_inputRowsIdx : arm::app::MicroNetKwsModel::ms_inputColsIdx);
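
        /* Note: the dimension check below only requires the input tensor to
         * have at least as many dimensions as the larger of the row/column
         * indices that are dereferenced later on. */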

        KWSOutput output;

        auto& profiler = ctx.Get<Profiler&>("profiler");
        auto& kwsModel = ctx.Get<Model&>("kwsmodel");
        if (!kwsModel.IsInited()) {
            printf_err("KWS model has not been initialised\n");
            return output;
        }

        const int kwsFrameLength = ctx.Get<int>("kwsframeLength");
        const int kwsFrameStride = ctx.Get<int>("kwsframeStride");
        const float kwsScoreThreshold = ctx.Get<float>("kwsscoreThreshold");

        TfLiteTensor* kwsOutputTensor = kwsModel.GetOutputTensor(0);
        TfLiteTensor* kwsInputTensor = kwsModel.GetInputTensor(0);

        if (!kwsInputTensor->dims) {
            printf_err("Invalid input tensor dims\n");
            return output;
        } else if (kwsInputTensor->dims->size < minTensorDims) {
            printf_err("Input tensor dimension should be >= %d\n", minTensorDims);
            return output;
        }

        const uint32_t kwsNumMfccFeats = ctx.Get<uint32_t>("kwsNumMfcc");
        const uint32_t kwsNumAudioWindows = ctx.Get<uint32_t>("kwsNumAudioWins");

        audio::MicroNetMFCC kwsMfcc = audio::MicroNetMFCC(kwsNumMfccFeats, kwsFrameLength);
        kwsMfcc.Init();

        /* Deduce the data length required for 1 KWS inference from the network parameters. */
        auto kwsAudioDataWindowSize = kwsNumAudioWindows * kwsFrameStride +
                                      (kwsFrameLength - kwsFrameStride);
        auto kwsMfccWindowSize = kwsFrameLength;
        auto kwsMfccWindowStride = kwsFrameStride;
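
        /* Illustrative numbers only (actual values come from the model and
         * build configuration): with, say, 49 audio windows, a frame stride
         * of 320 samples and a frame length of 640 samples, one inference
         * consumes 49 * 320 + (640 - 320) = 16000 samples, i.e. 1 second of
         * audio at 16 kHz. */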

        /* We are choosing to move by half the window size => for a 1 second window size,
         * this means an overlap of 0.5 seconds. */
        auto kwsAudioDataStride = kwsAudioDataWindowSize / 2;

        info("KWS audio data window size %" PRIu32 "\n", kwsAudioDataWindowSize);
alexander3c798932021-03-26 21:42:19 +0000147
148 /* Stride must be multiple of mfcc features window stride to re-use features. */
149 if (0 != kwsAudioDataStride % kwsMfccWindowStride) {
150 kwsAudioDataStride -= kwsAudioDataStride % kwsMfccWindowStride;
151 }
152
153 auto kwsMfccVectorsInAudioStride = kwsAudioDataStride/kwsMfccWindowStride;
154
        /* We expect to be sampling 1 second worth of data at a time.
         * NOTE: This is only used for time stamp calculation. */
        const float kwsAudioParamsSecondsPerSample = 1.0 / audio::MicroNetMFCC::ms_defaultSamplingFreq;
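
        /* Assuming the default sampling frequency is 16 kHz, this works out
         * at 1/16000 s, i.e. 62.5 us per sample (illustrative figure). */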

        auto currentIndex = ctx.Get<uint32_t>("clipIndex");

        /* Creating a mfcc features sliding window for the data required for 1 inference. */
        auto kwsAudioMFCCWindowSlider = audio::SlidingWindow<const int16_t>(
                get_audio_array(currentIndex),
                kwsAudioDataWindowSize, kwsMfccWindowSize,
                kwsMfccWindowStride);

        /* Creating a sliding window through the whole audio clip. */
        auto audioDataSlider = audio::SlidingWindow<const int16_t>(
                get_audio_array(currentIndex),
                get_audio_array_size(currentIndex),
                kwsAudioDataWindowSize, kwsAudioDataStride);

        /* Calculate number of the feature vectors in the window overlap region.
         * These feature vectors will be reused. */
        size_t numberOfReusedFeatureVectors = kwsAudioMFCCWindowSlider.TotalStrides() + 1
                                              - kwsMfccVectorsInAudioStride;
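
        /* TotalStrides() + 1 is the total number of feature vectors in one
         * inference window; subtracting the vectors covered by a single audio
         * stride leaves the vectors shared between consecutive windows, which
         * is the feature cache size used below. */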

        auto kwsMfccFeatureCalc = GetFeatureCalculator(kwsMfcc, kwsInputTensor,
                                                       numberOfReusedFeatureVectors);

        if (!kwsMfccFeatureCalc) {
            return output;
        }

        /* Container for KWS results. */
        std::vector<arm::app::kws::KwsResult> kwsResults;

        /* Display message on the LCD - inference running. */
        auto& platform = ctx.Get<hal_platform&>("platform");
        std::string str_inf{"Running KWS inference... "};
        platform.data_psn->present_data_text(
                            str_inf.c_str(), str_inf.size(),
                            dataPsnTxtInfStartX, dataPsnTxtInfStartY, false);

        info("Running KWS inference on audio clip %" PRIu32 " => %s\n",
             currentIndex, get_filename(currentIndex));

        /* Start sliding through audio clip. */
        while (audioDataSlider.HasNext()) {
            const int16_t* inferenceWindow = audioDataSlider.Next();

            /* We moved to the next window - set the features sliding to the new address. */
            kwsAudioMFCCWindowSlider.Reset(inferenceWindow);

            /* The first window does not have cache ready. */
            bool useCache = audioDataSlider.Index() > 0 && numberOfReusedFeatureVectors > 0;

            /* Start calculating features inside one audio sliding window. */
            while (kwsAudioMFCCWindowSlider.HasNext()) {
                const int16_t* kwsMfccWindow = kwsAudioMFCCWindowSlider.Next();
                std::vector<int16_t> kwsMfccAudioData =
                    std::vector<int16_t>(kwsMfccWindow, kwsMfccWindow + kwsMfccWindowSize);

                /* Compute features for this window and write them to input tensor. */
                kwsMfccFeatureCalc(kwsMfccAudioData,
                                   kwsAudioMFCCWindowSlider.Index(),
                                   useCache,
                                   kwsMfccVectorsInAudioStride);
            }

            info("Inference %zu/%zu\n", audioDataSlider.Index() + 1,
                 audioDataSlider.TotalStrides() + 1);

            /* Run inference over this audio clip sliding window. */
            if (!RunInference(kwsModel, profiler)) {
                printf_err("KWS inference failed\n");
                return output;
            }

            std::vector<ClassificationResult> kwsClassificationResult;
            auto& kwsClassifier = ctx.Get<KwsClassifier&>("kwsclassifier");

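            /* Take the top classification result only; the trailing boolean
             * requests softmax normalisation of the output vector first (an
             * assumption based on this project's Classifier interface). */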
            kwsClassifier.GetClassificationResults(
                            kwsOutputTensor, kwsClassificationResult,
                            ctx.Get<std::vector<std::string>&>("kwslabels"), 1, true);

            kwsResults.emplace_back(
                kws::KwsResult(
                    kwsClassificationResult,
                    audioDataSlider.Index() * kwsAudioParamsSecondsPerSample * kwsAudioDataStride,
                    audioDataSlider.Index(), kwsScoreThreshold)
                );

            /* Keyword detected. */
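            /* Hand over to ASR everything that follows the triggering window:
             * asrAudioStart points just past the current KWS window, and
             * asrAudioSamples counts the samples remaining from that point to
             * the end of the clip. */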
            if (kwsClassificationResult[0].m_label == ctx.Get<const std::string&>("triggerkeyword")) {
                output.asrAudioStart = inferenceWindow + kwsAudioDataWindowSize;
                output.asrAudioSamples = get_audio_array_size(currentIndex) -
                                         (audioDataSlider.NextWindowStartIndex() -
                                          kwsAudioDataStride + kwsAudioDataWindowSize);
                break;
            }

#if VERIFY_TEST_OUTPUT
            arm::app::DumpTensor(kwsOutputTensor);
#endif /* VERIFY_TEST_OUTPUT */

        } /* while (audioDataSlider.HasNext()) */

        /* Erase. */
        str_inf = std::string(str_inf.size(), ' ');
        platform.data_psn->present_data_text(
                            str_inf.c_str(), str_inf.size(),
                            dataPsnTxtInfStartX, dataPsnTxtInfStartY, false);

        if (!PresentInferenceResult(platform, kwsResults)) {
            return output;
        }

        profiler.PrintProfilingResult();

        output.executionSuccess = true;
        return output;
    }

    /**
     * @brief Performs the ASR pipeline.
     *
     * @param[in,out]   ctx         Reference to the application context object.
     * @param[in]       kwsOutput   Struct containing a pointer to the audio data where ASR should begin
     *                              and how much data to process.
     * @return true if the pipeline executed without failure.
     */
    static bool doAsr(ApplicationContext& ctx, const KWSOutput& kwsOutput) {
        constexpr uint32_t dataPsnTxtInfStartX = 20;
        constexpr uint32_t dataPsnTxtInfStartY = 40;

        auto& profiler = ctx.Get<Profiler&>("profiler");
        auto& platform = ctx.Get<hal_platform&>("platform");
        platform.data_psn->clear(COLOR_BLACK);

        /* Get model reference. */
        auto& asrModel = ctx.Get<Model&>("asrmodel");
        if (!asrModel.IsInited()) {
            printf_err("ASR model has not been initialised\n");
            return false;
        }

        /* Get score threshold to be applied for the classifier (post-inference). */
        auto asrScoreThreshold = ctx.Get<float>("asrscoreThreshold");

        /* Dimensions of the tensor should have been verified by the callee. */
        TfLiteTensor* asrInputTensor = asrModel.GetInputTensor(0);
        TfLiteTensor* asrOutputTensor = asrModel.GetOutputTensor(0);
        const uint32_t asrInputRows = asrInputTensor->dims->data[arm::app::Wav2LetterModel::ms_inputRowsIdx];

        /* Populate ASR MFCC related parameters. */
        auto asrMfccParamsWinLen = ctx.Get<uint32_t>("asrframeLength");
        auto asrMfccParamsWinStride = ctx.Get<uint32_t>("asrframeStride");

        /* Populate ASR inference context and inner lengths for input. */
        auto asrInputCtxLen = ctx.Get<uint32_t>("ctxLen");
        const uint32_t asrInputInnerLen = asrInputRows - (2 * asrInputCtxLen);

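        /* The Wav2Letter input is laid out as [left context | inner | right
         * context] rows; only the inner rows advance the sliding window while
         * the context rows overlap with neighbouring windows. Illustrative
         * figures (actual values are model dependent): 296 input rows with a
         * context length of 98 give 296 - 2 * 98 = 100 inner rows. */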
        /* Make sure the input tensor supports the above context and inner lengths. */
        if (asrInputRows <= 2 * asrInputCtxLen || asrInputRows <= asrInputInnerLen) {
            printf_err("ASR input rows not compatible with ctx length %" PRIu32 "\n",
                       asrInputCtxLen);
            return false;
        }

        /* Audio data stride corresponds to inputInnerLen feature vectors. */
        const uint32_t asrAudioParamsWinLen = (asrInputRows - 1) *
                                              asrMfccParamsWinStride + (asrMfccParamsWinLen);
        const uint32_t asrAudioParamsWinStride = asrInputInnerLen * asrMfccParamsWinStride;
        const float asrAudioParamsSecondsPerSample =
                                              (1.0 / audio::Wav2LetterMFCC::ms_defaultSamplingFreq);

        /* Get pre/post-processing objects. */
        auto& asrPrep = ctx.Get<audio::asr::Preprocess&>("preprocess");
        auto& asrPostp = ctx.Get<audio::asr::Postprocess&>("postprocess");

        /* Set default reduction axis for post-processing. */
        const uint32_t reductionAxis = arm::app::Wav2LetterModel::ms_outputRowsIdx;

        /* Get the remaining audio buffer and respective size from KWS results. */
        const int16_t* audioArr = kwsOutput.asrAudioStart;
        const uint32_t audioArrSize = kwsOutput.asrAudioSamples;

        /* Audio clip must have enough samples to produce 1 MFCC feature. */
        std::vector<int16_t> audioBuffer = std::vector<int16_t>(audioArr, audioArr + audioArrSize);
        if (audioArrSize < asrMfccParamsWinLen) {
            printf_err("Not enough audio samples, minimum needed is %" PRIu32 "\n",
                       asrMfccParamsWinLen);
            return false;
        }

        /* Initialise an audio slider. */
        auto audioDataSlider = audio::FractionalSlidingWindow<const int16_t>(
                audioBuffer.data(),
                audioBuffer.size(),
                asrAudioParamsWinLen,
                asrAudioParamsWinStride);

        /* Declare a container for results. */
        std::vector<arm::app::asr::AsrResult> asrResults;

        /* Display message on the LCD - inference running. */
        std::string str_inf{"Running ASR inference... "};
        platform.data_psn->present_data_text(
                            str_inf.c_str(), str_inf.size(),
                            dataPsnTxtInfStartX, dataPsnTxtInfStartY, false);

        size_t asrInferenceWindowLen = asrAudioParamsWinLen;

        /* Start sliding through audio clip. */
        while (audioDataSlider.HasNext()) {

            /* If not enough audio is left, see how much can be sent for processing. */
            size_t nextStartIndex = audioDataSlider.NextWindowStartIndex();
            if (nextStartIndex + asrAudioParamsWinLen > audioBuffer.size()) {
                asrInferenceWindowLen = audioBuffer.size() - nextStartIndex;
            }

            const int16_t* asrInferenceWindow = audioDataSlider.Next();

            info("Inference %zu/%zu\n", audioDataSlider.Index() + 1,
                 static_cast<size_t>(ceilf(audioDataSlider.FractionalTotalStrides() + 1)));

            /* Calculate MFCCs, deltas and populate the input tensor. */
            asrPrep.Invoke(asrInferenceWindow, asrInferenceWindowLen, asrInputTensor);

            /* Run inference over this audio clip sliding window. */
            if (!RunInference(asrModel, profiler)) {
                printf_err("ASR inference failed\n");
                return false;
            }

            /* Post-process. */
            asrPostp.Invoke(asrOutputTensor, reductionAxis, !audioDataSlider.HasNext());
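
            /* The last argument flags the final window, which post-processing
             * treats differently when trimming the context sections (an
             * assumption based on the Invoke() call shape; see
             * Wav2LetterPostprocess for the authoritative behaviour). */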

            /* Get results. */
            std::vector<ClassificationResult> asrClassificationResult;
            auto& asrClassifier = ctx.Get<AsrClassifier&>("asrclassifier");
            asrClassifier.GetClassificationResults(
                    asrOutputTensor, asrClassificationResult,
                    ctx.Get<std::vector<std::string>&>("asrlabels"), 1);

            asrResults.emplace_back(asr::AsrResult(asrClassificationResult,
                                                   (audioDataSlider.Index() *
                                                    asrAudioParamsSecondsPerSample *
                                                    asrAudioParamsWinStride),
                                                   audioDataSlider.Index(), asrScoreThreshold));

#if VERIFY_TEST_OUTPUT
            arm::app::DumpTensor(asrOutputTensor, asrOutputTensor->dims->data[arm::app::Wav2LetterModel::ms_outputColsIdx]);
#endif /* VERIFY_TEST_OUTPUT */

            /* Erase. */
            str_inf = std::string(str_inf.size(), ' ');
            platform.data_psn->present_data_text(
                                str_inf.c_str(), str_inf.size(),
                                dataPsnTxtInfStartX, dataPsnTxtInfStartY, false);
        }
        if (!PresentInferenceResult(platform, asrResults)) {
            return false;
        }

        profiler.PrintProfilingResult();

        return true;
    }

    /* Audio inference classification handler. */
    bool ClassifyAudioHandler(ApplicationContext& ctx, uint32_t clipIndex, bool runAll)
    {
        auto& platform = ctx.Get<hal_platform&>("platform");
        platform.data_psn->clear(COLOR_BLACK);

        /* If the request has a valid size, set the audio index. */
        if (clipIndex < NUMBER_OF_FILES) {
            if (!SetAppCtxIfmIdx(ctx, clipIndex, "kws_asr")) {
                return false;
            }
        }

        auto startClipIdx = ctx.Get<uint32_t>("clipIndex");

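        /* IncrementAppCtxIfmIdx() advances and wraps the clip index, so with
         * runAll set the loop below visits every clip once and stops when the
         * index returns to its starting value. */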
        do {
            KWSOutput kwsOutput = doKws(ctx);
            if (!kwsOutput.executionSuccess) {
                return false;
            }

            if (kwsOutput.asrAudioStart != nullptr && kwsOutput.asrAudioSamples > 0) {
                info("Keyword spotted\n");
                if (!doAsr(ctx, kwsOutput)) {
                    printf_err("ASR failed\n");
                    return false;
                }
            }

            IncrementAppCtxIfmIdx(ctx, "kws_asr");

        } while (runAll && ctx.Get<uint32_t>("clipIndex") != startClipIdx);

        return true;
    }


    static bool PresentInferenceResult(hal_platform& platform,
                                       std::vector<arm::app::kws::KwsResult>& results)
    {
        constexpr uint32_t dataPsnTxtStartX1 = 20;
        constexpr uint32_t dataPsnTxtStartY1 = 30;
        constexpr uint32_t dataPsnTxtYIncr   = 16;  /* Row index increment. */

        platform.data_psn->set_text_color(COLOR_GREEN);

        /* Display each result. */
        uint32_t rowIdx1 = dataPsnTxtStartY1 + 2 * dataPsnTxtYIncr;

        for (uint32_t i = 0; i < results.size(); ++i) {

            std::string topKeyword{"<none>"};
            float score = 0.f;

            if (!results[i].m_resultVec.empty()) {
                topKeyword = results[i].m_resultVec[0].m_label;
                score = results[i].m_resultVec[0].m_normalisedVal;
            }

            std::string resultStr =
                std::string{"@"} + std::to_string(results[i].m_timeStamp) +
                std::string{"s: "} + topKeyword + std::string{" ("} +
                std::to_string(static_cast<int>(score * 100)) + std::string{"%)"};

            platform.data_psn->present_data_text(
                    resultStr.c_str(), resultStr.size(),
                    dataPsnTxtStartX1, rowIdx1, 0);
            rowIdx1 += dataPsnTxtYIncr;

            info("For timestamp: %f (inference #: %" PRIu32 "); threshold: %f\n",
                 results[i].m_timeStamp, results[i].m_inferenceNumber,
                 results[i].m_threshold);
            for (uint32_t j = 0; j < results[i].m_resultVec.size(); ++j) {
                info("\t\tlabel @ %" PRIu32 ": %s, score: %f\n", j,
                     results[i].m_resultVec[j].m_label.c_str(),
                     results[i].m_resultVec[j].m_normalisedVal);
            }
        }

        return true;
    }

    static bool PresentInferenceResult(hal_platform& platform, std::vector<arm::app::asr::AsrResult>& results)
    {
        constexpr uint32_t dataPsnTxtStartX1 = 20;
        constexpr uint32_t dataPsnTxtStartY1 = 80;
        constexpr bool allow_multiple_lines = true;

        platform.data_psn->set_text_color(COLOR_GREEN);

        /* Results from multiple inferences should be combined before processing. */
        std::vector<arm::app::ClassificationResult> combinedResults;
        for (auto& result : results) {
            combinedResults.insert(combinedResults.end(),
                                   result.m_resultVec.begin(),
                                   result.m_resultVec.end());
        }

        for (auto& result : results) {
            /* Get the final result string using the decoder. */
            std::string infResultStr = audio::asr::DecodeOutput(result.m_resultVec);

            info("Result for inf %" PRIu32 ": %s\n", result.m_inferenceNumber,
                 infResultStr.c_str());
        }

        std::string finalResultStr = audio::asr::DecodeOutput(combinedResults);

        platform.data_psn->present_data_text(
                finalResultStr.c_str(), finalResultStr.size(),
                dataPsnTxtStartX1, dataPsnTxtStartY1, allow_multiple_lines);

        info("Final result: %s\n", finalResultStr.c_str());
        return true;
    }

    /**
     * @brief Generic feature calculator factory.
     *
     * Returns lambda function to compute features using features cache.
     * Real features math is done by a lambda function provided as a parameter.
     * Features are written to input tensor memory.
     *
     * @tparam T            feature vector type.
     * @param inputTensor   model input tensor pointer.
     * @param cacheSize     number of feature vectors to cache. Defined by the sliding window overlap.
     * @param compute       features calculator function.
     * @return              lambda function to compute features.
     **/
    template<class T>
    std::function<void (std::vector<int16_t>&, size_t, bool, size_t)>
    FeatureCalc(TfLiteTensor* inputTensor, size_t cacheSize,
                std::function<std::vector<T> (std::vector<int16_t>&)> compute)
    {
        /* Feature cache to be captured by lambda function. */
        static std::vector<std::vector<T>> featureCache = std::vector<std::vector<T>>(cacheSize);

        return [=](std::vector<int16_t>& audioDataWindow,
                   size_t index,
                   bool useCache,
                   size_t featuresOverlapIndex)
        {
            T* tensorData = tflite::GetTensorData<T>(inputTensor);
            std::vector<T> features;

            /* Reuse features from the cache if the cache is ready and the sliding windows overlap.
             * The overlap is at the beginning of the sliding window, with the size of the feature cache. */
            if (useCache && index < featureCache.size()) {
                features = std::move(featureCache[index]);
            } else {
                features = std::move(compute(audioDataWindow));
            }
            auto size = features.size();
            auto sizeBytes = sizeof(T) * size;
            std::memcpy(tensorData + (index * size), features.data(), sizeBytes);

            /* Start renewing the cache as soon as the iteration goes beyond the windows' overlap. */
            if (index >= featuresOverlapIndex) {
                featureCache[index - featuresOverlapIndex] = std::move(features);
            }
        };
    }

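    /* Usage sketch (illustrative only): for a float model the factory is
     * invoked along these lines, which is exactly how GetFeatureCalculator()
     * below wires it up:
     *
     *   auto calc = FeatureCalc<float>(inputTensor, cacheSize,
     *       [&mfcc](std::vector<int16_t>& window) { return mfcc.MfccCompute(window); });
     *   calc(audioWindow, windowIndex, useCache, overlapIndex);
     */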
    template std::function<void (std::vector<int16_t>&, size_t, bool, size_t)>
    FeatureCalc<int8_t>(TfLiteTensor* inputTensor,
                        size_t cacheSize,
                        std::function<std::vector<int8_t> (std::vector<int16_t>&)> compute);

    template std::function<void (std::vector<int16_t>&, size_t, bool, size_t)>
    FeatureCalc<uint8_t>(TfLiteTensor* inputTensor,
                         size_t cacheSize,
                         std::function<std::vector<uint8_t> (std::vector<int16_t>&)> compute);

    template std::function<void (std::vector<int16_t>&, size_t, bool, size_t)>
    FeatureCalc<int16_t>(TfLiteTensor* inputTensor,
                         size_t cacheSize,
                         std::function<std::vector<int16_t> (std::vector<int16_t>&)> compute);

    template std::function<void (std::vector<int16_t>&, size_t, bool, size_t)>
    FeatureCalc<float>(TfLiteTensor* inputTensor,
                       size_t cacheSize,
                       std::function<std::vector<float> (std::vector<int16_t>&)> compute);


    static std::function<void (std::vector<int16_t>&, int, bool, size_t)>
    GetFeatureCalculator(audio::MicroNetMFCC& mfcc, TfLiteTensor* inputTensor, size_t cacheSize)
    {
        std::function<void (std::vector<int16_t>&, size_t, bool, size_t)> mfccFeatureCalc;

        TfLiteQuantization quant = inputTensor->quantization;

        if (kTfLiteAffineQuantization == quant.type) {

            auto* quantParams = (TfLiteAffineQuantization*) quant.params;
            const float quantScale = quantParams->scale->data[0];
            const int quantOffset = quantParams->zero_point->data[0];

            switch (inputTensor->type) {
                case kTfLiteInt8: {
                    mfccFeatureCalc = FeatureCalc<int8_t>(inputTensor,
                                                          cacheSize,
                                                          [=, &mfcc](std::vector<int16_t>& audioDataWindow) {
                                                              return mfcc.MfccComputeQuant<int8_t>(audioDataWindow,
                                                                                                   quantScale,
                                                                                                   quantOffset);
                                                          }
                    );
                    break;
                }
                case kTfLiteUInt8: {
                    mfccFeatureCalc = FeatureCalc<uint8_t>(inputTensor,
                                                           cacheSize,
                                                           [=, &mfcc](std::vector<int16_t>& audioDataWindow) {
                                                               return mfcc.MfccComputeQuant<uint8_t>(audioDataWindow,
                                                                                                     quantScale,
                                                                                                     quantOffset);
                                                           }
                    );
                    break;
                }
                case kTfLiteInt16: {
                    mfccFeatureCalc = FeatureCalc<int16_t>(inputTensor,
                                                           cacheSize,
                                                           [=, &mfcc](std::vector<int16_t>& audioDataWindow) {
                                                               return mfcc.MfccComputeQuant<int16_t>(audioDataWindow,
                                                                                                     quantScale,
                                                                                                     quantOffset);
                                                           }
                    );
                    break;
                }
                default:
                    printf_err("Tensor type %s not supported\n", TfLiteTypeGetName(inputTensor->type));
            }


        } else {
            mfccFeatureCalc = FeatureCalc<float>(inputTensor,
                                                 cacheSize,
                                                 [&mfcc](std::vector<int16_t>& audioDataWindow) {
                                                     return mfcc.MfccCompute(audioDataWindow);
                                                 });
        }
        return mfccFeatureCalc;
    }
} /* namespace app */
} /* namespace arm */