/*
 * Copyright (c) 2021 Arm Limited. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "UseCaseHandler.hpp"

#include "hal.h"
#include "InputFiles.hpp"
#include "AudioUtils.hpp"
#include "UseCaseCommonUtils.hpp"
#include "DsCnnModel.hpp"
#include "DsCnnMfcc.hpp"
#include "Classifier.hpp"
#include "KwsResult.hpp"
#include "Wav2LetterMfcc.hpp"
#include "Wav2LetterPreprocess.hpp"
#include "Wav2LetterPostprocess.hpp"
#include "AsrResult.hpp"
#include "AsrClassifier.hpp"
#include "OutputDecode.hpp"


using KwsClassifier = arm::app::Classifier;

namespace arm {
namespace app {

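    /* Axis indices that can be used when reducing the ASR output tensor.
     * Note: the post-processing below selects its reduction axis via
     * Wav2LetterModel::ms_outputRowsIdx rather than through this enum. */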
    enum AsrOutputReductionAxis {
        AxisRow = 1,
        AxisCol = 2
    };

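    /**
     * @brief Aggregate result of the KWS stage: whether it executed
     *        successfully and, if a keyword was spotted, which slice of
     *        the audio clip the ASR stage should consume next.
     **/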
    struct KWSOutput {
        bool executionSuccess = false;
        const int16_t* asrAudioStart = nullptr;
        int32_t asrAudioSamples = 0;
    };

    /**
     * @brief           Helper function to increment current audio clip index.
     * @param[in,out]   ctx   Reference to the application context object.
     **/
    static void _IncrementAppCtxClipIdx(ApplicationContext& ctx);

    /**
     * @brief           Helper function to set the audio clip index.
     * @param[in,out]   ctx   Reference to the application context object.
     * @param[in]       idx   Value to be set.
     * @return          true if index is set, false otherwise.
     **/
    static bool _SetAppCtxClipIdx(ApplicationContext& ctx, uint32_t idx);

    /**
     * @brief           Presents KWS inference results using the data presentation
     *                  object.
     * @param[in]       platform   Reference to the hal platform object.
     * @param[in]       results    Vector of KWS classification results to be displayed.
     * @return          true if successful, false otherwise.
     **/
    static bool _PresentInferenceResult(hal_platform& platform,
                                        std::vector<arm::app::kws::KwsResult>& results);

    /**
     * @brief           Presents ASR inference results using the data presentation
     *                  object.
     * @param[in]       platform   Reference to the hal platform object.
     * @param[in]       results    Vector of ASR classification results to be displayed.
     * @return          true if successful, false otherwise.
     **/
    static bool _PresentInferenceResult(hal_platform& platform,
                                        std::vector<arm::app::asr::AsrResult>& results);

    /**
     * @brief Returns a function to perform feature calculation and populates input tensor data with
     *        MFCC data.
     *
     * Input tensor data type check is performed to choose the correct MFCC feature data type.
     * If the tensor has an integer data type then the original features are quantised.
     *
     * Warning: the MFCC calculator provided as input must have the same lifetime as the returned function.
     *
     * @param[in]       mfcc          MFCC feature calculator.
     * @param[in,out]   inputTensor   Input tensor pointer to store calculated features.
     * @param[in]       cacheSize     Size of the feature vectors cache (number of feature vectors).
     *
     * @return          Function to be called providing audio sample and sliding window index.
     **/
    static std::function<void (std::vector<int16_t>&, int, bool, size_t)>
    GetFeatureCalculator(audio::DsCnnMFCC& mfcc,
                         TfLiteTensor* inputTensor,
                         size_t cacheSize);

    /**
     * @brief           Performs the KWS pipeline.
     * @param[in,out]   ctx   Reference to the application context object.
     *
     * @return          KWSOutput struct containing a pointer to the audio data where ASR
     *                  should begin and how much data to process.
     */
    static KWSOutput doKws(ApplicationContext& ctx) {
        constexpr uint32_t dataPsnTxtInfStartX = 20;
        constexpr uint32_t dataPsnTxtInfStartY = 40;

        constexpr int minTensorDims = static_cast<int>(
            (arm::app::DsCnnModel::ms_inputRowsIdx > arm::app::DsCnnModel::ms_inputColsIdx)?
             arm::app::DsCnnModel::ms_inputRowsIdx : arm::app::DsCnnModel::ms_inputColsIdx);

        KWSOutput output;

        auto& kwsModel = ctx.Get<Model&>("kwsmodel");
        if (!kwsModel.IsInited()) {
            printf_err("KWS model has not been initialised\n");
            return output;
        }

        const int kwsFrameLength = ctx.Get<int>("kwsframeLength");
        const int kwsFrameStride = ctx.Get<int>("kwsframeStride");
        const float kwsScoreThreshold = ctx.Get<float>("kwsscoreThreshold");

        TfLiteTensor* kwsOutputTensor = kwsModel.GetOutputTensor(0);
        TfLiteTensor* kwsInputTensor = kwsModel.GetInputTensor(0);

        if (!kwsInputTensor->dims) {
            printf_err("Invalid input tensor dims\n");
            return output;
        } else if (kwsInputTensor->dims->size < minTensorDims) {
            printf_err("Input tensor dimension should be >= %d\n", minTensorDims);
            return output;
        }

        const uint32_t kwsNumMfccFeats = ctx.Get<uint32_t>("kwsNumMfcc");
        const uint32_t kwsNumAudioWindows = ctx.Get<uint32_t>("kwsNumAudioWins");

        audio::DsCnnMFCC kwsMfcc = audio::DsCnnMFCC(kwsNumMfccFeats, kwsFrameLength);
        kwsMfcc.Init();

        /* Deduce the data length required for 1 KWS inference from the network parameters. */
        auto kwsAudioDataWindowSize = kwsNumAudioWindows * kwsFrameStride +
                                        (kwsFrameLength - kwsFrameStride);
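        /* A sketch of the arithmetic, assuming the typical DS-CNN KWS defaults
         * (frame length 640, frame stride 320, 49 MFCC windows at 16 kHz):
         * 49 * 320 + (640 - 320) = 16000 samples, i.e. 1 second of audio,
         * consistent with the 1 second sampling note further below. */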
        auto kwsMfccWindowSize = kwsFrameLength;
        auto kwsMfccWindowStride = kwsFrameStride;

        /* We are choosing to move by half the window size => for a 1 second window size,
         * this means an overlap of 0.5 seconds. */
        auto kwsAudioDataStride = kwsAudioDataWindowSize / 2;

        info("KWS audio data window size %u\n", kwsAudioDataWindowSize);

        /* Stride must be a multiple of the MFCC window stride to re-use features. */
        if (0 != kwsAudioDataStride % kwsMfccWindowStride) {
            kwsAudioDataStride -= kwsAudioDataStride % kwsMfccWindowStride;
        }

        auto kwsMfccVectorsInAudioStride = kwsAudioDataStride / kwsMfccWindowStride;
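        /* For the same assumed defaults: stride = 16000 / 2 = 8000 samples,
         * which is already a multiple of the 320-sample MFCC stride, so the
         * rounding above is a no-op and 8000 / 320 = 25 new MFCC vectors are
         * produced per audio stride. */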

        /* We expect to be sampling 1 second worth of data at a time.
         * NOTE: This is only used for time stamp calculation. */
        const float kwsAudioParamsSecondsPerSample = 1.0 / audio::DsCnnMFCC::ms_defaultSamplingFreq;

        auto currentIndex = ctx.Get<uint32_t>("clipIndex");

        /* Creating an MFCC features sliding window for the data required for 1 inference. */
        auto kwsAudioMFCCWindowSlider = audio::SlidingWindow<const int16_t>(
                                            get_audio_array(currentIndex),
                                            kwsAudioDataWindowSize, kwsMfccWindowSize,
                                            kwsMfccWindowStride);

        /* Creating a sliding window through the whole audio clip. */
        auto audioDataSlider = audio::SlidingWindow<const int16_t>(
                                    get_audio_array(currentIndex),
                                    get_audio_array_size(currentIndex),
                                    kwsAudioDataWindowSize, kwsAudioDataStride);

        /* Calculate number of the feature vectors in the window overlap region.
         * These feature vectors will be reused. */
        size_t numberOfReusedFeatureVectors = kwsAudioMFCCWindowSlider.TotalStrides() + 1
                                                - kwsMfccVectorsInAudioStride;
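        /* With the assumed defaults, TotalStrides() + 1 = 49 MFCC vectors per
         * inference window, of which 49 - 25 = 24 fall in the overlap between
         * consecutive windows and can be served from the feature cache. */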

        auto kwsMfccFeatureCalc = GetFeatureCalculator(kwsMfcc, kwsInputTensor,
                                                       numberOfReusedFeatureVectors);

        if (!kwsMfccFeatureCalc) {
            return output;
        }

        /* Container for KWS results. */
        std::vector<arm::app::kws::KwsResult> kwsResults;

        /* Display message on the LCD - inference running. */
        auto& platform = ctx.Get<hal_platform&>("platform");
        std::string str_inf{"Running KWS inference... "};
        platform.data_psn->present_data_text(
                            str_inf.c_str(), str_inf.size(),
                            dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);

        info("Running KWS inference on audio clip %u => %s\n",
             currentIndex, get_filename(currentIndex));

        /* Start sliding through audio clip. */
        while (audioDataSlider.HasNext()) {
            const int16_t* inferenceWindow = audioDataSlider.Next();

            /* We moved to the next window - set the features sliding to the new address. */
            kwsAudioMFCCWindowSlider.Reset(inferenceWindow);

            /* The first window does not have cache ready. */
            bool useCache = audioDataSlider.Index() > 0 && numberOfReusedFeatureVectors > 0;

            /* Start calculating features inside one audio sliding window. */
            while (kwsAudioMFCCWindowSlider.HasNext()) {
                const int16_t* kwsMfccWindow = kwsAudioMFCCWindowSlider.Next();
                std::vector<int16_t> kwsMfccAudioData =
                    std::vector<int16_t>(kwsMfccWindow, kwsMfccWindow + kwsMfccWindowSize);

                /* Compute features for this window and write them to input tensor. */
                kwsMfccFeatureCalc(kwsMfccAudioData,
                                   kwsAudioMFCCWindowSlider.Index(),
                                   useCache,
                                   kwsMfccVectorsInAudioStride);
            }

            info("Inference %zu/%zu\n", audioDataSlider.Index() + 1,
                 audioDataSlider.TotalStrides() + 1);

            /* Run inference over this audio clip sliding window. */
            arm::app::RunInference(platform, kwsModel);

            std::vector<ClassificationResult> kwsClassificationResult;
            auto& kwsClassifier = ctx.Get<KwsClassifier&>("kwsclassifier");

            kwsClassifier.GetClassificationResults(
                            kwsOutputTensor, kwsClassificationResult,
                            ctx.Get<std::vector<std::string>&>("kwslabels"), 1);

            kwsResults.emplace_back(
                kws::KwsResult(
                    kwsClassificationResult,
                    audioDataSlider.Index() * kwsAudioParamsSecondsPerSample * kwsAudioDataStride,
                    audioDataSlider.Index(), kwsScoreThreshold)
                );

            /* Keyword detected. */
            if (kwsClassificationResult[0].m_labelIdx == ctx.Get<uint32_t>("keywordindex")) {
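                /* Hand the rest of the clip to ASR: start right after the end
                 * of the KWS window that triggered, so the keyword itself is
                 * skipped, and compute how many samples remain between that
                 * point and the end of the clip. */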
                output.asrAudioStart = inferenceWindow + kwsAudioDataWindowSize;
                output.asrAudioSamples = get_audio_array_size(currentIndex) -
                                        (audioDataSlider.NextWindowStartIndex() -
                                         kwsAudioDataStride + kwsAudioDataWindowSize);
                break;
            }

#if VERIFY_TEST_OUTPUT
            arm::app::DumpTensor(kwsOutputTensor);
#endif /* VERIFY_TEST_OUTPUT */

        } /* while (audioDataSlider.HasNext()) */

        /* Erase. */
        str_inf = std::string(str_inf.size(), ' ');
        platform.data_psn->present_data_text(
                            str_inf.c_str(), str_inf.size(),
                            dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);

        if (!_PresentInferenceResult(platform, kwsResults)) {
            return output;
        }

        output.executionSuccess = true;
        return output;
    }

    /**
     * @brief           Performs the ASR pipeline.
     *
     * @param[in,out]   ctx         Reference to the application context object.
     * @param[in]       kwsOutput   Struct containing a pointer to the audio data where ASR
     *                              should begin and how much data to process.
     * @return          true if the pipeline executed without failure.
     */
    static bool doAsr(ApplicationContext& ctx, const KWSOutput& kwsOutput) {
        constexpr uint32_t dataPsnTxtInfStartX = 20;
        constexpr uint32_t dataPsnTxtInfStartY = 40;

        auto& platform = ctx.Get<hal_platform&>("platform");
        platform.data_psn->clear(COLOR_BLACK);

        /* Get model reference. */
        auto& asrModel = ctx.Get<Model&>("asrmodel");
        if (!asrModel.IsInited()) {
            printf_err("ASR model has not been initialised\n");
            return false;
        }

        /* Get score threshold to be applied for the classifier (post-inference). */
        auto asrScoreThreshold = ctx.Get<float>("asrscoreThreshold");

        /* Dimensions of the tensor should have been verified by the caller. */
        TfLiteTensor* asrInputTensor = asrModel.GetInputTensor(0);
        TfLiteTensor* asrOutputTensor = asrModel.GetOutputTensor(0);
        const uint32_t asrInputRows = asrInputTensor->dims->data[arm::app::Wav2LetterModel::ms_inputRowsIdx];

        /* Populate ASR MFCC related parameters. */
        auto asrMfccParamsWinLen = ctx.Get<uint32_t>("asrframeLength");
        auto asrMfccParamsWinStride = ctx.Get<uint32_t>("asrframeStride");

        /* Populate ASR inference context and inner lengths for input. */
        auto asrInputCtxLen = ctx.Get<uint32_t>("ctxLen");
        const uint32_t asrInputInnerLen = asrInputRows - (2 * asrInputCtxLen);
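        /* The input rows split as [ctx | inner | ctx]: ctxLen frames at each
         * edge act as left/right context for the network, so only the inner
         * frames advance the audio position between inferences. This mirrors
         * the window/stride derivation below. */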

        /* Make sure the input tensor supports the above context and inner lengths. */
        if (asrInputRows <= 2 * asrInputCtxLen || asrInputRows <= asrInputInnerLen) {
            printf_err("ASR input rows not compatible with ctx length %u\n", asrInputCtxLen);
            return false;
        }

        /* Audio data stride corresponds to inputInnerLen feature vectors. */
        const uint32_t asrAudioParamsWinLen = (asrInputRows - 1) *
                                              asrMfccParamsWinStride + (asrMfccParamsWinLen);
        const uint32_t asrAudioParamsWinStride = asrInputInnerLen * asrMfccParamsWinStride;
        const float asrAudioParamsSecondsPerSample =
                                              (1.0 / audio::Wav2LetterMFCC::ms_defaultSamplingFreq);

        /* Get pre/post-processing objects. */
        auto& asrPrep = ctx.Get<audio::asr::Preprocess&>("preprocess");
        auto& asrPostp = ctx.Get<audio::asr::Postprocess&>("postprocess");

        /* Set default reduction axis for post-processing. */
        const uint32_t reductionAxis = arm::app::Wav2LetterModel::ms_outputRowsIdx;

        /* Get the remaining audio buffer and respective size from KWS results. */
        const int16_t* audioArr = kwsOutput.asrAudioStart;
        const uint32_t audioArrSize = kwsOutput.asrAudioSamples;

        /* Audio clip must have enough samples to produce 1 MFCC feature. */
        std::vector<int16_t> audioBuffer = std::vector<int16_t>(audioArr, audioArr + audioArrSize);
        if (audioArrSize < asrMfccParamsWinLen) {
            printf_err("Not enough audio samples, minimum needed is %u\n", asrMfccParamsWinLen);
            return false;
        }

        /* Initialise an audio slider. */
        auto audioDataSlider = audio::ASRSlidingWindow<const int16_t>(
                                   audioBuffer.data(),
                                   audioBuffer.size(),
                                   asrAudioParamsWinLen,
                                   asrAudioParamsWinStride);

        /* Declare a container for results. */
        std::vector<arm::app::asr::AsrResult> asrResults;

        /* Display message on the LCD - inference running. */
        std::string str_inf{"Running ASR inference... "};
        platform.data_psn->present_data_text(
                            str_inf.c_str(), str_inf.size(),
                            dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);

        size_t asrInferenceWindowLen = asrAudioParamsWinLen;

        /* Start sliding through audio clip. */
        while (audioDataSlider.HasNext()) {

            /* If there is not enough audio, see how much can be sent for processing. */
            size_t nextStartIndex = audioDataSlider.NextWindowStartIndex();
            if (nextStartIndex + asrAudioParamsWinLen > audioBuffer.size()) {
                asrInferenceWindowLen = audioBuffer.size() - nextStartIndex;
            }

            const int16_t* asrInferenceWindow = audioDataSlider.Next();

            info("Inference %zu/%zu\n", audioDataSlider.Index() + 1,
                 static_cast<size_t>(ceilf(audioDataSlider.FractionalTotalStrides() + 1)));

            Profiler prepProfiler{&platform, "pre-processing"};
            prepProfiler.StartProfiling();

            /* Calculate MFCCs, deltas and populate the input tensor. */
            asrPrep.Invoke(asrInferenceWindow, asrInferenceWindowLen, asrInputTensor);

            prepProfiler.StopProfiling();
            std::string prepProfileResults = prepProfiler.GetResultsAndReset();
            info("%s\n", prepProfileResults.c_str());

            /* Run inference over this audio clip sliding window. */
            arm::app::RunInference(platform, asrModel);

            /* Post-process. */
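            /* Note: the post-process step is expected to strip the context
             * frames from the output along the reduction axis, keeping the
             * trailing context only for the final window (signalled here by
             * !HasNext()). This summarises behaviour implemented elsewhere,
             * not a contract verified in this file. */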
            asrPostp.Invoke(asrOutputTensor, reductionAxis, !audioDataSlider.HasNext());

            /* Get results. */
            std::vector<ClassificationResult> asrClassificationResult;
            auto& asrClassifier = ctx.Get<AsrClassifier&>("asrclassifier");
            asrClassifier.GetClassificationResults(
                            asrOutputTensor, asrClassificationResult,
                            ctx.Get<std::vector<std::string>&>("asrlabels"), 1);

            asrResults.emplace_back(asr::AsrResult(asrClassificationResult,
                                                   (audioDataSlider.Index() *
                                                    asrAudioParamsSecondsPerSample *
                                                    asrAudioParamsWinStride),
                                                   audioDataSlider.Index(), asrScoreThreshold));

#if VERIFY_TEST_OUTPUT
            arm::app::DumpTensor(asrOutputTensor, asrOutputTensor->dims->data[arm::app::Wav2LetterModel::ms_outputColsIdx]);
#endif /* VERIFY_TEST_OUTPUT */

            /* Erase. */
            str_inf = std::string(str_inf.size(), ' ');
            platform.data_psn->present_data_text(
                                str_inf.c_str(), str_inf.size(),
                                dataPsnTxtInfStartX, dataPsnTxtInfStartY, false);
        }
        if (!_PresentInferenceResult(platform, asrResults)) {
            return false;
        }

        return true;
    }

    /* Audio inference classification handler. */
    bool ClassifyAudioHandler(ApplicationContext& ctx, uint32_t clipIndex, bool runAll)
    {
        auto& platform = ctx.Get<hal_platform&>("platform");
        platform.data_psn->clear(COLOR_BLACK);

        /* If the request has a valid size, set the audio index. */
        if (clipIndex < NUMBER_OF_FILES) {
            if (!_SetAppCtxClipIdx(ctx, clipIndex)) {
                return false;
            }
        }

        auto startClipIdx = ctx.Get<uint32_t>("clipIndex");

        do {
            KWSOutput kwsOutput = doKws(ctx);
            if (!kwsOutput.executionSuccess) {
                return false;
            }

            if (kwsOutput.asrAudioStart != nullptr && kwsOutput.asrAudioSamples > 0) {
                info("Keyword spotted\n");
                if (!doAsr(ctx, kwsOutput)) {
                    printf_err("ASR failed\n");
                    return false;
                }
            }

            _IncrementAppCtxClipIdx(ctx);

        } while (runAll && ctx.Get<uint32_t>("clipIndex") != startClipIdx);

        return true;
    }

    static void _IncrementAppCtxClipIdx(ApplicationContext& ctx)
    {
        auto curAudioIdx = ctx.Get<uint32_t>("clipIndex");

        if (curAudioIdx + 1 >= NUMBER_OF_FILES) {
            ctx.Set<uint32_t>("clipIndex", 0);
            return;
        }
        ++curAudioIdx;
        ctx.Set<uint32_t>("clipIndex", curAudioIdx);
    }

    static bool _SetAppCtxClipIdx(ApplicationContext& ctx, const uint32_t idx)
    {
        if (idx >= NUMBER_OF_FILES) {
            printf_err("Invalid idx %u (expected less than %u)\n",
                       idx, NUMBER_OF_FILES);
            return false;
        }
        ctx.Set<uint32_t>("clipIndex", idx);
        return true;
    }

    static bool _PresentInferenceResult(hal_platform& platform,
                                        std::vector<arm::app::kws::KwsResult>& results)
    {
        constexpr uint32_t dataPsnTxtStartX1 = 20;
        constexpr uint32_t dataPsnTxtStartY1 = 30;
        constexpr uint32_t dataPsnTxtYIncr = 16;  /* Row index increment. */

        platform.data_psn->set_text_color(COLOR_GREEN);

        /* Display each result. */
        uint32_t rowIdx1 = dataPsnTxtStartY1 + 2 * dataPsnTxtYIncr;

        for (uint32_t i = 0; i < results.size(); ++i) {

            std::string topKeyword{"<none>"};
            float score = 0.f;

            if (results[i].m_resultVec.size()) {
                topKeyword = results[i].m_resultVec[0].m_label;
                score = results[i].m_resultVec[0].m_normalisedVal;
            }

            std::string resultStr =
                std::string{"@"} + std::to_string(results[i].m_timeStamp) +
                std::string{"s: "} + topKeyword + std::string{" ("} +
                std::to_string(static_cast<int>(score * 100)) + std::string{"%)"};

            platform.data_psn->present_data_text(
                    resultStr.c_str(), resultStr.size(),
                    dataPsnTxtStartX1, rowIdx1, 0);
            rowIdx1 += dataPsnTxtYIncr;

            info("For timestamp: %f (inference #: %u); threshold: %f\n",
                 results[i].m_timeStamp, results[i].m_inferenceNumber,
                 results[i].m_threshold);
            for (uint32_t j = 0; j < results[i].m_resultVec.size(); ++j) {
                info("\t\tlabel @ %u: %s, score: %f\n", j,
                     results[i].m_resultVec[j].m_label.c_str(),
                     results[i].m_resultVec[j].m_normalisedVal);
            }
        }

        return true;
    }

    static bool _PresentInferenceResult(hal_platform& platform,
                                        std::vector<arm::app::asr::AsrResult>& results)
    {
        constexpr uint32_t dataPsnTxtStartX1 = 20;
        constexpr uint32_t dataPsnTxtStartY1 = 80;
        constexpr bool allow_multiple_lines = true;

        platform.data_psn->set_text_color(COLOR_GREEN);

        /* Results from multiple inferences should be combined before processing. */
        std::vector<arm::app::ClassificationResult> combinedResults;
        for (auto& result : results) {
            combinedResults.insert(combinedResults.end(),
                                   result.m_resultVec.begin(),
                                   result.m_resultVec.end());
        }

        for (auto& result : results) {
            /* Get the final result string using the decoder. */
            std::string infResultStr = audio::asr::DecodeOutput(result.m_resultVec);

            info("Result for inf %u: %s\n", result.m_inferenceNumber,
                 infResultStr.c_str());
        }

        std::string finalResultStr = audio::asr::DecodeOutput(combinedResults);

        platform.data_psn->present_data_text(
                finalResultStr.c_str(), finalResultStr.size(),
                dataPsnTxtStartX1, dataPsnTxtStartY1, allow_multiple_lines);

        info("Final result: %s\n", finalResultStr.c_str());
        return true;
    }

    /**
     * @brief Generic feature calculator factory.
     *
     * Returns a lambda function to compute features using a feature cache.
     * The real feature math is done by a lambda function provided as a parameter.
     * Features are written to input tensor memory.
     *
     * @tparam T            Feature vector type.
     * @param inputTensor   Model input tensor pointer.
     * @param cacheSize     Number of feature vectors to cache. Defined by the sliding window overlap.
     * @param compute       Features calculator function.
     * @return              Lambda function to compute features.
     **/
    template<class T>
    std::function<void (std::vector<int16_t>&, size_t, bool, size_t)>
    _FeatureCalc(TfLiteTensor* inputTensor, size_t cacheSize,
                 std::function<std::vector<T> (std::vector<int16_t>&)> compute)
    {
        /* Feature cache used by the returned lambda. */
        static std::vector<std::vector<T>> featureCache = std::vector<std::vector<T>>(cacheSize);
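        /* Note: because the cache is a function-local static, it is created
         * once per template instantiation (per T) and is shared by every
         * lambda this factory returns; cacheSize only takes effect on the
         * first call for a given T. */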

        return [=](std::vector<int16_t>& audioDataWindow,
                   size_t index,
                   bool useCache,
                   size_t featuresOverlapIndex)
        {
            T* tensorData = tflite::GetTensorData<T>(inputTensor);
            std::vector<T> features;

            /* Reuse features from the cache if the cache is ready and the sliding windows overlap.
             * The overlap is at the beginning of the sliding window and spans the feature cache. */
            if (useCache && index < featureCache.size()) {
                features = std::move(featureCache[index]);
            } else {
                features = std::move(compute(audioDataWindow));
            }
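            /* The input tensor holds one feature vector per MFCC window, so
             * window `index` is written at element offset index * size. */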
            auto size = features.size();
            auto sizeBytes = sizeof(T) * size;
            std::memcpy(tensorData + (index * size), features.data(), sizeBytes);

            /* Start renewing the cache as soon as the iteration goes past the window overlap. */
            if (index >= featuresOverlapIndex) {
                featureCache[index - featuresOverlapIndex] = std::move(features);
            }
        };
    }

    template std::function<void (std::vector<int16_t>&, size_t, bool, size_t)>
    _FeatureCalc<int8_t>(TfLiteTensor* inputTensor,
                         size_t cacheSize,
                         std::function<std::vector<int8_t> (std::vector<int16_t>&)> compute);

    template std::function<void (std::vector<int16_t>&, size_t, bool, size_t)>
    _FeatureCalc<uint8_t>(TfLiteTensor* inputTensor,
                          size_t cacheSize,
                          std::function<std::vector<uint8_t> (std::vector<int16_t>&)> compute);

    template std::function<void (std::vector<int16_t>&, size_t, bool, size_t)>
    _FeatureCalc<int16_t>(TfLiteTensor* inputTensor,
                          size_t cacheSize,
                          std::function<std::vector<int16_t> (std::vector<int16_t>&)> compute);

    template std::function<void (std::vector<int16_t>&, size_t, bool, size_t)>
    _FeatureCalc<float>(TfLiteTensor* inputTensor,
                        size_t cacheSize,
                        std::function<std::vector<float> (std::vector<int16_t>&)> compute);

    static std::function<void (std::vector<int16_t>&, int, bool, size_t)>
    GetFeatureCalculator(audio::DsCnnMFCC& mfcc, TfLiteTensor* inputTensor, size_t cacheSize)
    {
        std::function<void (std::vector<int16_t>&, size_t, bool, size_t)> mfccFeatureCalc;

        TfLiteQuantization quant = inputTensor->quantization;

        if (kTfLiteAffineQuantization == quant.type) {

            auto* quantParams = (TfLiteAffineQuantization*) quant.params;
            const float quantScale = quantParams->scale->data[0];
            const int quantOffset = quantParams->zero_point->data[0];
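            /* Standard TFLite affine quantization relation:
             *     realValue = quantScale * (quantizedValue - quantOffset).
             * The MfccComputeQuant calls below are expected to apply the
             * inverse mapping when producing integer features. */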

            switch (inputTensor->type) {
                case kTfLiteInt8: {
                    mfccFeatureCalc = _FeatureCalc<int8_t>(inputTensor,
                                                           cacheSize,
                                                           [=, &mfcc](std::vector<int16_t>& audioDataWindow) {
                                                               return mfcc.MfccComputeQuant<int8_t>(audioDataWindow,
                                                                                                    quantScale,
                                                                                                    quantOffset);
                                                           }
                    );
                    break;
                }
                case kTfLiteUInt8: {
                    mfccFeatureCalc = _FeatureCalc<uint8_t>(inputTensor,
                                                            cacheSize,
                                                            [=, &mfcc](std::vector<int16_t>& audioDataWindow) {
                                                                return mfcc.MfccComputeQuant<uint8_t>(audioDataWindow,
                                                                                                      quantScale,
                                                                                                      quantOffset);
                                                            }
                    );
                    break;
                }
                case kTfLiteInt16: {
                    mfccFeatureCalc = _FeatureCalc<int16_t>(inputTensor,
                                                            cacheSize,
                                                            [=, &mfcc](std::vector<int16_t>& audioDataWindow) {
                                                                return mfcc.MfccComputeQuant<int16_t>(audioDataWindow,
                                                                                                      quantScale,
                                                                                                      quantOffset);
                                                            }
                    );
                    break;
                }
                default:
                    printf_err("Tensor type %s not supported\n", TfLiteTypeGetName(inputTensor->type));
            }

        } else {
            mfccFeatureCalc = _FeatureCalc<float>(inputTensor,
                                                  cacheSize,
                                                  [&mfcc](std::vector<int16_t>& audioDataWindow) {
                                                      return mfcc.MfccCompute(audioDataWindow);
                                                  });
        }
        return mfccFeatureCalc;
    }

} /* namespace app */
} /* namespace arm */