blob: 719978515fc80170d7272f7b9aff9ca2368db267 [file] [log] [blame]
alexander3c798932021-03-26 21:42:19 +00001/*
2 * Copyright (c) 2021 Arm Limited. All rights reserved.
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17#include "UseCaseHandler.hpp"
18
19#include "InputFiles.hpp"
20#include "AsrClassifier.hpp"
21#include "Wav2LetterModel.hpp"
22#include "hal.h"
23#include "Wav2LetterMfcc.hpp"
24#include "AudioUtils.hpp"
25#include "UseCaseCommonUtils.hpp"
26#include "AsrResult.hpp"
27#include "Wav2LetterPreprocess.hpp"
28#include "Wav2LetterPostprocess.hpp"
29#include "OutputDecode.hpp"
30
31namespace arm {
32namespace app {
33
    /**
     * @brief Helper function to increment current audio clip index.
     * @param[in,out] ctx Reference to the application context object.
     **/
alexanderc350cdc2021-04-29 20:36:09 +010038 static void IncrementAppCtxClipIdx(ApplicationContext& ctx);
alexander3c798932021-03-26 21:42:19 +000039
    /**
     * @brief Helper function to set the audio clip index.
     * @param[in,out] ctx Reference to the application context object.
     * @param[in] idx Value to be set.
     * @return true if index is set, false otherwise.
     **/
alexanderc350cdc2021-04-29 20:36:09 +010046 static bool SetAppCtxClipIdx(ApplicationContext& ctx, uint32_t idx);
alexander3c798932021-03-26 21:42:19 +000047
    /**
     * @brief Presents inference results using the data presentation
     *        object.
     * @param[in] platform Reference to the hal platform object.
     * @param[in] results Vector of classification results to be displayed.
     * @return true if successful, false otherwise.
     **/
alexanderc350cdc2021-04-29 20:36:09 +010057 static bool PresentInferenceResult(
alexander3c798932021-03-26 21:42:19 +000058 hal_platform& platform,
59 const std::vector<arm::app::asr::AsrResult>& results);
60
    /* Audio inference classification handler.
     *
     * Runs the full ASR pipeline over one audio clip (or all clips when
     * runAll is true): slides a window over the audio, extracts MFCC
     * features, runs inference, post-processes and classifies each window,
     * then decodes and presents the combined transcription.
     *
     * @param[in,out] ctx       Application context holding the model,
     *                          classifier, pre/post-processing objects,
     *                          profiler, platform and tuning parameters.
     * @param[in]     clipIndex Audio clip to start from; ignored if it is
     *                          >= NUMBER_OF_FILES (current ctx index kept).
     * @param[in]     runAll    If true, loop over every clip, wrapping
     *                          around until the start index is reached again.
     * @return true on success, false on any pipeline failure.
     */
    bool ClassifyAudioHandler(ApplicationContext& ctx, uint32_t clipIndex, bool runAll)
    {
        /* LCD coordinates for the "Running inference..." status text. */
        constexpr uint32_t dataPsnTxtInfStartX = 20;
        constexpr uint32_t dataPsnTxtInfStartY = 40;

        auto& platform = ctx.Get<hal_platform&>("platform");
        platform.data_psn->clear(COLOR_BLACK);

        auto& profiler = ctx.Get<Profiler&>("profiler");

        /* If the request has a valid size, set the audio index. */
        if (clipIndex < NUMBER_OF_FILES) {
            if (!SetAppCtxClipIdx(ctx, clipIndex)) {
                return false;
            }
        }

        /* Get model reference. */
        auto& model = ctx.Get<Model&>("model");
        if (!model.IsInited()) {
            printf_err("Model is not initialised! Terminating processing.\n");
            return false;
        }

        /* Get score threshold to be applied for the classifier (post-inference). */
        auto scoreThreshold = ctx.Get<float>("scoreThreshold");

        /* Get tensors. Dimensions of the tensor should have been verified by
         * the callee. */
        TfLiteTensor* inputTensor = model.GetInputTensor(0);
        TfLiteTensor* outputTensor = model.GetOutputTensor(0);
        const uint32_t inputRows = inputTensor->dims->data[arm::app::Wav2LetterModel::ms_inputRowsIdx];

        /* Populate MFCC related parameters. */
        auto mfccParamsWinLen = ctx.Get<uint32_t>("frameLength");
        auto mfccParamsWinStride = ctx.Get<uint32_t>("frameStride");

        /* Populate ASR inference context and inner lengths for input.
         * The context (ctxLen) rows at each edge of the input are overlap
         * between successive windows; only the inner rows advance. */
        auto inputCtxLen = ctx.Get<uint32_t>("ctxLen");
        const uint32_t inputInnerLen = inputRows - (2 * inputCtxLen);

        /* Audio data stride corresponds to inputInnerLen feature vectors. */
        const uint32_t audioParamsWinLen = (inputRows - 1) * mfccParamsWinStride + (mfccParamsWinLen);
        const uint32_t audioParamsWinStride = inputInnerLen * mfccParamsWinStride;
        const float audioParamsSecondsPerSample = (1.0/audio::Wav2LetterMFCC::ms_defaultSamplingFreq);

        /* Get pre/post-processing objects. */
        auto& prep = ctx.Get<audio::asr::Preprocess&>("preprocess");
        auto& postp = ctx.Get<audio::asr::Postprocess&>("postprocess");

        /* Set default reduction axis for post-processing. */
        const uint32_t reductionAxis = arm::app::Wav2LetterModel::ms_outputRowsIdx;

        /* Audio clip start index - used to detect wrap-around when runAll. */
        auto startClipIdx = ctx.Get<uint32_t>("clipIndex");

        /* Loop to process audio clips. */
        do {
            /* Get current audio clip index. */
            auto currentIndex = ctx.Get<uint32_t>("clipIndex");

            /* Get the current audio buffer and respective size. */
            const int16_t* audioArr = get_audio_array(currentIndex);
            const uint32_t audioArrSize = get_audio_array_size(currentIndex);

            if (!audioArr) {
                printf_err("Invalid audio array pointer\n");
                return false;
            }

            /* Audio clip must have enough samples to produce 1 MFCC feature. */
            if (audioArrSize < mfccParamsWinLen) {
                printf_err("Not enough audio samples, minimum needed is %u\n", mfccParamsWinLen);
                return false;
            }

            /* Initialise an audio slider. */
            auto audioDataSlider = audio::ASRSlidingWindow<const int16_t>(
                    audioArr,
                    audioArrSize,
                    audioParamsWinLen,
                    audioParamsWinStride);

            /* Declare a container for results. */
            std::vector<arm::app::asr::AsrResult> results;

            /* Display message on the LCD - inference running. */
            std::string str_inf{"Running inference... "};
            platform.data_psn->present_data_text(
                                str_inf.c_str(), str_inf.size(),
                                dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);

            info("Running inference on audio clip %u => %s\n", currentIndex,
                 get_filename(currentIndex));

            size_t inferenceWindowLen = audioParamsWinLen;

            /* Start sliding through audio clip. */
            while (audioDataSlider.HasNext()) {

                /* If not enough audio see how much can be sent for processing.
                 * The final window may be shorter than audioParamsWinLen. */
                size_t nextStartIndex = audioDataSlider.NextWindowStartIndex();
                if (nextStartIndex + audioParamsWinLen > audioArrSize) {
                    inferenceWindowLen = audioArrSize - nextStartIndex;
                }

                const int16_t* inferenceWindow = audioDataSlider.Next();

                info("Inference %zu/%zu\n", audioDataSlider.Index() + 1,
                     static_cast<size_t>(ceilf(audioDataSlider.FractionalTotalStrides() + 1)));

                /* Calculate MFCCs, deltas and populate the input tensor. */
                prep.Invoke(inferenceWindow, inferenceWindowLen, inputTensor);

                /* Run inference over this audio clip sliding window. */
                if (!RunInference(model, profiler)) {
                    return false;
                }

                /* Post-process. The last-window flag tells post-processing not
                 * to trim trailing context rows from the final output. */
                postp.Invoke(outputTensor, reductionAxis, !audioDataSlider.HasNext());

                /* Get results. */
                std::vector<ClassificationResult> classificationResult;
                auto& classifier = ctx.Get<AsrClassifier&>("classifier");
                classifier.GetClassificationResults(
                            outputTensor, classificationResult,
                            ctx.Get<std::vector<std::string>&>("labels"), 1);

                /* Timestamp = window index * stride length in seconds. */
                results.emplace_back(asr::AsrResult(classificationResult,
                                                    (audioDataSlider.Index() *
                                                    audioParamsSecondsPerSample *
                                                    audioParamsWinStride),
                                                    audioDataSlider.Index(), scoreThreshold));

#if VERIFY_TEST_OUTPUT
                arm::app::DumpTensor(outputTensor,
                    outputTensor->dims->data[arm::app::Wav2LetterModel::ms_outputColsIdx]);
#endif /* VERIFY_TEST_OUTPUT */

            }

            /* Erase the status text by overwriting it with spaces. */
            str_inf = std::string(str_inf.size(), ' ');
            platform.data_psn->present_data_text(
                            str_inf.c_str(), str_inf.size(),
                            dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);

            ctx.Set<std::vector<arm::app::asr::AsrResult>>("results", results);

            if (!PresentInferenceResult(platform, results)) {
                return false;
            }

            profiler.PrintProfilingResult();

            IncrementAppCtxClipIdx(ctx);

        } while (runAll && ctx.Get<uint32_t>("clipIndex") != startClipIdx);

        return true;
    }
224
alexanderc350cdc2021-04-29 20:36:09 +0100225 static void IncrementAppCtxClipIdx(ApplicationContext& ctx)
alexander3c798932021-03-26 21:42:19 +0000226 {
227 auto curAudioIdx = ctx.Get<uint32_t>("clipIndex");
228
229 if (curAudioIdx + 1 >= NUMBER_OF_FILES) {
230 ctx.Set<uint32_t>("clipIndex", 0);
231 return;
232 }
233 ++curAudioIdx;
234 ctx.Set<uint32_t>("clipIndex", curAudioIdx);
235 }
236
alexanderc350cdc2021-04-29 20:36:09 +0100237 static bool SetAppCtxClipIdx(ApplicationContext& ctx, uint32_t idx)
alexander3c798932021-03-26 21:42:19 +0000238 {
239 if (idx >= NUMBER_OF_FILES) {
240 printf_err("Invalid idx %u (expected less than %u)\n",
241 idx, NUMBER_OF_FILES);
242 return false;
243 }
244
245 ctx.Set<uint32_t>("clipIndex", idx);
246 return true;
247 }
248
alexanderc350cdc2021-04-29 20:36:09 +0100249 static bool PresentInferenceResult(hal_platform& platform,
250 const std::vector<arm::app::asr::AsrResult>& results)
alexander3c798932021-03-26 21:42:19 +0000251 {
252 constexpr uint32_t dataPsnTxtStartX1 = 20;
253 constexpr uint32_t dataPsnTxtStartY1 = 60;
254 constexpr bool allow_multiple_lines = true;
255
256 platform.data_psn->set_text_color(COLOR_GREEN);
257
Isabella Gottardi8df12f32021-04-07 17:15:31 +0100258 info("Final results:\n");
259 info("Total number of inferences: %zu\n", results.size());
alexander3c798932021-03-26 21:42:19 +0000260 /* Results from multiple inferences should be combined before processing. */
261 std::vector<arm::app::ClassificationResult> combinedResults;
262 for (auto& result : results) {
263 combinedResults.insert(combinedResults.end(),
264 result.m_resultVec.begin(),
265 result.m_resultVec.end());
266 }
267
268 /* Get each inference result string using the decoder. */
269 for (const auto & result : results) {
270 std::string infResultStr = audio::asr::DecodeOutput(result.m_resultVec);
271
Isabella Gottardi8df12f32021-04-07 17:15:31 +0100272 info("For timestamp: %f (inference #: %u); label: %s\n",
273 result.m_timeStamp, result.m_inferenceNumber,
274 infResultStr.c_str());
alexander3c798932021-03-26 21:42:19 +0000275 }
276
277 /* Get the decoded result for the combined result. */
278 std::string finalResultStr = audio::asr::DecodeOutput(combinedResults);
279
280 platform.data_psn->present_data_text(
281 finalResultStr.c_str(), finalResultStr.size(),
282 dataPsnTxtStartX1, dataPsnTxtStartY1,
283 allow_multiple_lines);
284
Isabella Gottardi8df12f32021-04-07 17:15:31 +0100285 info("Complete recognition: %s\n", finalResultStr.c_str());
alexander3c798932021-03-26 21:42:19 +0000286 return true;
287 }
288
289} /* namespace app */
290} /* namespace arm */