blob: 51b0b18bc8e87ab2512e24933d932f0451ada84b [file] [log] [blame]
/*
 * Copyright (c) 2021 Arm Limited. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "hal.h"                     /* Brings in platform definitions. */
#include "Labels.hpp"                /* For label strings. */
#include "UseCaseHandler.hpp"        /* Handlers for different user options. */
#include "Wav2LetterModel.hpp"       /* Model class for running inference. */
#include "UseCaseCommonUtils.hpp"    /* Utils functions. */
#include "AsrClassifier.hpp"         /* Classifier. */
#include "InputFiles.hpp"            /* Generated audio clip header. */
#include "Wav2LetterPreprocess.hpp"  /* Pre-processing class. */
#include "Wav2LetterPostprocess.hpp" /* Post-processing class. */
#include "log_macros.h"              /* Logging macros. */
alexander3c798932021-03-26 21:42:19 +000027
/* Menu option identifiers presented to the user by DisplayMenu(). */
enum opcodes
{
    MENU_OPT_RUN_INF_NEXT     = 1, /* Run inference on the next audio clip. */
    MENU_OPT_RUN_INF_CHOSEN   = 2, /* Run inference on a user-chosen clip index. */
    MENU_OPT_RUN_INF_ALL      = 3, /* Run inference on every audio clip. */
    MENU_OPT_SHOW_MODEL_INFO  = 4, /* Print NN model information. */
    MENU_OPT_LIST_AUDIO_CLIPS = 5  /* List the baked-in audio clips. */
};
36
37static void DisplayMenu()
38{
Kshitij Sisodia3c8256d2021-05-24 16:12:40 +010039 printf("\n\n");
40 printf("User input required\n");
alexander3c798932021-03-26 21:42:19 +000041 printf("Enter option number from:\n\n");
42 printf(" %u. Classify next audio clip\n", MENU_OPT_RUN_INF_NEXT);
43 printf(" %u. Classify audio clip at chosen index\n", MENU_OPT_RUN_INF_CHOSEN);
44 printf(" %u. Run classification on all audio clips\n", MENU_OPT_RUN_INF_ALL);
45 printf(" %u. Show NN model info\n", MENU_OPT_SHOW_MODEL_INFO);
46 printf(" %u. List audio clips\n\n", MENU_OPT_LIST_AUDIO_CLIPS);
47 printf(" Choice: ");
George Gekov93e59512021-08-03 11:18:41 +010048 fflush(stdout);
alexander3c798932021-03-26 21:42:19 +000049}
50
/** @brief      Verify input and output tensor are of certain min dimensions.
 *  @param[in]  model   Initialised model wrapper object.
 *  @return     true if both tensors have at least 3 dimensions,
 *              false otherwise. */
static bool VerifyTensorDimensions(const arm::app::Model& model);

/** @brief      Gets the number of MFCC features for a single window.
 *  @param[in]  model   Initialised model wrapper object.
 *  @return     Input tensor column count divided by 3 (clamped at 0). */
static uint32_t GetNumMfccFeatures(const arm::app::Model& model);

/** @brief      Gets the number of MFCC feature vectors to be computed.
 *  @param[in]  model   Initialised model wrapper object.
 *  @return     Input tensor row count (clamped at 0). */
static uint32_t GetNumMfccFeatureVectors(const arm::app::Model& model);

/** @brief      Gets the output context length (left and right) for
 *              post-processing.
 *  @param[in]  model       Initialised model wrapper object.
 *  @param[in]  inputCtxLen Input context length in MFCC feature vectors.
 *  @return     Output context length; 0 if the input rows cannot
 *              accommodate the requested context. */
static uint32_t GetOutputContextLen(const arm::app::Model& model,
                                    uint32_t inputCtxLen);

/** @brief      Gets the output inner length for post-processing.
 *  @param[in]  model        Initialised model wrapper object.
 *  @param[in]  outputCtxLen Output context length (one side).
 *  @return     Output rows minus both (left and right) context sections. */
static uint32_t GetOutputInnerLen(const arm::app::Model& model,
                                  uint32_t outputCtxLen);
67
Kshitij Sisodia4cc40212022-04-08 09:54:53 +010068void main_loop()
alexander3c798932021-03-26 21:42:19 +000069{
70 arm::app::Wav2LetterModel model; /* Model wrapper object. */
71
72 /* Load the model. */
73 if (!model.Init()) {
74 printf_err("Failed to initialise model\n");
75 return;
76 } else if (!VerifyTensorDimensions(model)) {
77 printf_err("Model's input or output dimension verification failed\n");
78 return;
79 }
80
81 /* Initialise pre-processing. */
82 arm::app::audio::asr::Preprocess prep(
83 GetNumMfccFeatures(model),
84 g_FrameLength,
85 g_FrameStride,
86 GetNumMfccFeatureVectors(model));
87
88 /* Initialise post-processing. */
89 const uint32_t outputCtxLen = GetOutputContextLen(model, g_ctxLen);
90 const uint32_t blankTokenIdx = 28;
91 arm::app::audio::asr::Postprocess postp(
92 outputCtxLen,
93 GetOutputInnerLen(model, outputCtxLen),
94 blankTokenIdx);
95
96 /* Instantiate application context. */
97 arm::app::ApplicationContext caseContext;
98 std::vector <std::string> labels;
99 GetLabelsVector(labels);
100 arm::app::AsrClassifier classifier; /* Classifier wrapper object. */
101
Kshitij Sisodia4cc40212022-04-08 09:54:53 +0100102 arm::app::Profiler profiler{"asr"};
Isabella Gottardi8df12f32021-04-07 17:15:31 +0100103 caseContext.Set<arm::app::Profiler&>("profiler", profiler);
alexander3c798932021-03-26 21:42:19 +0000104 caseContext.Set<arm::app::Model&>("model", model);
105 caseContext.Set<uint32_t>("clipIndex", 0);
106 caseContext.Set<uint32_t>("frameLength", g_FrameLength);
107 caseContext.Set<uint32_t>("frameStride", g_FrameStride);
108 caseContext.Set<float>("scoreThreshold", g_ScoreThreshold); /* Score threshold. */
109 caseContext.Set<uint32_t>("ctxLen", g_ctxLen); /* Left and right context length (MFCC feat vectors). */
110 caseContext.Set<const std::vector <std::string>&>("labels", labels);
111 caseContext.Set<arm::app::AsrClassifier&>("classifier", classifier);
112 caseContext.Set<arm::app::audio::asr::Preprocess&>("preprocess", prep);
113 caseContext.Set<arm::app::audio::asr::Postprocess&>("postprocess", postp);
114
115 bool executionSuccessful = true;
116 constexpr bool bUseMenu = NUMBER_OF_FILES > 1 ? true : false;
117
118 /* Loop. */
119 do {
120 int menuOption = MENU_OPT_RUN_INF_NEXT;
121 if (bUseMenu) {
122 DisplayMenu();
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100123 menuOption = arm::app::ReadUserInputAsInt();
alexander3c798932021-03-26 21:42:19 +0000124 printf("\n");
125 }
126 switch (menuOption) {
127 case MENU_OPT_RUN_INF_NEXT:
128 executionSuccessful = ClassifyAudioHandler(
129 caseContext,
130 caseContext.Get<uint32_t>("clipIndex"),
131 false);
132 break;
133 case MENU_OPT_RUN_INF_CHOSEN: {
134 printf(" Enter the audio clip index [0, %d]: ",
135 NUMBER_OF_FILES-1);
Isabella Gottardi79d41542021-10-20 15:52:32 +0100136 fflush(stdout);
alexander3c798932021-03-26 21:42:19 +0000137 auto clipIndex = static_cast<uint32_t>(
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100138 arm::app::ReadUserInputAsInt());
alexander3c798932021-03-26 21:42:19 +0000139 executionSuccessful = ClassifyAudioHandler(caseContext,
140 clipIndex,
141 false);
142 break;
143 }
144 case MENU_OPT_RUN_INF_ALL:
145 executionSuccessful = ClassifyAudioHandler(
146 caseContext,
147 caseContext.Get<uint32_t>("clipIndex"),
148 true);
149 break;
150 case MENU_OPT_SHOW_MODEL_INFO:
151 executionSuccessful = model.ShowModelInfoHandler();
152 break;
153 case MENU_OPT_LIST_AUDIO_CLIPS:
154 executionSuccessful = ListFilesHandler(caseContext);
155 break;
156 default:
157 printf("Incorrect choice, try again.");
158 break;
159 }
160 } while (executionSuccessful && bUseMenu);
161 info("Main loop terminated.\n");
162}
163
164static bool VerifyTensorDimensions(const arm::app::Model& model)
165{
166 /* Populate tensor related parameters. */
167 TfLiteTensor* inputTensor = model.GetInputTensor(0);
168 if (!inputTensor->dims) {
169 printf_err("Invalid input tensor dims\n");
170 return false;
171 } else if (inputTensor->dims->size < 3) {
172 printf_err("Input tensor dimension should be >= 3\n");
173 return false;
174 }
175
176 TfLiteTensor* outputTensor = model.GetOutputTensor(0);
177 if (!outputTensor->dims) {
178 printf_err("Invalid output tensor dims\n");
179 return false;
180 } else if (outputTensor->dims->size < 3) {
181 printf_err("Output tensor dimension should be >= 3\n");
182 return false;
183 }
184
185 return true;
186}
187
188static uint32_t GetNumMfccFeatures(const arm::app::Model& model)
189{
190 TfLiteTensor* inputTensor = model.GetInputTensor(0);
191 const int inputCols = inputTensor->dims->data[arm::app::Wav2LetterModel::ms_inputColsIdx];
192 if (0 != inputCols % 3) {
193 printf_err("Number of input columns is not a multiple of 3\n");
194 }
195 return std::max(inputCols/3, 0);
196}
197
198static uint32_t GetNumMfccFeatureVectors(const arm::app::Model& model)
199{
200 TfLiteTensor* inputTensor = model.GetInputTensor(0);
201 const int inputRows = inputTensor->dims->data[arm::app::Wav2LetterModel::ms_inputRowsIdx];
202 return std::max(inputRows, 0);
203}
204
205static uint32_t GetOutputContextLen(const arm::app::Model& model, const uint32_t inputCtxLen)
206{
207 const uint32_t inputRows = GetNumMfccFeatureVectors(model);
208 const uint32_t inputInnerLen = inputRows - (2 * inputCtxLen);
209 constexpr uint32_t ms_outputRowsIdx = arm::app::Wav2LetterModel::ms_outputRowsIdx;
210
211 /* Check to make sure that the input tensor supports the above
212 * context and inner lengths. */
213 if (inputRows <= 2 * inputCtxLen || inputRows <= inputInnerLen) {
Kshitij Sisodiaf9c19ea2021-05-07 16:08:14 +0100214 printf_err("Input rows not compatible with ctx of %" PRIu32 "\n",
alexander3c798932021-03-26 21:42:19 +0000215 inputCtxLen);
216 return 0;
217 }
218
219 TfLiteTensor* outputTensor = model.GetOutputTensor(0);
220 const uint32_t outputRows = std::max(outputTensor->dims->data[ms_outputRowsIdx], 0);
221
222 const float tensorColRatio = static_cast<float>(inputRows)/
223 static_cast<float>(outputRows);
224
225 return std::round(static_cast<float>(inputCtxLen)/tensorColRatio);
226}
227
228static uint32_t GetOutputInnerLen(const arm::app::Model& model,
229 const uint32_t outputCtxLen)
230{
231 constexpr uint32_t ms_outputRowsIdx = arm::app::Wav2LetterModel::ms_outputRowsIdx;
232 TfLiteTensor* outputTensor = model.GetOutputTensor(0);
233 const uint32_t outputRows = std::max(outputTensor->dims->data[ms_outputRowsIdx], 0);
234 return (outputRows - (2 * outputCtxLen));
235}