Blame - source/use_case/kws_asr/src/MainLoop.cc - ml/ethos-u/ml-embedded-evaluation-kit

blob: c7e977fc7505070e3a87d26f90086654e6e4b1a1 [file] [log] [blame]

alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	1	/*
				2	* Copyright (c) 2021 Arm Limited. All rights reserved.
				3	* SPDX-License-Identifier: Apache-2.0
				4	*
				5	* Licensed under the Apache License, Version 2.0 (the "License");
				6	* you may not use this file except in compliance with the License.
				7	* You may obtain a copy of the License at
				8	*
				9	* http://www.apache.org/licenses/LICENSE-2.0
				10	*
				11	* Unless required by applicable law or agreed to in writing, software
				12	* distributed under the License is distributed on an "AS IS" BASIS,
				13	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	* See the License for the specific language governing permissions and
				15	* limitations under the License.
				16	*/
				17	#include "hal.h" /* Brings in platform definitions. */
				18	#include "InputFiles.hpp" /* For input images. */
Kshitij Sisodia	76a1580	2021-12-24 11:05:11 +0000	[diff] [blame]	19	#include "Labels_micronetkws.hpp" /* For MicroNetKws label strings. */
alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	20	#include "Labels_wav2letter.hpp" /* For Wav2Letter label strings. */
				21	#include "Classifier.hpp" /* KWS classifier. */
				22	#include "AsrClassifier.hpp" /* ASR classifier. */
Kshitij Sisodia	76a1580	2021-12-24 11:05:11 +0000	[diff] [blame]	23	#include "MicroNetKwsModel.hpp" /* KWS model class for running inference. */
alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	24	#include "Wav2LetterModel.hpp" /* ASR model class for running inference. */
				25	#include "UseCaseCommonUtils.hpp" /* Utils functions. */
				26	#include "UseCaseHandler.hpp" /* Handlers for different user options. */
				27	#include "Wav2LetterPreprocess.hpp" /* ASR pre-processing class. */
				28	#include "Wav2LetterPostprocess.hpp"/* ASR post-processing class. */
				29
				30	using KwsClassifier = arm::app::Classifier;
				31
				32	enum opcodes
				33	{
				34	MENU_OPT_RUN_INF_NEXT = 1, /* Run on next vector. */
				35	MENU_OPT_RUN_INF_CHOSEN, /* Run on a user provided vector index. */
				36	MENU_OPT_RUN_INF_ALL, /* Run inference on all. */
				37	MENU_OPT_SHOW_MODEL_INFO, /* Show model info. */
				38	MENU_OPT_LIST_AUDIO_CLIPS /* List the current baked audio clips. */
				39	};
				40
				41	static void DisplayMenu()
				42	{
Kshitij Sisodia	3c8256d	2021-05-24 16:12:40 +0100	[diff] [blame]	43	printf("\n\n");
				44	printf("User input required\n");
alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	45	printf("Enter option number from:\n\n");
				46	printf(" %u. Classify next audio clip\n", MENU_OPT_RUN_INF_NEXT);
				47	printf(" %u. Classify audio clip at chosen index\n", MENU_OPT_RUN_INF_CHOSEN);
				48	printf(" %u. Run classification on all audio clips\n", MENU_OPT_RUN_INF_ALL);
				49	printf(" %u. Show NN model info\n", MENU_OPT_SHOW_MODEL_INFO);
				50	printf(" %u. List audio clips\n\n", MENU_OPT_LIST_AUDIO_CLIPS);
				51	printf(" Choice: ");
George Gekov	93e5951	2021-08-03 11:18:41 +0100	[diff] [blame]	52	fflush(stdout);
alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	53	}
				54
				55	/** @brief Gets the number of MFCC features for a single window. */
				56	static uint32_t GetNumMfccFeatures(const arm::app::Model& model);
				57
				58	/** @brief Gets the number of MFCC feature vectors to be computed. */
				59	static uint32_t GetNumMfccFeatureVectors(const arm::app::Model& model);
				60
				61	/** @brief Gets the output context length (left and right) for post-processing. */
				62	static uint32_t GetOutputContextLen(const arm::app::Model& model,
				63	uint32_t inputCtxLen);
				64
				65	/** @brief Gets the output inner length for post-processing. */
				66	static uint32_t GetOutputInnerLen(const arm::app::Model& model,
				67	uint32_t outputCtxLen);
				68
				69	void main_loop(hal_platform& platform)
				70	{
				71	/* Model wrapper objects. */
Kshitij Sisodia	76a1580	2021-12-24 11:05:11 +0000	[diff] [blame]	72	arm::app::MicroNetKwsModel kwsModel;
alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	73	arm::app::Wav2LetterModel asrModel;
				74
				75	/* Load the models. */
				76	if (!kwsModel.Init()) {
				77	printf_err("Failed to initialise KWS model\n");
				78	return;
				79	}
				80
				81	/* Initialise the asr model using the same allocator from KWS
				82	* to re-use the tensor arena. */
				83	if (!asrModel.Init(kwsModel.GetAllocator())) {
Kshitij Sisodia	76a1580	2021-12-24 11:05:11 +0000	[diff] [blame]	84	printf_err("Failed to initialise ASR model\n");
alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	85	return;
				86	}
				87
				88	/* Initialise ASR pre-processing. */
				89	arm::app::audio::asr::Preprocess prep(
				90	GetNumMfccFeatures(asrModel),
				91	arm::app::asr::g_FrameLength,
				92	arm::app::asr::g_FrameStride,
				93	GetNumMfccFeatureVectors(asrModel));
				94
				95	/* Initialise ASR post-processing. */
				96	const uint32_t outputCtxLen = GetOutputContextLen(asrModel, arm::app::asr::g_ctxLen);
				97	const uint32_t blankTokenIdx = 28;
				98	arm::app::audio::asr::Postprocess postp(
				99	outputCtxLen,
				100	GetOutputInnerLen(asrModel, outputCtxLen),
				101	blankTokenIdx);
				102
				103	/* Instantiate application context. */
				104	arm::app::ApplicationContext caseContext;
				105
Isabella Gottardi	8df12f3	2021-04-07 17:15:31 +0100	[diff] [blame]	106	arm::app::Profiler profiler{&platform, "kws_asr"};
				107	caseContext.Set<arm::app::Profiler&>("profiler", profiler);
				108
alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	109	caseContext.Set<hal_platform&>("platform", platform);
				110	caseContext.Set<arm::app::Model&>("kwsmodel", kwsModel);
				111	caseContext.Set<arm::app::Model&>("asrmodel", asrModel);
				112	caseContext.Set<uint32_t>("clipIndex", 0);
				113	caseContext.Set<uint32_t>("ctxLen", arm::app::asr::g_ctxLen); /* Left and right context length (MFCC feat vectors). */
				114	caseContext.Set<int>("kwsframeLength", arm::app::kws::g_FrameLength);
				115	caseContext.Set<int>("kwsframeStride", arm::app::kws::g_FrameStride);
				116	caseContext.Set<float>("kwsscoreThreshold", arm::app::kws::g_ScoreThreshold); /* Normalised score threshold. */
				117	caseContext.Set<uint32_t >("kwsNumMfcc", arm::app::kws::g_NumMfcc);
				118	caseContext.Set<uint32_t >("kwsNumAudioWins", arm::app::kws::g_NumAudioWins);
				119
				120	caseContext.Set<int>("asrframeLength", arm::app::asr::g_FrameLength);
				121	caseContext.Set<int>("asrframeStride", arm::app::asr::g_FrameStride);
				122	caseContext.Set<float>("asrscoreThreshold", arm::app::asr::g_ScoreThreshold); /* Normalised score threshold. */
				123
				124	KwsClassifier kwsClassifier; /* Classifier wrapper object. */
				125	arm::app::AsrClassifier asrClassifier; /* Classifier wrapper object. */
				126	caseContext.Set<arm::app::Classifier&>("kwsclassifier", kwsClassifier);
				127	caseContext.Set<arm::app::AsrClassifier&>("asrclassifier", asrClassifier);
				128
				129	caseContext.Set<arm::app::audio::asr::Preprocess&>("preprocess", prep);
				130	caseContext.Set<arm::app::audio::asr::Postprocess&>("postprocess", postp);
				131
				132	std::vector<std::string> asrLabels;
				133	arm::app::asr::GetLabelsVector(asrLabels);
				134	std::vector<std::string> kwsLabels;
				135	arm::app::kws::GetLabelsVector(kwsLabels);
				136	caseContext.Set<const std::vector <std::string>&>("asrlabels", asrLabels);
				137	caseContext.Set<const std::vector <std::string>&>("kwslabels", kwsLabels);
				138
Liam Barry	b5b32d3	2021-12-30 11:35:00 +0000	[diff] [blame]	139	/* KWS keyword that triggers ASR and associated checks */
				140	std::string triggerKeyword = std::string("yes");
				141	if (std::find(kwsLabels.begin(), kwsLabels.end(), triggerKeyword) != kwsLabels.end()) {
				142	caseContext.Set<const std::string &>("triggerkeyword", triggerKeyword);
				143	}
				144	else {
				145	printf_err("Selected trigger keyword not found in labels file\n");
				146	return;
				147	}
alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	148
				149	/* Loop. */
				150	bool executionSuccessful = true;
				151	constexpr bool bUseMenu = NUMBER_OF_FILES > 1 ? true : false;
				152
				153	/* Loop. */
				154	do {
				155	int menuOption = MENU_OPT_RUN_INF_NEXT;
				156	if (bUseMenu) {
				157	DisplayMenu();
				158	menuOption = arm::app::ReadUserInputAsInt(platform);
				159	printf("\n");
				160	}
				161	switch (menuOption) {
				162	case MENU_OPT_RUN_INF_NEXT:
				163	executionSuccessful = ClassifyAudioHandler(
				164	caseContext,
				165	caseContext.Get<uint32_t>("clipIndex"),
				166	false);
				167	break;
				168	case MENU_OPT_RUN_INF_CHOSEN: {
				169	printf(" Enter the audio clip index [0, %d]: ",
				170	NUMBER_OF_FILES-1);
Isabella Gottardi	79d4154	2021-10-20 15:52:32 +0100	[diff] [blame]	171	fflush(stdout);
alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	172	auto clipIndex = static_cast<uint32_t>(
				173	arm::app::ReadUserInputAsInt(platform));
				174	executionSuccessful = ClassifyAudioHandler(caseContext,
				175	clipIndex,
				176	false);
				177	break;
				178	}
				179	case MENU_OPT_RUN_INF_ALL:
				180	executionSuccessful = ClassifyAudioHandler(
				181	caseContext,
				182	caseContext.Get<uint32_t>("clipIndex"),
				183	true);
				184	break;
				185	case MENU_OPT_SHOW_MODEL_INFO:
				186	executionSuccessful = kwsModel.ShowModelInfoHandler();
				187	executionSuccessful = asrModel.ShowModelInfoHandler();
				188	break;
				189	case MENU_OPT_LIST_AUDIO_CLIPS:
				190	executionSuccessful = ListFilesHandler(caseContext);
				191	break;
				192	default:
				193	printf("Incorrect choice, try again.");
				194	break;
				195	}
				196	} while (executionSuccessful && bUseMenu);
				197	info("Main loop terminated.\n");
				198	}
				199
				200	static uint32_t GetNumMfccFeatures(const arm::app::Model& model)
				201	{
				202	TfLiteTensor* inputTensor = model.GetInputTensor(0);
				203	const int inputCols = inputTensor->dims->data[arm::app::Wav2LetterModel::ms_inputColsIdx];
				204	if (0 != inputCols % 3) {
				205	printf_err("Number of input columns is not a multiple of 3\n");
				206	}
				207	return std::max(inputCols/3, 0);
				208	}
				209
				210	static uint32_t GetNumMfccFeatureVectors(const arm::app::Model& model)
				211	{
				212	TfLiteTensor* inputTensor = model.GetInputTensor(0);
				213	const int inputRows = inputTensor->dims->data[arm::app::Wav2LetterModel::ms_inputRowsIdx];
				214	return std::max(inputRows, 0);
				215	}
				216
				217	static uint32_t GetOutputContextLen(const arm::app::Model& model, const uint32_t inputCtxLen)
				218	{
				219	const uint32_t inputRows = GetNumMfccFeatureVectors(model);
				220	const uint32_t inputInnerLen = inputRows - (2 * inputCtxLen);
				221	constexpr uint32_t ms_outputRowsIdx = arm::app::Wav2LetterModel::ms_outputRowsIdx;
				222
				223	/* Check to make sure that the input tensor supports the above context and inner lengths. */
				224	if (inputRows <= 2 * inputCtxLen \|\| inputRows <= inputInnerLen) {
Kshitij Sisodia	f9c19ea	2021-05-07 16:08:14 +0100	[diff] [blame]	225	printf_err("Input rows not compatible with ctx of %" PRIu32 "\n",
alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	226	inputCtxLen);
				227	return 0;
				228	}
				229
				230	TfLiteTensor* outputTensor = model.GetOutputTensor(0);
				231	const uint32_t outputRows = std::max(outputTensor->dims->data[ms_outputRowsIdx], 0);
				232
				233	const float tensorColRatio = static_cast<float>(inputRows)/
				234	static_cast<float>(outputRows);
				235
				236	return std::round(static_cast<float>(inputCtxLen)/tensorColRatio);
				237	}
				238
				239	static uint32_t GetOutputInnerLen(const arm::app::Model& model,
				240	const uint32_t outputCtxLen)
				241	{
				242	constexpr uint32_t ms_outputRowsIdx = arm::app::Wav2LetterModel::ms_outputRowsIdx;
				243	TfLiteTensor* outputTensor = model.GetOutputTensor(0);
				244	const uint32_t outputRows = std::max(outputTensor->dims->data[ms_outputRowsIdx], 0);
				245	return (outputRows - (2 * outputCtxLen));
				246	}