blob: 51b0b18bc8e87ab2512e24933d932f0451ada84b [file] [log] [blame]
/*
 * Copyright (c) 2021 Arm Limited. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "hal.h"                     /* Brings in platform definitions. */
#include "Labels.hpp"                /* For label strings. */
#include "UseCaseHandler.hpp"        /* Handlers for different user options. */
#include "Wav2LetterModel.hpp"       /* Model class for running inference. */
#include "UseCaseCommonUtils.hpp"    /* Utils functions. */
#include "AsrClassifier.hpp"         /* Classifier. */
#include "InputFiles.hpp"            /* Generated audio clip header. */
#include "Wav2LetterPreprocess.hpp"  /* Pre-processing class. */
#include "Wav2LetterPostprocess.hpp" /* Post-processing class. */
#include "log_macros.h"              /* Logging macros. */
alexander3c798932021-03-26 21:42:19 +000027
/* Menu option identifiers presented to the user by DisplayMenu(). */
enum opcodes
{
    MENU_OPT_RUN_INF_NEXT     = 1, /* Run inference on the next audio clip. */
    MENU_OPT_RUN_INF_CHOSEN   = 2, /* Run inference on a user-chosen clip index. */
    MENU_OPT_RUN_INF_ALL      = 3, /* Run inference on every audio clip. */
    MENU_OPT_SHOW_MODEL_INFO  = 4, /* Print NN model information. */
    MENU_OPT_LIST_AUDIO_CLIPS = 5  /* List the baked-in audio clips. */
};
36
37static void DisplayMenu()
38{
Kshitij Sisodia3c8256d2021-05-24 16:12:40 +010039 printf("\n\n");
40 printf("User input required\n");
alexander3c798932021-03-26 21:42:19 +000041 printf("Enter option number from:\n\n");
42 printf(" %u. Classify next audio clip\n", MENU_OPT_RUN_INF_NEXT);
43 printf(" %u. Classify audio clip at chosen index\n", MENU_OPT_RUN_INF_CHOSEN);
44 printf(" %u. Run classification on all audio clips\n", MENU_OPT_RUN_INF_ALL);
45 printf(" %u. Show NN model info\n", MENU_OPT_SHOW_MODEL_INFO);
46 printf(" %u. List audio clips\n\n", MENU_OPT_LIST_AUDIO_CLIPS);
47 printf(" Choice: ");
George Gekov93e59512021-08-03 11:18:41 +010048 fflush(stdout);
alexander3c798932021-03-26 21:42:19 +000049}
50
/** @brief      Verify input and output tensor are of certain min dimensions.
 *  @param[in]  model   Initialised model wrapper object.
 *  @return     true if both tensors have at least 3 dimensions,
 *              false otherwise. */
static bool VerifyTensorDimensions(const arm::app::Model& model);

/** @brief      Gets the number of MFCC features for a single window.
 *  @param[in]  model   Initialised model wrapper object.
 *  @return     Input tensor column count divided by 3 (clamped at 0). */
static uint32_t GetNumMfccFeatures(const arm::app::Model& model);

/** @brief      Gets the number of MFCC feature vectors to be computed.
 *  @param[in]  model   Initialised model wrapper object.
 *  @return     Input tensor row count (clamped at 0). */
static uint32_t GetNumMfccFeatureVectors(const arm::app::Model& model);

/** @brief      Gets the output context length (left and right) for
 *              post-processing.
 *  @param[in]  model       Initialised model wrapper object.
 *  @param[in]  inputCtxLen Input context length in MFCC feature vectors.
 *  @return     Output context length; 0 if the input rows cannot
 *              accommodate the requested context. */
static uint32_t GetOutputContextLen(const arm::app::Model& model,
                                    uint32_t inputCtxLen);

/** @brief      Gets the output inner length for post-processing.
 *  @param[in]  model        Initialised model wrapper object.
 *  @param[in]  outputCtxLen Output context length (one side).
 *  @return     Output rows minus both (left and right) context sections. */
static uint32_t GetOutputInnerLen(const arm::app::Model& model,
                                  uint32_t outputCtxLen);
67
Kshitij Sisodia4cc40212022-04-08 09:54:53 +010068void main_loop()
alexander3c798932021-03-26 21:42:19 +000069{
70 arm::app::Wav2LetterModel model; /* Model wrapper object. */
71
72 /* Load the model. */
73 if (!model.Init()) {
74 printf_err("Failed to initialise model\n");
75 return;
76 } else if (!VerifyTensorDimensions(model)) {
77 printf_err("Model's input or output dimension verification failed\n");
78 return;
79 }
80
81 /* Initialise pre-processing. */
82 arm::app::audio::asr::Preprocess prep(
83 GetNumMfccFeatures(model),
84 g_FrameLength,
85 g_FrameStride,
86 GetNumMfccFeatureVectors(model));
87
88 /* Initialise post-processing. */
89 const uint32_t outputCtxLen = GetOutputContextLen(model, g_ctxLen);
90 const uint32_t blankTokenIdx = 28;
91 arm::app::audio::asr::Postprocess postp(
92 outputCtxLen,
93 GetOutputInnerLen(model, outputCtxLen),
94 blankTokenIdx);
95
96 /* Instantiate application context. */
97 arm::app::ApplicationContext caseContext;
98 std::vector <std::string> labels;
99 GetLabelsVector(labels);
100 arm::app::AsrClassifier classifier; /* Classifier wrapper object. */
101
Kshitij Sisodia4cc40212022-04-08 09:54:53 +0100102 arm::app::Profiler profiler{"asr"};
Isabella Gottardi8df12f32021-04-07 17:15:31 +0100103 caseContext.Set<arm::app::Profiler&>("profiler", profiler);
alexander3c798932021-03-26 21:42:19 +0000104 caseContext.Set<arm::app::Model&>("model", model);
105 caseContext.Set<uint32_t>("clipIndex", 0);
106 caseContext.Set<uint32_t>("frameLength", g_FrameLength);
107 caseContext.Set<uint32_t>("frameStride", g_FrameStride);
108 caseContext.Set<float>("scoreThreshold", g_ScoreThreshold); /* Score threshold. */
109 caseContext.Set<uint32_t>("ctxLen", g_ctxLen); /* Left and right context length (MFCC feat vectors). */
110 caseContext.Set<const std::vector <std::string>&>("labels", labels);
111 caseContext.Set<arm::app::AsrClassifier&>("classifier", classifier);
112 caseContext.Set<arm::app::audio::asr::Preprocess&>("preprocess", prep);
113 caseContext.Set<arm::app::audio::asr::Postprocess&>("postprocess", postp);
114
115 bool executionSuccessful = true;
116 constexpr bool bUseMenu = NUMBER_OF_FILES > 1 ? true : false;
117
118 /* Loop. */
119 do {
120 int menuOption = MENU_OPT_RUN_INF_NEXT;
121 if (bUseMenu) {
122 DisplayMenu();
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100123 menuOption = arm::app::ReadUserInputAsInt();
alexander3c798932021-03-26 21:42:19 +0000124 printf("\n");
125 }
126 switch (menuOption) {
127 case MENU_OPT_RUN_INF_NEXT:
128 executionSuccessful = ClassifyAudioHandler(
129 caseContext,
130 caseContext.Get<uint32_t>("clipIndex"),
131 false);
132 break;
133 case MENU_OPT_RUN_INF_CHOSEN: {
134 printf(" Enter the audio clip index [0, %d]: ",
135 NUMBER_OF_FILES-1);
Isabella Gottardi79d41542021-10-20 15:52:32 +0100136 fflush(stdout);
alexander3c798932021-03-26 21:42:19 +0000137 auto clipIndex = static_cast<uint32_t>(
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100138 arm::app::ReadUserInputAsInt());
alexander3c798932021-03-26 21:42:19 +0000139 executionSuccessful = ClassifyAudioHandler(caseContext,
140 clipIndex,
141 false);
142 break;
143 }
144 case MENU_OPT_RUN_INF_ALL:
145 executionSuccessful = ClassifyAudioHandler(
146 caseContext,
147 caseContext.Get<uint32_t>("clipIndex"),
148 true);
149 break;
150 case MENU_OPT_SHOW_MODEL_INFO:
151 executionSuccessful = model.ShowModelInfoHandler();
152 break;
153 case MENU_OPT_LIST_AUDIO_CLIPS:
154 executionSuccessful = ListFilesHandler(caseContext);
155 break;
156 default:
157 printf("Incorrect choice, try again.");
158 break;
159 }
160 } while (executionSuccessful && bUseMenu);
161 info("Main loop terminated.\n");
162}
163
164static bool VerifyTensorDimensions(const arm::app::Model& model)
165{
166 /* Populate tensor related parameters. */
167 TfLiteTensor* inputTensor = model.GetInputTensor(0);
168 if (!inputTensor->dims) {
169 printf_err("Invalid input tensor dims\n");
170 return false;
171 } else if (inputTensor->dims->size < 3) {
172 printf_err("Input tensor dimension should be >= 3\n");
173 return false;
174 }
175
176 TfLiteTensor* outputTensor = model.GetOutputTensor(0);
177 if (!outputTensor->dims) {
178 printf_err("Invalid output tensor dims\n");
179 return false;
180 } else if (outputTensor->dims->size < 3) {
181 printf_err("Output tensor dimension should be >= 3\n");
182 return false;
183 }
184
185 return true;
186}
187
188static uint32_t GetNumMfccFeatures(const arm::app::Model& model)
189{
190 TfLiteTensor* inputTensor = model.GetInputTensor(0);
191 const int inputCols = inputTensor->dims->data[arm::app::Wav2LetterModel::ms_inputColsIdx];
192 if (0 != inputCols % 3) {
193 printf_err("Number of input columns is not a multiple of 3\n");
194 }
195 return std::max(inputCols/3, 0);
196}
197
198static uint32_t GetNumMfccFeatureVectors(const arm::app::Model& model)
199{
200 TfLiteTensor* inputTensor = model.GetInputTensor(0);
201 const int inputRows = inputTensor->dims->data[arm::app::Wav2LetterModel::ms_inputRowsIdx];
202 return std::max(inputRows, 0);
203}
204
205static uint32_t GetOutputContextLen(const arm::app::Model& model, const uint32_t inputCtxLen)
206{
207 const uint32_t inputRows = GetNumMfccFeatureVectors(model);
208 const uint32_t inputInnerLen = inputRows - (2 * inputCtxLen);
209 constexpr uint32_t ms_outputRowsIdx = arm::app::Wav2LetterModel::ms_outputRowsIdx;
210
211 /* Check to make sure that the input tensor supports the above
212 * context and inner lengths. */
213 if (inputRows <= 2 * inputCtxLen || inputRows <= inputInnerLen) {
Kshitij Sisodiaf9c19ea2021-05-07 16:08:14 +0100214 printf_err("Input rows not compatible with ctx of %" PRIu32 "\n",
alexander3c798932021-03-26 21:42:19 +0000215 inputCtxLen);
216 return 0;
217 }
218
219 TfLiteTensor* outputTensor = model.GetOutputTensor(0);
220 const uint32_t outputRows = std::max(outputTensor->dims->data[ms_outputRowsIdx], 0);
221
222 const float tensorColRatio = static_cast<float>(inputRows)/
223 static_cast<float>(outputRows);
224
225 return std::round(static_cast<float>(inputCtxLen)/tensorColRatio);
226}
227
228static uint32_t GetOutputInnerLen(const arm::app::Model& model,
229 const uint32_t outputCtxLen)
230{
231 constexpr uint32_t ms_outputRowsIdx = arm::app::Wav2LetterModel::ms_outputRowsIdx;
232 TfLiteTensor* outputTensor = model.GetOutputTensor(0);
233 const uint32_t outputRows = std::max(outputTensor->dims->data[ms_outputRowsIdx], 0);
234 return (outputRows - (2 * outputCtxLen));
235}