/*
2 * Copyright (c) 2021 Arm Limited. All rights reserved.
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17#include "hal.h" /* Brings in platform definitions. */
18#include "Labels.hpp" /* For label strings. */
19#include "UseCaseHandler.hpp" /* Handlers for different user options. */
20#include "Wav2LetterModel.hpp" /* Model class for running inference. */
21#include "UseCaseCommonUtils.hpp" /* Utils functions. */
22#include "AsrClassifier.hpp" /* Classifier. */
23#include "InputFiles.hpp" /* Generated audio clip header. */
24#include "Wav2LetterPreprocess.hpp" /* Pre-processing class. */
25#include "Wav2LetterPostprocess.hpp" /* Post-processing class. */
26
/* Identifiers for the options offered by DisplayMenu(). Numbering starts
 * at 1 so the values match what the user is asked to type in. */
enum opcodes
{
    MENU_OPT_RUN_INF_NEXT = 1,     /* Classify the next audio clip in sequence. */
    MENU_OPT_RUN_INF_CHOSEN = 2,   /* Classify the clip at a user-supplied index. */
    MENU_OPT_RUN_INF_ALL = 3,      /* Classify every baked-in audio clip. */
    MENU_OPT_SHOW_MODEL_INFO = 4,  /* Print NN model details. */
    MENU_OPT_LIST_AUDIO_CLIPS = 5  /* List the compiled-in audio clips. */
};
35
36static void DisplayMenu()
37{
38 printf("\n\nUser input required\n");
39 printf("Enter option number from:\n\n");
40 printf(" %u. Classify next audio clip\n", MENU_OPT_RUN_INF_NEXT);
41 printf(" %u. Classify audio clip at chosen index\n", MENU_OPT_RUN_INF_CHOSEN);
42 printf(" %u. Run classification on all audio clips\n", MENU_OPT_RUN_INF_ALL);
43 printf(" %u. Show NN model info\n", MENU_OPT_SHOW_MODEL_INFO);
44 printf(" %u. List audio clips\n\n", MENU_OPT_LIST_AUDIO_CLIPS);
45 printf(" Choice: ");
46}
47
48/** @brief Verify input and output tensor are of certain min dimensions. */
49static bool VerifyTensorDimensions(const arm::app::Model& model);
50
51/** @brief Gets the number of MFCC features for a single window. */
52static uint32_t GetNumMfccFeatures(const arm::app::Model& model);
53
54/** @brief Gets the number of MFCC feature vectors to be computed. */
55static uint32_t GetNumMfccFeatureVectors(const arm::app::Model& model);
56
57/** @brief Gets the output context length (left and right) for post-processing. */
58static uint32_t GetOutputContextLen(const arm::app::Model& model,
59 uint32_t inputCtxLen);
60
61/** @brief Gets the output inner length for post-processing. */
62static uint32_t GetOutputInnerLen(const arm::app::Model& model,
63 uint32_t outputCtxLen);
64
65void main_loop(hal_platform& platform)
66{
67 arm::app::Wav2LetterModel model; /* Model wrapper object. */
68
69 /* Load the model. */
70 if (!model.Init()) {
71 printf_err("Failed to initialise model\n");
72 return;
73 } else if (!VerifyTensorDimensions(model)) {
74 printf_err("Model's input or output dimension verification failed\n");
75 return;
76 }
77
78 /* Initialise pre-processing. */
79 arm::app::audio::asr::Preprocess prep(
80 GetNumMfccFeatures(model),
81 g_FrameLength,
82 g_FrameStride,
83 GetNumMfccFeatureVectors(model));
84
85 /* Initialise post-processing. */
86 const uint32_t outputCtxLen = GetOutputContextLen(model, g_ctxLen);
87 const uint32_t blankTokenIdx = 28;
88 arm::app::audio::asr::Postprocess postp(
89 outputCtxLen,
90 GetOutputInnerLen(model, outputCtxLen),
91 blankTokenIdx);
92
93 /* Instantiate application context. */
94 arm::app::ApplicationContext caseContext;
95 std::vector <std::string> labels;
96 GetLabelsVector(labels);
97 arm::app::AsrClassifier classifier; /* Classifier wrapper object. */
98
99 caseContext.Set<hal_platform&>("platform", platform);
100 caseContext.Set<arm::app::Model&>("model", model);
101 caseContext.Set<uint32_t>("clipIndex", 0);
102 caseContext.Set<uint32_t>("frameLength", g_FrameLength);
103 caseContext.Set<uint32_t>("frameStride", g_FrameStride);
104 caseContext.Set<float>("scoreThreshold", g_ScoreThreshold); /* Score threshold. */
105 caseContext.Set<uint32_t>("ctxLen", g_ctxLen); /* Left and right context length (MFCC feat vectors). */
106 caseContext.Set<const std::vector <std::string>&>("labels", labels);
107 caseContext.Set<arm::app::AsrClassifier&>("classifier", classifier);
108 caseContext.Set<arm::app::audio::asr::Preprocess&>("preprocess", prep);
109 caseContext.Set<arm::app::audio::asr::Postprocess&>("postprocess", postp);
110
111 bool executionSuccessful = true;
112 constexpr bool bUseMenu = NUMBER_OF_FILES > 1 ? true : false;
113
114 /* Loop. */
115 do {
116 int menuOption = MENU_OPT_RUN_INF_NEXT;
117 if (bUseMenu) {
118 DisplayMenu();
119 menuOption = arm::app::ReadUserInputAsInt(platform);
120 printf("\n");
121 }
122 switch (menuOption) {
123 case MENU_OPT_RUN_INF_NEXT:
124 executionSuccessful = ClassifyAudioHandler(
125 caseContext,
126 caseContext.Get<uint32_t>("clipIndex"),
127 false);
128 break;
129 case MENU_OPT_RUN_INF_CHOSEN: {
130 printf(" Enter the audio clip index [0, %d]: ",
131 NUMBER_OF_FILES-1);
132 auto clipIndex = static_cast<uint32_t>(
133 arm::app::ReadUserInputAsInt(platform));
134 executionSuccessful = ClassifyAudioHandler(caseContext,
135 clipIndex,
136 false);
137 break;
138 }
139 case MENU_OPT_RUN_INF_ALL:
140 executionSuccessful = ClassifyAudioHandler(
141 caseContext,
142 caseContext.Get<uint32_t>("clipIndex"),
143 true);
144 break;
145 case MENU_OPT_SHOW_MODEL_INFO:
146 executionSuccessful = model.ShowModelInfoHandler();
147 break;
148 case MENU_OPT_LIST_AUDIO_CLIPS:
149 executionSuccessful = ListFilesHandler(caseContext);
150 break;
151 default:
152 printf("Incorrect choice, try again.");
153 break;
154 }
155 } while (executionSuccessful && bUseMenu);
156 info("Main loop terminated.\n");
157}
158
159static bool VerifyTensorDimensions(const arm::app::Model& model)
160{
161 /* Populate tensor related parameters. */
162 TfLiteTensor* inputTensor = model.GetInputTensor(0);
163 if (!inputTensor->dims) {
164 printf_err("Invalid input tensor dims\n");
165 return false;
166 } else if (inputTensor->dims->size < 3) {
167 printf_err("Input tensor dimension should be >= 3\n");
168 return false;
169 }
170
171 TfLiteTensor* outputTensor = model.GetOutputTensor(0);
172 if (!outputTensor->dims) {
173 printf_err("Invalid output tensor dims\n");
174 return false;
175 } else if (outputTensor->dims->size < 3) {
176 printf_err("Output tensor dimension should be >= 3\n");
177 return false;
178 }
179
180 return true;
181}
182
183static uint32_t GetNumMfccFeatures(const arm::app::Model& model)
184{
185 TfLiteTensor* inputTensor = model.GetInputTensor(0);
186 const int inputCols = inputTensor->dims->data[arm::app::Wav2LetterModel::ms_inputColsIdx];
187 if (0 != inputCols % 3) {
188 printf_err("Number of input columns is not a multiple of 3\n");
189 }
190 return std::max(inputCols/3, 0);
191}
192
193static uint32_t GetNumMfccFeatureVectors(const arm::app::Model& model)
194{
195 TfLiteTensor* inputTensor = model.GetInputTensor(0);
196 const int inputRows = inputTensor->dims->data[arm::app::Wav2LetterModel::ms_inputRowsIdx];
197 return std::max(inputRows, 0);
198}
199
200static uint32_t GetOutputContextLen(const arm::app::Model& model, const uint32_t inputCtxLen)
201{
202 const uint32_t inputRows = GetNumMfccFeatureVectors(model);
203 const uint32_t inputInnerLen = inputRows - (2 * inputCtxLen);
204 constexpr uint32_t ms_outputRowsIdx = arm::app::Wav2LetterModel::ms_outputRowsIdx;
205
206 /* Check to make sure that the input tensor supports the above
207 * context and inner lengths. */
208 if (inputRows <= 2 * inputCtxLen || inputRows <= inputInnerLen) {
209 printf_err("Input rows not compatible with ctx of %u\n",
210 inputCtxLen);
211 return 0;
212 }
213
214 TfLiteTensor* outputTensor = model.GetOutputTensor(0);
215 const uint32_t outputRows = std::max(outputTensor->dims->data[ms_outputRowsIdx], 0);
216
217 const float tensorColRatio = static_cast<float>(inputRows)/
218 static_cast<float>(outputRows);
219
220 return std::round(static_cast<float>(inputCtxLen)/tensorColRatio);
221}
222
223static uint32_t GetOutputInnerLen(const arm::app::Model& model,
224 const uint32_t outputCtxLen)
225{
226 constexpr uint32_t ms_outputRowsIdx = arm::app::Wav2LetterModel::ms_outputRowsIdx;
227 TfLiteTensor* outputTensor = model.GetOutputTensor(0);
228 const uint32_t outputRows = std::max(outputTensor->dims->data[ms_outputRowsIdx], 0);
229 return (outputRows - (2 * outputCtxLen));
230}