blob: 95e5a8fe36afb97b3f5a3b88126770f9b4bc4313 [file] [log] [blame]
/*
 * Copyright (c) 2021 Arm Limited. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "hal.h"                    /* Brings in platform definitions. */
#include "InputFiles.hpp"           /* For input images. */
#include "Labels_dscnn.hpp"         /* For DS-CNN label strings. */
#include "Labels_wav2letter.hpp"    /* For Wav2Letter label strings. */
#include "Classifier.hpp"           /* KWS classifier. */
#include "AsrClassifier.hpp"        /* ASR classifier. */
#include "DsCnnModel.hpp"           /* KWS model class for running inference. */
#include "Wav2LetterModel.hpp"      /* ASR model class for running inference. */
#include "UseCaseCommonUtils.hpp"   /* Utils functions. */
#include "UseCaseHandler.hpp"       /* Handlers for different user options. */
#include "Wav2LetterPreprocess.hpp" /* ASR pre-processing class. */
#include "Wav2LetterPostprocess.hpp"/* ASR post-processing class. */

#include <algorithm>                /* std::max. */
#include <cmath>                    /* std::round. */
#include <cstdint>                  /* Fixed-width integer types. */
#include <string>                   /* std::string. */
#include <vector>                   /* std::vector. */
29
30using KwsClassifier = arm::app::Classifier;
31
/**
 * @brief Options selectable from the interactive menu.
 *
 * The numeric values are the numbers printed by DisplayMenu() and compared
 * against the integer read from the user in main_loop(), so they must stay
 * stable and start at 1.
 */
enum opcodes
{
    MENU_OPT_RUN_INF_NEXT     = 1, /* Run on next vector. */
    MENU_OPT_RUN_INF_CHOSEN   = 2, /* Run on a user provided vector index. */
    MENU_OPT_RUN_INF_ALL      = 3, /* Run inference on all. */
    MENU_OPT_SHOW_MODEL_INFO  = 4, /* Show model info. */
    MENU_OPT_LIST_AUDIO_CLIPS = 5  /* List the current baked audio clips. */
};
40
41static void DisplayMenu()
42{
43 printf("\n\nUser input required\n");
44 printf("Enter option number from:\n\n");
45 printf(" %u. Classify next audio clip\n", MENU_OPT_RUN_INF_NEXT);
46 printf(" %u. Classify audio clip at chosen index\n", MENU_OPT_RUN_INF_CHOSEN);
47 printf(" %u. Run classification on all audio clips\n", MENU_OPT_RUN_INF_ALL);
48 printf(" %u. Show NN model info\n", MENU_OPT_SHOW_MODEL_INFO);
49 printf(" %u. List audio clips\n\n", MENU_OPT_LIST_AUDIO_CLIPS);
50 printf(" Choice: ");
51}
52
53/** @brief Gets the number of MFCC features for a single window. */
54static uint32_t GetNumMfccFeatures(const arm::app::Model& model);
55
56/** @brief Gets the number of MFCC feature vectors to be computed. */
57static uint32_t GetNumMfccFeatureVectors(const arm::app::Model& model);
58
59/** @brief Gets the output context length (left and right) for post-processing. */
60static uint32_t GetOutputContextLen(const arm::app::Model& model,
61 uint32_t inputCtxLen);
62
63/** @brief Gets the output inner length for post-processing. */
64static uint32_t GetOutputInnerLen(const arm::app::Model& model,
65 uint32_t outputCtxLen);
66
67void main_loop(hal_platform& platform)
68{
69 /* Model wrapper objects. */
70 arm::app::DsCnnModel kwsModel;
71 arm::app::Wav2LetterModel asrModel;
72
73 /* Load the models. */
74 if (!kwsModel.Init()) {
75 printf_err("Failed to initialise KWS model\n");
76 return;
77 }
78
79 /* Initialise the asr model using the same allocator from KWS
80 * to re-use the tensor arena. */
81 if (!asrModel.Init(kwsModel.GetAllocator())) {
82 printf_err("Failed to initalise ASR model\n");
83 return;
84 }
85
86 /* Initialise ASR pre-processing. */
87 arm::app::audio::asr::Preprocess prep(
88 GetNumMfccFeatures(asrModel),
89 arm::app::asr::g_FrameLength,
90 arm::app::asr::g_FrameStride,
91 GetNumMfccFeatureVectors(asrModel));
92
93 /* Initialise ASR post-processing. */
94 const uint32_t outputCtxLen = GetOutputContextLen(asrModel, arm::app::asr::g_ctxLen);
95 const uint32_t blankTokenIdx = 28;
96 arm::app::audio::asr::Postprocess postp(
97 outputCtxLen,
98 GetOutputInnerLen(asrModel, outputCtxLen),
99 blankTokenIdx);
100
101 /* Instantiate application context. */
102 arm::app::ApplicationContext caseContext;
103
Isabella Gottardi8df12f32021-04-07 17:15:31 +0100104 arm::app::Profiler profiler{&platform, "kws_asr"};
105 caseContext.Set<arm::app::Profiler&>("profiler", profiler);
106
alexander3c798932021-03-26 21:42:19 +0000107 caseContext.Set<hal_platform&>("platform", platform);
108 caseContext.Set<arm::app::Model&>("kwsmodel", kwsModel);
109 caseContext.Set<arm::app::Model&>("asrmodel", asrModel);
110 caseContext.Set<uint32_t>("clipIndex", 0);
111 caseContext.Set<uint32_t>("ctxLen", arm::app::asr::g_ctxLen); /* Left and right context length (MFCC feat vectors). */
112 caseContext.Set<int>("kwsframeLength", arm::app::kws::g_FrameLength);
113 caseContext.Set<int>("kwsframeStride", arm::app::kws::g_FrameStride);
114 caseContext.Set<float>("kwsscoreThreshold", arm::app::kws::g_ScoreThreshold); /* Normalised score threshold. */
115 caseContext.Set<uint32_t >("kwsNumMfcc", arm::app::kws::g_NumMfcc);
116 caseContext.Set<uint32_t >("kwsNumAudioWins", arm::app::kws::g_NumAudioWins);
117
118 caseContext.Set<int>("asrframeLength", arm::app::asr::g_FrameLength);
119 caseContext.Set<int>("asrframeStride", arm::app::asr::g_FrameStride);
120 caseContext.Set<float>("asrscoreThreshold", arm::app::asr::g_ScoreThreshold); /* Normalised score threshold. */
121
122 KwsClassifier kwsClassifier; /* Classifier wrapper object. */
123 arm::app::AsrClassifier asrClassifier; /* Classifier wrapper object. */
124 caseContext.Set<arm::app::Classifier&>("kwsclassifier", kwsClassifier);
125 caseContext.Set<arm::app::AsrClassifier&>("asrclassifier", asrClassifier);
126
127 caseContext.Set<arm::app::audio::asr::Preprocess&>("preprocess", prep);
128 caseContext.Set<arm::app::audio::asr::Postprocess&>("postprocess", postp);
129
130 std::vector<std::string> asrLabels;
131 arm::app::asr::GetLabelsVector(asrLabels);
132 std::vector<std::string> kwsLabels;
133 arm::app::kws::GetLabelsVector(kwsLabels);
134 caseContext.Set<const std::vector <std::string>&>("asrlabels", asrLabels);
135 caseContext.Set<const std::vector <std::string>&>("kwslabels", kwsLabels);
136
137 /* Index of the kws outputs we trigger ASR on. */
138 caseContext.Set<uint32_t>("keywordindex", 2);
139
140 /* Loop. */
141 bool executionSuccessful = true;
142 constexpr bool bUseMenu = NUMBER_OF_FILES > 1 ? true : false;
143
144 /* Loop. */
145 do {
146 int menuOption = MENU_OPT_RUN_INF_NEXT;
147 if (bUseMenu) {
148 DisplayMenu();
149 menuOption = arm::app::ReadUserInputAsInt(platform);
150 printf("\n");
151 }
152 switch (menuOption) {
153 case MENU_OPT_RUN_INF_NEXT:
154 executionSuccessful = ClassifyAudioHandler(
155 caseContext,
156 caseContext.Get<uint32_t>("clipIndex"),
157 false);
158 break;
159 case MENU_OPT_RUN_INF_CHOSEN: {
160 printf(" Enter the audio clip index [0, %d]: ",
161 NUMBER_OF_FILES-1);
162 auto clipIndex = static_cast<uint32_t>(
163 arm::app::ReadUserInputAsInt(platform));
164 executionSuccessful = ClassifyAudioHandler(caseContext,
165 clipIndex,
166 false);
167 break;
168 }
169 case MENU_OPT_RUN_INF_ALL:
170 executionSuccessful = ClassifyAudioHandler(
171 caseContext,
172 caseContext.Get<uint32_t>("clipIndex"),
173 true);
174 break;
175 case MENU_OPT_SHOW_MODEL_INFO:
176 executionSuccessful = kwsModel.ShowModelInfoHandler();
177 executionSuccessful = asrModel.ShowModelInfoHandler();
178 break;
179 case MENU_OPT_LIST_AUDIO_CLIPS:
180 executionSuccessful = ListFilesHandler(caseContext);
181 break;
182 default:
183 printf("Incorrect choice, try again.");
184 break;
185 }
186 } while (executionSuccessful && bUseMenu);
187 info("Main loop terminated.\n");
188}
189
190static uint32_t GetNumMfccFeatures(const arm::app::Model& model)
191{
192 TfLiteTensor* inputTensor = model.GetInputTensor(0);
193 const int inputCols = inputTensor->dims->data[arm::app::Wav2LetterModel::ms_inputColsIdx];
194 if (0 != inputCols % 3) {
195 printf_err("Number of input columns is not a multiple of 3\n");
196 }
197 return std::max(inputCols/3, 0);
198}
199
200static uint32_t GetNumMfccFeatureVectors(const arm::app::Model& model)
201{
202 TfLiteTensor* inputTensor = model.GetInputTensor(0);
203 const int inputRows = inputTensor->dims->data[arm::app::Wav2LetterModel::ms_inputRowsIdx];
204 return std::max(inputRows, 0);
205}
206
207static uint32_t GetOutputContextLen(const arm::app::Model& model, const uint32_t inputCtxLen)
208{
209 const uint32_t inputRows = GetNumMfccFeatureVectors(model);
210 const uint32_t inputInnerLen = inputRows - (2 * inputCtxLen);
211 constexpr uint32_t ms_outputRowsIdx = arm::app::Wav2LetterModel::ms_outputRowsIdx;
212
213 /* Check to make sure that the input tensor supports the above context and inner lengths. */
214 if (inputRows <= 2 * inputCtxLen || inputRows <= inputInnerLen) {
215 printf_err("Input rows not compatible with ctx of %u\n",
216 inputCtxLen);
217 return 0;
218 }
219
220 TfLiteTensor* outputTensor = model.GetOutputTensor(0);
221 const uint32_t outputRows = std::max(outputTensor->dims->data[ms_outputRowsIdx], 0);
222
223 const float tensorColRatio = static_cast<float>(inputRows)/
224 static_cast<float>(outputRows);
225
226 return std::round(static_cast<float>(inputCtxLen)/tensorColRatio);
227}
228
229static uint32_t GetOutputInnerLen(const arm::app::Model& model,
230 const uint32_t outputCtxLen)
231{
232 constexpr uint32_t ms_outputRowsIdx = arm::app::Wav2LetterModel::ms_outputRowsIdx;
233 TfLiteTensor* outputTensor = model.GetOutputTensor(0);
234 const uint32_t outputRows = std::max(outputTensor->dims->data[ms_outputRowsIdx], 0);
235 return (outputRows - (2 * outputCtxLen));
236}