blob: 350d34b27260b587af2cd5fa23300f68f8bc8c2d [file] [log] [blame]
/*
 * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17#include "UseCaseHandler.hpp"
18
19#include "InputFiles.hpp"
20#include "Classifier.hpp"
Kshitij Sisodia76a15802021-12-24 11:05:11 +000021#include "MicroNetKwsModel.hpp"
alexander3c798932021-03-26 21:42:19 +000022#include "hal.h"
alexander3c798932021-03-26 21:42:19 +000023#include "AudioUtils.hpp"
Richard Burtoned35a6f2022-02-14 11:55:35 +000024#include "ImageUtils.hpp"
alexander3c798932021-03-26 21:42:19 +000025#include "UseCaseCommonUtils.hpp"
26#include "KwsResult.hpp"
alexander31ae9f02022-02-10 16:15:54 +000027#include "log_macros.h"
Richard Burtone6398cd2022-04-13 11:58:28 +010028#include "KwsProcessing.hpp"
alexander3c798932021-03-26 21:42:19 +000029
30#include <vector>
alexander3c798932021-03-26 21:42:19 +000031
32using KwsClassifier = arm::app::Classifier;
33
34namespace arm {
35namespace app {
36
Kshitij Sisodia68fdd112022-04-06 13:03:20 +010037
/**
 * @brief       Presents KWS inference results.
 * @param[in]   results   Vector of KWS classification results to be displayed.
 * @return      true if successful, false otherwise.
 **/
Kshitij Sisodia68fdd112022-04-06 13:03:20 +010043 static bool PresentInferenceResult(const std::vector<arm::app::kws::KwsResult>& results);
alexander3c798932021-03-26 21:42:19 +000044
Richard Burtone6398cd2022-04-13 11:58:28 +010045 /* KWS inference handler. */
alexander3c798932021-03-26 21:42:19 +000046 bool ClassifyAudioHandler(ApplicationContext& ctx, uint32_t clipIndex, bool runAll)
47 {
Isabella Gottardi8df12f32021-04-07 17:15:31 +010048 auto& profiler = ctx.Get<Profiler&>("profiler");
Richard Burtone6398cd2022-04-13 11:58:28 +010049 auto& model = ctx.Get<Model&>("model");
50 const auto mfccFrameLength = ctx.Get<int>("frameLength");
51 const auto mfccFrameStride = ctx.Get<int>("frameStride");
52 const auto scoreThreshold = ctx.Get<float>("scoreThreshold");
53 /* If the request has a valid size, set the audio index. */
54 if (clipIndex < NUMBER_OF_FILES) {
55 if (!SetAppCtxIfmIdx(ctx, clipIndex,"clipIndex")) {
56 return false;
57 }
58 }
59 auto initialClipIdx = ctx.Get<uint32_t>("clipIndex");
alexander3c798932021-03-26 21:42:19 +000060
61 constexpr uint32_t dataPsnTxtInfStartX = 20;
62 constexpr uint32_t dataPsnTxtInfStartY = 40;
63 constexpr int minTensorDims = static_cast<int>(
Kshitij Sisodia76a15802021-12-24 11:05:11 +000064 (arm::app::MicroNetKwsModel::ms_inputRowsIdx > arm::app::MicroNetKwsModel::ms_inputColsIdx)?
65 arm::app::MicroNetKwsModel::ms_inputRowsIdx : arm::app::MicroNetKwsModel::ms_inputColsIdx);
alexander3c798932021-03-26 21:42:19 +000066
alexander3c798932021-03-26 21:42:19 +000067
alexander3c798932021-03-26 21:42:19 +000068 if (!model.IsInited()) {
69 printf_err("Model is not initialised! Terminating processing.\n");
70 return false;
71 }
72
alexander3c798932021-03-26 21:42:19 +000073 TfLiteTensor* inputTensor = model.GetInputTensor(0);
alexander3c798932021-03-26 21:42:19 +000074 if (!inputTensor->dims) {
75 printf_err("Invalid input tensor dims\n");
76 return false;
77 } else if (inputTensor->dims->size < minTensorDims) {
78 printf_err("Input tensor dimension should be >= %d\n", minTensorDims);
79 return false;
80 }
81
Richard Burtone6398cd2022-04-13 11:58:28 +010082 /* Get input shape for feature extraction. */
alexander3c798932021-03-26 21:42:19 +000083 TfLiteIntArray* inputShape = model.GetInputShape(0);
Richard Burtone6398cd2022-04-13 11:58:28 +010084 const uint32_t numMfccFeatures = inputShape->data[arm::app::MicroNetKwsModel::ms_inputColsIdx];
alexander3c798932021-03-26 21:42:19 +000085
86 /* We expect to be sampling 1 second worth of data at a time.
87 * NOTE: This is only used for time stamp calculation. */
Richard Burtone6398cd2022-04-13 11:58:28 +010088 const float secondsPerSample = 1.0 / audio::MicroNetKwsMFCC::ms_defaultSamplingFreq;
89
90 /* Set up pre and post-processing. */
91 KWSPreProcess preprocess = KWSPreProcess(&model, numMfccFeatures, mfccFrameLength, mfccFrameStride);
92
93 std::vector<ClassificationResult> singleInfResult;
94 KWSPostProcess postprocess = KWSPostProcess(ctx.Get<KwsClassifier &>("classifier"), &model,
95 ctx.Get<std::vector<std::string>&>("labels"),
96 singleInfResult, scoreThreshold);
97
98 UseCaseRunner runner = UseCaseRunner(&preprocess, &postprocess, &model);
alexander3c798932021-03-26 21:42:19 +000099
100 do {
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100101 hal_lcd_clear(COLOR_BLACK);
Richard Burton9b8d67a2021-12-10 12:32:51 +0000102
alexander3c798932021-03-26 21:42:19 +0000103 auto currentIndex = ctx.Get<uint32_t>("clipIndex");
104
alexander3c798932021-03-26 21:42:19 +0000105 /* Creating a sliding window through the whole audio clip. */
106 auto audioDataSlider = audio::SlidingWindow<const int16_t>(
Richard Burtone6398cd2022-04-13 11:58:28 +0100107 get_audio_array(currentIndex),
108 get_audio_array_size(currentIndex),
109 preprocess.m_audioDataWindowSize, preprocess.m_audioDataStride);
alexander3c798932021-03-26 21:42:19 +0000110
Richard Burtone6398cd2022-04-13 11:58:28 +0100111 /* Declare a container to hold results from across the whole audio clip. */
112 std::vector<kws::KwsResult> finalResults;
alexander3c798932021-03-26 21:42:19 +0000113
114 /* Display message on the LCD - inference running. */
115 std::string str_inf{"Running inference... "};
Richard Burtone6398cd2022-04-13 11:58:28 +0100116 hal_lcd_display_text(str_inf.c_str(), str_inf.size(),
117 dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
Kshitij Sisodiaf9c19ea2021-05-07 16:08:14 +0100118 info("Running inference on audio clip %" PRIu32 " => %s\n", currentIndex,
alexander3c798932021-03-26 21:42:19 +0000119 get_filename(currentIndex));
120
121 /* Start sliding through audio clip. */
122 while (audioDataSlider.HasNext()) {
Richard Burtone6398cd2022-04-13 11:58:28 +0100123 const int16_t* inferenceWindow = audioDataSlider.Next();
alexander3c798932021-03-26 21:42:19 +0000124
125 /* The first window does not have cache ready. */
Richard Burtone6398cd2022-04-13 11:58:28 +0100126 preprocess.m_audioWindowIndex = audioDataSlider.Index();
alexander3c798932021-03-26 21:42:19 +0000127
128 info("Inference %zu/%zu\n", audioDataSlider.Index() + 1,
129 audioDataSlider.TotalStrides() + 1);
130
Richard Burtone6398cd2022-04-13 11:58:28 +0100131 /* Run the pre-processing, inference and post-processing. */
132 if (!runner.PreProcess(inferenceWindow, audio::MicroNetKwsMFCC::ms_defaultSamplingFreq)) {
alexander27b62d92021-05-04 20:46:08 +0100133 return false;
134 }
alexander3c798932021-03-26 21:42:19 +0000135
Richard Burtone6398cd2022-04-13 11:58:28 +0100136 profiler.StartProfiling("Inference");
137 if (!runner.RunInference()) {
138 return false;
139 }
140 profiler.StopProfiling();
alexander3c798932021-03-26 21:42:19 +0000141
Richard Burtone6398cd2022-04-13 11:58:28 +0100142 if (!runner.PostProcess()) {
143 return false;
144 }
145
146 /* Add results from this window to our final results vector. */
147 finalResults.emplace_back(kws::KwsResult(singleInfResult,
148 audioDataSlider.Index() * secondsPerSample * preprocess.m_audioDataStride,
149 audioDataSlider.Index(), postprocess.m_scoreThreshold));
alexander3c798932021-03-26 21:42:19 +0000150
151#if VERIFY_TEST_OUTPUT
Richard Burtone6398cd2022-04-13 11:58:28 +0100152 TfLiteTensor* outputTensor = model.GetOutputTensor(0);
alexander3c798932021-03-26 21:42:19 +0000153 arm::app::DumpTensor(outputTensor);
154#endif /* VERIFY_TEST_OUTPUT */
155 } /* while (audioDataSlider.HasNext()) */
156
157 /* Erase. */
158 str_inf = std::string(str_inf.size(), ' ');
Richard Burtone6398cd2022-04-13 11:58:28 +0100159 hal_lcd_display_text(str_inf.c_str(), str_inf.size(),
160 dataPsnTxtInfStartX, dataPsnTxtInfStartY, false);
alexander3c798932021-03-26 21:42:19 +0000161
Richard Burtone6398cd2022-04-13 11:58:28 +0100162 ctx.Set<std::vector<kws::KwsResult>>("results", finalResults);
alexander3c798932021-03-26 21:42:19 +0000163
Richard Burtone6398cd2022-04-13 11:58:28 +0100164 if (!PresentInferenceResult(finalResults)) {
alexander3c798932021-03-26 21:42:19 +0000165 return false;
166 }
167
Isabella Gottardi8df12f32021-04-07 17:15:31 +0100168 profiler.PrintProfilingResult();
169
Éanna Ó Catháin8f958872021-09-15 09:32:30 +0100170 IncrementAppCtxIfmIdx(ctx,"clipIndex");
alexander3c798932021-03-26 21:42:19 +0000171
Richard Burtone6398cd2022-04-13 11:58:28 +0100172 } while (runAll && ctx.Get<uint32_t>("clipIndex") != initialClipIdx);
alexander3c798932021-03-26 21:42:19 +0000173
174 return true;
175 }
176
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100177 static bool PresentInferenceResult(const std::vector<arm::app::kws::KwsResult>& results)
178 {
179 constexpr uint32_t dataPsnTxtStartX1 = 20;
180 constexpr uint32_t dataPsnTxtStartY1 = 30;
181 constexpr uint32_t dataPsnTxtYIncr = 16; /* Row index increment. */
182
183 hal_lcd_set_text_color(COLOR_GREEN);
184 info("Final results:\n");
185 info("Total number of inferences: %zu\n", results.size());
186
187 /* Display each result */
188 uint32_t rowIdx1 = dataPsnTxtStartY1 + 2 * dataPsnTxtYIncr;
189
Richard Burtone6398cd2022-04-13 11:58:28 +0100190 for (const auto & result : results) {
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100191
192 std::string topKeyword{"<none>"};
193 float score = 0.f;
Richard Burtone6398cd2022-04-13 11:58:28 +0100194 if (!result.m_resultVec.empty()) {
195 topKeyword = result.m_resultVec[0].m_label;
196 score = result.m_resultVec[0].m_normalisedVal;
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100197 }
198
199 std::string resultStr =
Richard Burtone6398cd2022-04-13 11:58:28 +0100200 std::string{"@"} + std::to_string(result.m_timeStamp) +
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100201 std::string{"s: "} + topKeyword + std::string{" ("} +
202 std::to_string(static_cast<int>(score * 100)) + std::string{"%)"};
203
Richard Burtone6398cd2022-04-13 11:58:28 +0100204 hal_lcd_display_text(resultStr.c_str(), resultStr.size(),
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100205 dataPsnTxtStartX1, rowIdx1, false);
206 rowIdx1 += dataPsnTxtYIncr;
207
Richard Burtone6398cd2022-04-13 11:58:28 +0100208 if (result.m_resultVec.empty()) {
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100209 info("For timestamp: %f (inference #: %" PRIu32
210 "); label: %s; threshold: %f\n",
Richard Burtone6398cd2022-04-13 11:58:28 +0100211 result.m_timeStamp, result.m_inferenceNumber,
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100212 topKeyword.c_str(),
Richard Burtone6398cd2022-04-13 11:58:28 +0100213 result.m_threshold);
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100214 } else {
Richard Burtone6398cd2022-04-13 11:58:28 +0100215 for (uint32_t j = 0; j < result.m_resultVec.size(); ++j) {
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100216 info("For timestamp: %f (inference #: %" PRIu32
217 "); label: %s, score: %f; threshold: %f\n",
Richard Burtone6398cd2022-04-13 11:58:28 +0100218 result.m_timeStamp,
219 result.m_inferenceNumber,
220 result.m_resultVec[j].m_label.c_str(),
221 result.m_resultVec[j].m_normalisedVal,
222 result.m_threshold);
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100223 }
224 }
225 }
226
227 return true;
228 }
229
alexander3c798932021-03-26 21:42:19 +0000230} /* namespace app */
Richard Burtone6398cd2022-04-13 11:58:28 +0100231} /* namespace arm */