blob: 61c6eb61d610c807d714d48615564874671cc90e [file] [log] [blame]
/*
 * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17#include "UseCaseHandler.hpp"
18
19#include "InputFiles.hpp"
20#include "Classifier.hpp"
Kshitij Sisodia76a15802021-12-24 11:05:11 +000021#include "MicroNetKwsModel.hpp"
alexander3c798932021-03-26 21:42:19 +000022#include "hal.h"
alexander3c798932021-03-26 21:42:19 +000023#include "AudioUtils.hpp"
Richard Burtoned35a6f2022-02-14 11:55:35 +000024#include "ImageUtils.hpp"
alexander3c798932021-03-26 21:42:19 +000025#include "UseCaseCommonUtils.hpp"
26#include "KwsResult.hpp"
alexander31ae9f02022-02-10 16:15:54 +000027#include "log_macros.h"
Richard Burtone6398cd2022-04-13 11:58:28 +010028#include "KwsProcessing.hpp"
alexander3c798932021-03-26 21:42:19 +000029
30#include <vector>
alexander3c798932021-03-26 21:42:19 +000031
32using KwsClassifier = arm::app::Classifier;
33
34namespace arm {
35namespace app {
36
/**
 * @brief       Presents KWS inference results on the LCD and in the log.
 * @param[in]   results   Vector of KWS classification results to be displayed.
 * @return      true if successful, false otherwise.
 **/
Richard Burtonb40ecf82022-04-22 16:14:57 +010042 static bool PresentInferenceResult(const std::vector<kws::KwsResult>& results);
alexander3c798932021-03-26 21:42:19 +000043
Richard Burtone6398cd2022-04-13 11:58:28 +010044 /* KWS inference handler. */
alexander3c798932021-03-26 21:42:19 +000045 bool ClassifyAudioHandler(ApplicationContext& ctx, uint32_t clipIndex, bool runAll)
46 {
Isabella Gottardi8df12f32021-04-07 17:15:31 +010047 auto& profiler = ctx.Get<Profiler&>("profiler");
Richard Burtone6398cd2022-04-13 11:58:28 +010048 auto& model = ctx.Get<Model&>("model");
49 const auto mfccFrameLength = ctx.Get<int>("frameLength");
50 const auto mfccFrameStride = ctx.Get<int>("frameStride");
51 const auto scoreThreshold = ctx.Get<float>("scoreThreshold");
Richard Burtonb40ecf82022-04-22 16:14:57 +010052
Richard Burtone6398cd2022-04-13 11:58:28 +010053 /* If the request has a valid size, set the audio index. */
54 if (clipIndex < NUMBER_OF_FILES) {
55 if (!SetAppCtxIfmIdx(ctx, clipIndex,"clipIndex")) {
56 return false;
57 }
58 }
59 auto initialClipIdx = ctx.Get<uint32_t>("clipIndex");
alexander3c798932021-03-26 21:42:19 +000060
61 constexpr uint32_t dataPsnTxtInfStartX = 20;
62 constexpr uint32_t dataPsnTxtInfStartY = 40;
63 constexpr int minTensorDims = static_cast<int>(
Richard Burtonb40ecf82022-04-22 16:14:57 +010064 (MicroNetKwsModel::ms_inputRowsIdx > MicroNetKwsModel::ms_inputColsIdx)?
65 MicroNetKwsModel::ms_inputRowsIdx : MicroNetKwsModel::ms_inputColsIdx);
alexander3c798932021-03-26 21:42:19 +000066
alexander3c798932021-03-26 21:42:19 +000067 if (!model.IsInited()) {
68 printf_err("Model is not initialised! Terminating processing.\n");
69 return false;
70 }
71
Richard Burtonb40ecf82022-04-22 16:14:57 +010072 /* Get Input and Output tensors for pre/post processing. */
alexander3c798932021-03-26 21:42:19 +000073 TfLiteTensor* inputTensor = model.GetInputTensor(0);
Richard Burtonb40ecf82022-04-22 16:14:57 +010074 TfLiteTensor* outputTensor = model.GetOutputTensor(0);
alexander3c798932021-03-26 21:42:19 +000075 if (!inputTensor->dims) {
76 printf_err("Invalid input tensor dims\n");
77 return false;
78 } else if (inputTensor->dims->size < minTensorDims) {
79 printf_err("Input tensor dimension should be >= %d\n", minTensorDims);
80 return false;
81 }
82
Richard Burtone6398cd2022-04-13 11:58:28 +010083 /* Get input shape for feature extraction. */
alexander3c798932021-03-26 21:42:19 +000084 TfLiteIntArray* inputShape = model.GetInputShape(0);
Richard Burtonb40ecf82022-04-22 16:14:57 +010085 const uint32_t numMfccFeatures = inputShape->data[MicroNetKwsModel::ms_inputColsIdx];
86 const uint32_t numMfccFrames = inputShape->data[arm::app::MicroNetKwsModel::ms_inputRowsIdx];
alexander3c798932021-03-26 21:42:19 +000087
88 /* We expect to be sampling 1 second worth of data at a time.
89 * NOTE: This is only used for time stamp calculation. */
Richard Burtone6398cd2022-04-13 11:58:28 +010090 const float secondsPerSample = 1.0 / audio::MicroNetKwsMFCC::ms_defaultSamplingFreq;
91
92 /* Set up pre and post-processing. */
Richard Burtonb40ecf82022-04-22 16:14:57 +010093 KwsPreProcess preProcess = KwsPreProcess(inputTensor, numMfccFeatures, numMfccFrames,
94 mfccFrameLength, mfccFrameStride);
Richard Burtone6398cd2022-04-13 11:58:28 +010095
96 std::vector<ClassificationResult> singleInfResult;
Richard Burtonb40ecf82022-04-22 16:14:57 +010097 KwsPostProcess postProcess = KwsPostProcess(outputTensor, ctx.Get<KwsClassifier &>("classifier"),
Richard Burtone6398cd2022-04-13 11:58:28 +010098 ctx.Get<std::vector<std::string>&>("labels"),
Richard Burtonc2911442022-04-22 09:08:21 +010099 singleInfResult);
Richard Burtone6398cd2022-04-13 11:58:28 +0100100
Richard Burtonb40ecf82022-04-22 16:14:57 +0100101 /* Loop to process audio clips. */
alexander3c798932021-03-26 21:42:19 +0000102 do {
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100103 hal_lcd_clear(COLOR_BLACK);
Richard Burton9b8d67a2021-12-10 12:32:51 +0000104
alexander3c798932021-03-26 21:42:19 +0000105 auto currentIndex = ctx.Get<uint32_t>("clipIndex");
106
alexander3c798932021-03-26 21:42:19 +0000107 /* Creating a sliding window through the whole audio clip. */
108 auto audioDataSlider = audio::SlidingWindow<const int16_t>(
Richard Burtone6398cd2022-04-13 11:58:28 +0100109 get_audio_array(currentIndex),
110 get_audio_array_size(currentIndex),
Richard Burtonb40ecf82022-04-22 16:14:57 +0100111 preProcess.m_audioDataWindowSize, preProcess.m_audioDataStride);
alexander3c798932021-03-26 21:42:19 +0000112
Richard Burtone6398cd2022-04-13 11:58:28 +0100113 /* Declare a container to hold results from across the whole audio clip. */
114 std::vector<kws::KwsResult> finalResults;
alexander3c798932021-03-26 21:42:19 +0000115
116 /* Display message on the LCD - inference running. */
117 std::string str_inf{"Running inference... "};
Richard Burtone6398cd2022-04-13 11:58:28 +0100118 hal_lcd_display_text(str_inf.c_str(), str_inf.size(),
119 dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
Kshitij Sisodiaf9c19ea2021-05-07 16:08:14 +0100120 info("Running inference on audio clip %" PRIu32 " => %s\n", currentIndex,
alexander3c798932021-03-26 21:42:19 +0000121 get_filename(currentIndex));
122
123 /* Start sliding through audio clip. */
124 while (audioDataSlider.HasNext()) {
Richard Burtone6398cd2022-04-13 11:58:28 +0100125 const int16_t* inferenceWindow = audioDataSlider.Next();
alexander3c798932021-03-26 21:42:19 +0000126
127 /* The first window does not have cache ready. */
Richard Burtonb40ecf82022-04-22 16:14:57 +0100128 preProcess.m_audioWindowIndex = audioDataSlider.Index();
alexander3c798932021-03-26 21:42:19 +0000129
130 info("Inference %zu/%zu\n", audioDataSlider.Index() + 1,
131 audioDataSlider.TotalStrides() + 1);
132
Richard Burtone6398cd2022-04-13 11:58:28 +0100133 /* Run the pre-processing, inference and post-processing. */
Richard Burtonb40ecf82022-04-22 16:14:57 +0100134 if (!preProcess.DoPreProcess(inferenceWindow, audio::MicroNetKwsMFCC::ms_defaultSamplingFreq)) {
135 printf_err("Pre-processing failed.");
alexander27b62d92021-05-04 20:46:08 +0100136 return false;
137 }
alexander3c798932021-03-26 21:42:19 +0000138
Richard Burtonb40ecf82022-04-22 16:14:57 +0100139 if (!RunInference(model, profiler)) {
140 printf_err("Inference failed.");
Richard Burtone6398cd2022-04-13 11:58:28 +0100141 return false;
142 }
alexander3c798932021-03-26 21:42:19 +0000143
Richard Burtonb40ecf82022-04-22 16:14:57 +0100144 if (!postProcess.DoPostProcess()) {
145 printf_err("Post-processing failed.");
Richard Burtone6398cd2022-04-13 11:58:28 +0100146 return false;
147 }
148
149 /* Add results from this window to our final results vector. */
150 finalResults.emplace_back(kws::KwsResult(singleInfResult,
Richard Burtonb40ecf82022-04-22 16:14:57 +0100151 audioDataSlider.Index() * secondsPerSample * preProcess.m_audioDataStride,
Richard Burtonc2911442022-04-22 09:08:21 +0100152 audioDataSlider.Index(), scoreThreshold));
alexander3c798932021-03-26 21:42:19 +0000153
154#if VERIFY_TEST_OUTPUT
Richard Burtonb40ecf82022-04-22 16:14:57 +0100155 DumpTensor(outputTensor);
alexander3c798932021-03-26 21:42:19 +0000156#endif /* VERIFY_TEST_OUTPUT */
157 } /* while (audioDataSlider.HasNext()) */
158
159 /* Erase. */
160 str_inf = std::string(str_inf.size(), ' ');
Richard Burtone6398cd2022-04-13 11:58:28 +0100161 hal_lcd_display_text(str_inf.c_str(), str_inf.size(),
162 dataPsnTxtInfStartX, dataPsnTxtInfStartY, false);
alexander3c798932021-03-26 21:42:19 +0000163
Richard Burtone6398cd2022-04-13 11:58:28 +0100164 ctx.Set<std::vector<kws::KwsResult>>("results", finalResults);
alexander3c798932021-03-26 21:42:19 +0000165
Richard Burtone6398cd2022-04-13 11:58:28 +0100166 if (!PresentInferenceResult(finalResults)) {
alexander3c798932021-03-26 21:42:19 +0000167 return false;
168 }
169
Isabella Gottardi8df12f32021-04-07 17:15:31 +0100170 profiler.PrintProfilingResult();
171
Éanna Ó Catháin8f958872021-09-15 09:32:30 +0100172 IncrementAppCtxIfmIdx(ctx,"clipIndex");
alexander3c798932021-03-26 21:42:19 +0000173
Richard Burtone6398cd2022-04-13 11:58:28 +0100174 } while (runAll && ctx.Get<uint32_t>("clipIndex") != initialClipIdx);
alexander3c798932021-03-26 21:42:19 +0000175
176 return true;
177 }
178
Richard Burtonb40ecf82022-04-22 16:14:57 +0100179 static bool PresentInferenceResult(const std::vector<kws::KwsResult>& results)
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100180 {
181 constexpr uint32_t dataPsnTxtStartX1 = 20;
182 constexpr uint32_t dataPsnTxtStartY1 = 30;
183 constexpr uint32_t dataPsnTxtYIncr = 16; /* Row index increment. */
184
185 hal_lcd_set_text_color(COLOR_GREEN);
186 info("Final results:\n");
187 info("Total number of inferences: %zu\n", results.size());
188
189 /* Display each result */
190 uint32_t rowIdx1 = dataPsnTxtStartY1 + 2 * dataPsnTxtYIncr;
191
Richard Burtonb40ecf82022-04-22 16:14:57 +0100192 for (const auto& result : results) {
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100193
194 std::string topKeyword{"<none>"};
195 float score = 0.f;
Richard Burtone6398cd2022-04-13 11:58:28 +0100196 if (!result.m_resultVec.empty()) {
197 topKeyword = result.m_resultVec[0].m_label;
198 score = result.m_resultVec[0].m_normalisedVal;
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100199 }
200
201 std::string resultStr =
Richard Burtone6398cd2022-04-13 11:58:28 +0100202 std::string{"@"} + std::to_string(result.m_timeStamp) +
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100203 std::string{"s: "} + topKeyword + std::string{" ("} +
204 std::to_string(static_cast<int>(score * 100)) + std::string{"%)"};
205
Richard Burtone6398cd2022-04-13 11:58:28 +0100206 hal_lcd_display_text(resultStr.c_str(), resultStr.size(),
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100207 dataPsnTxtStartX1, rowIdx1, false);
208 rowIdx1 += dataPsnTxtYIncr;
209
Richard Burtone6398cd2022-04-13 11:58:28 +0100210 if (result.m_resultVec.empty()) {
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100211 info("For timestamp: %f (inference #: %" PRIu32
212 "); label: %s; threshold: %f\n",
Richard Burtone6398cd2022-04-13 11:58:28 +0100213 result.m_timeStamp, result.m_inferenceNumber,
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100214 topKeyword.c_str(),
Richard Burtone6398cd2022-04-13 11:58:28 +0100215 result.m_threshold);
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100216 } else {
Richard Burtone6398cd2022-04-13 11:58:28 +0100217 for (uint32_t j = 0; j < result.m_resultVec.size(); ++j) {
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100218 info("For timestamp: %f (inference #: %" PRIu32
219 "); label: %s, score: %f; threshold: %f\n",
Richard Burtone6398cd2022-04-13 11:58:28 +0100220 result.m_timeStamp,
221 result.m_inferenceNumber,
222 result.m_resultVec[j].m_label.c_str(),
223 result.m_resultVec[j].m_normalisedVal,
224 result.m_threshold);
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100225 }
226 }
227 }
228
229 return true;
230 }
231
alexander3c798932021-03-26 21:42:19 +0000232} /* namespace app */
Richard Burtone6398cd2022-04-13 11:58:28 +0100233} /* namespace arm */