blob: c20c32bd958acb2c658eadae6af09133b6d861e8 [file] [log] [blame]
/*
 * SPDX-FileCopyrightText: Copyright 2021-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "UseCaseHandler.hpp"

#include "InputFiles.hpp"
#include "KwsClassifier.hpp"
#include "MicroNetKwsModel.hpp"
#include "hal.h"
#include "AudioUtils.hpp"
#include "ImageUtils.hpp"
#include "UseCaseCommonUtils.hpp"
#include "KwsResult.hpp"
#include "log_macros.h"
#include "KwsProcessing.hpp"

#include <cinttypes>
#include <cstdint>
#include <string>
#include <vector>
alexander3c798932021-03-26 21:42:19 +000031
alexander3c798932021-03-26 21:42:19 +000032namespace arm {
33namespace app {
34
alexander3c798932021-03-26 21:42:19 +000035 /**
Richard Burtone6398cd2022-04-13 11:58:28 +010036 * @brief Presents KWS inference results.
37 * @param[in] results Vector of KWS classification results to be displayed.
alexander3c798932021-03-26 21:42:19 +000038 * @return true if successful, false otherwise.
39 **/
Richard Burtonb40ecf82022-04-22 16:14:57 +010040 static bool PresentInferenceResult(const std::vector<kws::KwsResult>& results);
alexander3c798932021-03-26 21:42:19 +000041
Richard Burtone6398cd2022-04-13 11:58:28 +010042 /* KWS inference handler. */
alexander3c798932021-03-26 21:42:19 +000043 bool ClassifyAudioHandler(ApplicationContext& ctx, uint32_t clipIndex, bool runAll)
44 {
Isabella Gottardi8df12f32021-04-07 17:15:31 +010045 auto& profiler = ctx.Get<Profiler&>("profiler");
Richard Burtone6398cd2022-04-13 11:58:28 +010046 auto& model = ctx.Get<Model&>("model");
47 const auto mfccFrameLength = ctx.Get<int>("frameLength");
48 const auto mfccFrameStride = ctx.Get<int>("frameStride");
49 const auto scoreThreshold = ctx.Get<float>("scoreThreshold");
Richard Burtonb40ecf82022-04-22 16:14:57 +010050
Richard Burtone6398cd2022-04-13 11:58:28 +010051 /* If the request has a valid size, set the audio index. */
52 if (clipIndex < NUMBER_OF_FILES) {
53 if (!SetAppCtxIfmIdx(ctx, clipIndex,"clipIndex")) {
54 return false;
55 }
56 }
57 auto initialClipIdx = ctx.Get<uint32_t>("clipIndex");
alexander3c798932021-03-26 21:42:19 +000058
59 constexpr uint32_t dataPsnTxtInfStartX = 20;
60 constexpr uint32_t dataPsnTxtInfStartY = 40;
61 constexpr int minTensorDims = static_cast<int>(
Richard Burtonb40ecf82022-04-22 16:14:57 +010062 (MicroNetKwsModel::ms_inputRowsIdx > MicroNetKwsModel::ms_inputColsIdx)?
63 MicroNetKwsModel::ms_inputRowsIdx : MicroNetKwsModel::ms_inputColsIdx);
alexander3c798932021-03-26 21:42:19 +000064
alexander3c798932021-03-26 21:42:19 +000065 if (!model.IsInited()) {
66 printf_err("Model is not initialised! Terminating processing.\n");
67 return false;
68 }
69
Richard Burtonb40ecf82022-04-22 16:14:57 +010070 /* Get Input and Output tensors for pre/post processing. */
alexander3c798932021-03-26 21:42:19 +000071 TfLiteTensor* inputTensor = model.GetInputTensor(0);
Richard Burtonb40ecf82022-04-22 16:14:57 +010072 TfLiteTensor* outputTensor = model.GetOutputTensor(0);
alexander3c798932021-03-26 21:42:19 +000073 if (!inputTensor->dims) {
74 printf_err("Invalid input tensor dims\n");
75 return false;
76 } else if (inputTensor->dims->size < minTensorDims) {
77 printf_err("Input tensor dimension should be >= %d\n", minTensorDims);
78 return false;
79 }
80
Richard Burtone6398cd2022-04-13 11:58:28 +010081 /* Get input shape for feature extraction. */
alexander3c798932021-03-26 21:42:19 +000082 TfLiteIntArray* inputShape = model.GetInputShape(0);
Richard Burtonb40ecf82022-04-22 16:14:57 +010083 const uint32_t numMfccFeatures = inputShape->data[MicroNetKwsModel::ms_inputColsIdx];
84 const uint32_t numMfccFrames = inputShape->data[arm::app::MicroNetKwsModel::ms_inputRowsIdx];
alexander3c798932021-03-26 21:42:19 +000085
86 /* We expect to be sampling 1 second worth of data at a time.
87 * NOTE: This is only used for time stamp calculation. */
Richard Burtone6398cd2022-04-13 11:58:28 +010088 const float secondsPerSample = 1.0 / audio::MicroNetKwsMFCC::ms_defaultSamplingFreq;
89
90 /* Set up pre and post-processing. */
Richard Burtonb40ecf82022-04-22 16:14:57 +010091 KwsPreProcess preProcess = KwsPreProcess(inputTensor, numMfccFeatures, numMfccFrames,
92 mfccFrameLength, mfccFrameStride);
Richard Burtone6398cd2022-04-13 11:58:28 +010093
94 std::vector<ClassificationResult> singleInfResult;
Richard Burtonb40ecf82022-04-22 16:14:57 +010095 KwsPostProcess postProcess = KwsPostProcess(outputTensor, ctx.Get<KwsClassifier &>("classifier"),
Richard Burtone6398cd2022-04-13 11:58:28 +010096 ctx.Get<std::vector<std::string>&>("labels"),
Richard Burtonc2911442022-04-22 09:08:21 +010097 singleInfResult);
Richard Burtone6398cd2022-04-13 11:58:28 +010098
Richard Burtonb40ecf82022-04-22 16:14:57 +010099 /* Loop to process audio clips. */
alexander3c798932021-03-26 21:42:19 +0000100 do {
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100101 hal_lcd_clear(COLOR_BLACK);
Richard Burton9b8d67a2021-12-10 12:32:51 +0000102
alexander3c798932021-03-26 21:42:19 +0000103 auto currentIndex = ctx.Get<uint32_t>("clipIndex");
104
alexander3c798932021-03-26 21:42:19 +0000105 /* Creating a sliding window through the whole audio clip. */
106 auto audioDataSlider = audio::SlidingWindow<const int16_t>(
Richard Burtone6398cd2022-04-13 11:58:28 +0100107 get_audio_array(currentIndex),
108 get_audio_array_size(currentIndex),
Richard Burtonb40ecf82022-04-22 16:14:57 +0100109 preProcess.m_audioDataWindowSize, preProcess.m_audioDataStride);
alexander3c798932021-03-26 21:42:19 +0000110
Richard Burtone6398cd2022-04-13 11:58:28 +0100111 /* Declare a container to hold results from across the whole audio clip. */
112 std::vector<kws::KwsResult> finalResults;
alexander3c798932021-03-26 21:42:19 +0000113
114 /* Display message on the LCD - inference running. */
115 std::string str_inf{"Running inference... "};
Richard Burtone6398cd2022-04-13 11:58:28 +0100116 hal_lcd_display_text(str_inf.c_str(), str_inf.size(),
117 dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
Kshitij Sisodiaf9c19ea2021-05-07 16:08:14 +0100118 info("Running inference on audio clip %" PRIu32 " => %s\n", currentIndex,
alexander3c798932021-03-26 21:42:19 +0000119 get_filename(currentIndex));
120
121 /* Start sliding through audio clip. */
122 while (audioDataSlider.HasNext()) {
Richard Burtone6398cd2022-04-13 11:58:28 +0100123 const int16_t* inferenceWindow = audioDataSlider.Next();
alexander3c798932021-03-26 21:42:19 +0000124
alexander3c798932021-03-26 21:42:19 +0000125 info("Inference %zu/%zu\n", audioDataSlider.Index() + 1,
126 audioDataSlider.TotalStrides() + 1);
127
Richard Burtone6398cd2022-04-13 11:58:28 +0100128 /* Run the pre-processing, inference and post-processing. */
Richard Burtonec5e99b2022-10-05 11:00:37 +0100129 if (!preProcess.DoPreProcess(inferenceWindow, audioDataSlider.Index())) {
Richard Burtonb40ecf82022-04-22 16:14:57 +0100130 printf_err("Pre-processing failed.");
alexander27b62d92021-05-04 20:46:08 +0100131 return false;
132 }
alexander3c798932021-03-26 21:42:19 +0000133
Richard Burtonb40ecf82022-04-22 16:14:57 +0100134 if (!RunInference(model, profiler)) {
135 printf_err("Inference failed.");
Richard Burtone6398cd2022-04-13 11:58:28 +0100136 return false;
137 }
alexander3c798932021-03-26 21:42:19 +0000138
Richard Burtonb40ecf82022-04-22 16:14:57 +0100139 if (!postProcess.DoPostProcess()) {
140 printf_err("Post-processing failed.");
Richard Burtone6398cd2022-04-13 11:58:28 +0100141 return false;
142 }
143
144 /* Add results from this window to our final results vector. */
145 finalResults.emplace_back(kws::KwsResult(singleInfResult,
Richard Burtonb40ecf82022-04-22 16:14:57 +0100146 audioDataSlider.Index() * secondsPerSample * preProcess.m_audioDataStride,
Richard Burtonc2911442022-04-22 09:08:21 +0100147 audioDataSlider.Index(), scoreThreshold));
alexander3c798932021-03-26 21:42:19 +0000148
149#if VERIFY_TEST_OUTPUT
Richard Burtonb40ecf82022-04-22 16:14:57 +0100150 DumpTensor(outputTensor);
alexander3c798932021-03-26 21:42:19 +0000151#endif /* VERIFY_TEST_OUTPUT */
152 } /* while (audioDataSlider.HasNext()) */
153
154 /* Erase. */
155 str_inf = std::string(str_inf.size(), ' ');
Richard Burtone6398cd2022-04-13 11:58:28 +0100156 hal_lcd_display_text(str_inf.c_str(), str_inf.size(),
157 dataPsnTxtInfStartX, dataPsnTxtInfStartY, false);
alexander3c798932021-03-26 21:42:19 +0000158
Richard Burtone6398cd2022-04-13 11:58:28 +0100159 ctx.Set<std::vector<kws::KwsResult>>("results", finalResults);
alexander3c798932021-03-26 21:42:19 +0000160
Richard Burtone6398cd2022-04-13 11:58:28 +0100161 if (!PresentInferenceResult(finalResults)) {
alexander3c798932021-03-26 21:42:19 +0000162 return false;
163 }
164
Isabella Gottardi8df12f32021-04-07 17:15:31 +0100165 profiler.PrintProfilingResult();
166
Éanna Ó Catháin8f958872021-09-15 09:32:30 +0100167 IncrementAppCtxIfmIdx(ctx,"clipIndex");
alexander3c798932021-03-26 21:42:19 +0000168
Richard Burtone6398cd2022-04-13 11:58:28 +0100169 } while (runAll && ctx.Get<uint32_t>("clipIndex") != initialClipIdx);
alexander3c798932021-03-26 21:42:19 +0000170
171 return true;
172 }
173
Richard Burtonb40ecf82022-04-22 16:14:57 +0100174 static bool PresentInferenceResult(const std::vector<kws::KwsResult>& results)
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100175 {
176 constexpr uint32_t dataPsnTxtStartX1 = 20;
177 constexpr uint32_t dataPsnTxtStartY1 = 30;
178 constexpr uint32_t dataPsnTxtYIncr = 16; /* Row index increment. */
179
180 hal_lcd_set_text_color(COLOR_GREEN);
181 info("Final results:\n");
182 info("Total number of inferences: %zu\n", results.size());
183
184 /* Display each result */
185 uint32_t rowIdx1 = dataPsnTxtStartY1 + 2 * dataPsnTxtYIncr;
186
Richard Burtonb40ecf82022-04-22 16:14:57 +0100187 for (const auto& result : results) {
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100188
189 std::string topKeyword{"<none>"};
190 float score = 0.f;
Richard Burtone6398cd2022-04-13 11:58:28 +0100191 if (!result.m_resultVec.empty()) {
192 topKeyword = result.m_resultVec[0].m_label;
193 score = result.m_resultVec[0].m_normalisedVal;
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100194 }
195
196 std::string resultStr =
Richard Burtone6398cd2022-04-13 11:58:28 +0100197 std::string{"@"} + std::to_string(result.m_timeStamp) +
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100198 std::string{"s: "} + topKeyword + std::string{" ("} +
199 std::to_string(static_cast<int>(score * 100)) + std::string{"%)"};
200
Richard Burtone6398cd2022-04-13 11:58:28 +0100201 hal_lcd_display_text(resultStr.c_str(), resultStr.size(),
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100202 dataPsnTxtStartX1, rowIdx1, false);
203 rowIdx1 += dataPsnTxtYIncr;
204
Richard Burtone6398cd2022-04-13 11:58:28 +0100205 if (result.m_resultVec.empty()) {
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100206 info("For timestamp: %f (inference #: %" PRIu32
207 "); label: %s; threshold: %f\n",
Richard Burtone6398cd2022-04-13 11:58:28 +0100208 result.m_timeStamp, result.m_inferenceNumber,
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100209 topKeyword.c_str(),
Richard Burtone6398cd2022-04-13 11:58:28 +0100210 result.m_threshold);
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100211 } else {
Richard Burtone6398cd2022-04-13 11:58:28 +0100212 for (uint32_t j = 0; j < result.m_resultVec.size(); ++j) {
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100213 info("For timestamp: %f (inference #: %" PRIu32
214 "); label: %s, score: %f; threshold: %f\n",
Richard Burtone6398cd2022-04-13 11:58:28 +0100215 result.m_timeStamp,
216 result.m_inferenceNumber,
217 result.m_resultVec[j].m_label.c_str(),
218 result.m_resultVec[j].m_normalisedVal,
219 result.m_threshold);
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100220 }
221 }
222 }
223
224 return true;
225 }
226
alexander3c798932021-03-26 21:42:19 +0000227} /* namespace app */
Richard Burtone6398cd2022-04-13 11:58:28 +0100228} /* namespace arm */