blob: 1a20db574466ba2662986027db82a01e4abadbfb [file] [log] [blame]
Michael Levit06fcf752022-01-12 11:53:46 +02001/*
Richard Burtoncefc7e12023-12-06 17:13:10 +00002 * SPDX-FileCopyrightText: Copyright 2022, 2024 Arm Limited and/or its affiliates
Kshitij Sisodia2ea46232022-12-19 16:37:33 +00003 * <open-source-office@arm.com> SPDX-License-Identifier: Apache-2.0
Michael Levit06fcf752022-01-12 11:53:46 +02004 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
#include "UseCaseHandler.hpp"

#include "DetectorPostProcessing.hpp"
#include "DetectorPreProcessing.hpp"
#include "InputFiles.hpp"
#include "UseCaseCommonUtils.hpp"
#include "YoloFastestModel.hpp"
#include "hal.h"
#include "log_macros.h"

#include <algorithm>
#include <cinttypes>
Michael Levit06fcf752022-01-12 11:53:46 +020027
Michael Levit06fcf752022-01-12 11:53:46 +020028namespace arm {
29namespace app {
30
Richard Burtoncefc7e12023-12-06 17:13:10 +000031#if VSI_ENABLED
32#include "video_drv.h" /* Video Driver API. */
33
34 /**
35 * @brief Draws a box in the image using the object detection result object.
36 *
37 * @param[out] imageData Pointer to the start of the image.
38 * @param[in] width Image width.
39 * @param[in] height Image height.
40 * @param[in] result Object detection result.
41 */
42 static void DrawBox(uint8_t* imageData,
43 const uint32_t width,
44 const uint32_t height,
45 const OdResults& result)
46 {
47 UNUSED(height);
48 const auto x = result.m_x0;
49 const auto y = result.m_y0;
50 const auto w = result.m_w;
51 const auto h = result.m_h;
52
53 const uint32_t step = width * 3;
54 uint8_t* const imStart = imageData + (y * step) + (x * 3);
55
56 uint8_t* dst_0 = imStart;
57 uint8_t* dst_1 = imStart + (h * step);
58
59 for (uint32_t i = 0; i < static_cast<uint32_t>(w); ++i) {
60 *dst_0 = 255;
61 *dst_1 = 255;
62
63 dst_0 += 3;
64 dst_1 += 3;
65 }
66
67 dst_0 = imStart;
68 dst_1 = imStart + (w * 3);
69
70 for (uint32_t j = 0; j < static_cast<uint32_t>(h); ++j) {
71 *dst_0 = 255;
72 *dst_1 = 255;
73
74 dst_0 += step;
75 dst_1 += step;
76 }
77 }
78
79 void DrawDetectionBoxesVsi(uint8_t* image,
80 const uint32_t imageWidth,
81 const uint32_t imageHeight,
82 const std::vector<OdResults>& results)
83 {
84 for (const auto& result : results) {
85 DrawBox(image, imageWidth, imageHeight, result);
86 printf("Detection :: [%d" ", %d"
87 ", %d" ", %d" "]\n",
88 result.m_x0,
89 result.m_y0,
90 result.m_w,
91 result.m_h);
92 }
93 }
94
95 /* Object detection VSI inference handler. */
96 bool ObjectDetectionHandlerVsi(ApplicationContext& ctx)
97 {
98 /* Image buffer. */
99 static uint8_t ImageBuf[IMAGE_DATA_SIZE];
100 static uint8_t ImageOut[IMAGE_DATA_SIZE];
101
102 /* Model object creation and initialisation. */
103 auto& model = ctx.Get<Model&>("model");
104
105 TfLiteTensor* inputTensor = model.GetInputTensor(0);
106 TfLiteTensor* outputTensor0 = model.GetOutputTensor(0);
107 TfLiteTensor* outputTensor1 = model.GetOutputTensor(1);
108
109 if (!inputTensor->dims) {
110 printf_err("Invalid input tensor dims\n");
111 return false;
112 } else if (inputTensor->dims->size < 3) {
113 printf_err("Input tensor dimension should be >= 3\n");
114 return false;
115 }
116
117 TfLiteIntArray* inputShape = model.GetInputShape(0);
118 const int inputImgCols = inputShape->data[arm::app::YoloFastestModel::ms_inputColsIdx];
119 const int inputImgRows = inputShape->data[arm::app::YoloFastestModel::ms_inputRowsIdx];
120
121 /* Set up pre- and post-processing. */
122 arm::app::DetectorPreProcess preProcess =
123 arm::app::DetectorPreProcess(inputTensor, true, model.IsDataSigned());
124
125 std::vector<arm::app::OdResults> results;
126 const arm::app::object_detection::PostProcessParams postProcessParams{
127 inputImgRows,
128 inputImgCols,
129 arm::app::object_detection::originalImageSize,
130 arm::app::object_detection::anchor1,
131 arm::app::object_detection::anchor2};
132 arm::app::DetectorPostProcess postProcess =
133 arm::app::DetectorPostProcess(outputTensor0, outputTensor1, results, postProcessParams);
134
135 const size_t imgSz = inputTensor->bytes < IMAGE_DATA_SIZE ?
136 inputTensor->bytes : IMAGE_DATA_SIZE;
137
138 if (sizeof(ImageBuf) < imgSz) {
139 printf_err("Image buffer is insufficient\n");
140 return false;
141 }
142
143 /* Configure Input Video. */
144 if (VideoDrv_Configure(VIDEO_DRV_IN0,
145 arm::app::object_detection::originalImageSize,
146 arm::app::object_detection::originalImageSize,
147 COLOR_RGB888, 24U) != VIDEO_DRV_OK) {
148 printf_err("Failed to configure video input\n");
149 return false;
150 }
151
152 /* Set Input Video buffer. */
153 if (VideoDrv_SetBuf(VIDEO_DRV_IN0, ImageBuf, IMAGE_DATA_SIZE) != VIDEO_DRV_OK) {
154 printf_err("Failed to set buffer for video input\n");
155 return false;
156 }
157
158 /* Set Output Video file (only when using AVH - default: Display) */
159 // if (VideoDrv_SetFile(VIDEO_DRV_OUT0, "output_image.png") != VIDEO_DRV_OK) {
160 // printf_err("Failed to set filename for video output\n");
161 // return 1;
162 // }
163 /* Configure Output Video. */
164 if (VideoDrv_Configure(VIDEO_DRV_OUT0,
165 arm::app::object_detection::originalImageSize,
166 arm::app::object_detection::originalImageSize,
167 COLOR_RGB888, 24U) != VIDEO_DRV_OK) {
168 printf_err("Failed to configure video output\n");
169 return false;
170 }
171
172 /* Set Output Video buffer. */
173 if (VideoDrv_SetBuf(VIDEO_DRV_OUT0, ImageOut, IMAGE_DATA_SIZE) != VIDEO_DRV_OK) {
174 printf_err("Failed to set buffer for video output\n");
175 return false;
176 }
177
178 auto imgCount = ctx.Get<uint32_t>("imgIndex");
179 void* imgFrame = nullptr;
180 void* outFrame = nullptr;
181
182 while (true) {
183#if VSI_IMAGE_INPUT
184 if (VideoDrv_SetFile(VIDEO_DRV_IN0, GetFilePath(imgCount)) != VIDEO_DRV_OK) {
185 printf_err("Failed to set filename for video input\n");
186 return false;
187 }
188#endif
189
190 VideoDrv_Status_t status;
191
192 results.clear();
193
194 /* Start video capture (single frame). */
195 if (VideoDrv_StreamStart(VIDEO_DRV_IN0, VIDEO_DRV_MODE_SINGLE) != VIDEO_DRV_OK) {
196 printf_err("Failed to start video capture\n");
197 return false;
198 }
199
200 /* Wait for video input frame. */
201 do {
202 status = VideoDrv_GetStatus(VIDEO_DRV_IN0);
203 } while (status.buf_empty != 0U);
204
205 /* Get input video frame buffer. */
206 imgFrame = VideoDrv_GetFrameBuf(VIDEO_DRV_IN0);
207
208 /* Run the pre-processing, inference and post-processing. */
209 if (!preProcess.DoPreProcess(imgFrame, imgSz)) {
210 printf_err("Pre-processing failed.\n");
211 return false;
212 }
213
214 /* Run inference over this image. */
215 printf("\rImage %" PRIu32 "; ", ++imgCount);
216
217 if (!model.RunInference()) {
218 printf_err("Inference failed.\n");
219 return false;
220 }
221
222 if (!postProcess.DoPostProcess()) {
223 printf_err("Post-processing failed.\n");
224 return false;
225 }
226
227 /* Release input frame. */
228 VideoDrv_ReleaseFrame(VIDEO_DRV_IN0);
229
230 arm::app::DrawDetectionBoxesVsi(static_cast<uint8_t*>(imgFrame), inputImgCols, inputImgRows, results);
231
232 /* Get output video frame buffer. */
233 outFrame = VideoDrv_GetFrameBuf(VIDEO_DRV_OUT0);
234
235 /* Copy image frame with detection boxes to output frame buffer. */
236 memcpy(outFrame, imgFrame, IMAGE_DATA_SIZE);
237
238 /* Release output frame. */
239 VideoDrv_ReleaseFrame(VIDEO_DRV_OUT0);
240
241 /* Start video output (single frame). */
242 VideoDrv_StreamStart(VIDEO_DRV_OUT0, VIDEO_DRV_MODE_SINGLE);
243
244 /* Check for end of stream (when using AVH with file as Video input). */
245 if (status.eos != 0U) {
246 while (VideoDrv_GetStatus(VIDEO_DRV_OUT0).buf_empty == 0U);
247 break;
248 }
249 }
250
251 IncrementAppCtxIfmIdx(ctx, "imgIndex");
252
253 /* De-initialize Video Interface. */
254 //VideoDrv_Uninitialize();
255 return true;
256 }
257
258#endif
259
    /**
     * @brief Presents inference results using the data presentation
     *        object.
     * @param[in] results Vector of detection results to be displayed.
     * @return true if successful, false otherwise.
     **/
    static bool
    PresentInferenceResult(const std::vector<object_detection::DetectionResult>& results);

    /**
     * @brief Draw boxes directly on the LCD for all detected objects.
     * @param[in] results             Vector of detection results to be displayed.
     * @param[in] imgStartX           X coordinate where the image starts on the LCD.
     * @param[in] imgStartY           Y coordinate where the image starts on the LCD.
     * @param[in] imgDownscaleFactor  How much the image has been downscaled on the LCD.
     **/
    static void DrawDetectionBoxes(const std::vector<object_detection::DetectionResult>& results,
                                   uint32_t imgStartX,
                                   uint32_t imgStartY,
                                   uint32_t imgDownscaleFactor);
Michael Levit06fcf752022-01-12 11:53:46 +0200280
Richard Burtonef904972022-04-27 17:24:36 +0100281 /* Object detection inference handler. */
Michael Levit06fcf752022-01-12 11:53:46 +0200282 bool ObjectDetectionHandler(ApplicationContext& ctx, uint32_t imgIndex, bool runAll)
283 {
Michael Levit06fcf752022-01-12 11:53:46 +0200284 auto& profiler = ctx.Get<Profiler&>("profiler");
285
286 constexpr uint32_t dataPsnImgDownscaleFactor = 1;
Kshitij Sisodia2ea46232022-12-19 16:37:33 +0000287 constexpr uint32_t dataPsnImgStartX = 10;
288 constexpr uint32_t dataPsnImgStartY = 35;
Michael Levit06fcf752022-01-12 11:53:46 +0200289
Richard Burton71f282e2022-12-01 12:31:23 +0000290 constexpr uint32_t dataPsnTxtInfStartX = 20;
291 constexpr uint32_t dataPsnTxtInfStartY = 28;
Michael Levit06fcf752022-01-12 11:53:46 +0200292
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100293 hal_lcd_clear(COLOR_BLACK);
Michael Levit06fcf752022-01-12 11:53:46 +0200294
295 auto& model = ctx.Get<Model&>("model");
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000296
Michael Levit06fcf752022-01-12 11:53:46 +0200297 /* If the request has a valid size, set the image index. */
298 if (imgIndex < NUMBER_OF_FILES) {
299 if (!SetAppCtxIfmIdx(ctx, imgIndex, "imgIndex")) {
300 return false;
301 }
302 }
303 if (!model.IsInited()) {
304 printf_err("Model is not initialised! Terminating processing.\n");
305 return false;
306 }
307
Richard Burtonef904972022-04-27 17:24:36 +0100308 auto initialImgIdx = ctx.Get<uint32_t>("imgIndex");
Michael Levit06fcf752022-01-12 11:53:46 +0200309
Kshitij Sisodia2ea46232022-12-19 16:37:33 +0000310 TfLiteTensor* inputTensor = model.GetInputTensor(0);
Richard Burtonef904972022-04-27 17:24:36 +0100311 TfLiteTensor* outputTensor0 = model.GetOutputTensor(0);
312 TfLiteTensor* outputTensor1 = model.GetOutputTensor(1);
Michael Levit06fcf752022-01-12 11:53:46 +0200313
314 if (!inputTensor->dims) {
315 printf_err("Invalid input tensor dims\n");
316 return false;
317 } else if (inputTensor->dims->size < 3) {
318 printf_err("Input tensor dimension should be >= 3\n");
319 return false;
320 }
321
322 TfLiteIntArray* inputShape = model.GetInputShape(0);
323
Richard Burtonef904972022-04-27 17:24:36 +0100324 const int inputImgCols = inputShape->data[YoloFastestModel::ms_inputColsIdx];
325 const int inputImgRows = inputShape->data[YoloFastestModel::ms_inputRowsIdx];
Michael Levit06fcf752022-01-12 11:53:46 +0200326
Richard Burtonef904972022-04-27 17:24:36 +0100327 /* Set up pre and post-processing. */
328 DetectorPreProcess preProcess = DetectorPreProcess(inputTensor, true, model.IsDataSigned());
Michael Levit06fcf752022-01-12 11:53:46 +0200329
Richard Burtonef904972022-04-27 17:24:36 +0100330 std::vector<object_detection::DetectionResult> results;
Kshitij Sisodia2ea46232022-12-19 16:37:33 +0000331 const object_detection::PostProcessParams postProcessParams{
332 inputImgRows,
333 inputImgCols,
334 object_detection::originalImageSize,
335 object_detection::anchor1,
336 object_detection::anchor2};
337 DetectorPostProcess postProcess =
338 DetectorPostProcess(outputTensor0, outputTensor1, results, postProcessParams);
Michael Levit06fcf752022-01-12 11:53:46 +0200339 do {
Matthew Sloyan0bc74e92022-05-10 13:21:01 +0100340 /* Ensure there are no results leftover from previous inference when running all. */
341 results.clear();
342
Michael Levit06fcf752022-01-12 11:53:46 +0200343 /* Strings for presentation/logging. */
344 std::string str_inf{"Running inference... "};
345
Kshitij Sisodia2ea46232022-12-19 16:37:33 +0000346 const uint8_t* currImage = GetImgArray(ctx.Get<uint32_t>("imgIndex"));
Michael Levit06fcf752022-01-12 11:53:46 +0200347
Richard Burtonef904972022-04-27 17:24:36 +0100348 auto dstPtr = static_cast<uint8_t*>(inputTensor->data.uint8);
Kshitij Sisodia2ea46232022-12-19 16:37:33 +0000349 const size_t copySz =
350 inputTensor->bytes < IMAGE_DATA_SIZE ? inputTensor->bytes : IMAGE_DATA_SIZE;
Michael Levit06fcf752022-01-12 11:53:46 +0200351
Richard Burtonef904972022-04-27 17:24:36 +0100352 /* Run the pre-processing, inference and post-processing. */
353 if (!preProcess.DoPreProcess(currImage, copySz)) {
354 printf_err("Pre-processing failed.");
355 return false;
356 }
Michael Levit06fcf752022-01-12 11:53:46 +0200357
Isabella Gottardie76a6912022-02-16 10:42:32 +0000358 /* Display image on the LCD. */
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100359 hal_lcd_display_image(
Liam Barry213a5432022-05-09 17:06:19 +0100360 (arm::app::object_detection::channelsImageDisplayed == 3) ? currImage : dstPtr,
361 inputImgCols,
362 inputImgRows,
363 arm::app::object_detection::channelsImageDisplayed,
364 dataPsnImgStartX,
365 dataPsnImgStartY,
366 dataPsnImgDownscaleFactor);
Michael Levit06fcf752022-01-12 11:53:46 +0200367
Michael Levit06fcf752022-01-12 11:53:46 +0200368 /* Display message on the LCD - inference running. */
Kshitij Sisodia2ea46232022-12-19 16:37:33 +0000369 hal_lcd_display_text(
370 str_inf.c_str(), str_inf.size(), dataPsnTxtInfStartX, dataPsnTxtInfStartY, false);
Michael Levit06fcf752022-01-12 11:53:46 +0200371
372 /* Run inference over this image. */
Kshitij Sisodia2ea46232022-12-19 16:37:33 +0000373 info("Running inference on image %" PRIu32 " => %s\n",
374 ctx.Get<uint32_t>("imgIndex"),
375 GetFilename(ctx.Get<uint32_t>("imgIndex")));
Michael Levit06fcf752022-01-12 11:53:46 +0200376
377 if (!RunInference(model, profiler)) {
Richard Burtonef904972022-04-27 17:24:36 +0100378 printf_err("Inference failed.");
379 return false;
380 }
381
382 if (!postProcess.DoPostProcess()) {
383 printf_err("Post-processing failed.");
Michael Levit06fcf752022-01-12 11:53:46 +0200384 return false;
385 }
386
387 /* Erase. */
388 str_inf = std::string(str_inf.size(), ' ');
Kshitij Sisodia2ea46232022-12-19 16:37:33 +0000389 hal_lcd_display_text(
390 str_inf.c_str(), str_inf.size(), dataPsnTxtInfStartX, dataPsnTxtInfStartY, false);
Michael Levit06fcf752022-01-12 11:53:46 +0200391
Richard Burton9c549902022-02-15 16:39:18 +0000392 /* Draw boxes. */
Kshitij Sisodia2ea46232022-12-19 16:37:33 +0000393 DrawDetectionBoxes(
394 results, dataPsnImgStartX, dataPsnImgStartY, dataPsnImgDownscaleFactor);
Michael Levit06fcf752022-01-12 11:53:46 +0200395
Michael Levit06fcf752022-01-12 11:53:46 +0200396#if VERIFY_TEST_OUTPUT
Richard Burtonef904972022-04-27 17:24:36 +0100397 DumpTensor(modelOutput0);
398 DumpTensor(modelOutput1);
Michael Levit06fcf752022-01-12 11:53:46 +0200399#endif /* VERIFY_TEST_OUTPUT */
400
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100401 if (!PresentInferenceResult(results)) {
Michael Levit06fcf752022-01-12 11:53:46 +0200402 return false;
403 }
404
405 profiler.PrintProfilingResult();
406
Kshitij Sisodia2ea46232022-12-19 16:37:33 +0000407 IncrementAppCtxIfmIdx(ctx, "imgIndex");
Michael Levit06fcf752022-01-12 11:53:46 +0200408
Richard Burtonef904972022-04-27 17:24:36 +0100409 } while (runAll && ctx.Get<uint32_t>("imgIndex") != initialImgIdx);
Michael Levit06fcf752022-01-12 11:53:46 +0200410
411 return true;
412 }
413
Kshitij Sisodia2ea46232022-12-19 16:37:33 +0000414 static bool
415 PresentInferenceResult(const std::vector<object_detection::DetectionResult>& results)
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000416 {
Kshitij Sisodia68fdd112022-04-06 13:03:20 +0100417 hal_lcd_set_text_color(COLOR_GREEN);
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000418
419 /* If profiling is enabled, and the time is valid. */
420 info("Final results:\n");
421 info("Total number of inferences: 1\n");
422
423 for (uint32_t i = 0; i < results.size(); ++i) {
Kshitij Sisodia2ea46232022-12-19 16:37:33 +0000424 info("%" PRIu32 ") (%f) -> %s {x=%d,y=%d,w=%d,h=%d}\n",
425 i,
426 results[i].m_normalisedVal,
427 "Detection box:",
428 results[i].m_x0,
429 results[i].m_y0,
430 results[i].m_w,
431 results[i].m_h);
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000432 }
433
434 return true;
435 }
436
Richard Burtonef904972022-04-27 17:24:36 +0100437 static void DrawDetectionBoxes(const std::vector<object_detection::DetectionResult>& results,
Richard Burton9c549902022-02-15 16:39:18 +0000438 uint32_t imgStartX,
439 uint32_t imgStartY,
440 uint32_t imgDownscaleFactor)
441 {
442 uint32_t lineThickness = 1;
443
Kshitij Sisodia2ea46232022-12-19 16:37:33 +0000444 for (const auto& result : results) {
Richard Burton9c549902022-02-15 16:39:18 +0000445 /* Top line. */
Kshitij Sisodia2ea46232022-12-19 16:37:33 +0000446 hal_lcd_display_box(imgStartX + result.m_x0 / imgDownscaleFactor,
447 imgStartY + result.m_y0 / imgDownscaleFactor,
448 result.m_w / imgDownscaleFactor,
449 lineThickness,
450 COLOR_GREEN);
Richard Burton9c549902022-02-15 16:39:18 +0000451 /* Bot line. */
Kshitij Sisodia2ea46232022-12-19 16:37:33 +0000452 hal_lcd_display_box(imgStartX + result.m_x0 / imgDownscaleFactor,
453 imgStartY + (result.m_y0 + result.m_h) / imgDownscaleFactor -
454 lineThickness,
455 result.m_w / imgDownscaleFactor,
456 lineThickness,
457 COLOR_GREEN);
Richard Burton9c549902022-02-15 16:39:18 +0000458
459 /* Left line. */
Kshitij Sisodia2ea46232022-12-19 16:37:33 +0000460 hal_lcd_display_box(imgStartX + result.m_x0 / imgDownscaleFactor,
461 imgStartY + result.m_y0 / imgDownscaleFactor,
462 lineThickness,
463 result.m_h / imgDownscaleFactor,
464 COLOR_GREEN);
Richard Burton9c549902022-02-15 16:39:18 +0000465 /* Right line. */
Kshitij Sisodia2ea46232022-12-19 16:37:33 +0000466 hal_lcd_display_box(imgStartX + (result.m_x0 + result.m_w) / imgDownscaleFactor -
467 lineThickness,
468 imgStartY + result.m_y0 / imgDownscaleFactor,
469 lineThickness,
470 result.m_h / imgDownscaleFactor,
471 COLOR_GREEN);
Richard Burton9c549902022-02-15 16:39:18 +0000472 }
473 }
474
Michael Levit06fcf752022-01-12 11:53:46 +0200475} /* namespace app */
476} /* namespace arm */