blob: e97e6b334062df5c26575ca675b17749a7323d24 [file] [log] [blame]
Isabella Gottardi3107aa22022-01-27 16:39:37 +00001/*
2 * Copyright (c) 2022 Arm Limited. All rights reserved.
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17#include "DetectorPostProcessing.hpp"
Richard Burtoned35a6f2022-02-14 11:55:35 +000018#include "PlatformMath.hpp"
Isabella Gottardi3107aa22022-01-27 16:39:37 +000019
20#include <algorithm>
21#include <cmath>
22
23namespace arm {
24namespace app {
25namespace object_detection {
26
27DetectorPostprocessing::DetectorPostprocessing(
28 const float threshold,
29 const float nms,
30 int numClasses,
31 int topN)
32 : m_threshold(threshold),
33 m_nms(nms),
34 m_numClasses(numClasses),
35 m_topN(topN)
36{}
37
38void DetectorPostprocessing::RunPostProcessing(
39 uint8_t* imgIn,
40 uint32_t imgRows,
41 uint32_t imgCols,
42 TfLiteTensor* modelOutput0,
43 TfLiteTensor* modelOutput1,
44 std::vector<DetectionResult>& resultsOut)
45{
46 /* init postprocessing */
47 Network net {
48 .inputWidth = static_cast<int>(imgCols),
49 .inputHeight = static_cast<int>(imgRows),
50 .numClasses = m_numClasses,
51 .branches = {
52 Branch {
53 .resolution = static_cast<int>(imgCols/32),
54 .numBox = 3,
55 .anchor = anchor1,
56 .modelOutput = modelOutput0->data.int8,
57 .scale = ((TfLiteAffineQuantization*)(modelOutput0->quantization.params))->scale->data[0],
58 .zeroPoint = ((TfLiteAffineQuantization*)(modelOutput0->quantization.params))->zero_point->data[0],
59 .size = modelOutput0->bytes
60 },
61 Branch {
62 .resolution = static_cast<int>(imgCols/16),
63 .numBox = 3,
64 .anchor = anchor2,
65 .modelOutput = modelOutput1->data.int8,
66 .scale = ((TfLiteAffineQuantization*)(modelOutput1->quantization.params))->scale->data[0],
67 .zeroPoint = ((TfLiteAffineQuantization*)(modelOutput1->quantization.params))->zero_point->data[0],
68 .size = modelOutput1->bytes
69 }
70 },
71 .topN = m_topN
72 };
73 /* End init */
74
75 /* Start postprocessing */
76 int originalImageWidth = originalImageSize;
77 int originalImageHeight = originalImageSize;
78
Richard Burtoned35a6f2022-02-14 11:55:35 +000079 std::forward_list<image::Detection> detections;
Isabella Gottardi3107aa22022-01-27 16:39:37 +000080 GetNetworkBoxes(net, originalImageWidth, originalImageHeight, m_threshold, detections);
81
82 /* Do nms */
83 CalculateNMS(detections, net.numClasses, m_nms);
84
85 for (auto& it: detections) {
86 float xMin = it.bbox.x - it.bbox.w / 2.0f;
87 float xMax = it.bbox.x + it.bbox.w / 2.0f;
88 float yMin = it.bbox.y - it.bbox.h / 2.0f;
89 float yMax = it.bbox.y + it.bbox.h / 2.0f;
90
91 if (xMin < 0) {
92 xMin = 0;
93 }
94 if (yMin < 0) {
95 yMin = 0;
96 }
97 if (xMax > originalImageWidth) {
98 xMax = originalImageWidth;
99 }
100 if (yMax > originalImageHeight) {
101 yMax = originalImageHeight;
102 }
103
104 float boxX = xMin;
105 float boxY = yMin;
106 float boxWidth = xMax - xMin;
107 float boxHeight = yMax - yMin;
108
109 for (int j = 0; j < net.numClasses; ++j) {
110 if (it.prob[j] > 0) {
111
112 DetectionResult tmpResult = {};
113 tmpResult.m_normalisedVal = it.prob[j];
114 tmpResult.m_x0 = boxX;
115 tmpResult.m_y0 = boxY;
116 tmpResult.m_w = boxWidth;
117 tmpResult.m_h = boxHeight;
118
119 resultsOut.push_back(tmpResult);
120
121 /* TODO: Instead of draw on the image, return the boxes and draw on the LCD */
122 DrawBoxOnImage(imgIn, originalImageWidth, originalImageHeight, boxX, boxY, boxWidth, boxHeight);;
123 }
124 }
125 }
126}
127
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000128
Richard Burtoned35a6f2022-02-14 11:55:35 +0000129void DetectorPostprocessing::InsertTopNDetections(std::forward_list<image::Detection>& detections, image::Detection& det)
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000130{
Richard Burtoned35a6f2022-02-14 11:55:35 +0000131 std::forward_list<image::Detection>::iterator it;
132 std::forward_list<image::Detection>::iterator last_it;
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000133 for ( it = detections.begin(); it != detections.end(); ++it ) {
134 if(it->objectness > det.objectness)
135 break;
136 last_it = it;
137 }
138 if(it != detections.begin()) {
139 detections.emplace_after(last_it, det);
140 detections.pop_front();
141 }
142}
143
Richard Burtoned35a6f2022-02-14 11:55:35 +0000144void DetectorPostprocessing::GetNetworkBoxes(Network& net, int imageWidth, int imageHeight, float threshold, std::forward_list<image::Detection>& detections)
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000145{
146 int numClasses = net.numClasses;
147 int num = 0;
Richard Burtoned35a6f2022-02-14 11:55:35 +0000148 auto det_objectness_comparator = [](image::Detection& pa, image::Detection& pb) {
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000149 return pa.objectness < pb.objectness;
150 };
151 for (size_t i = 0; i < net.branches.size(); ++i) {
152 int height = net.branches[i].resolution;
153 int width = net.branches[i].resolution;
154 int channel = net.branches[i].numBox*(5+numClasses);
155
156 for (int h = 0; h < net.branches[i].resolution; h++) {
157 for (int w = 0; w < net.branches[i].resolution; w++) {
158 for (int anc = 0; anc < net.branches[i].numBox; anc++) {
159
160 /* Objectness score */
161 int bbox_obj_offset = h * width * channel + w * channel + anc * (numClasses + 5) + 4;
Richard Burtoned35a6f2022-02-14 11:55:35 +0000162 float objectness = math::MathUtils::SigmoidF32(((float)net.branches[i].modelOutput[bbox_obj_offset] - net.branches[i].zeroPoint) * net.branches[i].scale);
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000163
164 if(objectness > threshold) {
Richard Burtoned35a6f2022-02-14 11:55:35 +0000165 image::Detection det;
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000166 det.objectness = objectness;
167 /* Get bbox prediction data for each anchor, each feature point */
168 int bbox_x_offset = bbox_obj_offset -4;
169 int bbox_y_offset = bbox_x_offset + 1;
170 int bbox_w_offset = bbox_x_offset + 2;
171 int bbox_h_offset = bbox_x_offset + 3;
172 int bbox_scores_offset = bbox_x_offset + 5;
173
174 det.bbox.x = ((float)net.branches[i].modelOutput[bbox_x_offset] - net.branches[i].zeroPoint) * net.branches[i].scale;
175 det.bbox.y = ((float)net.branches[i].modelOutput[bbox_y_offset] - net.branches[i].zeroPoint) * net.branches[i].scale;
176 det.bbox.w = ((float)net.branches[i].modelOutput[bbox_w_offset] - net.branches[i].zeroPoint) * net.branches[i].scale;
177 det.bbox.h = ((float)net.branches[i].modelOutput[bbox_h_offset] - net.branches[i].zeroPoint) * net.branches[i].scale;
178
179
180 float bbox_x, bbox_y;
181
182 /* Eliminate grid sensitivity trick involved in YOLOv4 */
Richard Burtoned35a6f2022-02-14 11:55:35 +0000183 bbox_x = math::MathUtils::SigmoidF32(det.bbox.x);
184 bbox_y = math::MathUtils::SigmoidF32(det.bbox.y);
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000185 det.bbox.x = (bbox_x + w) / width;
186 det.bbox.y = (bbox_y + h) / height;
187
188 det.bbox.w = exp(det.bbox.w) * net.branches[i].anchor[anc*2] / net.inputWidth;
189 det.bbox.h = exp(det.bbox.h) * net.branches[i].anchor[anc*2+1] / net.inputHeight;
190
191 for (int s = 0; s < numClasses; s++) {
Richard Burtoned35a6f2022-02-14 11:55:35 +0000192 float sig = math::MathUtils::SigmoidF32(((float)net.branches[i].modelOutput[bbox_scores_offset + s] - net.branches[i].zeroPoint) * net.branches[i].scale)*objectness;
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000193 det.prob.emplace_back((sig > threshold) ? sig : 0);
194 }
195
196 /* Correct_YOLO_boxes */
197 det.bbox.x *= imageWidth;
198 det.bbox.w *= imageWidth;
199 det.bbox.y *= imageHeight;
200 det.bbox.h *= imageHeight;
201
202 if (num < net.topN || net.topN <=0) {
203 detections.emplace_front(det);
204 num += 1;
205 } else if (num == net.topN) {
206 detections.sort(det_objectness_comparator);
207 InsertTopNDetections(detections,det);
208 num += 1;
209 } else {
210 InsertTopNDetections(detections,det);
211 }
212 }
213 }
214 }
215 }
216 }
217 if(num > net.topN)
218 num -=1;
219}
220
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000221void DetectorPostprocessing::DrawBoxOnImage(uint8_t* imgIn, int imWidth, int imHeight, int boxX,int boxY, int boxWidth, int boxHeight)
222{
223 auto CheckAndFixOffset = [](int im_width,int im_height,int& offset) {
224 if ( (offset) >= im_width*im_height*channelsImageDisplayed) {
225 offset = im_width * im_height * channelsImageDisplayed -1;
226 }
227 else if ( (offset) < 0) {
228 offset = 0;
229 }
230 };
231
232 /* Consistency checks */
233 if (!imgIn) {
234 return;
235 }
236
237 int offset=0;
238 for (int i=0; i < boxWidth; i++) {
239 /* Draw two horizontal lines */
240 for (int line=0; line < 2; line++) {
241 /*top*/
242 offset =(i + (boxY + line)*imWidth + boxX) * channelsImageDisplayed; /* channelsImageDisplayed for rgb or grayscale*/
243 CheckAndFixOffset(imWidth,imHeight,offset);
244 imgIn[offset] = 0xFF;
245 /*bottom*/
246 offset = (i + (boxY + boxHeight - line)*imWidth + boxX) * channelsImageDisplayed;
247 CheckAndFixOffset(imWidth,imHeight,offset);
248 imgIn[offset] = 0xFF;
249 }
250 }
251
252 for (int i=0; i < boxHeight; i++) {
253 /* Draw two vertical lines */
254 for (int line=0; line < 2; line++) {
255 /*left*/
256 offset = ((i + boxY)*imWidth + boxX + line)*channelsImageDisplayed;
257 CheckAndFixOffset(imWidth,imHeight,offset);
258 imgIn[offset] = 0xFF;
259 /*right*/
260 offset = ((i + boxY)*imWidth + boxX + boxWidth - line)*channelsImageDisplayed;
261 CheckAndFixOffset(imWidth,imHeight, offset);
262 imgIn[offset] = 0xFF;
263 }
264 }
265
266}
267
268} /* namespace object_detection */
269} /* namespace app */
270} /* namespace arm */