blob: fb1606a2b2bfc1caed80894ef8a7227e9a31e009 [file] [log] [blame]
Isabella Gottardi3107aa22022-01-27 16:39:37 +00001/*
2 * Copyright (c) 2022 Arm Limited. All rights reserved.
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17#include "DetectorPostProcessing.hpp"
Richard Burtoned35a6f2022-02-14 11:55:35 +000018#include "PlatformMath.hpp"
Isabella Gottardi3107aa22022-01-27 16:39:37 +000019
Isabella Gottardi3107aa22022-01-27 16:39:37 +000020#include <cmath>
21
22namespace arm {
23namespace app {
Isabella Gottardi3107aa22022-01-27 16:39:37 +000024
Richard Burtonef904972022-04-27 17:24:36 +010025 DetectorPostProcess::DetectorPostProcess(
26 TfLiteTensor* modelOutput0,
27 TfLiteTensor* modelOutput1,
28 std::vector<object_detection::DetectionResult>& results,
29 int inputImgRows,
30 int inputImgCols,
31 const float threshold,
32 const float nms,
33 int numClasses,
34 int topN)
35 : m_outputTensor0{modelOutput0},
36 m_outputTensor1{modelOutput1},
37 m_results{results},
38 m_inputImgRows{inputImgRows},
39 m_inputImgCols{inputImgCols},
40 m_threshold(threshold),
41 m_nms(nms),
42 m_numClasses(numClasses),
43 m_topN(topN)
Isabella Gottardi3107aa22022-01-27 16:39:37 +000044{
Richard Burtonef904972022-04-27 17:24:36 +010045 /* Init PostProcessing */
46 this->m_net =
47 object_detection::Network {
48 .inputWidth = inputImgCols,
49 .inputHeight = inputImgRows,
50 .numClasses = numClasses,
Isabella Gottardi3107aa22022-01-27 16:39:37 +000051 .branches = {
Richard Burtonef904972022-04-27 17:24:36 +010052 object_detection::Branch {
53 .resolution = inputImgCols/32,
54 .numBox = 3,
55 .anchor = anchor1,
56 .modelOutput = this->m_outputTensor0->data.int8,
57 .scale = (static_cast<TfLiteAffineQuantization*>(
58 this->m_outputTensor0->quantization.params))->scale->data[0],
59 .zeroPoint = (static_cast<TfLiteAffineQuantization*>(
60 this->m_outputTensor0->quantization.params))->zero_point->data[0],
61 .size = this->m_outputTensor0->bytes
Isabella Gottardi3107aa22022-01-27 16:39:37 +000062 },
Richard Burtonef904972022-04-27 17:24:36 +010063 object_detection::Branch {
64 .resolution = inputImgCols/16,
65 .numBox = 3,
66 .anchor = anchor2,
67 .modelOutput = this->m_outputTensor1->data.int8,
68 .scale = (static_cast<TfLiteAffineQuantization*>(
69 this->m_outputTensor1->quantization.params))->scale->data[0],
70 .zeroPoint = (static_cast<TfLiteAffineQuantization*>(
71 this->m_outputTensor1->quantization.params))->zero_point->data[0],
72 .size = this->m_outputTensor1->bytes
Isabella Gottardi3107aa22022-01-27 16:39:37 +000073 }
74 },
75 .topN = m_topN
76 };
77 /* End init */
Richard Burtonef904972022-04-27 17:24:36 +010078}
Isabella Gottardi3107aa22022-01-27 16:39:37 +000079
Richard Burtonef904972022-04-27 17:24:36 +010080bool DetectorPostProcess::DoPostProcess()
81{
Isabella Gottardi3107aa22022-01-27 16:39:37 +000082 /* Start postprocessing */
83 int originalImageWidth = originalImageSize;
84 int originalImageHeight = originalImageSize;
85
Richard Burtoned35a6f2022-02-14 11:55:35 +000086 std::forward_list<image::Detection> detections;
Richard Burtonef904972022-04-27 17:24:36 +010087 GetNetworkBoxes(this->m_net, originalImageWidth, originalImageHeight, m_threshold, detections);
Isabella Gottardi3107aa22022-01-27 16:39:37 +000088
89 /* Do nms */
Richard Burtonef904972022-04-27 17:24:36 +010090 CalculateNMS(detections, this->m_net.numClasses, m_nms);
Isabella Gottardi3107aa22022-01-27 16:39:37 +000091
92 for (auto& it: detections) {
93 float xMin = it.bbox.x - it.bbox.w / 2.0f;
94 float xMax = it.bbox.x + it.bbox.w / 2.0f;
95 float yMin = it.bbox.y - it.bbox.h / 2.0f;
96 float yMax = it.bbox.y + it.bbox.h / 2.0f;
97
98 if (xMin < 0) {
99 xMin = 0;
100 }
101 if (yMin < 0) {
102 yMin = 0;
103 }
104 if (xMax > originalImageWidth) {
105 xMax = originalImageWidth;
106 }
107 if (yMax > originalImageHeight) {
108 yMax = originalImageHeight;
109 }
110
111 float boxX = xMin;
112 float boxY = yMin;
113 float boxWidth = xMax - xMin;
114 float boxHeight = yMax - yMin;
115
Richard Burtonef904972022-04-27 17:24:36 +0100116 for (int j = 0; j < this->m_net.numClasses; ++j) {
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000117 if (it.prob[j] > 0) {
118
Richard Burtonef904972022-04-27 17:24:36 +0100119 object_detection::DetectionResult tmpResult = {};
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000120 tmpResult.m_normalisedVal = it.prob[j];
121 tmpResult.m_x0 = boxX;
122 tmpResult.m_y0 = boxY;
123 tmpResult.m_w = boxWidth;
124 tmpResult.m_h = boxHeight;
125
Richard Burtonef904972022-04-27 17:24:36 +0100126 this->m_results.push_back(tmpResult);
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000127 }
128 }
129 }
Richard Burtonef904972022-04-27 17:24:36 +0100130 return true;
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000131}
132
Richard Burtonef904972022-04-27 17:24:36 +0100133void DetectorPostProcess::InsertTopNDetections(std::forward_list<image::Detection>& detections, image::Detection& det)
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000134{
Richard Burtoned35a6f2022-02-14 11:55:35 +0000135 std::forward_list<image::Detection>::iterator it;
136 std::forward_list<image::Detection>::iterator last_it;
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000137 for ( it = detections.begin(); it != detections.end(); ++it ) {
138 if(it->objectness > det.objectness)
139 break;
140 last_it = it;
141 }
142 if(it != detections.begin()) {
143 detections.emplace_after(last_it, det);
144 detections.pop_front();
145 }
146}
147
Richard Burtonef904972022-04-27 17:24:36 +0100148void DetectorPostProcess::GetNetworkBoxes(
149 object_detection::Network& net,
150 int imageWidth,
151 int imageHeight,
152 float threshold,
153 std::forward_list<image::Detection>& detections)
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000154{
155 int numClasses = net.numClasses;
156 int num = 0;
Richard Burtoned35a6f2022-02-14 11:55:35 +0000157 auto det_objectness_comparator = [](image::Detection& pa, image::Detection& pb) {
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000158 return pa.objectness < pb.objectness;
159 };
160 for (size_t i = 0; i < net.branches.size(); ++i) {
161 int height = net.branches[i].resolution;
162 int width = net.branches[i].resolution;
163 int channel = net.branches[i].numBox*(5+numClasses);
164
165 for (int h = 0; h < net.branches[i].resolution; h++) {
166 for (int w = 0; w < net.branches[i].resolution; w++) {
167 for (int anc = 0; anc < net.branches[i].numBox; anc++) {
168
169 /* Objectness score */
170 int bbox_obj_offset = h * width * channel + w * channel + anc * (numClasses + 5) + 4;
Richard Burton9c549902022-02-15 16:39:18 +0000171 float objectness = math::MathUtils::SigmoidF32(
172 (static_cast<float>(net.branches[i].modelOutput[bbox_obj_offset])
173 - net.branches[i].zeroPoint
174 ) * net.branches[i].scale);
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000175
176 if(objectness > threshold) {
Richard Burtoned35a6f2022-02-14 11:55:35 +0000177 image::Detection det;
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000178 det.objectness = objectness;
179 /* Get bbox prediction data for each anchor, each feature point */
180 int bbox_x_offset = bbox_obj_offset -4;
181 int bbox_y_offset = bbox_x_offset + 1;
182 int bbox_w_offset = bbox_x_offset + 2;
183 int bbox_h_offset = bbox_x_offset + 3;
184 int bbox_scores_offset = bbox_x_offset + 5;
185
Richard Burtonef904972022-04-27 17:24:36 +0100186 det.bbox.x = (static_cast<float>(net.branches[i].modelOutput[bbox_x_offset])
187 - net.branches[i].zeroPoint) * net.branches[i].scale;
188 det.bbox.y = (static_cast<float>(net.branches[i].modelOutput[bbox_y_offset])
189 - net.branches[i].zeroPoint) * net.branches[i].scale;
190 det.bbox.w = (static_cast<float>(net.branches[i].modelOutput[bbox_w_offset])
191 - net.branches[i].zeroPoint) * net.branches[i].scale;
192 det.bbox.h = (static_cast<float>(net.branches[i].modelOutput[bbox_h_offset])
193 - net.branches[i].zeroPoint) * net.branches[i].scale;
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000194
195 float bbox_x, bbox_y;
196
197 /* Eliminate grid sensitivity trick involved in YOLOv4 */
Richard Burtoned35a6f2022-02-14 11:55:35 +0000198 bbox_x = math::MathUtils::SigmoidF32(det.bbox.x);
199 bbox_y = math::MathUtils::SigmoidF32(det.bbox.y);
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000200 det.bbox.x = (bbox_x + w) / width;
201 det.bbox.y = (bbox_y + h) / height;
202
Richard Burton9c549902022-02-15 16:39:18 +0000203 det.bbox.w = std::exp(det.bbox.w) * net.branches[i].anchor[anc*2] / net.inputWidth;
204 det.bbox.h = std::exp(det.bbox.h) * net.branches[i].anchor[anc*2+1] / net.inputHeight;
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000205
206 for (int s = 0; s < numClasses; s++) {
Richard Burton9c549902022-02-15 16:39:18 +0000207 float sig = math::MathUtils::SigmoidF32(
208 (static_cast<float>(net.branches[i].modelOutput[bbox_scores_offset + s]) -
209 net.branches[i].zeroPoint) * net.branches[i].scale
210 ) * objectness;
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000211 det.prob.emplace_back((sig > threshold) ? sig : 0);
212 }
213
214 /* Correct_YOLO_boxes */
215 det.bbox.x *= imageWidth;
216 det.bbox.w *= imageWidth;
217 det.bbox.y *= imageHeight;
218 det.bbox.h *= imageHeight;
219
220 if (num < net.topN || net.topN <=0) {
221 detections.emplace_front(det);
222 num += 1;
223 } else if (num == net.topN) {
224 detections.sort(det_objectness_comparator);
225 InsertTopNDetections(detections,det);
226 num += 1;
227 } else {
228 InsertTopNDetections(detections,det);
229 }
230 }
231 }
232 }
233 }
234 }
235 if(num > net.topN)
236 num -=1;
237}
238
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000239} /* namespace app */
240} /* namespace arm */