blob: 60b463f20e07f1ff8e0a180cd97c88080204d5c9 [file] [log] [blame]
Isabella Gottardi3107aa22022-01-27 16:39:37 +00001/*
Richard Burtonf32a86a2022-11-15 11:46:11 +00002 * SPDX-FileCopyrightText: Copyright 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
Isabella Gottardi3107aa22022-01-27 16:39:37 +00003 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17#include "DetectorPostProcessing.hpp"
Richard Burtoned35a6f2022-02-14 11:55:35 +000018#include "PlatformMath.hpp"
Isabella Gottardi3107aa22022-01-27 16:39:37 +000019
Isabella Gottardi3107aa22022-01-27 16:39:37 +000020#include <cmath>
21
22namespace arm {
23namespace app {
Isabella Gottardi3107aa22022-01-27 16:39:37 +000024
Richard Burtonef904972022-04-27 17:24:36 +010025 DetectorPostProcess::DetectorPostProcess(
26 TfLiteTensor* modelOutput0,
27 TfLiteTensor* modelOutput1,
28 std::vector<object_detection::DetectionResult>& results,
Richard Burton6f6df092022-05-17 12:52:50 +010029 const object_detection::PostProcessParams& postProcessParams)
Richard Burtonef904972022-04-27 17:24:36 +010030 : m_outputTensor0{modelOutput0},
31 m_outputTensor1{modelOutput1},
32 m_results{results},
Richard Burton6f6df092022-05-17 12:52:50 +010033 m_postProcessParams{postProcessParams}
Isabella Gottardi3107aa22022-01-27 16:39:37 +000034{
Richard Burtonef904972022-04-27 17:24:36 +010035 /* Init PostProcessing */
Liam Barry213a5432022-05-09 17:06:19 +010036 this->m_net = object_detection::Network{
Richard Burton6f6df092022-05-17 12:52:50 +010037 .inputWidth = postProcessParams.inputImgCols,
38 .inputHeight = postProcessParams.inputImgRows,
39 .numClasses = postProcessParams.numClasses,
Liam Barry213a5432022-05-09 17:06:19 +010040 .branches =
Richard Burton6f6df092022-05-17 12:52:50 +010041 {object_detection::Branch{.resolution = postProcessParams.inputImgCols / 32,
Liam Barry213a5432022-05-09 17:06:19 +010042 .numBox = 3,
Richard Burton6f6df092022-05-17 12:52:50 +010043 .anchor = postProcessParams.anchor1,
Liam Barry213a5432022-05-09 17:06:19 +010044 .modelOutput = this->m_outputTensor0->data.int8,
45 .scale = (static_cast<TfLiteAffineQuantization*>(
46 this->m_outputTensor0->quantization.params))
47 ->scale->data[0],
48 .zeroPoint = (static_cast<TfLiteAffineQuantization*>(
49 this->m_outputTensor0->quantization.params))
50 ->zero_point->data[0],
51 .size = this->m_outputTensor0->bytes},
Richard Burton6f6df092022-05-17 12:52:50 +010052 object_detection::Branch{.resolution = postProcessParams.inputImgCols / 16,
Liam Barry213a5432022-05-09 17:06:19 +010053 .numBox = 3,
Richard Burton6f6df092022-05-17 12:52:50 +010054 .anchor = postProcessParams.anchor2,
Liam Barry213a5432022-05-09 17:06:19 +010055 .modelOutput = this->m_outputTensor1->data.int8,
56 .scale = (static_cast<TfLiteAffineQuantization*>(
57 this->m_outputTensor1->quantization.params))
58 ->scale->data[0],
59 .zeroPoint = (static_cast<TfLiteAffineQuantization*>(
60 this->m_outputTensor1->quantization.params))
61 ->zero_point->data[0],
62 .size = this->m_outputTensor1->bytes}},
Richard Burton6f6df092022-05-17 12:52:50 +010063 .topN = postProcessParams.topN};
Isabella Gottardi3107aa22022-01-27 16:39:37 +000064 /* End init */
Richard Burtonef904972022-04-27 17:24:36 +010065}
Isabella Gottardi3107aa22022-01-27 16:39:37 +000066
Richard Burtonef904972022-04-27 17:24:36 +010067bool DetectorPostProcess::DoPostProcess()
68{
Isabella Gottardi3107aa22022-01-27 16:39:37 +000069 /* Start postprocessing */
Richard Burton6f6df092022-05-17 12:52:50 +010070 int originalImageWidth = m_postProcessParams.originalImageSize;
71 int originalImageHeight = m_postProcessParams.originalImageSize;
Isabella Gottardi3107aa22022-01-27 16:39:37 +000072
Richard Burtoned35a6f2022-02-14 11:55:35 +000073 std::forward_list<image::Detection> detections;
Richard Burton6f6df092022-05-17 12:52:50 +010074 GetNetworkBoxes(this->m_net, originalImageWidth, originalImageHeight, m_postProcessParams.threshold, detections);
Isabella Gottardi3107aa22022-01-27 16:39:37 +000075
76 /* Do nms */
Richard Burton6f6df092022-05-17 12:52:50 +010077 CalculateNMS(detections, this->m_net.numClasses, this->m_postProcessParams.nms);
Isabella Gottardi3107aa22022-01-27 16:39:37 +000078
79 for (auto& it: detections) {
80 float xMin = it.bbox.x - it.bbox.w / 2.0f;
81 float xMax = it.bbox.x + it.bbox.w / 2.0f;
82 float yMin = it.bbox.y - it.bbox.h / 2.0f;
83 float yMax = it.bbox.y + it.bbox.h / 2.0f;
84
85 if (xMin < 0) {
86 xMin = 0;
87 }
88 if (yMin < 0) {
89 yMin = 0;
90 }
91 if (xMax > originalImageWidth) {
92 xMax = originalImageWidth;
93 }
94 if (yMax > originalImageHeight) {
95 yMax = originalImageHeight;
96 }
97
98 float boxX = xMin;
99 float boxY = yMin;
100 float boxWidth = xMax - xMin;
101 float boxHeight = yMax - yMin;
102
Richard Burtonef904972022-04-27 17:24:36 +0100103 for (int j = 0; j < this->m_net.numClasses; ++j) {
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000104 if (it.prob[j] > 0) {
105
Richard Burtonef904972022-04-27 17:24:36 +0100106 object_detection::DetectionResult tmpResult = {};
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000107 tmpResult.m_normalisedVal = it.prob[j];
108 tmpResult.m_x0 = boxX;
109 tmpResult.m_y0 = boxY;
110 tmpResult.m_w = boxWidth;
111 tmpResult.m_h = boxHeight;
112
Richard Burtonef904972022-04-27 17:24:36 +0100113 this->m_results.push_back(tmpResult);
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000114 }
115 }
116 }
Richard Burtonef904972022-04-27 17:24:36 +0100117 return true;
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000118}
119
Richard Burtonef904972022-04-27 17:24:36 +0100120void DetectorPostProcess::InsertTopNDetections(std::forward_list<image::Detection>& detections, image::Detection& det)
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000121{
Richard Burtoned35a6f2022-02-14 11:55:35 +0000122 std::forward_list<image::Detection>::iterator it;
123 std::forward_list<image::Detection>::iterator last_it;
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000124 for ( it = detections.begin(); it != detections.end(); ++it ) {
125 if(it->objectness > det.objectness)
126 break;
127 last_it = it;
128 }
129 if(it != detections.begin()) {
130 detections.emplace_after(last_it, det);
131 detections.pop_front();
132 }
133}
134
Richard Burtonef904972022-04-27 17:24:36 +0100135void DetectorPostProcess::GetNetworkBoxes(
136 object_detection::Network& net,
137 int imageWidth,
138 int imageHeight,
139 float threshold,
140 std::forward_list<image::Detection>& detections)
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000141{
142 int numClasses = net.numClasses;
143 int num = 0;
Richard Burtoned35a6f2022-02-14 11:55:35 +0000144 auto det_objectness_comparator = [](image::Detection& pa, image::Detection& pb) {
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000145 return pa.objectness < pb.objectness;
146 };
147 for (size_t i = 0; i < net.branches.size(); ++i) {
148 int height = net.branches[i].resolution;
149 int width = net.branches[i].resolution;
150 int channel = net.branches[i].numBox*(5+numClasses);
151
152 for (int h = 0; h < net.branches[i].resolution; h++) {
153 for (int w = 0; w < net.branches[i].resolution; w++) {
154 for (int anc = 0; anc < net.branches[i].numBox; anc++) {
155
156 /* Objectness score */
157 int bbox_obj_offset = h * width * channel + w * channel + anc * (numClasses + 5) + 4;
Richard Burton9c549902022-02-15 16:39:18 +0000158 float objectness = math::MathUtils::SigmoidF32(
159 (static_cast<float>(net.branches[i].modelOutput[bbox_obj_offset])
160 - net.branches[i].zeroPoint
161 ) * net.branches[i].scale);
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000162
163 if(objectness > threshold) {
Richard Burtoned35a6f2022-02-14 11:55:35 +0000164 image::Detection det;
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000165 det.objectness = objectness;
166 /* Get bbox prediction data for each anchor, each feature point */
167 int bbox_x_offset = bbox_obj_offset -4;
168 int bbox_y_offset = bbox_x_offset + 1;
169 int bbox_w_offset = bbox_x_offset + 2;
170 int bbox_h_offset = bbox_x_offset + 3;
171 int bbox_scores_offset = bbox_x_offset + 5;
172
Richard Burtonef904972022-04-27 17:24:36 +0100173 det.bbox.x = (static_cast<float>(net.branches[i].modelOutput[bbox_x_offset])
174 - net.branches[i].zeroPoint) * net.branches[i].scale;
175 det.bbox.y = (static_cast<float>(net.branches[i].modelOutput[bbox_y_offset])
176 - net.branches[i].zeroPoint) * net.branches[i].scale;
177 det.bbox.w = (static_cast<float>(net.branches[i].modelOutput[bbox_w_offset])
178 - net.branches[i].zeroPoint) * net.branches[i].scale;
179 det.bbox.h = (static_cast<float>(net.branches[i].modelOutput[bbox_h_offset])
180 - net.branches[i].zeroPoint) * net.branches[i].scale;
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000181
182 float bbox_x, bbox_y;
183
184 /* Eliminate grid sensitivity trick involved in YOLOv4 */
Richard Burtoned35a6f2022-02-14 11:55:35 +0000185 bbox_x = math::MathUtils::SigmoidF32(det.bbox.x);
186 bbox_y = math::MathUtils::SigmoidF32(det.bbox.y);
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000187 det.bbox.x = (bbox_x + w) / width;
188 det.bbox.y = (bbox_y + h) / height;
189
Richard Burton9c549902022-02-15 16:39:18 +0000190 det.bbox.w = std::exp(det.bbox.w) * net.branches[i].anchor[anc*2] / net.inputWidth;
191 det.bbox.h = std::exp(det.bbox.h) * net.branches[i].anchor[anc*2+1] / net.inputHeight;
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000192
193 for (int s = 0; s < numClasses; s++) {
Richard Burton9c549902022-02-15 16:39:18 +0000194 float sig = math::MathUtils::SigmoidF32(
195 (static_cast<float>(net.branches[i].modelOutput[bbox_scores_offset + s]) -
196 net.branches[i].zeroPoint) * net.branches[i].scale
197 ) * objectness;
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000198 det.prob.emplace_back((sig > threshold) ? sig : 0);
199 }
200
201 /* Correct_YOLO_boxes */
202 det.bbox.x *= imageWidth;
203 det.bbox.w *= imageWidth;
204 det.bbox.y *= imageHeight;
205 det.bbox.h *= imageHeight;
206
207 if (num < net.topN || net.topN <=0) {
208 detections.emplace_front(det);
209 num += 1;
210 } else if (num == net.topN) {
211 detections.sort(det_objectness_comparator);
Richard Burton6f6df092022-05-17 12:52:50 +0100212 InsertTopNDetections(detections, det);
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000213 num += 1;
214 } else {
Richard Burton6f6df092022-05-17 12:52:50 +0100215 InsertTopNDetections(detections, det);
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000216 }
217 }
218 }
219 }
220 }
221 }
222 if(num > net.topN)
223 num -=1;
224}
225
Isabella Gottardi3107aa22022-01-27 16:39:37 +0000226} /* namespace app */
227} /* namespace arm */