/*
 * SPDX-FileCopyrightText: Copyright 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| 17 | #include "DetectorPostProcessing.hpp" |
Richard Burton | ed35a6f | 2022-02-14 11:55:35 +0000 | [diff] [blame] | 18 | #include "PlatformMath.hpp" |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 19 | |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 20 | #include <cmath> |
| 21 | |
| 22 | namespace arm { |
| 23 | namespace app { |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 24 | |
Richard Burton | ef90497 | 2022-04-27 17:24:36 +0100 | [diff] [blame] | 25 | DetectorPostProcess::DetectorPostProcess( |
| 26 | TfLiteTensor* modelOutput0, |
| 27 | TfLiteTensor* modelOutput1, |
| 28 | std::vector<object_detection::DetectionResult>& results, |
Richard Burton | 6f6df09 | 2022-05-17 12:52:50 +0100 | [diff] [blame] | 29 | const object_detection::PostProcessParams& postProcessParams) |
Richard Burton | ef90497 | 2022-04-27 17:24:36 +0100 | [diff] [blame] | 30 | : m_outputTensor0{modelOutput0}, |
| 31 | m_outputTensor1{modelOutput1}, |
| 32 | m_results{results}, |
Richard Burton | 6f6df09 | 2022-05-17 12:52:50 +0100 | [diff] [blame] | 33 | m_postProcessParams{postProcessParams} |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 34 | { |
Richard Burton | ef90497 | 2022-04-27 17:24:36 +0100 | [diff] [blame] | 35 | /* Init PostProcessing */ |
Liam Barry | 213a543 | 2022-05-09 17:06:19 +0100 | [diff] [blame] | 36 | this->m_net = object_detection::Network{ |
Richard Burton | 6f6df09 | 2022-05-17 12:52:50 +0100 | [diff] [blame] | 37 | .inputWidth = postProcessParams.inputImgCols, |
| 38 | .inputHeight = postProcessParams.inputImgRows, |
| 39 | .numClasses = postProcessParams.numClasses, |
Liam Barry | 213a543 | 2022-05-09 17:06:19 +0100 | [diff] [blame] | 40 | .branches = |
Richard Burton | 6f6df09 | 2022-05-17 12:52:50 +0100 | [diff] [blame] | 41 | {object_detection::Branch{.resolution = postProcessParams.inputImgCols / 32, |
Liam Barry | 213a543 | 2022-05-09 17:06:19 +0100 | [diff] [blame] | 42 | .numBox = 3, |
Richard Burton | 6f6df09 | 2022-05-17 12:52:50 +0100 | [diff] [blame] | 43 | .anchor = postProcessParams.anchor1, |
Liam Barry | 213a543 | 2022-05-09 17:06:19 +0100 | [diff] [blame] | 44 | .modelOutput = this->m_outputTensor0->data.int8, |
| 45 | .scale = (static_cast<TfLiteAffineQuantization*>( |
| 46 | this->m_outputTensor0->quantization.params)) |
| 47 | ->scale->data[0], |
| 48 | .zeroPoint = (static_cast<TfLiteAffineQuantization*>( |
| 49 | this->m_outputTensor0->quantization.params)) |
| 50 | ->zero_point->data[0], |
| 51 | .size = this->m_outputTensor0->bytes}, |
Richard Burton | 6f6df09 | 2022-05-17 12:52:50 +0100 | [diff] [blame] | 52 | object_detection::Branch{.resolution = postProcessParams.inputImgCols / 16, |
Liam Barry | 213a543 | 2022-05-09 17:06:19 +0100 | [diff] [blame] | 53 | .numBox = 3, |
Richard Burton | 6f6df09 | 2022-05-17 12:52:50 +0100 | [diff] [blame] | 54 | .anchor = postProcessParams.anchor2, |
Liam Barry | 213a543 | 2022-05-09 17:06:19 +0100 | [diff] [blame] | 55 | .modelOutput = this->m_outputTensor1->data.int8, |
| 56 | .scale = (static_cast<TfLiteAffineQuantization*>( |
| 57 | this->m_outputTensor1->quantization.params)) |
| 58 | ->scale->data[0], |
| 59 | .zeroPoint = (static_cast<TfLiteAffineQuantization*>( |
| 60 | this->m_outputTensor1->quantization.params)) |
| 61 | ->zero_point->data[0], |
| 62 | .size = this->m_outputTensor1->bytes}}, |
Richard Burton | 6f6df09 | 2022-05-17 12:52:50 +0100 | [diff] [blame] | 63 | .topN = postProcessParams.topN}; |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 64 | /* End init */ |
Richard Burton | ef90497 | 2022-04-27 17:24:36 +0100 | [diff] [blame] | 65 | } |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 66 | |
Richard Burton | ef90497 | 2022-04-27 17:24:36 +0100 | [diff] [blame] | 67 | bool DetectorPostProcess::DoPostProcess() |
| 68 | { |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 69 | /* Start postprocessing */ |
Richard Burton | 6f6df09 | 2022-05-17 12:52:50 +0100 | [diff] [blame] | 70 | int originalImageWidth = m_postProcessParams.originalImageSize; |
| 71 | int originalImageHeight = m_postProcessParams.originalImageSize; |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 72 | |
Richard Burton | ed35a6f | 2022-02-14 11:55:35 +0000 | [diff] [blame] | 73 | std::forward_list<image::Detection> detections; |
Richard Burton | 6f6df09 | 2022-05-17 12:52:50 +0100 | [diff] [blame] | 74 | GetNetworkBoxes(this->m_net, originalImageWidth, originalImageHeight, m_postProcessParams.threshold, detections); |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 75 | |
| 76 | /* Do nms */ |
Richard Burton | 6f6df09 | 2022-05-17 12:52:50 +0100 | [diff] [blame] | 77 | CalculateNMS(detections, this->m_net.numClasses, this->m_postProcessParams.nms); |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 78 | |
| 79 | for (auto& it: detections) { |
| 80 | float xMin = it.bbox.x - it.bbox.w / 2.0f; |
| 81 | float xMax = it.bbox.x + it.bbox.w / 2.0f; |
| 82 | float yMin = it.bbox.y - it.bbox.h / 2.0f; |
| 83 | float yMax = it.bbox.y + it.bbox.h / 2.0f; |
| 84 | |
| 85 | if (xMin < 0) { |
| 86 | xMin = 0; |
| 87 | } |
| 88 | if (yMin < 0) { |
| 89 | yMin = 0; |
| 90 | } |
| 91 | if (xMax > originalImageWidth) { |
| 92 | xMax = originalImageWidth; |
| 93 | } |
| 94 | if (yMax > originalImageHeight) { |
| 95 | yMax = originalImageHeight; |
| 96 | } |
| 97 | |
| 98 | float boxX = xMin; |
| 99 | float boxY = yMin; |
| 100 | float boxWidth = xMax - xMin; |
| 101 | float boxHeight = yMax - yMin; |
| 102 | |
Richard Burton | ef90497 | 2022-04-27 17:24:36 +0100 | [diff] [blame] | 103 | for (int j = 0; j < this->m_net.numClasses; ++j) { |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 104 | if (it.prob[j] > 0) { |
| 105 | |
Richard Burton | ef90497 | 2022-04-27 17:24:36 +0100 | [diff] [blame] | 106 | object_detection::DetectionResult tmpResult = {}; |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 107 | tmpResult.m_normalisedVal = it.prob[j]; |
| 108 | tmpResult.m_x0 = boxX; |
| 109 | tmpResult.m_y0 = boxY; |
| 110 | tmpResult.m_w = boxWidth; |
| 111 | tmpResult.m_h = boxHeight; |
| 112 | |
Richard Burton | ef90497 | 2022-04-27 17:24:36 +0100 | [diff] [blame] | 113 | this->m_results.push_back(tmpResult); |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 114 | } |
| 115 | } |
| 116 | } |
Richard Burton | ef90497 | 2022-04-27 17:24:36 +0100 | [diff] [blame] | 117 | return true; |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 118 | } |
| 119 | |
Richard Burton | ef90497 | 2022-04-27 17:24:36 +0100 | [diff] [blame] | 120 | void DetectorPostProcess::InsertTopNDetections(std::forward_list<image::Detection>& detections, image::Detection& det) |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 121 | { |
Richard Burton | ed35a6f | 2022-02-14 11:55:35 +0000 | [diff] [blame] | 122 | std::forward_list<image::Detection>::iterator it; |
| 123 | std::forward_list<image::Detection>::iterator last_it; |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 124 | for ( it = detections.begin(); it != detections.end(); ++it ) { |
| 125 | if(it->objectness > det.objectness) |
| 126 | break; |
| 127 | last_it = it; |
| 128 | } |
| 129 | if(it != detections.begin()) { |
| 130 | detections.emplace_after(last_it, det); |
| 131 | detections.pop_front(); |
| 132 | } |
| 133 | } |
| 134 | |
Richard Burton | ef90497 | 2022-04-27 17:24:36 +0100 | [diff] [blame] | 135 | void DetectorPostProcess::GetNetworkBoxes( |
| 136 | object_detection::Network& net, |
| 137 | int imageWidth, |
| 138 | int imageHeight, |
| 139 | float threshold, |
| 140 | std::forward_list<image::Detection>& detections) |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 141 | { |
| 142 | int numClasses = net.numClasses; |
| 143 | int num = 0; |
Richard Burton | ed35a6f | 2022-02-14 11:55:35 +0000 | [diff] [blame] | 144 | auto det_objectness_comparator = [](image::Detection& pa, image::Detection& pb) { |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 145 | return pa.objectness < pb.objectness; |
| 146 | }; |
| 147 | for (size_t i = 0; i < net.branches.size(); ++i) { |
| 148 | int height = net.branches[i].resolution; |
| 149 | int width = net.branches[i].resolution; |
| 150 | int channel = net.branches[i].numBox*(5+numClasses); |
| 151 | |
| 152 | for (int h = 0; h < net.branches[i].resolution; h++) { |
| 153 | for (int w = 0; w < net.branches[i].resolution; w++) { |
| 154 | for (int anc = 0; anc < net.branches[i].numBox; anc++) { |
| 155 | |
| 156 | /* Objectness score */ |
| 157 | int bbox_obj_offset = h * width * channel + w * channel + anc * (numClasses + 5) + 4; |
Richard Burton | 9c54990 | 2022-02-15 16:39:18 +0000 | [diff] [blame] | 158 | float objectness = math::MathUtils::SigmoidF32( |
| 159 | (static_cast<float>(net.branches[i].modelOutput[bbox_obj_offset]) |
| 160 | - net.branches[i].zeroPoint |
| 161 | ) * net.branches[i].scale); |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 162 | |
| 163 | if(objectness > threshold) { |
Richard Burton | ed35a6f | 2022-02-14 11:55:35 +0000 | [diff] [blame] | 164 | image::Detection det; |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 165 | det.objectness = objectness; |
| 166 | /* Get bbox prediction data for each anchor, each feature point */ |
| 167 | int bbox_x_offset = bbox_obj_offset -4; |
| 168 | int bbox_y_offset = bbox_x_offset + 1; |
| 169 | int bbox_w_offset = bbox_x_offset + 2; |
| 170 | int bbox_h_offset = bbox_x_offset + 3; |
| 171 | int bbox_scores_offset = bbox_x_offset + 5; |
| 172 | |
Richard Burton | ef90497 | 2022-04-27 17:24:36 +0100 | [diff] [blame] | 173 | det.bbox.x = (static_cast<float>(net.branches[i].modelOutput[bbox_x_offset]) |
| 174 | - net.branches[i].zeroPoint) * net.branches[i].scale; |
| 175 | det.bbox.y = (static_cast<float>(net.branches[i].modelOutput[bbox_y_offset]) |
| 176 | - net.branches[i].zeroPoint) * net.branches[i].scale; |
| 177 | det.bbox.w = (static_cast<float>(net.branches[i].modelOutput[bbox_w_offset]) |
| 178 | - net.branches[i].zeroPoint) * net.branches[i].scale; |
| 179 | det.bbox.h = (static_cast<float>(net.branches[i].modelOutput[bbox_h_offset]) |
| 180 | - net.branches[i].zeroPoint) * net.branches[i].scale; |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 181 | |
| 182 | float bbox_x, bbox_y; |
| 183 | |
| 184 | /* Eliminate grid sensitivity trick involved in YOLOv4 */ |
Richard Burton | ed35a6f | 2022-02-14 11:55:35 +0000 | [diff] [blame] | 185 | bbox_x = math::MathUtils::SigmoidF32(det.bbox.x); |
| 186 | bbox_y = math::MathUtils::SigmoidF32(det.bbox.y); |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 187 | det.bbox.x = (bbox_x + w) / width; |
| 188 | det.bbox.y = (bbox_y + h) / height; |
| 189 | |
Richard Burton | 9c54990 | 2022-02-15 16:39:18 +0000 | [diff] [blame] | 190 | det.bbox.w = std::exp(det.bbox.w) * net.branches[i].anchor[anc*2] / net.inputWidth; |
| 191 | det.bbox.h = std::exp(det.bbox.h) * net.branches[i].anchor[anc*2+1] / net.inputHeight; |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 192 | |
| 193 | for (int s = 0; s < numClasses; s++) { |
Richard Burton | 9c54990 | 2022-02-15 16:39:18 +0000 | [diff] [blame] | 194 | float sig = math::MathUtils::SigmoidF32( |
| 195 | (static_cast<float>(net.branches[i].modelOutput[bbox_scores_offset + s]) - |
| 196 | net.branches[i].zeroPoint) * net.branches[i].scale |
| 197 | ) * objectness; |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 198 | det.prob.emplace_back((sig > threshold) ? sig : 0); |
| 199 | } |
| 200 | |
| 201 | /* Correct_YOLO_boxes */ |
| 202 | det.bbox.x *= imageWidth; |
| 203 | det.bbox.w *= imageWidth; |
| 204 | det.bbox.y *= imageHeight; |
| 205 | det.bbox.h *= imageHeight; |
| 206 | |
| 207 | if (num < net.topN || net.topN <=0) { |
| 208 | detections.emplace_front(det); |
| 209 | num += 1; |
| 210 | } else if (num == net.topN) { |
| 211 | detections.sort(det_objectness_comparator); |
Richard Burton | 6f6df09 | 2022-05-17 12:52:50 +0100 | [diff] [blame] | 212 | InsertTopNDetections(detections, det); |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 213 | num += 1; |
| 214 | } else { |
Richard Burton | 6f6df09 | 2022-05-17 12:52:50 +0100 | [diff] [blame] | 215 | InsertTopNDetections(detections, det); |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 216 | } |
| 217 | } |
| 218 | } |
| 219 | } |
| 220 | } |
| 221 | } |
| 222 | if(num > net.topN) |
| 223 | num -=1; |
| 224 | } |
| 225 | |
Isabella Gottardi | 3107aa2 | 2022-01-27 16:39:37 +0000 | [diff] [blame] | 226 | } /* namespace app */ |
| 227 | } /* namespace arm */ |