blob: e6291f973e092c140259dff8e2e7ba2beee228eb [file] [log] [blame]
Isabella Gottardi05e56442018-11-16 11:26:52 +00001/*
ramelg014a6d9e82021-10-02 14:34:36 +01002 * Copyright (c) 2018-2021 Arm Limited.
Isabella Gottardi05e56442018-11-16 11:26:52 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/runtime/CPP/functions/CPPDetectionOutputLayer.h"
25
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/Validate.h"
Isabella Gottardi05e56442018-11-16 11:26:52 +000029
ramelg014a6d9e82021-10-02 14:34:36 +010030#include "src/common/utils/Log.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010031#include "src/core/helpers/AutoConfiguration.h"
ramelg014a6d9e82021-10-02 14:34:36 +010032
Isabella Gottardi05e56442018-11-16 11:26:52 +000033#include <list>
34
35namespace arm_compute
36{
37namespace
38{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010039Status validate_arguments(const ITensorInfo *input_loc,
40 const ITensorInfo *input_conf,
41 const ITensorInfo *input_priorbox,
42 const ITensorInfo *output,
43 DetectionOutputLayerInfo info)
Isabella Gottardi05e56442018-11-16 11:26:52 +000044{
45 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input_loc, input_conf, input_priorbox, output);
46 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_loc, 1, DataType::F32);
47 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_loc, input_conf, input_priorbox);
48 ARM_COMPUTE_RETURN_ERROR_ON_MSG(input_loc->num_dimensions() > 2, "The location input tensor should be [C1, N].");
49 ARM_COMPUTE_RETURN_ERROR_ON_MSG(input_conf->num_dimensions() > 2, "The location input tensor should be [C2, N].");
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010050 ARM_COMPUTE_RETURN_ERROR_ON_MSG(input_priorbox->num_dimensions() > 3,
51 "The priorbox input tensor should be [C3, 2, N].");
Isabella Gottardi05e56442018-11-16 11:26:52 +000052
53 ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.eta() <= 0.f && info.eta() > 1.f, "Eta should be between 0 and 1");
54
55 const int num_priors = input_priorbox->tensor_shape()[0] / 4;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010056 ARM_COMPUTE_RETURN_ERROR_ON_MSG(static_cast<size_t>((num_priors * info.num_loc_classes() * 4)) !=
57 input_loc->tensor_shape()[0],
58 "Number of priors must match number of location predictions.");
59 ARM_COMPUTE_RETURN_ERROR_ON_MSG(static_cast<size_t>((num_priors * info.num_classes())) !=
60 input_conf->tensor_shape()[0],
61 "Number of priors must match number of confidence predictions.");
Isabella Gottardi05e56442018-11-16 11:26:52 +000062
63 // Validate configured output
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010064 if (output->total_size() != 0)
Isabella Gottardi05e56442018-11-16 11:26:52 +000065 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010066 const unsigned int max_size =
67 info.keep_top_k() * (input_loc->num_dimensions() > 1 ? input_loc->dimension(1) : 1);
Isabella Gottardi05e56442018-11-16 11:26:52 +000068 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), TensorShape(7U, max_size));
69 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_loc, output);
70 }
71
72 return Status{};
73}
74
/** Comparator ordering pair<float, T> by descending score.
 *
 * @param[in] pair1 Left-hand (score, payload) pair.
 * @param[in] pair2 Right-hand (score, payload) pair.
 *
 * @return True if the score (first) of @p pair1 is strictly greater than the score of @p pair2.
 */
template <typename T>
bool SortScorePairDescend(const std::pair<float, T> &pair1, const std::pair<float, T> &pair2)
{
    const float lhs_score = pair1.first;
    const float rhs_score = pair2.first;
    return rhs_score < lhs_score;
}
82
83/** Get location predictions from input_loc.
84 *
85 * @param[in] input_loc The input location prediction.
86 * @param[in] num The number of images.
87 * @param[in] num_priors number of predictions per class.
88 * @param[in] num_loc_classes number of location classes. It is 1 if share_location is true,
89 * and is equal to number of classes needed to predict otherwise.
90 * @param[in] share_location If true, all classes share the same location prediction.
91 * @param[out] all_location_predictions All the location predictions.
92 *
93 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010094void retrieve_all_loc_predictions(const ITensor *input_loc,
95 const int num,
96 const int num_priors,
97 const int num_loc_classes,
98 const bool share_location,
99 std::vector<LabelBBox> &all_location_predictions)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000100{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100101 for (int i = 0; i < num; ++i)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000102 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100103 for (int c = 0; c < num_loc_classes; ++c)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000104 {
105 int label = share_location ? -1 : c;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100106 if (all_location_predictions[i].find(label) == all_location_predictions[i].end())
Isabella Gottardi05e56442018-11-16 11:26:52 +0000107 {
108 all_location_predictions[i][label].resize(num_priors);
109 }
110 else
111 {
112 ARM_COMPUTE_ERROR_ON(all_location_predictions[i][label].size() != static_cast<size_t>(num_priors));
113 break;
114 }
115 }
116 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100117 for (int i = 0; i < num; ++i)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000118 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100119 for (int p = 0; p < num_priors; ++p)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000120 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100121 for (int c = 0; c < num_loc_classes; ++c)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000122 {
123 const int label = share_location ? -1 : c;
124 const int base_ptr = i * num_priors * num_loc_classes * 4 + p * num_loc_classes * 4 + c * 4;
125 //xmin, ymin, xmax, ymax
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100126 all_location_predictions[i][label][p][0] =
127 *reinterpret_cast<float *>(input_loc->ptr_to_element(Coordinates(base_ptr)));
128 all_location_predictions[i][label][p][1] =
129 *reinterpret_cast<float *>(input_loc->ptr_to_element(Coordinates(base_ptr + 1)));
130 all_location_predictions[i][label][p][2] =
131 *reinterpret_cast<float *>(input_loc->ptr_to_element(Coordinates(base_ptr + 2)));
132 all_location_predictions[i][label][p][3] =
133 *reinterpret_cast<float *>(input_loc->ptr_to_element(Coordinates(base_ptr + 3)));
Isabella Gottardi05e56442018-11-16 11:26:52 +0000134 }
135 }
136 }
137}
138
139/** Get confidence predictions from input_conf.
140 *
141 * @param[in] input_loc The input location prediction.
142 * @param[in] num The number of images.
143 * @param[in] num_priors Number of predictions per class.
144 * @param[in] num_loc_classes Number of location classes. It is 1 if share_location is true,
145 * and is equal to number of classes needed to predict otherwise.
146 * @param[out] all_location_predictions All the location predictions.
147 *
148 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100149void retrieve_all_conf_scores(const ITensor *input_conf,
150 const int num,
151 const int num_priors,
152 const int num_classes,
Isabella Gottardi05e56442018-11-16 11:26:52 +0000153 std::vector<std::map<int, std::vector<float>>> &all_confidence_scores)
154{
155 std::vector<float> tmp_buffer;
156 tmp_buffer.resize(num * num_priors * num_classes);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100157 for (int i = 0; i < num; ++i)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000158 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100159 for (int c = 0; c < num_classes; ++c)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000160 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100161 for (int p = 0; p < num_priors; ++p)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000162 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100163 tmp_buffer[i * num_classes * num_priors + c * num_priors + p] = *reinterpret_cast<float *>(
164 input_conf->ptr_to_element(Coordinates(i * num_classes * num_priors + p * num_classes + c)));
Isabella Gottardi05e56442018-11-16 11:26:52 +0000165 }
166 }
167 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100168 for (int i = 0; i < num; ++i)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000169 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100170 for (int c = 0; c < num_classes; ++c)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000171 {
172 all_confidence_scores[i][c].resize(num_priors);
173 all_confidence_scores[i][c].assign(&tmp_buffer[i * num_classes * num_priors + c * num_priors],
174 &tmp_buffer[i * num_classes * num_priors + c * num_priors + num_priors]);
175 }
176 }
177}
178
179/** Get prior boxes from input_priorbox.
180 *
181 * @param[in] input_priorbox The input location prediction.
182 * @param[in] num_priors Number of priors.
183 * @param[in] num_loc_classes number of location classes. It is 1 if share_location is true,
184 * and is equal to number of classes needed to predict otherwise.
185 * @param[out] all_prior_bboxes If true, all classes share the same location prediction.
186 * @param[out] all_location_predictions All the location predictions.
187 *
188 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100189void retrieve_all_priorbox(const ITensor *input_priorbox,
190 const int num_priors,
191 std::vector<BBox> &all_prior_bboxes,
Isabella Gottardi05e56442018-11-16 11:26:52 +0000192 std::vector<std::array<float, 4>> &all_prior_variances)
193{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100194 for (int i = 0; i < num_priors; ++i)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000195 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100196 all_prior_bboxes[i] = {{*reinterpret_cast<float *>(input_priorbox->ptr_to_element(Coordinates(i * 4))),
197 *reinterpret_cast<float *>(input_priorbox->ptr_to_element(Coordinates(i * 4 + 1))),
198 *reinterpret_cast<float *>(input_priorbox->ptr_to_element(Coordinates(i * 4 + 2))),
199 *reinterpret_cast<float *>(input_priorbox->ptr_to_element(Coordinates(i * 4 + 3)))}};
Isabella Gottardi05e56442018-11-16 11:26:52 +0000200 }
201
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100202 std::array<float, 4> var({{0, 0, 0, 0}});
203 for (int i = 0; i < num_priors; ++i)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000204 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100205 for (int j = 0; j < 4; ++j)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000206 {
207 var[j] = *reinterpret_cast<float *>(input_priorbox->ptr_to_element(Coordinates((num_priors + i) * 4 + j)));
208 }
209 all_prior_variances[i] = var;
210 }
211}
212
213/** Decode a bbox according to a prior bbox.
214 *
215 * @param[in] prior_bbox The input prior bounding boxes.
216 * @param[in] prior_variance The corresponding input variance.
217 * @param[in] code_type The detection output code type used to decode the results.
218 * @param[in] variance_encoded_in_target If true, the variance is encoded in target.
219 * @param[in] clip_bbox If true, the results should be between 0.f and 1.f.
220 * @param[in] bbox The input bbox to decode
221 * @param[out] decode_bbox The decoded bboxes.
222 *
223 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100224void DecodeBBox(const BBox &prior_bbox,
225 const std::array<float, 4> &prior_variance,
226 const DetectionOutputLayerCodeType code_type,
227 const bool variance_encoded_in_target,
228 const bool clip_bbox,
229 const BBox &bbox,
230 BBox &decode_bbox)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000231{
232 // if the variance is encoded in target, we simply need to add the offset predictions
233 // otherwise we need to scale the offset accordingly.
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100234 switch (code_type)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000235 {
236 case DetectionOutputLayerCodeType::CORNER:
237 {
238 decode_bbox[0] = prior_bbox[0] + (variance_encoded_in_target ? bbox[0] : prior_variance[0] * bbox[0]);
239 decode_bbox[1] = prior_bbox[1] + (variance_encoded_in_target ? bbox[1] : prior_variance[1] * bbox[1]);
240 decode_bbox[2] = prior_bbox[2] + (variance_encoded_in_target ? bbox[2] : prior_variance[2] * bbox[2]);
241 decode_bbox[3] = prior_bbox[3] + (variance_encoded_in_target ? bbox[3] : prior_variance[3] * bbox[3]);
242
243 break;
244 }
245 case DetectionOutputLayerCodeType::CENTER_SIZE:
246 {
247 const float prior_width = prior_bbox[2] - prior_bbox[0];
248 const float prior_height = prior_bbox[3] - prior_bbox[1];
249
250 // Check if the prior width and height are right
251 ARM_COMPUTE_ERROR_ON(prior_width <= 0.f);
252 ARM_COMPUTE_ERROR_ON(prior_height <= 0.f);
253
254 const float prior_center_x = (prior_bbox[0] + prior_bbox[2]) / 2.;
255 const float prior_center_y = (prior_bbox[1] + prior_bbox[3]) / 2.;
256
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100257 const float decode_bbox_center_x =
258 (variance_encoded_in_target ? bbox[0] : prior_variance[0] * bbox[0]) * prior_width + prior_center_x;
259 const float decode_bbox_center_y =
260 (variance_encoded_in_target ? bbox[1] : prior_variance[1] * bbox[1]) * prior_height + prior_center_y;
261 const float decode_bbox_width =
262 (variance_encoded_in_target ? std::exp(bbox[2]) : std::exp(prior_variance[2] * bbox[2])) * prior_width;
263 const float decode_bbox_height =
264 (variance_encoded_in_target ? std::exp(bbox[3]) : std::exp(prior_variance[3] * bbox[3])) * prior_height;
Isabella Gottardi05e56442018-11-16 11:26:52 +0000265
266 decode_bbox[0] = (decode_bbox_center_x - decode_bbox_width / 2.f);
267 decode_bbox[1] = (decode_bbox_center_y - decode_bbox_height / 2.f);
268 decode_bbox[2] = (decode_bbox_center_x + decode_bbox_width / 2.f);
269 decode_bbox[3] = (decode_bbox_center_y + decode_bbox_height / 2.f);
270
271 break;
272 }
273 case DetectionOutputLayerCodeType::CORNER_SIZE:
274 {
275 const float prior_width = prior_bbox[2] - prior_bbox[0];
276 const float prior_height = prior_bbox[3] - prior_bbox[1];
277
278 // Check if the prior width and height are greater than 0
279 ARM_COMPUTE_ERROR_ON(prior_width <= 0.f);
280 ARM_COMPUTE_ERROR_ON(prior_height <= 0.f);
281
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100282 decode_bbox[0] =
283 prior_bbox[0] + (variance_encoded_in_target ? bbox[0] : prior_variance[0] * bbox[0]) * prior_width;
284 decode_bbox[1] =
285 prior_bbox[1] + (variance_encoded_in_target ? bbox[1] : prior_variance[1] * bbox[1]) * prior_height;
286 decode_bbox[2] =
287 prior_bbox[2] + (variance_encoded_in_target ? bbox[2] : prior_variance[2] * bbox[2]) * prior_width;
288 decode_bbox[3] =
289 prior_bbox[3] + (variance_encoded_in_target ? bbox[3] : prior_variance[3] * bbox[3]) * prior_height;
Isabella Gottardi05e56442018-11-16 11:26:52 +0000290
291 break;
292 }
293 default:
294 ARM_COMPUTE_ERROR("Unsupported Detection Output Code Type.");
295 }
296
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100297 if (clip_bbox)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000298 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100299 for (auto &d_bbox : decode_bbox)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000300 {
301 d_bbox = utility::clamp(d_bbox, 0.f, 1.f);
302 }
303 }
304}
305
306/** Do non maximum suppression given bboxes and scores.
307 *
308 * @param[in] bboxes The input bounding boxes.
309 * @param[in] scores The corresponding input confidence.
310 * @param[in] score_threshold The threshold used to filter detection results.
311 * @param[in] nms_threshold The threshold used in non maximum suppression.
312 * @param[in] eta Adaptation rate for nms threshold.
313 * @param[in] top_k If not -1, keep at most top_k picked indices.
314 * @param[out] indices The kept indices of bboxes after nms.
315 *
316 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100317void ApplyNMSFast(const std::vector<BBox> &bboxes,
318 const std::vector<float> &scores,
319 const float score_threshold,
320 const float nms_threshold,
321 const float eta,
322 const int top_k,
323 std::vector<int> &indices)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000324{
325 ARM_COMPUTE_ERROR_ON_MSG(bboxes.size() != scores.size(), "bboxes and scores have different size.");
326
327 // Get top_k scores (with corresponding indices).
328 std::list<std::pair<float, int>> score_index_vec;
329
330 // Generate index score pairs.
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100331 for (size_t i = 0; i < scores.size(); ++i)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000332 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100333 if (scores[i] > score_threshold)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000334 {
335 score_index_vec.emplace_back(std::make_pair(scores[i], i));
336 }
337 }
338
339 // Sort the score pair according to the scores in descending order
340 score_index_vec.sort(SortScorePairDescend<int>);
341
342 // Keep top_k scores if needed.
343 const int score_index_vec_size = score_index_vec.size();
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100344 if (top_k > -1 && top_k < score_index_vec_size)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000345 {
346 score_index_vec.resize(top_k);
347 }
348
349 // Do nms.
350 float adaptive_threshold = nms_threshold;
351 indices.clear();
352
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100353 while (!score_index_vec.empty())
Isabella Gottardi05e56442018-11-16 11:26:52 +0000354 {
355 const int idx = score_index_vec.front().second;
356 bool keep = true;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100357 for (int kept_idx : indices)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000358 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100359 if (keep)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000360 {
361 // Compute the jaccard (intersection over union IoU) overlap between two bboxes.
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100362 BBox intersect_bbox = std::array<float, 4>({0, 0, 0, 0});
363 if (bboxes[kept_idx][0] > bboxes[idx][2] || bboxes[kept_idx][2] < bboxes[idx][0] ||
364 bboxes[kept_idx][1] > bboxes[idx][3] || bboxes[kept_idx][3] < bboxes[idx][1])
Isabella Gottardi05e56442018-11-16 11:26:52 +0000365 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100366 intersect_bbox = std::array<float, 4>({{0, 0, 0, 0}});
Isabella Gottardi05e56442018-11-16 11:26:52 +0000367 }
368 else
369 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100370 intersect_bbox = std::array<float, 4>(
371 {{std::max(bboxes[idx][0], bboxes[kept_idx][0]), std::max(bboxes[idx][1], bboxes[kept_idx][1]),
372 std::min(bboxes[idx][2], bboxes[kept_idx][2]),
373 std::min(bboxes[idx][3], bboxes[kept_idx][3])}});
Isabella Gottardi05e56442018-11-16 11:26:52 +0000374 }
375
376 float intersect_width = intersect_bbox[2] - intersect_bbox[0];
377 float intersect_height = intersect_bbox[3] - intersect_bbox[1];
378
379 float overlap = 0.f;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100380 if (intersect_width > 0 && intersect_height > 0)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000381 {
382 float intersect_size = intersect_width * intersect_height;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100383 float bbox1_size = (bboxes[idx][2] < bboxes[idx][0] || bboxes[idx][3] < bboxes[idx][1])
384 ? 0.f
385 : (bboxes[idx][2] - bboxes[idx][0]) *
386 (bboxes[idx][3] - bboxes[idx][1]); //BBoxSize(bboxes[idx]);
387 float bbox2_size =
388 (bboxes[kept_idx][2] < bboxes[kept_idx][0] || bboxes[kept_idx][3] < bboxes[kept_idx][1])
389 ? 0.f
390 : (bboxes[kept_idx][2] - bboxes[kept_idx][0]) *
391 (bboxes[kept_idx][3] - bboxes[kept_idx][1]); // BBoxSize(bboxes[kept_idx]);
Isabella Gottardi05e56442018-11-16 11:26:52 +0000392 overlap = intersect_size / (bbox1_size + bbox2_size - intersect_size);
393 }
394 keep = (overlap <= adaptive_threshold);
395 }
396 else
397 {
398 break;
399 }
400 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100401 if (keep)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000402 {
403 indices.push_back(idx);
404 }
405 score_index_vec.erase(score_index_vec.begin());
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100406 if (keep && eta < 1.f && adaptive_threshold > 0.5f)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000407 {
408 adaptive_threshold *= eta;
409 }
410 }
411}
412} // namespace
413
414CPPDetectionOutputLayer::CPPDetectionOutputLayer()
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100415 : _input_loc(nullptr),
416 _input_conf(nullptr),
417 _input_priorbox(nullptr),
418 _output(nullptr),
419 _info(),
420 _num_priors(),
421 _num(),
422 _all_location_predictions(),
423 _all_confidence_scores(),
424 _all_prior_bboxes(),
425 _all_prior_variances(),
426 _all_decode_bboxes(),
427 _all_indices()
Isabella Gottardi05e56442018-11-16 11:26:52 +0000428{
429}
430
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100431void CPPDetectionOutputLayer::configure(const ITensor *input_loc,
432 const ITensor *input_conf,
433 const ITensor *input_priorbox,
434 ITensor *output,
435 DetectionOutputLayerInfo info)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000436{
437 ARM_COMPUTE_ERROR_ON_NULLPTR(input_loc, input_conf, input_priorbox, output);
ramelg014a6d9e82021-10-02 14:34:36 +0100438 ARM_COMPUTE_LOG_PARAMS(input_loc, input_conf, input_priorbox, output, info);
439
Isabella Gottardi05e56442018-11-16 11:26:52 +0000440 // Output auto initialization if not yet initialized
441 // Since the number of bboxes to kept is unknown before nms, the shape is set to the maximum
442 // The maximum is keep_top_k * input_loc_size[1]
443 // Each row is a 7 dimension std::vector, which stores [image_id, label, confidence, xmin, ymin, xmax, ymax]
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100444 const unsigned int max_size =
445 info.keep_top_k() * (input_loc->info()->num_dimensions() > 1 ? input_loc->info()->dimension(1) : 1);
Isabella Gottardi05e56442018-11-16 11:26:52 +0000446 auto_init_if_empty(*output->info(), input_loc->info()->clone()->set_tensor_shape(TensorShape(7U, max_size)));
447
448 // Perform validation step
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100449 ARM_COMPUTE_ERROR_THROW_ON(
450 validate_arguments(input_loc->info(), input_conf->info(), input_priorbox->info(), output->info(), info));
Isabella Gottardi05e56442018-11-16 11:26:52 +0000451
452 _input_loc = input_loc;
453 _input_conf = input_conf;
454 _input_priorbox = input_priorbox;
455 _output = output;
456 _info = info;
457 _num_priors = input_priorbox->info()->dimension(0) / 4;
458 _num = (_input_loc->info()->num_dimensions() > 1 ? _input_loc->info()->dimension(1) : 1);
459
460 _all_location_predictions.resize(_num);
461 _all_confidence_scores.resize(_num);
462 _all_prior_bboxes.resize(_num_priors);
463 _all_prior_variances.resize(_num_priors);
464 _all_decode_bboxes.resize(_num);
465
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100466 for (int i = 0; i < _num; ++i)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000467 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100468 for (int c = 0; c < _info.num_loc_classes(); ++c)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000469 {
470 const int label = _info.share_location() ? -1 : c;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100471 if (label == _info.background_label_id())
Isabella Gottardi05e56442018-11-16 11:26:52 +0000472 {
473 // Ignore background class.
474 continue;
475 }
476 _all_decode_bboxes[i][label].resize(_num_priors);
477 }
478 }
479 _all_indices.resize(_num);
480
481 Coordinates coord;
482 coord.set_num_dimensions(output->info()->num_dimensions());
483 output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
484}
485
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100486Status CPPDetectionOutputLayer::validate(const ITensorInfo *input_loc,
487 const ITensorInfo *input_conf,
488 const ITensorInfo *input_priorbox,
489 const ITensorInfo *output,
490 DetectionOutputLayerInfo info)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000491{
Isabella Gottardi883bad72019-07-15 17:33:07 +0100492 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input_loc, input_conf, input_priorbox, output, info));
Isabella Gottardi05e56442018-11-16 11:26:52 +0000493 return Status{};
494}
495
496void CPPDetectionOutputLayer::run()
497{
498 // Retrieve all location predictions.
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100499 retrieve_all_loc_predictions(_input_loc, _num, _num_priors, _info.num_loc_classes(), _info.share_location(),
500 _all_location_predictions);
Isabella Gottardi05e56442018-11-16 11:26:52 +0000501
502 // Retrieve all confidences.
503 retrieve_all_conf_scores(_input_conf, _num, _num_priors, _info.num_classes(), _all_confidence_scores);
504
505 // Retrieve all prior bboxes.
506 retrieve_all_priorbox(_input_priorbox, _num_priors, _all_prior_bboxes, _all_prior_variances);
507
508 // Decode all loc predictions to bboxes
509 const bool clip_bbox = false;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100510 for (int i = 0; i < _num; ++i)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000511 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100512 for (int c = 0; c < _info.num_loc_classes(); ++c)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000513 {
514 const int label = _info.share_location() ? -1 : c;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100515 if (label == _info.background_label_id())
Isabella Gottardi05e56442018-11-16 11:26:52 +0000516 {
517 // Ignore background class.
518 continue;
519 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100520 ARM_COMPUTE_ERROR_ON_MSG_VAR(_all_location_predictions[i].find(label) == _all_location_predictions[i].end(),
521 "Could not find location predictions for label %d.", label);
Isabella Gottardi05e56442018-11-16 11:26:52 +0000522
Isabella Gottardia7acb3c2019-01-08 13:48:44 +0000523 const std::vector<BBox> &label_loc_preds = _all_location_predictions[i].find(label)->second;
Isabella Gottardi05e56442018-11-16 11:26:52 +0000524
525 const int num_bboxes = _all_prior_bboxes.size();
526 ARM_COMPUTE_ERROR_ON(_all_prior_variances[i].size() != 4);
527
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100528 for (int j = 0; j < num_bboxes; ++j)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000529 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100530 DecodeBBox(_all_prior_bboxes[j], _all_prior_variances[j], _info.code_type(),
531 _info.variance_encoded_in_target(), clip_bbox, label_loc_preds[j],
532 _all_decode_bboxes[i][label][j]);
Isabella Gottardi05e56442018-11-16 11:26:52 +0000533 }
534 }
535 }
536
537 int num_kept = 0;
538
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100539 for (int i = 0; i < _num; ++i)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000540 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100541 const LabelBBox &decode_bboxes = _all_decode_bboxes[i];
542 const std::map<int, std::vector<float>> &conf_scores = _all_confidence_scores[i];
Isabella Gottardi05e56442018-11-16 11:26:52 +0000543
544 std::map<int, std::vector<int>> indices;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100545 int num_det = 0;
546 for (int c = 0; c < _info.num_classes(); ++c)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000547 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100548 if (c == _info.background_label_id())
Isabella Gottardi05e56442018-11-16 11:26:52 +0000549 {
550 // Ignore background class
551 continue;
552 }
553 const int label = _info.share_location() ? -1 : c;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100554 if (conf_scores.find(c) == conf_scores.end() || decode_bboxes.find(label) == decode_bboxes.end())
Isabella Gottardi05e56442018-11-16 11:26:52 +0000555 {
Michalis Spyrou7c60c992019-10-10 14:33:47 +0100556 ARM_COMPUTE_ERROR_VAR("Could not find predictions for label %d.", label);
Isabella Gottardi05e56442018-11-16 11:26:52 +0000557 }
Isabella Gottardia7acb3c2019-01-08 13:48:44 +0000558 const std::vector<float> &scores = conf_scores.find(c)->second;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100559 const std::vector<BBox> &bboxes = decode_bboxes.find(label)->second;
Isabella Gottardi05e56442018-11-16 11:26:52 +0000560
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100561 ApplyNMSFast(bboxes, scores, _info.confidence_threshold(), _info.nms_threshold(), _info.eta(),
562 _info.top_k(), indices[c]);
Isabella Gottardi05e56442018-11-16 11:26:52 +0000563
564 num_det += indices[c].size();
565 }
566
567 int num_to_add = 0;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100568 if (_info.keep_top_k() > -1 && num_det > _info.keep_top_k())
Isabella Gottardi05e56442018-11-16 11:26:52 +0000569 {
570 std::vector<std::pair<float, std::pair<int, int>>> score_index_pairs;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100571 for (auto const &it : indices)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000572 {
573 const int label = it.first;
574 const std::vector<int> &label_indices = it.second;
575
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100576 if (conf_scores.find(label) == conf_scores.end())
Isabella Gottardi05e56442018-11-16 11:26:52 +0000577 {
Michalis Spyrou7c60c992019-10-10 14:33:47 +0100578 ARM_COMPUTE_ERROR_VAR("Could not find predictions for label %d.", label);
Isabella Gottardi05e56442018-11-16 11:26:52 +0000579 }
580
581 const std::vector<float> &scores = conf_scores.find(label)->second;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100582 for (auto idx : label_indices)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000583 {
584 ARM_COMPUTE_ERROR_ON(idx > static_cast<int>(scores.size()));
Michalis Spyroubcfd09a2019-05-01 13:03:59 +0100585 score_index_pairs.emplace_back(std::make_pair(scores[idx], std::make_pair(label, idx)));
Isabella Gottardi05e56442018-11-16 11:26:52 +0000586 }
587 }
588
589 // Keep top k results per image.
590 std::sort(score_index_pairs.begin(), score_index_pairs.end(), SortScorePairDescend<std::pair<int, int>>);
591 score_index_pairs.resize(_info.keep_top_k());
592
593 // Store the new indices.
594
595 std::map<int, std::vector<int>> new_indices;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100596 for (auto score_index_pair : score_index_pairs)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000597 {
598 int label = score_index_pair.second.first;
599 int idx = score_index_pair.second.second;
600 new_indices[label].push_back(idx);
601 }
602 _all_indices[i] = new_indices;
603 num_to_add = _info.keep_top_k();
604 }
605 else
606 {
607 _all_indices[i] = indices;
608 num_to_add = num_det;
609 }
610 num_kept += num_to_add;
611 }
612
613 //Update the valid region of the ouput to mark the exact number of detection
614 _output->info()->set_valid_region(ValidRegion(Coordinates(0, 0), TensorShape(7, num_kept)));
615
616 int count = 0;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100617 for (int i = 0; i < _num; ++i)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000618 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100619 const std::map<int, std::vector<float>> &conf_scores = _all_confidence_scores[i];
620 const LabelBBox &decode_bboxes = _all_decode_bboxes[i];
621 for (auto &it : _all_indices[i])
Isabella Gottardi05e56442018-11-16 11:26:52 +0000622 {
623 const int label = it.first;
624 const std::vector<float> &scores = conf_scores.find(label)->second;
625 const int loc_label = _info.share_location() ? -1 : label;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100626 if (conf_scores.find(label) == conf_scores.end() || decode_bboxes.find(loc_label) == decode_bboxes.end())
Isabella Gottardi05e56442018-11-16 11:26:52 +0000627 {
628 // Either if there are no confidence predictions
629 // or there are no location predictions for current label.
Michalis Spyrou7c60c992019-10-10 14:33:47 +0100630 ARM_COMPUTE_ERROR_VAR("Could not find predictions for the label %d.", label);
Isabella Gottardi05e56442018-11-16 11:26:52 +0000631 }
Isabella Gottardia7acb3c2019-01-08 13:48:44 +0000632 const std::vector<BBox> &bboxes = decode_bboxes.find(loc_label)->second;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100633 const std::vector<int> &indices = it.second;
Isabella Gottardi05e56442018-11-16 11:26:52 +0000634
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100635 for (auto idx : indices)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000636 {
637 *(reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(count * 7)))) = i;
638 *(reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(count * 7 + 1)))) = label;
639 *(reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(count * 7 + 2)))) = scores[idx];
640 *(reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(count * 7 + 3)))) = bboxes[idx][0];
641 *(reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(count * 7 + 4)))) = bboxes[idx][1];
642 *(reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(count * 7 + 5)))) = bboxes[idx][2];
643 *(reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(count * 7 + 6)))) = bboxes[idx][3];
644
645 ++count;
646 }
647 }
648 }
649}
Pablo Telloe96e4f02018-12-21 16:47:23 +0000650} // namespace arm_compute