blob: 9d6273338469b4bb8332644e6afdaa5e8d696ced [file] [log] [blame]
Isabella Gottardi05e56442018-11-16 11:26:52 +00001/*
Michele Di Giorgiod9eaf612020-07-08 11:12:57 +01002 * Copyright (c) 2018-2020 Arm Limited.
Isabella Gottardi05e56442018-11-16 11:26:52 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/runtime/CPP/functions/CPPDetectionOutputLayer.h"
25
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/Validate.h"
Isabella Gottardi05e56442018-11-16 11:26:52 +000029
30#include <list>
31
32namespace arm_compute
33{
34namespace
35{
Isabella Gottardi883bad72019-07-15 17:33:07 +010036Status validate_arguments(const ITensorInfo *input_loc, const ITensorInfo *input_conf, const ITensorInfo *input_priorbox, const ITensorInfo *output, DetectionOutputLayerInfo info)
Isabella Gottardi05e56442018-11-16 11:26:52 +000037{
38 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input_loc, input_conf, input_priorbox, output);
39 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_loc, 1, DataType::F32);
40 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_loc, input_conf, input_priorbox);
41 ARM_COMPUTE_RETURN_ERROR_ON_MSG(input_loc->num_dimensions() > 2, "The location input tensor should be [C1, N].");
42 ARM_COMPUTE_RETURN_ERROR_ON_MSG(input_conf->num_dimensions() > 2, "The location input tensor should be [C2, N].");
43 ARM_COMPUTE_RETURN_ERROR_ON_MSG(input_priorbox->num_dimensions() > 3, "The priorbox input tensor should be [C3, 2, N].");
44
45 ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.eta() <= 0.f && info.eta() > 1.f, "Eta should be between 0 and 1");
46
47 const int num_priors = input_priorbox->tensor_shape()[0] / 4;
48 ARM_COMPUTE_RETURN_ERROR_ON_MSG(static_cast<size_t>((num_priors * info.num_loc_classes() * 4)) != input_loc->tensor_shape()[0], "Number of priors must match number of location predictions.");
49 ARM_COMPUTE_RETURN_ERROR_ON_MSG(static_cast<size_t>((num_priors * info.num_classes())) != input_conf->tensor_shape()[0], "Number of priors must match number of confidence predictions.");
50
51 // Validate configured output
52 if(output->total_size() != 0)
53 {
54 const unsigned int max_size = info.keep_top_k() * (input_loc->num_dimensions() > 1 ? input_loc->dimension(1) : 1);
55 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), TensorShape(7U, max_size));
56 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_loc, output);
57 }
58
59 return Status{};
60}
61
/** Comparator that orders pair<float, T> by descending score, the score being the first element.
 *
 * @param[in] pair1 Left-hand (score, payload) pair.
 * @param[in] pair2 Right-hand (score, payload) pair.
 *
 * @return True if the score of @p pair1 is strictly greater than the score of @p pair2.
 */
template <typename T>
bool SortScorePairDescend(const std::pair<float, T> &pair1,
                          const std::pair<float, T> &pair2)
{
    const float lhs_score = pair1.first;
    const float rhs_score = pair2.first;
    return rhs_score < lhs_score;
}
70
71/** Get location predictions from input_loc.
72 *
73 * @param[in] input_loc The input location prediction.
74 * @param[in] num The number of images.
75 * @param[in] num_priors number of predictions per class.
76 * @param[in] num_loc_classes number of location classes. It is 1 if share_location is true,
77 * and is equal to number of classes needed to predict otherwise.
78 * @param[in] share_location If true, all classes share the same location prediction.
79 * @param[out] all_location_predictions All the location predictions.
80 *
81 */
82void retrieve_all_loc_predictions(const ITensor *input_loc, const int num,
83 const int num_priors, const int num_loc_classes,
84 const bool share_location, std::vector<LabelBBox> &all_location_predictions)
85{
86 for(int i = 0; i < num; ++i)
87 {
88 for(int c = 0; c < num_loc_classes; ++c)
89 {
90 int label = share_location ? -1 : c;
91 if(all_location_predictions[i].find(label) == all_location_predictions[i].end())
92 {
93 all_location_predictions[i][label].resize(num_priors);
94 }
95 else
96 {
97 ARM_COMPUTE_ERROR_ON(all_location_predictions[i][label].size() != static_cast<size_t>(num_priors));
98 break;
99 }
100 }
101 }
102 for(int i = 0; i < num; ++i)
103 {
104 for(int p = 0; p < num_priors; ++p)
105 {
106 for(int c = 0; c < num_loc_classes; ++c)
107 {
108 const int label = share_location ? -1 : c;
109 const int base_ptr = i * num_priors * num_loc_classes * 4 + p * num_loc_classes * 4 + c * 4;
110 //xmin, ymin, xmax, ymax
111 all_location_predictions[i][label][p][0] = *reinterpret_cast<float *>(input_loc->ptr_to_element(Coordinates(base_ptr)));
112 all_location_predictions[i][label][p][1] = *reinterpret_cast<float *>(input_loc->ptr_to_element(Coordinates(base_ptr + 1)));
113 all_location_predictions[i][label][p][2] = *reinterpret_cast<float *>(input_loc->ptr_to_element(Coordinates(base_ptr + 2)));
114 all_location_predictions[i][label][p][3] = *reinterpret_cast<float *>(input_loc->ptr_to_element(Coordinates(base_ptr + 3)));
115 }
116 }
117 }
118}
119
120/** Get confidence predictions from input_conf.
121 *
122 * @param[in] input_loc The input location prediction.
123 * @param[in] num The number of images.
124 * @param[in] num_priors Number of predictions per class.
125 * @param[in] num_loc_classes Number of location classes. It is 1 if share_location is true,
126 * and is equal to number of classes needed to predict otherwise.
127 * @param[out] all_location_predictions All the location predictions.
128 *
129 */
130void retrieve_all_conf_scores(const ITensor *input_conf, const int num,
131 const int num_priors, const int num_classes,
132 std::vector<std::map<int, std::vector<float>>> &all_confidence_scores)
133{
134 std::vector<float> tmp_buffer;
135 tmp_buffer.resize(num * num_priors * num_classes);
136 for(int i = 0; i < num; ++i)
137 {
138 for(int c = 0; c < num_classes; ++c)
139 {
140 for(int p = 0; p < num_priors; ++p)
141 {
142 tmp_buffer[i * num_classes * num_priors + c * num_priors + p] =
143 *reinterpret_cast<float *>(input_conf->ptr_to_element(Coordinates(i * num_classes * num_priors + p * num_classes + c)));
144 }
145 }
146 }
147 for(int i = 0; i < num; ++i)
148 {
149 for(int c = 0; c < num_classes; ++c)
150 {
151 all_confidence_scores[i][c].resize(num_priors);
152 all_confidence_scores[i][c].assign(&tmp_buffer[i * num_classes * num_priors + c * num_priors],
153 &tmp_buffer[i * num_classes * num_priors + c * num_priors + num_priors]);
154 }
155 }
156}
157
158/** Get prior boxes from input_priorbox.
159 *
160 * @param[in] input_priorbox The input location prediction.
161 * @param[in] num_priors Number of priors.
162 * @param[in] num_loc_classes number of location classes. It is 1 if share_location is true,
163 * and is equal to number of classes needed to predict otherwise.
164 * @param[out] all_prior_bboxes If true, all classes share the same location prediction.
165 * @param[out] all_location_predictions All the location predictions.
166 *
167 */
Isabella Gottardia7acb3c2019-01-08 13:48:44 +0000168void retrieve_all_priorbox(const ITensor *input_priorbox,
169 const int num_priors,
170 std::vector<BBox> &all_prior_bboxes,
Isabella Gottardi05e56442018-11-16 11:26:52 +0000171 std::vector<std::array<float, 4>> &all_prior_variances)
172{
173 for(int i = 0; i < num_priors; ++i)
174 {
Georgios Pinitasd57891a2019-02-19 18:10:03 +0000175 all_prior_bboxes[i] =
176 {
177 {
178 *reinterpret_cast<float *>(input_priorbox->ptr_to_element(Coordinates(i * 4))),
179 *reinterpret_cast<float *>(input_priorbox->ptr_to_element(Coordinates(i * 4 + 1))),
180 *reinterpret_cast<float *>(input_priorbox->ptr_to_element(Coordinates(i * 4 + 2))),
181 *reinterpret_cast<float *>(input_priorbox->ptr_to_element(Coordinates(i * 4 + 3)))
182 }
183 };
Isabella Gottardi05e56442018-11-16 11:26:52 +0000184 }
185
Georgios Pinitasd57891a2019-02-19 18:10:03 +0000186 std::array<float, 4> var({ { 0, 0, 0, 0 } });
Isabella Gottardi05e56442018-11-16 11:26:52 +0000187 for(int i = 0; i < num_priors; ++i)
188 {
189 for(int j = 0; j < 4; ++j)
190 {
191 var[j] = *reinterpret_cast<float *>(input_priorbox->ptr_to_element(Coordinates((num_priors + i) * 4 + j)));
192 }
193 all_prior_variances[i] = var;
194 }
195}
196
197/** Decode a bbox according to a prior bbox.
198 *
199 * @param[in] prior_bbox The input prior bounding boxes.
200 * @param[in] prior_variance The corresponding input variance.
201 * @param[in] code_type The detection output code type used to decode the results.
202 * @param[in] variance_encoded_in_target If true, the variance is encoded in target.
203 * @param[in] clip_bbox If true, the results should be between 0.f and 1.f.
204 * @param[in] bbox The input bbox to decode
205 * @param[out] decode_bbox The decoded bboxes.
206 *
207 */
Isabella Gottardia7acb3c2019-01-08 13:48:44 +0000208void DecodeBBox(const BBox &prior_bbox, const std::array<float, 4> &prior_variance,
Isabella Gottardi05e56442018-11-16 11:26:52 +0000209 const DetectionOutputLayerCodeType code_type, const bool variance_encoded_in_target,
Isabella Gottardia7acb3c2019-01-08 13:48:44 +0000210 const bool clip_bbox, const BBox &bbox, BBox &decode_bbox)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000211{
212 // if the variance is encoded in target, we simply need to add the offset predictions
213 // otherwise we need to scale the offset accordingly.
214 switch(code_type)
215 {
216 case DetectionOutputLayerCodeType::CORNER:
217 {
218 decode_bbox[0] = prior_bbox[0] + (variance_encoded_in_target ? bbox[0] : prior_variance[0] * bbox[0]);
219 decode_bbox[1] = prior_bbox[1] + (variance_encoded_in_target ? bbox[1] : prior_variance[1] * bbox[1]);
220 decode_bbox[2] = prior_bbox[2] + (variance_encoded_in_target ? bbox[2] : prior_variance[2] * bbox[2]);
221 decode_bbox[3] = prior_bbox[3] + (variance_encoded_in_target ? bbox[3] : prior_variance[3] * bbox[3]);
222
223 break;
224 }
225 case DetectionOutputLayerCodeType::CENTER_SIZE:
226 {
227 const float prior_width = prior_bbox[2] - prior_bbox[0];
228 const float prior_height = prior_bbox[3] - prior_bbox[1];
229
230 // Check if the prior width and height are right
231 ARM_COMPUTE_ERROR_ON(prior_width <= 0.f);
232 ARM_COMPUTE_ERROR_ON(prior_height <= 0.f);
233
234 const float prior_center_x = (prior_bbox[0] + prior_bbox[2]) / 2.;
235 const float prior_center_y = (prior_bbox[1] + prior_bbox[3]) / 2.;
236
237 const float decode_bbox_center_x = (variance_encoded_in_target ? bbox[0] : prior_variance[0] * bbox[0]) * prior_width + prior_center_x;
238 const float decode_bbox_center_y = (variance_encoded_in_target ? bbox[1] : prior_variance[1] * bbox[1]) * prior_height + prior_center_y;
239 const float decode_bbox_width = (variance_encoded_in_target ? std::exp(bbox[2]) : std::exp(prior_variance[2] * bbox[2])) * prior_width;
240 const float decode_bbox_height = (variance_encoded_in_target ? std::exp(bbox[3]) : std::exp(prior_variance[3] * bbox[3])) * prior_height;
241
242 decode_bbox[0] = (decode_bbox_center_x - decode_bbox_width / 2.f);
243 decode_bbox[1] = (decode_bbox_center_y - decode_bbox_height / 2.f);
244 decode_bbox[2] = (decode_bbox_center_x + decode_bbox_width / 2.f);
245 decode_bbox[3] = (decode_bbox_center_y + decode_bbox_height / 2.f);
246
247 break;
248 }
249 case DetectionOutputLayerCodeType::CORNER_SIZE:
250 {
251 const float prior_width = prior_bbox[2] - prior_bbox[0];
252 const float prior_height = prior_bbox[3] - prior_bbox[1];
253
254 // Check if the prior width and height are greater than 0
255 ARM_COMPUTE_ERROR_ON(prior_width <= 0.f);
256 ARM_COMPUTE_ERROR_ON(prior_height <= 0.f);
257
258 decode_bbox[0] = prior_bbox[0] + (variance_encoded_in_target ? bbox[0] : prior_variance[0] * bbox[0]) * prior_width;
259 decode_bbox[1] = prior_bbox[1] + (variance_encoded_in_target ? bbox[1] : prior_variance[1] * bbox[1]) * prior_height;
260 decode_bbox[2] = prior_bbox[2] + (variance_encoded_in_target ? bbox[2] : prior_variance[2] * bbox[2]) * prior_width;
261 decode_bbox[3] = prior_bbox[3] + (variance_encoded_in_target ? bbox[3] : prior_variance[3] * bbox[3]) * prior_height;
262
263 break;
264 }
265 default:
266 ARM_COMPUTE_ERROR("Unsupported Detection Output Code Type.");
267 }
268
269 if(clip_bbox)
270 {
271 for(auto &d_bbox : decode_bbox)
272 {
273 d_bbox = utility::clamp(d_bbox, 0.f, 1.f);
274 }
275 }
276}
277
278/** Do non maximum suppression given bboxes and scores.
279 *
280 * @param[in] bboxes The input bounding boxes.
281 * @param[in] scores The corresponding input confidence.
282 * @param[in] score_threshold The threshold used to filter detection results.
283 * @param[in] nms_threshold The threshold used in non maximum suppression.
284 * @param[in] eta Adaptation rate for nms threshold.
285 * @param[in] top_k If not -1, keep at most top_k picked indices.
286 * @param[out] indices The kept indices of bboxes after nms.
287 *
288 */
Isabella Gottardia7acb3c2019-01-08 13:48:44 +0000289void ApplyNMSFast(const std::vector<BBox> &bboxes,
Isabella Gottardi05e56442018-11-16 11:26:52 +0000290 const std::vector<float> &scores, const float score_threshold,
291 const float nms_threshold, const float eta, const int top_k,
292 std::vector<int> &indices)
293{
294 ARM_COMPUTE_ERROR_ON_MSG(bboxes.size() != scores.size(), "bboxes and scores have different size.");
295
296 // Get top_k scores (with corresponding indices).
297 std::list<std::pair<float, int>> score_index_vec;
298
299 // Generate index score pairs.
300 for(size_t i = 0; i < scores.size(); ++i)
301 {
302 if(scores[i] > score_threshold)
303 {
304 score_index_vec.emplace_back(std::make_pair(scores[i], i));
305 }
306 }
307
308 // Sort the score pair according to the scores in descending order
309 score_index_vec.sort(SortScorePairDescend<int>);
310
311 // Keep top_k scores if needed.
312 const int score_index_vec_size = score_index_vec.size();
313 if(top_k > -1 && top_k < score_index_vec_size)
314 {
315 score_index_vec.resize(top_k);
316 }
317
318 // Do nms.
319 float adaptive_threshold = nms_threshold;
320 indices.clear();
321
322 while(!score_index_vec.empty())
323 {
324 const int idx = score_index_vec.front().second;
325 bool keep = true;
326 for(int kept_idx : indices)
327 {
328 if(keep)
329 {
330 // Compute the jaccard (intersection over union IoU) overlap between two bboxes.
Isabella Gottardia7acb3c2019-01-08 13:48:44 +0000331 BBox intersect_bbox = std::array<float, 4>({ 0, 0, 0, 0 });
Isabella Gottardi05e56442018-11-16 11:26:52 +0000332 if(bboxes[kept_idx][0] > bboxes[idx][2] || bboxes[kept_idx][2] < bboxes[idx][0] || bboxes[kept_idx][1] > bboxes[idx][3] || bboxes[kept_idx][3] < bboxes[idx][1])
333 {
Georgios Pinitasd57891a2019-02-19 18:10:03 +0000334 intersect_bbox = std::array<float, 4>({ { 0, 0, 0, 0 } });
Isabella Gottardi05e56442018-11-16 11:26:52 +0000335 }
336 else
337 {
Georgios Pinitasd57891a2019-02-19 18:10:03 +0000338 intersect_bbox = std::array<float, 4>({ {
339 std::max(bboxes[idx][0], bboxes[kept_idx][0]),
340 std::max(bboxes[idx][1], bboxes[kept_idx][1]),
341 std::min(bboxes[idx][2], bboxes[kept_idx][2]),
342 std::min(bboxes[idx][3], bboxes[kept_idx][3])
343 }
344 });
Isabella Gottardi05e56442018-11-16 11:26:52 +0000345 }
346
347 float intersect_width = intersect_bbox[2] - intersect_bbox[0];
348 float intersect_height = intersect_bbox[3] - intersect_bbox[1];
349
350 float overlap = 0.f;
351 if(intersect_width > 0 && intersect_height > 0)
352 {
353 float intersect_size = intersect_width * intersect_height;
354 float bbox1_size = (bboxes[idx][2] < bboxes[idx][0]
355 || bboxes[idx][3] < bboxes[idx][1]) ?
356 0.f :
357 (bboxes[idx][2] - bboxes[idx][0]) * (bboxes[idx][3] - bboxes[idx][1]); //BBoxSize(bboxes[idx]);
358 float bbox2_size = (bboxes[kept_idx][2] < bboxes[kept_idx][0]
359 || bboxes[kept_idx][3] < bboxes[kept_idx][1]) ?
360 0.f :
361 (bboxes[kept_idx][2] - bboxes[kept_idx][0]) * (bboxes[kept_idx][3] - bboxes[kept_idx][1]); // BBoxSize(bboxes[kept_idx]);
362 overlap = intersect_size / (bbox1_size + bbox2_size - intersect_size);
363 }
364 keep = (overlap <= adaptive_threshold);
365 }
366 else
367 {
368 break;
369 }
370 }
371 if(keep)
372 {
373 indices.push_back(idx);
374 }
375 score_index_vec.erase(score_index_vec.begin());
Pablo Telloe96e4f02018-12-21 16:47:23 +0000376 if(keep && eta < 1.f && adaptive_threshold > 0.5f)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000377 {
378 adaptive_threshold *= eta;
379 }
380 }
381}
382} // namespace
383
384CPPDetectionOutputLayer::CPPDetectionOutputLayer()
385 : _input_loc(nullptr), _input_conf(nullptr), _input_priorbox(nullptr), _output(nullptr), _info(), _num_priors(), _num(), _all_location_predictions(), _all_confidence_scores(), _all_prior_bboxes(),
386 _all_prior_variances(), _all_decode_bboxes(), _all_indices()
387{
388}
389
390void CPPDetectionOutputLayer::configure(const ITensor *input_loc, const ITensor *input_conf, const ITensor *input_priorbox, ITensor *output, DetectionOutputLayerInfo info)
391{
392 ARM_COMPUTE_ERROR_ON_NULLPTR(input_loc, input_conf, input_priorbox, output);
393 // Output auto initialization if not yet initialized
394 // Since the number of bboxes to kept is unknown before nms, the shape is set to the maximum
395 // The maximum is keep_top_k * input_loc_size[1]
396 // Each row is a 7 dimension std::vector, which stores [image_id, label, confidence, xmin, ymin, xmax, ymax]
397 const unsigned int max_size = info.keep_top_k() * (input_loc->info()->num_dimensions() > 1 ? input_loc->info()->dimension(1) : 1);
398 auto_init_if_empty(*output->info(), input_loc->info()->clone()->set_tensor_shape(TensorShape(7U, max_size)));
399
400 // Perform validation step
Isabella Gottardi883bad72019-07-15 17:33:07 +0100401 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input_loc->info(), input_conf->info(), input_priorbox->info(), output->info(), info));
Isabella Gottardi05e56442018-11-16 11:26:52 +0000402
403 _input_loc = input_loc;
404 _input_conf = input_conf;
405 _input_priorbox = input_priorbox;
406 _output = output;
407 _info = info;
408 _num_priors = input_priorbox->info()->dimension(0) / 4;
409 _num = (_input_loc->info()->num_dimensions() > 1 ? _input_loc->info()->dimension(1) : 1);
410
411 _all_location_predictions.resize(_num);
412 _all_confidence_scores.resize(_num);
413 _all_prior_bboxes.resize(_num_priors);
414 _all_prior_variances.resize(_num_priors);
415 _all_decode_bboxes.resize(_num);
416
417 for(int i = 0; i < _num; ++i)
418 {
419 for(int c = 0; c < _info.num_loc_classes(); ++c)
420 {
421 const int label = _info.share_location() ? -1 : c;
422 if(label == _info.background_label_id())
423 {
424 // Ignore background class.
425 continue;
426 }
427 _all_decode_bboxes[i][label].resize(_num_priors);
428 }
429 }
430 _all_indices.resize(_num);
431
432 Coordinates coord;
433 coord.set_num_dimensions(output->info()->num_dimensions());
434 output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
435}
436
437Status CPPDetectionOutputLayer::validate(const ITensorInfo *input_loc, const ITensorInfo *input_conf, const ITensorInfo *input_priorbox, const ITensorInfo *output, DetectionOutputLayerInfo info)
438{
Isabella Gottardi883bad72019-07-15 17:33:07 +0100439 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input_loc, input_conf, input_priorbox, output, info));
Isabella Gottardi05e56442018-11-16 11:26:52 +0000440 return Status{};
441}
442
443void CPPDetectionOutputLayer::run()
444{
445 // Retrieve all location predictions.
446 retrieve_all_loc_predictions(_input_loc, _num, _num_priors, _info.num_loc_classes(), _info.share_location(), _all_location_predictions);
447
448 // Retrieve all confidences.
449 retrieve_all_conf_scores(_input_conf, _num, _num_priors, _info.num_classes(), _all_confidence_scores);
450
451 // Retrieve all prior bboxes.
452 retrieve_all_priorbox(_input_priorbox, _num_priors, _all_prior_bboxes, _all_prior_variances);
453
454 // Decode all loc predictions to bboxes
455 const bool clip_bbox = false;
456 for(int i = 0; i < _num; ++i)
457 {
458 for(int c = 0; c < _info.num_loc_classes(); ++c)
459 {
460 const int label = _info.share_location() ? -1 : c;
461 if(label == _info.background_label_id())
462 {
463 // Ignore background class.
464 continue;
465 }
Michalis Spyrou7c60c992019-10-10 14:33:47 +0100466 ARM_COMPUTE_ERROR_ON_MSG_VAR(_all_location_predictions[i].find(label) == _all_location_predictions[i].end(), "Could not find location predictions for label %d.", label);
Isabella Gottardi05e56442018-11-16 11:26:52 +0000467
Isabella Gottardia7acb3c2019-01-08 13:48:44 +0000468 const std::vector<BBox> &label_loc_preds = _all_location_predictions[i].find(label)->second;
Isabella Gottardi05e56442018-11-16 11:26:52 +0000469
470 const int num_bboxes = _all_prior_bboxes.size();
471 ARM_COMPUTE_ERROR_ON(_all_prior_variances[i].size() != 4);
472
473 for(int j = 0; j < num_bboxes; ++j)
474 {
475 DecodeBBox(_all_prior_bboxes[j], _all_prior_variances[j], _info.code_type(), _info.variance_encoded_in_target(), clip_bbox, label_loc_preds[j], _all_decode_bboxes[i][label][j]);
476 }
477 }
478 }
479
480 int num_kept = 0;
481
482 for(int i = 0; i < _num; ++i)
483 {
484 const LabelBBox &decode_bboxes = _all_decode_bboxes[i];
485 const std::map<int, std::vector<float>> &conf_scores = _all_confidence_scores[i];
486
487 std::map<int, std::vector<int>> indices;
488 int num_det = 0;
489 for(int c = 0; c < _info.num_classes(); ++c)
490 {
491 if(c == _info.background_label_id())
492 {
493 // Ignore background class
494 continue;
495 }
496 const int label = _info.share_location() ? -1 : c;
497 if(conf_scores.find(c) == conf_scores.end() || decode_bboxes.find(label) == decode_bboxes.end())
498 {
Michalis Spyrou7c60c992019-10-10 14:33:47 +0100499 ARM_COMPUTE_ERROR_VAR("Could not find predictions for label %d.", label);
Isabella Gottardi05e56442018-11-16 11:26:52 +0000500 }
Isabella Gottardia7acb3c2019-01-08 13:48:44 +0000501 const std::vector<float> &scores = conf_scores.find(c)->second;
502 const std::vector<BBox> &bboxes = decode_bboxes.find(label)->second;
Isabella Gottardi05e56442018-11-16 11:26:52 +0000503
504 ApplyNMSFast(bboxes, scores, _info.confidence_threshold(), _info.nms_threshold(), _info.eta(), _info.top_k(), indices[c]);
505
506 num_det += indices[c].size();
507 }
508
509 int num_to_add = 0;
510 if(_info.keep_top_k() > -1 && num_det > _info.keep_top_k())
511 {
512 std::vector<std::pair<float, std::pair<int, int>>> score_index_pairs;
Michalis Spyroubcfd09a2019-05-01 13:03:59 +0100513 for(auto const &it : indices)
Isabella Gottardi05e56442018-11-16 11:26:52 +0000514 {
515 const int label = it.first;
516 const std::vector<int> &label_indices = it.second;
517
518 if(conf_scores.find(label) == conf_scores.end())
519 {
Michalis Spyrou7c60c992019-10-10 14:33:47 +0100520 ARM_COMPUTE_ERROR_VAR("Could not find predictions for label %d.", label);
Isabella Gottardi05e56442018-11-16 11:26:52 +0000521 }
522
523 const std::vector<float> &scores = conf_scores.find(label)->second;
524 for(auto idx : label_indices)
525 {
526 ARM_COMPUTE_ERROR_ON(idx > static_cast<int>(scores.size()));
Michalis Spyroubcfd09a2019-05-01 13:03:59 +0100527 score_index_pairs.emplace_back(std::make_pair(scores[idx], std::make_pair(label, idx)));
Isabella Gottardi05e56442018-11-16 11:26:52 +0000528 }
529 }
530
531 // Keep top k results per image.
532 std::sort(score_index_pairs.begin(), score_index_pairs.end(), SortScorePairDescend<std::pair<int, int>>);
533 score_index_pairs.resize(_info.keep_top_k());
534
535 // Store the new indices.
536
537 std::map<int, std::vector<int>> new_indices;
538 for(auto score_index_pair : score_index_pairs)
539 {
540 int label = score_index_pair.second.first;
541 int idx = score_index_pair.second.second;
542 new_indices[label].push_back(idx);
543 }
544 _all_indices[i] = new_indices;
545 num_to_add = _info.keep_top_k();
546 }
547 else
548 {
549 _all_indices[i] = indices;
550 num_to_add = num_det;
551 }
552 num_kept += num_to_add;
553 }
554
555 //Update the valid region of the ouput to mark the exact number of detection
556 _output->info()->set_valid_region(ValidRegion(Coordinates(0, 0), TensorShape(7, num_kept)));
557
558 int count = 0;
559 for(int i = 0; i < _num; ++i)
560 {
561 const std::map<int, std::vector<float>> &conf_scores = _all_confidence_scores[i];
562 const LabelBBox &decode_bboxes = _all_decode_bboxes[i];
563 for(auto &it : _all_indices[i])
564 {
565 const int label = it.first;
566 const std::vector<float> &scores = conf_scores.find(label)->second;
567 const int loc_label = _info.share_location() ? -1 : label;
568 if(conf_scores.find(label) == conf_scores.end() || decode_bboxes.find(loc_label) == decode_bboxes.end())
569 {
570 // Either if there are no confidence predictions
571 // or there are no location predictions for current label.
Michalis Spyrou7c60c992019-10-10 14:33:47 +0100572 ARM_COMPUTE_ERROR_VAR("Could not find predictions for the label %d.", label);
Isabella Gottardi05e56442018-11-16 11:26:52 +0000573 }
Isabella Gottardia7acb3c2019-01-08 13:48:44 +0000574 const std::vector<BBox> &bboxes = decode_bboxes.find(loc_label)->second;
575 const std::vector<int> &indices = it.second;
Isabella Gottardi05e56442018-11-16 11:26:52 +0000576
577 for(auto idx : indices)
578 {
579 *(reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(count * 7)))) = i;
580 *(reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(count * 7 + 1)))) = label;
581 *(reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(count * 7 + 2)))) = scores[idx];
582 *(reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(count * 7 + 3)))) = bboxes[idx][0];
583 *(reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(count * 7 + 4)))) = bboxes[idx][1];
584 *(reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(count * 7 + 5)))) = bboxes[idx][2];
585 *(reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(count * 7 + 6)))) = bboxes[idx][3];
586
587 ++count;
588 }
589 }
590 }
591}
Pablo Telloe96e4f02018-12-21 16:47:23 +0000592} // namespace arm_compute