Blame - src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp - ml/ComputeLibrary

blob: 89413fcca4a5f1fb964e42a9028dec83f2e2ffd2 [file] [log] [blame]

Michalis Spyrou	2709d61	2018-09-19 09:46:47 +0100	[diff] [blame]	1	/*
				2	* Copyright (c) 2018 ARM Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h"
				25
				26	#include "arm_compute/core/Error.h"
				27	#include "arm_compute/core/Helpers.h"
				28
				29	#include <algorithm>
				30	#include <cmath>
				31
				32	namespace arm_compute
				33	{
				34	namespace
				35	{
				36	template <typename T>
				37	std::vector<int> SoftNMS(const ITensor *proposals, std::vector<std::vector<T>> &scores_in, std::vector<int> inds, const BoxNMSLimitInfo &info, int class_id)
				38	{
				39	std::vector<int> keep;
				40	const int proposals_width = proposals->info()->dimension(1);
				41
				42	std::vector<T> x1(proposals_width);
				43	std::vector<T> y1(proposals_width);
				44	std::vector<T> x2(proposals_width);
				45	std::vector<T> y2(proposals_width);
				46	std::vector<T> areas(proposals_width);
				47
				48	for(int i = 0; i < proposals_width; ++i)
				49	{
				50	x1[i] = reinterpret_cast<T >(proposals->ptr_to_element(Coordinates(class_id * 4, i)));
				51	y1[i] = reinterpret_cast<T >(proposals->ptr_to_element(Coordinates(class_id * 4 + 1, i)));
				52	x2[i] = reinterpret_cast<T >(proposals->ptr_to_element(Coordinates(class_id * 4 + 2, i)));
				53	y2[i] = reinterpret_cast<T >(proposals->ptr_to_element(Coordinates(class_id * 4 + 3, i)));
				54	areas[i] = (x2[i] - x1[i] + 1.0) * (y2[i] - y1[i] + 1.0);
				55	}
				56
				57	// Note: Soft NMS scores have already been initialize with input scores
				58
				59	while(!inds.empty())
				60	{
				61	// Find proposal with max score among remaining proposals
				62	int max_pos = 0;
				63	for(unsigned int i = 1; i < inds.size(); ++i)
				64	{
				65	if(scores_in[class_id][inds.at(i)] > scores_in[class_id][inds.at(max_pos)])
				66	{
				67	max_pos = i;
				68	}
				69	}
				70	int element = inds.at(max_pos);
				71	keep.push_back(element);
				72	std::swap(inds.at(0), inds.at(max_pos));
				73
				74	// Remove first element and compute IoU of the remaining boxes with identified max box
				75	inds.erase(inds.begin());
				76
				77	std::vector<int> sorted_indices_temp;
				78	for(auto idx : inds)
				79	{
				80	const auto xx1 = std::max(x1[idx], x1[element]);
				81	const auto yy1 = std::max(y1[idx], y1[element]);
				82	const auto xx2 = std::min(x2[idx], x2[element]);
				83	const auto yy2 = std::min(y2[idx], y2[element]);
				84
				85	const auto w = std::max((xx2 - xx1 + 1.f), 0.f);
				86	const auto h = std::max((yy2 - yy1 + 1.f), 0.f);
				87	const auto inter = w * h;
				88	const auto ovr = inter / (areas[element] + areas[idx] - inter);
				89
				90	// Update scores based on computed IoU, overlap threshold and NMS method
				91	T weight;
				92	switch(info.soft_nms_method())
				93	{
				94	case NMSType::LINEAR:
				95	weight = (ovr > info.nms()) ? (1.f - ovr) : 1.f;
				96	break;
				97	case NMSType::GAUSSIAN: // Gaussian
				98	weight = std::exp(-1.f * ovr * ovr / info.soft_nms_sigma());
				99	break;
				100	case NMSType::ORIGINAL: // Original NMS
				101	weight = (ovr > info.nms()) ? 0.f : 1.f;
				102	break;
				103	default:
				104	ARM_COMPUTE_ERROR("Not supported");
				105	}
				106
				107	// Discard boxes with new scores below min threshold and update pending indices
				108	scores_in[class_id][idx] *= weight;
				109	if(scores_in[class_id][idx] >= info.soft_nms_min_score_thres())
				110	{
				111	sorted_indices_temp.push_back(idx);
				112	}
				113	}
				114	inds = sorted_indices_temp;
				115	}
				116
				117	return keep;
				118	}
				119
				120	template <typename T>
				121	std::vector<int> NonMaximaSuppression(const ITensor *proposals, std::vector<int> sorted_indices, const BoxNMSLimitInfo &info, int class_id)
				122	{
				123	std::vector<int> keep;
				124
				125	const int proposals_width = proposals->info()->dimension(1);
				126
				127	std::vector<T> x1(proposals_width);
				128	std::vector<T> y1(proposals_width);
				129	std::vector<T> x2(proposals_width);
				130	std::vector<T> y2(proposals_width);
				131	std::vector<T> areas(proposals_width);
				132
				133	for(int i = 0; i < proposals_width; ++i)
				134	{
				135	x1[i] = reinterpret_cast<T >(proposals->ptr_to_element(Coordinates(class_id * 4, i)));
				136	y1[i] = reinterpret_cast<T >(proposals->ptr_to_element(Coordinates(class_id * 4 + 1, i)));
				137	x2[i] = reinterpret_cast<T >(proposals->ptr_to_element(Coordinates(class_id * 4 + 2, i)));
				138	y2[i] = reinterpret_cast<T >(proposals->ptr_to_element(Coordinates(class_id * 4 + 3, i)));
				139	areas[i] = (x2[i] - x1[i] + 1.0) * (y2[i] - y1[i] + 1.0);
				140	}
				141
				142	while(!sorted_indices.empty())
				143	{
				144	int i = sorted_indices.at(0);
				145	keep.push_back(i);
				146
				147	std::vector<int> sorted_indices_temp = sorted_indices;
				148	std::vector<int> new_indices;
				149	sorted_indices_temp.erase(sorted_indices_temp.begin());
				150
				151	for(unsigned int j = 0; j < sorted_indices_temp.size(); ++j)
				152	{
				153	const auto xx1 = std::max(x1[sorted_indices_temp.at(j)], x1[i]);
				154	const auto yy1 = std::max(y1[sorted_indices_temp.at(j)], y1[i]);
				155	const auto xx2 = std::min(x2[sorted_indices_temp.at(j)], x2[i]);
				156	const auto yy2 = std::min(y2[sorted_indices_temp.at(j)], y2[i]);
				157
				158	const auto w = std::max((xx2 - xx1 + 1.f), 0.f);
				159	const auto h = std::max((yy2 - yy1 + 1.f), 0.f);
				160	const auto inter = w * h;
				161	const auto ovr = inter / (areas[i] + areas[sorted_indices_temp.at(j)] - inter);
				162
				163	if(ovr <= info.nms())
				164	{
				165	new_indices.push_back(j);
				166	}
				167	}
				168
				169	const unsigned int new_indices_size = new_indices.size();
				170	std::vector<int> new_sorted_indices(new_indices_size);
				171	for(unsigned int i = 0; i < new_indices_size; ++i)
				172	{
				173	new_sorted_indices[i] = sorted_indices[new_indices[i] + 1];
				174	}
				175	sorted_indices = new_sorted_indices;
				176	}
				177
				178	return keep;
				179	}
				180	} // namespace
				181
				182	CPPBoxWithNonMaximaSuppressionLimitKernel::CPPBoxWithNonMaximaSuppressionLimitKernel()
				183	: _scores_in(nullptr), _boxes_in(nullptr), _batch_splits_in(nullptr), _scores_out(nullptr), _boxes_out(nullptr), _classes(nullptr), _batch_splits_out(nullptr), _keeps(nullptr), _keeps_size(nullptr),
				184	_info()
				185	{
				186	}
				187
				188	bool CPPBoxWithNonMaximaSuppressionLimitKernel::is_parallelisable() const
				189	{
				190	return false;
				191	}
				192
				193	template <typename T>
				194	void CPPBoxWithNonMaximaSuppressionLimitKernel::run_nmslimit()
				195	{
				196	const int batch_size = _batch_splits_in == nullptr ? 1 : _batch_splits_in->info()->dimension(0);
				197	const int num_classes = _scores_in->info()->dimension(0);
				198	const int scores_count = _scores_in->info()->dimension(1);
				199	std::vector<int> total_keep_per_batch(batch_size);
				200	std::vector<std::vector<int>> keeps(num_classes);
				201	int total_keep_count = 0;
				202
				203	std::vector<std::vector<T>> in_scores(num_classes, std::vector<T>(scores_count));
				204	for(int i = 0; i < scores_count; ++i)
				205	{
				206	for(int j = 0; j < num_classes; ++j)
				207	{
				208	in_scores[j][i] = reinterpret_cast<const T >(_scores_in->ptr_to_element(Coordinates(j, i)));
				209	}
				210	}
				211
				212	int offset = 0;
				213	int cur_start_idx = 0;
				214	for(int b = 0; b < batch_size; ++b)
				215	{
				216	const int num_boxes = _batch_splits_in == nullptr ? 1 : static_cast<int>(reinterpret_cast<T >(_batch_splits_in->ptr_to_element(Coordinates(b))));
				217	// Skip first class
				218	for(int j = 1; j < num_classes; ++j)
				219	{
				220	std::vector<T> cur_scores(scores_count);
				221	std::vector<int> inds;
				222	for(int i = 0; i < scores_count; ++i)
				223	{
				224	const T score = in_scores[j][i];
				225	cur_scores[i] = score;
				226
				227	if(score > _info.score_thresh())
				228	{
				229	inds.push_back(i);
				230	}
				231	}
				232	if(_info.soft_nms_enabled())
				233	{
				234	keeps[j] = SoftNMS(_boxes_in, in_scores, inds, _info, j);
				235	}
				236	else
				237	{
				238	std::sort(inds.data(), inds.data() + inds.size(),
				239	[&cur_scores](int lhs, int rhs)
				240	{
				241	return cur_scores[lhs] > cur_scores[rhs];
				242	});
				243
				244	keeps[j] = NonMaximaSuppression<T>(_boxes_in, inds, _info, j);
				245	}
				246	total_keep_count += keeps[j].size();
				247	}
				248
				249	if(_info.detections_per_im() > 0 && total_keep_count > _info.detections_per_im())
				250	{
				251	// merge all scores (represented by indices) together and sort
				252	auto get_all_scores_sorted = [&in_scores, &keeps, total_keep_count]()
				253	{
				254	std::vector<T> ret(total_keep_count);
				255
				256	int ret_idx = 0;
				257	for(unsigned int i = 1; i < keeps.size(); ++i)
				258	{
				259	auto &cur_keep = keeps[i];
				260	for(auto &ckv : cur_keep)
				261	{
				262	ret[ret_idx++] = in_scores[i][ckv];
				263	}
				264	}
				265
				266	std::sort(ret.data(), ret.data() + ret.size());
				267
				268	return ret;
				269	};
				270
				271	auto all_scores_sorted = get_all_scores_sorted();
				272	const T image_thresh = all_scores_sorted[all_scores_sorted.size() - _info.detections_per_im()];
				273	for(int j = 1; j < num_classes; ++j)
				274	{
				275	auto &cur_keep = keeps[j];
				276	std::vector<int> new_keeps_j;
				277	for(auto &k : cur_keep)
				278	{
				279	if(in_scores[j][k] >= image_thresh)
				280	{
				281	new_keeps_j.push_back(k);
				282	}
				283	}
				284	keeps[j] = new_keeps_j;
				285	}
				286	total_keep_count = _info.detections_per_im();
				287	}
				288
				289	total_keep_per_batch[b] = total_keep_count;
				290
				291	// Write results
				292	int cur_out_idx = 0;
				293	for(int j = 1; j < num_classes; ++j)
				294	{
				295	auto &cur_keep = keeps[j];
				296	auto cur_out_scores = reinterpret_cast<T *>(_scores_out->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx)));
				297	auto cur_out_classes = reinterpret_cast<T *>(_classes->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx)));
				298	const int box_column = (cur_start_idx + cur_out_idx) * 4;
				299
				300	for(unsigned int k = 0; k < cur_keep.size(); ++k)
				301	{
				302	cur_out_scores[k] = in_scores[j][cur_keep[k]];
				303	cur_out_classes[k] = static_cast<T>(j);
				304	auto cur_out_box_row0 = reinterpret_cast<T *>(_boxes_out->ptr_to_element(Coordinates(box_column + 0, k)));
				305	auto cur_out_box_row1 = reinterpret_cast<T *>(_boxes_out->ptr_to_element(Coordinates(box_column + 1, k)));
				306	auto cur_out_box_row2 = reinterpret_cast<T *>(_boxes_out->ptr_to_element(Coordinates(box_column + 2, k)));
				307	auto cur_out_box_row3 = reinterpret_cast<T *>(_boxes_out->ptr_to_element(Coordinates(box_column + 3, k)));
				308	cur_out_box_row0 = reinterpret_cast<const T >(_boxes_in->ptr_to_element(Coordinates(j 4 + 0, cur_keep[k])));
				309	cur_out_box_row1 = reinterpret_cast<const T >(_boxes_in->ptr_to_element(Coordinates(j 4 + 1, cur_keep[k])));
				310	cur_out_box_row2 = reinterpret_cast<const T >(_boxes_in->ptr_to_element(Coordinates(j 4 + 2, cur_keep[k])));
				311	cur_out_box_row3 = reinterpret_cast<const T >(_boxes_in->ptr_to_element(Coordinates(j 4 + 3, cur_keep[k])));
				312	}
				313
				314	cur_out_idx += cur_keep.size();
				315	}
				316
				317	if(_keeps != nullptr)
				318	{
				319	cur_out_idx = 0;
				320	for(int j = 0; j < num_classes; ++j)
				321	{
				322	for(unsigned int i = 0; i < keeps[j].size(); ++i)
				323	{
				324	reinterpret_cast<T >(_keeps->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx + i))) = static_cast<T>(keeps[j].at(i));
				325	}
				326	reinterpret_cast<T >(_keeps_size->ptr_to_element(Coordinates(j + b * num_classes))) = static_cast<T>(keeps[j].size());
				327	cur_out_idx += keeps[j].size();
				328	}
				329	}
				330
				331	offset += num_boxes;
				332	cur_start_idx += total_keep_count;
				333	}
				334
				335	if(_batch_splits_out != nullptr)
				336	{
				337	for(int b = 0; b < batch_size; ++b)
				338	{
				339	reinterpret_cast<float >(_batch_splits_out->ptr_to_element(Coordinates(b))) = total_keep_per_batch[b];
				340	}
				341	}
				342	}
				343
				344	void CPPBoxWithNonMaximaSuppressionLimitKernel::configure(const ITensor scores_in, const ITensor boxes_in, const ITensor batch_splits_in, ITensor scores_out, ITensor boxes_out, ITensor classes,
				345	ITensor batch_splits_out, ITensor keeps, ITensor *keeps_size, const BoxNMSLimitInfo info)
				346	{
				347	ARM_COMPUTE_ERROR_ON_NULLPTR(scores_in, boxes_in, scores_out, boxes_out, classes);
				348	ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(scores_in, 1, DataType::F16, DataType::F32);
				349	const unsigned int num_classes = scores_in->info()->dimension(0);
				350
				351	ARM_COMPUTE_UNUSED(num_classes);
				352	ARM_COMPUTE_ERROR_ON_MSG((4 * num_classes) != boxes_in->info()->dimension(0), "First dimension of input boxes must be of size 4*num_classes");
				353	ARM_COMPUTE_ERROR_ON_MSG(scores_in->info()->dimension(1) != boxes_in->info()->dimension(1), "Input scores and input boxes must have the same number of rows");
				354	ARM_COMPUTE_ERROR_ON(scores_out->info()->dimension(0) != boxes_out->info()->dimension(1));
				355	ARM_COMPUTE_ERROR_ON(boxes_out->info()->dimension(0) != 4);
				356	if(keeps != nullptr)
				357	{
				358	ARM_COMPUTE_ERROR_ON_MSG(keeps_size == nullptr, "keeps_size cannot be nullptr if keeps has to be provided as output");
				359	ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(scores_in, keeps);
				360	ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(scores_in, keeps_size);
				361	ARM_COMPUTE_ERROR_ON(scores_out->info()->dimension(0) != keeps->info()->dimension(0));
				362	ARM_COMPUTE_ERROR_ON(num_classes != keeps_size->info()->dimension(0));
				363	}
				364	if(batch_splits_in != nullptr)
				365	{
				366	ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(scores_in, batch_splits_in);
				367	}
				368	if(batch_splits_out != nullptr)
				369	{
				370	ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(scores_in, batch_splits_out);
				371	}
				372
				373	_scores_in = scores_in;
				374	_boxes_in = boxes_in;
				375	_batch_splits_in = batch_splits_in;
				376	_scores_out = scores_out;
				377	_boxes_out = boxes_out;
				378	_classes = classes;
				379	_batch_splits_out = batch_splits_out;
				380	_keeps = keeps;
				381	_keeps_size = keeps_size;
				382	_info = info;
				383
				384	// Configure kernel window
				385	Window win = calculate_max_window(*scores_in->info(), Steps(scores_in->info()->dimension(0)));
				386
				387	IKernel::configure(win);
				388	}
				389
				390	void CPPBoxWithNonMaximaSuppressionLimitKernel::run(const Window &window, const ThreadInfo &info)
				391	{
				392	ARM_COMPUTE_UNUSED(info);
				393	ARM_COMPUTE_UNUSED(window);
				394	ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
				395	ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IKernel::window(), window);
				396
				397	switch(_scores_in->info()->data_type())
				398	{
				399	case DataType::F32:
				400	run_nmslimit<float>();
				401	break;
				402	case DataType::F16:
				403	run_nmslimit<half>();
				404	break;
				405	default:
				406	ARM_COMPUTE_ERROR("Not supported");
				407	}
				408	}
				409	} // namespace arm_compute