/*
2 * Copyright (c) 2019 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h"
25
26#include "arm_compute/core/Types.h"
27#include "arm_compute/runtime/NEON/NEScheduler.h"
28#include "support/ToolchainSupport.h"
29
30namespace arm_compute
31{
32NEGenerateProposalsLayer::NEGenerateProposalsLayer(std::shared_ptr<IMemoryManager> memory_manager)
33 : _memory_group(std::move(memory_manager)),
34 _permute_deltas_kernel(),
35 _flatten_deltas_kernel(),
36 _permute_scores_kernel(),
37 _flatten_scores_kernel(),
38 _compute_anchors_kernel(),
39 _bounding_box_kernel(),
40 _memset_kernel(),
41 _padded_copy_kernel(),
42 _cpp_nms_kernel(),
43 _is_nhwc(false),
44 _deltas_permuted(),
45 _deltas_flattened(),
46 _scores_permuted(),
47 _scores_flattened(),
48 _all_anchors(),
49 _all_proposals(),
50 _keeps_nms_unused(),
51 _classes_nms_unused(),
52 _proposals_4_roi_values(),
53 _num_valid_proposals(nullptr),
54 _scores_out(nullptr)
55{
56}
57
58void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *deltas, const ITensor *anchors, ITensor *proposals, ITensor *scores_out, ITensor *num_valid_proposals,
59 const GenerateProposalsInfo &info)
60{
61 ARM_COMPUTE_ERROR_ON_NULLPTR(scores, deltas, anchors, proposals, scores_out, num_valid_proposals);
62 ARM_COMPUTE_ERROR_THROW_ON(NEGenerateProposalsLayer::validate(scores->info(), deltas->info(), anchors->info(), proposals->info(), scores_out->info(), num_valid_proposals->info(), info));
63
64 _is_nhwc = scores->info()->data_layout() == DataLayout::NHWC;
65 const DataType data_type = deltas->info()->data_type();
66 const int num_anchors = scores->info()->dimension(get_data_layout_dimension_index(scores->info()->data_layout(), DataLayoutDimension::CHANNEL));
67 const int feat_width = scores->info()->dimension(get_data_layout_dimension_index(scores->info()->data_layout(), DataLayoutDimension::WIDTH));
68 const int feat_height = scores->info()->dimension(get_data_layout_dimension_index(scores->info()->data_layout(), DataLayoutDimension::HEIGHT));
69 const int total_num_anchors = num_anchors * feat_width * feat_height;
70 const int pre_nms_topN = info.pre_nms_topN();
71 const int post_nms_topN = info.post_nms_topN();
72 const size_t values_per_roi = info.values_per_roi();
73
74 // Compute all the anchors
75 _memory_group.manage(&_all_anchors);
76 _compute_anchors_kernel.configure(anchors, &_all_anchors, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale()));
77
78 const TensorShape flatten_shape_deltas(values_per_roi, total_num_anchors);
79 _deltas_flattened.allocator()->init(TensorInfo(flatten_shape_deltas, 1, data_type));
80 _memory_group.manage(&_deltas_flattened);
81
82 // Permute and reshape deltas
83 if(!_is_nhwc)
84 {
85 _memory_group.manage(&_deltas_permuted);
86 _permute_deltas_kernel.configure(deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 });
87 _flatten_deltas_kernel.configure(&_deltas_permuted, &_deltas_flattened);
88 _deltas_permuted.allocator()->allocate();
89 }
90 else
91 {
92 _flatten_deltas_kernel.configure(deltas, &_deltas_flattened);
93 }
94
95 const TensorShape flatten_shape_scores(1, total_num_anchors);
96 _scores_flattened.allocator()->init(TensorInfo(flatten_shape_scores, 1, data_type));
97 _memory_group.manage(&_scores_flattened);
98 // Permute and reshape scores
99 if(!_is_nhwc)
100 {
101 _memory_group.manage(&_scores_permuted);
102 _permute_scores_kernel.configure(scores, &_scores_permuted, PermutationVector{ 2, 0, 1 });
103 _flatten_scores_kernel.configure(&_scores_permuted, &_scores_flattened);
104 _scores_permuted.allocator()->allocate();
105 }
106 else
107 {
108 _flatten_scores_kernel.configure(scores, &_scores_flattened);
109 }
110
111 // Bounding box transform
112 _memory_group.manage(&_all_proposals);
113 BoundingBoxTransformInfo bbox_info(info.im_width(), info.im_height(), 1.f);
114 _bounding_box_kernel.configure(&_all_anchors, &_all_proposals, &_deltas_flattened, bbox_info);
115 _deltas_flattened.allocator()->allocate();
116 _all_anchors.allocator()->allocate();
117
118 // The original layer implementation first selects the best pre_nms_topN anchors (thus having a lightweight sort)
119 // that are then transformed by bbox_transform. The boxes generated are then fed into a non-sorting NMS operation.
120 // Since we are reusing the NMS layer and we don't implement any CL/sort, we let NMS do the sorting (of all the input)
121 // and the filtering
122 const int scores_nms_size = std::min<int>(std::min<int>(post_nms_topN, pre_nms_topN), total_num_anchors);
123 const float min_size_scaled = info.min_size() * info.im_scale();
124 _memory_group.manage(&_classes_nms_unused);
125 _memory_group.manage(&_keeps_nms_unused);
126
127 // Note that NMS needs outputs preinitialized.
128 auto_init_if_empty(*scores_out->info(), TensorShape(scores_nms_size), 1, data_type);
129 auto_init_if_empty(*_proposals_4_roi_values.info(), TensorShape(values_per_roi, scores_nms_size), 1, data_type);
130 auto_init_if_empty(*num_valid_proposals->info(), TensorShape(scores_nms_size), 1, DataType::U32);
131
132 // Initialize temporaries (unused) outputs
133 _classes_nms_unused.allocator()->init(TensorInfo(TensorShape(8, 1), 1, data_type));
134 _keeps_nms_unused.allocator()->init(*scores_out->info());
135
136 // Save the output (to map and unmap them at run)
137 _scores_out = scores_out;
138 _num_valid_proposals = num_valid_proposals;
139
140 _memory_group.manage(&_proposals_4_roi_values);
141
142 const BoxNMSLimitInfo box_nms_info(0.0f, info.nms_thres(), scores_nms_size, false, NMSType::LINEAR, 0.5f, 0.001f, true, min_size_scaled, info.im_width(), info.im_height());
143 _cpp_nms_kernel.configure(&_scores_flattened /*scores_in*/,
144 &_all_proposals /*boxes_in,*/,
145 nullptr /* batch_splits_in*/,
146 scores_out /* scores_out*/,
147 &_proposals_4_roi_values /*boxes_out*/,
148 &_classes_nms_unused /*classes*/,
149 nullptr /*batch_splits_out*/,
150 &_keeps_nms_unused /*keeps*/,
151 num_valid_proposals /* keeps_size*/,
152 box_nms_info);
153
154 _keeps_nms_unused.allocator()->allocate();
155 _classes_nms_unused.allocator()->allocate();
156 _all_proposals.allocator()->allocate();
157 _scores_flattened.allocator()->allocate();
158
159 // Add the first column that represents the batch id. This will be all zeros, as we don't support multiple images
160 _padded_copy_kernel.configure(&_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } });
161 _proposals_4_roi_values.allocator()->allocate();
162
163 _memset_kernel.configure(proposals, PixelValue());
164}
165
166Status NEGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITensorInfo *deltas, const ITensorInfo *anchors, const ITensorInfo *proposals, const ITensorInfo *scores_out,
167 const ITensorInfo *num_valid_proposals, const GenerateProposalsInfo &info)
168{
169 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(scores, deltas, anchors, proposals, scores_out, num_valid_proposals);
170 ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(scores, DataLayout::NCHW, DataLayout::NHWC);
171 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(scores, deltas);
172
173 const int num_anchors = scores->dimension(get_data_layout_dimension_index(scores->data_layout(), DataLayoutDimension::CHANNEL));
174 const int feat_width = scores->dimension(get_data_layout_dimension_index(scores->data_layout(), DataLayoutDimension::WIDTH));
175 const int feat_height = scores->dimension(get_data_layout_dimension_index(scores->data_layout(), DataLayoutDimension::HEIGHT));
176 const int num_images = scores->dimension(3);
177 const int total_num_anchors = num_anchors * feat_width * feat_height;
178 const int values_per_roi = info.values_per_roi();
179
180 ARM_COMPUTE_RETURN_ERROR_ON(num_images > 1);
181
182 TensorInfo all_anchors_info(anchors->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true));
183 ARM_COMPUTE_RETURN_ON_ERROR(NEComputeAllAnchorsKernel::validate(anchors, &all_anchors_info, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale())));
184
185 TensorInfo deltas_permuted_info = deltas->clone()->set_tensor_shape(TensorShape(values_per_roi * num_anchors, feat_width, feat_height)).set_is_resizable(true);
186 TensorInfo scores_permuted_info = scores->clone()->set_tensor_shape(TensorShape(num_anchors, feat_width, feat_height)).set_is_resizable(true);
187 if(scores->data_layout() == DataLayout::NHWC)
188 {
189 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(deltas, &deltas_permuted_info);
190 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(scores, &scores_permuted_info);
191 }
192 else
193 {
194 ARM_COMPUTE_RETURN_ON_ERROR(NEPermuteKernel::validate(deltas, &deltas_permuted_info, PermutationVector{ 2, 0, 1 }));
195 ARM_COMPUTE_RETURN_ON_ERROR(NEPermuteKernel::validate(scores, &scores_permuted_info, PermutationVector{ 2, 0, 1 }));
196 }
197
198 TensorInfo deltas_flattened_info(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true));
199 ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayerKernel::validate(&deltas_permuted_info, &deltas_flattened_info));
200
201 TensorInfo scores_flattened_info(scores->clone()->set_tensor_shape(TensorShape(1, total_num_anchors)).set_is_resizable(true));
202 TensorInfo proposals_4_roi_values(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true));
203
204 ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayerKernel::validate(&scores_permuted_info, &scores_flattened_info));
205 ARM_COMPUTE_RETURN_ON_ERROR(NEBoundingBoxTransformKernel::validate(&all_anchors_info, &proposals_4_roi_values, &deltas_flattened_info, BoundingBoxTransformInfo(info.im_width(), info.im_height(),
206 1.f)));
207
208 ARM_COMPUTE_RETURN_ON_ERROR(NECopyKernel::validate(&proposals_4_roi_values, proposals, PaddingList{ { 0, 1 } }));
209
210 if(num_valid_proposals->total_size() > 0)
211 {
212 ARM_COMPUTE_RETURN_ERROR_ON(num_valid_proposals->num_dimensions() > 1);
213 ARM_COMPUTE_RETURN_ERROR_ON(num_valid_proposals->dimension(0) > 1);
214 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(num_valid_proposals, 1, DataType::U32);
215 }
216
217 if(proposals->total_size() > 0)
218 {
219 ARM_COMPUTE_RETURN_ERROR_ON(proposals->num_dimensions() > 2);
220 ARM_COMPUTE_RETURN_ERROR_ON(proposals->dimension(0) != size_t(values_per_roi) + 1);
221 ARM_COMPUTE_RETURN_ERROR_ON(proposals->dimension(1) != size_t(total_num_anchors));
222 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(proposals, deltas);
223 }
224
225 if(scores_out->total_size() > 0)
226 {
227 ARM_COMPUTE_RETURN_ERROR_ON(scores_out->num_dimensions() > 1);
228 ARM_COMPUTE_RETURN_ERROR_ON(scores_out->dimension(0) != size_t(total_num_anchors));
229 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(scores_out, scores);
230 }
231
232 return Status{};
233}
234
// Executes the pipeline configured in configure(). Kernel order is data-dependent
// (anchors -> permute/flatten -> bbox transform -> NMS -> memset -> padded copy)
// and must not be reordered.
void NEGenerateProposalsLayer::run()
{
    // Acquire all the temporaries; released automatically when scope_mg goes out of scope.
    MemoryGroupResourceScope scope_mg(_memory_group);

    // Compute all the anchors
    NEScheduler::get().schedule(&_compute_anchors_kernel, Window::DimY);

    // Transpose and reshape the inputs (only needed for NCHW, see configure())
    if(!_is_nhwc)
    {
        NEScheduler::get().schedule(&_permute_deltas_kernel, Window::DimY);
        NEScheduler::get().schedule(&_permute_scores_kernel, Window::DimY);
    }

    NEScheduler::get().schedule(&_flatten_deltas_kernel, Window::DimY);
    NEScheduler::get().schedule(&_flatten_scores_kernel, Window::DimY);

    // Build the boxes
    NEScheduler::get().schedule(&_bounding_box_kernel, Window::DimY);

    // Non maxima suppression
    // NOTE(review): dispatched through CPPScheduler (not NEScheduler) since it is a CPP
    // reference kernel — confirm this is intentional rather than a leftover.
    CPPScheduler::get().schedule(&_cpp_nms_kernel, Window::DimX);

    // Add dummy batch indexes: memset zeroes the whole output first so the padded
    // batch-id column stays zero, then the ROI values are copied in with a one-column offset.
    NEScheduler::get().schedule(&_memset_kernel, Window::DimY);
    NEScheduler::get().schedule(&_padded_copy_kernel, Window::DimY);
}
264} // namespace arm_compute