Blame - src/runtime/NEON/functions/NEPadLayer.cpp - ml/ComputeLibrary

blob: 62a7d4559bb5158492aa88a8eea6d9aea69eadfe [file] [log] [blame]

Georgios Pinitas	dea2d2d	2018-12-19 16:23:17 +0000	[diff] [blame]	1	/*
				2	* Copyright (c) 2018-2019 ARM Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "arm_compute/runtime/NEON/functions/NEPadLayer.h"
				25
				26	#include "arm_compute/runtime/NEON/NEScheduler.h"
				27
Georgios Pinitas	dea2d2d	2018-12-19 16:23:17 +0000	[diff] [blame]	28	#include "arm_compute/core/Types.h"
				29	#include "arm_compute/core/utils/misc/ShapeCalculator.h"
				30
				31	#include "support/ToolchainSupport.h"
				32
				33	namespace arm_compute
				34	{
				35	namespace
				36	{
				37	TensorInfo get_expected_output_tensorinfo(const ITensorInfo &input, const PaddingList &paddings)
				38	{
				39	const TensorShape expected_output_shape = arm_compute::misc::shape_calculator::compute_padded_shape(input.tensor_shape(), paddings);
				40	const TensorInfo expected_output_info = input.clone()->set_tensor_shape(expected_output_shape);
				41	return expected_output_info;
				42	}
				43
				44	Status validate_arguments(const ITensorInfo &input, ITensorInfo &output, const PaddingList &paddings)
				45	{
				46	const TensorInfo expected_output_info = get_expected_output_tensorinfo(input, paddings);
				47	auto_init_if_empty(output, expected_output_info);
				48	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&output, &expected_output_info);
				49
				50	return Status{};
				51	}
				52
				53	Coordinates get_subtensor_coords(const PaddingList &paddings)
				54	{
				55	Coordinates coords;
				56	for(unsigned int i = 0; i < paddings.size(); ++i)
				57	{
				58	coords.set(i, paddings[i].first);
				59	}
				60
				61	return coords;
				62	}
Usama Arif	8cf8c11	2019-03-14 15:36:54 +0000	[diff] [blame^]	63
				64	uint32_t last_padding_dimension(const PaddingList &padding)
				65	{
				66	int last_padding_dim = padding.size() - 1;
				67	for(; last_padding_dim >= 0; --last_padding_dim)
				68	{
				69	if(padding[last_padding_dim].first > 0 \|\| padding[last_padding_dim].second > 0)
				70	{
				71	break;
				72	}
				73	}
				74	return static_cast<uint32_t>(last_padding_dim);
				75	}
Georgios Pinitas	dea2d2d	2018-12-19 16:23:17 +0000	[diff] [blame]	76	} // namespace
				77
				78	NEPadLayer::NEPadLayer()
Usama Arif	8cf8c11	2019-03-14 15:36:54 +0000	[diff] [blame^]	79	: _copy_kernel(), _mode(), _padding(), _memset_kernel(), _num_dimensions(0), _slice_functions(nullptr), _concat_functions(nullptr), _slice_results(nullptr), _concat_results(nullptr),
				80	_output_subtensor()
Georgios Pinitas	dea2d2d	2018-12-19 16:23:17 +0000	[diff] [blame]	81	{
				82	}
				83
Usama Arif	8cf8c11	2019-03-14 15:36:54 +0000	[diff] [blame^]	84	void NEPadLayer::configure_constant_mode(ITensor input, ITensor output, const PaddingList &padding, const PixelValue constant_value)
Georgios Pinitas	dea2d2d	2018-12-19 16:23:17 +0000	[diff] [blame]	85	{
Georgios Pinitas	dea2d2d	2018-12-19 16:23:17 +0000	[diff] [blame]	86	// Auto-init
				87	auto_init_if_empty(output->info(), get_expected_output_tensorinfo(input->info(), padding));
				88
				89	// Create SubTensor (Can use sub-tensor as the kernels to be executed do not require padding)
				90	_output_subtensor = SubTensor(output, input->info()->tensor_shape(), get_subtensor_coords(padding), true);
				91
				92	// Set the pages of the output to the specified value
				93	_memset_kernel.configure(output, constant_value);
				94
				95	// Copy the input to the output
				96	_copy_kernel.configure(input, &_output_subtensor);
				97	}
				98
Usama Arif	8cf8c11	2019-03-14 15:36:54 +0000	[diff] [blame^]	99	void NEPadLayer::configure_reflect_symmetric_mode(ITensor input, ITensor output)
				100	{
				101	// Reflecting can be performed by effectively unfolding the input as follows:
				102	// For each dimension starting at DimX:
				103	// For before and after:
				104	// Use strided slice to extract and reverse the part of the
				105	// input / previously produced tensor required for the padding.
				106	// Concatenate the before and after padding with the input / previously
				107	// produced tensor along the current dimension.
				108
				109	// Two strided slice functions will be required for each dimension padded as well as a
				110	// concatenate function and the tensors to hold the temporary results.
				111	_slice_functions = arm_compute::support::cpp14::make_unique<NEStridedSlice[]>(2 * _num_dimensions);
				112	_slice_results = arm_compute::support::cpp14::make_unique<Tensor[]>(2 * _num_dimensions);
				113	_concat_functions = arm_compute::support::cpp14::make_unique<NEConcatenateLayer[]>(_num_dimensions);
				114	_concat_results = arm_compute::support::cpp14::make_unique<Tensor[]>(_num_dimensions - 1);
				115	Coordinates starts_before, ends_before, starts_after, ends_after, strides;
				116	ITensor *prev = input;
				117	for(uint32_t i = 0; i < _num_dimensions; ++i)
				118	{
				119	// Values in strides from the previous dimensions need to be set to 1 to avoid reversing again.
				120	if(i > 0)
				121	{
				122	strides.set(i - 1, 1);
				123	}
				124
				125	if(_padding[i].first > 0 \|\| _padding[i].second > 0)
				126	{
				127	// Set the starts, ends, and strides values for the current dimension.
				128	// Due to the bit masks passed to strided slice, the values below the current dimension in
				129	// starts and ends will be ignored so do not need to be modified.
				130	if(_mode == PaddingMode::REFLECT)
				131	{
				132	starts_before.set(i, _padding[i].first);
				133	ends_before.set(i, 0);
				134	starts_after.set(i, input->info()->dimension(i) - 2);
				135	ends_after.set(i, input->info()->dimension(i) - _padding[i].second - 2);
				136	strides.set(i, -1);
				137	}
				138	else
				139	{
				140	starts_before.set(i, _padding[i].first - 1);
				141	ends_before.set(i, -1);
				142	starts_after.set(i, input->info()->dimension(i) - 1);
				143	ends_after.set(i, input->info()->dimension(i) - _padding[i].second - 1);
				144	strides.set(i, -1);
				145	}
				146
				147	// Strided slice wraps negative indexes around to the end of the range,
				148	// instead this should indicate use of the full range and so the bit mask will be modified.
				149	const int32_t begin_mask_before = starts_before[i] < 0 ? ~0 : ~(1u << i);
				150	const int32_t end_mask_before = ends_before[i] < 0 ? ~0 : ~(1u << i);
				151	const int32_t begin_mask_after = starts_after[i] < 0 ? ~0 : ~(1u << i);
				152	const int32_t end_mask_after = ends_after[i] < 0 ? ~0 : ~(1u << i);
				153
				154	// Reflect the input values for the padding before and after the input.
				155	std::vector<ITensor *> concat_vector;
				156	if(_padding[i].first > 0)
				157	{
				158	if(i < prev->info()->num_dimensions())
				159	{
				160	_slice_functions[2 * i].configure(prev, &_slice_results[2 * i], starts_before, ends_before, strides, begin_mask_before, end_mask_before);
				161	concat_vector.push_back(&_slice_results[2 * i]);
				162	}
				163	else
				164	{
				165	// Performing the slice is unnecessary if the result would simply be a copy of the tensor.
				166	concat_vector.push_back(prev);
				167	}
				168	}
				169	concat_vector.push_back(prev);
				170	if(_padding[i].second > 0)
				171	{
				172	if(i < prev->info()->num_dimensions())
				173	{
				174	_slice_functions[2 * i + 1].configure(prev, &_slice_results[2 * i + 1], starts_after, ends_after, strides, begin_mask_after, end_mask_after);
				175	concat_vector.push_back(&_slice_results[2 * i + 1]);
				176	}
				177	else
				178	{
				179	// Performing the slice is unnecessary if the result would simply be a copy of the tensor.
				180	concat_vector.push_back(prev);
				181	}
				182	}
				183	// Concatenate the padding before and after with the input.
				184	ITensor *out = (i == _num_dimensions - 1) ? output : &_concat_results[i];
				185	_concat_functions[i].configure(concat_vector, out, get_index_data_layout_dimension(input->info()->data_layout(), i));
				186	if(i != _num_dimensions - 1)
				187	{
				188	_concat_results[i].allocator()->allocate();
				189	}
				190	prev = out;
				191	}
				192	_slice_results[2 * i].allocator()->allocate();
				193	_slice_results[2 * i + 1].allocator()->allocate();
				194	}
				195	}
				196
				197	void NEPadLayer::configure(ITensor input, ITensor output, const PaddingList &padding, const PixelValue constant_value, const PaddingMode mode)
				198	{
				199	ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), padding, constant_value, mode));
				200
				201	_padding = padding;
				202	_mode = mode;
				203
				204	const TensorShape padded_shape = misc::shape_calculator::compute_padded_shape(input->info()->tensor_shape(), _padding);
				205
				206	auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(padded_shape));
				207
				208	// Find the last dimension requiring padding so that it is known when to write to output and whether any padding is applied.
				209	_num_dimensions = last_padding_dimension(padding) + 1;
				210	if(_num_dimensions > 0)
				211	{
				212	switch(_mode)
				213	{
				214	case PaddingMode::CONSTANT:
				215	{
				216	configure_constant_mode(input, output, padding, constant_value);
				217	break;
				218	}
				219	case PaddingMode::REFLECT:
				220	case PaddingMode::SYMMETRIC:
				221	{
				222	configure_reflect_symmetric_mode(input, output);
				223	break;
				224	}
				225	default:
				226	ARM_COMPUTE_ERROR("Padding mode not supported.");
				227	}
				228	}
				229	else
				230	{
				231	// Copy the input to the whole output if no padding is applied
				232	_copy_kernel.configure(input, output);
				233	}
				234	}
				235
				236	Status NEPadLayer::validate(const ITensorInfo input, const ITensorInfo output, const PaddingList &padding, const PixelValue constant_value, const PaddingMode mode)
Georgios Pinitas	dea2d2d	2018-12-19 16:23:17 +0000	[diff] [blame]	237	{
				238	ARM_COMPUTE_UNUSED(constant_value);
Georgios Pinitas	dea2d2d	2018-12-19 16:23:17 +0000	[diff] [blame]	239
Usama Arif	8cf8c11	2019-03-14 15:36:54 +0000	[diff] [blame^]	240	const TensorShape padded_shape = misc::shape_calculator::compute_padded_shape(input->tensor_shape(), padding);
Georgios Pinitas	dea2d2d	2018-12-19 16:23:17 +0000	[diff] [blame]	241
Usama Arif	8cf8c11	2019-03-14 15:36:54 +0000	[diff] [blame^]	242	if(output->total_size() > 0)
				243	{
				244	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), padded_shape);
				245	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
				246	}
Georgios Pinitas	dea2d2d	2018-12-19 16:23:17 +0000	[diff] [blame]	247
Usama Arif	8cf8c11	2019-03-14 15:36:54 +0000	[diff] [blame^]	248	switch(mode)
				249	{
				250	case PaddingMode::CONSTANT:
				251	{
				252	auto output_clone = output->clone();
				253	SubTensorInfo output_subtensor_info(output_clone.get(), input->tensor_shape(), get_subtensor_coords(padding), true);
				254	ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output_clone, padding));
				255	ARM_COMPUTE_RETURN_ON_ERROR(NECopyKernel::validate(input, &output_subtensor_info));
				256	break;
				257	}
				258	case PaddingMode::REFLECT:
				259	case PaddingMode::SYMMETRIC:
				260	{
				261	for(uint32_t i = 0; i < padding.size(); ++i)
				262	{
				263	if(mode == PaddingMode::REFLECT)
				264	{
				265	ARM_COMPUTE_RETURN_ERROR_ON(padding[i].first >= input->dimension(i));
				266	ARM_COMPUTE_RETURN_ERROR_ON(padding[i].second >= input->dimension(i));
				267	}
				268	else
				269	{
				270	ARM_COMPUTE_RETURN_ERROR_ON(padding[i].first > input->dimension(i));
				271	ARM_COMPUTE_RETURN_ERROR_ON(padding[i].second > input->dimension(i));
				272	}
				273	}
				274	break;
				275	}
				276	default:
				277	{
				278	ARM_COMPUTE_ERROR("Invalid mode");
				279	}
				280	}
Georgios Pinitas	dea2d2d	2018-12-19 16:23:17 +0000	[diff] [blame]	281	return Status{};
				282	}
				283
				284	void NEPadLayer::run()
				285	{
Usama Arif	8cf8c11	2019-03-14 15:36:54 +0000	[diff] [blame^]	286	if(_num_dimensions > 0)
				287	{
				288	switch(_mode)
				289	{
				290	case PaddingMode::CONSTANT:
				291	{
				292	NEScheduler::get().schedule(&_memset_kernel, Window::DimY);
				293	NEScheduler::get().schedule(&_copy_kernel, Window::DimY);
				294	break;
				295	}
				296	case PaddingMode::REFLECT:
				297	case PaddingMode::SYMMETRIC:
				298	{
				299	for(uint32_t i = 0; i < _num_dimensions; ++i)
				300	{
				301	if(_padding[i].first > 0 \|\| _padding[i].second > 0)
				302	{
				303	if(_padding[i].first > 0 && _slice_results[2 * i].info()->total_size() > 0)
				304	{
				305	_slice_functions[2 * i].run();
				306	}
				307	if(_padding[i].second > 0 && _slice_results[2 * i + 1].info()->total_size() > 0)
				308	{
				309	_slice_functions[2 * i + 1].run();
				310	}
				311	_concat_functions[i].run();
				312	}
				313	}
				314	break;
				315	}
				316	default:
				317	ARM_COMPUTE_ERROR("Padding mode not supported.");
				318	}
				319	}
				320	else
				321	{
				322	NEScheduler::get().schedule(&_copy_kernel, Window::DimY);
				323	}
Georgios Pinitas	dea2d2d	2018-12-19 16:23:17 +0000	[diff] [blame]	324	}
				325	} // namespace arm_compute