Blame - src/runtime/CL/functions/CLFFTConvolutionLayer.cpp - ml/ComputeLibrary

blob: 97b64b24f39f18c83e44302d59f5dbb06453ebfb [file] [log] [blame]

Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	1	/*
Michele Di Giorgio	d9eaf61	2020-07-08 11:12:57 +0100	[diff] [blame]	2	* Copyright (c) 2019-2020 Arm Limited.
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h"
				25
				26	#include "arm_compute/core/CL/ICLTensor.h"
				27	#include "arm_compute/core/Utils.h"
				28	#include "arm_compute/core/Validate.h"
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	29	#include "arm_compute/core/utils/misc/ShapeCalculator.h"
				30	#include "arm_compute/runtime/CL/CLScheduler.h"
				31	#include "arm_compute/runtime/CPP/CPPScheduler.h"
Sang-Hoon Park	bef7fa2	2020-10-21 15:58:54 +0100	[diff] [blame]	32	#include "src/core/CL/kernels/CLCopyKernel.h"
				33	#include "src/core/CL/kernels/CLFFTDigitReverseKernel.h"
				34	#include "src/core/CL/kernels/CLFFTRadixStageKernel.h"
				35	#include "src/core/CL/kernels/CLFFTScaleKernel.h"
				36	#include "src/core/CL/kernels/CLFillBorderKernel.h"
				37	#include "src/core/CL/kernels/CLPadLayerKernel.h"
				38	#include "src/core/CL/kernels/CLReductionOperationKernel.h"
Sang-Hoon Park	68dd25f	2020-10-19 16:00:11 +0100	[diff] [blame]	39	#include "src/core/helpers/AutoConfiguration.h"
				40	#include "src/core/utils/helpers/fft.h"
				41
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	42	namespace arm_compute
				43	{
				44	namespace
				45	{
				46	int pad_decomposable(int N)
				47	{
				48	const auto supported_radix = CLFFTRadixStageKernel::supported_radix();
				49
				50	int pad = 0;
				51	bool is_decomposed = false;
				52	while(!is_decomposed)
				53	{
				54	const auto decomposed_vector = arm_compute::helpers::fft::decompose_stages(N++, supported_radix);
				55	is_decomposed = !decomposed_vector.empty();
				56	if(!is_decomposed)
				57	{
				58	++pad;
				59	}
				60	}
				61	return pad;
				62	}
				63	} // namespace
				64	CLFFTConvolutionLayer::CLFFTConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
				65	: _memory_group(memory_manager),
				66	_flip_weights_func(),
				67	_permute_input_func(),
				68	_permute_output_func(),
				69	_permute_weights_func(),
				70	_permute_bias_func(),
				71	_pad_input_func(),
				72	_pad_weights_func(),
				73	_transform_input_func(memory_manager),
Georgios Pinitas	098516b	2019-04-25 18:25:06 +0100	[diff] [blame]	74	_transform_weights_func(),
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	75	_itransform_output_func(memory_manager),
				76	_prod_func(),
				77	_reduce_func(),
				78	_extract_output_func(),
				79	_bias_add_func(),
				80	_activation_layer_func(),
				81	_permuted_input(),
				82	_permuted_weights(),
				83	_permuted_bias(),
				84	_permuted_output(),
				85	_padded_input(),
				86	_padded_weights(),
				87	_flip_axis(),
				88	_flipped_weights(),
				89	_transformed_input(),
				90	_transformed_weights(),
				91	_input_weights_product(),
				92	_output_product(),
				93	_output_reduced(),
				94	_itransformed_output(),
				95	_reshaped_output(),
				96	_bias_output(),
				97	_original_weights(nullptr),
				98	_original_bias(nullptr),
				99	_is_activationlayer_enabled(false),
				100	_needs_permute(false),
				101	_has_bias(false),
				102	_is_prepared(false)
				103	{
				104	}
				105
				106	void CLFFTConvolutionLayer::configure(ICLTensor input, const ICLTensor weights, const ICLTensor biases, ICLTensor output, const PadStrideInfo &conv_info,
				107	const ActivationLayerInfo &act_info)
				108	{
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	109	configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, act_info);
				110	}
				111
				112	void CLFFTConvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor input, const ICLTensor weights, const ICLTensor biases, ICLTensor output, const PadStrideInfo &conv_info,
				113	const ActivationLayerInfo &act_info)
				114	{
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	115	_original_weights = weights;
				116	_original_bias = biases;
				117
				118	// Flat if bias addition is required
				119	_has_bias = biases != nullptr;
				120
				121	// Get indices for the width and height
				122	const size_t idx_width = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::WIDTH);
				123	const size_t idx_height = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);
				124
				125	// Input shape, kernel size and output tile
				126	const Size2D input_dims = Size2D(input->info()->tensor_shape()[idx_width], input->info()->tensor_shape()[idx_height]);
				127	const Size2D kernel_size = Size2D(weights->info()->tensor_shape()[idx_width], weights->info()->tensor_shape()[idx_height]);
				128	const Size2D pad_valid = Size2D(pad_decomposable(input_dims.x() + kernel_size.x() - 1),
				129	pad_decomposable(input_dims.y() + kernel_size.y() - 1));
				130	// Tensors to use
				131	ICLTensor *input_to_use = input;
				132	const ICLTensor *weights_to_use = weights;
				133	ICLTensor *output_to_use = _has_bias ? &_bias_output : output;
				134
				135	// Permute bias
Georgios Pinitas	68c6a79	2019-05-15 13:24:00 +0100	[diff] [blame]	136	if(biases != nullptr)
				137	{
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	138	_permute_bias_func.configure(compile_context, biases, &_permuted_bias, PermutationVector(1U, 2U, 0U));
Georgios Pinitas	68c6a79	2019-05-15 13:24:00 +0100	[diff] [blame]	139	_permuted_bias.info()->set_data_layout(DataLayout::NCHW);
				140	}
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	141
				142	// Permute input if needed
				143	_needs_permute = input->info()->data_layout() == DataLayout::NHWC;
				144	if(_needs_permute)
				145	{
				146	_memory_group.manage(&_permuted_input);
				147	// Configure the function to transform the input tensor from NHWC -> NCHW
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	148	_permute_input_func.configure(compile_context, input, &_permuted_input, PermutationVector(1U, 2U, 0U));
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	149	_permuted_input.info()->set_data_layout(DataLayout::NCHW);
				150
				151	// Configure the function to transform the weights tensor from HWI -> IHW
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	152	_permute_weights_func.configure(compile_context, weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	153	_permuted_weights.info()->set_data_layout(DataLayout::NCHW);
				154
				155	input_to_use = &_permuted_input;
				156	weights_to_use = &_permuted_weights;
				157	}
				158
				159	// Flip weights
				160	_flipped_weights.allocator()->init(weights_to_use->info()->clone()->set_is_resizable(true).reset_padding());
				161	_flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	162	_flip_weights_func.configure(compile_context, weights_to_use, &_flipped_weights, &_flip_axis);
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	163
				164	// Pad weights
				165	const PaddingList padding_w = { { 0, input_dims.x() + pad_valid.x() - 1 }, { 0, input_dims.y() + pad_valid.y() - 1 } };
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	166	_pad_weights_func.configure(compile_context, &_flipped_weights, &_padded_weights, padding_w);
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	167
				168	// Transform weights
Georgios Pinitas	40f51a6	2020-11-21 03:04:18 +0000	[diff] [blame^]	169	_transform_weights_func = std::make_unique<CLFFT2D>();
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	170	_transform_weights_func->configure(compile_context, &_padded_weights, &_transformed_weights, FFT2DInfo());
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	171
				172	// Pad input
				173	const PaddingList padding_in = { { 0, kernel_size.x() + pad_valid.x() - 1 }, { 0, kernel_size.y() + pad_valid.y() - 1 } };
				174	_memory_group.manage(&_padded_input);
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	175	_pad_input_func.configure(compile_context, input_to_use, &_padded_input, padding_in);
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	176	if(_needs_permute)
				177	{
				178	_permuted_input.allocator()->allocate();
				179	}
				180
				181	// Transform input
				182	_memory_group.manage(&_transformed_input);
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	183	_transform_input_func.configure(compile_context, &_padded_input, &_transformed_input, FFT2DInfo());
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	184	_padded_input.allocator()->allocate();
				185
				186	// Perform product
				187	_memory_group.manage(&_output_product);
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	188	_prod_func.configure(compile_context, &_transformed_input, &_transformed_weights, &_output_product);
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	189	_transformed_input.allocator()->allocate();
				190
				191	// Perform reduction
				192	_memory_group.manage(&_output_reduced);
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	193	_reduce_func.configure(compile_context, &_output_product, &_output_reduced, 2, ReductionOperation::SUM);
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	194	_output_product.allocator()->allocate();
				195
				196	// Transform output
				197	_memory_group.manage(&_itransformed_output);
				198	FFT2DInfo itranform_info;
				199	itranform_info.direction = FFTDirection::Inverse;
				200	_itransformed_output.allocator()->init(_output_reduced.info()->clone()->set_is_resizable(true).set_num_channels(1).reset_padding());
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	201	_itransform_output_func.configure(compile_context, &_output_reduced, &_itransformed_output, itranform_info);
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	202	_output_reduced.allocator()->allocate();
				203
				204	// Reshape output
				205	TensorShape reshaped_shape = _itransformed_output.info()->tensor_shape();
				206	reshaped_shape.remove_dimension(2);
				207	_reshaped_output.allocator()->init(_itransformed_output.info()->clone()->set_tensor_shape(reshaped_shape));
				208
				209	// Extract correct region
				210	const int start_left = kernel_size.x() - conv_info.pad_left() - 1;
				211	const int start_top = kernel_size.y() - conv_info.pad_top() - 1;
				212	const int end_right = _reshaped_output.info()->tensor_shape().x() - (kernel_size.x() - conv_info.pad_right() - 1) - pad_valid.x();
				213	const int end_botton = _reshaped_output.info()->tensor_shape().y() - (kernel_size.y() - conv_info.pad_bottom() - 1) - pad_valid.y();
				214	if(_has_bias)
				215	{
				216	_memory_group.manage(&_bias_output);
				217	}
				218	else if(_needs_permute)
				219	{
				220	output_to_use = &_permuted_output;
				221	_memory_group.manage(&_permuted_output);
				222	}
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	223	_extract_output_func.configure(compile_context, &_reshaped_output, output_to_use, Coordinates(start_left, start_top), Coordinates(end_right, end_botton));
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	224	_itransformed_output.allocator()->allocate();
				225
				226	// Add bias
				227	if(biases != nullptr)
				228	{
				229	output_to_use = output;
				230	if(_needs_permute)
				231	{
				232	output_to_use = &_permuted_output;
				233	_memory_group.manage(&_permuted_output);
				234	}
				235	auto_init_if_empty(output_to_use->info(), _bias_output.info());
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	236	_bias_add_func.configure(compile_context, &_bias_output, &_permuted_bias, output_to_use, ConvertPolicy::WRAP);
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	237	_bias_output.allocator()->allocate();
				238	}
				239
				240	// Permute output
				241	if(_needs_permute)
				242	{
				243	// Configure the function to transform the convoluted output to ACL's native ordering format NCHW
				244	_permuted_output.info()->set_data_layout(DataLayout::NCHW);
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	245	_permute_output_func.configure(compile_context, &_permuted_output, output, PermutationVector(2U, 0U, 1U));
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	246
				247	// Allocate tensors
				248	_permuted_output.allocator()->allocate();
				249	}
				250
				251	// Configure Activation Layer
				252	_is_activationlayer_enabled = act_info.enabled();
				253	if(_is_activationlayer_enabled)
				254	{
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	255	_activation_layer_func.configure(compile_context, output, nullptr, act_info);
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	256	}
				257
				258	// Setup flip axis data
				259	_flip_axis.allocator()->allocate();
				260	_flip_axis.map(true);
				261	auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer());
				262	axis_data[0] = 0;
				263	axis_data[1] = 1;
				264	_flip_axis.unmap();
				265	}
				266
				267	Status CLFFTConvolutionLayer::validate(const ITensorInfo input, const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo output, const PadStrideInfo &conv_info,
				268	const ActivationLayerInfo &act_info)
				269	{
				270	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
				271	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
				272
				273	// Get indices for the width and height
				274	const size_t idx_width = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
				275	const size_t idx_height = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
				276
				277	// Input shape, kernel size and output tile
				278	const Size2D kernel_size = Size2D(weights->tensor_shape()[idx_width], weights->tensor_shape()[idx_height]);
				279
				280	// Strides
				281	const auto strides = conv_info.stride();
				282	ARM_COMPUTE_RETURN_ERROR_ON(strides.first != strides.second && strides.first != 1);
				283	ARM_COMPUTE_RETURN_ERROR_ON(kernel_size.x() != kernel_size.y());
				284	ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_left() != (kernel_size.x() / 2) \|\| conv_info.pad_right() != (kernel_size.x() / 2));
				285	ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_top() != (kernel_size.y() / 2) \|\| conv_info.pad_bottom() != (kernel_size.y() / 2));
				286
				287	// Validate biases
				288	if(biases != nullptr)
				289	{
				290	const size_t idx_channels = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
				291	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
				292	ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_channels] != biases->tensor_shape().x());
				293	}
				294
				295	// Checks performed when output is configured
				296	if((output != nullptr) && (output->total_size() != 0))
				297	{
				298	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
Vidhya Sudhan Loganathan	8ec0bb6	2019-04-23 10:40:44 +0100	[diff] [blame]	299	ARM_COMPUTE_RETURN_ERROR_ON((input->tensor_shape()[idx_height] != output->tensor_shape()[idx_height]) \|\| (input->tensor_shape()[idx_width] != output->tensor_shape()[idx_width]));
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	300
				301	// Validate Activation Layer
				302	if(act_info.enabled())
				303	{
				304	ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(output, nullptr, act_info));
				305	}
				306	}
				307
				308	return Status{};
				309	}
				310
				311	void CLFFTConvolutionLayer::run()
				312	{
				313	prepare();
				314
Georgios Pinitas	098516b	2019-04-25 18:25:06 +0100	[diff] [blame]	315	MemoryGroupResourceScope scope_mg(_memory_group);
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	316
				317	// Transform input
				318	if(_needs_permute)
				319	{
				320	_permute_input_func.run();
				321	}
				322	_pad_input_func.run();
				323	_transform_input_func.run();
				324
				325	// Perform operations to frequency domain
				326	_prod_func.run();
				327	_reduce_func.run();
				328
				329	// Transform output
				330	_itransform_output_func.run();
				331	_reshaped_output.allocator()->import_memory(_itransformed_output.cl_buffer());
				332	_extract_output_func.run();
				333	// Add bias
				334	if(_has_bias)
				335	{
				336	_bias_add_func.run();
				337	}
				338	if(_needs_permute)
				339	{
				340	_permute_output_func.run();
				341	}
				342
				343	// Run activation layer
				344	if(_is_activationlayer_enabled)
				345	{
				346	_activation_layer_func.run();
				347	}
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	348	}
				349
				350	void CLFFTConvolutionLayer::prepare()
				351	{
				352	if(!_is_prepared)
				353	{
				354	// Permute bias to NCHW
				355	if(_original_bias != nullptr)
				356	{
				357	_permuted_bias.allocator()->allocate();
				358	_permute_bias_func.run();
				359	_original_bias->mark_as_unused();
				360	}
				361
				362	const ICLTensor *cur_weights = _original_weights;
				363	// Permute weights
				364	if(_needs_permute)
				365	{
				366	ARM_COMPUTE_ERROR_ON(!cur_weights->is_used());
				367
				368	_permuted_weights.allocator()->allocate();
				369	_permute_weights_func.run();
				370	cur_weights->mark_as_unused();
				371	cur_weights = &_permuted_weights;
				372	}
				373
				374	// Flip weights
				375	_flipped_weights.allocator()->allocate();
				376	_flip_weights_func.run();
				377	cur_weights->mark_as_unused();
				378
				379	// Pad weights
				380	_padded_weights.allocator()->allocate();
				381	_pad_weights_func.run();
				382	_flipped_weights.mark_as_unused();
				383	CLScheduler::get().queue().finish();
				384	_flipped_weights.allocator()->free();
				385
Georgios Pinitas	098516b	2019-04-25 18:25:06 +0100	[diff] [blame]	386	// Transform weights to frequency domain
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	387	_transformed_weights.allocator()->allocate();
Georgios Pinitas	098516b	2019-04-25 18:25:06 +0100	[diff] [blame]	388	_transform_weights_func->run();
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	389	_padded_weights.mark_as_unused();
				390	CLScheduler::get().queue().finish();
Georgios Pinitas	098516b	2019-04-25 18:25:06 +0100	[diff] [blame]	391	// Delete object and release internal memory
				392	_transform_weights_func.reset();
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	393	_padded_weights.allocator()->free();
				394
				395	_is_prepared = true;
				396	}
				397	}
				398	} // namespace arm_compute