Blame - src/runtime/CL/functions/CLFFTConvolutionLayer.cpp - ml/ComputeLibrary

blob: 1def674bb6d6554dc77460acc4b43a10d618b3c8 [file] [log] [blame]

Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	1	/*
Michele Di Giorgio	d9eaf61	2020-07-08 11:12:57 +0100	[diff] [blame]	2	* Copyright (c) 2019-2020 Arm Limited.
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h"
				25
				26	#include "arm_compute/core/CL/ICLTensor.h"
				27	#include "arm_compute/core/Utils.h"
				28	#include "arm_compute/core/Validate.h"
				29	#include "arm_compute/core/utils/helpers/fft.h"
				30	#include "arm_compute/core/utils/misc/ShapeCalculator.h"
				31	#include "arm_compute/runtime/CL/CLScheduler.h"
				32	#include "arm_compute/runtime/CPP/CPPScheduler.h"
				33
				34	namespace arm_compute
				35	{
				36	namespace
				37	{
				38	int pad_decomposable(int N)
				39	{
				40	const auto supported_radix = CLFFTRadixStageKernel::supported_radix();
				41
				42	int pad = 0;
				43	bool is_decomposed = false;
				44	while(!is_decomposed)
				45	{
				46	const auto decomposed_vector = arm_compute::helpers::fft::decompose_stages(N++, supported_radix);
				47	is_decomposed = !decomposed_vector.empty();
				48	if(!is_decomposed)
				49	{
				50	++pad;
				51	}
				52	}
				53	return pad;
				54	}
				55	} // namespace
				56	CLFFTConvolutionLayer::CLFFTConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
				57	: _memory_group(memory_manager),
				58	_flip_weights_func(),
				59	_permute_input_func(),
				60	_permute_output_func(),
				61	_permute_weights_func(),
				62	_permute_bias_func(),
				63	_pad_input_func(),
				64	_pad_weights_func(),
				65	_transform_input_func(memory_manager),
Georgios Pinitas	098516b	2019-04-25 18:25:06 +0100	[diff] [blame]	66	_transform_weights_func(),
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	67	_itransform_output_func(memory_manager),
				68	_prod_func(),
				69	_reduce_func(),
				70	_extract_output_func(),
				71	_bias_add_func(),
				72	_activation_layer_func(),
				73	_permuted_input(),
				74	_permuted_weights(),
				75	_permuted_bias(),
				76	_permuted_output(),
				77	_padded_input(),
				78	_padded_weights(),
				79	_flip_axis(),
				80	_flipped_weights(),
				81	_transformed_input(),
				82	_transformed_weights(),
				83	_input_weights_product(),
				84	_output_product(),
				85	_output_reduced(),
				86	_itransformed_output(),
				87	_reshaped_output(),
				88	_bias_output(),
				89	_original_weights(nullptr),
				90	_original_bias(nullptr),
				91	_is_activationlayer_enabled(false),
				92	_needs_permute(false),
				93	_has_bias(false),
				94	_is_prepared(false)
				95	{
				96	}
				97
				98	void CLFFTConvolutionLayer::configure(ICLTensor input, const ICLTensor weights, const ICLTensor biases, ICLTensor output, const PadStrideInfo &conv_info,
				99	const ActivationLayerInfo &act_info)
				100	{
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	101	configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, act_info);
				102	}
				103
				104	void CLFFTConvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor input, const ICLTensor weights, const ICLTensor biases, ICLTensor output, const PadStrideInfo &conv_info,
				105	const ActivationLayerInfo &act_info)
				106	{
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	107	_original_weights = weights;
				108	_original_bias = biases;
				109
				110	// Flat if bias addition is required
				111	_has_bias = biases != nullptr;
				112
				113	// Get indices for the width and height
				114	const size_t idx_width = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::WIDTH);
				115	const size_t idx_height = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);
				116
				117	// Input shape, kernel size and output tile
				118	const Size2D input_dims = Size2D(input->info()->tensor_shape()[idx_width], input->info()->tensor_shape()[idx_height]);
				119	const Size2D kernel_size = Size2D(weights->info()->tensor_shape()[idx_width], weights->info()->tensor_shape()[idx_height]);
				120	const Size2D pad_valid = Size2D(pad_decomposable(input_dims.x() + kernel_size.x() - 1),
				121	pad_decomposable(input_dims.y() + kernel_size.y() - 1));
				122	// Tensors to use
				123	ICLTensor *input_to_use = input;
				124	const ICLTensor *weights_to_use = weights;
				125	ICLTensor *output_to_use = _has_bias ? &_bias_output : output;
				126
				127	// Permute bias
Georgios Pinitas	68c6a79	2019-05-15 13:24:00 +0100	[diff] [blame]	128	if(biases != nullptr)
				129	{
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	130	_permute_bias_func.configure(compile_context, biases, &_permuted_bias, PermutationVector(1U, 2U, 0U));
Georgios Pinitas	68c6a79	2019-05-15 13:24:00 +0100	[diff] [blame]	131	_permuted_bias.info()->set_data_layout(DataLayout::NCHW);
				132	}
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	133
				134	// Permute input if needed
				135	_needs_permute = input->info()->data_layout() == DataLayout::NHWC;
				136	if(_needs_permute)
				137	{
				138	_memory_group.manage(&_permuted_input);
				139	// Configure the function to transform the input tensor from NHWC -> NCHW
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	140	_permute_input_func.configure(compile_context, input, &_permuted_input, PermutationVector(1U, 2U, 0U));
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	141	_permuted_input.info()->set_data_layout(DataLayout::NCHW);
				142
				143	// Configure the function to transform the weights tensor from HWI -> IHW
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	144	_permute_weights_func.configure(compile_context, weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	145	_permuted_weights.info()->set_data_layout(DataLayout::NCHW);
				146
				147	input_to_use = &_permuted_input;
				148	weights_to_use = &_permuted_weights;
				149	}
				150
				151	// Flip weights
				152	_flipped_weights.allocator()->init(weights_to_use->info()->clone()->set_is_resizable(true).reset_padding());
				153	_flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	154	_flip_weights_func.configure(compile_context, weights_to_use, &_flipped_weights, &_flip_axis);
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	155
				156	// Pad weights
				157	const PaddingList padding_w = { { 0, input_dims.x() + pad_valid.x() - 1 }, { 0, input_dims.y() + pad_valid.y() - 1 } };
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	158	_pad_weights_func.configure(compile_context, &_flipped_weights, &_padded_weights, padding_w);
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	159
				160	// Transform weights
Georgios Pinitas	098516b	2019-04-25 18:25:06 +0100	[diff] [blame]	161	_transform_weights_func = support::cpp14::make_unique<CLFFT2D>();
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	162	_transform_weights_func->configure(compile_context, &_padded_weights, &_transformed_weights, FFT2DInfo());
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	163
				164	// Pad input
				165	const PaddingList padding_in = { { 0, kernel_size.x() + pad_valid.x() - 1 }, { 0, kernel_size.y() + pad_valid.y() - 1 } };
				166	_memory_group.manage(&_padded_input);
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	167	_pad_input_func.configure(compile_context, input_to_use, &_padded_input, padding_in);
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	168	if(_needs_permute)
				169	{
				170	_permuted_input.allocator()->allocate();
				171	}
				172
				173	// Transform input
				174	_memory_group.manage(&_transformed_input);
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	175	_transform_input_func.configure(compile_context, &_padded_input, &_transformed_input, FFT2DInfo());
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	176	_padded_input.allocator()->allocate();
				177
				178	// Perform product
				179	_memory_group.manage(&_output_product);
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	180	_prod_func.configure(compile_context, &_transformed_input, &_transformed_weights, &_output_product);
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	181	_transformed_input.allocator()->allocate();
				182
				183	// Perform reduction
				184	_memory_group.manage(&_output_reduced);
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	185	_reduce_func.configure(compile_context, &_output_product, &_output_reduced, 2, ReductionOperation::SUM);
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	186	_output_product.allocator()->allocate();
				187
				188	// Transform output
				189	_memory_group.manage(&_itransformed_output);
				190	FFT2DInfo itranform_info;
				191	itranform_info.direction = FFTDirection::Inverse;
				192	_itransformed_output.allocator()->init(_output_reduced.info()->clone()->set_is_resizable(true).set_num_channels(1).reset_padding());
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	193	_itransform_output_func.configure(compile_context, &_output_reduced, &_itransformed_output, itranform_info);
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	194	_output_reduced.allocator()->allocate();
				195
				196	// Reshape output
				197	TensorShape reshaped_shape = _itransformed_output.info()->tensor_shape();
				198	reshaped_shape.remove_dimension(2);
				199	_reshaped_output.allocator()->init(_itransformed_output.info()->clone()->set_tensor_shape(reshaped_shape));
				200
				201	// Extract correct region
				202	const int start_left = kernel_size.x() - conv_info.pad_left() - 1;
				203	const int start_top = kernel_size.y() - conv_info.pad_top() - 1;
				204	const int end_right = _reshaped_output.info()->tensor_shape().x() - (kernel_size.x() - conv_info.pad_right() - 1) - pad_valid.x();
				205	const int end_botton = _reshaped_output.info()->tensor_shape().y() - (kernel_size.y() - conv_info.pad_bottom() - 1) - pad_valid.y();
				206	if(_has_bias)
				207	{
				208	_memory_group.manage(&_bias_output);
				209	}
				210	else if(_needs_permute)
				211	{
				212	output_to_use = &_permuted_output;
				213	_memory_group.manage(&_permuted_output);
				214	}
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	215	_extract_output_func.configure(compile_context, &_reshaped_output, output_to_use, Coordinates(start_left, start_top), Coordinates(end_right, end_botton));
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	216	_itransformed_output.allocator()->allocate();
				217
				218	// Add bias
				219	if(biases != nullptr)
				220	{
				221	output_to_use = output;
				222	if(_needs_permute)
				223	{
				224	output_to_use = &_permuted_output;
				225	_memory_group.manage(&_permuted_output);
				226	}
				227	auto_init_if_empty(output_to_use->info(), _bias_output.info());
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	228	_bias_add_func.configure(compile_context, &_bias_output, &_permuted_bias, output_to_use, ConvertPolicy::WRAP);
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	229	_bias_output.allocator()->allocate();
				230	}
				231
				232	// Permute output
				233	if(_needs_permute)
				234	{
				235	// Configure the function to transform the convoluted output to ACL's native ordering format NCHW
				236	_permuted_output.info()->set_data_layout(DataLayout::NCHW);
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	237	_permute_output_func.configure(compile_context, &_permuted_output, output, PermutationVector(2U, 0U, 1U));
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	238
				239	// Allocate tensors
				240	_permuted_output.allocator()->allocate();
				241	}
				242
				243	// Configure Activation Layer
				244	_is_activationlayer_enabled = act_info.enabled();
				245	if(_is_activationlayer_enabled)
				246	{
Manuel Bottini	2b84be5	2020-04-08 10:15:51 +0100	[diff] [blame]	247	_activation_layer_func.configure(compile_context, output, nullptr, act_info);
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	248	}
				249
				250	// Setup flip axis data
				251	_flip_axis.allocator()->allocate();
				252	_flip_axis.map(true);
				253	auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer());
				254	axis_data[0] = 0;
				255	axis_data[1] = 1;
				256	_flip_axis.unmap();
				257	}
				258
				259	Status CLFFTConvolutionLayer::validate(const ITensorInfo input, const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo output, const PadStrideInfo &conv_info,
				260	const ActivationLayerInfo &act_info)
				261	{
				262	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
				263	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
				264
				265	// Get indices for the width and height
				266	const size_t idx_width = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
				267	const size_t idx_height = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
				268
				269	// Input shape, kernel size and output tile
				270	const Size2D kernel_size = Size2D(weights->tensor_shape()[idx_width], weights->tensor_shape()[idx_height]);
				271
				272	// Strides
				273	const auto strides = conv_info.stride();
				274	ARM_COMPUTE_RETURN_ERROR_ON(strides.first != strides.second && strides.first != 1);
				275	ARM_COMPUTE_RETURN_ERROR_ON(kernel_size.x() != kernel_size.y());
				276	ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_left() != (kernel_size.x() / 2) \|\| conv_info.pad_right() != (kernel_size.x() / 2));
				277	ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_top() != (kernel_size.y() / 2) \|\| conv_info.pad_bottom() != (kernel_size.y() / 2));
				278
				279	// Validate biases
				280	if(biases != nullptr)
				281	{
				282	const size_t idx_channels = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
				283	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
				284	ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_channels] != biases->tensor_shape().x());
				285	}
				286
				287	// Checks performed when output is configured
				288	if((output != nullptr) && (output->total_size() != 0))
				289	{
				290	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
Vidhya Sudhan Loganathan	8ec0bb6	2019-04-23 10:40:44 +0100	[diff] [blame]	291	ARM_COMPUTE_RETURN_ERROR_ON((input->tensor_shape()[idx_height] != output->tensor_shape()[idx_height]) \|\| (input->tensor_shape()[idx_width] != output->tensor_shape()[idx_width]));
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	292
				293	// Validate Activation Layer
				294	if(act_info.enabled())
				295	{
				296	ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(output, nullptr, act_info));
				297	}
				298	}
				299
				300	return Status{};
				301	}
				302
				303	void CLFFTConvolutionLayer::run()
				304	{
				305	prepare();
				306
Georgios Pinitas	098516b	2019-04-25 18:25:06 +0100	[diff] [blame]	307	MemoryGroupResourceScope scope_mg(_memory_group);
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	308
				309	// Transform input
				310	if(_needs_permute)
				311	{
				312	_permute_input_func.run();
				313	}
				314	_pad_input_func.run();
				315	_transform_input_func.run();
				316
				317	// Perform operations to frequency domain
				318	_prod_func.run();
				319	_reduce_func.run();
				320
				321	// Transform output
				322	_itransform_output_func.run();
				323	_reshaped_output.allocator()->import_memory(_itransformed_output.cl_buffer());
				324	_extract_output_func.run();
				325	// Add bias
				326	if(_has_bias)
				327	{
				328	_bias_add_func.run();
				329	}
				330	if(_needs_permute)
				331	{
				332	_permute_output_func.run();
				333	}
				334
				335	// Run activation layer
				336	if(_is_activationlayer_enabled)
				337	{
				338	_activation_layer_func.run();
				339	}
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	340	}
				341
				342	void CLFFTConvolutionLayer::prepare()
				343	{
				344	if(!_is_prepared)
				345	{
				346	// Permute bias to NCHW
				347	if(_original_bias != nullptr)
				348	{
				349	_permuted_bias.allocator()->allocate();
				350	_permute_bias_func.run();
				351	_original_bias->mark_as_unused();
				352	}
				353
				354	const ICLTensor *cur_weights = _original_weights;
				355	// Permute weights
				356	if(_needs_permute)
				357	{
				358	ARM_COMPUTE_ERROR_ON(!cur_weights->is_used());
				359
				360	_permuted_weights.allocator()->allocate();
				361	_permute_weights_func.run();
				362	cur_weights->mark_as_unused();
				363	cur_weights = &_permuted_weights;
				364	}
				365
				366	// Flip weights
				367	_flipped_weights.allocator()->allocate();
				368	_flip_weights_func.run();
				369	cur_weights->mark_as_unused();
				370
				371	// Pad weights
				372	_padded_weights.allocator()->allocate();
				373	_pad_weights_func.run();
				374	_flipped_weights.mark_as_unused();
				375	CLScheduler::get().queue().finish();
				376	_flipped_weights.allocator()->free();
				377
Georgios Pinitas	098516b	2019-04-25 18:25:06 +0100	[diff] [blame]	378	// Transform weights to frequency domain
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	379	_transformed_weights.allocator()->allocate();
Georgios Pinitas	098516b	2019-04-25 18:25:06 +0100	[diff] [blame]	380	_transform_weights_func->run();
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	381	_padded_weights.mark_as_unused();
				382	CLScheduler::get().queue().finish();
Georgios Pinitas	098516b	2019-04-25 18:25:06 +0100	[diff] [blame]	383	// Delete object and release internal memory
				384	_transform_weights_func.reset();
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	385	_padded_weights.allocator()->free();
				386
				387	_is_prepared = true;
				388	}
				389	}
				390	} // namespace arm_compute