Blame - src/runtime/CL/functions/CLFFTConvolutionLayer.cpp - ml/ComputeLibrary

blob: afb1cab520d6df03ae58e7a5b186daab35ec0eb3 [file] [log] [blame]

Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	1	/*
				2	* Copyright (c) 2019 ARM Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h"
				25
				26	#include "arm_compute/core/CL/ICLTensor.h"
				27	#include "arm_compute/core/Utils.h"
				28	#include "arm_compute/core/Validate.h"
				29	#include "arm_compute/core/utils/helpers/fft.h"
				30	#include "arm_compute/core/utils/misc/ShapeCalculator.h"
				31	#include "arm_compute/runtime/CL/CLScheduler.h"
				32	#include "arm_compute/runtime/CPP/CPPScheduler.h"
				33
				34	namespace arm_compute
				35	{
				36	namespace
				37	{
				38	int pad_decomposable(int N)
				39	{
				40	const auto supported_radix = CLFFTRadixStageKernel::supported_radix();
				41
				42	int pad = 0;
				43	bool is_decomposed = false;
				44	while(!is_decomposed)
				45	{
				46	const auto decomposed_vector = arm_compute::helpers::fft::decompose_stages(N++, supported_radix);
				47	is_decomposed = !decomposed_vector.empty();
				48	if(!is_decomposed)
				49	{
				50	++pad;
				51	}
				52	}
				53	return pad;
				54	}
				55	} // namespace
				56	CLFFTConvolutionLayer::CLFFTConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
				57	: _memory_group(memory_manager),
				58	_flip_weights_func(),
				59	_permute_input_func(),
				60	_permute_output_func(),
				61	_permute_weights_func(),
				62	_permute_bias_func(),
				63	_pad_input_func(),
				64	_pad_weights_func(),
				65	_transform_input_func(memory_manager),
Georgios Pinitas	098516b	2019-04-25 18:25:06 +0100	[diff] [blame]	66	_transform_weights_func(),
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	67	_itransform_output_func(memory_manager),
				68	_prod_func(),
				69	_reduce_func(),
				70	_extract_output_func(),
				71	_bias_add_func(),
				72	_activation_layer_func(),
				73	_permuted_input(),
				74	_permuted_weights(),
				75	_permuted_bias(),
				76	_permuted_output(),
				77	_padded_input(),
				78	_padded_weights(),
				79	_flip_axis(),
				80	_flipped_weights(),
				81	_transformed_input(),
				82	_transformed_weights(),
				83	_input_weights_product(),
				84	_output_product(),
				85	_output_reduced(),
				86	_itransformed_output(),
				87	_reshaped_output(),
				88	_bias_output(),
				89	_original_weights(nullptr),
				90	_original_bias(nullptr),
				91	_is_activationlayer_enabled(false),
				92	_needs_permute(false),
				93	_has_bias(false),
				94	_is_prepared(false)
				95	{
				96	}
				97
				98	void CLFFTConvolutionLayer::configure(ICLTensor input, const ICLTensor weights, const ICLTensor biases, ICLTensor output, const PadStrideInfo &conv_info,
				99	const ActivationLayerInfo &act_info)
				100	{
				101	_original_weights = weights;
				102	_original_bias = biases;
				103
				104	// Flat if bias addition is required
				105	_has_bias = biases != nullptr;
				106
				107	// Get indices for the width and height
				108	const size_t idx_width = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::WIDTH);
				109	const size_t idx_height = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);
				110
				111	// Input shape, kernel size and output tile
				112	const Size2D input_dims = Size2D(input->info()->tensor_shape()[idx_width], input->info()->tensor_shape()[idx_height]);
				113	const Size2D kernel_size = Size2D(weights->info()->tensor_shape()[idx_width], weights->info()->tensor_shape()[idx_height]);
				114	const Size2D pad_valid = Size2D(pad_decomposable(input_dims.x() + kernel_size.x() - 1),
				115	pad_decomposable(input_dims.y() + kernel_size.y() - 1));
				116	// Tensors to use
				117	ICLTensor *input_to_use = input;
				118	const ICLTensor *weights_to_use = weights;
				119	ICLTensor *output_to_use = _has_bias ? &_bias_output : output;
				120
				121	// Permute bias
Georgios Pinitas	68c6a79	2019-05-15 13:24:00 +0100	[diff] [blame]	122	if(biases != nullptr)
				123	{
				124	_permute_bias_func.configure(biases, &_permuted_bias, PermutationVector(1U, 2U, 0U));
				125	_permuted_bias.info()->set_data_layout(DataLayout::NCHW);
				126	}
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	127
				128	// Permute input if needed
				129	_needs_permute = input->info()->data_layout() == DataLayout::NHWC;
				130	if(_needs_permute)
				131	{
				132	_memory_group.manage(&_permuted_input);
				133	// Configure the function to transform the input tensor from NHWC -> NCHW
				134	_permute_input_func.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
				135	_permuted_input.info()->set_data_layout(DataLayout::NCHW);
				136
				137	// Configure the function to transform the weights tensor from HWI -> IHW
				138	_permute_weights_func.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
				139	_permuted_weights.info()->set_data_layout(DataLayout::NCHW);
				140
				141	input_to_use = &_permuted_input;
				142	weights_to_use = &_permuted_weights;
				143	}
				144
				145	// Flip weights
				146	_flipped_weights.allocator()->init(weights_to_use->info()->clone()->set_is_resizable(true).reset_padding());
				147	_flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
				148	_flip_weights_func.configure(weights_to_use, &_flipped_weights, &_flip_axis);
				149
				150	// Pad weights
				151	const PaddingList padding_w = { { 0, input_dims.x() + pad_valid.x() - 1 }, { 0, input_dims.y() + pad_valid.y() - 1 } };
				152	_pad_weights_func.configure(&_flipped_weights, &_padded_weights, padding_w);
				153
				154	// Transform weights
Georgios Pinitas	098516b	2019-04-25 18:25:06 +0100	[diff] [blame]	155	_transform_weights_func = support::cpp14::make_unique<CLFFT2D>();
				156	_transform_weights_func->configure(&_padded_weights, &_transformed_weights, FFT2DInfo());
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	157
				158	// Pad input
				159	const PaddingList padding_in = { { 0, kernel_size.x() + pad_valid.x() - 1 }, { 0, kernel_size.y() + pad_valid.y() - 1 } };
				160	_memory_group.manage(&_padded_input);
				161	_pad_input_func.configure(input_to_use, &_padded_input, padding_in);
				162	if(_needs_permute)
				163	{
				164	_permuted_input.allocator()->allocate();
				165	}
				166
				167	// Transform input
				168	_memory_group.manage(&_transformed_input);
				169	_transform_input_func.configure(&_padded_input, &_transformed_input, FFT2DInfo());
				170	_padded_input.allocator()->allocate();
				171
				172	// Perform product
				173	_memory_group.manage(&_output_product);
				174	_prod_func.configure(&_transformed_input, &_transformed_weights, &_output_product);
				175	_transformed_input.allocator()->allocate();
				176
				177	// Perform reduction
				178	_memory_group.manage(&_output_reduced);
				179	_reduce_func.configure(&_output_product, &_output_reduced, 2, ReductionOperation::SUM);
				180	_output_product.allocator()->allocate();
				181
				182	// Transform output
				183	_memory_group.manage(&_itransformed_output);
				184	FFT2DInfo itranform_info;
				185	itranform_info.direction = FFTDirection::Inverse;
				186	_itransformed_output.allocator()->init(_output_reduced.info()->clone()->set_is_resizable(true).set_num_channels(1).reset_padding());
				187	_itransform_output_func.configure(&_output_reduced, &_itransformed_output, itranform_info);
				188	_output_reduced.allocator()->allocate();
				189
				190	// Reshape output
				191	TensorShape reshaped_shape = _itransformed_output.info()->tensor_shape();
				192	reshaped_shape.remove_dimension(2);
				193	_reshaped_output.allocator()->init(_itransformed_output.info()->clone()->set_tensor_shape(reshaped_shape));
				194
				195	// Extract correct region
				196	const int start_left = kernel_size.x() - conv_info.pad_left() - 1;
				197	const int start_top = kernel_size.y() - conv_info.pad_top() - 1;
				198	const int end_right = _reshaped_output.info()->tensor_shape().x() - (kernel_size.x() - conv_info.pad_right() - 1) - pad_valid.x();
				199	const int end_botton = _reshaped_output.info()->tensor_shape().y() - (kernel_size.y() - conv_info.pad_bottom() - 1) - pad_valid.y();
				200	if(_has_bias)
				201	{
				202	_memory_group.manage(&_bias_output);
				203	}
				204	else if(_needs_permute)
				205	{
				206	output_to_use = &_permuted_output;
				207	_memory_group.manage(&_permuted_output);
				208	}
				209	_extract_output_func.configure(&_reshaped_output, output_to_use, Coordinates(start_left, start_top), Coordinates(end_right, end_botton));
				210	_itransformed_output.allocator()->allocate();
				211
				212	// Add bias
				213	if(biases != nullptr)
				214	{
				215	output_to_use = output;
				216	if(_needs_permute)
				217	{
				218	output_to_use = &_permuted_output;
				219	_memory_group.manage(&_permuted_output);
				220	}
				221	auto_init_if_empty(output_to_use->info(), _bias_output.info());
				222	_bias_add_func.configure(&_bias_output, &_permuted_bias, output_to_use, ConvertPolicy::WRAP);
				223	_bias_output.allocator()->allocate();
				224	}
				225
				226	// Permute output
				227	if(_needs_permute)
				228	{
				229	// Configure the function to transform the convoluted output to ACL's native ordering format NCHW
				230	_permuted_output.info()->set_data_layout(DataLayout::NCHW);
				231	_permute_output_func.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));
				232
				233	// Allocate tensors
				234	_permuted_output.allocator()->allocate();
				235	}
				236
				237	// Configure Activation Layer
				238	_is_activationlayer_enabled = act_info.enabled();
				239	if(_is_activationlayer_enabled)
				240	{
				241	_activation_layer_func.configure(output, nullptr, act_info);
				242	}
				243
				244	// Setup flip axis data
				245	_flip_axis.allocator()->allocate();
				246	_flip_axis.map(true);
				247	auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer());
				248	axis_data[0] = 0;
				249	axis_data[1] = 1;
				250	_flip_axis.unmap();
				251	}
				252
				253	Status CLFFTConvolutionLayer::validate(const ITensorInfo input, const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo output, const PadStrideInfo &conv_info,
				254	const ActivationLayerInfo &act_info)
				255	{
				256	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
				257	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
				258
				259	// Get indices for the width and height
				260	const size_t idx_width = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
				261	const size_t idx_height = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
				262
				263	// Input shape, kernel size and output tile
				264	const Size2D kernel_size = Size2D(weights->tensor_shape()[idx_width], weights->tensor_shape()[idx_height]);
				265
				266	// Strides
				267	const auto strides = conv_info.stride();
				268	ARM_COMPUTE_RETURN_ERROR_ON(strides.first != strides.second && strides.first != 1);
				269	ARM_COMPUTE_RETURN_ERROR_ON(kernel_size.x() != kernel_size.y());
				270	ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_left() != (kernel_size.x() / 2) \|\| conv_info.pad_right() != (kernel_size.x() / 2));
				271	ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_top() != (kernel_size.y() / 2) \|\| conv_info.pad_bottom() != (kernel_size.y() / 2));
				272
				273	// Validate biases
				274	if(biases != nullptr)
				275	{
				276	const size_t idx_channels = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
				277	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
				278	ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_channels] != biases->tensor_shape().x());
				279	}
				280
				281	// Checks performed when output is configured
				282	if((output != nullptr) && (output->total_size() != 0))
				283	{
				284	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
Vidhya Sudhan Loganathan	8ec0bb6	2019-04-23 10:40:44 +0100	[diff] [blame]	285	ARM_COMPUTE_RETURN_ERROR_ON((input->tensor_shape()[idx_height] != output->tensor_shape()[idx_height]) \|\| (input->tensor_shape()[idx_width] != output->tensor_shape()[idx_width]));
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	286
				287	// Validate Activation Layer
				288	if(act_info.enabled())
				289	{
				290	ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(output, nullptr, act_info));
				291	}
				292	}
				293
				294	return Status{};
				295	}
				296
				297	void CLFFTConvolutionLayer::run()
				298	{
				299	prepare();
				300
Georgios Pinitas	098516b	2019-04-25 18:25:06 +0100	[diff] [blame]	301	MemoryGroupResourceScope scope_mg(_memory_group);
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	302
				303	// Transform input
				304	if(_needs_permute)
				305	{
				306	_permute_input_func.run();
				307	}
				308	_pad_input_func.run();
				309	_transform_input_func.run();
				310
				311	// Perform operations to frequency domain
				312	_prod_func.run();
				313	_reduce_func.run();
				314
				315	// Transform output
				316	_itransform_output_func.run();
				317	_reshaped_output.allocator()->import_memory(_itransformed_output.cl_buffer());
				318	_extract_output_func.run();
				319	// Add bias
				320	if(_has_bias)
				321	{
				322	_bias_add_func.run();
				323	}
				324	if(_needs_permute)
				325	{
				326	_permute_output_func.run();
				327	}
				328
				329	// Run activation layer
				330	if(_is_activationlayer_enabled)
				331	{
				332	_activation_layer_func.run();
				333	}
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	334	}
				335
				336	void CLFFTConvolutionLayer::prepare()
				337	{
				338	if(!_is_prepared)
				339	{
				340	// Permute bias to NCHW
				341	if(_original_bias != nullptr)
				342	{
				343	_permuted_bias.allocator()->allocate();
				344	_permute_bias_func.run();
				345	_original_bias->mark_as_unused();
				346	}
				347
				348	const ICLTensor *cur_weights = _original_weights;
				349	// Permute weights
				350	if(_needs_permute)
				351	{
				352	ARM_COMPUTE_ERROR_ON(!cur_weights->is_used());
				353
				354	_permuted_weights.allocator()->allocate();
				355	_permute_weights_func.run();
				356	cur_weights->mark_as_unused();
				357	cur_weights = &_permuted_weights;
				358	}
				359
				360	// Flip weights
				361	_flipped_weights.allocator()->allocate();
				362	_flip_weights_func.run();
				363	cur_weights->mark_as_unused();
				364
				365	// Pad weights
				366	_padded_weights.allocator()->allocate();
				367	_pad_weights_func.run();
				368	_flipped_weights.mark_as_unused();
				369	CLScheduler::get().queue().finish();
				370	_flipped_weights.allocator()->free();
				371
Georgios Pinitas	098516b	2019-04-25 18:25:06 +0100	[diff] [blame]	372	// Transform weights to frequency domain
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	373	_transformed_weights.allocator()->allocate();
Georgios Pinitas	098516b	2019-04-25 18:25:06 +0100	[diff] [blame]	374	_transform_weights_func->run();
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	375	_padded_weights.mark_as_unused();
				376	CLScheduler::get().queue().finish();
Georgios Pinitas	098516b	2019-04-25 18:25:06 +0100	[diff] [blame]	377	// Delete object and release internal memory
				378	_transform_weights_func.reset();
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	379	_padded_weights.allocator()->free();
				380
				381	_is_prepared = true;
				382	}
				383	}
				384	} // namespace arm_compute