Blame - src/runtime/CL/functions/CLConvolutionLayer.cpp - ml/ComputeLibrary

blob: 2c1ddc3e3b0ecae0dd83382afd4de32ce5f1d80a [file] [log] [blame]

Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1	/*
				2	* Copyright (c) 2017 ARM Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
				25
				26	#include "arm_compute/core/PixelValue.h"
Gian Marco Iodice	13edbff	2017-06-26 17:20:16 +0100	[diff] [blame]	27	#include "arm_compute/core/Size2D.h"
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	28	#include "arm_compute/core/Utils.h"
				29	#include "arm_compute/core/Validate.h"
Chunosov	5124be5	2017-11-22 20:42:13 +0700	[diff] [blame]	30	#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	31	#include "arm_compute/runtime/CL/CLScheduler.h"
				32
				33	#include <cmath>
Georgios Pinitas	baf174e	2017-09-08 19:47:30 +0100	[diff] [blame]	34	#include <memory>
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	35	#include <tuple>
				36
				37	using namespace arm_compute;
				38
Georgios Pinitas	baf174e	2017-09-08 19:47:30 +0100	[diff] [blame]	39	CLConvolutionLayerReshapeWeights::CLConvolutionLayerReshapeWeights(std::shared_ptr<IMemoryManager> memory_manager)
				40	: _memory_group(std::move(memory_manager)), _weights_reshape_kernel(), _weights_transposed_kernel(), _weights_reshaped(), _transpose1xW(false)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	41	{
				42	}
				43
				44	void CLConvolutionLayerReshapeWeights::configure(const ICLTensor weights, const ICLTensor biases, ICLTensor *output, bool transpose1xW)
				45	{
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	46	ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4);
				47
				48	if(biases != nullptr)
				49	{
Chunosov	5124be5	2017-11-22 20:42:13 +0700	[diff] [blame]	50	ARM_COMPUTE_ERROR_ON(is_data_type_quantized_asymmetric(weights->info()->data_type()));
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	51	ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases);
				52	ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != weights->info()->dimension(3));
				53	ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1);
				54	}
				55
Chunosov	5124be5	2017-11-22 20:42:13 +0700	[diff] [blame]	56	const bool append_biases = (biases != nullptr) && !is_data_type_quantized_asymmetric(weights->info()->data_type());
				57	const unsigned bias_element = (append_biases) ? 1 : 0;
				58	const ICLTensor *biases_to_use = (append_biases) ? biases : nullptr;
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	59
				60	_transpose1xW = transpose1xW;
				61
				62	if(transpose1xW)
				63	{
				64	// Create tensor to store the reshaped weights
				65	const unsigned int mat_weights_cols = weights->info()->dimension(3);
Chunosov	5124be5	2017-11-22 20:42:13 +0700	[diff] [blame]	66	const unsigned int mat_weights_rows = weights->info()->dimension(0) * weights->info()->dimension(1) * weights->info()->dimension(2) + bias_element;
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	67	TensorShape shape_wr(mat_weights_cols, mat_weights_rows);
Gian Marco Iodice	368da83	2017-07-03 12:33:49 +0100	[diff] [blame]	68	const DataType dt = weights->info()->data_type();
				69	const int fixed_point_position = weights->info()->fixed_point_position();
				70	TensorInfo info_wr(shape_wr, 1, dt, fixed_point_position);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	71
				72	_weights_reshaped.allocator()->init(info_wr);
Georgios Pinitas	baf174e	2017-09-08 19:47:30 +0100	[diff] [blame]	73	_memory_group.manage(&_weights_reshaped);
Chunosov	5124be5	2017-11-22 20:42:13 +0700	[diff] [blame]	74	_weights_reshape_kernel.configure(weights, biases_to_use, &_weights_reshaped);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	75	_weights_transposed_kernel.configure(&_weights_reshaped, output);
				76	_weights_reshaped.allocator()->allocate();
				77	}
				78	else
				79	{
Chunosov	5124be5	2017-11-22 20:42:13 +0700	[diff] [blame]	80	_weights_reshape_kernel.configure(weights, biases_to_use, output);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	81	}
Gian Marco	1d25ed5	2017-12-16 19:33:50 +0000	[diff] [blame]	82
				83	output->info()->set_quantization_info(weights->info()->quantization_info());
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	84	}
				85
				86	void CLConvolutionLayerReshapeWeights::run()
				87	{
Georgios Pinitas	baf174e	2017-09-08 19:47:30 +0100	[diff] [blame]	88	_memory_group.acquire();
				89
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	90	cl::CommandQueue q = CLScheduler::get().queue();
				91	CLScheduler::get().enqueue(_weights_reshape_kernel);
				92	if(_transpose1xW)
				93	{
				94	CLScheduler::get().enqueue(_weights_transposed_kernel);
				95	}
Georgios Pinitas	baf174e	2017-09-08 19:47:30 +0100	[diff] [blame]	96
				97	_memory_group.release();
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	98	}
				99
Georgios Pinitas	baf174e	2017-09-08 19:47:30 +0100	[diff] [blame]	100	CLConvolutionLayer::CLConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
Chunosov	5124be5	2017-11-22 20:42:13 +0700	[diff] [blame]	101	: _memory_group(memory_manager), _reshape_weights(), _input_im2col_kernel(), _input_interleave_kernel(), _mm_kernel(), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(), _output_col2im_kernel(),
Gian Marco	1d25ed5	2017-12-16 19:33:50 +0000	[diff] [blame]	102	_input_im2col_reshaped(), _input_interleaved_reshaped(), _weights_reshaped(), _weights_transposed(), _gemm_output(), _tmp_output(), _are_weights_reshaped(false), _is_quantized(false),
				103	_is_interleaved_transposed(false)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	104	{
				105	}
				106
Chunosov	5124be5	2017-11-22 20:42:13 +0700	[diff] [blame]	107	void CLConvolutionLayer::configure_mm(const ICLTensor input, const ICLTensor weights, ICLTensor *output, bool is_interleaved_transposed)
				108	{
				109	if(_is_quantized)
				110	{
				111	// Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
				112	// Extract and negate input and weights offset
				113	const QuantizationInfo input_quantization_info = input->info()->quantization_info();
				114	const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
				115
				116	input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset));
				117	weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset));
				118
				119	_mm_gemmlowp.configure(input, weights, output, GEMMInfo(false, false, true /* Reshape weights only for the first run*/));
				120
				121	// Revert back QuantizatioInfo as input and weights could be used in other convolution layers
				122	input->info()->set_quantization_info(input_quantization_info);
				123	weights->info()->set_quantization_info(weights_quantization_info);
				124	}
				125	else
				126	{
				127	_mm_kernel.configure(input, weights, output, 1.f, is_interleaved_transposed);
				128	}
				129	}
				130
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	131	void CLConvolutionLayer::configure(const ICLTensor input, const ICLTensor weights, const ICLTensor biases, ICLTensor output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info)
				132	{
Chunosov	5124be5	2017-11-22 20:42:13 +0700	[diff] [blame]	133	ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32);
Gian Marco Iodice	559d771	2017-08-08 08:38:09 +0100	[diff] [blame]	134	ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
				135	ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, weights);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	136	ARM_COMPUTE_ERROR_ON(!weights_info.are_reshaped() && weights->info()->dimension(2) != input->info()->dimension(2));
				137	ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4);
Chunosov	5124be5	2017-11-22 20:42:13 +0700	[diff] [blame]	138	ARM_COMPUTE_ERROR_ON(weights_info.are_reshaped() && is_data_type_quantized_asymmetric(input->info()->data_type()));
				139
				140	_is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	141
				142	if(biases != nullptr)
				143	{
Chunosov	5124be5	2017-11-22 20:42:13 +0700	[diff] [blame]	144	if(_is_quantized)
				145	{
				146	ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32);
				147	}
				148	else
				149	{
				150	ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
				151	}
Gian Marco Iodice	368da83	2017-07-03 12:33:49 +0100	[diff] [blame]	152	ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, biases);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	153	ARM_COMPUTE_ERROR_ON(!weights_info.are_reshaped() && biases->info()->dimension(0) != weights->info()->dimension(3));
				154	ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1);
				155	}
				156
Chunosov	5124be5	2017-11-22 20:42:13 +0700	[diff] [blame]	157	const DataType dt = input->info()->data_type();
Gian Marco Iodice	368da83	2017-07-03 12:33:49 +0100	[diff] [blame]	158
Gian Marco	1d25ed5	2017-12-16 19:33:50 +0000	[diff] [blame]	159	// Set the GPU target for matrix multiply and im2col and col2im
Gian Marco Iodice	1246b63	2017-08-16 18:38:32 +0100	[diff] [blame]	160	_mm_kernel.set_target(CLScheduler::get().target());
Gian Marco	1d25ed5	2017-12-16 19:33:50 +0000	[diff] [blame]	161	_input_im2col_kernel.set_target(CLScheduler::get().target());
				162	_output_col2im_kernel.set_target(CLScheduler::get().target());
Gian Marco Iodice	1246b63	2017-08-16 18:38:32 +0100	[diff] [blame]	163
Gian Marco	1d25ed5	2017-12-16 19:33:50 +0000	[diff] [blame]	164	const bool append_bias = (biases != nullptr) && (!_is_quantized);
				165	_are_weights_reshaped = weights_info.are_reshaped();
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	166
Gian Marco	1d25ed5	2017-12-16 19:33:50 +0000	[diff] [blame]	167	const unsigned bias_element = (append_bias) ? 1 : 0;
				168	const ICLTensor *biases_to_use = (append_bias) ? biases : nullptr;
Chunosov	5124be5	2017-11-22 20:42:13 +0700	[diff] [blame]	169
Gian Marco Iodice	368da83	2017-07-03 12:33:49 +0100	[diff] [blame]	170	// Get parameters from conv_info
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	171	unsigned int stride_x = 0;
				172	unsigned int stride_y = 0;
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	173	std::tie(stride_x, stride_y) = conv_info.stride();
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	174
				175	// Get convolved dimensions
				176	unsigned int conv_w = 0;
				177	unsigned int conv_h = 0;
				178
Gian Marco Iodice	368da83	2017-07-03 12:33:49 +0100	[diff] [blame]	179	const unsigned int kernel_width = (_are_weights_reshaped) ? weights_info.kernel_size().first : weights->info()->dimension(0);
				180	const unsigned int kernel_height = (_are_weights_reshaped) ? weights_info.kernel_size().second : weights->info()->dimension(1);
Gian Marco Iodice	4e28869	2017-06-27 11:41:59 +0100	[diff] [blame]	181	std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_width, kernel_height,
				182	conv_info);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	183
				184	// Check if its a "fully connected" convolution
Gian Marco	1d25ed5	2017-12-16 19:33:50 +0000	[diff] [blame]	185	const bool is_fully_connected_convolution = ((conv_w == 1) && (conv_h == 1));
				186	_is_interleaved_transposed = (!is_fully_connected_convolution && !_is_quantized);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	187
Gian Marco Iodice	368da83	2017-07-03 12:33:49 +0100	[diff] [blame]	188	unsigned int mat_weights_cols = weights->info()->dimension(3);
Chunosov	5124be5	2017-11-22 20:42:13 +0700	[diff] [blame]	189	unsigned int mat_weights_rows = weights->info()->dimension(0) * weights->info()->dimension(1) * weights->info()->dimension(2) + bias_element;
Gian Marco Iodice	368da83	2017-07-03 12:33:49 +0100	[diff] [blame]	190
				191	// Reshape weights if needed
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	192	if(_are_weights_reshaped)
				193	{
Gian Marco	1d25ed5	2017-12-16 19:33:50 +0000	[diff] [blame]	194	if(is_fully_connected_convolution \|\| _is_quantized)
Georgios Pinitas	b660dcf	2017-12-13 10:48:06 +0000	[diff] [blame]	195	{
				196	mat_weights_cols = weights->info()->dimension(0);
				197	mat_weights_rows = weights->info()->dimension(1);
				198	}
				199	else
				200	{
				201	mat_weights_cols = weights_info.num_kernels();
				202	const unsigned int quarter_reshaped_cols = weights->info()->dimension(0) / 4;
				203	mat_weights_rows = quarter_reshaped_cols + bias_element;
				204	}
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	205	}
				206	else
				207	{
Gian Marco	1d25ed5	2017-12-16 19:33:50 +0000	[diff] [blame]	208	// _weights_reshaped will be auto configured in the kernel
				209	_reshape_weights.configure(weights, biases_to_use, &_weights_reshaped, _is_interleaved_transposed /* 1xW transpose */);
				210
Gian Marco Iodice	368da83	2017-07-03 12:33:49 +0100	[diff] [blame]	211	weights = &_weights_reshaped;
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	212	}
Gian Marco Iodice	368da83	2017-07-03 12:33:49 +0100	[diff] [blame]	213
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	214	// Create tensor to store im2col reshaped inputs
Gian Marco Iodice	368da83	2017-07-03 12:33:49 +0100	[diff] [blame]	215	const unsigned int mat_input_cols = mat_weights_rows;
				216	const unsigned int mat_input_rows = conv_w * conv_h;
				217	TensorShape shape_im2col = input->info()->tensor_shape();
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	218	shape_im2col.set(0, mat_input_cols);
				219	shape_im2col.set(1, mat_input_rows);
				220	shape_im2col.set(2, 1);
Gian Marco	bfa3b52	2017-12-12 10:08:38 +0000	[diff] [blame]	221	// FIXME: input->clone() doesn't work with subtensors for grouped convolutions.
				222	TensorInfo im2col_reshaped_info(shape_im2col, 1, dt, input->info()->fixed_point_position());
				223	im2col_reshaped_info.set_quantization_info(input->info()->quantization_info());
				224	_input_im2col_reshaped.allocator()->init(im2col_reshaped_info);
Georgios Pinitas	baf174e	2017-09-08 19:47:30 +0100	[diff] [blame]	225	_memory_group.manage(&_input_im2col_reshaped);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	226
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	227	// Create GEMM output tensor
				228	TensorShape shape_gemm = _input_im2col_reshaped.info()->tensor_shape();
				229	shape_gemm.set(0, mat_weights_cols);
				230	shape_gemm.set(1, mat_input_rows);
Chunosov	5124be5	2017-11-22 20:42:13 +0700	[diff] [blame]	231	const DataType gemm_data_type = _is_quantized ? DataType::S32 : dt;
				232	// GEMM output should be S32 for acquiring raw integer accumulator without quantized postprocessing for quantized asymmetric input.
Gian Marco	bfa3b52	2017-12-12 10:08:38 +0000	[diff] [blame]	233	// FIXME: input->clone() doesn't work with subtensors for grouped convolutions.
				234	TensorInfo info_gemm(shape_gemm, 1, gemm_data_type, input->info()->fixed_point_position());
				235	info_gemm.set_quantization_info(output->info()->quantization_info());
Chunosov	5124be5	2017-11-22 20:42:13 +0700	[diff] [blame]	236	_gemm_output.allocator()->init(info_gemm);
Georgios Pinitas	baf174e	2017-09-08 19:47:30 +0100	[diff] [blame]	237	_memory_group.manage(&_gemm_output);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	238
Gian Marco	1d25ed5	2017-12-16 19:33:50 +0000	[diff] [blame]	239	// Configure im2col
				240	_input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, append_bias);
Gian Marco Iodice	edfa9f4	2017-08-15 11:45:22 +0100	[diff] [blame]	241
				242	// Configure matrix multiply
Gian Marco	1d25ed5	2017-12-16 19:33:50 +0000	[diff] [blame]	243	if(_is_interleaved_transposed)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	244	{
Gian Marco	1d25ed5	2017-12-16 19:33:50 +0000	[diff] [blame]	245	// Configure GEMMInterleave4x4. _input_interleaved_reshaped will be auto configured in the kernel
Chunosov	5124be5	2017-11-22 20:42:13 +0700	[diff] [blame]	246	_input_interleave_kernel.configure(&_input_im2col_reshaped, &_input_interleaved_reshaped);
Gian Marco	1d25ed5	2017-12-16 19:33:50 +0000	[diff] [blame]	247	_memory_group.manage(&_input_interleaved_reshaped);
				248
				249	// Configure GEMM
Chunosov	5124be5	2017-11-22 20:42:13 +0700	[diff] [blame]	250	configure_mm(&_input_interleaved_reshaped, weights, &_gemm_output);
				251	_input_interleaved_reshaped.allocator()->allocate();
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	252	}
				253	else
				254	{
Chunosov	5124be5	2017-11-22 20:42:13 +0700	[diff] [blame]	255	configure_mm(&_input_im2col_reshaped, weights, &_gemm_output, false);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	256	}
Georgios Pinitas	baf174e	2017-09-08 19:47:30 +0100	[diff] [blame]	257	_input_im2col_reshaped.allocator()->allocate();
Chunosov	5124be5	2017-11-22 20:42:13 +0700	[diff] [blame]	258
				259	// Configure output stage for quantized case
				260	if(_is_quantized)
				261	{
				262	float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output->info()->quantization_info().scale;
				263	int output_multiplier, output_shift;
				264	quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
				265	_gemmlowp_output_stage.configure(&_gemm_output, biases, &_tmp_output, output_multiplier, output_shift, output->info()->quantization_info().offset);
				266	_gemm_output.allocator()->allocate();
				267	}
				268
				269	// Configure Col2Im
Chunosov	5124be5	2017-11-22 20:42:13 +0700	[diff] [blame]	270	_output_col2im_kernel.configure(_is_quantized ? &_tmp_output : &_gemm_output, output, std::make_pair(conv_w, conv_h));
				271	if(_is_quantized)
				272	{
				273	_tmp_output.allocator()->allocate();
				274	}
				275	else
				276	{
				277	_gemm_output.allocator()->allocate();
				278	}
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	279
Gian Marco Iodice	559d771	2017-08-08 08:38:09 +0100	[diff] [blame]	280	ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(0) != conv_w) \|\| (output->info()->dimension(1) != conv_h), "Output shape does not match the expected one");
				281
Gian Marco Iodice	368da83	2017-07-03 12:33:49 +0100	[diff] [blame]	282	// Allocate intermediate tensor
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	283	if(!_are_weights_reshaped)
				284	{
Gian Marco Iodice	368da83	2017-07-03 12:33:49 +0100	[diff] [blame]	285	_weights_reshaped.allocator()->allocate();
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	286	}
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	287	}
				288
				289	void CLConvolutionLayer::run()
				290	{
				291	// Run weights reshaping (Runs once for every configure)
				292	if(!_are_weights_reshaped)
				293	{
				294	_are_weights_reshaped = true;
				295	_reshape_weights.run();
				296	}
				297
Georgios Pinitas	baf174e	2017-09-08 19:47:30 +0100	[diff] [blame]	298	_memory_group.acquire();
				299
Chunosov	5124be5	2017-11-22 20:42:13 +0700	[diff] [blame]	300	// Run im2col
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	301	CLScheduler::get().enqueue(_input_im2col_kernel);
Chunosov	5124be5	2017-11-22 20:42:13 +0700	[diff] [blame]	302
Gian Marco	1d25ed5	2017-12-16 19:33:50 +0000	[diff] [blame]	303	if(_is_interleaved_transposed)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	304	{
Chunosov	5124be5	2017-11-22 20:42:13 +0700	[diff] [blame]	305	// Run interleave4x4
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	306	CLScheduler::get().enqueue(_input_interleave_kernel);
				307	}
				308
				309	// Runs matrix multiply on reshaped matrices
Chunosov	5124be5	2017-11-22 20:42:13 +0700	[diff] [blame]	310	if(_is_quantized)
				311	{
				312	_mm_gemmlowp.run();
				313	}
				314	else
				315	{
				316	CLScheduler::get().enqueue(_mm_kernel);
				317	}
				318
				319	// Run output stage for quantized case
				320	if(_is_quantized)
				321	{
				322	_gemmlowp_output_stage.run();
				323	}
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	324
				325	// Reshape output matrix
				326	CLScheduler::get().enqueue(_output_col2im_kernel, false);
Georgios Pinitas	baf174e	2017-09-08 19:47:30 +0100	[diff] [blame]	327
				328	_memory_group.release();
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	329	}