/*
 * Copyright (c) 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"

#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "support/ToolchainSupport.h"

#include <algorithm>

using namespace arm_compute;

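// The weight 'reshape' for a fully connected layer is a plain 2D transpose, delegated to CLTransposeKernel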
void CLFullyConnectedLayerReshapeWeights::configure(const ICLTensor *input, ICLTensor *output)
{
    auto k = arm_compute::support::cpp14::make_unique<CLTransposeKernel>();
    k->configure(input, output);
    _kernel = std::move(k);
}

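// Minimal usage sketch (the tensors and their shapes are illustrative, not part of this file):
//   CLFullyConnectedLayer fc;
//   fc.configure(&input, &weights, &biases, &output, true /* transpose_weights */, false /* are_weights_reshaped */);
//   fc.run();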
CLFullyConnectedLayer::CLFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(memory_manager), _im2col_kernel(), _reshape_weights_kernel(), _mm_kernel(), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(), _accumulate_biases_kernel(), _im2col_output(),
      _gemmlowp_output(), _reshape_weights_output(), _are_weights_reshaped(true), _is_fc_after_conv(true), _accumulate_biases(false), _is_quantized(false)
{
}

void CLFullyConnectedLayer::configure_mm(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output, bool is_interleaved_transposed)
{
    if(_is_quantized)
    {
        // Extract and negate input and weights offsets
        QuantizationInfo input_quantization_info   = input->info()->quantization_info();
        QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
        input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset));
        weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset));
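        // The GEMMLowp core follows the gemmlowp convention of computing (lhs + lhs_offset) * (rhs + rhs_offset),
        // so passing the negated offsets makes it operate on the de-offset values (q - offset)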
        // Configure gemmlowp function
        _mm_gemmlowp.configure(input, weights, output);
    }
    else
    {
        // Configure matrix multiply kernel
        _mm_kernel.set_target(CLScheduler::get().target());
        _mm_kernel.configure(input, weights, output, 1.f, is_interleaved_transposed);
    }
}

void CLFullyConnectedLayer::configure_conv_fc(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output)
{
    ARM_COMPUTE_ERROR_ON((weights->info()->dimension(1) != (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));

    // If the fully connected layer is called after a convolution layer, the input tensor must be linearized

    // Initialize output tensor for im2col
    TensorShape shape_im2col = input->info()->tensor_shape();
    shape_im2col.collapse(3);
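    // Collapsing width, height and channels into one dimension gives a single linearized vector per batch item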
    _im2col_output.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_im2col));

    // Configure im2col kernel
    _memory_group.manage(&_im2col_output);
    _im2col_kernel.configure(input, &_im2col_output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false);

    // Configure matrix multiply kernel
    configure_mm(&_im2col_output, weights, output, false);

    // Allocate the output tensor for im2col once all the configure methods have been called
    _im2col_output.allocator()->allocate();
}

void CLFullyConnectedLayer::configure_fc_fc(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output)
{
    ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1));

    // Configure matrix multiply kernel
    configure_mm(input, weights, output, false);
}

void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, bool transpose_weights, bool are_weights_reshaped)
{
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32);
    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
    ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 2);

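    // If the caller requests no transpose, the weights are used as-is, i.e. treated as already reshaped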
    _are_weights_reshaped = transpose_weights ? are_weights_reshaped : true;
    _is_fc_after_conv     = true;
    _accumulate_biases    = false;
    _is_quantized         = is_data_type_quantized_asymmetric(input->info()->data_type());

    // Configure gemmlowp output
    if(_is_quantized)
    {
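        // GEMMLowp accumulates into 32-bit integers, so the intermediate output is S32; the output stage requantizes it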
        _gemmlowp_output.allocator()->init(output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
    }

    // Configure accumulate biases kernel for non-quantized types (for quantized types the bias is added by the output stage)
    if(biases != nullptr && !_is_quantized)
    {
        ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);

        _accumulate_biases = true;

        // Configure accumulate biases kernel
        _accumulate_biases_kernel.set_target(CLScheduler::get().target());
        _accumulate_biases_kernel.configure(output, biases);
    }

    // With the Fully Connected layer we can have 4 different cases:
    //  1) Convolution layer -> Fully Connected layer without batches
    //  2) Fully Connected layer -> Fully Connected layer without batches
    //  3) Convolution layer -> Fully Connected layer with batches
    //  4) Fully Connected layer -> Fully Connected layer with batches

    const ICLTensor *weights_to_use = weights;

    if(!_are_weights_reshaped)
    {
        weights_to_use = &_reshape_weights_output;

        // Reshape the weights
        _reshape_weights_kernel.configure(weights, &_reshape_weights_output);
    }

    // Check if we have a fully connected layer with batches
    const bool is_batched_fc_layer = output->info()->dimension(1) > 1;

    if(is_batched_fc_layer)
    {
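        // With batches, the layer follows a convolution if the upper dimensions of the input (dim 3 onwards) match the batch dimensions of the 2D output (dim 1 onwards)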
        _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(input->info()->tensor_shape().cbegin() + 3,
                                                                                  input->info()->tensor_shape().cend(),
                                                                                  output->info()->tensor_shape().cbegin() + 1));
    }
    else
    {
        _is_fc_after_conv = input->info()->num_dimensions() > 1;
    }

    ICLTensor *tmp_output = (_is_quantized) ? &_gemmlowp_output : output;
    if(_is_fc_after_conv)
    {
        // Fully Connected layer after a Convolution Layer
        configure_conv_fc(input, weights_to_use, tmp_output);
    }
    else
    {
        // Fully Connected layer after a Fully Connected Layer
        configure_fc_fc(input, weights_to_use, tmp_output);
    }

    // Configure output stage for asymmetric quantized types
    if(_is_quantized)
    {
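        // The effective float multiplier input_scale * weights_scale / output_scale (expected to be less than one)
        // is decomposed into a normalized fixed-point multiplier plus a right shift for the requantization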
        float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output->info()->quantization_info().scale;
        int   output_multiplier, output_shift;
        quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
        _gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier, output_shift, output->info()->quantization_info().offset);
        _gemmlowp_output.allocator()->allocate();
    }

    // Allocate the reshaped-weights tensor only if reshaping is needed, and only once all the configure methods have been called
    if(!_are_weights_reshaped)
    {
        // Allocate the tensor for the weights reshaped
        _reshape_weights_output.allocator()->allocate();
    }
}

void CLFullyConnectedLayer::run()
{
    // Reshape of the weights (happens only once)
    if(!_are_weights_reshaped)
    {
        _are_weights_reshaped = true;
        _reshape_weights_kernel.run();
    }

    _memory_group.acquire();

    // Linearize input if it comes from a convolutional layer
    if(_is_fc_after_conv)
    {
        CLScheduler::get().enqueue(_im2col_kernel, false);
    }

    // Run matrix multiply
    if(_is_quantized)
    {
        _mm_gemmlowp.run();
    }
    else
    {
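        // The second argument asks the scheduler to flush the queue only when no bias accumulation follows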
        CLScheduler::get().enqueue(_mm_kernel, !_accumulate_biases);
    }

    // Add biases: via the output stage for quantized types, via the accumulate-biases kernel otherwise
    if(_is_quantized)
    {
        _gemmlowp_output_stage.run();
    }
    else
    {
        if(_accumulate_biases)
        {
            CLScheduler::get().enqueue(_accumulate_biases_kernel);
        }
    }

    _memory_group.release();
}