blob: ecbac6f703dc2228e6b7c8149eddea5e6fb88442 [file] [log] [blame]
/*
 * Copyright (c) 2017-2020 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
25
Gian Marco Iodice13edbff2017-06-26 17:20:16 +010026#include "arm_compute/core/Size2D.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010027#include "arm_compute/core/Validate.h"
Michalis Spyroub27e13a2019-09-27 11:04:27 +010028#include "arm_compute/core/utils/misc/Cast.h"
Georgios Pinitas358ca202017-12-07 16:47:52 +000029#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Georgios Pinitas45bcc3a2017-11-29 11:06:49 +000030#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010031#include "arm_compute/runtime/CL/CLScheduler.h"
Matthew Bentham92046462020-03-07 22:15:55 +000032#include "support/MemorySupport.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010033
34#include <algorithm>
Anthony Barbier6ff3b192017-09-04 18:44:23 +010035
Michalis Spyroub27e13a2019-09-27 11:04:27 +010036namespace arm_compute
37{
Georgios Pinitas358ca202017-12-07 16:47:52 +000038using namespace arm_compute::misc::shape_calculator;
Michalis Spyroub27e13a2019-09-27 11:04:27 +010039using namespace arm_compute::utils::cast;
Georgios Pinitas358ca202017-12-07 16:47:52 +000040
41namespace
42{
Georgios Pinitas8b721992019-10-28 16:24:28 +000043Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo &output,
Giorgio Arena1856ff72020-02-07 13:46:45 +000044 GEMMLowpOutputStageInfo &gemmlowp_output_stage, ActivationLayerInfo activation_info)
Georgios Pinitas358ca202017-12-07 16:47:52 +000045{
Georgios Pinitas8b721992019-10-28 16:24:28 +000046 gemmlowp_output_stage.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
47 gemmlowp_output_stage.gemmlowp_offset = 0;
48 gemmlowp_output_stage.gemmlowp_multiplier = 0;
49 gemmlowp_output_stage.gemmlowp_shift = 0;
50
Sang-Hoon Parkb66aa3b2020-01-10 14:44:13 +000051 const auto data_type = input.data_type();
52
Georgios Pinitas8b721992019-10-28 16:24:28 +000053 // Configure output stage for quantized case
Sang-Hoon Parkb66aa3b2020-01-10 14:44:13 +000054 if(is_data_type_quantized_asymmetric(data_type))
Georgios Pinitas8b721992019-10-28 16:24:28 +000055 {
Giorgio Arena1856ff72020-02-07 13:46:45 +000056 const QuantizationInfo oq_info = output.quantization_info();
57 const UniformQuantizationInfo iq_unif = input.quantization_info().uniform();
58 const UniformQuantizationInfo wq_unif = weights.quantization_info().uniform();
59 const UniformQuantizationInfo oq_unif = oq_info.uniform();
Georgios Pinitas8b721992019-10-28 16:24:28 +000060
Giorgio Arena1856ff72020-02-07 13:46:45 +000061 const auto output_quant_info = (output.total_size() == 0) ? iq_unif : oq_unif;
Georgios Pinitas8b721992019-10-28 16:24:28 +000062
Giorgio Arena1856ff72020-02-07 13:46:45 +000063 const float multiplier = (iq_unif.scale * wq_unif.scale) / output_quant_info.scale;
Georgios Pinitas8b721992019-10-28 16:24:28 +000064 int output_multiplier = 0;
65 int output_shift = 0;
Michele Di Giorgio14cbfb22019-10-23 10:53:10 +010066 ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));
Georgios Pinitas8b721992019-10-28 16:24:28 +000067
Sang-Hoon Parkb66aa3b2020-01-10 14:44:13 +000068 PixelValue type_min{};
69 PixelValue type_max{};
70 std::tie(type_min, type_max) = get_min_max(data_type);
71
Giorgio Arena1856ff72020-02-07 13:46:45 +000072 if(activation_info.enabled())
73 {
74 switch(activation_info.activation())
75 {
76 case ActivationLayerInfo::ActivationFunction::RELU:
77 type_min = PixelValue(oq_unif.offset);
78 break;
79 case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
80 type_min = PixelValue(oq_unif.offset);
81 type_max = PixelValue(activation_info.a(), data_type, oq_info);
82 break;
83 case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
84 type_min = PixelValue(activation_info.b(), data_type, oq_info);
85 type_max = PixelValue(activation_info.a(), data_type, oq_info);
86 break;
87 default:
88 ARM_COMPUTE_ERROR("Activation function not supported.");
89 break;
90 }
91 }
92
Georgios Pinitas8b721992019-10-28 16:24:28 +000093 // Set the GEMMLowp output stage info
94 gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset;
95 gemmlowp_output_stage.gemmlowp_multiplier = output_multiplier;
96 gemmlowp_output_stage.gemmlowp_shift = output_shift;
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +000097 gemmlowp_output_stage.gemmlowp_multipliers.push_back(output_multiplier);
98 gemmlowp_output_stage.gemmlowp_shifts.push_back(output_shift);
Sang-Hoon Parkb66aa3b2020-01-10 14:44:13 +000099 type_min.get(gemmlowp_output_stage.gemmlowp_min_bound);
100 type_max.get(gemmlowp_output_stage.gemmlowp_max_bound);
Georgios Pinitas8b721992019-10-28 16:24:28 +0000101 }
102
103 return Status{};
104}
105
Georgios Pinitas44bfc3f2019-10-28 14:16:31 +0000106Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo *bias, const ITensorInfo &output, const FullyConnectedLayerInfo &fc_info)
Georgios Pinitas8b721992019-10-28 16:24:28 +0000107{
108 GEMMLowpOutputStageInfo gemmlowp_output_stage;
Giorgio Arena1856ff72020-02-07 13:46:45 +0000109 ARM_COMPUTE_RETURN_ON_ERROR(construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage, fc_info.activation_info));
Georgios Pinitas8b721992019-10-28 16:24:28 +0000110
Georgios Pinitas44bfc3f2019-10-28 14:16:31 +0000111 const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped
112 false, // is_b_reshaped
113 true, // reshape_b_only_on_first_run
114 0, // depth_output_gemm3d
115 false, // reinterpret_input_as_3d
116 fc_info.retain_internal_weights, // retain_internal_weights
117 gemmlowp_output_stage, // gemmlowp_output_stage
118 fc_info.fp_mixed_precision, // fp_mixed_precision
119 true, // broadcast_bias
120 ActivationLayerInfo()); // activation_info
Georgios Pinitas8b721992019-10-28 16:24:28 +0000121
Georgios Pinitas358ca202017-12-07 16:47:52 +0000122 if(is_data_type_quantized_asymmetric(input.data_type()))
123 {
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100124 const UniformQuantizationInfo iq_info = input.quantization_info().uniform();
125 const UniformQuantizationInfo wq_info = weights.quantization_info().uniform();
126
Georgios Pinitas358ca202017-12-07 16:47:52 +0000127 // Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
128 // Extract and negate input and weights offset
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100129 const QuantizationInfo input_quantization_info(iq_info.scale, -iq_info.offset);
130 const QuantizationInfo weights_quantization_info(wq_info.scale, -wq_info.offset);
Georgios Pinitas358ca202017-12-07 16:47:52 +0000131
132 // Validate gemmlowp function
133 ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyCore::validate(&input.clone()->set_quantization_info(input_quantization_info),
134 &weights.clone()->set_quantization_info(weights_quantization_info),
Georgios Pinitas8b721992019-10-28 16:24:28 +0000135 bias,
136 &output,
137 gemm_info));
Georgios Pinitas358ca202017-12-07 16:47:52 +0000138 }
139 else
140 {
Georgios Pinitas8b721992019-10-28 16:24:28 +0000141 ARM_COMPUTE_RETURN_ON_ERROR(CLGEMM::validate(&input, &weights, bias, &output, 1.f, 1.f, gemm_info));
Georgios Pinitas358ca202017-12-07 16:47:52 +0000142 }
143
144 return Status{};
145}
146} // namespace
Gian Marco Iodiceedfa9f42017-08-15 11:45:22 +0100147
148void CLFullyConnectedLayerReshapeWeights::configure(const ICLTensor *input, ICLTensor *output)
Moritz Pflanzer768e9f12017-08-11 15:33:30 +0100149{
Manuel Bottini2b84be52020-04-08 10:15:51 +0100150 configure(CLKernelLibrary::get().get_compile_context(), input, output);
151}
152
153void CLFullyConnectedLayerReshapeWeights::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
154{
Gian Marco Iodiceedfa9f42017-08-15 11:45:22 +0100155 auto k = arm_compute::support::cpp14::make_unique<CLTransposeKernel>();
Manuel Bottini2b84be52020-04-08 10:15:51 +0100156 k->configure(compile_context, input, output);
Gian Marco Iodiceedfa9f42017-08-15 11:45:22 +0100157 _kernel = std::move(k);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100158}
159
Georgios Pinitas358ca202017-12-07 16:47:52 +0000160Status CLFullyConnectedLayerReshapeWeights::validate(const ITensorInfo *input, const ITensorInfo *output)
161{
162 return CLTransposeKernel::validate(input, output);
163}
164
Michalis Spyrou1a569a32019-09-10 17:20:34 +0100165CLFullyConnectedLayer::CLFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
Michalis Spyroub27e13a2019-09-27 11:04:27 +0100166 : _memory_group(memory_manager), _weights_manager(weights_manager), _convert_weights(), _convert_weights_managed(), _reshape_weights_managed_function(), _flatten_layer(), _reshape_weights_function(),
Georgios Pinitas8b721992019-10-28 16:24:28 +0000167 _mm_gemm(memory_manager, weights_manager), _mm_gemmlowp(memory_manager), _flatten_output(), _converted_weights_output(), _reshape_weights_output(), _are_weights_converted(true),
168 _are_weights_reshaped(true), _is_fc_after_conv(true), _is_quantized(false), _is_prepared(false), _original_weights(nullptr)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100169{
170}
Manuel Bottini2b84be52020-04-08 10:15:51 +0100171void CLFullyConnectedLayer::configure_mm(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
172 const FullyConnectedLayerInfo &fc_info)
Georgios Pinitas45bcc3a2017-11-29 11:06:49 +0000173{
Georgios Pinitas8b721992019-10-28 16:24:28 +0000174 GEMMLowpOutputStageInfo gemmlowp_output_stage;
Giorgio Arena1856ff72020-02-07 13:46:45 +0000175 construct_gemmlowp_output_stage(*input->info(), *weights->info(), *output->info(), gemmlowp_output_stage, fc_info.activation_info);
Georgios Pinitas8b721992019-10-28 16:24:28 +0000176
Georgios Pinitas44bfc3f2019-10-28 14:16:31 +0000177 const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped
178 false, // is_b_reshaped
179 true, // reshape_b_only_on_first_run
180 0, // depth_output_gemm3d
181 false, // reinterpret_input_as_3d
182 fc_info.retain_internal_weights, // retain_internal_weights
183 gemmlowp_output_stage, // gemmlowp_output_stage
184 fc_info.fp_mixed_precision, // fp_mixed_precision
185 true, // broadcast_bias
Giorgio Arena1856ff72020-02-07 13:46:45 +0000186 fc_info.activation_info); // activation_info
Georgios Pinitas8b721992019-10-28 16:24:28 +0000187
Georgios Pinitas45bcc3a2017-11-29 11:06:49 +0000188 if(_is_quantized)
189 {
Chunosov5124be52017-11-22 20:42:13 +0700190 // Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
Georgios Pinitas45bcc3a2017-11-29 11:06:49 +0000191 // Extract and negate input and weights offset
Chunosov5124be52017-11-22 20:42:13 +0700192 const QuantizationInfo input_quantization_info = input->info()->quantization_info();
193 const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
194
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100195 input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
196 weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
Chunosov5124be52017-11-22 20:42:13 +0700197
Georgios Pinitas45bcc3a2017-11-29 11:06:49 +0000198 // Configure gemmlowp function
Manuel Bottini2b84be52020-04-08 10:15:51 +0100199 _mm_gemmlowp.configure(compile_context, input, weights, bias, output, gemm_info);
Chunosov5124be52017-11-22 20:42:13 +0700200
201 // Revert back QuantizatioInfo as input and weights could be used in other fully connected layers
202 input->info()->set_quantization_info(input_quantization_info);
203 weights->info()->set_quantization_info(weights_quantization_info);
Georgios Pinitas45bcc3a2017-11-29 11:06:49 +0000204 }
205 else
206 {
207 // Configure matrix multiply kernel
Manuel Bottini2b84be52020-04-08 10:15:51 +0100208 _mm_gemm.configure(compile_context, input, weights, bias, output, 1.f, 1.f, gemm_info);
Georgios Pinitas45bcc3a2017-11-29 11:06:49 +0000209 }
210}
211
Manuel Bottini2b84be52020-04-08 10:15:51 +0100212void CLFullyConnectedLayer::configure_conv_fc(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
213 const FullyConnectedLayerInfo &fc_info)
Gian Marco Iodiceedfa9f42017-08-15 11:45:22 +0100214{
215 ARM_COMPUTE_ERROR_ON((weights->info()->dimension(1) != (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));
216
Gian Marco Iodiceedfa9f42017-08-15 11:45:22 +0100217 // If the fully connected layer is called after a convolution layer, the input tensor must be linearized
218
Gian Marco Iodice215b4ea2018-06-28 16:29:29 +0100219 // Initialize output tensor for flatten
220 TensorShape shape_flatten = compute_flatten_shape(input->info());
221 _flatten_output.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_flatten).set_data_layout(DataLayout::NCHW));
Gian Marco Iodiceedfa9f42017-08-15 11:45:22 +0100222
Gian Marco Iodice215b4ea2018-06-28 16:29:29 +0100223 // Configure flatten kernel
224 _memory_group.manage(&_flatten_output);
Manuel Bottini2b84be52020-04-08 10:15:51 +0100225 _flatten_layer.configure(compile_context, input, &_flatten_output);
Gian Marco Iodiceedfa9f42017-08-15 11:45:22 +0100226
227 // Configure matrix multiply kernel
Manuel Bottini2b84be52020-04-08 10:15:51 +0100228 configure_mm(compile_context, &_flatten_output, weights, bias, output, fc_info);
Gian Marco Iodiceedfa9f42017-08-15 11:45:22 +0100229
Gian Marco Iodice215b4ea2018-06-28 16:29:29 +0100230 // Allocate the output tensor for flatten once all the configure methods have been called
231 _flatten_output.allocator()->allocate();
Gian Marco Iodiceedfa9f42017-08-15 11:45:22 +0100232}
233
Manuel Bottini2b84be52020-04-08 10:15:51 +0100234void CLFullyConnectedLayer::configure_fc_fc(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
235 const FullyConnectedLayerInfo &fc_info)
Gian Marco Iodiceedfa9f42017-08-15 11:45:22 +0100236{
237 ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1));
238
239 // Configure matrix multiply kernel
Manuel Bottini2b84be52020-04-08 10:15:51 +0100240 configure_mm(compile_context, input, weights, bias, output, fc_info);
Gian Marco Iodiceedfa9f42017-08-15 11:45:22 +0100241}
242
Georgios Pinitas7d66a8e2018-07-17 12:28:42 +0100243void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
244 FullyConnectedLayerInfo fc_info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100245{
Manuel Bottini2b84be52020-04-08 10:15:51 +0100246 configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, fc_info);
247}
248
249void CLFullyConnectedLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
250 FullyConnectedLayerInfo fc_info)
251{
Georgios Pinitas358ca202017-12-07 16:47:52 +0000252 ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
253
254 // Perform validate step
255 ARM_COMPUTE_ERROR_THROW_ON(CLFullyConnectedLayer::validate(input->info(),
256 weights->info(),
257 biases != nullptr ? biases->info() : nullptr,
258 output->info(),
Georgios Pinitas7d66a8e2018-07-17 12:28:42 +0100259 fc_info));
Gian Marco Iodiceedfa9f42017-08-15 11:45:22 +0100260
Georgios Pinitas7d66a8e2018-07-17 12:28:42 +0100261 _are_weights_converted = true;
262 _are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
263 _is_fc_after_conv = true;
Georgios Pinitas7d66a8e2018-07-17 12:28:42 +0100264 _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
Michele Di Giorgioba1ffe92018-08-22 14:28:30 +0100265 _is_prepared = fc_info.retain_internal_weights;
Georgios Pinitas7d66a8e2018-07-17 12:28:42 +0100266 _original_weights = weights;
Gian Marco Iodiceedfa9f42017-08-15 11:45:22 +0100267
Michalis Spyroub27e13a2019-09-27 11:04:27 +0100268 if(_weights_manager)
269 {
270 _weights_manager->manage(weights);
271 }
272
Georgios Pinitas7d66a8e2018-07-17 12:28:42 +0100273 const ICLTensor *weights_to_use = weights;
274
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100275 // With the Fully Connected layer we can have 4 different cases:
276 // 1) Convolution layer -> Fully Connected layer without batches
277 // 2) Fully Connected layer -> Fully Connected layer without batches
278 // 3) Convolution layer -> Fully Connected layer with batches
279 // 4) Fully Connected layer -> Fully Connected layer with batches
280
Gian Marco Iodiceedfa9f42017-08-15 11:45:22 +0100281 // Check if we have a fully connected layer with batches
282 const bool is_batched_fc_layer = output->info()->dimension(1) > 1;
Gian Marco Iodiceedfa9f42017-08-15 11:45:22 +0100283 if(is_batched_fc_layer)
Moritz Pflanzer768e9f12017-08-11 15:33:30 +0100284 {
Gian Marco Iodiceedfa9f42017-08-15 11:45:22 +0100285 _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(input->info()->tensor_shape().cbegin() + 3,
286 input->info()->tensor_shape().cend(),
287 output->info()->tensor_shape().cbegin() + 1));
Moritz Pflanzer768e9f12017-08-11 15:33:30 +0100288 }
289 else
290 {
Gian Marco Iodiceedfa9f42017-08-15 11:45:22 +0100291 _is_fc_after_conv = input->info()->num_dimensions() > 1;
Moritz Pflanzer768e9f12017-08-11 15:33:30 +0100292 }
293
Georgios Pinitas7d66a8e2018-07-17 12:28:42 +0100294 // Reshape weights if needed
295 if(!_are_weights_reshaped)
296 {
Michalis Spyroub27e13a2019-09-27 11:04:27 +0100297 if(_weights_manager && _weights_manager->are_weights_managed(weights))
298 {
Manuel Bottini2b84be52020-04-08 10:15:51 +0100299 _reshape_weights_managed_function.configure(compile_context, weights);
Michalis Spyroub27e13a2019-09-27 11:04:27 +0100300 weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(weights, &_reshape_weights_managed_function));
301 }
302 else
303 {
304 // Reshape the weights
Manuel Bottini2b84be52020-04-08 10:15:51 +0100305 _reshape_weights_function.configure(compile_context, weights, &_reshape_weights_output);
Michalis Spyroub27e13a2019-09-27 11:04:27 +0100306 weights_to_use = &_reshape_weights_output;
307 }
Georgios Pinitas7d66a8e2018-07-17 12:28:42 +0100308 }
309
310 // Convert weights if needed
311 if(_is_fc_after_conv && (input->info()->data_layout() != fc_info.weights_trained_layout))
312 {
Michalis Spyroub27e13a2019-09-27 11:04:27 +0100313 if(_weights_manager && _weights_manager->are_weights_managed(weights_to_use))
314 {
Manuel Bottini2b84be52020-04-08 10:15:51 +0100315 _convert_weights_managed.configure(compile_context, weights_to_use,
Michalis Spyroub27e13a2019-09-27 11:04:27 +0100316 input->info()->tensor_shape(),
317 fc_info.weights_trained_layout);
318 weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(weights, &_convert_weights_managed));
319 }
320 else
321 {
322 // Convert weights
Manuel Bottini2b84be52020-04-08 10:15:51 +0100323 _convert_weights.configure(compile_context, weights_to_use,
Michalis Spyroub27e13a2019-09-27 11:04:27 +0100324 &_converted_weights_output,
325 input->info()->tensor_shape(),
326 fc_info.weights_trained_layout);
Georgios Pinitas7d66a8e2018-07-17 12:28:42 +0100327
Michalis Spyroub27e13a2019-09-27 11:04:27 +0100328 weights_to_use = &_converted_weights_output;
329 }
Georgios Pinitas7d66a8e2018-07-17 12:28:42 +0100330 _are_weights_converted = false;
331 }
332
Gian Marco Iodiceedfa9f42017-08-15 11:45:22 +0100333 if(_is_fc_after_conv)
Moritz Pflanzer768e9f12017-08-11 15:33:30 +0100334 {
Gian Marco Iodiceedfa9f42017-08-15 11:45:22 +0100335 // Fully Connected layer after a Convolution Layer without batches
Manuel Bottini2b84be52020-04-08 10:15:51 +0100336 configure_conv_fc(compile_context, input, weights_to_use, biases, output, fc_info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100337 }
Gian Marco Iodiceedfa9f42017-08-15 11:45:22 +0100338 else
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100339 {
Gian Marco Iodiceedfa9f42017-08-15 11:45:22 +0100340 // Fully Connected layer after a Fully Connected Layer without batches
Manuel Bottini2b84be52020-04-08 10:15:51 +0100341 configure_fc_fc(compile_context, input, weights_to_use, biases, output, fc_info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100342 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100343}
344
Georgios Pinitas7d66a8e2018-07-17 12:28:42 +0100345Status CLFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
346 FullyConnectedLayerInfo fc_info)
Georgios Pinitas358ca202017-12-07 16:47:52 +0000347{
348 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
Sang-Hoon Parkb66aa3b2020-01-10 14:44:13 +0000349 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
Georgios Pinitas358ca202017-12-07 16:47:52 +0000350 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
351 ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);
Giorgio Arena1856ff72020-02-07 13:46:45 +0000352 ARM_COMPUTE_RETURN_ERROR_ON(fc_info.activation_info.enabled() && is_data_type_quantized(input->data_type()) && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::RELU
353 && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU);
Georgios Pinitas358ca202017-12-07 16:47:52 +0000354
Georgios Pinitas8b721992019-10-28 16:24:28 +0000355 bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
356 bool is_fc_after_conv = true;
Georgios Pinitas358ca202017-12-07 16:47:52 +0000357
Gian Marco Iodice215b4ea2018-06-28 16:29:29 +0100358 const ITensorInfo &flatten_input = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_flatten_shape(input)).set_data_layout(DataLayout::NCHW));
Georgios Pinitas7d66a8e2018-07-17 12:28:42 +0100359 const ITensorInfo &reshaped_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights)));
Georgios Pinitas195b0ba2018-08-02 17:18:51 +0100360 const ITensorInfo &converted_weights = weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) : TensorInfo(*reshaped_weights.clone());
Georgios Pinitas358ca202017-12-07 16:47:52 +0000361
362 // With the Fully Connected layer we can have 4 different cases:
363 // 1) Convolution layer -> Fully Connected layer without batches
364 // 2) Fully Connected layer -> Fully Connected layer without batches
365 // 3) Convolution layer -> Fully Connected layer with batches
366 // 4) Fully Connected layer -> Fully Connected layer with batches
367
368 const ITensorInfo *input_to_use = input;
369 const ITensorInfo *weights_to_use = weights;
Georgios Pinitas358ca202017-12-07 16:47:52 +0000370
Georgios Pinitas358ca202017-12-07 16:47:52 +0000371 // Check if we have a fully connected layer with batches
372 const bool is_batched_fc_layer = output->dimension(1) > 1;
Georgios Pinitas358ca202017-12-07 16:47:52 +0000373 if(is_batched_fc_layer)
374 {
375 is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(input->tensor_shape().cbegin() + 3,
376 input->tensor_shape().cend(),
377 output->tensor_shape().cbegin() + 1));
378 }
379 else
380 {
381 is_fc_after_conv = input->num_dimensions() > 1;
382 }
383
Georgios Pinitas7d66a8e2018-07-17 12:28:42 +0100384 if(!weights_reshaped)
385 {
386 // Validate reshape weights kernel
387 ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayerReshapeWeights::validate(weights, &reshaped_weights));
388 weights_to_use = &reshaped_weights;
389 }
390
391 if(is_fc_after_conv && (input->data_layout() != fc_info.weights_trained_layout))
392 {
393 // Validate convert weights kernel
394 ARM_COMPUTE_RETURN_ON_ERROR(CLConvertFullyConnectedWeights::validate(weights_to_use,
395 &converted_weights,
396 input->tensor_shape(),
397 fc_info.weights_trained_layout));
398 weights_to_use = &converted_weights;
399 }
400
Georgios Pinitas358ca202017-12-07 16:47:52 +0000401 if(is_fc_after_conv)
402 {
403 // Fully Connected layer after a Convolution Layer without batches
404 ARM_COMPUTE_RETURN_ERROR_ON((weights_to_use->dimension(1) != (input->dimension(0) * input->dimension(1) * input->dimension(2))));
405
Gian Marco Iodice215b4ea2018-06-28 16:29:29 +0100406 // Validate flatten kernel
407 ARM_COMPUTE_RETURN_ON_ERROR(CLFlattenLayer::validate(input, &flatten_input));
408 input_to_use = &flatten_input;
Georgios Pinitas358ca202017-12-07 16:47:52 +0000409 }
410 else
411 {
412 // Fully Connected layer after a Fully Connected Layer without batches
413 ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1));
414 }
Georgios Pinitas8b721992019-10-28 16:24:28 +0000415
Georgios Pinitas358ca202017-12-07 16:47:52 +0000416 // Validate matrix multiply kernel
Georgios Pinitas44bfc3f2019-10-28 14:16:31 +0000417 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(*input_to_use, *weights_to_use, biases, *output, fc_info));
Georgios Pinitas358ca202017-12-07 16:47:52 +0000418
419 return Status{};
420}
421
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100422void CLFullyConnectedLayer::run()
423{
Georgios Pinitase0437672018-05-02 14:07:55 +0100424 prepare();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100425
Georgios Pinitasda953f22019-04-02 17:27:03 +0100426 MemoryGroupResourceScope scope_mg(_memory_group);
Georgios Pinitasbaf174e2017-09-08 19:47:30 +0100427
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100428 // Linearize input if it comes from a convolutional layer
Gian Marco Iodiceedfa9f42017-08-15 11:45:22 +0100429 if(_is_fc_after_conv)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100430 {
Gian Marco Iodice215b4ea2018-06-28 16:29:29 +0100431 _flatten_layer.run();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100432 }
433
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100434 // Run matrix multiply
Georgios Pinitas45bcc3a2017-11-29 11:06:49 +0000435 if(_is_quantized)
436 {
437 _mm_gemmlowp.run();
438 }
439 else
440 {
Gian Marco Iodicec9c62c22018-04-06 10:00:10 +0100441 _mm_gemm.run();
Georgios Pinitas45bcc3a2017-11-29 11:06:49 +0000442 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100443}
Georgios Pinitase0437672018-05-02 14:07:55 +0100444
445void CLFullyConnectedLayer::prepare()
446{
Georgios Pinitas7d66a8e2018-07-17 12:28:42 +0100447 if(!_is_prepared)
Georgios Pinitase0437672018-05-02 14:07:55 +0100448 {
Michalis Spyroub27e13a2019-09-27 11:04:27 +0100449 if(!_weights_manager)
450 {
451 ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
452 }
Georgios Pinitase0437672018-05-02 14:07:55 +0100453
Georgios Pinitas7d66a8e2018-07-17 12:28:42 +0100454 auto release_unused = [](CLTensor * w)
455 {
456 if(!w->is_used())
457 {
458 CLScheduler::get().queue().finish();
459 w->allocator()->free();
460 }
461 };
462
463 // Pointer to current weights
464 const ICLTensor *cur_weights = _original_weights;
465
466 // Reshape of the weights if needed (happens only once)
467 if(!_are_weights_reshaped)
468 {
Michalis Spyroub27e13a2019-09-27 11:04:27 +0100469 if(_weights_manager && _weights_manager->are_weights_managed(_original_weights))
470 {
471 cur_weights = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->run(cur_weights, &_reshape_weights_managed_function));
472 }
473 else
474 {
475 // Run reshape weights kernel and mark weights as unused
476 _reshape_weights_output.allocator()->allocate();
477 _reshape_weights_function.run();
Georgios Pinitas7d66a8e2018-07-17 12:28:42 +0100478
Michalis Spyroub27e13a2019-09-27 11:04:27 +0100479 cur_weights->mark_as_unused();
480 cur_weights = &_reshape_weights_output;
481 }
Georgios Pinitas7d66a8e2018-07-17 12:28:42 +0100482 _are_weights_reshaped = true;
483 }
484
485 // Convert weights if needed (happens only once)
486 if(!_are_weights_converted)
487 {
Michalis Spyroub27e13a2019-09-27 11:04:27 +0100488 if(_weights_manager && _weights_manager->are_weights_managed(cur_weights))
489 {
490 _weights_manager->run(cur_weights, &_convert_weights_managed);
491 }
492 else
493 {
494 _converted_weights_output.allocator()->allocate();
495 _convert_weights.run();
496 cur_weights->mark_as_unused();
497 }
Georgios Pinitas7d66a8e2018-07-17 12:28:42 +0100498
Georgios Pinitas7d66a8e2018-07-17 12:28:42 +0100499 _are_weights_converted = true;
500 }
501
502 // Release reshaped weights if unused
503 release_unused(&_reshape_weights_output);
Georgios Pinitase0437672018-05-02 14:07:55 +0100504
505 // Prepare GEMM prepare and release unused weights
506 if(!_is_quantized)
507 {
508 _mm_gemm.prepare();
Georgios Pinitase0437672018-05-02 14:07:55 +0100509 }
510
Georgios Pinitas7d66a8e2018-07-17 12:28:42 +0100511 // Release converted weights if unused
512 release_unused(&_reshape_weights_output);
513 release_unused(&_converted_weights_output);
514
515 _is_prepared = true;
Georgios Pinitase0437672018-05-02 14:07:55 +0100516 }
517}
Michalis Spyroub27e13a2019-09-27 11:04:27 +0100518} // namespace arm_compute