/*
 * Copyright (c) 2017-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"

#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "src/core/NEON/kernels/NETransposeKernel.h"

#include <algorithm>
#include <cmath>

namespace arm_compute
{
using namespace arm_compute::misc::shape_calculator;

namespace
{
// Get min, max bound of a quantized asymmetric output tensor, with the effect of fused activation
std::pair<PixelValue, PixelValue> get_quantized_asymmetric_output_min_max(const QuantizationInfo &q_info, const ActivationLayerInfo &act_info, DataType data_type)
{
    PixelValue type_min{};
    PixelValue type_max{};
    std::tie(type_min, type_max) = get_min_max(data_type);
    const UniformQuantizationInfo q_unif = q_info.uniform();

    if(act_info.enabled())
    {
        switch(act_info.activation())
        {
            case ActivationLayerInfo::ActivationFunction::RELU:
                type_min = PixelValue(q_unif.offset);
                break;
            case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
                type_min = PixelValue(q_unif.offset);
                type_max = PixelValue(act_info.a(), data_type, q_info);
                break;
            case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
                type_min = PixelValue(act_info.b(), data_type, q_info);
                type_max = PixelValue(act_info.a(), data_type, q_info);
                break;
            default:
                ARM_COMPUTE_ERROR("Activation function not supported.");
                break;
        }
    }

    return std::make_pair(type_min, type_max);
}
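
// A worked example of the bounds computed above (values are illustrative assumptions, not taken
// from this file): for a QASYMM8 output with scale = 0.1f and offset = 10, a fused BOUNDED_RELU
// with a = 6.0f yields type_min = 10 (the zero-point, i.e. the quantized 0.0f) and
// type_max = round(6.0f / 0.1f) + 10 = 70, so the requantized output is clamped to [10, 70].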

Status get_gemmlowp_output_stage_info(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const ActivationLayerInfo &act,
                                      GEMMLowpOutputStageInfo &gemmlowp_output_stage_info)
{
    const auto                    data_type = input->data_type();
    const QuantizationInfo        oq_info   = output->quantization_info();
    const UniformQuantizationInfo iq_unif   = input->quantization_info().uniform();
    const UniformQuantizationInfo wq_unif   = weights->quantization_info().uniform();
    const UniformQuantizationInfo oq_unif   = oq_info.uniform();

    float   multiplier = (iq_unif.scale * wq_unif.scale) / oq_unif.scale;
    int32_t output_multiplier;
    int32_t output_shift;

    ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));

    PixelValue type_min{};
    PixelValue type_max{};
    std::tie(type_min, type_max) = get_quantized_asymmetric_output_min_max(oq_info, act, data_type);

    gemmlowp_output_stage_info.gemmlowp_multiplier = output_multiplier;
    gemmlowp_output_stage_info.gemmlowp_shift      = output_shift;
    gemmlowp_output_stage_info.gemmlowp_offset     = oq_unif.offset;
    gemmlowp_output_stage_info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    gemmlowp_output_stage_info.gemmlowp_min_bound  = type_min.get<int32_t>();
    gemmlowp_output_stage_info.gemmlowp_max_bound  = type_max.get<int32_t>();

    return Status{};
}
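
// Note on the fixed-point requantization above (a clarifying sketch, not behaviour stated in this
// file): calculate_quantized_multiplier() re-expresses the real rescale factor
// (input_scale * weights_scale) / output_scale as an integer multiplier plus a shift, so the int32
// accumulators can be scaled back to the output's quantized domain with integer arithmetic only.
// For example (assumed values), a factor of 0.25f would roughly map to
// output_multiplier = 1073741824 (0.5 in Q0.31 fixed point) and output_shift = 1,
// since (1073741824 / 2^31) * 2^-1 = 0.25.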

Status validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ActivationLayerInfo &act)
{
    if(is_data_type_quantized_asymmetric(input->data_type()))
    {
        // Since we need negative offsets for the quantized matrix multiplication, we need to change the QuantizationInfo()
        // Extract and negate input and weights offset
        const QuantizationInfo input_quantization_info(input->quantization_info().uniform().scale, -input->quantization_info().uniform().offset);
        const QuantizationInfo weights_quantization_info(weights->quantization_info().uniform().scale, -weights->quantization_info().uniform().offset);

        GEMMLowpOutputStageInfo gemmlowp_output_stage_info;
        ARM_COMPUTE_RETURN_ON_ERROR(get_gemmlowp_output_stage_info(input, weights, output, act, gemmlowp_output_stage_info));

        GEMMInfo gemm_info;
        gemm_info.set_gemmlowp_output_stage(gemmlowp_output_stage_info);

        // Validate gemmlowp function
        ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyCore::validate(&input->clone()->set_quantization_info(input_quantization_info),
                                                                           &weights->clone()->set_quantization_info(weights_quantization_info),
                                                                           biases,
                                                                           output,
                                                                           gemm_info));
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEGEMM::validate(input, weights, biases, output, 1.f, 1.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run */)));
    }

    return Status{};
}
} // namespace

void NEFullyConnectedLayerReshapeWeights::configure(const ITensor *input, ITensor *output)
{
    auto k = std::make_unique<NETransposeKernel>();
    k->configure(input, output);
    _kernel = std::move(k);
}

Status NEFullyConnectedLayerReshapeWeights::validate(const ITensorInfo *input, const ITensorInfo *output)
{
    return NETransposeKernel::validate(input, output);
}
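
// Note: the weights "reshape" performed by this function is a plain 2-D transpose (it wraps
// NETransposeKernel). As an illustrative example with assumed shapes, a weights tensor of shape
// (128, 64) becomes (64, 128) before being handed to the matrix multiplication.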

NEFullyConnectedLayer::~NEFullyConnectedLayer() = default;

NEFullyConnectedLayer::NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
    : _memory_group(std::move(memory_manager)), _weights_manager(weights_manager), _flatten(), _convert_weights(), _convert_weights_managed(), _reshape_weights_function(),
      _reshape_weights_managed_function(), _mm_gemm(nullptr, weights_manager), _mm_gemmlowp(nullptr, weights_manager), _flatten_output(), _converted_weights_output(), _reshape_weights_output(),
      _original_weights(nullptr), _are_weights_converted(true), _are_weights_reshaped(false), _is_fc_after_conv(false), _is_quantized_asymmetric(false), _is_prepared(false)
{
}

void NEFullyConnectedLayer::configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act)
{
    if(_is_quantized_asymmetric)
    {
        // Since we need negative offsets for the quantized matrix multiplication, we need to change the QuantizationInfo()
        // Extract and negate input and weights offset
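        // (Clarifying note based on the usual low-precision GEMM convention, not something stated
        // in this file: the GEMMLowp core adds the offsets it is given to the quantized values,
        // so passing the negated zero-points makes it effectively compute
        // (q_input - input_offset) * (q_weight - weight_offset), as the real-valued product requires.)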
        const QuantizationInfo input_quantization_info   = input->info()->quantization_info();
        const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();

        input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
        weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));

        // Configure gemmlowp function and output stage for asymmetric quantized types
        GEMMLowpOutputStageInfo gemmlowp_output_stage_info;
        const Status            status = get_gemmlowp_output_stage_info(input->info(), weights->info(), output->info(), act, gemmlowp_output_stage_info);
        ARM_COMPUTE_ERROR_ON(status.error_code() != ErrorCode::OK);

        GEMMInfo gemm_info;
        gemm_info.set_gemmlowp_output_stage(gemmlowp_output_stage_info);
        gemm_info.set_activation_info(act);
        _mm_gemmlowp.configure(input, weights, biases, output, gemm_info);

        // Revert back the QuantizationInfo as input and weights could be used in other fully connected layers
        input->info()->set_quantization_info(input_quantization_info);
        weights->info()->set_quantization_info(weights_quantization_info);
    }
    else
    {
        // Configure matrix multiply kernel
        GEMMInfo gemm_info(false, false, true /* Reshape weights only for the first run */);
        gemm_info.set_activation_info(act);
        _mm_gemm.configure(input, weights, biases, output, 1.f, 1.0f, gemm_info);
    }
}

void NEFullyConnectedLayer::configure_conv_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act)
{
    ARM_COMPUTE_ERROR_ON((weights->info()->dimension(1) != (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));

    // If the fully connected layer is called after a convolution layer, the input tensor must be linearized

    // Initialize output tensor for flatten
    TensorShape shape_flatten = compute_flatten_shape(input->info());
    _flatten_output.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_flatten));

    // Configure flatten kernel
    _memory_group.manage(&_flatten_output);

    _flatten.configure(input, &_flatten_output);

    // Configure matrix multiply kernel
    configure_mm(&_flatten_output, weights, biases, output, act);

    // Allocate the output tensor for flatten once all the configure methods have been called
    _flatten_output.allocator()->allocate();
}

void NEFullyConnectedLayer::configure_fc_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act)
{
    ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1));

    // Configure matrix multiply kernel
    configure_mm(input, weights, biases, output, act);
}

void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output,
                                      FullyConnectedLayerInfo fc_info)
{
    // Perform validate step
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedLayer::validate(input->info(),
                                                               weights->info(),
                                                               biases != nullptr ? biases->info() : nullptr,
                                                               output->info(),
                                                               fc_info));

    _are_weights_converted   = true;
    _are_weights_reshaped    = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
    _is_fc_after_conv        = true;
    _is_quantized_asymmetric = is_data_type_quantized_asymmetric(input->info()->data_type());
    _original_weights        = weights;

    if(_weights_manager)
    {
        _weights_manager->manage(weights);
    }

    // With the Fully Connected layer we can have 4 different cases:
    //  1) Convolution layer -> Fully Connected layer without batches
    //  2) Fully Connected layer -> Fully Connected layer without batches
    //  3) Convolution layer -> Fully Connected layer with batches
    //  4) Fully Connected layer -> Fully Connected layer with batches

    const ITensor *weights_to_use = weights;

    // Check if we have a fully connected layer with batches
    const bool is_batched_fc_layer = output->info()->dimension(1) > 1;
    if(is_batched_fc_layer)
    {
        _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(input->info()->tensor_shape().cbegin() + 3,
                                                                                  input->info()->tensor_shape().cend(),
                                                                                  output->info()->tensor_shape().cbegin() + 1));
    }
    else
    {
        _is_fc_after_conv = input->info()->num_dimensions() > 1;
    }
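    // Illustrative example of the check above (shapes are assumptions, not taken from this file):
    // an input of shape (7, 7, 512, N) coming from a convolution layer and an output of shape
    // (4096, N) share the trailing batch dimension, so _is_fc_after_conv is true and the input is
    // flattened to (7 * 7 * 512, N) = (25088, N) before the matrix multiplication.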

    // Reshape weights if needed
    if(!_are_weights_reshaped)
    {
        if(_weights_manager && _weights_manager->are_weights_managed(weights))
        {
            _reshape_weights_managed_function.configure(weights);
            weights_to_use = _weights_manager->acquire(weights, &_reshape_weights_managed_function);
        }
        else
        {
            // Reshape the weights
            _reshape_weights_function.configure(weights, &_reshape_weights_output);
            weights_to_use = &_reshape_weights_output;
        }
    }

    // Convert weights if needed
    if(_is_fc_after_conv && (input->info()->data_layout() != fc_info.weights_trained_layout))
    {
        if(_weights_manager && _weights_manager->are_weights_managed(weights_to_use))
        {
            _convert_weights_managed.configure(weights_to_use,
                                               input->info()->tensor_shape(),
                                               fc_info.weights_trained_layout);
            weights_to_use = _weights_manager->acquire(weights, &_convert_weights_managed);
        }
        else
        {
            // Convert weights
            _convert_weights.configure(weights_to_use,
                                       &_converted_weights_output,
                                       input->info()->tensor_shape(),
                                       fc_info.weights_trained_layout);

            weights_to_use = &_converted_weights_output;
        }
        _are_weights_converted = false;
    }

    if(_is_fc_after_conv)
    {
        // Fully Connected layer after a Convolution Layer without batches
        configure_conv_fc(input, weights_to_use, biases, output, fc_info.activation_info);
    }
    else
    {
        // Fully Connected layer after a Fully Connected Layer without batches
        configure_fc_fc(input, weights_to_use, biases, output, fc_info.activation_info);
    }

    _are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights;
}
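
// A minimal usage sketch of the function above (all shapes, data types and the surrounding
// allocation calls are assumptions for illustration, not taken from this file):
//
//   Tensor src, weights, bias, dst;
//   src.allocator()->init(TensorInfo(TensorShape(128U), 1, DataType::F32));
//   weights.allocator()->init(TensorInfo(TensorShape(128U, 64U), 1, DataType::F32));
//   bias.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));
//   dst.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));
//
//   NEFullyConnectedLayer fc;
//   fc.configure(&src, &weights, &bias, &dst, FullyConnectedLayerInfo());
//
//   src.allocator()->allocate();
//   weights.allocator()->allocate();
//   bias.allocator()->allocate();
//   dst.allocator()->allocate();
//   // ... fill src, weights and bias ...
//   fc.run(); // prepare() is invoked once on the first call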

Status NEFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                       FullyConnectedLayerInfo fc_info)
{
    ARM_COMPUTE_UNUSED(fc_info.retain_internal_weights);
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);
    ARM_COMPUTE_RETURN_ERROR_ON(biases != nullptr && biases->num_dimensions() > 1);
    ARM_COMPUTE_RETURN_ERROR_ON(fc_info.activation_info.enabled() && is_data_type_quantized(input->data_type()) && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::RELU
                                && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU);

    bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
    bool is_fc_after_conv = true;

    const ITensorInfo &flatten_input     = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_flatten_shape(input)));
    const ITensorInfo &reshaped_weights  = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights)));
    const ITensorInfo &converted_weights = weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) : TensorInfo(*reshaped_weights.clone());

    // With the Fully Connected layer we can have 4 different cases:
    //  1) Convolution layer -> Fully Connected layer without batches
    //  2) Fully Connected layer -> Fully Connected layer without batches
    //  3) Convolution layer -> Fully Connected layer with batches
    //  4) Fully Connected layer -> Fully Connected layer with batches

    const ITensorInfo *input_to_use   = input;
    const ITensorInfo *weights_to_use = weights;

    // Check if we have a fully connected layer with batches
    const bool is_batched_fc_layer = output->dimension(1) > 1;

    if(is_batched_fc_layer)
    {
        is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(input->tensor_shape().cbegin() + 3,
                                                                                 input->tensor_shape().cend(),
                                                                                 output->tensor_shape().cbegin() + 1));
    }
    else
    {
        is_fc_after_conv = input->num_dimensions() > 1;
    }

    if(!weights_reshaped)
    {
        // Validate reshape weights kernel
        ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayerReshapeWeights::validate(weights, &reshaped_weights));
        weights_to_use = &reshaped_weights;
    }

    if(is_fc_after_conv && (input->data_layout() != fc_info.weights_trained_layout))
    {
        // Validate convert weights kernel
        ARM_COMPUTE_RETURN_ON_ERROR(NEConvertFullyConnectedWeights::validate(weights_to_use,
                                                                             &converted_weights,
                                                                             input->tensor_shape(),
                                                                             fc_info.weights_trained_layout));
        weights_to_use = &converted_weights;
    }

    if(is_fc_after_conv)
    {
        // Fully Connected layer after a Convolution Layer without batches
        ARM_COMPUTE_RETURN_ERROR_ON((weights_to_use->dimension(1) != (input->dimension(0) * input->dimension(1) * input->dimension(2))));

        // Validate flatten kernel
        ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayer::validate(input, &flatten_input));
        input_to_use = &flatten_input;
    }
    else
    {
        // Fully Connected layer after a Fully Connected Layer without batches
        ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1));
    }
    // Validate matrix multiply kernel
    ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(input_to_use, weights_to_use, biases, output, fc_info.activation_info));

    return Status{};
}

void NEFullyConnectedLayer::run()
{
    prepare();

    MemoryGroupResourceScope scope_mg(_memory_group);

    // Linearize input if it comes from a convolutional layer
    if(_is_fc_after_conv)
    {
        _flatten.run();
    }

    // Run matrix multiply
    if(_is_quantized_asymmetric)
    {
        _mm_gemmlowp.run();
    }
    else
    {
        _mm_gemm.run();
    }
}

void NEFullyConnectedLayer::prepare()
{
    if(!_is_prepared)
    {
        if(!_weights_manager)
        {
            ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
        }

        auto release_unused = [](Tensor * w)
        {
            if(!w->is_used())
            {
                w->allocator()->free();
            }
        };

        // Pointer to current weights
        const ITensor *cur_weights = _original_weights;

        // Reshape of the weights (happens only once)
        if(!_are_weights_reshaped)
        {
            if(_weights_manager && _weights_manager->are_weights_managed(_original_weights))
            {
                cur_weights = _weights_manager->run(cur_weights, &_reshape_weights_managed_function);
            }
            else
            {
                // Reshape of the weights (happens only once)
                if(!_are_weights_reshaped)
                {
                    // Run reshape weights kernel and mark weights as unused
                    _reshape_weights_output.allocator()->allocate();
                    _reshape_weights_function.run();
                }
                cur_weights->mark_as_unused();
                cur_weights = &_reshape_weights_output;
            }
            _are_weights_reshaped = true;
        }

        // Convert weights if needed (happens only once)
        if(!_are_weights_converted)
        {
            if(_weights_manager && _weights_manager->are_weights_managed(cur_weights))
            {
                _weights_manager->run(cur_weights, &_convert_weights_managed);
            }
            else
            {
                _converted_weights_output.allocator()->allocate();
                _convert_weights.run();
                cur_weights->mark_as_unused();
            }

            _are_weights_converted = true;
        }

        // Release reshaped weights if unused
        release_unused(&_reshape_weights_output);

        // Prepare GEMM and release unused weights
        if(!_is_quantized_asymmetric)
        {
            _mm_gemm.prepare();
        }

        // Release converted weights if unused
        release_unused(&_reshape_weights_output);
        release_unused(&_converted_weights_output);

        _is_prepared = true;
    }
}
} // namespace arm_compute