/*
 * Copyright (c) 2017-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"

#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "src/core/cpu/kernels/CpuTransposeKernel.h"

#include <cmath>

namespace arm_compute
{
using namespace arm_compute::misc::shape_calculator;

namespace
{
// Get min, max bound of a quantized asymmetric output tensor, with the effect of fused activation
std::pair<PixelValue, PixelValue> get_quantized_asymmetric_output_min_max(const QuantizationInfo &q_info, const ActivationLayerInfo &act_info, DataType data_type)
{
    PixelValue type_min{};
    PixelValue type_max{};
    std::tie(type_min, type_max) = get_min_max(data_type);
    const UniformQuantizationInfo q_unif = q_info.uniform();

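    // Note: in an asymmetric scheme the real value 0 quantizes to the zero point
    // (q_unif.offset), so the fused ReLU variants clamp the quantized output at the
    // offset rather than at the numeric minimum of the data type.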
    if(act_info.enabled())
    {
        switch(act_info.activation())
        {
            case ActivationLayerInfo::ActivationFunction::RELU:
                type_min = PixelValue(q_unif.offset);
                break;
            case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
                type_min = PixelValue(q_unif.offset);
                type_max = PixelValue(act_info.a(), data_type, q_info);
                break;
            case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
                type_min = PixelValue(act_info.b(), data_type, q_info);
                type_max = PixelValue(act_info.a(), data_type, q_info);
                break;
            default:
                ARM_COMPUTE_ERROR("Activation function not supported.");
                break;
        }
    }

    return std::make_pair(type_min, type_max);
}

Status get_gemmlowp_output_stage_info(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const ActivationLayerInfo &act,
                                      GEMMLowpOutputStageInfo &gemmlowp_output_stage_info)
{
    const auto                    data_type = input->data_type();
    const QuantizationInfo        oq_info   = output->quantization_info();
    const UniformQuantizationInfo iq_unif   = input->quantization_info().uniform();
    const UniformQuantizationInfo wq_unif   = weights->quantization_info().uniform();
    const UniformQuantizationInfo oq_unif   = oq_info.uniform();

    float   multiplier = (iq_unif.scale * wq_unif.scale) / oq_unif.scale;
    int32_t output_multiplier;
    int32_t output_shift;

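    // Decompose the real requantization multiplier into a fixed-point integer multiplier
    // and a shift, such that multiplier ~= output_multiplier * 2^(-output_shift).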
    ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));

    PixelValue type_min{};
    PixelValue type_max{};
    std::tie(type_min, type_max) = get_quantized_asymmetric_output_min_max(oq_info, act, data_type);

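    // The fixed-point output stage rescales the int32 accumulator by the multiplier/shift
    // pair, adds the output zero point and clamps the result to [min_bound, max_bound].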
    gemmlowp_output_stage_info.gemmlowp_multiplier = output_multiplier;
    gemmlowp_output_stage_info.gemmlowp_shift      = output_shift;
    gemmlowp_output_stage_info.gemmlowp_offset     = oq_unif.offset;
    gemmlowp_output_stage_info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    gemmlowp_output_stage_info.gemmlowp_min_bound  = type_min.get<int32_t>();
    gemmlowp_output_stage_info.gemmlowp_max_bound  = type_max.get<int32_t>();

    return Status{};
}

Status validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ActivationLayerInfo &act)
{
    if(is_data_type_quantized_asymmetric(input->data_type()))
    {
        // Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
        // Extract and negate input and weights offset
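        // (with real_value = scale * (q - offset), flipping the sign of the offset lets the
        // zero-point subtraction be handled as an addition of the negated offset)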
        const QuantizationInfo input_quantization_info(input->quantization_info().uniform().scale, -input->quantization_info().uniform().offset);
        const QuantizationInfo weights_quantization_info(weights->quantization_info().uniform().scale, -weights->quantization_info().uniform().offset);

        GEMMLowpOutputStageInfo gemmlowp_output_stage_info;
        ARM_COMPUTE_RETURN_ON_ERROR(get_gemmlowp_output_stage_info(input, weights, output, act, gemmlowp_output_stage_info));

        GEMMInfo gemm_info;
        gemm_info.set_gemmlowp_output_stage(gemmlowp_output_stage_info);

        // Validate gemmlowp function
        ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyCore::validate(&input->clone()->set_quantization_info(input_quantization_info),
                                                                           &weights->clone()->set_quantization_info(weights_quantization_info),
                                                                           biases,
                                                                           output,
                                                                           gemm_info));
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEGEMM::validate(input, weights, biases, output, 1.f, 1.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run */)));
    }

    return Status{};
}
} // namespace

NEFullyConnectedLayer::~NEFullyConnectedLayer() = default;

NEFullyConnectedLayer::NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
    : _memory_group(std::move(memory_manager)), _weights_manager(weights_manager), _flatten(), _convert_weights(), _convert_weights_managed(), _reshape_weights_function(),
      _reshape_weights_managed_function(), _mm_gemm(nullptr, weights_manager), _mm_gemmlowp(nullptr, weights_manager), _flatten_output(), _converted_weights_output(), _reshape_weights_output(),
      _original_weights(nullptr), _are_weights_converted(true), _are_weights_reshaped(false), _is_fc_after_conv(false), _is_quantized_asymmetric(false), _is_prepared(false)
{
}

void NEFullyConnectedLayer::configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act)
{
    if(_is_quantized_asymmetric)
    {
        // Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
        // Extract and negate input and weights offset
        const QuantizationInfo input_quantization_info   = input->info()->quantization_info();
        const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();

        input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
        weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));

        // Configure gemmlowp function and output stage for asymmetric quantized types
        GEMMLowpOutputStageInfo gemmlowp_output_stage_info;
        const Status            status = get_gemmlowp_output_stage_info(input->info(), weights->info(), output->info(), act, gemmlowp_output_stage_info);
        ARM_COMPUTE_ERROR_ON(status.error_code() != ErrorCode::OK);

        GEMMInfo gemm_info;
        gemm_info.set_gemmlowp_output_stage(gemmlowp_output_stage_info);
        gemm_info.set_activation_info(act);
        _mm_gemmlowp.configure(input, weights, biases, output, gemm_info);

        // Revert back QuantizationInfo as input and weights could be used in other fully connected layers
        input->info()->set_quantization_info(input_quantization_info);
        weights->info()->set_quantization_info(weights_quantization_info);
    }
    else
    {
        // Configure matrix multiply kernel
        GEMMInfo gemm_info(false, false, true /* Reshape weights only for the first run */);
        gemm_info.set_activation_info(act);
        _mm_gemm.configure(input, weights, biases, output, 1.f, 1.0f, gemm_info);
    }
}

void NEFullyConnectedLayer::configure_conv_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act)
{
    ARM_COMPUTE_ERROR_ON((weights->info()->dimension(1) != (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));

    // If the fully connected layer is called after a convolution layer, the input tensor must be linearized

    // Initialize output tensor for flatten
    TensorShape shape_flatten = compute_flatten_shape(input->info());
    _flatten_output.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_flatten));

    // Configure flatten kernel
    _memory_group.manage(&_flatten_output);

    _flatten.configure(input, &_flatten_output);

    // Configure matrix multiply kernel
    configure_mm(&_flatten_output, weights, biases, output, act);

    // Allocate the output tensor for flatten once all the configure methods have been called
    _flatten_output.allocator()->allocate();
}

void NEFullyConnectedLayer::configure_fc_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act)
{
    ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1));

    // Configure matrix multiply kernel
    configure_mm(input, weights, biases, output, act);
}

void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output,
                                      FullyConnectedLayerInfo fc_info)
{
    // Perform validate step
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedLayer::validate(input->info(),
                                                               weights->info(),
                                                               biases != nullptr ? biases->info() : nullptr,
                                                               output->info(),
                                                               fc_info));

    _are_weights_converted   = true;
    _are_weights_reshaped    = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
    _is_fc_after_conv        = true;
    _is_quantized_asymmetric = is_data_type_quantized_asymmetric(input->info()->data_type());
    _original_weights        = weights;

    if(_weights_manager)
    {
        _weights_manager->manage(weights);
    }

    // With the Fully Connected layer we can have 4 different cases:
    // 1) Convolution layer -> Fully Connected layer without batches
    // 2) Fully Connected layer -> Fully Connected layer without batches
    // 3) Convolution layer -> Fully Connected layer with batches
    // 4) Fully Connected layer -> Fully Connected layer with batches

    const ITensor *weights_to_use = weights;

    // Check if we have a fully connected layer with batches
    const bool is_batched_fc_layer = output->info()->dimension(1) > 1;
    if(is_batched_fc_layer)
    {
        _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(input->info()->tensor_shape().cbegin() + 3,
                                                                                  input->info()->tensor_shape().cend(),
                                                                                  output->info()->tensor_shape().cbegin() + 1));
    }
    else
    {
        _is_fc_after_conv = input->info()->num_dimensions() > 1;
    }
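    // In the batched case the input is treated as a convolution output when everything
    // above its spatial/channel dimensions (dimension 3 onwards) matches the output's
    // batch dimensions; in the non-batched case any multi-dimensional input implies a
    // convolution output that still needs flattening.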

    // Reshape weights if needed
    if(!_are_weights_reshaped)
    {
        if(_weights_manager && _weights_manager->are_weights_managed(weights))
        {
            _reshape_weights_managed_function.configure(weights);
            weights_to_use = _weights_manager->acquire(weights, &_reshape_weights_managed_function);
        }
        else
        {
            // Reshape the weights
            _reshape_weights_function.configure(weights, &_reshape_weights_output);
            weights_to_use = &_reshape_weights_output;
        }
    }

    // Convert weights if needed
    if(_is_fc_after_conv && (input->info()->data_layout() != fc_info.weights_trained_layout))
    {
        if(_weights_manager && _weights_manager->are_weights_managed(weights_to_use))
        {
            _convert_weights_managed.configure(weights_to_use,
                                               input->info()->tensor_shape(),
                                               fc_info.weights_trained_layout);
            weights_to_use = _weights_manager->acquire(weights, &_convert_weights_managed);
        }
        else
        {
            // Convert weights
            _convert_weights.configure(weights_to_use,
                                       &_converted_weights_output,
                                       input->info()->tensor_shape(),
                                       fc_info.weights_trained_layout);

            weights_to_use = &_converted_weights_output;
        }
        _are_weights_converted = false;
    }

    if(_is_fc_after_conv)
    {
        // Fully Connected layer after a Convolution Layer without batches
        configure_conv_fc(input, weights_to_use, biases, output, fc_info.activation_info);
    }
    else
    {
        // Fully Connected layer after a Fully Connected Layer without batches
        configure_fc_fc(input, weights_to_use, biases, output, fc_info.activation_info);
    }

    _are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights;
}

Status NEFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                       FullyConnectedLayerInfo fc_info)
{
    ARM_COMPUTE_UNUSED(fc_info.retain_internal_weights);
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);
    ARM_COMPUTE_RETURN_ERROR_ON(biases != nullptr && biases->num_dimensions() > 1);
    ARM_COMPUTE_RETURN_ERROR_ON(fc_info.activation_info.enabled() && is_data_type_quantized(input->data_type()) && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::RELU
                                && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU);

    bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
    bool is_fc_after_conv = true;

    const ITensorInfo &flatten_input     = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_flatten_shape(input)));
    const ITensorInfo &reshaped_weights  = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights)));
    const ITensorInfo &converted_weights = weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) : TensorInfo(*reshaped_weights.clone());
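    // These cloned TensorInfos stand in for the intermediate tensors that configure()
    // would create, so each stage can be validated without allocating any memory.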

    // With the Fully Connected layer we can have 4 different cases:
    // 1) Convolution layer -> Fully Connected layer without batches
    // 2) Fully Connected layer -> Fully Connected layer without batches
    // 3) Convolution layer -> Fully Connected layer with batches
    // 4) Fully Connected layer -> Fully Connected layer with batches

    const ITensorInfo *input_to_use   = input;
    const ITensorInfo *weights_to_use = weights;

    // Check if we have a fully connected layer with batches
    const bool is_batched_fc_layer = output->dimension(1) > 1;

    if(is_batched_fc_layer)
    {
        is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(input->tensor_shape().cbegin() + 3,
                                                                                 input->tensor_shape().cend(),
                                                                                 output->tensor_shape().cbegin() + 1));
    }
    else
    {
        is_fc_after_conv = input->num_dimensions() > 1;
    }

    if(!weights_reshaped)
    {
        // Validate reshape weights kernel
        ARM_COMPUTE_RETURN_ON_ERROR(NETranspose::validate(weights, &reshaped_weights));
        weights_to_use = &reshaped_weights;
    }

    if(is_fc_after_conv && (input->data_layout() != fc_info.weights_trained_layout))
    {
        // Validate convert weights kernel
        ARM_COMPUTE_RETURN_ON_ERROR(NEConvertFullyConnectedWeights::validate(weights_to_use,
                                                                             &converted_weights,
                                                                             input->tensor_shape(),
                                                                             fc_info.weights_trained_layout));
        weights_to_use = &converted_weights;
    }

    if(is_fc_after_conv)
    {
        // Fully Connected layer after a Convolution Layer without batches
        ARM_COMPUTE_RETURN_ERROR_ON((weights_to_use->dimension(1) != (input->dimension(0) * input->dimension(1) * input->dimension(2))));

        // Validate flatten kernel
        ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayer::validate(input, &flatten_input));
        input_to_use = &flatten_input;
    }
    else
    {
        // Fully Connected layer after a Fully Connected Layer without batches
        ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1));
    }
    // Validate matrix multiply kernel
    ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(input_to_use, weights_to_use, biases, output, fc_info.activation_info));

    return Status{};
}

void NEFullyConnectedLayer::run()
{
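    // prepare() performs the one-off weight transformations on the first call and is a
    // no-op afterwards, so it is safe to call at the start of every run.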
    prepare();

    MemoryGroupResourceScope scope_mg(_memory_group);

    // Linearize input if it comes from a convolutional layer
    if(_is_fc_after_conv)
    {
        _flatten.run();
    }

    // Run matrix multiply
    if(_is_quantized_asymmetric)
    {
        _mm_gemmlowp.run();
    }
    else
    {
        _mm_gemm.run();
    }
}

void NEFullyConnectedLayer::prepare()
{
    if(!_is_prepared)
    {
        if(!_weights_manager)
        {
            ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
        }

        auto release_unused = [](Tensor * w)
        {
            if(!w->is_used())
            {
                w->allocator()->free();
            }
        };

        // Pointer to current weights
        const ITensor *cur_weights = _original_weights;
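        // The weights go through up to two one-off transformations (transpose, then
        // layout conversion); cur_weights always tracks the most recent version so that
        // superseded copies can be marked unused and freed.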

        // Reshape of the weights (happens only once)
        if(!_are_weights_reshaped)
        {
            if(_weights_manager && _weights_manager->are_weights_managed(_original_weights))
            {
                cur_weights = _weights_manager->run(cur_weights, &_reshape_weights_managed_function);
            }
            else
            {
                // Run reshape weights function and mark original weights as unused
                _reshape_weights_output.allocator()->allocate();
                _reshape_weights_function.run();
                cur_weights->mark_as_unused();
                cur_weights = &_reshape_weights_output;
            }
            _are_weights_reshaped = true;
        }

        // Convert weights if needed (happens only once)
        if(!_are_weights_converted)
        {
            if(_weights_manager && _weights_manager->are_weights_managed(cur_weights))
            {
                _weights_manager->run(cur_weights, &_convert_weights_managed);
            }
            else
            {
                _converted_weights_output.allocator()->allocate();
                _convert_weights.run();
                cur_weights->mark_as_unused();
            }

            _are_weights_converted = true;
        }

        // Release reshaped weights if unused
        release_unused(&_reshape_weights_output);

        // Prepare GEMM and release unused weights
        if(!_is_quantized_asymmetric)
        {
            _mm_gemm.prepare();
        }

        // Release converted weights if unused
        release_unused(&_reshape_weights_output);
        release_unused(&_converted_weights_output);

        _is_prepared = true;
    }
}
} // namespace arm_compute