/*
 * Copyright (c) 2017-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"

#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "src/core/cpu/kernels/CpuTransposeKernel.h"

#include <cmath>

namespace arm_compute
{
using namespace arm_compute::misc::shape_calculator;

namespace
{
// Get min, max bound of a quantized asymmetric output tensor, with the effect of fused activation
std::pair<PixelValue, PixelValue> get_quantized_asymmetric_output_min_max(const QuantizationInfo &q_info, const ActivationLayerInfo &act_info, DataType data_type)
{
    PixelValue type_min{};
    PixelValue type_max{};
    std::tie(type_min, type_max) = get_min_max(data_type);
    const UniformQuantizationInfo q_unif = q_info.uniform();

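    // Note: a fused activation narrows the representable output range. Real 0 quantizes to
    // the zero point q_unif.offset, so the RELU-family lower bound is the offset itself,
    // while the activation's real-valued limits a/b are quantized with the output's scale/offset.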
    if(act_info.enabled())
    {
        switch(act_info.activation())
        {
            case ActivationLayerInfo::ActivationFunction::RELU:
                type_min = PixelValue(q_unif.offset);
                break;
            case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
                type_min = PixelValue(q_unif.offset);
                type_max = PixelValue(act_info.a(), data_type, q_info);
                break;
            case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
                type_min = PixelValue(act_info.b(), data_type, q_info);
                type_max = PixelValue(act_info.a(), data_type, q_info);
                break;
            default:
                ARM_COMPUTE_ERROR("Activation function not supported.");
                break;
        }
    }

    return std::make_pair(type_min, type_max);
}

Status get_gemmlowp_output_stage_info(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const ActivationLayerInfo &act,
                                      GEMMLowpOutputStageInfo &gemmlowp_output_stage_info)
{
    const auto                    data_type = input->data_type();
    const QuantizationInfo        oq_info   = output->quantization_info();
    const UniformQuantizationInfo iq_unif   = input->quantization_info().uniform();
    const UniformQuantizationInfo wq_unif   = weights->quantization_info().uniform();
    const UniformQuantizationInfo oq_unif   = oq_info.uniform();

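    // Requantization step: the GEMM accumulates in int32, and the real rescale factor
    // (iq_unif.scale * wq_unif.scale) / oq_unif.scale is decomposed into a normalized
    // fixed-point multiplier plus a right shift, so the output stage stays integer-only.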
    float   multiplier = (iq_unif.scale * wq_unif.scale) / oq_unif.scale;
    int32_t output_multiplier;
    int32_t output_shift;

    ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));

    PixelValue type_min{};
    PixelValue type_max{};
    std::tie(type_min, type_max) = get_quantized_asymmetric_output_min_max(oq_info, act, data_type);

    gemmlowp_output_stage_info.gemmlowp_multiplier = output_multiplier;
    gemmlowp_output_stage_info.gemmlowp_shift      = output_shift;
    gemmlowp_output_stage_info.gemmlowp_offset     = oq_unif.offset;
    gemmlowp_output_stage_info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    gemmlowp_output_stage_info.gemmlowp_min_bound  = type_min.get<int32_t>();
    gemmlowp_output_stage_info.gemmlowp_max_bound  = type_max.get<int32_t>();

    return Status{};
}

Status validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ActivationLayerInfo &act)
{
    if(is_data_type_quantized_asymmetric(input->data_type()))
    {
        // Since we need negative offsets for computing the matrix multiplication, we need to change QuantizationInfo()
        // Extract and negate input and weights offset
        const QuantizationInfo input_quantization_info(input->quantization_info().uniform().scale, -input->quantization_info().uniform().offset);
        const QuantizationInfo weights_quantization_info(weights->quantization_info().uniform().scale, -weights->quantization_info().uniform().offset);

        GEMMLowpOutputStageInfo gemmlowp_output_stage_info;
        ARM_COMPUTE_RETURN_ON_ERROR(get_gemmlowp_output_stage_info(input, weights, output, act, gemmlowp_output_stage_info));

        GEMMInfo gemm_info;
        gemm_info.set_gemmlowp_output_stage(gemmlowp_output_stage_info);

        // Validate gemmlowp function
        ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyCore::validate(&input->clone()->set_quantization_info(input_quantization_info),
                                                                           &weights->clone()->set_quantization_info(weights_quantization_info),
                                                                           biases,
                                                                           output,
                                                                           gemm_info));
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEGEMM::validate(input, weights, biases, output, 1.f, 1.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run */)));
    }

    return Status{};
}
} // namespace

NEFullyConnectedLayer::~NEFullyConnectedLayer() = default;

NEFullyConnectedLayer::NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
    : _memory_group(std::move(memory_manager)), _weights_manager(weights_manager), _flatten(), _convert_weights(), _convert_weights_managed(), _reshape_weights_function(),
      _reshape_weights_managed_function(), _mm_gemm(nullptr, weights_manager), _mm_gemmlowp(nullptr, weights_manager), _flatten_output(), _converted_weights_output(), _reshape_weights_output(),
      _original_weights(nullptr), _are_weights_converted(true), _are_weights_reshaped(false), _is_fc_after_conv(false), _is_quantized_asymmetric(false), _is_prepared(false)
{
}

void NEFullyConnectedLayer::configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act)
{
    if(_is_quantized_asymmetric)
    {
        // Since we need negative offsets for computing the matrix multiplication, we need to change QuantizationInfo()
        // Extract and negate input and weights offset
        const QuantizationInfo input_quantization_info   = input->info()->quantization_info();
        const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();

        input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
        weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));

        // Configure gemmlowp function and output stage for asymmetric quantized types
        GEMMLowpOutputStageInfo gemmlowp_output_stage_info;
        const Status            status = get_gemmlowp_output_stage_info(input->info(), weights->info(), output->info(), act, gemmlowp_output_stage_info);
        ARM_COMPUTE_ERROR_ON(status.error_code() != ErrorCode::OK);

        GEMMInfo gemm_info;
        gemm_info.set_gemmlowp_output_stage(gemmlowp_output_stage_info);
        gemm_info.set_activation_info(act);
        _mm_gemmlowp.configure(input, weights, biases, output, gemm_info);

        // Revert back QuantizationInfo as input and weights could be used in other fully connected layers
        input->info()->set_quantization_info(input_quantization_info);
        weights->info()->set_quantization_info(weights_quantization_info);
    }
    else
    {
        // Configure matrix multiply kernel
        GEMMInfo gemm_info(false, false, true /* Reshape weights only for the first run */);
        gemm_info.set_activation_info(act);
        _mm_gemm.configure(input, weights, biases, output, 1.f, 1.0f, gemm_info);
    }
}

void NEFullyConnectedLayer::configure_conv_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act)
{
    ARM_COMPUTE_ERROR_ON((weights->info()->dimension(1) != (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));

    // If the fully connected layer is called after a convolution layer, the input tensor must be linearized

    // Initialize output tensor for flatten
    TensorShape shape_flatten = compute_flatten_shape(input->info());
    _flatten_output.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_flatten));

    // Configure flatten kernel
    _memory_group.manage(&_flatten_output);

    _flatten.configure(input, &_flatten_output);

    // Configure matrix multiply kernel
    configure_mm(&_flatten_output, weights, biases, output, act);

    // Allocate the output tensor for flatten once all the configure methods have been called
    _flatten_output.allocator()->allocate();
}

void NEFullyConnectedLayer::configure_fc_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act)
{
    ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1));

    // Configure matrix multiply kernel
    configure_mm(input, weights, biases, output, act);
}

void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output,
                                      FullyConnectedLayerInfo fc_info)
{
    // Perform validate step
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedLayer::validate(input->info(),
                                                               weights->info(),
                                                               biases != nullptr ? biases->info() : nullptr,
                                                               output->info(),
                                                               fc_info));

    _are_weights_converted   = true;
    _are_weights_reshaped    = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
    _is_fc_after_conv        = true;
    _is_quantized_asymmetric = is_data_type_quantized_asymmetric(input->info()->data_type());
    _original_weights        = weights;

    if(_weights_manager)
    {
        _weights_manager->manage(weights);
    }

    // With the Fully Connected layer we can have 4 different cases:
    //  1) Convolution layer -> Fully Connected layer without batches
    //  2) Fully Connected layer -> Fully Connected layer without batches
    //  3) Convolution layer -> Fully Connected layer with batches
    //  4) Fully Connected layer -> Fully Connected layer with batches

    const ITensor *weights_to_use = weights;

    // Check if we have a fully connected layer with batches
    const bool is_batched_fc_layer = output->info()->dimension(1) > 1;
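    // In the batched case, the layer follows a convolution if the input's dimensions from
    // index 3 onwards match the output's dimensions from index 1 onwards, i.e. the leading
    // W x H x C volume of the input is what gets flattened into dimension 0 of the output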
    if(is_batched_fc_layer)
    {
        _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(input->info()->tensor_shape().cbegin() + 3,
                                                                                  input->info()->tensor_shape().cend(),
                                                                                  output->info()->tensor_shape().cbegin() + 1));
    }
    else
    {
        _is_fc_after_conv = input->info()->num_dimensions() > 1;
    }

    // Reshape weights if needed
    if(!_are_weights_reshaped)
    {
        if(_weights_manager && _weights_manager->are_weights_managed(weights))
        {
            _reshape_weights_managed_function.configure(weights);
            weights_to_use = _weights_manager->acquire(weights, &_reshape_weights_managed_function);
        }
        else
        {
            // Reshape the weights
            _reshape_weights_function.configure(weights, &_reshape_weights_output);
            weights_to_use = &_reshape_weights_output;
        }
    }

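    // When the weights were trained with a different data layout (e.g. NCHW vs NHWC), the
    // flattened activations are ordered differently, so the weight rows must be permuted to match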
    // Convert weights if needed
    if(_is_fc_after_conv && (input->info()->data_layout() != fc_info.weights_trained_layout))
    {
        if(_weights_manager && _weights_manager->are_weights_managed(weights_to_use))
        {
            _convert_weights_managed.configure(weights_to_use,
                                               input->info()->tensor_shape(),
                                               fc_info.weights_trained_layout);
            weights_to_use = _weights_manager->acquire(weights, &_convert_weights_managed);
        }
        else
        {
            // Convert weights
            _convert_weights.configure(weights_to_use,
                                       &_converted_weights_output,
                                       input->info()->tensor_shape(),
                                       fc_info.weights_trained_layout);

            weights_to_use = &_converted_weights_output;
        }
        _are_weights_converted = false;
    }

    if(_is_fc_after_conv)
    {
        // Fully Connected layer after a Convolution Layer without batches
        configure_conv_fc(input, weights_to_use, biases, output, fc_info.activation_info);
    }
    else
    {
        // Fully Connected layer after a Fully Connected Layer without batches
        configure_fc_fc(input, weights_to_use, biases, output, fc_info.activation_info);
    }

    _are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights;
}

Status NEFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                       FullyConnectedLayerInfo fc_info)
{
    ARM_COMPUTE_UNUSED(fc_info.retain_internal_weights);
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);
    ARM_COMPUTE_RETURN_ERROR_ON(biases != nullptr && biases->num_dimensions() > 1);
    ARM_COMPUTE_RETURN_ERROR_ON(fc_info.activation_info.enabled() && is_data_type_quantized(input->data_type()) && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::RELU
                                && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU);

    bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
    bool is_fc_after_conv = true;

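    // Descriptors of the hypothetical intermediate tensors (flattened input, transposed
    // weights, layout-converted weights), so each stage can be validated without allocating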
    const ITensorInfo &flatten_input     = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_flatten_shape(input)));
    const ITensorInfo &reshaped_weights  = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights)));
    const ITensorInfo &converted_weights = weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) : TensorInfo(*reshaped_weights.clone());

    // With the Fully Connected layer we can have 4 different cases:
    //  1) Convolution layer -> Fully Connected layer without batches
    //  2) Fully Connected layer -> Fully Connected layer without batches
    //  3) Convolution layer -> Fully Connected layer with batches
    //  4) Fully Connected layer -> Fully Connected layer with batches

    const ITensorInfo *input_to_use   = input;
    const ITensorInfo *weights_to_use = weights;

    // Check if we have a fully connected layer with batches
    const bool is_batched_fc_layer = output->dimension(1) > 1;

    if(is_batched_fc_layer)
    {
        is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(input->tensor_shape().cbegin() + 3,
                                                                                 input->tensor_shape().cend(),
                                                                                 output->tensor_shape().cbegin() + 1));
    }
    else
    {
        is_fc_after_conv = input->num_dimensions() > 1;
    }

    if(!weights_reshaped)
    {
        // Validate reshape weights kernel
        ARM_COMPUTE_RETURN_ON_ERROR(NETranspose::validate(weights, &reshaped_weights));
        weights_to_use = &reshaped_weights;
    }

    if(is_fc_after_conv && (input->data_layout() != fc_info.weights_trained_layout))
    {
        // Validate convert weights kernel
        ARM_COMPUTE_RETURN_ON_ERROR(NEConvertFullyConnectedWeights::validate(weights_to_use,
                                                                             &converted_weights,
                                                                             input->tensor_shape(),
                                                                             fc_info.weights_trained_layout));
        weights_to_use = &converted_weights;
    }

    if(is_fc_after_conv)
    {
        // Fully Connected layer after a Convolution Layer without batches
        ARM_COMPUTE_RETURN_ERROR_ON((weights_to_use->dimension(1) != (input->dimension(0) * input->dimension(1) * input->dimension(2))));

        // Validate flatten kernel
        ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayer::validate(input, &flatten_input));
        input_to_use = &flatten_input;
    }
    else
    {
        // Fully Connected layer after a Fully Connected Layer without batches
        ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1));
    }
    // Validate matrix multiply kernel
    ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(input_to_use, weights_to_use, biases, output, fc_info.activation_info));

    return Status{};
}

void NEFullyConnectedLayer::run()
{
    prepare();

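    // Acquire the memory-managed intermediate buffers (e.g. the flatten output) for the
    // duration of this call; they are released again when the scope object is destroyed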
    MemoryGroupResourceScope scope_mg(_memory_group);

    // Linearize input if it comes from a convolutional layer
    if(_is_fc_after_conv)
    {
        _flatten.run();
    }

    // Run matrix multiply
    if(_is_quantized_asymmetric)
    {
        _mm_gemmlowp.run();
    }
    else
    {
        _mm_gemm.run();
    }
}

void NEFullyConnectedLayer::prepare()
{
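    // One-time weight transformations (reshape, layout conversion, GEMM packing) run here,
    // so that subsequent calls to run() only have to execute the matrix multiplication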
    if(!_is_prepared)
    {
        if(!_weights_manager)
        {
            ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
        }

        auto release_unused = [](Tensor *w)
        {
            if(!w->is_used())
            {
                w->allocator()->free();
            }
        };

        // Pointer to current weights
        const ITensor *cur_weights = _original_weights;

        // Reshape of the weights (happens only once)
        if(!_are_weights_reshaped)
        {
            if(_weights_manager && _weights_manager->are_weights_managed(_original_weights))
            {
                cur_weights = _weights_manager->run(cur_weights, &_reshape_weights_managed_function);
            }
            else
            {
                // Run reshape weights kernel and mark weights as unused
                _reshape_weights_output.allocator()->allocate();
                _reshape_weights_function.run();
                cur_weights->mark_as_unused();
                cur_weights = &_reshape_weights_output;
            }
            _are_weights_reshaped = true;
        }

        // Convert weights if needed (happens only once)
        if(!_are_weights_converted)
        {
            if(_weights_manager && _weights_manager->are_weights_managed(cur_weights))
            {
                _weights_manager->run(cur_weights, &_convert_weights_managed);
            }
            else
            {
                _converted_weights_output.allocator()->allocate();
                _convert_weights.run();
                cur_weights->mark_as_unused();
            }

            _are_weights_converted = true;
        }

        // Release reshaped weights if unused
        release_unused(&_reshape_weights_output);

        // Prepare GEMM and release unused weights
        if(!_is_quantized_asymmetric)
        {
            _mm_gemm.prepare();
        }

        // Release converted weights if unused
        release_unused(&_reshape_weights_output);
        release_unused(&_converted_weights_output);

        _is_prepared = true;
    }
}
} // namespace arm_compute