/*
 * Copyright (c) 2017-2020 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"

#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"

#include <algorithm>
#include <cmath>

namespace arm_compute
{
using namespace arm_compute::misc::shape_calculator;

namespace
{
Status validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output)
{
    if(is_data_type_quantized_asymmetric(input->data_type()))
    {
        // Since we need negative offsets for the quantized matrix multiplication, we need to change QuantizationInfo()
        // Extract and negate input and weights offset
        const QuantizationInfo input_quantization_info(input->quantization_info().uniform().scale, -input->quantization_info().uniform().offset);
        const QuantizationInfo weights_quantization_info(weights->quantization_info().uniform().scale, -weights->quantization_info().uniform().offset);

        const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
        const UniformQuantizationInfo wq_info = weights->quantization_info().uniform();
        const UniformQuantizationInfo oq_info = output->quantization_info().uniform();

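        // The effective real-valued rescale factor (input_scale * weights_scale / output_scale) is decomposed
        // into an integer multiplier and a shift so the output stage can requantize the int32 accumulators
        // using fixed-point arithmetic only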
        float   multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
        int32_t output_multiplier;
        int32_t output_shift;
        ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));

        GEMMLowpOutputStageInfo gemmlowp_output_stage_info;
        gemmlowp_output_stage_info.gemmlowp_multiplier = output_multiplier;
        gemmlowp_output_stage_info.gemmlowp_shift      = output_shift;
        gemmlowp_output_stage_info.gemmlowp_offset     = oq_info.offset;
        gemmlowp_output_stage_info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
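        // Clamp the requantized result to the full representable range of the input data type
        // (e.g. [0, 255] for QASYMM8, [-128, 127] for QASYMM8_SIGNED)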
        const auto min_max_bound                      = get_min_max(input->data_type());
        gemmlowp_output_stage_info.gemmlowp_min_bound = (std::get<0>(min_max_bound)).get<int32_t>();
        gemmlowp_output_stage_info.gemmlowp_max_bound = (std::get<1>(min_max_bound)).get<int32_t>();

        GEMMInfo gemm_info;
        gemm_info.set_gemmlowp_output_stage(gemmlowp_output_stage_info);

        // Validate gemmlowp function
        ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyCore::validate(&input->clone()->set_quantization_info(input_quantization_info),
                                                                           &weights->clone()->set_quantization_info(weights_quantization_info),
                                                                           biases,
                                                                           output,
                                                                           gemm_info));
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEGEMM::validate(input, weights, biases, output, 1.f, 1.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run */)));
    }

    return Status{};
}
} // namespace

void NEFullyConnectedLayerReshapeWeights::configure(const ITensor *input, ITensor *output)
{
    auto k = arm_compute::support::cpp14::make_unique<NETransposeKernel>();
    k->configure(input, output);
    _kernel = std::move(k);
}

Status NEFullyConnectedLayerReshapeWeights::validate(const ITensorInfo *input, const ITensorInfo *output)
{
    return NETransposeKernel::validate(input, output);
}

NEFullyConnectedLayer::NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
    : _memory_group(std::move(memory_manager)), _weights_manager(weights_manager), _flatten_kernel(), _convert_weights(), _convert_weights_managed(), _reshape_weights_function(),
      _reshape_weights_managed_function(), _mm_gemm(nullptr, weights_manager), _mm_gemmlowp(), _flatten_output(), _converted_weights_output(), _reshape_weights_output(), _original_weights(nullptr),
      _are_weights_converted(true), _are_weights_reshaped(false), _is_fc_after_conv(false), _is_quantized(false), _is_prepared(false)
{
}

void NEFullyConnectedLayer::configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output)
{
    if(_is_quantized)
    {
        // Since we need negative offsets for the quantized matrix multiplication, we need to change QuantizationInfo()
        // Extract and negate input and weights offset
        const QuantizationInfo input_quantization_info   = input->info()->quantization_info();
        const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();

        input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
        weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));

        // Configure gemmlowp function and output stage for asymmetric quantized types
        const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
        const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
        const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();

        float   multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
        int32_t output_multiplier;
        int32_t output_shift;
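        // The returned status is intentionally not checked here: configure() has already run validate(),
        // which performs the same decomposition and reports any error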
        quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);

        GEMMLowpOutputStageInfo gemmlowp_output_stage_info;
        gemmlowp_output_stage_info.gemmlowp_multiplier = output_multiplier;
        gemmlowp_output_stage_info.gemmlowp_shift      = output_shift;
        gemmlowp_output_stage_info.gemmlowp_offset     = oq_info.offset;
        gemmlowp_output_stage_info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
        const auto min_max_bound                       = get_min_max(input->info()->data_type());
        gemmlowp_output_stage_info.gemmlowp_min_bound  = (std::get<0>(min_max_bound)).get<int32_t>();
        gemmlowp_output_stage_info.gemmlowp_max_bound  = (std::get<1>(min_max_bound)).get<int32_t>();
        GEMMInfo gemm_info;
        gemm_info.set_gemmlowp_output_stage(gemmlowp_output_stage_info);
        _mm_gemmlowp.configure(input, weights, biases, output, gemm_info);

        // Restore the original QuantizationInfo as input and weights could be used in other fully connected layers
        input->info()->set_quantization_info(input_quantization_info);
        weights->info()->set_quantization_info(weights_quantization_info);
    }
    else
    {
        // Configure matrix multiply kernel
        _mm_gemm.configure(input, weights, biases, output, 1.f, 1.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run */));
    }
}

void NEFullyConnectedLayer::configure_conv_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output)
{
    ARM_COMPUTE_ERROR_ON((weights->info()->dimension(1) != (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));

    // If the fully connected layer is called after a convolution layer, the input tensor must be linearized
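    // (its [width, height, channels] dimensions are collapsed into a single dimension of size
    // width * height * channels, which must match dimension 1 of the weights, as checked above)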

    // Initialize output tensor for flatten
    TensorShape shape_flatten = compute_flatten_shape(input->info());
    _flatten_output.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_flatten));

    // Configure flatten kernel
    _memory_group.manage(&_flatten_output);
    _flatten_kernel.configure(input, &_flatten_output);

    // Configure matrix multiply kernel
    configure_mm(&_flatten_output, weights, biases, output);

    // Allocate the output tensor for flatten once all the configure methods have been called
    _flatten_output.allocator()->allocate();
}

void NEFullyConnectedLayer::configure_fc_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output)
{
    ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1));

    // Configure matrix multiply kernel
    configure_mm(input, weights, biases, output);
}

void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output,
                                      FullyConnectedLayerInfo fc_info)
{
    // Perform validate step
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedLayer::validate(input->info(),
                                                               weights->info(),
                                                               biases != nullptr ? biases->info() : nullptr,
                                                               output->info(),
                                                               fc_info));

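    // Weights are treated as already reshaped when the caller marks them as such or when no transpose is requested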
    _are_weights_converted = true;
    _are_weights_reshaped  = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
    _is_fc_after_conv      = true;
    _is_quantized          = is_data_type_quantized_asymmetric(input->info()->data_type());
    _original_weights      = weights;

    if(_weights_manager)
    {
        _weights_manager->manage(weights);
    }

    // With the Fully Connected layer we can have 4 different cases:
    //  1) Convolution layer -> Fully Connected layer without batches
    //  2) Fully Connected layer -> Fully Connected layer without batches
    //  3) Convolution layer -> Fully Connected layer with batches
    //  4) Fully Connected layer -> Fully Connected layer with batches

    const ITensor *weights_to_use = weights;

    // Check if we have a fully connected layer with batches
    const bool is_batched_fc_layer = output->info()->dimension(1) > 1;
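    // When batched, the layer is considered to follow a convolution if the input dimensions from index 3
    // onwards match the output dimensions from index 1 onwards (i.e. the input's [W, H, C] block collapses
    // into the output's first dimension); otherwise it follows a convolution if the input has more than one dimension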
    if(is_batched_fc_layer)
    {
        _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(input->info()->tensor_shape().cbegin() + 3,
                                                                                  input->info()->tensor_shape().cend(),
                                                                                  output->info()->tensor_shape().cbegin() + 1));
    }
    else
    {
        _is_fc_after_conv = input->info()->num_dimensions() > 1;
    }

    // Reshape weights if needed
    if(!_are_weights_reshaped)
    {
        if(_weights_manager && _weights_manager->are_weights_managed(weights))
        {
            _reshape_weights_managed_function.configure(weights);
            weights_to_use = _weights_manager->acquire(weights, &_reshape_weights_managed_function);
        }
        else
        {
            // Reshape the weights
            _reshape_weights_function.configure(weights, &_reshape_weights_output);
            weights_to_use = &_reshape_weights_output;
        }
    }

    // Convert weights if needed
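    // (the weights were trained with a data layout, e.g. NCHW vs NHWC, different from the one of the
    // runtime input, so their elements must be re-ordered before the matrix multiplication)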
    if(_is_fc_after_conv && (input->info()->data_layout() != fc_info.weights_trained_layout))
    {
        if(_weights_manager && _weights_manager->are_weights_managed(weights_to_use))
        {
            _convert_weights_managed.configure(weights_to_use,
                                               input->info()->tensor_shape(),
                                               fc_info.weights_trained_layout);
            weights_to_use = _weights_manager->acquire(weights, &_convert_weights_managed);
        }
        else
        {
            // Convert weights
            _convert_weights.configure(weights_to_use,
                                       &_converted_weights_output,
                                       input->info()->tensor_shape(),
                                       fc_info.weights_trained_layout);

            weights_to_use = &_converted_weights_output;
        }
        _are_weights_converted = false;
    }

    if(_is_fc_after_conv)
    {
        // Fully Connected layer after a Convolution Layer without batches
        configure_conv_fc(input, weights_to_use, biases, output);
    }
    else
    {
        // Fully Connected layer after a Fully Connected Layer without batches
        configure_fc_fc(input, weights_to_use, biases, output);
    }

    _are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights;
}

Status NEFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                       FullyConnectedLayerInfo fc_info)
{
    ARM_COMPUTE_UNUSED(fc_info.retain_internal_weights);
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);

    bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
    bool is_fc_after_conv = true;

    const ITensorInfo &flatten_input     = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_flatten_shape(input)));
    const ITensorInfo &reshaped_weights  = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights)));
    const ITensorInfo &converted_weights = weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) : TensorInfo(*reshaped_weights.clone());

    // With the Fully Connected layer we can have 4 different cases:
    //  1) Convolution layer -> Fully Connected layer without batches
    //  2) Fully Connected layer -> Fully Connected layer without batches
    //  3) Convolution layer -> Fully Connected layer with batches
    //  4) Fully Connected layer -> Fully Connected layer with batches

    const ITensorInfo *input_to_use   = input;
    const ITensorInfo *weights_to_use = weights;

    // Check if we have a fully connected layer with batches
    const bool is_batched_fc_layer = output->dimension(1) > 1;

    if(is_batched_fc_layer)
    {
        is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(input->tensor_shape().cbegin() + 3,
                                                                                 input->tensor_shape().cend(),
                                                                                 output->tensor_shape().cbegin() + 1));
    }
    else
    {
        is_fc_after_conv = input->num_dimensions() > 1;
    }

    if(!weights_reshaped)
    {
        // Validate reshape weights kernel
        ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayerReshapeWeights::validate(weights, &reshaped_weights));
        weights_to_use = &reshaped_weights;
    }

    if(is_fc_after_conv && (input->data_layout() != fc_info.weights_trained_layout))
    {
        // Validate convert weights kernel
        ARM_COMPUTE_RETURN_ON_ERROR(NEConvertFullyConnectedWeights::validate(weights_to_use,
                                                                             &converted_weights,
                                                                             input->tensor_shape(),
                                                                             fc_info.weights_trained_layout));
        weights_to_use = &converted_weights;
    }

    if(is_fc_after_conv)
    {
        // Fully Connected layer after a Convolution Layer without batches
        ARM_COMPUTE_RETURN_ERROR_ON((weights_to_use->dimension(1) != (input->dimension(0) * input->dimension(1) * input->dimension(2))));

        // Validate flatten kernel
        ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayerKernel::validate(input, &flatten_input));
        input_to_use = &flatten_input;
    }
    else
    {
        // Fully Connected layer after a Fully Connected Layer without batches
        ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1));
    }
    // Validate matrix multiply kernel
    ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(input_to_use, weights_to_use, biases, output));

    return Status{};
}

void NEFullyConnectedLayer::run()
{
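    // Reshape/convert the weights on the first run; subsequent runs reuse the prepared weights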
    prepare();

    MemoryGroupResourceScope scope_mg(_memory_group);

    // Linearize input if it comes from a convolutional layer
    if(_is_fc_after_conv)
    {
        NEScheduler::get().schedule(&_flatten_kernel, Window::DimY);
    }

    // Run matrix multiply
    if(_is_quantized)
    {
        _mm_gemmlowp.run();
    }
    else
    {
        _mm_gemm.run();
    }
}

void NEFullyConnectedLayer::prepare()
{
    if(!_is_prepared)
    {
        if(!_weights_manager)
        {
            ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
        }

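        // Helper that frees the backing memory of an intermediate weights tensor once it is no longer used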
        auto release_unused = [](Tensor * w)
        {
            if(!w->is_used())
            {
                w->allocator()->free();
            }
        };

        // Pointer to current weights
        const ITensor *cur_weights = _original_weights;

        // Reshape of the weights (happens only once)
        if(!_are_weights_reshaped)
        {
            if(_weights_manager && _weights_manager->are_weights_managed(_original_weights))
            {
                cur_weights = _weights_manager->run(cur_weights, &_reshape_weights_managed_function);
            }
            else
            {
                // Run reshape weights kernel and mark weights as unused
                _reshape_weights_output.allocator()->allocate();
                _reshape_weights_function.run();
                cur_weights->mark_as_unused();
                cur_weights = &_reshape_weights_output;
            }
            _are_weights_reshaped = true;
        }

        // Convert weights if needed (happens only once)
        if(!_are_weights_converted)
        {
            if(_weights_manager && _weights_manager->are_weights_managed(cur_weights))
            {
                _weights_manager->run(cur_weights, &_convert_weights_managed);
            }
            else
            {
                _converted_weights_output.allocator()->allocate();
                _convert_weights.run();
                cur_weights->mark_as_unused();
            }

            _are_weights_converted = true;
        }

        // Release reshaped weights if unused
        release_unused(&_reshape_weights_output);

        // Prepare GEMM and release unused weights
        if(!_is_quantized)
        {
            _mm_gemm.prepare();
        }

        // Release reshaped and converted weights if unused
        release_unused(&_reshape_weights_output);
        release_unused(&_converted_weights_output);

        _is_prepared = true;
    }
}
} // namespace arm_compute