/*
 * Copyright (c) 2021-2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "src/cpu/operators/CpuFullyConnected.h"

#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"

#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/MemoryHelpers.h"
#include "src/core/utils/quantization/AsymmHelpers.h"
#include "src/cpu/kernels/CpuTransposeKernel.h"
#include "src/cpu/operators/CpuConvertFullyConnectedWeights.h"
#include "src/cpu/operators/CpuFlatten.h"
#include "src/cpu/operators/CpuGemm.h"
#include "src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.h"
#include "src/cpu/utils/CpuAuxTensorHandler.h"

namespace arm_compute
{
namespace cpu
{
using namespace arm_compute::experimental;
using namespace arm_compute::misc::shape_calculator;

namespace
{
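// Derive the fixed-point requantization parameters (multiplier, shift, offset
// and clamping bounds) that the quantized GEMM applies when scaling its int32
// accumulators back to the destination data type.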
Status get_gemmlowp_output_stage_info(const ITensorInfo         *src,
                                      const ITensorInfo         *weights,
                                      const ITensorInfo         *dst,
                                      const ActivationLayerInfo &act,
                                      GEMMLowpOutputStageInfo   &gemmlowp_output_stage_info)
{
    const auto                    data_type = src->data_type();
    const QuantizationInfo        oq_info   = dst->quantization_info();
    const UniformQuantizationInfo iq_unif   = src->quantization_info().uniform();
    const UniformQuantizationInfo wq_unif   = weights->quantization_info().uniform();
    const UniformQuantizationInfo oq_unif   = oq_info.uniform();

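    // The real requantization factor (iq.scale * wq.scale) / oq.scale is encoded
    // as a 31-bit fixed-point multiplier plus a right shift. As an illustrative
    // example (assumed values, not taken from a real network): a factor of 0.125
    // becomes multiplier 1073741824 (0.5 in Q0.31) with shift 2, since
    // 0.125 = 0.5 * 2^-2.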
    float   multiplier = (iq_unif.scale * wq_unif.scale) / oq_unif.scale;
    int32_t output_multiplier;
    int32_t output_shift;

    ARM_COMPUTE_RETURN_ON_ERROR(
        quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));

    int32_t type_min = 0;
    int32_t type_max = 0;
    std::tie(type_min, type_max) = quantization::get_quantized_asymmetric_output_min_max(oq_info, act, data_type);

    gemmlowp_output_stage_info.gemmlowp_multiplier = output_multiplier;
    gemmlowp_output_stage_info.gemmlowp_shift      = output_shift;
    gemmlowp_output_stage_info.gemmlowp_offset     = oq_unif.offset;
    gemmlowp_output_stage_info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    gemmlowp_output_stage_info.gemmlowp_min_bound  = type_min;
    gemmlowp_output_stage_info.gemmlowp_max_bound  = type_max;

    return Status{};
}

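// Validate the matrix-multiply stage: asymmetric quantized types are routed to
// CpuGemmLowpMatrixMultiplyCore, all other supported types to CpuGemm.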
Status validate_mm(const ITensorInfo         *src,
                   const ITensorInfo         *weights,
                   const ITensorInfo         *biases,
                   const ITensorInfo         *dst,
                   const ActivationLayerInfo &act,
                   bool                       enable_fast_math,
                   WeightFormat               weight_format)
{
    if (is_data_type_quantized_asymmetric(src->data_type()))
    {
        // The GEMM lowp core needs the negated offsets for its computation, so
        // extract and negate the src and weights offsets into new QuantizationInfo objects.
        const QuantizationInfo src_quantization_info(src->quantization_info().uniform().scale,
                                                     -src->quantization_info().uniform().offset);
        const QuantizationInfo weights_quantization_info(weights->quantization_info().uniform().scale,
                                                         -weights->quantization_info().uniform().offset);

        GEMMLowpOutputStageInfo gemmlowp_output_stage_info;
        ARM_COMPUTE_RETURN_ON_ERROR(get_gemmlowp_output_stage_info(src, weights, dst, act, gemmlowp_output_stage_info));

        GEMMInfo gemm_info;
        gemm_info.set_gemmlowp_output_stage(gemmlowp_output_stage_info);
        gemm_info.set_fast_math(enable_fast_math);

        // Validate gemmlowp function
        TensorInfo src_info     = src->clone()->set_quantization_info(src_quantization_info);
        TensorInfo weights_info = weights->clone()->set_quantization_info(weights_quantization_info);
        ARM_COMPUTE_RETURN_ON_ERROR(
            CpuGemmLowpMatrixMultiplyCore::validate(&src_info, &weights_info, biases, dst, gemm_info));
    }
    else
    {
        GEMMInfo gemm_info;
        gemm_info.set_weight_format(weight_format);
        gemm_info.set_fixed_format(weight_format != WeightFormat::UNSPECIFIED);
        gemm_info.set_fast_math(enable_fast_math);
        ARM_COMPUTE_RETURN_ON_ERROR(CpuGemm::validate(src, weights, biases, dst, 1.0f, 1.0f, gemm_info));
    }

    return Status{};
}
} // namespace

CpuFullyConnected::CpuFullyConnected()
    : _flatten(nullptr),
      _convert_weights(nullptr),
      _transpose_weights(nullptr),
      _mm_gemm(nullptr),
      _mm_gemmlowp(nullptr),
      _flattened_src(),
      _converted_weights(),
      _reshaped_weights(),
      _trans_weights(),
      _trans_weights_idx(AuxTensorIdx::Count),
      _aux_mem(Count),
      _needs_weights_conversion(false),
      _needs_weights_reshape(false),
      _is_fc_after_conv(false),
      _is_quantized_asymmetric(false),
      _is_prepared(false),
      _enable_fast_math(false),
      _fixed_format(false),
      _weight_format(arm_compute::WeightFormat::UNSPECIFIED),
      _dynamic_weights(false)
{
}

CpuFullyConnected::~CpuFullyConnected() = default;

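// Configure the matrix-multiply stage. For asymmetric quantized types this is
// CpuGemmLowpMatrixMultiplyCore with a fused fixed-point output stage, otherwise
// CpuGemm; the activation is fused into the GEMM in both cases.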
void CpuFullyConnected::configure_mm(const ITensorInfo         *src,
                                     const ITensorInfo         *weights,
                                     const ITensorInfo         *biases,
                                     ITensorInfo               *dst,
                                     const ActivationLayerInfo &act)
{
    if (_is_quantized_asymmetric)
    {
        // The GEMM lowp core needs the negated offsets for its computation, so
        // extract and negate the src and weights offsets into new QuantizationInfo objects.
        const QuantizationInfo src_quantization_info(src->quantization_info().uniform().scale,
                                                     -src->quantization_info().uniform().offset);
        const QuantizationInfo weights_quantization_info(weights->quantization_info().uniform().scale,
                                                         -weights->quantization_info().uniform().offset);

        TensorInfo src_info     = src->clone()->set_quantization_info(src_quantization_info);
        TensorInfo weights_info = weights->clone()->set_quantization_info(weights_quantization_info);

        // Configure gemmlowp function and output stage for asymmetric quantized types
        GEMMLowpOutputStageInfo gemmlowp_output_stage_info;
        const Status            status =
            get_gemmlowp_output_stage_info(&src_info, &weights_info, dst, act, gemmlowp_output_stage_info);
        ARM_COMPUTE_ERROR_ON(status.error_code() != ErrorCode::OK);

        GEMMInfo gemm_info;
        gemm_info.set_gemmlowp_output_stage(gemmlowp_output_stage_info);
        gemm_info.set_activation_info(act);
        gemm_info.set_fast_math(_enable_fast_math);
        _mm_gemmlowp = std::make_unique<CpuGemmLowpMatrixMultiplyCore>();
        _mm_gemmlowp->configure(&src_info, &weights_info, biases, dst, gemm_info);
    }
    else
    {
        // Configure matrix multiply kernel
        GEMMInfo gemm_info;
        gemm_info.set_activation_info(act);
        gemm_info.set_fast_math(_enable_fast_math);
        gemm_info.set_fixed_format(_fixed_format);
        gemm_info.set_weight_format(_weight_format);
        _mm_gemm = std::make_unique<CpuGemm>();
        _mm_gemm->configure(src, weights, biases, dst, 1.0f, 1.0f, gemm_info);
    }
}

void CpuFullyConnected::configure_conv_fc(const ITensorInfo         *src,
                                          const ITensorInfo         *weights,
                                          const ITensorInfo         *biases,
                                          ITensorInfo               *dst,
                                          const ActivationLayerInfo &act)
{
    ARM_COMPUTE_ERROR_ON((weights->dimension(1) != (src->dimension(0) * src->dimension(1) * src->dimension(2))));

    // If the fully connected layer is called after a convolution layer, the src tensor must be linearized

    // Initialize output tensor for flatten
    auto_init_if_empty(_flattened_src, src->clone()->set_tensor_shape(compute_flatten_shape(src)));

    _flatten = std::make_unique<CpuFlatten>();
    _flatten->configure(src, &_flattened_src);

    // Configure matrix multiply kernel
    configure_mm(&_flattened_src, weights, biases, dst, act);
}

void CpuFullyConnected::configure_fc_fc(const ITensorInfo         *src,
                                        const ITensorInfo         *weights,
                                        const ITensorInfo         *biases,
                                        ITensorInfo               *dst,
                                        const ActivationLayerInfo &act)
{
    ARM_COMPUTE_ERROR_ON(src->dimension(0) != weights->dimension(1));

    // Configure matrix multiply kernel
    configure_mm(src, weights, biases, dst, act);
}

void CpuFullyConnected::configure(const ITensorInfo      *src,
                                  const ITensorInfo      *weights,
                                  const ITensorInfo      *biases,
                                  ITensorInfo            *dst,
                                  FullyConnectedLayerInfo fc_info,
                                  const WeightsInfo      &weights_info)
{
    // Perform validate step
    ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
    ARM_COMPUTE_ERROR_THROW_ON(CpuFullyConnected::validate(src, weights, biases, dst, fc_info, weights_info));
    ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, fc_info);

    _needs_weights_conversion = false;
    _needs_weights_reshape    = fc_info.transpose_weights ? !fc_info.are_weights_reshaped : false;
    _needs_weights_reshape    = _needs_weights_reshape && !fc_info.retain_internal_weights;
    _is_fc_after_conv         = true;
    _is_quantized_asymmetric  = is_data_type_quantized_asymmetric(src->data_type());
    _is_prepared              = false;
    _trans_weights_idx        = AuxTensorIdx::Count;
    _enable_fast_math         = fc_info.enable_fast_math;
    _fixed_format             = weights_info.weight_format() != WeightFormat::UNSPECIFIED;
    _weight_format            = weights_info.weight_format();
    _dynamic_weights          = !weights->are_values_constant() && _needs_weights_reshape;

    // With the Fully Connected layer we can have 4 different cases:
    //  1) Convolution layer -> Fully Connected layer without batches
    //  2) Fully Connected layer -> Fully Connected layer without batches
    //  3) Convolution layer -> Fully Connected layer with batches
    //  4) Fully Connected layer -> Fully Connected layer with batches

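    // Batches are detected from dst's second dimension; whether the producer was
    // a convolution is detected below by checking that src still carries higher
    // (spatial) dimensions which collapse into dst's batch dimension.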
    const ITensorInfo *weights_to_use = weights;

    // Check if we have a fully connected layer with batches
    const bool is_batched_fc_layer = dst->dimension(1) > 1;
    if (is_batched_fc_layer)
    {
        _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
                            (std::equal(src->tensor_shape().cbegin() + 3, src->tensor_shape().cend(),
                                        dst->tensor_shape().cbegin() + 1));
    }
    else
    {
        _is_fc_after_conv = src->num_dimensions() > 1;
    }

    // Reshape weights if needed
    if (_needs_weights_reshape)
    {
        // Reshape the weights
        _transpose_weights = std::make_unique<kernels::CpuTransposeKernel>();
        _transpose_weights->configure(weights, &_reshaped_weights);
        _reshaped_weights.set_are_values_constant(weights->are_values_constant());

        weights_to_use     = &_reshaped_weights;
        _trans_weights_idx = AuxTensorIdx::TransposedWeights;
    }

    // Convert weights if needed
    if (_is_fc_after_conv && (src->data_layout() != fc_info.weights_trained_layout))
    {
        // Convert weights
        _convert_weights = std::make_unique<CpuConvertFullyConnectedWeights>();
        _convert_weights->configure(weights_to_use, &_converted_weights, src->tensor_shape(),
                                    fc_info.weights_trained_layout);
        _converted_weights.set_are_values_constant(weights_to_use->are_values_constant());

        weights_to_use            = &_converted_weights;
        _needs_weights_conversion = true;
        _trans_weights_idx        = AuxTensorIdx::ConvertedWeights;
    }

    if (_is_fc_after_conv)
    {
        // Fully Connected layer after a Convolution Layer without batches
        configure_conv_fc(src, weights_to_use, biases, dst, fc_info.activation_info);
    }
    else
    {
        // Fully Connected layer after a Fully Connected Layer without batches
        configure_fc_fc(src, weights_to_use, biases, dst, fc_info.activation_info);
    }

    // Retain the TensorInfo with the weights to use
    if (_needs_weights_reshape || _needs_weights_conversion)
    {
        _trans_weights = *weights_to_use;
    }

    // Set auxiliary memory requirements
    auto gemm_mem_req = (_is_quantized_asymmetric) ? _mm_gemmlowp->workspace() : _mm_gemm->workspace();
    for (unsigned int i = 0; i < gemm_mem_req.size(); ++i)
    {
        _aux_mem[i] = gemm_mem_req[i];
    }

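    // Lifetime semantics used below: Temporary buffers only live for the span of
    // a single run, Prepare buffers can be released once prepare() has executed,
    // and Persistent buffers must survive across runs.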
    if (_aux_mem[Pretranspose].size > 0)
    {
        // Release permuted weights at the end of prepare as they are further transposed by the assembly dispatch
        // Do not release them if biases are dynamic and data type is quantized, since the weights tensor will be used for biases offset calculation
        // Keep all the auxiliary tensors in case of dynamic weights as they are recalculated every time.
        _aux_mem[TransposedWeights] = MemoryInfo(
            offset_int_vec(TransposedWeights),
            _dynamic_weights ? MemoryLifetime::Temporary
            : (_is_quantized_asymmetric && biases && !(biases->are_values_constant())) ? MemoryLifetime::Persistent
                                                                                       : MemoryLifetime::Prepare,
            _reshaped_weights.total_size());

        _aux_mem[ConvertedWeights] = MemoryInfo(offset_int_vec(ConvertedWeights),
                                                _dynamic_weights ? MemoryLifetime::Temporary : MemoryLifetime::Prepare,
                                                _converted_weights.total_size());
    }
    else
    {
        _aux_mem[TransposedWeights] = MemoryInfo(offset_int_vec(TransposedWeights),
                                                 _dynamic_weights            ? MemoryLifetime::Temporary
                                                 : _needs_weights_conversion ? MemoryLifetime::Prepare
                                                                             : MemoryLifetime::Persistent,
                                                 _reshaped_weights.total_size());

        _aux_mem[ConvertedWeights] = MemoryInfo(
            offset_int_vec(ConvertedWeights),
            _dynamic_weights ? MemoryLifetime::Temporary : MemoryLifetime::Persistent,
            _converted_weights.total_size());
    }
    _aux_mem[FlattenedSrc] =
        MemoryInfo(offset_int_vec(FlattenedSrc), MemoryLifetime::Temporary, _flattened_src.total_size());
}

Status CpuFullyConnected::has_opt_impl(arm_compute::WeightFormat &expected_weight_format,
                                       const ITensorInfo         *src,
                                       const ITensorInfo         *weights,
                                       const ITensorInfo         *biases,
                                       const ITensorInfo         *dst,
                                       FullyConnectedLayerInfo    fc_info,
                                       WeightsInfo                weights_info)
{
    GEMMInfo gemm_info;
    gemm_info.set_activation_info(fc_info.activation_info);
    gemm_info.set_fast_math(fc_info.enable_fast_math);
    gemm_info.set_fixed_format(weights_info.weight_format() != WeightFormat::UNSPECIFIED);
    gemm_info.set_weight_format(weights_info.weight_format());

    return CpuGemm::has_opt_impl(expected_weight_format, src, weights, biases, dst, gemm_info);
}

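// Query whether the underlying CpuGemm has an optimized fixed-format kernel for
// this problem; if so, expected_weight_format reports the weight layout that
// kernel requires.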
Status CpuFullyConnected::validate(const ITensorInfo      *src,
                                   const ITensorInfo      *weights,
                                   const ITensorInfo      *biases,
                                   const ITensorInfo      *dst,
                                   FullyConnectedLayerInfo fc_info,
                                   const WeightsInfo      &weights_info)
{
    ARM_COMPUTE_UNUSED(fc_info.retain_internal_weights);
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
                                                         DataType::F16, DataType::F32);

    if (is_fixed_format_fast_math(weights_info.weight_format()))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(src, DataType::F32);
        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(weights, DataType::BFLOAT16);
        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(dst, DataType::F32);
    }
    else
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, weights, dst);
    }

    ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);
    ARM_COMPUTE_RETURN_ERROR_ON(
        fc_info.activation_info.enabled() && is_data_type_quantized(src->data_type()) &&
        fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::RELU &&
        fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU &&
        fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU);

    bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
    bool is_fc_after_conv = true;

    const ITensorInfo &flatten_src =
        TensorInfo(src->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_flatten_shape(src)));
    const ITensorInfo &reshaped_weights = TensorInfo(
        weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights)));
    const ITensorInfo &converted_weights = weights_reshaped
                                               ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding())
                                               : TensorInfo(*reshaped_weights.clone());
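    // flatten_src, reshaped_weights and converted_weights are metadata-only
    // stand-ins: validate() allocates nothing and simply threads these
    // TensorInfos through the same kernels that configure() would use.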

    // With the Fully Connected layer we can have 4 different cases:
    //  1) Convolution layer -> Fully Connected layer without batches
    //  2) Fully Connected layer -> Fully Connected layer without batches
    //  3) Convolution layer -> Fully Connected layer with batches
    //  4) Fully Connected layer -> Fully Connected layer with batches

    const ITensorInfo *src_to_use     = src;
    const ITensorInfo *weights_to_use = weights;

    // Check if we have a fully connected layer with batches
    const bool is_batched_fc_layer = dst->dimension(1) > 1;

    if (biases != nullptr)
    {
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
        if (is_data_type_quantized(src->data_type()))
        {
            ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32);
        }
        else
        {
            ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, biases);
        }
    }

    if (is_batched_fc_layer)
    {
        is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
                           (std::equal(src->tensor_shape().cbegin() + 3, src->tensor_shape().cend(),
                                       dst->tensor_shape().cbegin() + 1));
    }
    else
    {
        is_fc_after_conv = src->num_dimensions() > 1;
    }

    if (!weights_reshaped)
    {
        // Validate reshape weights kernel
        ARM_COMPUTE_RETURN_ON_ERROR(kernels::CpuTransposeKernel::validate(weights, &reshaped_weights));
        weights_to_use = &reshaped_weights;
    }

    if (is_fc_after_conv && (src->data_layout() != fc_info.weights_trained_layout))
    {
        // Validate convert weights kernel
        ARM_COMPUTE_RETURN_ON_ERROR(CpuConvertFullyConnectedWeights::validate(
            weights_to_use, &converted_weights, src->tensor_shape(), fc_info.weights_trained_layout));
        weights_to_use = &converted_weights;
    }

    if (is_fc_after_conv)
    {
        // Fully Connected layer after a Convolution Layer without batches
        ARM_COMPUTE_RETURN_ERROR_ON(
            (weights_to_use->dimension(1) != (src->dimension(0) * src->dimension(1) * src->dimension(2))));

        // Validate flatten kernel
        ARM_COMPUTE_RETURN_ON_ERROR(CpuFlatten::validate(src, &flatten_src));
        src_to_use = &flatten_src;
    }
    else
    {
        // Fully Connected layer after a Fully Connected Layer without batches
        ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(0) != weights_to_use->dimension(1));
    }
    // Validate matrix multiply kernel
    ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(src_to_use, weights_to_use, biases, dst, fc_info.activation_info,
                                            fc_info.enable_fast_math, weights_info.weight_format()));

    return Status{};
}

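// Note that run() always re-enters prepare(): for static weights this is a
// cheap no-op after the first call, while for dynamic weights the weight
// transformations are redone on every call.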
void CpuFullyConnected::run(ITensorPack &tensors)
{
    prepare(tensors);

#ifdef ARM_COMPUTE_ASSERTS_ENABLED
    ++_asrt_run_count;
    ARM_COMPUTE_ERROR_ON(_dynamic_weights && _asrt_prepare_count != _asrt_run_count);
#endif // ARM_COMPUTE_ASSERTS_ENABLED

    auto src = tensors.get_const_tensor(ACL_SRC_0);

    CpuAuxTensorHandler flattened_src(offset_int_vec(FlattenedSrc), _flattened_src, tensors, false);
    CpuAuxTensorHandler transformed_wei(offset_int_vec(_trans_weights_idx), _trans_weights, tensors, false);

    // Linearize src if it comes from a convolutional layer
    if (_is_fc_after_conv)
    {
        ITensorPack flatten_pack{{ACL_SRC, src}, {ACL_DST, flattened_src.get()}};
        _flatten->run(flatten_pack);
    }

    ITensorPack gemm_pack = tensors;
    gemm_pack.add_const_tensor(ACL_SRC_0, (_is_fc_after_conv) ? flattened_src.get() : src);
    if (_needs_weights_reshape || _needs_weights_conversion)
    {
        gemm_pack.add_const_tensor(ACL_SRC_1, transformed_wei.get());
    }

    // Run matrix multiply
    if (_is_quantized_asymmetric)
    {
        _mm_gemmlowp->run(gemm_pack);
    }
    else
    {
        _mm_gemm->run(gemm_pack);
    }
}

void CpuFullyConnected::prepare(ITensorPack &tensors)
{
    if (!_is_prepared || _dynamic_weights)
    {
#ifdef ARM_COMPUTE_ASSERTS_ENABLED
        ++_asrt_prepare_count;
        ARM_COMPUTE_ERROR_ON(!_dynamic_weights && _asrt_prepare_count > 1);
#endif // ARM_COMPUTE_ASSERTS_ENABLED

        auto weights = tensors.get_const_tensor(ACL_SRC_1);

        CpuAuxTensorHandler reshaped_weights(offset_int_vec(TransposedWeights), _reshaped_weights, tensors, false);
        CpuAuxTensorHandler converted_weights(offset_int_vec(ConvertedWeights), _converted_weights, tensors, false);

        // Pointer to current weights
        const ITensor *cur_weights = weights;

        // Reshape of the weights (happens only once for static weights; re-run on every call for dynamic ones)
        if (_needs_weights_reshape)
        {
            // Run reshape weights kernel and mark weights as unused
            ITensorPack transpose_pack{{ACL_SRC, weights}, {ACL_DST, reshaped_weights.get()}};
            NEScheduler::get().schedule_op(_transpose_weights.get(), Window::DimY, _transpose_weights->window(),
                                           transpose_pack);

            cur_weights->mark_as_unused();
            cur_weights = reshaped_weights.get();
        }

        // Convert weights if needed (again, only once for static weights)
        if (_needs_weights_conversion)
        {
            ITensorPack convert_pack{{ACL_SRC, cur_weights}, {ACL_DST, converted_weights.get()}};
            _convert_weights->run(convert_pack);

            cur_weights->mark_as_unused();
            cur_weights = converted_weights.get();
        }

        ITensorPack gemm_pack = tensors;
        gemm_pack.add_const_tensor(ACL_SRC_1, cur_weights);

        // Run the underlying GEMM's prepare step and release unused weights
        if (!_is_quantized_asymmetric)
        {
            _mm_gemm->prepare(gemm_pack);
        }
        else
        {
            _mm_gemmlowp->prepare(gemm_pack);
        }

        _is_prepared = true;
    }
}

experimental::MemoryRequirements CpuFullyConnected::workspace() const
{
    return _aux_mem;
}
} // namespace cpu
} // namespace arm_compute
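
// Minimal usage sketch (illustrative only, not part of the library source; the
// tensor and info variable names are assumptions). The operator is configured
// once on tensor metadata, auxiliary buffers advertised by workspace() are
// bound in the pack under their offset_int_vec() ids, and run() then executes
// on concrete tensors:
//
//   arm_compute::cpu::CpuFullyConnected fc;
//   fc.configure(&src_info, &weights_info, &bias_info, &dst_info,
//                arm_compute::FullyConnectedLayerInfo());
//   arm_compute::ITensorPack pack{{arm_compute::ACL_SRC_0, &src},
//                                 {arm_compute::ACL_SRC_1, &weights},
//                                 {arm_compute::ACL_SRC_2, &bias},
//                                 {arm_compute::ACL_DST, &dst}};
//   fc.prepare(pack); // optional: run() calls it internally
//   fc.run(pack);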