blob: 6969ac8ab34181e5cd598ffc4d1444cf006b7d76 [file] [log] [blame]
Georgios Pinitas529b5a22021-07-27 15:55:30 +01001/*
Jakub Sujak617ed502023-03-29 11:16:18 +01002 * Copyright (c) 2017-2021, 2023 Arm Limited.
Georgios Pinitas529b5a22021-07-27 15:55:30 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Georgios Pinitas7891a732021-08-20 21:39:25 +010024#include "src/gpu/cl/operators/ClFullyConnected.h"
Georgios Pinitas529b5a22021-07-27 15:55:30 +010025
26#include "arm_compute/core/Size2D.h"
Georgios Pinitas529b5a22021-07-27 15:55:30 +010027#include "arm_compute/core/utils/misc/ShapeCalculator.h"
28#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010029#include "arm_compute/core/Validate.h"
Georgios Pinitas529b5a22021-07-27 15:55:30 +010030#include "arm_compute/runtime/CL/CLScheduler.h"
Georgios Pinitas529b5a22021-07-27 15:55:30 +010031
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010032#include "src/common/utils/Log.h"
33#include "src/core/CL/kernels/CLFillBorderKernel.h"
Georgios Pinitas529b5a22021-07-27 15:55:30 +010034#include "src/core/helpers/MemoryHelpers.h"
Georgios Pinitas7891a732021-08-20 21:39:25 +010035#include "src/gpu/cl/operators/ClConvertFullyConnectedWeights.h"
36#include "src/gpu/cl/operators/ClFlatten.h"
37#include "src/gpu/cl/operators/ClGemm.h"
38#include "src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.h"
Mohammed Suhail Munshi2e0714d2023-07-19 14:44:38 +010039#include "src/gpu/cl/operators/ClMatMul.h"
Georgios Pinitas7891a732021-08-20 21:39:25 +010040#include "src/gpu/cl/operators/ClTranspose.h"
41#include "src/gpu/cl/utils/ClAuxTensorHandler.h"
Mohammed Suhail Munshia2bb80e2023-06-19 14:57:57 +010042#include "src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h"
43#include "src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h"
Georgios Pinitas529b5a22021-07-27 15:55:30 +010044#include "support/Cast.h"
45
46#include <algorithm>
47
48namespace arm_compute
49{
50namespace opencl
51{
52using namespace arm_compute::experimental;
53using namespace arm_compute::misc::shape_calculator;
54
55namespace
56{
Mohammed Suhail Munshia2bb80e2023-06-19 14:57:57 +010057// Function to calculate batched tensor shape in format [M, 1, B0, B1 ..] which is the format matmul expects
58inline TensorShape get_reshaped_matmul_tensor(const TensorShape &src)
59{
60 return TensorShape(src.x(), 1, src.y(), src.collapsed_from(2).z()); // Return value optimisation
61}
62
// Populate @p gemmlowp_output_stage for a quantized (QASYMM8/QASYMM8_SIGNED) GEMM.
//
// For non-quantized data types the stage is left as a zeroed QUANTIZE_DOWN_FIXEDPOINT
// descriptor and the function is a no-op apart from that initialization.
//
// @param src                   Input tensor info (provides data type and input quantization).
// @param weights               Weights tensor info (provides weights quantization).
// @param dst                   Output tensor info (provides output quantization, if already initialized).
// @param gemmlowp_output_stage [out] Output stage descriptor to fill.
// @param activation_info       Fused activation; when enabled it clamps the quantized output range.
//
// @return Error status from the quantized-multiplier computation.
Status construct_gemmlowp_output_stage(const ITensorInfo &src,
                                       const ITensorInfo &weights,
                                       const ITensorInfo &dst,
                                       GEMMLowpOutputStageInfo &gemmlowp_output_stage,
                                       ActivationLayerInfo      activation_info)
{
    gemmlowp_output_stage.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    gemmlowp_output_stage.gemmlowp_offset     = 0;
    gemmlowp_output_stage.gemmlowp_multiplier = 0;
    gemmlowp_output_stage.gemmlowp_shift      = 0;

    const auto data_type = src.data_type();

    // Configure output stage for quantized case
    if (is_data_type_quantized_asymmetric(data_type))
    {
        const QuantizationInfo        oq_info = dst.quantization_info();
        const UniformQuantizationInfo iq_unif = src.quantization_info().uniform();
        const UniformQuantizationInfo wq_unif = weights.quantization_info().uniform();
        const UniformQuantizationInfo oq_unif = oq_info.uniform();

        // If dst has not been initialized yet (total_size() == 0), fall back to the
        // input quantization as the requantization target.
        const auto output_quant_info = (dst.total_size() == 0) ? iq_unif : oq_unif;

        // Effective rescale factor (in_scale * w_scale / out_scale), decomposed below
        // into an integer multiplier and shift for the fixed-point output stage.
        const float multiplier        = (iq_unif.scale * wq_unif.scale) / output_quant_info.scale;
        int         output_multiplier = 0;
        int         output_shift      = 0;
        ARM_COMPUTE_RETURN_ON_ERROR(
            quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));

        // Default clamp bounds are the full representable range of the data type.
        PixelValue type_min{};
        PixelValue type_max{};
        std::tie(type_min, type_max) = get_min_max(data_type);

        // A fused activation (e.g. bounded ReLU) narrows the clamp bounds.
        if (activation_info.enabled())
        {
            std::tie(type_min, type_max) =
                get_quantized_activation_min_max(activation_info, data_type, output_quant_info);
        }

        // Set the GEMMLowp output stage info
        gemmlowp_output_stage.gemmlowp_offset     = output_quant_info.offset;
        gemmlowp_output_stage.gemmlowp_multiplier = output_multiplier;
        gemmlowp_output_stage.gemmlowp_shift      = output_shift;
        gemmlowp_output_stage.gemmlowp_multipliers.push_back(output_multiplier);
        gemmlowp_output_stage.gemmlowp_shifts.push_back(output_shift);
        type_min.get(gemmlowp_output_stage.gemmlowp_min_bound);
        type_max.get(gemmlowp_output_stage.gemmlowp_max_bound);
    }

    return Status{};
}

114
// Static validation of the matrix-multiply stage of fully connected.
//
// Mirrors ClFullyConnected::configure_mm(): when @p use_matmul is true the MatMul
// native kernels are validated (with the LHS reshaped to MatMul's batch layout),
// otherwise the GEMM / GEMMLowp operators are validated.
//
// @param src        Input tensor info.
// @param weights    Weights tensor info.
// @param bias       Optional bias tensor info (may be nullptr).
// @param dst        Output tensor info.
// @param fc_info    Fully connected layer metadata (transpose flags, activation, ...).
// @param use_matmul Whether the MatMul path was selected by the caller's heuristics.
Status validate_mm(const ITensorInfo &src,
                   const ITensorInfo &weights,
                   const ITensorInfo *bias,
                   const ITensorInfo &dst,
                   const FullyConnectedLayerInfo &fc_info,
                   bool                           use_matmul)
{
    // Note : If input is dynamic and data is not batched, use matmul, else use gemm
    const bool transpose_weights = fc_info.transpose_weights ? !fc_info.are_weights_reshaped : false;
    const bool use_dynamic_gemm =
        !use_matmul && !weights.are_values_constant() && transpose_weights; // use dynamic gemm as fallback for matmul
    const bool is_quantized = is_data_type_quantized_asymmetric(src.data_type());

    if (use_matmul)
    {
        // MatMul fuses the weights transpose, expressed through adj_rhs.
        const MatMulInfo m_info = MatMulInfo().adj_rhs(transpose_weights);

        // Note: LHS is reshaped here to match ClMatMul expectations of batch index - From [M, B0, B1] to [M, 1, B0, B1]
        TensorInfo lhs_to_use = src.clone()->set_tensor_shape(get_reshaped_matmul_tensor(src.tensor_shape()));

        // Pick the kernel tiling configuration from the per-GPU heuristics.
        const GPUTarget gpu_target = CLScheduler::get().target();
        std::unique_ptr<cl_matmul::IClMatMulNativeKernelConfig> t =
            cl_matmul::ClMatMulNativeKernelConfigurationFactory::create(gpu_target);
        const MatMulKernelInfo kernel_info = t->configure(&lhs_to_use, &weights, m_info);

        return is_quantized ? kernels::ClMatMulLowpNativeKernel::validate(&lhs_to_use, &weights, bias, &dst,
                                                                          kernel_info, fc_info.activation_info)
                            : kernels::ClMatMulNativeKernel::validate(&lhs_to_use, &weights, bias, &dst, kernel_info,
                                                                      fc_info.activation_info);
    }
    else
    {
        GEMMLowpOutputStageInfo gemmlowp_output_stage;
        ARM_COMPUTE_RETURN_ON_ERROR(
            construct_gemmlowp_output_stage(src, weights, dst, gemmlowp_output_stage, fc_info.activation_info));

        const GEMMInfo &gemm_info = GEMMInfo(false,                           // is_a_reshaped
                                             false,                           // is_b_reshaped
                                             !use_dynamic_gemm,               // reshape_b_only_on_first_run
                                             0,                               // depth_output_gemm3d
                                             false,                           // reinterpret_input_as_3d
                                             fc_info.retain_internal_weights, // retain_internal_weights
                                             gemmlowp_output_stage,           // gemmlowp_output_stage
                                             fc_info.fp_mixed_precision,      // fp_mixed_precision
                                             false,                           // fast_math
                                             true,                            // broadcast_bias
                                             ActivationLayerInfo());          // activation_info

        if (is_quantized)
        {
            const UniformQuantizationInfo iq_info = src.quantization_info().uniform();
            const UniformQuantizationInfo wq_info = weights.quantization_info().uniform();

            // Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
            // Extract and negate src and weights offset
            const QuantizationInfo src_quantization_info(iq_info.scale, -iq_info.offset);
            const QuantizationInfo weights_quantization_info(wq_info.scale, -wq_info.offset);

            // Validate gemmlowp function
            ARM_COMPUTE_RETURN_ON_ERROR(ClGemmLowpMatrixMultiplyCore::validate(
                &src.clone()->set_quantization_info(src_quantization_info),
                &weights.clone()->set_quantization_info(weights_quantization_info), bias, &dst, gemm_info));
        }
        else
        {
            ARM_COMPUTE_RETURN_ON_ERROR(ClGemm::validate(&src, &weights, bias, &dst, 1.f, 1.f, gemm_info));
        }
    }

    return Status{};
}
186} // namespace
187
// Default-construct with every sub-operator/kernel pointer null; the actual
// operators are instantiated lazily in configure() depending on the chosen path
// (GEMM, GEMMLowp, or MatMul). _aux_mem is sized to the auxiliary-tensor count.
ClFullyConnected::ClFullyConnected()
    : _convert_weights(nullptr),
      _flatten(nullptr),
      _reshape_weights(nullptr),
      _mm_gemm(nullptr),
      _mm_gemmlowp(nullptr),
      _matmul_native_kernel(nullptr),
      _matmul_lowp_native_kernel(nullptr),
      _aux_mem(Count)
{
}

// Defaulted here (not in the header) so unique_ptr members of forward-declared
// types can be destroyed with their complete definitions visible.
ClFullyConnected::~ClFullyConnected() = default;
201
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100202void ClFullyConnected::configure_mm(const CLCompileContext &compile_context,
203 ITensorInfo *src,
204 ITensorInfo *weights,
205 ITensorInfo *bias,
206 ITensorInfo *dst,
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100207 const FullyConnectedLayerInfo &fc_info)
208{
Mohammed Suhail Munshi8e2dede2023-06-27 14:25:58 +0100209 // If weights are dynamic and matmul is supported use matmul, else use gemm
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100210 if (_use_matmul)
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100211 {
Mohammed Suhail Munshi8e2dede2023-06-27 14:25:58 +0100212 // Specify whether transpose weights is necessary in matmul info
213 const MatMulInfo mat_info = MatMulInfo().adj_rhs(_transpose_weights);
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100214
Mohammed Suhail Munshia2bb80e2023-06-19 14:57:57 +0100215 // Note: MatMul does not need offset negation unlike gemm
216 // 1. Change shape when calling matmul to fit batch expectations.
Mohammed Suhail Munshi8e2dede2023-06-27 14:25:58 +0100217 _lhs_to_use = src->clone()->set_tensor_shape(get_reshaped_matmul_tensor(_lhs_to_use.tensor_shape()));
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100218
Mohammed Suhail Munshi8e2dede2023-06-27 14:25:58 +0100219 // 2. Use heuristics to get kernel info object
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100220 const GPUTarget gpu_target = CLScheduler::get().target();
221 std::unique_ptr<cl_matmul::IClMatMulNativeKernelConfig> kernel_config =
222 cl_matmul::ClMatMulNativeKernelConfigurationFactory::create(gpu_target);
223 MatMulKernelInfo kernel_info = kernel_config->configure(src, weights, mat_info);
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100224
Mohammed Suhail Munshi8e2dede2023-06-27 14:25:58 +0100225 // 3. Configure relevant matmul kernel
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100226 if (_is_quantized)
Mohammed Suhail Munshia2bb80e2023-06-19 14:57:57 +0100227 {
228 _matmul_lowp_native_kernel = std::make_unique<kernels::ClMatMulLowpNativeKernel>();
229 _matmul_lowp_native_kernel->set_target(gpu_target);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100230 _matmul_lowp_native_kernel->configure(compile_context, src, weights, bias, dst, kernel_info,
231 fc_info.activation_info);
Mohammed Suhail Munshia2bb80e2023-06-19 14:57:57 +0100232 }
233 else
234 {
235 _matmul_native_kernel = std::make_unique<kernels::ClMatMulNativeKernel>();
236 _matmul_native_kernel->set_target(gpu_target);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100237 _matmul_native_kernel->configure(compile_context, src, weights, bias, dst, kernel_info,
238 fc_info.activation_info);
Mohammed Suhail Munshia2bb80e2023-06-19 14:57:57 +0100239 }
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100240 }
241 else
242 {
Mohammed Suhail Munshia2bb80e2023-06-19 14:57:57 +0100243 // Configure GEMM
244 GEMMLowpOutputStageInfo gemmlowp_output_stage;
245 construct_gemmlowp_output_stage(*src, *weights, *dst, gemmlowp_output_stage, fc_info.activation_info);
246
247 const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped
248 false, // is_b_reshaped
Mohammed Suhail Munshi8e2dede2023-06-27 14:25:58 +0100249 !_dynamic_gemm, // reshape_b_only_on_first_run
Mohammed Suhail Munshia2bb80e2023-06-19 14:57:57 +0100250 0, // depth_output_gemm3d
251 false, // reinterpret_input_as_3d
252 fc_info.retain_internal_weights, // retain_internal_weights
253 gemmlowp_output_stage, // gemmlowp_output_stage
254 fc_info.fp_mixed_precision, // fp_mixed_precision
255 false, // fast_math
256 true, // broadcast_bias
257 fc_info.activation_info); // activation_info
258
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100259 if (_is_quantized)
Mohammed Suhail Munshia2bb80e2023-06-19 14:57:57 +0100260 {
261 // Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
262 // Extract and negate input and weights offset
263 const QuantizationInfo src_quantization_info = src->quantization_info();
264 const QuantizationInfo weights_quantization_info = weights->quantization_info();
265
266 TensorInfo src_info = src->clone()->set_quantization_info(src_quantization_info);
267 TensorInfo weights_info = weights->clone()->set_quantization_info(weights_quantization_info);
268
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100269 src_info.set_quantization_info(
270 QuantizationInfo(src_quantization_info.uniform().scale, -src_quantization_info.uniform().offset));
271 weights_info.set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale,
272 -weights_quantization_info.uniform().offset));
Mohammed Suhail Munshia2bb80e2023-06-19 14:57:57 +0100273
274 // Configure gemmlowp function
275 _mm_gemmlowp = std::make_unique<ClGemmLowpMatrixMultiplyCore>();
276 _mm_gemmlowp->configure(compile_context, &src_info, &weights_info, bias, dst, gemm_info);
277 }
278 else
279 {
280 // Configure matrix multiply kernel
281 _mm_gemm = std::make_unique<ClGemm>();
282 _mm_gemm->configure(compile_context, src, weights, bias, dst, 1.f, 1.f, gemm_info);
283 }
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100284 }
285}
286
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100287void ClFullyConnected::configure_conv_fc(const CLCompileContext &compile_context,
288 ITensorInfo *src,
289 ITensorInfo *weights,
290 ITensorInfo *bias,
291 ITensorInfo *dst,
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100292 const FullyConnectedLayerInfo &fc_info)
293{
Mohammed Suhail Munshi8e2dede2023-06-27 14:25:58 +0100294 // MatMul fuses transpose operation, so we use the first dimension for comparison where appropriate.
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100295 ARM_COMPUTE_ERROR_ON((weights->dimension((_use_matmul && _transpose_weights) ? 0 : 1) !=
296 (src->dimension(0) * src->dimension(1) * src->dimension(2))));
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100297
298 // If the fully connected layer is called after a convolution layer, the input tensor must be linearized
299
300 // Initialize output tensor for flatten
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100301 _flattened_src = src->clone()
302 ->set_is_resizable(true)
303 .reset_padding()
304 .set_tensor_shape(compute_flatten_shape(src))
305 .set_data_layout(DataLayout::NCHW);
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100306
307 // Configure flatten kernel
308 _flatten = std::make_unique<ClFlatten>();
309 _flatten->configure(compile_context, src, &_flattened_src);
310
Mohammed Suhail Munshia2bb80e2023-06-19 14:57:57 +0100311 // Note: if flatten has > 1 dimensions after, these dimensions are batch
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100312 // Configure matrix multiply kernel
313 configure_mm(compile_context, &_flattened_src, weights, bias, dst, fc_info);
314}
315
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100316void ClFullyConnected::configure_fc_fc(const CLCompileContext &compile_context,
317 ITensorInfo *src,
318 ITensorInfo *weights,
319 ITensorInfo *bias,
320 ITensorInfo *dst,
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100321 const FullyConnectedLayerInfo &fc_info)
322{
Mohammed Suhail Munshi8e2dede2023-06-27 14:25:58 +0100323 // MatMul fuses transpose operation, so we use the first dimension for comparison where appropriate.
324 ARM_COMPUTE_ERROR_ON(src->dimension(0) != weights->dimension((_use_matmul && _transpose_weights) ? 0 : 1));
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100325
326 // Configure matrix multiply kernel
327 configure_mm(compile_context, src, weights, bias, dst, fc_info);
328}
329
// Main configuration entry point. Decides between the MatMul and GEMM paths,
// sets up optional weights reshaping/conversion, dispatches to the conv-FC or
// FC-FC sub-configuration, and records the auxiliary memory requirements.
//
// @param compile_context CL compile context used to build the kernels.
// @param src             Input tensor info.
// @param weights         Weights tensor info.
// @param biases          Optional biases tensor info (may be nullptr).
// @param dst             Output tensor info.
// @param fc_info         Fully connected layer metadata.
void ClFullyConnected::configure(const CLCompileContext &compile_context,
                                 ITensorInfo            *src,
                                 ITensorInfo            *weights,
                                 ITensorInfo            *biases,
                                 ITensorInfo            *dst,
                                 FullyConnectedLayerInfo fc_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
    const GPUTarget gpu_target = get_arch_from_target(CLScheduler::get().target());

    // Perform validate step
    ARM_COMPUTE_ERROR_THROW_ON(ClFullyConnected::validate(src, weights, biases, dst, fc_info));
    ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, fc_info);

    // A transpose is only needed when the caller asked for it AND the weights
    // are not already reshaped.
    _transpose_weights  = fc_info.transpose_weights ? !fc_info.are_weights_reshaped : false;
    _is_fc_after_conv   = true;
    _is_quantized       = is_data_type_quantized_asymmetric(src->data_type());
    _is_prepared        = fc_info.retain_internal_weights;
    _weights_to_use     = TensorInfo(*weights);
    _weights_to_use_idx = ACL_SRC_1;

    // When using dynamic weights - use matmul kernels.
    // Note: MatMul is not used in the following cases (Gemm is used as fallback) :
    // 1. When the weights tensor is not dynamic
    // 2. MatMul does not support broadcasting batch dimension, and therefore is disabled if fc is batched.
    // 3. When FC is after convolution and src tensor data layout does not match weights trained data layout (weights conversion kernel is required)
    const bool is_batched_fc_layer = dst->dimension(1) > 1;
    _use_matmul = gpu_target != GPUTarget::MIDGARD && !weights->are_values_constant() && !is_batched_fc_layer &&
                  !(src->num_dimensions() > 1 && (src->data_layout() != fc_info.weights_trained_layout));
    _dynamic_gemm = !weights->are_values_constant() && _transpose_weights && !_use_matmul;

    // With the Fully Connected layer we can have 4 different cases:
    //  1) Convolution layer -> Fully Connected layer without batches
    //  2) Fully Connected layer -> Fully Connected layer without batches
    //  3) Convolution layer -> Fully Connected layer with batches
    //  4) Fully Connected layer -> Fully Connected layer with batches

    // Check if we have a fully connected layer with batches
    if (is_batched_fc_layer)
    {
        // FC follows a conv when the trailing src dimensions (>=3) line up with
        // dst's batch dimensions (>=1).
        _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
                            (std::equal(src->tensor_shape().cbegin() + 3, src->tensor_shape().cend(),
                                        dst->tensor_shape().cbegin() + 1));
    }
    else
    {
        _is_fc_after_conv = src->num_dimensions() > 1;
    }

    ITensorInfo *weights_used = weights;

    // Reshape weights if needed - Not needed when matmul is in use as matmul fuses transpose op.
    if (_transpose_weights && !_use_matmul)
    {
        // Reshape the weights
        _reshape_weights = std::make_unique<ClTranspose>();
        _reshape_weights->configure(compile_context, weights, &_reshaped_weights);
        weights_used        = &_reshaped_weights;
        _weights_to_use_idx = offset_int_vec(TransposedWeights);
    }

    // Convert weights if needed
    if (_is_fc_after_conv && (src->data_layout() != fc_info.weights_trained_layout))
    {
        // Convert weights
        _convert_weights = std::make_unique<ClConvertFullyConnectedWeights>();
        _convert_weights->configure(compile_context, weights_used, &_converted_weights, src->tensor_shape(),
                                    fc_info.weights_trained_layout);

        weights_used         = &_converted_weights;
        _weights_to_use_idx  = offset_int_vec(ConvertedWeights);
        _run_convert_weights = true;
    }

    if (_is_fc_after_conv)
    {
        // Fully Connected layer after a Convolution Layer without batches
        configure_conv_fc(compile_context, src, weights_used, biases, dst, fc_info);
    }
    else
    {
        // Fully Connected layer after a Fully Connected Layer without batches
        configure_fc_fc(compile_context, src, weights_used, biases, dst, fc_info);
    }
    // Update TensorInfo of final weights used (Need to be done in the end due to padding expansion)
    _weights_to_use = *weights_used;

    if (_use_matmul)
    {
        // Note : MatMul does not use transpose and does not need auxillary memory, so only converted weights are added to aux_mem
        _aux_mem[ConvertedWeights] =
            MemoryInfo(offset_int_vec(ConvertedWeights), MemoryLifetime::Temporary, _converted_weights.total_size());
    }
    else
    {
        // Set auxiliary memory requirements for gemm operators
        auto gemm_mem_req = (_is_quantized) ? _mm_gemmlowp->workspace() : _mm_gemm->workspace();
        for (unsigned int i = 0; i < gemm_mem_req.size(); ++i)
        {
            _aux_mem[i] = gemm_mem_req[i];
        }
        // NOTE(review): indices 1 and 2 appear to be the GEMM workspace slots that
        // hold persistent weights — confirm against the GEMM operator's workspace layout.
        if (_aux_mem[1].size > 0 || _aux_mem[2].size > 0) // Persistent weights memory on GEMMs
        {
            // Release permuted weights at the of prepare as they are further transposed by the assembly dispatch
            // Keep all the auxiliary tensors in case of dynamic weights as they are recalculated every time
            _aux_mem[TransposedWeights] = MemoryInfo(
                offset_int_vec(TransposedWeights), _dynamic_gemm ? MemoryLifetime::Temporary : MemoryLifetime::Prepare,
                _reshaped_weights.total_size());
            _aux_mem[ConvertedWeights] = MemoryInfo(offset_int_vec(ConvertedWeights),
                                                    _dynamic_gemm ? MemoryLifetime::Temporary : MemoryLifetime::Prepare,
                                                    _converted_weights.total_size());
        }
        else
        {
            // Release permuted weights at the of prepare as they are further transposed by the assembly dispatch
            // A weights tensor that is the one actually consumed at run time stays
            // Persistent; intermediate ones are only needed during prepare.
            const auto transposed_wei_lft = (_weights_to_use_idx == offset_int_vec(TransposedWeights))
                                                ? MemoryLifetime::Persistent
                                                : MemoryLifetime::Prepare;
            const auto converted_wei_lft = (_weights_to_use_idx == offset_int_vec(ConvertedWeights))
                                               ? MemoryLifetime::Persistent
                                               : MemoryLifetime::Prepare;

            _aux_mem[TransposedWeights] = MemoryInfo(offset_int_vec(TransposedWeights),
                                                     _dynamic_gemm ? MemoryLifetime::Temporary : transposed_wei_lft,
                                                     _reshaped_weights.total_size());
            _aux_mem[ConvertedWeights] = MemoryInfo(offset_int_vec(ConvertedWeights),
                                                    _dynamic_gemm ? MemoryLifetime::Temporary : converted_wei_lft,
                                                    _converted_weights.total_size());
        }
    }
    _aux_mem[FlattenedSrc] =
        MemoryInfo(offset_int_vec(FlattenedSrc), MemoryLifetime::Temporary, _flattened_src.total_size());
}
463
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100464Status ClFullyConnected::validate(const ITensorInfo *src,
465 const ITensorInfo *weights,
466 const ITensorInfo *biases,
467 const ITensorInfo *dst,
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100468 FullyConnectedLayerInfo fc_info)
469{
470 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100471 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
472 DataType::F16, DataType::F32);
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100473 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, weights, dst);
474 ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100475 ARM_COMPUTE_RETURN_ERROR_ON(
476 fc_info.activation_info.enabled() && is_data_type_quantized(src->data_type()) &&
477 fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::RELU &&
478 fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU &&
479 fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU);
ramy.elgammal@arm.comf77b9692023-08-07 17:07:02 +0100480 const GPUTarget gpu_target = get_arch_from_target(CLScheduler::get().target());
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100481
Mohammed Suhail Munshi8e2dede2023-06-27 14:25:58 +0100482 const bool transpose_weights = fc_info.transpose_weights ? !fc_info.are_weights_reshaped : false;
483 bool is_fc_after_conv = true;
Mohammed Suhail Munshia2bb80e2023-06-19 14:57:57 +0100484
485 // When using dynamic weights - use matmul kernels.
Mohammed Suhail Munshi8e2dede2023-06-27 14:25:58 +0100486 // Note: MatMul does not support broadcasting so fallback with batched cases.
Mohammed Suhail Munshia2bb80e2023-06-19 14:57:57 +0100487 const bool is_batched_fc_layer = dst->dimension(1) > 1;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100488 const bool use_matmul = gpu_target != GPUTarget::MIDGARD && !weights->are_values_constant() &&
489 !is_batched_fc_layer &&
490 !(src->num_dimensions() > 1 && (src->data_layout() != fc_info.weights_trained_layout));
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100491
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100492 const ITensorInfo &flatten_src = TensorInfo(src->clone()
493 ->set_is_resizable(true)
494 .reset_padding()
495 .set_tensor_shape(compute_flatten_shape(src))
496 .set_data_layout(DataLayout::NCHW));
497 const ITensorInfo &reshaped_weights = TensorInfo(
498 weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights)));
499 const ITensorInfo &converted_weights = (transpose_weights && !use_matmul)
500 ? TensorInfo(*reshaped_weights.clone())
501 : TensorInfo(weights->clone()->set_is_resizable(true).reset_padding());
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100502
503 // With the Fully Connected layer we can have 4 different cases:
504 // 1) Convolution layer -> Fully Connected layer without batches
505 // 2) Fully Connected layer -> Fully Connected layer without batches
506 // 3) Convolution layer -> Fully Connected layer with batches
507 // 4) Fully Connected layer -> Fully Connected layer with batches
508
509 const ITensorInfo *src_to_use = src;
510 const ITensorInfo *weights_to_use = weights;
511
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100512 if (biases != nullptr)
Giorgio Arena63e0beb2021-09-24 14:04:27 +0100513 {
514 ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100515 if (is_data_type_quantized(src->data_type()))
Giorgio Arena63e0beb2021-09-24 14:04:27 +0100516 {
517 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32);
518 }
519 else
520 {
521 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, biases);
522 }
523 }
524
Mohammed Suhail Munshia2bb80e2023-06-19 14:57:57 +0100525 // Check if FC is after conv (flatten kernel is run in case where FC is after conv.)
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100526 if (is_batched_fc_layer)
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100527 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100528 is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
529 (std::equal(src->tensor_shape().cbegin() + 3, src->tensor_shape().cend(),
530 dst->tensor_shape().cbegin() + 1));
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100531 }
532 else
533 {
534 is_fc_after_conv = src->num_dimensions() > 1;
535 }
536
Mohammed Suhail Munshi8e2dede2023-06-27 14:25:58 +0100537 // Transpose kernel does not run when matmul is supported as matmul fuses transpose op.
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100538 if (transpose_weights && !use_matmul)
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100539 {
540 // Validate reshape weights kernel
541 ARM_COMPUTE_RETURN_ON_ERROR(ClTranspose::validate(weights, &reshaped_weights));
542 weights_to_use = &reshaped_weights;
543 }
544
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100545 if (is_fc_after_conv && (src->data_layout() != fc_info.weights_trained_layout))
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100546 {
547 // Validate convert weights kernel
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100548 ARM_COMPUTE_RETURN_ON_ERROR(ClConvertFullyConnectedWeights::validate(
549 weights_to_use, &converted_weights, src->tensor_shape(), fc_info.weights_trained_layout));
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100550 weights_to_use = &converted_weights;
551 }
552
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100553 if (is_fc_after_conv)
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100554 {
555 // Fully Connected layer after a Convolution Layer without batches
Mohammed Suhail Munshi8e2dede2023-06-27 14:25:58 +0100556 // K Index of matrix multiplication. MatMul performs transpose in kernel, so index is 0 when matmul and transpose enabled
557 const int weight_idx = (use_matmul && transpose_weights) ? 0 : 1;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100558 ARM_COMPUTE_RETURN_ERROR_ON(
559 (weights_to_use->dimension(weight_idx) != (src->dimension(0) * src->dimension(1) * src->dimension(2))));
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100560
561 // Validate flatten kernel
562 ARM_COMPUTE_RETURN_ON_ERROR(ClFlatten::validate(src, &flatten_src));
563 src_to_use = &flatten_src;
564 }
565 else
566 {
567 // Fully Connected layer after a Fully Connected Layer without batches
Mohammed Suhail Munshi8e2dede2023-06-27 14:25:58 +0100568 // K Index of matrix multiplication. MatMul performs transpose in kernel, so index is 0 when matmul and transpose enabled
569 const int weight_idx = (use_matmul && transpose_weights) ? 0 : 1;
570 ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(0) != weights_to_use->dimension(weight_idx));
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100571 }
572
573 // Validate matrix multiply kernel
Mohammed Suhail Munshi2e0714d2023-07-19 14:44:38 +0100574 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(*src_to_use, *weights_to_use, biases, *dst, fc_info, use_matmul));
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100575
576 return Status{};
577}
578
579void ClFullyConnected::run(ITensorPack &tensors)
580{
581 prepare(tensors);
582
Jakub Sujak617ed502023-03-29 11:16:18 +0100583#ifdef ARM_COMPUTE_ASSERTS_ENABLED
584 ++_asrt_run_count;
Mohammed Suhail Munshi8e2dede2023-06-27 14:25:58 +0100585 ARM_COMPUTE_ERROR_ON(_dynamic_gemm && _asrt_prepare_count != _asrt_run_count);
Jakub Sujak617ed502023-03-29 11:16:18 +0100586#endif // ARM_COMPUTE_ASSERTS_ENABLED
587
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100588 auto src = tensors.get_const_tensor(ACL_SRC_0);
589
590 CLAuxTensorHandler flattened_src(offset_int_vec(FlattenedSrc), _flattened_src, tensors, false);
591 CLAuxTensorHandler weights(_weights_to_use_idx, _weights_to_use, tensors, false);
592
593 // Linearize input if it comes from a convolutional layer
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100594 if (_is_fc_after_conv)
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100595 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100596 ITensorPack flatten_pack{{ACL_SRC, src}, {ACL_DST, flattened_src.get()}};
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100597 _flatten->run(flatten_pack);
598 }
599
600 ITensorPack gemm_pack = tensors;
601 gemm_pack.add_const_tensor(ACL_SRC_0, (_is_fc_after_conv) ? flattened_src.get() : src);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100602 if (_weights_to_use_idx != ACL_SRC_1)
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100603 {
604 gemm_pack.add_const_tensor(ACL_SRC_1, weights.get());
605 }
606
Mohammed Suhail Munshia2bb80e2023-06-19 14:57:57 +0100607 // Run MatMul Op
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100608 if (_use_matmul)
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100609 {
Mohammed Suhail Munshia2bb80e2023-06-19 14:57:57 +0100610 // Run matmul kernels for matrix multiplication
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100611 if (_is_quantized)
Mohammed Suhail Munshia2bb80e2023-06-19 14:57:57 +0100612 {
613 CLScheduler::get().enqueue_op(*_matmul_lowp_native_kernel, gemm_pack, true);
614 }
615 else
616 {
617 CLScheduler::get().enqueue_op(*_matmul_native_kernel, gemm_pack, true);
618 }
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100619 }
620 else
621 {
Mohammed Suhail Munshia2bb80e2023-06-19 14:57:57 +0100622 // Run matrix multiply
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100623 if (_is_quantized)
Mohammed Suhail Munshia2bb80e2023-06-19 14:57:57 +0100624 {
625 _mm_gemmlowp->run(gemm_pack);
626 }
627 else
628 {
629 _mm_gemm->run(gemm_pack);
630 }
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100631 }
632}
633
634void ClFullyConnected::prepare(ITensorPack &tensors)
635{
Mohammed Suhail Munshi8e2dede2023-06-27 14:25:58 +0100636 // Note : Running prepare() each run when _use_matmul is true is unnecessary unless weights conversion is needed.
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100637 if (!_is_prepared || _dynamic_gemm)
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100638 {
Jakub Sujak617ed502023-03-29 11:16:18 +0100639#ifdef ARM_COMPUTE_ASSERTS_ENABLED
640 ++_asrt_prepare_count;
Mohammed Suhail Munshi8e2dede2023-06-27 14:25:58 +0100641 ARM_COMPUTE_ERROR_ON(!_dynamic_gemm && !_use_matmul && _asrt_prepare_count > 1);
Jakub Sujak617ed502023-03-29 11:16:18 +0100642#endif // ARM_COMPUTE_ASSERTS_ENABLED
643
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100644 auto weights = tensors.get_const_tensor(ACL_SRC_1);
645
646 CLAuxTensorHandler reshaped_weights(offset_int_vec(TransposedWeights), _reshaped_weights, tensors, false);
647 CLAuxTensorHandler converted_weights(offset_int_vec(ConvertedWeights), _converted_weights, tensors, false);
648
649 // Pointer to current weights
650 const ITensor *cur_weights = weights;
651
Mohammed Suhail Munshi8e2dede2023-06-27 14:25:58 +0100652 // Reshape weights if needed. Disabled when matmul kernels are enabled as matmul fuses transpose.
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100653 if (_transpose_weights && !_use_matmul)
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100654 {
655 // Run reshape weights kernel and mark weights as unused
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100656 ITensorPack transpose_pack{{ACL_SRC, weights}, {ACL_DST, reshaped_weights.get()}};
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100657 _reshape_weights->run(transpose_pack);
658
659 cur_weights->mark_as_unused();
660 cur_weights = reshaped_weights.get();
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100661 }
662
Jakub Sujak617ed502023-03-29 11:16:18 +0100663 // Convert weights if needed
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100664 if (_run_convert_weights)
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100665 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100666 ITensorPack convert_pack{{ACL_SRC, cur_weights}, {ACL_DST, converted_weights.get()}};
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100667 _convert_weights->run(convert_pack);
668
669 cur_weights->mark_as_unused();
670 cur_weights = converted_weights.get();
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100671 }
672
Jakub Sujak617ed502023-03-29 11:16:18 +0100673 ITensorPack gemm_pack = tensors;
674 gemm_pack.add_const_tensor(ACL_SRC_1, cur_weights);
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100675
Mohammed Suhail Munshi8e2dede2023-06-27 14:25:58 +0100676 // Prepare GEMM prepare and release unused weights
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100677 if (_dynamic_gemm || !_use_matmul)
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100678 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100679 if (!_is_quantized)
Mohammed Suhail Munshia2bb80e2023-06-19 14:57:57 +0100680 {
681 _mm_gemm->prepare(gemm_pack);
682 }
683 else
684 {
685 _mm_gemmlowp->prepare(gemm_pack);
686 }
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100687 }
Mohammed Suhail Munshia2bb80e2023-06-19 14:57:57 +0100688
Georgios Pinitas529b5a22021-07-27 15:55:30 +0100689 _is_prepared = true;
690 }
691}
692
// Returns the auxiliary-memory requirements (flattened src, reshaped/converted
// weights, nested GEMM workspace) populated during configure().
experimental::MemoryRequirements ClFullyConnected::workspace() const
{
    return _aux_mem;
}
697} // namespace opencl
698} // namespace arm_compute