/*
 * Copyright (c) 2017-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"

#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Log.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.h"
#include "utils/TypePrinter.h"

namespace arm_compute
{
using namespace arm_compute::misc::shape_calculator;
using namespace arm_compute::cl_gemm;

namespace
{
// Validate lhs_info and rhs_info for native kernel
inline bool validate_lhs_rhs_info_native(const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const ITensorInfo *a, const ITensorInfo *b, const GEMMReshapeInfo &reshape_info)
{
    // Validate GEMMLHSMatrixInfo and GEMMRHSMatrixInfo for the native kernel
    TensorInfo mm_result_s32_info{};
    // Output tensor auto initialization if not yet initialized
    auto_init_if_empty(mm_result_s32_info, a->clone()->set_tensor_shape(compute_mm_shape(*a, *b, false, reshape_info)).set_data_type(DataType::S32));
    // Validate mm kernel
    // NOTE: Ignore all other parameters (e.g. output stage etc.) and only validate lhs and rhs info
    // NOTE: This assumes:
    //  1. lhs and rhs info's validity does not depend on these other parameters and vice versa (in CLGEMMLowpMatrixMultiplyNativeKernel.cpp validate_arguments).
    //  2. lhs and rhs info does not cause window and padding issues through side effects (in CLGEMMLowpMatrixMultiplyNativeKernel.cpp validate_and_configure_window).
    if(!bool(CLGEMMLowpMatrixMultiplyNativeKernel::validate(a, b, &mm_result_s32_info, lhs_info, rhs_info, reshape_info)))
    {
        return false;
    }
    return true;
}

// Automatically select between mlgo (prioritized) and default heuristics for native kernel configs
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> auto_select_gemm_config_native(auto_heuristics::CommonQuery query, const ITensorInfo *a, const ITensorInfo *b, const GEMMReshapeInfo &reshape_info)
{
    auto config = auto_heuristics::select_mlgo_gemm_config_native(query);
    if(config)
    {
        if(validate_lhs_rhs_info_native(config.lhs_info, config.rhs_info, a, b, reshape_info))
        {
            ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use native config from mlgo heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
            return { config.lhs_info, config.rhs_info };
        }
    }
    config = auto_heuristics::select_default_gemm_config_native(query);
    ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use native config from default heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
    return { config.lhs_info, config.rhs_info };
}

// Validate lhs_info and rhs_info for reshaped only rhs kernel
inline bool validate_lhs_rhs_info_reshaped_only_rhs(const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *output,
                                                    unsigned int m, unsigned int n, unsigned int k, bool reinterpret_input_as_3d, int depth_output_gemm3d)
{
    // Validate GEMMLHSMatrixInfo and GEMMRHSMatrixInfo for the reshaped only rhs kernel
    TensorInfo tmp_b_info{};
    // Validate reshape RHS kernel
    auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
    if(!bool(CLGEMMReshapeRHSMatrixKernel::validate(b, &tmp_b_info, rhs_info)))
    {
        return false;
    }
    // Validate mm kernel
    // NOTE: Ignore all other parameters (e.g. depth_output_gemm3d, output stage etc.) and only validate lhs and rhs info
    // NOTE: This assumes:
    //  1. lhs and rhs info's validity does not depend on these other parameters and vice versa (in CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp validate_arguments).
    //  2. lhs and rhs info does not cause window and padding issues through side effects (in CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp validate_and_configure_window).
    GEMMKernelInfo gemm_kernel_info;
    gemm_kernel_info.m                       = m;
    gemm_kernel_info.n                       = n;
    gemm_kernel_info.k                       = k;
    gemm_kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;
    gemm_kernel_info.depth_output_gemm3d     = depth_output_gemm3d;
    gemm_kernel_info.lhs_info                = lhs_info;
    gemm_kernel_info.rhs_info                = rhs_info;
    // Since we ignore the output stage, the output data type has to be S32 to pass the validation
    TensorInfo output_info_copy(*output);
    output_info_copy.set_data_type(DataType::S32);
    if(!bool(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::validate(a, &tmp_b_info, &output_info_copy, gemm_kernel_info)))
    {
        return false;
    }
    return true;
}

// Automatically select between mlgo (prioritized) and default heuristics for reshaped only rhs kernel configs
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery query, bool reinterpret_input_as_3d, int depth_output_gemm3d,
                                                                                          const ITensorInfo *a,
                                                                                          const ITensorInfo *b, const ITensorInfo *output)
{
    auto config = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(query);
    if(config)
    {
        if(validate_lhs_rhs_info_reshaped_only_rhs(config.lhs_info, config.rhs_info, a, b, output, query.m, query.n, query.k, reinterpret_input_as_3d, depth_output_gemm3d))
        {
            ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from mlgo heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
            return { config.lhs_info, config.rhs_info };
        }
    }
    config = auto_heuristics::select_default_gemm_config_reshaped_only_rhs(query);
    ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from default heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
    return { config.lhs_info, config.rhs_info };
}

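// Map the kernel type selected by the heuristics to whether the RHS matrix gets reshaped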
inline bool is_gemm_reshaped(CLGEMMKernelType kernel_type)
{
    switch(kernel_type)
    {
        case CLGEMMKernelType::NATIVE:
            return false;
        case CLGEMMKernelType::RESHAPED_ONLY_RHS:
            return true;
        default:
            ARM_COMPUTE_ERROR("Not supported gemmlowp kernel!");
    }
}
} // namespace

CLGEMMLowpMatrixMultiplyCore::CLGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(std::move(memory_manager)),
      _weights_to_qasymm8(std::make_unique<CLDepthConvertLayerKernel>()),
      _mm_native_kernel(std::make_unique<CLGEMMLowpMatrixMultiplyNativeKernel>()),
      _mm_reshaped_only_rhs_kernel(std::make_unique<CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel>()),
      _mtx_b_reshape_kernel(std::make_unique<CLGEMMReshapeRHSMatrixKernel>()),
      _mtx_a_reduction_kernel(std::make_unique<CLGEMMLowpMatrixAReductionKernel>()),
      _mtx_b_reduction_kernel(std::make_unique<CLGEMMLowpMatrixBReductionKernel>()),
      _offset_contribution_kernel(std::make_unique<CLGEMMLowpOffsetContributionKernel>()),
      _offset_contribution_output_stage_kernel(std::make_unique<CLGEMMLowpOffsetContributionOutputStageKernel>()),
      _qasymm8_weights(),
      _vector_sum_col(),
      _vector_sum_row(),
      _tmp_b(),
      _mm_result_s32(),
      _gemm_output_stage_multipliers(),
      _gemm_output_stage_shifts(),
      _matrix_a(nullptr),
      _original_b(nullptr),
      _output(nullptr),
      _a_offset(0),
      _b_offset(0),
      _is_gemm_reshaped(true),
      _reshape_b_only_on_first_run(false),
      _is_prepared(false),
      _run_output_stage(false),
      _convert_to_qasymm8(false),
      _run_offset_contribution(false)
{
}

CLGEMMLowpMatrixMultiplyCore::~CLGEMMLowpMatrixMultiplyCore() = default;

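// Illustrative usage sketch (not part of the library; the tensor shapes, quantization parameters
// and scheduler setup below are assumptions made only for this example):
//
//     CLScheduler::get().default_init();
//
//     CLTensor a{}, b{}, dst{};
//     a.allocator()->init(TensorInfo(TensorShape(32U, 16U), 1, DataType::QASYMM8, QuantizationInfo(0.5f, 10)));  // K=32, M=16
//     b.allocator()->init(TensorInfo(TensorShape(8U, 32U), 1, DataType::QASYMM8, QuantizationInfo(0.25f, 5)));   // N=8,  K=32
//     dst.allocator()->init(TensorInfo(TensorShape(8U, 16U), 1, DataType::S32));                                 // N=8,  M=16
//
//     CLGEMMLowpMatrixMultiplyCore gemmlowp;
//     gemmlowp.configure(&a, &b, nullptr, &dst, GEMMInfo());
//
//     a.allocator()->allocate();
//     b.allocator()->allocate();
//     dst.allocator()->allocate();
//     // ... map and fill a and b ...
//     gemmlowp.run();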
void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info)
{
    configure(CLKernelLibrary::get().get_compile_context(), a, b, c, output, gemm_info);
}

void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);
    ARM_COMPUTE_ERROR_THROW_ON(CLGEMMLowpMatrixMultiplyCore::validate(a->info(), b->info(), c != nullptr ? c->info() : nullptr, output->info(), gemm_info));

    _is_prepared                 = false;
    _original_b                  = b;
    _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
    _a_offset                    = a->info()->quantization_info().uniform().offset;
    _matrix_a                    = a;
    _output                      = output;

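    // When the weights are per-channel symmetric quantized (QSYMM8_PER_CHANNEL) and the input is
    // QASYMM8, the weights are converted to QASYMM8 on the fly and use a fixed offset of -128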
    _convert_to_qasymm8 = is_data_type_quantized_per_channel(b->info()->data_type()) && is_data_type_quantized_symmetric(b->info()->data_type())
                          && a->info()->data_type() == DataType::QASYMM8;
    _b_offset = _convert_to_qasymm8 ? -128 : b->info()->quantization_info().uniform().offset;

    // Get the GPU target
    const GPUTarget gpu_target = CLScheduler::get().target();

    // Set the target for the kernels
    _mm_native_kernel->set_target(gpu_target);
    _mm_reshaped_only_rhs_kernel->set_target(gpu_target);

    GEMMRHSMatrixInfo rhs_info;
    GEMMLHSMatrixInfo lhs_info;

    // Arguments used by GEMMReshapeInfo
    // If we pass the matrix A and matrix B reshaped to CLGEMMMatrixMultiplyKernel, we need to pass m, n, k, mult_transpose1xW_width and mult_interleave4x4_height to GEMMReshapeInfo
    // in order to know how the matrices have been reshaped
    bool               reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
    const unsigned int m                       = reinterpret_input_as_3d ? (a->info()->dimension(1) * a->info()->dimension(2)) : a->info()->dimension(1);
    const unsigned int n                       = b->info()->dimension(0);
    const unsigned int k                       = a->info()->dimension(0);
    const unsigned int batch_size              = reinterpret_input_as_3d ? a->info()->dimension(3) : a->info()->dimension(2);
    const int          depth_output_gemm3d     = gemm_info.depth_output_gemm3d();

    const auto reshape_info = GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d);

    // Check if we need to reshape the matrix A and matrix B
    _is_gemm_reshaped = is_gemm_reshaped(auto_select_gemm_kernel(auto_heuristics::CommonQuery{ gpu_target, a->info()->data_type(), m, n, k, batch_size }, _reshape_b_only_on_first_run));

    if(_convert_to_qasymm8)
    {
        // Set data type for converted weights
        TensorInfo weights_info(*b->info());
        weights_info.set_data_type(DataType::QASYMM8);
        _qasymm8_weights.allocator()->init(weights_info);
        _weights_to_qasymm8->configure(compile_context, b, &_qasymm8_weights, ConvertPolicy::WRAP, 0);
    }

    const ICLTensor *matrix_b = _convert_to_qasymm8 ? &_qasymm8_weights : b;
    if(_is_gemm_reshaped)
    {
        matrix_b = &_tmp_b;

        if(!_reshape_b_only_on_first_run)
        {
            _memory_group.manage(&_tmp_b);
        }

        // Pick up the GEMM configuration
        // It doesn't matter whether the data type is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affects the shape configuration
        std::tie(lhs_info, rhs_info) = auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size }, reinterpret_input_as_3d,
                                                                                 depth_output_gemm3d,
                                                                                 a->info(), _convert_to_qasymm8 ? _qasymm8_weights.info() : b->info(), output->info());

        // Configure reshape RHS kernel
        _mtx_b_reshape_kernel->configure(compile_context, _convert_to_qasymm8 ? &_qasymm8_weights : b, &_tmp_b, rhs_info);
    }

    // Using default reduction info
    const GEMMLowpReductionKernelInfo reduction_info{};

    // Initialize matrix B reduction kernel only if _a_offset is not equal to 0
    if(_a_offset != 0)
    {
        TensorInfo info_vector_sum_col(compute_reductionA_shape(*b->info()), 1, DataType::S32);
        _vector_sum_col.allocator()->init(info_vector_sum_col);
        if(!_reshape_b_only_on_first_run)
        {
            _memory_group.manage(&_vector_sum_col);
        }

        // Configure Matrix B reduction kernel
        _mtx_b_reduction_kernel->configure(compile_context, _convert_to_qasymm8 ? &_qasymm8_weights : b, &_vector_sum_col, reduction_info);
    }

    // Initialize Matrix A reduction kernel only if _b_offset is not equal to 0
    if(_b_offset != 0)
    {
        TensorInfo info_vector_sum_row(compute_reductionB_shape(*a->info()), 1, DataType::S32);
        _vector_sum_row.allocator()->init(info_vector_sum_row);
        _memory_group.manage(&_vector_sum_row);

        // Configure matrix A reduction kernel
        _mtx_a_reduction_kernel->configure(compile_context, a, &_vector_sum_row, reduction_info);
    }

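    // Collect the parameters shared by all the matrix multiply kernel variants configured below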
    GEMMKernelInfo gemm_kernel_info;
    gemm_kernel_info.m                       = m;
    gemm_kernel_info.n                       = n;
    gemm_kernel_info.k                       = k;
    gemm_kernel_info.depth_output_gemm3d     = depth_output_gemm3d;
    gemm_kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;
    gemm_kernel_info.lhs_info                = lhs_info;
    gemm_kernel_info.rhs_info                = rhs_info;
    gemm_kernel_info.a_offset                = _a_offset;
    gemm_kernel_info.b_offset                = _b_offset;
    // If GEMMLowpOutputStage != NONE, fuse the offset contribution with the output stage
    if(gemm_info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE)
    {
        // Configure offset contribution kernel
        const size_t num_filters = (gemm_info.gemmlowp_output_stage().is_quantized_per_channel) ? gemm_info.gemmlowp_output_stage().gemmlowp_multipliers.size() : 1;

        _gemm_output_stage_multipliers.allocator()->init(TensorInfo(TensorShape(num_filters), 1, DataType::S32));
        _gemm_output_stage_shifts.allocator()->init(TensorInfo(TensorShape(num_filters), 1, DataType::S32));

        GEMMLowpOutputStageInfo gemmlowp_output_stage = gemm_info.gemmlowp_output_stage();
        gemmlowp_output_stage.output_data_type        = _matrix_a->info()->data_type();

        gemm_kernel_info.output_stage = gemmlowp_output_stage;

        if(_is_gemm_reshaped && gemmlowp_output_stage.type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT)
        {
            // Configure and tune matrix multiply kernel with fused output stage
            _mm_reshaped_only_rhs_kernel->configure(compile_context, _matrix_a, matrix_b, output, gemm_kernel_info, _a_offset == 0 ? nullptr : &_vector_sum_col,
                                                    _b_offset == 0 ? nullptr : &_vector_sum_row, c, &_gemm_output_stage_multipliers, &_gemm_output_stage_shifts);
        }
        else
        {
            _run_output_stage = true;

            _memory_group.manage(&_mm_result_s32);

            if(_is_gemm_reshaped)
            {
                _mm_reshaped_only_rhs_kernel->configure(compile_context, _matrix_a, matrix_b, &_mm_result_s32, gemm_kernel_info);
            }
            else
            {
                // Pick up the GEMM configuration
                // It doesn't matter whether the data type is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affects the shape configuration
                std::tie(lhs_info, rhs_info) = auto_select_gemm_config_native(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size },
                                                                              _matrix_a->info(), _convert_to_qasymm8 ? _qasymm8_weights.info() : matrix_b->info(), reshape_info);

                // Configure matrix multiply kernel
                _mm_native_kernel->configure(compile_context, _matrix_a, matrix_b, &_mm_result_s32, lhs_info, rhs_info, reshape_info);

                _offset_contribution_output_stage_kernel->configure(compile_context, &_mm_result_s32, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, c, output,
                                                                     a->info()->dimension(0),
                                                                     _a_offset, _b_offset, gemmlowp_output_stage, &_gemm_output_stage_multipliers, &_gemm_output_stage_shifts);
                _mm_result_s32.allocator()->allocate();
            }
        }

        _gemm_output_stage_multipliers.allocator()->allocate();
        _gemm_output_stage_shifts.allocator()->allocate();
        // Compute GEMM output multipliers and shifts for output stage
        _gemm_output_stage_multipliers.map();
        _gemm_output_stage_shifts.map();
        std::memcpy(_gemm_output_stage_multipliers.ptr_to_element(Coordinates(0)), gemm_info.gemmlowp_output_stage().gemmlowp_multipliers.data(), num_filters * sizeof(int32_t));
        std::memcpy(_gemm_output_stage_shifts.ptr_to_element(Coordinates(0)), gemm_info.gemmlowp_output_stage().gemmlowp_shifts.data(), num_filters * sizeof(int32_t));
        _gemm_output_stage_multipliers.unmap();
        _gemm_output_stage_shifts.unmap();
    }
    else
    {
        _run_offset_contribution = true;
        if(_is_gemm_reshaped)
        {
            // Configure and tune matrix multiply kernel
            _mm_reshaped_only_rhs_kernel->configure(compile_context, _matrix_a, matrix_b, output, gemm_kernel_info);
        }
        else
        {
            // Pick up the GEMM configuration
            // It doesn't matter whether the data type is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affects the shape configuration
            std::tie(lhs_info, rhs_info) = auto_select_gemm_config_native(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size },
                                                                          a->info(), _convert_to_qasymm8 ? _qasymm8_weights.info() : b->info(), reshape_info);

            // Configure matrix multiply kernel
            _mm_native_kernel->configure(compile_context, _matrix_a, matrix_b, output, lhs_info, rhs_info, reshape_info);
        }

        // Configure offset contribution kernel
        _offset_contribution_kernel->configure(compile_context, output, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, c, a->info()->dimension(0), _a_offset,
                                               _b_offset);
    }

    // Allocate tensors
    if(_is_gemm_reshaped)
    {
        if(!_reshape_b_only_on_first_run)
        {
            _tmp_b.allocator()->allocate();
        }
    }

    if(_a_offset != 0 && !_reshape_b_only_on_first_run)
    {
        _vector_sum_col.allocator()->allocate();
    }

    if(_b_offset != 0)
    {
        _vector_sum_row.allocator()->allocate();
    }
}

Status CLGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(b, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8, DataType::QSYMM8_PER_CHANNEL);
    ARM_COMPUTE_RETURN_ERROR_ON(a->data_type() == DataType::QASYMM8 && b->data_type() == DataType::QASYMM8_SIGNED);
    ARM_COMPUTE_RETURN_ERROR_ON(a->data_type() == DataType::QASYMM8_SIGNED && b->data_type() == DataType::QASYMM8);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(), "Matrix A already reshaped is not supported");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(), "Matrix B already reshaped is not supported");

    int32_t a_offset = a->quantization_info().uniform().offset;
    int32_t b_offset = b->quantization_info().uniform().offset;

    const ITensorInfo *matrix_a_info = a;

    TensorInfo        tmp_b_info{};
    GEMMRHSMatrixInfo rhs_info;
    GEMMLHSMatrixInfo lhs_info;

    // Get the GPU target
    const GPUTarget gpu_target = CLScheduler::get().target();

    bool               reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
    const unsigned int m                       = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n                       = b->dimension(0);
    const unsigned int k                       = a->dimension(0);
    const unsigned int batch_size              = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
    const int          depth_output_gemm3d     = gemm_info.depth_output_gemm3d();

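    // Mirror the kernel type selection performed in configure() so that validation follows the same path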
    bool reshape_matrix_b = is_gemm_reshaped(auto_select_gemm_kernel(auto_heuristics::CommonQuery{ gpu_target, a->data_type(), m, n, k, batch_size }, gemm_info.reshape_b_only_on_first_run()));

    const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d);

    bool convert_to_qasymm8 = is_data_type_quantized_per_channel(b->data_type()) && is_data_type_quantized_symmetric(b->data_type())
                              && is_data_type_quantized_asymmetric(a->data_type());
    TensorInfo weights_info(*b);
    if(convert_to_qasymm8)
    {
        b_offset = -128;
        weights_info.set_data_type(DataType::QASYMM8);
        ARM_COMPUTE_RETURN_ON_ERROR(CLDepthConvertLayerKernel::validate(b, &weights_info, ConvertPolicy::WRAP, 0));
    }
    const ITensorInfo *matrix_b_info = &weights_info;
    if(reshape_matrix_b)
    {
        matrix_b_info = &tmp_b_info;

        // Pick up the GEMM configuration
        // NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails
        // It doesn't matter whether the data type is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affects the shape configuration
        const auto res = select_default_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size });
        lhs_info       = res.lhs_info;
        rhs_info       = res.rhs_info;

        // Validate reshape RHS kernel
        auto_init_if_empty(tmp_b_info, weights_info.clone()->set_tensor_shape(compute_rhs_reshaped_shape(weights_info, rhs_info)));
        ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMReshapeRHSMatrixKernel::validate(&weights_info, &tmp_b_info, rhs_info));
    }

    TensorInfo info_vector_sum_col{};
    TensorInfo info_vector_sum_row{};

    const GEMMLowpReductionKernelInfo reduction_info;
    // Validate matrix B reduction kernel only if _a_offset is not equal to 0
    if(a_offset != 0)
    {
        info_vector_sum_col = TensorInfo(compute_reductionA_shape(weights_info), 1, DataType::S32);

        // Validate Matrix B reduction kernel
        ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixBReductionKernel::validate(&weights_info, &info_vector_sum_col, reduction_info));
    }

    // Validate Matrix A reduction kernel only if _b_offset is not equal to 0
    if(b_offset != 0)
    {
        info_vector_sum_row = TensorInfo(compute_reductionB_shape(*a), 1, DataType::S32);

        // Validate matrix A reduction kernel
        ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixAReductionKernel::validate(a, &info_vector_sum_row, reduction_info));
    }

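    // Descriptor shared by the matrix multiply kernel validations below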
    GEMMKernelInfo gemm_kernel_info;
    gemm_kernel_info.m                       = m;
    gemm_kernel_info.n                       = n;
    gemm_kernel_info.k                       = k;
    gemm_kernel_info.depth_output_gemm3d     = depth_output_gemm3d;
    gemm_kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;
    gemm_kernel_info.lhs_info                = lhs_info;
    gemm_kernel_info.rhs_info                = rhs_info;
    gemm_kernel_info.a_offset                = a_offset;
    gemm_kernel_info.b_offset                = b_offset;
    if(gemm_info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE)
    {
        const size_t num_filters = (gemm_info.gemmlowp_output_stage().is_quantized_per_channel) ? gemm_info.gemmlowp_output_stage().gemmlowp_multipliers.size() : 1;

        const TensorInfo gemm_output_stage_multipliers_shifts_info(TensorInfo(TensorShape(num_filters), 1, DataType::S32));

        GEMMLowpOutputStageInfo gemmlowp_output_stage = gemm_info.gemmlowp_output_stage();
        gemmlowp_output_stage.output_data_type        = a->data_type();

        gemm_kernel_info.output_stage = gemmlowp_output_stage;
        if(reshape_matrix_b && gemm_info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT)
        {
            ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::validate(matrix_a_info, matrix_b_info, output, gemm_kernel_info,
                                                                                                a_offset == 0 ? nullptr : &info_vector_sum_col,
                                                                                                b_offset == 0 ? nullptr : &info_vector_sum_row,
                                                                                                c,
                                                                                                &gemm_output_stage_multipliers_shifts_info,
                                                                                                &gemm_output_stage_multipliers_shifts_info));
        }
        else
        {
            TensorInfo mm_result_s32_info{};

            if(reshape_matrix_b)
            {
                // Output tensor auto initialization if not yet initialized
                auto_init_if_empty(mm_result_s32_info, a->clone()->set_tensor_shape(compute_mm_shape(*matrix_a_info, *matrix_b_info, reshape_info)).set_data_type(DataType::S32));

                // Validate matrix multiply
                ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::validate(matrix_a_info, matrix_b_info, &mm_result_s32_info, gemm_kernel_info));
            }
            else
            {
                // Output tensor auto initialization if not yet initialized
                auto_init_if_empty(mm_result_s32_info, a->clone()->set_tensor_shape(compute_mm_shape(*matrix_a_info, *matrix_b_info, false, reshape_info)).set_data_type(DataType::S32));

                // Pick up the GEMM configuration
                // NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails
                // It doesn't matter whether the data type is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affects the shape configuration
                const auto res = select_default_gemm_config_native(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size });
                lhs_info       = res.lhs_info;
                rhs_info       = res.rhs_info;

                // Validate matrix multiply
                ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyNativeKernel::validate(matrix_a_info, matrix_b_info, &mm_result_s32_info, lhs_info, rhs_info, reshape_info));
            }

            // Validate offset contribution kernel
            ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpOffsetContributionOutputStageKernel::validate(&mm_result_s32_info,
                                                                                                a_offset == 0 ? nullptr : &info_vector_sum_col,
                                                                                                b_offset == 0 ? nullptr : &info_vector_sum_row,
                                                                                                c,
                                                                                                output,
                                                                                                a_offset, b_offset,
                                                                                                gemmlowp_output_stage,
                                                                                                &gemm_output_stage_multipliers_shifts_info,
                                                                                                &gemm_output_stage_multipliers_shifts_info));
        }
    }
    else
    {
        if(reshape_matrix_b)
        {
            // Validate matrix multiply
            ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::validate(matrix_a_info, matrix_b_info, output, gemm_kernel_info));
        }
        else
        {
            // Pick up the GEMM configuration
            // It doesn't matter whether the data type is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affects the shape configuration
            const auto res = select_default_gemm_config_native(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size });
            lhs_info       = res.lhs_info;
            rhs_info       = res.rhs_info;

            // Validate matrix multiply
            ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyNativeKernel::validate(matrix_a_info, matrix_b_info, output, lhs_info, rhs_info, reshape_info));
        }

        if(output->total_size() != 0)
        {
            // Validate offset contribution kernel
            ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpOffsetContributionKernel::validate(output,
                                                                                     a_offset == 0 ? nullptr : &info_vector_sum_col,
                                                                                     b_offset == 0 ? nullptr : &info_vector_sum_row,
                                                                                     c,
                                                                                     a_offset, b_offset));
        }
    }

    return Status{};
}

void CLGEMMLowpMatrixMultiplyCore::run()
{
    prepare();

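    // Acquire the memory managed by the memory group for the duration of this run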
    MemoryGroupResourceScope scope_mg(_memory_group);

    if(_is_gemm_reshaped)
    {
        if(!_reshape_b_only_on_first_run)
        {
            // Run reshape matrix B
            CLScheduler::get().enqueue(*_mtx_b_reshape_kernel, false);
        }
    }

    // Run matrix B reduction kernel only if _a_offset is not equal to 0
    if(_a_offset != 0 && !_reshape_b_only_on_first_run)
    {
        CLScheduler::get().enqueue(*_mtx_b_reduction_kernel, false);
    }

    // Run matrix A reduction kernel only if _b_offset is not equal to 0
    if(_b_offset != 0)
    {
        CLScheduler::get().enqueue(*_mtx_a_reduction_kernel, false);
    }

    // Run matrix multiply
    if(_is_gemm_reshaped)
    {
        CLScheduler::get().enqueue(*_mm_reshaped_only_rhs_kernel, false);
    }
    else
    {
        CLScheduler::get().enqueue(*_mm_native_kernel, false);
    }
    if(_run_output_stage)
    {
        // Run offset contribution/output stage kernel
        CLScheduler::get().enqueue(*_offset_contribution_output_stage_kernel, true);
    }
    if(_run_offset_contribution)
    {
        // Run offset contribution kernel
        CLScheduler::get().enqueue(*_offset_contribution_kernel, true);
    }
}

void CLGEMMLowpMatrixMultiplyCore::prepare()
{
    if(!_is_prepared)
    {
        if(_convert_to_qasymm8)
        {
            _qasymm8_weights.allocator()->allocate();
            CLScheduler::get().enqueue(*_weights_to_qasymm8, false);
        }

        if(_is_gemm_reshaped && _reshape_b_only_on_first_run)
        {
            ARM_COMPUTE_ERROR_ON(!_original_b->is_used());

            // Run reshape kernel and mark original weights tensor as unused
            _tmp_b.allocator()->allocate();
            CLScheduler::get().enqueue(*_mtx_b_reshape_kernel, false);
            _original_b->mark_as_unused();
        }

        // Run matrix B reduction kernel only if _a_offset is not equal to 0
        if(_a_offset != 0 && _reshape_b_only_on_first_run)
        {
            _vector_sum_col.allocator()->allocate();
            CLScheduler::get().enqueue(*_mtx_b_reduction_kernel, false);
        }

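        // Wait for the one-off kernels above to complete before marking the function as prepared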
        CLScheduler::get().queue().finish();
        _is_prepared = true;
    }
}
} // namespace arm_compute