/*
 * Copyright (c) 2017-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"

#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Log.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.h"
#include "utils/TypePrinter.h"

namespace arm_compute
{
using namespace arm_compute::misc::shape_calculator;
using namespace arm_compute::cl_gemm;

namespace
{
inline bool validate_gemm_kernel(CLGEMMKernelType kernel_type)
{
    switch(kernel_type)
    {
        case CLGEMMKernelType::NATIVE:
        case CLGEMMKernelType::RESHAPED_ONLY_RHS:
        {
            return true;
        }
        default:
        {
            return false;
        }
    }
}
// Automatically select between mlgo (prioritized) and default heuristics for gemm kernel type
inline CLGEMMKernelType auto_select_gemm_kernel(auto_heuristics::CommonQuery query, bool reshape_b_only_on_first_run)
{
    auto gemm_kernel = auto_heuristics::select_mlgo_gemm_kernel(query, reshape_b_only_on_first_run);
    if(bool(gemm_kernel))
    {
        if(validate_gemm_kernel(gemm_kernel.gemm_type))
        {
            ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use gemm kernel from mlgo heuristics: %s.", to_string(gemm_kernel.gemm_type).c_str());
            return gemm_kernel.gemm_type;
        }
    }
    gemm_kernel = auto_heuristics::select_default_gemm_kernel(query, reshape_b_only_on_first_run);
    ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use gemm kernel from default heuristics: %s.", to_string(gemm_kernel.gemm_type).c_str());
    return gemm_kernel.gemm_type;
}
// Validate lhs_info and rhs_info for native kernel
inline bool validate_lhs_rhs_info_native(const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const ITensorInfo *a, const ITensorInfo *b, const GEMMReshapeInfo &reshape_info)
{
    // Validate GEMMLHSMatrixInfo and GEMMRHSMatrixInfo for the native kernel
    TensorInfo mm_result_s32_info{};
    // Output tensor auto initialization if not yet initialized
    auto_init_if_empty(mm_result_s32_info, a->clone()->set_tensor_shape(compute_mm_shape(*a, *b, false, reshape_info)).set_data_type(DataType::S32));
    // Validate mm kernel
    // NOTE: Ignore all other parameters (e.g. output stage etc.) and only validate lhs and rhs info
    // NOTE: This assumes:
    // 1. lhs and rhs info's validity does not depend on these other parameters and vice versa (in CLGEMMLowpMatrixMultiplyNativeKernel.cpp validate_arguments).
    // 2. lhs and rhs info does not cause window and padding issues through side effects (in CLGEMMLowpMatrixMultiplyNativeKernel.cpp validate_and_configure_window).
    if(!bool(CLGEMMLowpMatrixMultiplyNativeKernel::validate(a, b, &mm_result_s32_info, lhs_info, rhs_info, reshape_info)))
    {
        return false;
    }
    return true;
}

// Automatically select between mlgo (prioritized) and default heuristics for native kernel configs
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> auto_select_gemm_config_native(auto_heuristics::CommonQuery query, const ITensorInfo *a, const ITensorInfo *b, const GEMMReshapeInfo &reshape_info)
{
    auto config = auto_heuristics::select_mlgo_gemm_config_native(query);
    if(config)
    {
        if(validate_lhs_rhs_info_native(config.lhs_info, config.rhs_info, a, b, reshape_info))
        {
            ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use native config from mlgo heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
            return { config.lhs_info, config.rhs_info };
        }
    }
    config = auto_heuristics::select_default_gemm_config_native(query);
    ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use native config from default heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
    return { config.lhs_info, config.rhs_info };
}

// Validate lhs_info and rhs_info for reshaped only rhs kernel
inline bool validate_lhs_rhs_info_reshaped_only_rhs(const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *output,
                                                    unsigned int m, unsigned int n, unsigned int k, bool reinterpret_input_as_3d, int depth_output_gemm3d)
{
    // Validate GEMMLHSMatrixInfo and GEMMRHSMatrixInfo for the reshaped only rhs kernel
    TensorInfo tmp_b_info{};
    // Validate reshape RHS kernel
    auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
    if(!bool(CLGEMMReshapeRHSMatrixKernel::validate(b, &tmp_b_info, rhs_info)))
    {
        return false;
    }
    // Validate mm kernel
    // NOTE: Ignore all other parameters (e.g. depth_output_gemm3d, output stage etc.) and only validate lhs and rhs info
    // NOTE: This assumes:
    // 1. lhs and rhs info's validity does not depend on these other parameters and vice versa (in CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp validate_arguments).
    // 2. lhs and rhs info does not cause window and padding issues through side effects (in CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp validate_and_configure_window).
    GEMMKernelInfo gemm_kernel_info;
    gemm_kernel_info.m                       = m;
    gemm_kernel_info.n                       = n;
    gemm_kernel_info.k                       = k;
    gemm_kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;
    gemm_kernel_info.depth_output_gemm3d     = depth_output_gemm3d;
    gemm_kernel_info.lhs_info                = lhs_info;
    gemm_kernel_info.rhs_info                = rhs_info;
    // Since we ignore the output stage, output data type has to be S32 to pass the validation
    TensorInfo output_info_copy(*output);
    output_info_copy.set_data_type(DataType::S32);
    if(!bool(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::validate(a, &tmp_b_info, &output_info_copy, gemm_kernel_info)))
    {
        return false;
    }
    return true;
}

// Automatically select between mlgo (prioritized) and default heuristics for reshaped only rhs kernel configs
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery query, bool reinterpret_input_as_3d, int depth_output_gemm3d,
                                                                                          const ITensorInfo *a,
                                                                                          const ITensorInfo *b, const ITensorInfo *output)
{
    auto config = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(query);
    if(config)
    {
        if(validate_lhs_rhs_info_reshaped_only_rhs(config.lhs_info, config.rhs_info, a, b, output, query.m, query.n, query.k, reinterpret_input_as_3d, depth_output_gemm3d))
        {
            ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from mlgo heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
            return { config.lhs_info, config.rhs_info };
        }
    }
    config = auto_heuristics::select_default_gemm_config_reshaped_only_rhs(query);
    ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from default heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
    return { config.lhs_info, config.rhs_info };
}

inline bool is_gemm_reshaped(CLGEMMKernelType kernel_type)
{
    switch(kernel_type)
    {
        case CLGEMMKernelType::NATIVE:
            return false;
        case CLGEMMKernelType::RESHAPED_ONLY_RHS:
            return true;
        default:
            ARM_COMPUTE_ERROR("Not supported gemmlowp kernel!");
    }
}
} // namespace

CLGEMMLowpMatrixMultiplyCore::CLGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(std::move(memory_manager)),
      _weights_to_qasymm8(std::make_unique<CLDepthConvertLayerKernel>()),
      _mm_native_kernel(std::make_unique<CLGEMMLowpMatrixMultiplyNativeKernel>()),
      _mm_reshaped_only_rhs_kernel(std::make_unique<CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel>()),
      _mtx_b_reshape_kernel(std::make_unique<CLGEMMReshapeRHSMatrixKernel>()),
      _mtx_a_reduction_kernel(std::make_unique<CLGEMMLowpMatrixAReductionKernel>()),
      _mtx_b_reduction_kernel(std::make_unique<CLGEMMLowpMatrixBReductionKernel>()),
      _offset_contribution_kernel(std::make_unique<CLGEMMLowpOffsetContributionKernel>()),
      _offset_contribution_output_stage_kernel(std::make_unique<CLGEMMLowpOffsetContributionOutputStageKernel>()),
      _qasymm8_weights(),
      _vector_sum_col(),
      _vector_sum_row(),
      _tmp_b(),
      _mm_result_s32(),
      _gemm_output_stage_multipliers(),
      _gemm_output_stage_shifts(),
      _matrix_a(nullptr),
      _original_b(nullptr),
      _output(nullptr),
      _a_offset(0),
      _b_offset(0),
      _is_gemm_reshaped(true),
      _reshape_b_only_on_first_run(false),
      _is_prepared(false),
      _run_output_stage(false),
      _convert_to_qasymm8(false),
      _run_offset_contribution(false)
{
}

CLGEMMLowpMatrixMultiplyCore::~CLGEMMLowpMatrixMultiplyCore() = default;

void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info)
{
    configure(CLKernelLibrary::get().get_compile_context(), a, b, c, output, gemm_info);
}

void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);
    ARM_COMPUTE_ERROR_THROW_ON(CLGEMMLowpMatrixMultiplyCore::validate(a->info(), b->info(), c != nullptr ? c->info() : nullptr, output->info(), gemm_info));

    _is_prepared                 = false;
    _original_b                  = b;
    _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
    _a_offset                    = a->info()->quantization_info().uniform().offset;
    _matrix_a                    = a;
    _output                      = output;

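    // Per-channel symmetric weights (QSYMM8_PER_CHANNEL) are converted to QASYMM8 when matrix A is QASYMM8;
    // in that case the B offset is forced to -128 (the conversion kernel itself is enqueued in prepare())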
    _convert_to_qasymm8 = is_data_type_quantized_per_channel(b->info()->data_type()) && is_data_type_quantized_symmetric(b->info()->data_type())
                          && a->info()->data_type() == DataType::QASYMM8;
    _b_offset = _convert_to_qasymm8 ? -128 : b->info()->quantization_info().uniform().offset;

    // Get the GPU target
    const GPUTarget gpu_target = CLScheduler::get().target();

    // Set the target for the kernels
    _mm_native_kernel->set_target(gpu_target);
    _mm_reshaped_only_rhs_kernel->set_target(gpu_target);

    GEMMRHSMatrixInfo rhs_info;
    GEMMLHSMatrixInfo lhs_info;

    // Arguments used by GEMMReshapeInfo
    // If we pass the matrix A and matrix B reshaped to CLGEMMMatrixMultiplyKernel, we need to pass m, n, k, mult_transpose1xW_width and mult_interleave4x4_height to GEMMReshapeInfo
    // in order to know how the matrices have been reshaped
    bool               reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
    const unsigned int m                       = reinterpret_input_as_3d ? (a->info()->dimension(1) * a->info()->dimension(2)) : a->info()->dimension(1);
    const unsigned int n                       = b->info()->dimension(0);
    const unsigned int k                       = a->info()->dimension(0);
    const unsigned int batch_size              = reinterpret_input_as_3d ? a->info()->dimension(3) : a->info()->dimension(2);
    const int          depth_output_gemm3d     = gemm_info.depth_output_gemm3d();

    const auto reshape_info = GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d);

    // Check if we need to reshape the matrix A and matrix B
    _is_gemm_reshaped = is_gemm_reshaped(auto_select_gemm_kernel(auto_heuristics::CommonQuery{ gpu_target, a->info()->data_type(), m, n, k, batch_size }, _reshape_b_only_on_first_run));

    if(_convert_to_qasymm8)
    {
        // Set data type for converted weights
        TensorInfo weights_info(*b->info());
        weights_info.set_data_type(DataType::QASYMM8);
        _qasymm8_weights.allocator()->init(weights_info);
        _weights_to_qasymm8->configure(compile_context, b, &_qasymm8_weights, ConvertPolicy::WRAP, 0);
    }

    const ICLTensor *matrix_b = _convert_to_qasymm8 ? &_qasymm8_weights : b;
    if(_is_gemm_reshaped)
    {
        matrix_b = &_tmp_b;

        if(!_reshape_b_only_on_first_run)
        {
            _memory_group.manage(&_tmp_b);
        }

        // Pick up the GEMM configuration
        // It doesn't matter whether the data type is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affects the shape configuration
        std::tie(lhs_info, rhs_info) = auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size }, reinterpret_input_as_3d,
                                                                                 depth_output_gemm3d,
                                                                                 a->info(), _convert_to_qasymm8 ? _qasymm8_weights.info() : b->info(), output->info());

        // Configure reshape RHS kernel
        _mtx_b_reshape_kernel->configure(compile_context, _convert_to_qasymm8 ? &_qasymm8_weights : b, &_tmp_b, rhs_info);
    }

    // Using default reduction info
    const GEMMLowpReductionKernelInfo reduction_info{};

    // Initialize matrix B reduction kernel only if _a_offset is not equal to 0
    if(_a_offset != 0)
    {
        TensorInfo info_vector_sum_col(compute_reductionA_shape(*b->info()), 1, DataType::S32);
        _vector_sum_col.allocator()->init(info_vector_sum_col);
        if(!_reshape_b_only_on_first_run)
        {
            _memory_group.manage(&_vector_sum_col);
        }

        // Configure Matrix B reduction kernel
        _mtx_b_reduction_kernel->configure(compile_context, _convert_to_qasymm8 ? &_qasymm8_weights : b, &_vector_sum_col, reduction_info);
    }

    // Initialize Matrix A reduction kernel only if _b_offset is not equal to 0
    if(_b_offset != 0)
    {
        TensorInfo info_vector_sum_row(compute_reductionB_shape(*a->info()), 1, DataType::S32);
        _vector_sum_row.allocator()->init(info_vector_sum_row);
        _memory_group.manage(&_vector_sum_row);

        // Configure matrix A reduction kernel
        _mtx_a_reduction_kernel->configure(compile_context, a, &_vector_sum_row, reduction_info);
    }

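    // Fill the descriptor consumed by the reshaped only rhs matrix multiply kernel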
    GEMMKernelInfo gemm_kernel_info;
    gemm_kernel_info.m                       = m;
    gemm_kernel_info.n                       = n;
    gemm_kernel_info.k                       = k;
    gemm_kernel_info.depth_output_gemm3d     = depth_output_gemm3d;
    gemm_kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;
    gemm_kernel_info.lhs_info                = lhs_info;
    gemm_kernel_info.rhs_info                = rhs_info;
    gemm_kernel_info.a_offset                = _a_offset;
    gemm_kernel_info.b_offset                = _b_offset;
    // If GEMMLowpOutputStage != NONE, fuse the offset contribution with the output stage
    if(gemm_info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE)
    {
        // Configure offset contribution kernel
        const size_t num_filters = (gemm_info.gemmlowp_output_stage().is_quantized_per_channel) ? gemm_info.gemmlowp_output_stage().gemmlowp_multipliers.size() : 1;

        _gemm_output_stage_multipliers.allocator()->init(TensorInfo(TensorShape(num_filters), 1, DataType::S32));
        _gemm_output_stage_shifts.allocator()->init(TensorInfo(TensorShape(num_filters), 1, DataType::S32));

        GEMMLowpOutputStageInfo gemmlowp_output_stage = gemm_info.gemmlowp_output_stage();
        gemmlowp_output_stage.output_data_type        = _matrix_a->info()->data_type();

        gemm_kernel_info.output_stage = gemmlowp_output_stage;

        if(_is_gemm_reshaped && gemmlowp_output_stage.type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT)
        {
            // Configure and tune matrix multiply kernel with fused output stage
            _mm_reshaped_only_rhs_kernel->configure(compile_context, _matrix_a, matrix_b, output, gemm_kernel_info, _a_offset == 0 ? nullptr : &_vector_sum_col,
                                                    _b_offset == 0 ? nullptr : &_vector_sum_row, c, &_gemm_output_stage_multipliers, &_gemm_output_stage_shifts);
        }
        else
        {
            _run_output_stage = true;

            _memory_group.manage(&_mm_result_s32);

            if(_is_gemm_reshaped)
            {
                _mm_reshaped_only_rhs_kernel->configure(compile_context, _matrix_a, matrix_b, &_mm_result_s32, gemm_kernel_info);
            }
            else
            {
                // Pick up the GEMM configuration
                // It doesn't matter whether the data type is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affects the shape configuration
                std::tie(lhs_info, rhs_info) = auto_select_gemm_config_native(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size },
                                                                              _matrix_a->info(), _convert_to_qasymm8 ? _qasymm8_weights.info() : matrix_b->info(), reshape_info);

                // Configure matrix multiply kernel
                _mm_native_kernel->configure(compile_context, _matrix_a, matrix_b, &_mm_result_s32, lhs_info, rhs_info, reshape_info);

                _offset_contribution_output_stage_kernel->configure(compile_context, &_mm_result_s32, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, c, output,
                                                                    a->info()->dimension(0),
                                                                    _a_offset, _b_offset, gemmlowp_output_stage, &_gemm_output_stage_multipliers, &_gemm_output_stage_shifts);
                _mm_result_s32.allocator()->allocate();
            }
        }

        _gemm_output_stage_multipliers.allocator()->allocate();
        _gemm_output_stage_shifts.allocator()->allocate();
        // Compute GEMM output multipliers and shifts for output stage
        _gemm_output_stage_multipliers.map();
        _gemm_output_stage_shifts.map();
        std::memcpy(_gemm_output_stage_multipliers.ptr_to_element(Coordinates(0)), gemm_info.gemmlowp_output_stage().gemmlowp_multipliers.data(), num_filters * sizeof(int32_t));
        std::memcpy(_gemm_output_stage_shifts.ptr_to_element(Coordinates(0)), gemm_info.gemmlowp_output_stage().gemmlowp_shifts.data(), num_filters * sizeof(int32_t));
        _gemm_output_stage_multipliers.unmap();
        _gemm_output_stage_shifts.unmap();
    }
    else
    {
        _run_offset_contribution = true;
        if(_is_gemm_reshaped)
        {
            // Configure and tune matrix multiply kernel
            _mm_reshaped_only_rhs_kernel->configure(compile_context, _matrix_a, matrix_b, output, gemm_kernel_info);
        }
        else
        {
            // Pick up the GEMM configuration
            // It doesn't matter whether the data type is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affects the shape configuration
            std::tie(lhs_info, rhs_info) = auto_select_gemm_config_native(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size },
                                                                          a->info(), _convert_to_qasymm8 ? _qasymm8_weights.info() : b->info(), reshape_info);

            // Configure matrix multiply kernel
            _mm_native_kernel->configure(compile_context, _matrix_a, matrix_b, output, lhs_info, rhs_info, reshape_info);
        }

        // Configure offset contribution kernel
        _offset_contribution_kernel->configure(compile_context, output, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, c, a->info()->dimension(0), _a_offset,
                                               _b_offset);
    }

    // Allocate tensors
    if(_is_gemm_reshaped)
    {
        if(!_reshape_b_only_on_first_run)
        {
            _tmp_b.allocator()->allocate();
        }
    }

    if(_a_offset != 0 && !_reshape_b_only_on_first_run)
    {
        _vector_sum_col.allocator()->allocate();
    }

    if(_b_offset != 0)
    {
        _vector_sum_row.allocator()->allocate();
    }
}

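// Static validation mirrors configure(): the kernel type is auto-selected and the lhs/rhs configurations are taken from the default heuristics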
Status CLGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(b, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8, DataType::QSYMM8_PER_CHANNEL);
    ARM_COMPUTE_RETURN_ERROR_ON(a->data_type() == DataType::QASYMM8 && b->data_type() == DataType::QASYMM8_SIGNED);
    ARM_COMPUTE_RETURN_ERROR_ON(a->data_type() == DataType::QASYMM8_SIGNED && b->data_type() == DataType::QASYMM8);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(), "Matrix A already reshaped is not supported");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(), "Matrix B already reshaped is not supported");

    int32_t a_offset = a->quantization_info().uniform().offset;
    int32_t b_offset = b->quantization_info().uniform().offset;

    const ITensorInfo *matrix_a_info = a;

    TensorInfo        tmp_b_info{};
    GEMMRHSMatrixInfo rhs_info;
    GEMMLHSMatrixInfo lhs_info;

    // Get the GPU target
    const GPUTarget gpu_target = CLScheduler::get().target();

    bool               reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
    const unsigned int m                       = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n                       = b->dimension(0);
    const unsigned int k                       = a->dimension(0);
    const unsigned int batch_size              = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
    const int          depth_output_gemm3d     = gemm_info.depth_output_gemm3d();

    bool reshape_matrix_b = is_gemm_reshaped(auto_select_gemm_kernel(auto_heuristics::CommonQuery{ gpu_target, a->data_type(), m, n, k, batch_size }, gemm_info.reshape_b_only_on_first_run()));

    const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d);

    bool convert_to_qasymm8 = is_data_type_quantized_per_channel(b->data_type()) && is_data_type_quantized_symmetric(b->data_type())
                              && is_data_type_quantized_asymmetric(a->data_type());
    TensorInfo weights_info(*b);
    if(convert_to_qasymm8)
    {
        b_offset = -128;
        weights_info.set_data_type(DataType::QASYMM8);
        ARM_COMPUTE_RETURN_ON_ERROR(CLDepthConvertLayerKernel::validate(b, &weights_info, ConvertPolicy::WRAP, 0));
    }
    const ITensorInfo *matrix_b_info = &weights_info;
    if(reshape_matrix_b)
    {
        matrix_b_info = &tmp_b_info;

        // Pick up the GEMM configuration
        // NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails
        // It doesn't matter whether the data type is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affects the shape configuration
        const auto res = select_default_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size });
        lhs_info       = res.lhs_info;
        rhs_info       = res.rhs_info;

        // Validate reshape RHS kernel
        auto_init_if_empty(tmp_b_info, weights_info.clone()->set_tensor_shape(compute_rhs_reshaped_shape(weights_info, rhs_info)));
        ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMReshapeRHSMatrixKernel::validate(&weights_info, &tmp_b_info, rhs_info));
    }

    TensorInfo info_vector_sum_col{};
    TensorInfo info_vector_sum_row{};

    const GEMMLowpReductionKernelInfo reduction_info;
    // Validate matrix B reduction kernel only if a_offset is not equal to 0
    if(a_offset != 0)
    {
        info_vector_sum_col = TensorInfo(compute_reductionA_shape(weights_info), 1, DataType::S32);

        // Validate Matrix B reduction kernel
        ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixBReductionKernel::validate(&weights_info, &info_vector_sum_col, reduction_info));
    }

    // Validate Matrix A reduction kernel only if b_offset is not equal to 0
    if(b_offset != 0)
    {
        info_vector_sum_row = TensorInfo(compute_reductionB_shape(*a), 1, DataType::S32);

        // Validate matrix A reduction kernel
        ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixAReductionKernel::validate(a, &info_vector_sum_row, reduction_info));
    }

    GEMMKernelInfo gemm_kernel_info;
    gemm_kernel_info.m                       = m;
    gemm_kernel_info.n                       = n;
    gemm_kernel_info.k                       = k;
    gemm_kernel_info.depth_output_gemm3d     = depth_output_gemm3d;
    gemm_kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;
    gemm_kernel_info.lhs_info                = lhs_info;
    gemm_kernel_info.rhs_info                = rhs_info;
    gemm_kernel_info.a_offset                = a_offset;
    gemm_kernel_info.b_offset                = b_offset;
    if(gemm_info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE)
    {
        const size_t num_filters = (gemm_info.gemmlowp_output_stage().is_quantized_per_channel) ? gemm_info.gemmlowp_output_stage().gemmlowp_multipliers.size() : 1;

        const TensorInfo gemm_output_stage_multipliers_shifts_info(TensorInfo(TensorShape(num_filters), 1, DataType::S32));

        GEMMLowpOutputStageInfo gemmlowp_output_stage = gemm_info.gemmlowp_output_stage();
        gemmlowp_output_stage.output_data_type        = a->data_type();

        gemm_kernel_info.output_stage = gemmlowp_output_stage;
        if(reshape_matrix_b && gemm_info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT)
        {
            ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::validate(matrix_a_info, matrix_b_info, output, gemm_kernel_info,
                                                                                                a_offset == 0 ? nullptr : &info_vector_sum_col,
                                                                                                b_offset == 0 ? nullptr : &info_vector_sum_row,
                                                                                                c,
                                                                                                &gemm_output_stage_multipliers_shifts_info,
                                                                                                &gemm_output_stage_multipliers_shifts_info));
        }
        else
        {
            TensorInfo mm_result_s32_info{};

            if(reshape_matrix_b)
            {
                // Output tensor auto initialization if not yet initialized
                auto_init_if_empty(mm_result_s32_info, a->clone()->set_tensor_shape(compute_mm_shape(*matrix_a_info, *matrix_b_info, reshape_info)).set_data_type(DataType::S32));

                // Validate matrix multiply
                ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::validate(matrix_a_info, matrix_b_info, &mm_result_s32_info, gemm_kernel_info));
            }
            else
            {
                // Output tensor auto initialization if not yet initialized
                auto_init_if_empty(mm_result_s32_info, a->clone()->set_tensor_shape(compute_mm_shape(*matrix_a_info, *matrix_b_info, false, reshape_info)).set_data_type(DataType::S32));

                // Pick up the GEMM configuration
                // NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails
                // It doesn't matter whether the data type is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affects the shape configuration
                const auto res = select_default_gemm_config_native(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size });
                lhs_info       = res.lhs_info;
                rhs_info       = res.rhs_info;

                // Validate matrix multiply
                ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyNativeKernel::validate(matrix_a_info, matrix_b_info, &mm_result_s32_info, lhs_info, rhs_info, reshape_info));
            }

            // Validate offset contribution kernel
            ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpOffsetContributionOutputStageKernel::validate(&mm_result_s32_info,
                                                                                                a_offset == 0 ? nullptr : &info_vector_sum_col,
                                                                                                b_offset == 0 ? nullptr : &info_vector_sum_row,
                                                                                                c,
                                                                                                output,
                                                                                                a_offset, b_offset,
                                                                                                gemmlowp_output_stage,
                                                                                                &gemm_output_stage_multipliers_shifts_info,
                                                                                                &gemm_output_stage_multipliers_shifts_info));
        }
    }
    else
    {
        if(reshape_matrix_b)
        {
            // Validate matrix multiply
            ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::validate(matrix_a_info, matrix_b_info, output, gemm_kernel_info));
        }
        else
        {
            // Pick up the GEMM configuration
            // It doesn't matter whether the data type is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affects the shape configuration
            const auto res = select_default_gemm_config_native(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size });
            lhs_info       = res.lhs_info;
            rhs_info       = res.rhs_info;

            // Validate matrix multiply
            ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyNativeKernel::validate(matrix_a_info, matrix_b_info, output, lhs_info, rhs_info, reshape_info));
        }

        if(output->total_size() != 0)
        {
            // Validate offset contribution kernel
            ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpOffsetContributionKernel::validate(output,
                                                                                     a_offset == 0 ? nullptr : &info_vector_sum_col,
                                                                                     b_offset == 0 ? nullptr : &info_vector_sum_row,
                                                                                     c,
                                                                                     a_offset, b_offset));
        }
    }

    return Status{};
}

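// run() enqueues the kernels configured above; the one-off weight transformations are handled by prepare()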
void CLGEMMLowpMatrixMultiplyCore::run()
{
    prepare();

    MemoryGroupResourceScope scope_mg(_memory_group);

    if(_is_gemm_reshaped)
    {
        if(!_reshape_b_only_on_first_run)
        {
            // Run reshape matrix B
            CLScheduler::get().enqueue(*_mtx_b_reshape_kernel, false);
        }
    }

    // Run matrix B reduction kernel only if _a_offset is not equal to 0
    if(_a_offset != 0 && !_reshape_b_only_on_first_run)
    {
        CLScheduler::get().enqueue(*_mtx_b_reduction_kernel, false);
    }

    // Run matrix A reduction kernel only if _b_offset is not equal to 0
    if(_b_offset != 0)
    {
        CLScheduler::get().enqueue(*_mtx_a_reduction_kernel, false);
    }

    // Run matrix multiply
    if(_is_gemm_reshaped)
    {
        CLScheduler::get().enqueue(*_mm_reshaped_only_rhs_kernel, false);
    }
    else
    {
        CLScheduler::get().enqueue(*_mm_native_kernel, false);
    }
    if(_run_output_stage)
    {
        // Run offset contribution/output stage kernel
        CLScheduler::get().enqueue(*_offset_contribution_output_stage_kernel, true);
    }
    if(_run_offset_contribution)
    {
        // Run offset contribution kernel
        CLScheduler::get().enqueue(*_offset_contribution_kernel, true);
    }
}

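// One-off preparation: weight conversion and, when B is only reshaped on the first run, the B reshape and B reduction kernels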
void CLGEMMLowpMatrixMultiplyCore::prepare()
{
    if(!_is_prepared)
    {
        if(_convert_to_qasymm8)
        {
            _qasymm8_weights.allocator()->allocate();
            CLScheduler::get().enqueue(*_weights_to_qasymm8, false);
        }

        if(_is_gemm_reshaped && _reshape_b_only_on_first_run)
        {
            ARM_COMPUTE_ERROR_ON(!_original_b->is_used());

            // Run reshape kernel and mark original weights tensor as unused
            _tmp_b.allocator()->allocate();
            CLScheduler::get().enqueue(*_mtx_b_reshape_kernel, false);
            _original_b->mark_as_unused();
        }

        // Run matrix B reduction kernel only if _a_offset is not equal to 0
        if(_a_offset != 0 && _reshape_b_only_on_first_run)
        {
            _vector_sum_col.allocator()->allocate();
            CLScheduler::get().enqueue(*_mtx_b_reduction_kernel, false);
        }

        CLScheduler::get().queue().finish();
        _is_prepared = true;
    }
}
} // namespace arm_compute