/*
 * Copyright (c) 2017-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
25
26#include "arm_compute/core/CL/ICLTensor.h"
27#include "arm_compute/core/Error.h"
28#include "arm_compute/core/Helpers.h"
Michele Di Giorgiof64d3362020-04-03 12:40:10 +010029#include "arm_compute/core/KernelDescriptors.h"
SiCong Libbd8fac2021-02-04 13:12:19 +000030#include "arm_compute/core/Log.h"
Gian Marco05288a22017-11-21 10:57:50 +000031#include "arm_compute/core/TensorInfo.h"
32#include "arm_compute/core/Types.h"
33#include "arm_compute/core/Validate.h"
Georgios Pinitas358ca202017-12-07 16:47:52 +000034#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +000035#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
Gian Marco05288a22017-11-21 10:57:50 +000036#include "arm_compute/runtime/CL/CLScheduler.h"
Sang-Hoon Parkbef7fa22020-10-21 15:58:54 +010037#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
38#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
39#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
40#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
41#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
42#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
43#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010044#include "src/core/helpers/AutoConfiguration.h"
SiCong Libbd8fac2021-02-04 13:12:19 +000045#include "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.h"
46#include "utils/TypePrinter.h"
Gian Marco05288a22017-11-21 10:57:50 +000047
namespace arm_compute
{
using namespace arm_compute::misc::shape_calculator;
using namespace arm_compute::cl_gemm;

namespace
{
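// Check that the kernel type returned by the heuristics is one this function supports: only NATIVE and RESHAPED_ONLY_RHS are accepted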
inline bool validate_gemm_kernel(CLGEMMKernelType kernel_type)
{
    switch(kernel_type)
    {
        case CLGEMMKernelType::NATIVE:
        case CLGEMMKernelType::RESHAPED_ONLY_RHS:
        {
            return true;
        }
        default:
        {
            return false;
        }
    }
}
// Automatically select between mlgo (prioritized) and default heuristics for gemm kernel type
inline CLGEMMKernelType auto_select_gemm_kernel(auto_heuristics::CommonQuery query, bool reshape_b_only_on_first_run)
{
    auto gemm_kernel = auto_heuristics::select_mlgo_gemm_kernel(query, reshape_b_only_on_first_run);
    if(bool(gemm_kernel))
    {
        if(validate_gemm_kernel(gemm_kernel.gemm_type))
        {
            ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use gemm kernel from mlgo heuristics: %s.", to_string(gemm_kernel.gemm_type).c_str());
            return gemm_kernel.gemm_type;
        }
    }
    gemm_kernel = auto_heuristics::select_default_gemm_kernel(query, reshape_b_only_on_first_run);
    ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use gemm kernel from default heuristics: %s.", to_string(gemm_kernel.gemm_type).c_str());
    return gemm_kernel.gemm_type;
}
// Validate lhs_info and rhs_info for native kernel
inline bool validate_lhs_rhs_info_native(const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const ITensorInfo *a, const ITensorInfo *b, const GEMMReshapeInfo &reshape_info)
{
    // Validate GEMMLHSMatrixInfo and GEMMRHSMatrixInfo for the native kernel
    TensorInfo mm_result_s32_info{};
    // Output tensor auto initialization if not yet initialized
    auto_init_if_empty(mm_result_s32_info, a->clone()->set_tensor_shape(compute_mm_shape(*a, *b, false, reshape_info)).set_data_type(DataType::S32));
    // Validate mm kernel
    // NOTE: Ignore all other parameters (e.g. output stage etc.) and only validate lhs and rhs info
    // NOTE: This assumes:
    //       1. lhs and rhs info's validity does not depend on these other parameters and vice versa (in CLGEMMLowpMatrixMultiplyNativeKernel.cpp validate_arguments).
    //       2. lhs and rhs info does not cause window and padding issues through side effects (in CLGEMMLowpMatrixMultiplyNativeKernel.cpp validate_and_configure_window).
    if(!bool(CLGEMMLowpMatrixMultiplyNativeKernel::validate(a, b, &mm_result_s32_info, lhs_info, rhs_info, reshape_info)))
    {
        return false;
    }
    return true;
}

// Automatically select between mlgo (prioritized) and default heuristics for native kernel configs
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> auto_select_gemm_config_native(auto_heuristics::CommonQuery query, const ITensorInfo *a, const ITensorInfo *b, const GEMMReshapeInfo &reshape_info)
{
    auto config = auto_heuristics::select_mlgo_gemm_config_native(query);
    if(config)
    {
        if(validate_lhs_rhs_info_native(config.lhs_info, config.rhs_info, a, b, reshape_info))
        {
            ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use native config from mlgo heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
            return { config.lhs_info, config.rhs_info };
        }
    }
    config = auto_heuristics::select_default_gemm_config_native(query);
    ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use native config from default heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
    return { config.lhs_info, config.rhs_info };
}

// Validate lhs_info and rhs_info for reshaped only rhs kernel
inline bool validate_lhs_rhs_info_reshaped_only_rhs(const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *output,
                                                    unsigned int m, unsigned int n, unsigned int k, bool reinterpret_input_as_3d, int depth_output_gemm3d)
{
    // Validate GEMMLHSMatrixInfo and GEMMRHSMatrixInfo for reshaped only rhs kernel
    TensorInfo tmp_b_info{};
    // Validate reshape RHS kernel
    auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
    if(!bool(CLGEMMReshapeRHSMatrixKernel::validate(b, &tmp_b_info, rhs_info)))
    {
        return false;
    }
    // Validate mm kernel
    // NOTE: Ignore all other parameters (e.g. depth_output_gemm3d, output stage etc.) and only validate lhs and rhs info
    // NOTE: This assumes:
    //       1. lhs and rhs info's validity does not depend on these other parameters and vice versa (in CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp validate_arguments).
    //       2. lhs and rhs info does not cause window and padding issues through side effects (in CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp validate_and_configure_window).
    GEMMKernelInfo gemm_kernel_info;
    gemm_kernel_info.m                       = m;
    gemm_kernel_info.n                       = n;
    gemm_kernel_info.k                       = k;
    gemm_kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;
    gemm_kernel_info.depth_output_gemm3d     = depth_output_gemm3d;
    gemm_kernel_info.lhs_info                = lhs_info;
    gemm_kernel_info.rhs_info                = rhs_info;
    // Since we ignore the output stage, output data type has to be S32 to pass the validation
    TensorInfo output_info_copy(*output);
    output_info_copy.set_data_type(DataType::S32);
    if(!bool(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::validate(a, &tmp_b_info, &output_info_copy, gemm_kernel_info)))
    {
        return false;
    }
    return true;
}

// Automatically select between mlgo (prioritized) and default heuristics for reshaped only rhs kernel configs
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery query, bool reinterpret_input_as_3d, int depth_output_gemm3d,
                                                                                          const ITensorInfo *a,
                                                                                          const ITensorInfo *b, const ITensorInfo *output)
{
    auto config = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(query);
    if(config)
    {
        if(validate_lhs_rhs_info_reshaped_only_rhs(config.lhs_info, config.rhs_info, a, b, output, query.m, query.n, query.k, reinterpret_input_as_3d, depth_output_gemm3d))
        {
            ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from mlgo heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
            return { config.lhs_info, config.rhs_info };
        }
    }
    config = auto_heuristics::select_default_gemm_config_reshaped_only_rhs(query);
    ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from default heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
    return { config.lhs_info, config.rhs_info };
}

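// Query whether the selected kernel type reshapes the RHS matrix (RESHAPED_ONLY_RHS) or works on the original matrices (NATIVE)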
inline bool is_gemm_reshaped(CLGEMMKernelType kernel_type)
{
    switch(kernel_type)
    {
        case CLGEMMKernelType::NATIVE:
            return false;
        case CLGEMMKernelType::RESHAPED_ONLY_RHS:
            return true;
        default:
            ARM_COMPUTE_ERROR("Not supported gemmlowp kernel!");
    }
}
} // namespace

CLGEMMLowpMatrixMultiplyCore::CLGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(std::move(memory_manager)),
      _weights_to_qasymm8(std::make_unique<CLDepthConvertLayerKernel>()),
      _mm_native_kernel(std::make_unique<CLGEMMLowpMatrixMultiplyNativeKernel>()),
      _mm_reshaped_only_rhs_kernel(std::make_unique<CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel>()),
      _mtx_b_reshape_kernel(std::make_unique<CLGEMMReshapeRHSMatrixKernel>()),
      _mtx_a_reduction_kernel(std::make_unique<CLGEMMLowpMatrixAReductionKernel>()),
      _mtx_b_reduction_kernel(std::make_unique<CLGEMMLowpMatrixBReductionKernel>()),
      _offset_contribution_kernel(std::make_unique<CLGEMMLowpOffsetContributionKernel>()),
      _offset_contribution_output_stage_kernel(std::make_unique<CLGEMMLowpOffsetContributionOutputStageKernel>()),
      _qasymm8_weights(),
      _vector_sum_col(),
      _vector_sum_row(),
      _tmp_b(),
      _mm_result_s32(),
      _gemm_output_stage_multipliers(),
      _gemm_output_stage_shifts(),
      _matrix_a(nullptr),
      _original_b(nullptr),
      _output(nullptr),
      _a_offset(0),
      _b_offset(0),
      _is_gemm_reshaped(true),
      _reshape_b_only_on_first_run(false),
      _is_prepared(false),
      _run_output_stage(false),
      _convert_to_qasymm8(false),
      _run_offset_contribution(false)
{
}

CLGEMMLowpMatrixMultiplyCore::~CLGEMMLowpMatrixMultiplyCore() = default;

void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info)
{
    configure(CLKernelLibrary::get().get_compile_context(), a, b, c, output, gemm_info);
}

void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);
    ARM_COMPUTE_ERROR_THROW_ON(CLGEMMLowpMatrixMultiplyCore::validate(a->info(), b->info(), c != nullptr ? c->info() : nullptr, output->info(), gemm_info));

    _is_prepared                 = false;
    _original_b                  = b;
    _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
    _a_offset                    = a->info()->quantization_info().uniform().offset;
    _matrix_a                    = a;
    _output                      = output;

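    // Per-channel symmetric weights (QSYMM8_PER_CHANNEL) paired with a QASYMM8 input are handled by converting the weights to QASYMM8 with a fixed offset of -128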
    _convert_to_qasymm8 = is_data_type_quantized_per_channel(b->info()->data_type()) && is_data_type_quantized_symmetric(b->info()->data_type())
                          && a->info()->data_type() == DataType::QASYMM8;
    _b_offset = _convert_to_qasymm8 ? -128 : b->info()->quantization_info().uniform().offset;

    // Get the GPU target
    const GPUTarget gpu_target = CLScheduler::get().target();

    // Set the target for the kernels
    _mm_native_kernel->set_target(gpu_target);
    _mm_reshaped_only_rhs_kernel->set_target(gpu_target);

    GEMMRHSMatrixInfo rhs_info;
    GEMMLHSMatrixInfo lhs_info;

    // Arguments used by GEMMReshapeInfo
    // If we pass the matrix A and matrix B reshaped to CLGEMMMatrixMultiplyKernel, we need to pass m, n, k, mult_transpose1xW_width and mult_interleave4x4_height to CLGEMMReshapeInfo
    // in order to know how the matrices have been reshaped
    bool               reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
    const unsigned int m                       = reinterpret_input_as_3d ? (a->info()->dimension(1) * a->info()->dimension(2)) : a->info()->dimension(1);
    const unsigned int n                       = b->info()->dimension(0);
    const unsigned int k                       = a->info()->dimension(0);
    const unsigned int batch_size              = reinterpret_input_as_3d ? a->info()->dimension(3) : a->info()->dimension(2);
    const int          depth_output_gemm3d     = gemm_info.depth_output_gemm3d();

    const auto reshape_info = GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d);

    // Check if we need to reshape the matrix A and matrix B
    _is_gemm_reshaped = is_gemm_reshaped(auto_select_gemm_kernel(auto_heuristics::CommonQuery{ gpu_target, a->info()->data_type(), m, n, k, batch_size }, _reshape_b_only_on_first_run));

    if(_convert_to_qasymm8)
    {
        // Set data type for converted weights
        TensorInfo weights_info(*b->info());
        weights_info.set_data_type(DataType::QASYMM8);
        _qasymm8_weights.allocator()->init(weights_info);
        _weights_to_qasymm8->configure(compile_context, b, &_qasymm8_weights, ConvertPolicy::WRAP, 0);
    }

    const ICLTensor *matrix_b = _convert_to_qasymm8 ? &_qasymm8_weights : b;
    if(_is_gemm_reshaped)
    {
        matrix_b = &_tmp_b;

        if(!_reshape_b_only_on_first_run)
        {
            _memory_group.manage(&_tmp_b);
        }

        // Pick up the GEMM configuration
        // It doesn't matter whether the data type is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affects the shape configuration
        std::tie(lhs_info, rhs_info) = auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size }, reinterpret_input_as_3d,
                                                                                 depth_output_gemm3d,
                                                                                 a->info(), _convert_to_qasymm8 ? _qasymm8_weights.info() : b->info(), output->info());

        // Configure reshape RHS kernel
        _mtx_b_reshape_kernel->configure(compile_context, _convert_to_qasymm8 ? &_qasymm8_weights : b, &_tmp_b, rhs_info);
    }

    // Using default reduction info
    const GEMMLowpReductionKernelInfo reduction_info{};

    // Initialize matrix B reduction kernel only if _a_offset is not equal to 0
    if(_a_offset != 0)
    {
        TensorInfo info_vector_sum_col(compute_reductionA_shape(*b->info()), 1, DataType::S32);
        _vector_sum_col.allocator()->init(info_vector_sum_col);
        if(!_reshape_b_only_on_first_run)
        {
            _memory_group.manage(&_vector_sum_col);
        }

        // Configure Matrix B reduction kernel
        _mtx_b_reduction_kernel->configure(compile_context, _convert_to_qasymm8 ? &_qasymm8_weights : b, &_vector_sum_col, reduction_info);
    }

    // Initialize Matrix A reduction kernel only if _b_offset is not equal to 0
    if(_b_offset != 0)
    {
        TensorInfo info_vector_sum_row(compute_reductionB_shape(*a->info()), 1, DataType::S32);
        _vector_sum_row.allocator()->init(info_vector_sum_row);
        _memory_group.manage(&_vector_sum_row);

        // Configure matrix A reduction kernel
        _mtx_a_reduction_kernel->configure(compile_context, a, &_vector_sum_row, reduction_info);
    }

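    // Gather the arguments shared by the matrix multiply kernels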
    GEMMKernelInfo gemm_kernel_info;
    gemm_kernel_info.m                       = m;
    gemm_kernel_info.n                       = n;
    gemm_kernel_info.k                       = k;
    gemm_kernel_info.depth_output_gemm3d     = depth_output_gemm3d;
    gemm_kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;
    gemm_kernel_info.lhs_info                = lhs_info;
    gemm_kernel_info.rhs_info                = rhs_info;
    gemm_kernel_info.a_offset                = _a_offset;
    gemm_kernel_info.b_offset                = _b_offset;
    // If GEMMLowpOutputStage != NONE, fuse the offset contribution with the output stage
    if(gemm_info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE)
    {
        // Configure offset contribution kernel
        const size_t num_filters = (gemm_info.gemmlowp_output_stage().is_quantized_per_channel) ? gemm_info.gemmlowp_output_stage().gemmlowp_multipliers.size() : 1;

        _gemm_output_stage_multipliers.allocator()->init(TensorInfo(TensorShape(num_filters), 1, DataType::S32));
        _gemm_output_stage_shifts.allocator()->init(TensorInfo(TensorShape(num_filters), 1, DataType::S32));

        GEMMLowpOutputStageInfo gemmlowp_output_stage = gemm_info.gemmlowp_output_stage();
        gemmlowp_output_stage.output_data_type        = _matrix_a->info()->data_type();
        if(num_filters == 1)
        {
            // Per-channel quantization with OFM == 1 is equivalent to uniform quantization.
            // Setting this flag to false prevents the kernel from adding useless padding to the output multipliers and shifts
            gemmlowp_output_stage.is_quantized_per_channel = false;
        }

        gemm_kernel_info.output_stage = gemmlowp_output_stage;

        if(_is_gemm_reshaped && gemmlowp_output_stage.type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT)
        {
            // Configure and tune matrix multiply kernel with fused output stage
            _mm_reshaped_only_rhs_kernel->configure(compile_context, _matrix_a, matrix_b, output, gemm_kernel_info, _a_offset == 0 ? nullptr : &_vector_sum_col,
                                                    _b_offset == 0 ? nullptr : &_vector_sum_row, c, &_gemm_output_stage_multipliers, &_gemm_output_stage_shifts);
        }
        else
        {
            _run_output_stage = true;

            _memory_group.manage(&_mm_result_s32);

            if(_is_gemm_reshaped)
            {
                _mm_reshaped_only_rhs_kernel->configure(compile_context, _matrix_a, matrix_b, &_mm_result_s32, gemm_kernel_info);
            }
            else
            {
                // Pick up the GEMM configuration
                // It doesn't matter whether the data type is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affects the shape configuration
                std::tie(lhs_info, rhs_info) = auto_select_gemm_config_native(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size },
                                                                              _matrix_a->info(), _convert_to_qasymm8 ? _qasymm8_weights.info() : matrix_b->info(), reshape_info);

                // Configure matrix multiply kernel
                _mm_native_kernel->configure(compile_context, _matrix_a, matrix_b, &_mm_result_s32, lhs_info, rhs_info, reshape_info);

                _offset_contribution_output_stage_kernel->configure(compile_context, &_mm_result_s32, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, c, output,
                                                                    a->info()->dimension(0),
                                                                    _a_offset, _b_offset, gemmlowp_output_stage, &_gemm_output_stage_multipliers, &_gemm_output_stage_shifts);
                _mm_result_s32.allocator()->allocate();
            }
        }

        _gemm_output_stage_multipliers.allocator()->allocate();
        _gemm_output_stage_shifts.allocator()->allocate();
        // Compute GEMM output multipliers and shifts for output stage
        _gemm_output_stage_multipliers.map();
        _gemm_output_stage_shifts.map();
        std::memcpy(_gemm_output_stage_multipliers.ptr_to_element(Coordinates(0)), gemm_info.gemmlowp_output_stage().gemmlowp_multipliers.data(), num_filters * sizeof(int32_t));
        std::memcpy(_gemm_output_stage_shifts.ptr_to_element(Coordinates(0)), gemm_info.gemmlowp_output_stage().gemmlowp_shifts.data(), num_filters * sizeof(int32_t));
        _gemm_output_stage_multipliers.unmap();
        _gemm_output_stage_shifts.unmap();
    }
    else
    {
        _run_offset_contribution = true;
        if(_is_gemm_reshaped)
        {
            // Configure and tune matrix multiply kernel
            _mm_reshaped_only_rhs_kernel->configure(compile_context, _matrix_a, matrix_b, output, gemm_kernel_info);
        }
        else
        {
            // Pick up the GEMM configuration
            // It doesn't matter whether the data type is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affects the shape configuration
            std::tie(lhs_info, rhs_info) = auto_select_gemm_config_native(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size },
                                                                          a->info(), _convert_to_qasymm8 ? _qasymm8_weights.info() : b->info(), reshape_info);

            // Configure matrix multiply kernel
            _mm_native_kernel->configure(compile_context, _matrix_a, matrix_b, output, lhs_info, rhs_info, reshape_info);
        }

        // Configure offset contribution kernel
        _offset_contribution_kernel->configure(compile_context, output, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, c, a->info()->dimension(0), _a_offset,
                                               _b_offset);
    }

    // Allocate tensors
    if(_is_gemm_reshaped)
    {
        if(!_reshape_b_only_on_first_run)
        {
            _tmp_b.allocator()->allocate();
        }
    }

    if(_a_offset != 0 && !_reshape_b_only_on_first_run)
    {
        _vector_sum_col.allocator()->allocate();
    }

    if(_b_offset != 0)
    {
        _vector_sum_row.allocator()->allocate();
    }
}

Status CLGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(b, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8, DataType::QSYMM8_PER_CHANNEL);
    ARM_COMPUTE_RETURN_ERROR_ON(a->data_type() == DataType::QASYMM8 && b->data_type() == DataType::QASYMM8_SIGNED);
    ARM_COMPUTE_RETURN_ERROR_ON(a->data_type() == DataType::QASYMM8_SIGNED && b->data_type() == DataType::QASYMM8);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(), "Matrix A already reshaped is not supported");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(), "Matrix B already reshaped is not supported");

    int32_t a_offset = a->quantization_info().uniform().offset;
    int32_t b_offset = b->quantization_info().uniform().offset;

    const ITensorInfo *matrix_a_info = a;

    TensorInfo        tmp_b_info{};
    GEMMRHSMatrixInfo rhs_info;
    GEMMLHSMatrixInfo lhs_info;

    // Get the GPU target
    const GPUTarget gpu_target = CLScheduler::get().target();

    bool               reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
    const unsigned int m                       = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n                       = b->dimension(0);
    const unsigned int k                       = a->dimension(0);
    const unsigned int batch_size              = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
    const int          depth_output_gemm3d     = gemm_info.depth_output_gemm3d();

    bool reshape_matrix_b = is_gemm_reshaped(auto_select_gemm_kernel(auto_heuristics::CommonQuery{ gpu_target, a->data_type(), m, n, k, batch_size }, gemm_info.reshape_b_only_on_first_run()));

    const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d);

    bool convert_to_qasymm8 = is_data_type_quantized_per_channel(b->data_type()) && is_data_type_quantized_symmetric(b->data_type())
                              && is_data_type_quantized_asymmetric(a->data_type());
    TensorInfo weights_info(*b);
    if(convert_to_qasymm8)
    {
        b_offset = -128;
        weights_info.set_data_type(DataType::QASYMM8);
        ARM_COMPUTE_RETURN_ON_ERROR(CLDepthConvertLayerKernel::validate(b, &weights_info, ConvertPolicy::WRAP, 0));
    }
    const ITensorInfo *matrix_b_info = &weights_info;
    if(reshape_matrix_b)
    {
        matrix_b_info = &tmp_b_info;

        // Pick up the GEMM configuration
        // NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails
        // It doesn't matter whether the data type is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affects the shape configuration
        const auto res = select_default_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size });
        lhs_info       = res.lhs_info;
        rhs_info       = res.rhs_info;

        // Validate reshape RHS kernel
        auto_init_if_empty(tmp_b_info, weights_info.clone()->set_tensor_shape(compute_rhs_reshaped_shape(weights_info, rhs_info)));
        ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMReshapeRHSMatrixKernel::validate(&weights_info, &tmp_b_info, rhs_info));
    }

    TensorInfo info_vector_sum_col{};
    TensorInfo info_vector_sum_row{};

    const GEMMLowpReductionKernelInfo reduction_info;
    // Validate matrix B reduction kernel only if _a_offset is not equal to 0
    if(a_offset != 0)
    {
        info_vector_sum_col = TensorInfo(compute_reductionA_shape(weights_info), 1, DataType::S32);

        // Configure Matrix B reduction kernel
        ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixBReductionKernel::validate(&weights_info, &info_vector_sum_col, reduction_info));
    }

    // Validate Matrix A reduction kernel only if _b_offset is not equal to 0
    if(b_offset != 0)
    {
        info_vector_sum_row = TensorInfo(compute_reductionB_shape(*a), 1, DataType::S32);

        // Configure matrix A reduction kernel
        ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixAReductionKernel::validate(a, &info_vector_sum_row, reduction_info));
    }

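    // Gather the arguments used to validate the matrix multiply kernels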
    GEMMKernelInfo gemm_kernel_info;
    gemm_kernel_info.m                       = m;
    gemm_kernel_info.n                       = n;
    gemm_kernel_info.k                       = k;
    gemm_kernel_info.depth_output_gemm3d     = depth_output_gemm3d;
    gemm_kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;
    gemm_kernel_info.lhs_info                = lhs_info;
    gemm_kernel_info.rhs_info                = rhs_info;
    gemm_kernel_info.a_offset                = a_offset;
    gemm_kernel_info.b_offset                = b_offset;
    if(gemm_info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE)
    {
        const size_t num_filters = (gemm_info.gemmlowp_output_stage().is_quantized_per_channel) ? gemm_info.gemmlowp_output_stage().gemmlowp_multipliers.size() : 1;

        const TensorInfo gemm_output_stage_multipliers_shifts_info(TensorInfo(TensorShape(num_filters), 1, DataType::S32));

        GEMMLowpOutputStageInfo gemmlowp_output_stage = gemm_info.gemmlowp_output_stage();
        gemmlowp_output_stage.output_data_type        = a->data_type();

        gemm_kernel_info.output_stage = gemmlowp_output_stage;
        if(reshape_matrix_b && gemm_info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT)
        {
            ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::validate(matrix_a_info, matrix_b_info, output, gemm_kernel_info,
                                                                                                a_offset == 0 ? nullptr : &info_vector_sum_col,
                                                                                                b_offset == 0 ? nullptr : &info_vector_sum_row,
                                                                                                c,
                                                                                                &gemm_output_stage_multipliers_shifts_info,
                                                                                                &gemm_output_stage_multipliers_shifts_info));
        }
        else
        {
            TensorInfo mm_result_s32_info{};

            if(reshape_matrix_b)
            {
                // Output tensor auto initialization if not yet initialized
                auto_init_if_empty(mm_result_s32_info, a->clone()->set_tensor_shape(compute_mm_shape(*matrix_a_info, *matrix_b_info, reshape_info)).set_data_type(DataType::S32));

                // Validate matrix multiply
                ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::validate(matrix_a_info, matrix_b_info, &mm_result_s32_info, gemm_kernel_info));
            }
            else
            {
                // Output tensor auto initialization if not yet initialized
                auto_init_if_empty(mm_result_s32_info, a->clone()->set_tensor_shape(compute_mm_shape(*matrix_a_info, *matrix_b_info, false, reshape_info)).set_data_type(DataType::S32));

                // Pick up the GEMM configuration
                // NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails
                // It doesn't matter whether the data type is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affects the shape configuration
                const auto res = select_default_gemm_config_native(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size });
                lhs_info       = res.lhs_info;
                rhs_info       = res.rhs_info;

                // Validate matrix multiply
                ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyNativeKernel::validate(matrix_a_info, matrix_b_info, &mm_result_s32_info, lhs_info, rhs_info, reshape_info));
            }

            // Validate offset contribution kernel
            ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpOffsetContributionOutputStageKernel::validate(&mm_result_s32_info,
                                                                                                a_offset == 0 ? nullptr : &info_vector_sum_col,
                                                                                                b_offset == 0 ? nullptr : &info_vector_sum_row,
                                                                                                c,
                                                                                                output,
                                                                                                a_offset, b_offset,
                                                                                                gemmlowp_output_stage,
                                                                                                &gemm_output_stage_multipliers_shifts_info,
                                                                                                &gemm_output_stage_multipliers_shifts_info));
        }
    }
    else
    {
        if(reshape_matrix_b)
        {
            // Validate matrix multiply
            ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::validate(matrix_a_info, matrix_b_info, output, gemm_kernel_info));
        }
        else
        {
            // Pick up the GEMM configuration
            // It doesn't matter whether the data type is DataType::QASYMM8 or DataType::QASYMM8_SIGNED, since it only affects the shape configuration
            const auto res = select_default_gemm_config_native(auto_heuristics::CommonQuery{ gpu_target, DataType::QASYMM8, m, n, k, batch_size });
            lhs_info       = res.lhs_info;
            rhs_info       = res.rhs_info;

            // Validate matrix multiply
            ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyNativeKernel::validate(matrix_a_info, matrix_b_info, output, lhs_info, rhs_info, reshape_info));
        }

        if(output->total_size() != 0)
        {
            // Validate offset contribution kernel
            ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpOffsetContributionKernel::validate(output,
                                                                                     a_offset == 0 ? nullptr : &info_vector_sum_col,
                                                                                     b_offset == 0 ? nullptr : &info_vector_sum_row,
                                                                                     c,
                                                                                     a_offset, b_offset));
        }
    }

    return Status{};
}

void CLGEMMLowpMatrixMultiplyCore::run()
{
    prepare();

    MemoryGroupResourceScope scope_mg(_memory_group);

    if(_is_gemm_reshaped)
    {
        if(!_reshape_b_only_on_first_run)
        {
            // Run reshape matrix B
            CLScheduler::get().enqueue(*_mtx_b_reshape_kernel, false);
        }
    }

    // Run matrix B reduction kernel only if _a_offset is not equal to 0
    if(_a_offset != 0 && !_reshape_b_only_on_first_run)
    {
        CLScheduler::get().enqueue(*_mtx_b_reduction_kernel, false);
    }

    // Run matrix A reduction kernel only if _b_offset is not equal to 0
    if(_b_offset != 0)
    {
        CLScheduler::get().enqueue(*_mtx_a_reduction_kernel, false);
    }

    // Run matrix multiply
    if(_is_gemm_reshaped)
    {
        CLScheduler::get().enqueue(*_mm_reshaped_only_rhs_kernel, false);
    }
    else
    {
        CLScheduler::get().enqueue(*_mm_native_kernel, false);
    }
    if(_run_output_stage)
    {
        // Run offset contribution/output stage kernel
        CLScheduler::get().enqueue(*_offset_contribution_output_stage_kernel, true);
    }
    if(_run_offset_contribution)
    {
        // Run offset contribution kernel
        CLScheduler::get().enqueue(*_offset_contribution_kernel, true);
    }
}

void CLGEMMLowpMatrixMultiplyCore::prepare()
{
    if(!_is_prepared)
    {
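        // Convert the weights to QASYMM8 once, on the first run only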
        if(_convert_to_qasymm8)
        {
            _qasymm8_weights.allocator()->allocate();
            CLScheduler::get().enqueue(*_weights_to_qasymm8, false);
        }

        if(_is_gemm_reshaped && _reshape_b_only_on_first_run)
        {
            ARM_COMPUTE_ERROR_ON(!_original_b->is_used());

            // Run reshape kernel and mark original weights tensor as unused
            _tmp_b.allocator()->allocate();
            CLScheduler::get().enqueue(*_mtx_b_reshape_kernel, false);
            _original_b->mark_as_unused();
        }

        // Run matrix B reduction kernel only if _a_offset is not equal to 0
        if(_a_offset != 0 && _reshape_b_only_on_first_run)
        {
            _vector_sum_col.allocator()->allocate();
            CLScheduler::get().enqueue(*_mtx_b_reduction_kernel, false);
        }

        CLScheduler::get().queue().finish();
        _is_prepared = true;
    }
}
} // namespace arm_compute