/*
 * Copyright (c) 2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "src/gpu/cl/kernels/ClMatMulLowpNativeKernel.h"

#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/QuantizationInfo.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/utils/ActivationFunctionUtils.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/core/utils/StringUtils.h"

#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "src/gpu/cl/ClCompileContext.h"
#include "src/gpu/cl/kernels/helpers/MatMulKernelHelpers.h"
#include "support/Cast.h"
#include "support/StringSupport.h"

namespace arm_compute
{
namespace opencl
{
namespace kernels
{
namespace
{
Status validate_matmul_kernel_info(const MatMulKernelInfo &matmul_kernel_info)
{
    const bool adj_lhs = matmul_kernel_info.adj_lhs;
    const bool adj_rhs = matmul_kernel_info.adj_rhs;
    const int  m0      = matmul_kernel_info.m0;
    const int  n0      = matmul_kernel_info.n0;
    const int  k0      = matmul_kernel_info.k0;

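    // The block-size checks below accept only 1, 2, 3, 4, 8 and 16: (x & (x - 1)) == 0 is the usual
    // power-of-two test, and 3 is allowed explicitly as the only non-power-of-two vector size the
    // OpenCL kernels support. Values above 16 are rejected in all cases.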
    // Validate M0
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(m0 < 1, "Only positive integers are supported for M0");

    if (adj_lhs)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(((m0 & (m0 - 1)) && (m0 != 3)) || (m0 > 16),
                                        "Only 1,2,3,4,8,16 are supported for M0 for Lhs transposed");
    }

    // Validate N0
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(n0 < 1, "Only positive integers are supported for N0");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(((n0 & (n0 - 1)) && (n0 != 3)) || (n0 > 16),
                                    "Only 1,2,3,4,8,16 are supported for N0");

    // Validate K0
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(k0 < 1, "Only positive integers are supported for K0");
    if (!adj_lhs || adj_rhs)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(((k0 & (k0 - 1)) && (k0 != 3)) || (k0 > 16),
                                        "Only 1,2,3,4,8,16 are supported for K0");
    }

    return Status{};
}
} // namespace

ClMatMulLowpNativeKernel::ClMatMulLowpNativeKernel()
{
    _type = CLKernelType::GEMM;
}
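
// Illustrative usage sketch (not part of the library code): an operator such as ClMatMul typically
// validates, configures and then launches this kernel through an ITensorPack. The tensor infos,
// CLTensor objects and MatMulKernelInfo values named below are assumptions made for this example only.
//
//   MatMulKernelInfo kernel_info(/* adj_lhs */ false, /* adj_rhs */ false, /* m0 */ 4, /* n0 */ 4, /* k0 */ 4);
//   ARM_COMPUTE_ERROR_THROW_ON(
//       ClMatMulLowpNativeKernel::validate(&lhs_info, &rhs_info, nullptr, &dst_info, kernel_info));
//   ClMatMulLowpNativeKernel kernel;
//   kernel.configure(CLKernelLibrary::get().get_compile_context(), &lhs_info, &rhs_info, nullptr, &dst_info,
//                    kernel_info);
//   ITensorPack pack{{TensorType::ACL_SRC_0, &lhs_tensor}, {TensorType::ACL_SRC_1, &rhs_tensor},
//                    {TensorType::ACL_DST, &dst_tensor}};
//   CLScheduler::get().enqueue_op(kernel, pack, true);
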
Status ClMatMulLowpNativeKernel::validate(const ITensorInfo         *lhs,
                                          const ITensorInfo         *rhs,
                                          const ITensorInfo         *bias,
                                          const ITensorInfo         *dst,
                                          const MatMulKernelInfo    &matmul_kernel_info,
                                          const ActivationLayerInfo &act_info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs, dst);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lhs, rhs);
    ARM_COMPUTE_RETURN_ON_ERROR(validate_matmul_kernel_info(matmul_kernel_info));
    ARM_COMPUTE_RETURN_ON_ERROR(
        validate_matmul_input_shapes(lhs->tensor_shape(), rhs->tensor_shape(), matmul_kernel_info));

    ARM_COMPUTE_RETURN_ERROR_ON_MSG((act_info.activation() != ActivationFunction::IDENTITY &&
                                     act_info.activation() != ActivationFunction::RELU &&
                                     act_info.activation() != ActivationFunction::LU_BOUNDED_RELU &&
                                     act_info.activation() != ActivationFunction::BOUNDED_RELU),
                                    "Activation Function specified is unsupported.");
    const TensorShape expected_output_shape =
        misc::shape_calculator::compute_matmul_shape(lhs->tensor_shape(), rhs->tensor_shape(), matmul_kernel_info);

    if (dst->total_size() != 0)
    {
        const TensorInfo tensor_info_output = dst->clone()->set_tensor_shape(expected_output_shape);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(dst, &tensor_info_output);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lhs, dst);
    }

    if (bias != nullptr)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
        ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() > 1);
        ARM_COMPUTE_RETURN_ERROR_ON(expected_output_shape[0] != bias->dimension(0));
    }

    return Status{};
}
void ClMatMulLowpNativeKernel::configure(const ClCompileContext    &compile_context,
                                         ITensorInfo               *lhs,
                                         ITensorInfo               *rhs,
                                         ITensorInfo               *bias,
                                         ITensorInfo               *dst,
                                         const MatMulKernelInfo    &matmul_kernel_info,
                                         const ActivationLayerInfo &act_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs, dst, &compile_context, &matmul_kernel_info);
    ARM_COMPUTE_LOG_PARAMS(lhs, rhs, bias, dst, matmul_kernel_info);
    ARM_COMPUTE_ERROR_THROW_ON(validate(lhs, rhs, bias, dst, matmul_kernel_info));

    // Auto-initialize the dst tensor info if it has not been initialized yet
    auto_init_if_empty(*dst, lhs->clone()->set_tensor_shape(misc::shape_calculator::compute_matmul_shape(
                                 lhs->tensor_shape(), rhs->tensor_shape(), matmul_kernel_info)));

    const int  m       = dst->dimension(1);
    const int  n       = dst->dimension(0);
    const int  k       = matmul_kernel_info.adj_lhs ? lhs->tensor_shape().y() : lhs->tensor_shape().x();
    const bool adj_lhs = matmul_kernel_info.adj_lhs;

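    // When the LHS is transposed, M0 must stay one of the supported vector sizes, so it is clamped with
    // adjust_vec_size() to a supported size not larger than M; otherwise any positive M0 is valid and a
    // plain std::min() is enough. N0 is always adjusted to a supported vector size not larger than N.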
    int m0 = adj_lhs ? adjust_vec_size(matmul_kernel_info.m0, m) : std::min(matmul_kernel_info.m0, m);
    int n0 = adjust_vec_size(matmul_kernel_info.n0, n);

    // Configure kernel window
    Window win = calculate_max_window(*dst, Steps(n0, m0));
    win        = win.collapse(win, Window::DimZ);
    IClKernel::configure_internal(win);

    // Calculate partial (store instead of load) M0 and N0 for the partial blocks at the end of a
    // row/column, if any. This is done to avoid padding.
    const unsigned int partial_store_m0 = m % m0;
    const unsigned int partial_store_n0 = n % n0;

    CLBuildOptions build_opts;
    build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(lhs->data_type()));
    build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
    build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
    build_opts.add_option("-DK0=" + support::cpp11::to_string(matmul_kernel_info.k0));
    build_opts.add_option("-DPARTIAL_STORE_M0=" + support::cpp11::to_string(partial_store_m0));
    build_opts.add_option("-DPARTIAL_STORE_N0=" + support::cpp11::to_string(partial_store_n0));
    build_opts.add_option("-DK=" + support::cpp11::to_string(k));
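    // Example (illustrative values only): for QASYMM8 inputs with M = N = 64, K = 32 and m0 = n0 = k0 = 4,
    // the options above expand to roughly:
    //   -DDATA_TYPE=uchar -DM0=4 -DN0=4 -DK0=4 -DPARTIAL_STORE_M0=0 -DPARTIAL_STORE_N0=0 -DK=32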

    const UniformQuantizationInfo lqinfo = lhs->quantization_info().uniform();
    const UniformQuantizationInfo rqinfo = rhs->quantization_info().uniform();
    const UniformQuantizationInfo dqinfo = dst->quantization_info().uniform();

    float multiplier        = lqinfo.scale * rqinfo.scale / dqinfo.scale;
    int   output_multiplier = 0;
    int   output_shift      = 0;
    arm_compute::quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);

    build_opts.add_option("-DDST_MULTIPLIER=" + support::cpp11::to_string(output_multiplier));
    build_opts.add_option("-DDST_SHIFT=" + support::cpp11::to_string(output_shift));

    // Note: the offsets are not negated, unlike in the gemmlowp kernels
    build_opts.add_option("-DLHS_OFFSET=" + support::cpp11::to_string(lqinfo.offset));
    build_opts.add_option("-DRHS_OFFSET=" + support::cpp11::to_string(rqinfo.offset));
    build_opts.add_option("-DDST_OFFSET=" + support::cpp11::to_string(dqinfo.offset));
    build_opts.add_option_if(bias != nullptr, "-DBIAS");

    // Floating-point activation boundaries are quantized before being passed as build arguments.
    // Note: the input and output tensors are expected to always use per-tensor quantization.
    int a_val{};
    int b_val{};
    std::tie(b_val, a_val) = get_quantized_activation_min_max(act_info, dst->data_type(), dqinfo);
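    // get_quantized_activation_min_max() returns the (min, max) clamp bounds of the activation in the
    // quantized domain of dst, so b_val holds the lower bound and a_val the upper bound.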

    build_opts.add_option("-DA_VAL=" + support::cpp11::to_string(a_val));
    build_opts.add_option("-DB_VAL=" + support::cpp11::to_string(b_val));
    build_opts.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_info.activation())));
    build_opts.add_option("-DZERO_POINT=" + support::cpp11::to_string(dqinfo.offset));

    std::string kernel_name("mat_mul_native_quantized");
    kernel_name += matmul_kernel_info.adj_lhs ? "_t" : "_nt";
    kernel_name += matmul_kernel_info.adj_rhs ? "_t" : "_nt";
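    // The resulting kernel name is one of mat_mul_native_quantized_nt_nt, _nt_t, _t_nt or _t_t,
    // depending on whether the LHS and/or RHS are transposed.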

    // A macro guard to compile ONLY the kernel of interest
    build_opts.add_option("-D" + upper_string(kernel_name));

    // Create kernel
    _kernel = create_kernel(compile_context, kernel_name, build_opts.options());

    // Set config_id for enabling LWS tuning
    const size_t number_of_batches = dst->tensor_shape().total_size() / (m * n);

    _config_id = kernel_name;
    _config_id += "_";
    _config_id += lower_string(string_from_data_type(lhs->data_type()));
    _config_id += "_";
    _config_id += support::cpp11::to_string(m);
    _config_id += "_";
    _config_id += support::cpp11::to_string(n);
    _config_id += "_";
    _config_id += support::cpp11::to_string(k);
    _config_id += "_";
    _config_id += support::cpp11::to_string(number_of_batches);
    _config_id += "_";
    _config_id += support::cpp11::to_string(m0);
    _config_id += "_";
    _config_id += support::cpp11::to_string(n0);
    _config_id += "_";
    _config_id += support::cpp11::to_string(matmul_kernel_info.k0);
}

void ClMatMulLowpNativeKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
{
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);

    const ICLTensor *lhs =
        utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_0));
    const ICLTensor *rhs =
        utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_1));
    const ICLTensor *bias =
        utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_2));
    ICLTensor *dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
    ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs, dst);
    ARM_COMPUTE_LOG_PARAMS(lhs, rhs, bias, dst);

    unsigned int idx              = 0;
    Window       window_collapsed = window.collapse(ICLKernel::window(), Window::DimZ);

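    // Bind the collapsed 3D (NHW) tensor arguments in the order expected by the OpenCL kernel:
    // lhs, rhs, optional bias (only when -DBIAS was defined at compile time), then dst.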
    add_3d_tensor_nhw_argument(idx, lhs);
    add_3d_tensor_nhw_argument(idx, rhs);
    if (bias != nullptr)
    {
        add_3d_tensor_nhw_argument(idx, bias);
    }
    add_3d_tensor_nhw_argument(idx, dst);

    enqueue(queue, *this, window_collapsed, lws_hint());
}

} // namespace kernels
} // namespace opencl
} // namespace arm_compute