/*
 * Copyright (c) 2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "src/gpu/cl/kernels/ClMatMulLowpNativeKernel.h"

#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/QuantizationInfo.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/utils/ActivationFunctionUtils.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/core/utils/StringUtils.h"

#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "src/gpu/cl/ClCompileContext.h"
#include "src/gpu/cl/kernels/helpers/MatMulKernelHelpers.h"
#include "support/Cast.h"
#include "support/StringSupport.h"

namespace arm_compute
{
namespace opencl
{
namespace kernels
{
namespace
{
Status validate_matmul_kernel_info(const MatMulKernelInfo &matmul_kernel_info)
{
    const bool adj_lhs = matmul_kernel_info.adj_lhs;
    const bool adj_rhs = matmul_kernel_info.adj_rhs;
    const int  m0      = matmul_kernel_info.m0;
    const int  n0      = matmul_kernel_info.n0;
    const int  k0      = matmul_kernel_info.k0;

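    // The block-size checks below accept only 1, 2, 3, 4, 8 and 16: (x & (x - 1)) == 0 is the usual
    // power-of-two test, and 3 is allowed explicitly as the only non-power-of-two vector size the
    // OpenCL kernels support. Values above 16 are rejected in all cases.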
    // Validate M0
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(m0 < 1, "Only positive integers are supported for M0");

    if (adj_lhs)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(((m0 & (m0 - 1)) && (m0 != 3)) || (m0 > 16),
                                        "Only 1,2,3,4,8,16 are supported for M0 for Lhs transposed");
    }

    // Validate N0
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(n0 < 1, "Only positive integers are supported for N0");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(((n0 & (n0 - 1)) && (n0 != 3)) || (n0 > 16),
                                    "Only 1,2,3,4,8,16 are supported for N0");

    // Validate K0
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(k0 < 1, "Only positive integers are supported for K0");
    if (!adj_lhs || adj_rhs)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(((k0 & (k0 - 1)) && (k0 != 3)) || (k0 > 16),
                                        "Only 1,2,3,4,8,16 are supported for K0");
    }

    return Status{};
}
} // namespace

ClMatMulLowpNativeKernel::ClMatMulLowpNativeKernel()
{
    _type = CLKernelType::GEMM;
}
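
// Illustrative usage sketch (not part of the library code): an operator such as ClMatMul typically
// validates, configures and then launches this kernel through an ITensorPack. The tensor infos,
// CLTensor objects and MatMulKernelInfo values named below are assumptions made for this example only.
//
//   MatMulKernelInfo kernel_info(/* adj_lhs */ false, /* adj_rhs */ false, /* m0 */ 4, /* n0 */ 4, /* k0 */ 4);
//   ARM_COMPUTE_ERROR_THROW_ON(
//       ClMatMulLowpNativeKernel::validate(&lhs_info, &rhs_info, nullptr, &dst_info, kernel_info));
//   ClMatMulLowpNativeKernel kernel;
//   kernel.configure(CLKernelLibrary::get().get_compile_context(), &lhs_info, &rhs_info, nullptr, &dst_info,
//                    kernel_info);
//   ITensorPack pack{{TensorType::ACL_SRC_0, &lhs_tensor}, {TensorType::ACL_SRC_1, &rhs_tensor},
//                    {TensorType::ACL_DST, &dst_tensor}};
//   CLScheduler::get().enqueue_op(kernel, pack, true);
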
Status ClMatMulLowpNativeKernel::validate(const ITensorInfo         *lhs,
                                          const ITensorInfo         *rhs,
                                          const ITensorInfo         *bias,
                                          const ITensorInfo         *dst,
                                          const MatMulKernelInfo    &matmul_kernel_info,
                                          const ActivationLayerInfo &act_info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs, dst);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lhs, rhs);
    ARM_COMPUTE_RETURN_ON_ERROR(validate_matmul_kernel_info(matmul_kernel_info));
    ARM_COMPUTE_RETURN_ON_ERROR(
        validate_matmul_input_shapes(lhs->tensor_shape(), rhs->tensor_shape(), matmul_kernel_info));

    ARM_COMPUTE_RETURN_ERROR_ON_MSG((act_info.activation() != ActivationFunction::IDENTITY &&
                                     act_info.activation() != ActivationFunction::RELU &&
                                     act_info.activation() != ActivationFunction::LU_BOUNDED_RELU &&
                                     act_info.activation() != ActivationFunction::BOUNDED_RELU),
                                    "Activation Function specified is unsupported.");
    const TensorShape expected_output_shape =
        misc::shape_calculator::compute_matmul_shape(lhs->tensor_shape(), rhs->tensor_shape(), matmul_kernel_info);

    if (dst->total_size() != 0)
    {
        const TensorInfo tensor_info_output = dst->clone()->set_tensor_shape(expected_output_shape);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(dst, &tensor_info_output);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lhs, dst);
    }

    if (bias != nullptr)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
        ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() > 1);
        ARM_COMPUTE_RETURN_ERROR_ON(expected_output_shape[0] != bias->dimension(0));
    }

    return Status{};
}
void ClMatMulLowpNativeKernel::configure(const ClCompileContext    &compile_context,
                                         ITensorInfo               *lhs,
                                         ITensorInfo               *rhs,
                                         ITensorInfo               *bias,
                                         ITensorInfo               *dst,
                                         const MatMulKernelInfo    &matmul_kernel_info,
                                         const ActivationLayerInfo &act_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs, dst, &compile_context, &matmul_kernel_info);
    ARM_COMPUTE_LOG_PARAMS(lhs, rhs, bias, dst, matmul_kernel_info);
    ARM_COMPUTE_ERROR_THROW_ON(validate(lhs, rhs, bias, dst, matmul_kernel_info));

    // Auto-initialize the dst tensor info if it has not been initialized yet
    auto_init_if_empty(*dst, lhs->clone()->set_tensor_shape(misc::shape_calculator::compute_matmul_shape(
                                 lhs->tensor_shape(), rhs->tensor_shape(), matmul_kernel_info)));

    const int  m       = dst->dimension(1);
    const int  n       = dst->dimension(0);
    const int  k       = matmul_kernel_info.adj_lhs ? lhs->tensor_shape().y() : lhs->tensor_shape().x();
    const bool adj_lhs = matmul_kernel_info.adj_lhs;

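    // When the LHS is transposed, M0 must stay one of the supported vector sizes, so it is clamped with
    // adjust_vec_size() to a supported size not larger than M; otherwise any positive M0 is valid and a
    // plain std::min() is enough. N0 is always adjusted to a supported vector size not larger than N.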
    int m0 = adj_lhs ? adjust_vec_size(matmul_kernel_info.m0, m) : std::min(matmul_kernel_info.m0, m);
    int n0 = adjust_vec_size(matmul_kernel_info.n0, n);

    // Configure kernel window
    Window win = calculate_max_window(*dst, Steps(n0, m0));
    win        = win.collapse(win, Window::DimZ);
    IClKernel::configure_internal(win);

    // Calculate partial (store instead of load) M0 and N0 for the partial blocks at the end of a
    // row/column, if any. This is done to avoid padding.
    const unsigned int partial_store_m0 = m % m0;
    const unsigned int partial_store_n0 = n % n0;

    CLBuildOptions build_opts;
    build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(lhs->data_type()));
    build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
    build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
    build_opts.add_option("-DK0=" + support::cpp11::to_string(matmul_kernel_info.k0));
    build_opts.add_option("-DPARTIAL_STORE_M0=" + support::cpp11::to_string(partial_store_m0));
    build_opts.add_option("-DPARTIAL_STORE_N0=" + support::cpp11::to_string(partial_store_n0));
    build_opts.add_option("-DK=" + support::cpp11::to_string(k));
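    // Example (illustrative values only): for QASYMM8 inputs with M = N = 64, K = 32 and m0 = n0 = k0 = 4,
    // the options above expand to roughly:
    //   -DDATA_TYPE=uchar -DM0=4 -DN0=4 -DK0=4 -DPARTIAL_STORE_M0=0 -DPARTIAL_STORE_N0=0 -DK=32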

    const UniformQuantizationInfo lqinfo = lhs->quantization_info().uniform();
    const UniformQuantizationInfo rqinfo = rhs->quantization_info().uniform();
    const UniformQuantizationInfo dqinfo = dst->quantization_info().uniform();

    float multiplier        = lqinfo.scale * rqinfo.scale / dqinfo.scale;
    int   output_multiplier = 0;
    int   output_shift      = 0;
    arm_compute::quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);

    build_opts.add_option("-DDST_MULTIPLIER=" + support::cpp11::to_string(output_multiplier));
    build_opts.add_option("-DDST_SHIFT=" + support::cpp11::to_string(output_shift));

    // Note: the offsets are not negated, unlike in the gemmlowp kernels
    build_opts.add_option("-DLHS_OFFSET=" + support::cpp11::to_string(lqinfo.offset));
    build_opts.add_option("-DRHS_OFFSET=" + support::cpp11::to_string(rqinfo.offset));
    build_opts.add_option("-DDST_OFFSET=" + support::cpp11::to_string(dqinfo.offset));
    build_opts.add_option_if(bias != nullptr, "-DBIAS");

    // Floating-point activation boundaries are quantized before being passed as build arguments.
    // Note: the input and output tensors are expected to always use per-tensor quantization.
    int a_val{};
    int b_val{};
    std::tie(b_val, a_val) = get_quantized_activation_min_max(act_info, dst->data_type(), dqinfo);
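    // get_quantized_activation_min_max() returns the (min, max) clamp bounds of the activation in the
    // quantized domain of dst, so b_val holds the lower bound and a_val the upper bound.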

    build_opts.add_option("-DA_VAL=" + support::cpp11::to_string(a_val));
    build_opts.add_option("-DB_VAL=" + support::cpp11::to_string(b_val));
    build_opts.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_info.activation())));
    build_opts.add_option("-DZERO_POINT=" + support::cpp11::to_string(dqinfo.offset));

    std::string kernel_name("mat_mul_native_quantized");
    kernel_name += matmul_kernel_info.adj_lhs ? "_t" : "_nt";
    kernel_name += matmul_kernel_info.adj_rhs ? "_t" : "_nt";
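    // The resulting kernel name is one of mat_mul_native_quantized_nt_nt, _nt_t, _t_nt or _t_t,
    // depending on whether the LHS and/or RHS are transposed.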

    // A macro guard to compile ONLY the kernel of interest
    build_opts.add_option("-D" + upper_string(kernel_name));

    // Create kernel
    _kernel = create_kernel(compile_context, kernel_name, build_opts.options());

    // Set config_id for enabling LWS tuning
    const size_t number_of_batches = dst->tensor_shape().total_size() / (m * n);

    _config_id = kernel_name;
    _config_id += "_";
    _config_id += lower_string(string_from_data_type(lhs->data_type()));
    _config_id += "_";
    _config_id += support::cpp11::to_string(m);
    _config_id += "_";
    _config_id += support::cpp11::to_string(n);
    _config_id += "_";
    _config_id += support::cpp11::to_string(k);
    _config_id += "_";
    _config_id += support::cpp11::to_string(number_of_batches);
    _config_id += "_";
    _config_id += support::cpp11::to_string(m0);
    _config_id += "_";
    _config_id += support::cpp11::to_string(n0);
    _config_id += "_";
    _config_id += support::cpp11::to_string(matmul_kernel_info.k0);
}

void ClMatMulLowpNativeKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
{
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);

    const ICLTensor *lhs =
        utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_0));
    const ICLTensor *rhs =
        utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_1));
    const ICLTensor *bias =
        utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_2));
    ICLTensor *dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
    ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs, dst);
    ARM_COMPUTE_LOG_PARAMS(lhs, rhs, bias, dst);

    unsigned int idx              = 0;
    Window       window_collapsed = window.collapse(ICLKernel::window(), Window::DimZ);

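    // Bind the collapsed 3D (NHW) tensor arguments in the order expected by the OpenCL kernel:
    // lhs, rhs, optional bias (only when -DBIAS was defined at compile time), then dst.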
    add_3d_tensor_nhw_argument(idx, lhs);
    add_3d_tensor_nhw_argument(idx, rhs);
    if (bias != nullptr)
    {
        add_3d_tensor_nhw_argument(idx, bias);
    }
    add_3d_tensor_nhw_argument(idx, dst);

    enqueue(queue, *this, window_collapsed, lws_hint());
}

} // namespace kernels
} // namespace opencl
} // namespace arm_compute