Blame - src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.cpp - ml/ComputeLibrary

blob: b9e0d5adf86a940744ed58fcb7712165cc4ea19f [file] [log] [blame]

Gian Marco Iodice	352c07d	2023-05-03 12:21:38 +0100	[diff] [blame]	1	/*
				2	* Copyright (c) 2023 Arm Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.h"
				25
				26	#include "arm_compute/core/KernelDescriptors.h"
				27	#include "arm_compute/core/TensorInfo.h"
				28	#include "arm_compute/core/TensorShape.h"
				29	#include "src/gpu/cl/kernels/ClMatMulNativeKernel.h"
				30
				31	#include <limits>
				32	#include <utility>
				33
				34	namespace arm_compute
				35	{
				36	namespace cl_matmul
				37	{
				38	MatMulKernelInfo select_info(const MatMulKernelInfo &info0,
				39	const MatMulKernelInfo &info1,
				40	unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type, bool rhs_lock_padding)
				41	{
				42	ARM_COMPUTE_ERROR_ON_MSG(info1.export_rhs_to_cl_image == true, "The fallback MatMul configuration cannot have export_to_cl_image = true");
				43	ARM_COMPUTE_ERROR_ON_MSG(info0.adj_lhs != info1.adj_lhs, "The MatMul configurations must have the same adj_lhs value");
				44	ARM_COMPUTE_ERROR_ON_MSG(info0.adj_rhs != info1.adj_rhs, "The MatMul configurations must have the same adj_rhs value");
				45
				46	const bool adj_lhs = info0.adj_lhs;
				47	const bool adj_rhs = info0.adj_rhs;
				48
				49	TensorInfo lhs_info = !adj_lhs ? TensorInfo(TensorShape(k, m, b), 1, data_type) : TensorInfo(TensorShape(m, k, b), 1, data_type);
				50	TensorInfo rhs_info = !adj_rhs ? TensorInfo(TensorShape(n, k, b), 1, data_type) : TensorInfo(TensorShape(k, n, b), 1, data_type);
				51	TensorInfo dst_info;
				52
				53	if(rhs_lock_padding == false)
				54	{
				55	if(bool(opencl::kernels::ClMatMulNativeKernel::validate(&lhs_info, &rhs_info, &dst_info, info0)))
				56	{
				57	return info0;
				58	}
				59	else
				60	{
				61	return info1;
				62	}
				63	}
				64	else
				65	{
				66	return info1;
				67	}
				68	}
				69
				70	MatMulKernelInfo find_info(const MatMulNativeConfigsMatrix &configs, bool adj_lhs, bool adj_rhs, unsigned int m, unsigned int n, unsigned int k, unsigned int b)
				71	{
				72	size_t min_acc = std::numeric_limits<size_t>::max();
				73	size_t min_idx = 0;
				74
				75	ARM_COMPUTE_ERROR_ON(configs.size() == 0);
				76	const size_t num_rows = configs.size();
				77	const size_t num_cols = configs[0].size();
				78
				79	ARM_COMPUTE_ERROR_ON_MSG(num_cols != 8U, "The entry should have 8 integer values representing: M, N, K, B, M0, N0. K0, IMG_RHS");
				80	ARM_COMPUTE_UNUSED(num_cols);
				81
				82	// Find nearest GeMM workload
				83	// Note: the workload does not depend on the K dimension
				84	for(size_t y = 0; y < num_rows; ++y)
				85	{
				86	size_t mc0 = static_cast<size_t>(configs[y][0]);
				87	size_t nc0 = static_cast<size_t>(configs[y][1]);
				88	size_t kc0 = static_cast<size_t>(configs[y][2]);
				89	size_t bc0 = static_cast<size_t>(configs[y][3]);
				90
				91	size_t acc = 0;
				92	acc += (m - mc0) * (m - mc0);
				93	acc += (n - nc0) * (n - nc0);
				94	acc += (k - kc0) * (k - kc0);
				95	acc += (b - bc0) * (b - bc0);
				96	acc = std::sqrt(acc);
				97	if(acc < min_acc)
				98	{
				99	min_acc = acc;
				100	min_idx = y;
				101	}
				102	}
				103
				104	// Get the configuration from the nearest GeMM shape
				105	MatMulKernelInfo desc;
				106	desc.adj_lhs = adj_lhs;
				107	desc.adj_rhs = adj_rhs;
				108	desc.m0 = configs[min_idx][4];
				109	desc.n0 = configs[min_idx][5];
				110	desc.k0 = configs[min_idx][6];
				111	desc.export_rhs_to_cl_image = configs[min_idx][7];
				112
				113	return desc;
				114	}
				115	} // namespace cl_matmul
				116	} // namespace arm_compute