/*
 * Copyright (c) 2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.h"
#include "src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.h"

#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/GPUTarget.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"

namespace arm_compute
{
namespace cl_dwc
{
ClDWCNativeDefaultConfigValhall::ClDWCNativeDefaultConfigValhall(GPUTarget gpu)
    : IClDWCNativeKernelConfig(gpu)
{
}

DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
                                                                unsigned int depth_multiplier)
{
    using ConfigurationFunctionExecutorPtr = DWCComputeKernelInfo (ClDWCNativeDefaultConfigValhall::*)(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
                                                                                                       unsigned int depth_multiplier);

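    // Lookup tables mapping the input data type (F32, F16, quantized) to the heuristic
    // implemented for each GPU target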
    ClDWCNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G78(&ClDWCNativeDefaultConfigValhall::configure_G78_f32,
                                                                         &ClDWCNativeDefaultConfigValhall::configure_G78_f16,
                                                                         &ClDWCNativeDefaultConfigValhall::configure_G78_u8);

    ClDWCNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G77(&ClDWCNativeDefaultConfigValhall::configure_G78_f32,
                                                                         &ClDWCNativeDefaultConfigValhall::configure_G77_f16,
                                                                         &ClDWCNativeDefaultConfigValhall::configure_G78_u8);

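    // Pick the table for the detected GPU; any other Valhall target falls back to the
    // G78 heuristics (default case below)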
    ConfigurationFunctionExecutorPtr func = nullptr;
    switch(_target)
    {
        case GPUTarget::G77:
            func = configs_G77.get_function(src->data_type());
            break;
        case GPUTarget::G78:
        default:
            func = configs_G78.get_function(src->data_type());
            break;
    }

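    // Reject unsupported data types, then run the heuristic selected above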
    ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not supported for depthwise convolution");
    return (this->*func)(src, wei, conv_info, dilation, depth_multiplier);
}

DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
                                                                        unsigned int depth_multiplier)
{
    DWCComputeKernelInfo desc;

    if(src->data_layout() == DataLayout::NHWC)
    {
        const size_t idx_c = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL);
        const size_t idx_w = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH);
        const TensorShape wei_shape = wei->tensor_shape();
        const size_t kernel_c = wei_shape[idx_c];
        const size_t kernel_w = wei_shape[idx_w];

        desc.export_input_to_cl_image   = false;
        desc.export_weights_to_cl_image = use_cl_image_for_weights(wei, depth_multiplier);

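        // n0 is the channel block size processed per work-item: use the widest of
        // {4, 2, 1} that evenly divides depth_multiplier (4 when the multiplier is 1)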
        if(depth_multiplier == 1)
        {
            desc.n0 = 4;
        }
        else
        {
            if((depth_multiplier % 4) == 0)
            {
                desc.n0 = 4;
            }
            else if((depth_multiplier % 2) == 0)
            {
                desc.n0 = 2;
            }
            else
            {
                desc.n0 = 1;
            }
        }

        // Note: if adjust_vec_size() has to shrink n0, the weights must not be exported to cl_image
        ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) && (desc.export_weights_to_cl_image == true));

        desc.n0 = adjust_vec_size(desc.n0, kernel_c);

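        // m0 is the number of output values computed per work-item along the width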
        // Set m0 only if stride_x == 1 and dilation_x == 1
        if(conv_info.stride().first == 1 && dilation.x() == 1)
        {
            if((kernel_w >= 9) || (kernel_w == 1))
            {
                desc.m0 = 1;
            }
            else
            {
                desc.m0 = 2;
            }
        }
        else
        {
            desc.m0 = 1;
        }
    }

    return desc;
}

DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
                                                                        unsigned int depth_multiplier)
{
    DWCComputeKernelInfo desc;

    if(src->data_layout() == DataLayout::NHWC)
    {
        // Src and weights have the same dimension indices
        const size_t idx_c = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL);
        const size_t idx_w = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH);
        const TensorShape src_shape = src->tensor_shape();
        const TensorShape wei_shape = wei->tensor_shape();
        const size_t src_w = src_shape[idx_w];
        const size_t kernel_c = wei_shape[idx_c];
        const size_t kernel_w = wei_shape[idx_w];

        desc.export_input_to_cl_image   = false;
        desc.export_weights_to_cl_image = use_cl_image_for_weights(wei, depth_multiplier);

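        // With F16 data, buffer loads allow a channel block (n0) of 8; cl_image reads
        // return 4-element texels, so n0 is capped at 4 when the weights are exported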
        if(depth_multiplier == 1)
        {
            if(desc.export_weights_to_cl_image == false)
            {
                desc.n0 = 8;
            }
            else
            {
                desc.n0 = 4;
            }
        }
        else
        {
            if((depth_multiplier % 4) == 0)
            {
                desc.n0 = 4;
            }
            else if((depth_multiplier % 2) == 0)
            {
                desc.n0 = 2;
            }
            else
            {
                desc.n0 = 1;
            }
        }

        // Note: if adjust_vec_size() has to shrink n0, the weights must not be exported to cl_image
        ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) && (desc.export_weights_to_cl_image == true));

        desc.n0 = adjust_vec_size(desc.n0, kernel_c);

        // Set m0 only if stride_x == 1 and dilation_x == 1
        if(conv_info.stride().first == 1 && dilation.x() == 1)
        {
            if((kernel_w >= 9) || (kernel_w == 1))
            {
                desc.m0 = 1;
            }
            else
            {
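                // Use a width block of 5 when the input width is a multiple of 5
                // (no partial block); otherwise fall back to 4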
                if((src_w % 5) == 0)
                {
                    desc.m0 = 5;
                }
                else
                {
                    desc.m0 = 4;
                }
            }
        }
        else
        {
            desc.m0 = 1;
        }
    }

    return desc;
}

DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_u8(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
                                                                       unsigned int depth_multiplier)
{
    ARM_COMPUTE_UNUSED(wei);

    DWCComputeKernelInfo desc;

    if(src->data_layout() == DataLayout::NHWC)
    {
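        // Quantized path: cl_image export is not used and the blocks are kept smaller
        // than in the floating-point heuristics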
        desc.export_input_to_cl_image   = false;
        desc.export_weights_to_cl_image = false;
        desc.n0                         = (depth_multiplier == 1) ? 4 : 1;
        if(conv_info.stride().first == 1 && dilation.x() == 1 && depth_multiplier == 1)
        {
            desc.m0 = 2;
        }
        else
        {
            desc.m0 = 1;
        }
    }

    return desc;
}

DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G77_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
                                                                        unsigned int depth_multiplier)
{
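    // Same scheme as configure_G78_f16, except that the width block (m0) is capped
    // at 2 and the input width is not consulted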
    DWCComputeKernelInfo desc;

    if(src->data_layout() == DataLayout::NHWC)
    {
        const size_t idx_c = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL);
        const size_t idx_w = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH);
        const TensorShape wei_shape = wei->tensor_shape();
        const size_t kernel_c = wei_shape[idx_c];
        const size_t kernel_w = wei_shape[idx_w];

        desc.export_input_to_cl_image   = false;
        desc.export_weights_to_cl_image = use_cl_image_for_weights(wei, depth_multiplier);

        if(depth_multiplier == 1)
        {
            if(desc.export_weights_to_cl_image == false)
            {
                desc.n0 = 8;
            }
            else
            {
                desc.n0 = 4;
            }
        }
        else
        {
            if((depth_multiplier % 4) == 0)
            {
                desc.n0 = 4;
            }
            else if((depth_multiplier % 2) == 0)
            {
                desc.n0 = 2;
            }
            else
            {
                desc.n0 = 1;
            }
        }

        // Note: if adjust_vec_size() has to shrink n0, the weights must not be exported to cl_image
        ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) && (desc.export_weights_to_cl_image == true));

        desc.n0 = adjust_vec_size(desc.n0, kernel_c);

        // Set m0 only if stride_x == 1 and dilation_x == 1
        if(conv_info.stride().first == 1 && dilation.x() == 1)
        {
            if((kernel_w >= 9) || (kernel_w == 1))
            {
                desc.m0 = 1;
            }
            else
            {
                desc.m0 = 2;
            }
        }
        else
        {
            desc.m0 = 1;
        }
    }

    return desc;
}
} // namespace cl_dwc
} // namespace arm_compute