blob: 4b7666d5aa0989c1a4c72da8ce3c9c02ffc90890 [file] [log] [blame]
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +01001/*
2 * Copyright (c) 2022 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Ramy Elgammaldf6a3b02022-11-30 16:23:10 +000024#include "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.h"
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010025
26#include "arm_compute/core/CL/CLHelpers.h"
27#include "arm_compute/core/CL/CLKernelLibrary.h"
28#include "arm_compute/core/GPUTarget.h"
29#include "arm_compute/core/TensorInfo.h"
30#include "arm_compute/core/TensorShape.h"
31#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010032
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010033#include <utility>
34
35namespace arm_compute
36{
37namespace cl_direct_conv
38{
39using namespace arm_compute::misc::shape_calculator;
40
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010041ClDirectConvDefaultConfigValhall::ClDirectConvDefaultConfigValhall(GPUTarget gpu) : IClDirectConvKernelConfig(gpu)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010042{
43}
44
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010045DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure(const ITensorInfo *src,
46 const ITensorInfo *wei,
47 const PadStrideInfo &conv_info)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010048{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010049 using ConfigurationFunctionExecutorPtr = DirectConvComputeKernelInfo (ClDirectConvDefaultConfigValhall::*)(
50 const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info);
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010051
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010052 ClDirectConvConfigArray<ConfigurationFunctionExecutorPtr> configs_G78(
53 &ClDirectConvDefaultConfigValhall::configure_G78_f32, &ClDirectConvDefaultConfigValhall::configure_G78_f16,
54 &ClDirectConvDefaultConfigValhall::configure_G78_u8);
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010055
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010056 ClDirectConvConfigArray<ConfigurationFunctionExecutorPtr> configs_G57(
57 &ClDirectConvDefaultConfigValhall::configure_G57_f32, &ClDirectConvDefaultConfigValhall::configure_G57_f16,
58 &ClDirectConvDefaultConfigValhall::configure_G78_u8);
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010059
60 ConfigurationFunctionExecutorPtr func = nullptr;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010061 switch (_target)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010062 {
63 case GPUTarget::G57:
64 func = configs_G57.get_function(src->data_type());
65 break;
66 case GPUTarget::G78:
67 default:
68 func = configs_G78.get_function(src->data_type());
69 break;
70 }
71
72 ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not supported for direct convolution");
73 return (this->*func)(src, wei, conv_info);
74}
75
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010076DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G78_f32(const ITensorInfo *src,
77 const ITensorInfo *wei,
78 const PadStrideInfo &conv_info)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010079{
80 DirectConvComputeKernelInfo desc;
81
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010082 if (src->data_layout() == DataLayout::NHWC)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010083 {
84 // Get the output shape
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010085 const TensorShape wei_shape = wei->tensor_shape();
86 const TensorShape dst_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
Gian Marco Iodicead9a7ed2022-09-16 14:14:21 +010087 const bool export_weights_to_cl_image = export_to_cl_image(wei);
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010088
89 const int32_t ofm = dst_shape[0];
90 const int32_t m = dst_shape[1] * dst_shape[2];
91 const bool is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1;
92
Gian Marco Iodicead9a7ed2022-09-16 14:14:21 +010093 desc.export_weights_to_cl_image = export_weights_to_cl_image;
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010094
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010095 if (dst_shape[0] <= 4)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010096 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010097 if (is_pointwise)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010098 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010099 if (ofm == 4)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100100 {
101 desc.m0 = 1;
102 desc.n0 = 4;
103 desc.k0 = 16;
104 }
105 else
106 {
107 desc.m0 = 1;
108 desc.n0 = 1;
109 desc.k0 = 16;
110 }
111 }
112 else
113 {
114 desc.m0 = 1;
115 desc.n0 = 2;
116 desc.k0 = 16;
117 }
118 }
119 else
120 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100121 if (m < 64)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100122 {
123 desc.m0 = 1;
124 desc.n0 = 1;
125 desc.k0 = 16;
126 }
127 else
128 {
129 desc.m0 = 4;
130 desc.n0 = 4;
131 desc.k0 = 4;
132 }
133 }
134 }
135
136 return desc;
137}
138
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100139DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G78_f16(const ITensorInfo *src,
140 const ITensorInfo *wei,
141 const PadStrideInfo &conv_info)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100142{
143 DirectConvComputeKernelInfo desc;
144
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100145 if (src->data_layout() == DataLayout::NHWC)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100146 {
147 // Get the output shape
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100148 const TensorShape wei_shape = wei->tensor_shape();
149 const TensorShape dst_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
Gian Marco Iodicead9a7ed2022-09-16 14:14:21 +0100150 const bool export_weights_to_cl_image = export_to_cl_image(wei);
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100151
152 const int32_t ofm = dst_shape[0];
153 const int32_t m = dst_shape[1] * dst_shape[2];
Gian Marco Iodice3394f3e2022-09-16 14:14:21 +0100154 const int32_t k = wei_shape[0];
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100155 const bool is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1;
156
Gian Marco Iodicead9a7ed2022-09-16 14:14:21 +0100157 desc.export_weights_to_cl_image = export_weights_to_cl_image;
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100158
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100159 if (dst_shape[0] <= 4)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100160 {
Gian Marco Iodice3394f3e2022-09-16 14:14:21 +0100161 // k0 should be as larger as possible. However, we should avoid
162 // having left-over for loops that make the implementation slower.
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100163 if ((k % 16) == 0)
Gian Marco Iodice3394f3e2022-09-16 14:14:21 +0100164 {
165 desc.k0 = 16;
166 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100167 else if ((k % 8) == 0)
Gian Marco Iodice3394f3e2022-09-16 14:14:21 +0100168 {
169 desc.k0 = 8;
170 }
171 else
172 {
173 desc.k0 = 4;
174 }
175
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100176 if (is_pointwise)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100177 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100178 if (ofm == 4)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100179 {
180 desc.m0 = 1;
181 desc.n0 = 4;
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100182 }
183 else
184 {
185 desc.m0 = 1;
186 desc.n0 = 1;
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100187 }
188 }
189 else
190 {
191 desc.m0 = 1;
192 desc.n0 = dst_shape[0];
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100193 }
194 }
195 else
196 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100197 if (m < 64)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100198 {
199 desc.m0 = 1;
200 desc.n0 = 1;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100201 if ((k % 16) == 0)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100202 {
Gian Marco Iodice3394f3e2022-09-16 14:14:21 +0100203 desc.k0 = 16;
204 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100205 else if ((k % 8) == 0)
Gian Marco Iodice3394f3e2022-09-16 14:14:21 +0100206 {
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100207 desc.k0 = 8;
208 }
209 else
210 {
Gian Marco Iodice3394f3e2022-09-16 14:14:21 +0100211 desc.k0 = 4;
212 }
213 }
214 else
215 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100216 if (ofm >= 16)
Gian Marco Iodice3394f3e2022-09-16 14:14:21 +0100217 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100218 if (m / 6 > 24000)
Gian Marco Iodice3394f3e2022-09-16 14:14:21 +0100219 {
220 desc.m0 = 6;
221 }
222 else
223 {
224 desc.m0 = 5;
225 }
226 desc.n0 = 8;
227 desc.k0 = 4;
228 }
229 else
230 {
231 desc.m0 = 2;
232 desc.n0 = 8;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100233 if ((k % 16) == 0)
Gian Marco Iodice3394f3e2022-09-16 14:14:21 +0100234 {
235 desc.k0 = 16;
236 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100237 else if ((k % 8) == 0)
Gian Marco Iodice3394f3e2022-09-16 14:14:21 +0100238 {
239 desc.k0 = 8;
240 }
241 else
242 {
243 desc.k0 = 4;
244 }
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100245 }
246 }
247 }
248 }
249
250 return desc;
251}
252
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100253DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G78_u8(const ITensorInfo *src,
254 const ITensorInfo *wei,
255 const PadStrideInfo &conv_info)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100256{
257 DirectConvComputeKernelInfo desc;
258
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100259 if (src->data_layout() == DataLayout::NHWC)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100260 {
261 // Get the output shape
262 TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
263
264 desc.n0 = 4;
265
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100266 if (output_shape[0] > 16)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100267 {
268 desc.m0 = 4;
269 }
270
271 desc.k0 = 16;
272
273 desc.export_weights_to_cl_image = false;
274 }
275
276 return desc;
277}
278
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100279DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G57_f32(const ITensorInfo *src,
280 const ITensorInfo *wei,
281 const PadStrideInfo &conv_info)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100282{
283 DirectConvComputeKernelInfo desc;
284
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100285 if (src->data_layout() == DataLayout::NHWC)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100286 {
287 // Get the output shape
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100288 const TensorShape wei_shape = wei->tensor_shape();
289 const TensorShape dst_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
Gian Marco Iodicead9a7ed2022-09-16 14:14:21 +0100290 const bool export_weights_to_cl_image = export_to_cl_image(wei);
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100291
292 const int32_t m = dst_shape[1] * dst_shape[2];
293 const bool is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1;
294
Gian Marco Iodicead9a7ed2022-09-16 14:14:21 +0100295 desc.export_weights_to_cl_image = export_weights_to_cl_image;
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100296
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100297 if (dst_shape[0] <= 4)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100298 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100299 if (is_pointwise)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100300 {
301 desc.m0 = 1;
302 desc.n0 = 1;
303 desc.k0 = 16;
304 }
305 else
306 {
307 desc.m0 = 1;
308 desc.n0 = dst_shape[0];
309 desc.k0 = 16;
310 }
311 }
312 else
313 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100314 if (m < 64)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100315 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100316 if (m == 1)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100317 {
318 desc.m0 = 1;
319 desc.n0 = 1;
320 desc.k0 = 16;
321 }
322 else
323 {
324 desc.m0 = 4;
325 desc.n0 = 2;
326 desc.k0 = 8;
327 }
328 }
329 else
330 {
331 desc.m0 = 4;
332 desc.n0 = 4;
333 desc.k0 = 4;
334 }
335 }
336 }
337
338 return desc;
339}
340
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100341DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G57_f16(const ITensorInfo *src,
342 const ITensorInfo *wei,
343 const PadStrideInfo &conv_info)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100344{
345 DirectConvComputeKernelInfo desc;
346
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100347 if (src->data_layout() == DataLayout::NHWC)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100348 {
349 // Get the output shape
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100350 const TensorShape wei_shape = wei->tensor_shape();
351 const TensorShape dst_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
Gian Marco Iodicead9a7ed2022-09-16 14:14:21 +0100352 const bool export_weights_to_cl_image = export_to_cl_image(wei);
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100353
354 const int32_t ofm = dst_shape[0];
355 const int32_t m = dst_shape[1] * dst_shape[2];
356 const bool is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1;
357
Gian Marco Iodicead9a7ed2022-09-16 14:14:21 +0100358 desc.export_weights_to_cl_image = export_weights_to_cl_image;
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100359
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100360 if (dst_shape[0] <= 4)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100361 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100362 if (is_pointwise)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100363 {
364 desc.m0 = 2;
365 desc.n0 = 1;
366 desc.k0 = 16;
367 }
368 else
369 {
370 desc.m0 = 1;
371 desc.n0 = dst_shape[0];
372 desc.k0 = 16;
373 }
374 }
375 else
376 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100377 if (m < 64)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100378 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100379 if (m == 1)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100380 {
381 desc.m0 = 1;
382 desc.n0 = 1;
383 desc.k0 = 16;
384 }
385 else
386 {
387 desc.m0 = 4;
388 desc.n0 = 2;
389 desc.k0 = 8;
390 }
391 }
392 else
393 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100394 if (ofm > 16)
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100395 {
396 desc.m0 = 4;
397 desc.n0 = 8;
398 desc.k0 = 8;
399 }
400 else
401 {
402 desc.m0 = 8;
403 desc.n0 = 4;
404 desc.k0 = 4;
405 }
406 }
407 }
408 }
409
410 return desc;
411}
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100412} // namespace cl_direct_conv
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100413} // namespace arm_compute