blob: ad946783350790cd69d36ce548acd69d013ee63c [file] [log] [blame]
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +01001/*
2 * Copyright (c) 2022 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h"
25
26#include "arm_compute/core/CL/CLHelpers.h"
27#include "arm_compute/core/CL/CLKernelLibrary.h"
28#include "arm_compute/core/GPUTarget.h"
29#include "arm_compute/core/TensorInfo.h"
30#include "arm_compute/core/TensorShape.h"
31#include "arm_compute/core/utils/misc/ShapeCalculator.h"
32#include <utility>
33
34namespace arm_compute
35{
36namespace cl_direct_conv
37{
38using namespace arm_compute::misc::shape_calculator;
39
40ClDirectConvDefaultConfigValhall::ClDirectConvDefaultConfigValhall(GPUTarget gpu)
41 : IClDirectConvKernelConfig(gpu)
42{
43}
44
45DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
46{
47 using ConfigurationFunctionExecutorPtr = DirectConvComputeKernelInfo (ClDirectConvDefaultConfigValhall::*)(const ITensorInfo * src, const ITensorInfo * wei, const PadStrideInfo & conv_info);
48
49 ClDirectConvConfigArray<ConfigurationFunctionExecutorPtr> configs_G78(&ClDirectConvDefaultConfigValhall::configure_G78_f32,
50 &ClDirectConvDefaultConfigValhall::configure_G78_f16,
51 &ClDirectConvDefaultConfigValhall::configure_G78_u8);
52
53 ClDirectConvConfigArray<ConfigurationFunctionExecutorPtr> configs_G57(&ClDirectConvDefaultConfigValhall::configure_G57_f32,
54 &ClDirectConvDefaultConfigValhall::configure_G57_f16,
55 &ClDirectConvDefaultConfigValhall::configure_G78_u8);
56
57 ConfigurationFunctionExecutorPtr func = nullptr;
58 switch(_target)
59 {
60 case GPUTarget::G57:
61 func = configs_G57.get_function(src->data_type());
62 break;
63 case GPUTarget::G78:
64 default:
65 func = configs_G78.get_function(src->data_type());
66 break;
67 }
68
69 ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not supported for direct convolution");
70 return (this->*func)(src, wei, conv_info);
71}
72
73DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G78_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
74{
75 DirectConvComputeKernelInfo desc;
76
77 if(src->data_layout() == DataLayout::NHWC)
78 {
79 // Get the output shape
Gian Marco Iodicead9a7ed2022-09-16 14:14:21 +010080 const TensorShape wei_shape = wei->tensor_shape();
81 const TensorShape dst_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
82 const bool export_weights_to_cl_image = export_to_cl_image(wei);
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010083
84 const int32_t ofm = dst_shape[0];
85 const int32_t m = dst_shape[1] * dst_shape[2];
86 const bool is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1;
87
Gian Marco Iodicead9a7ed2022-09-16 14:14:21 +010088 desc.export_weights_to_cl_image = export_weights_to_cl_image;
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +010089
90 if(dst_shape[0] <= 4)
91 {
92 if(is_pointwise)
93 {
94 if(ofm == 4)
95 {
96 desc.m0 = 1;
97 desc.n0 = 4;
98 desc.k0 = 16;
99 }
100 else
101 {
102 desc.m0 = 1;
103 desc.n0 = 1;
104 desc.k0 = 16;
105 }
106 }
107 else
108 {
109 desc.m0 = 1;
110 desc.n0 = 2;
111 desc.k0 = 16;
112 }
113 }
114 else
115 {
116 if(m < 64)
117 {
118 desc.m0 = 1;
119 desc.n0 = 1;
120 desc.k0 = 16;
121 }
122 else
123 {
124 desc.m0 = 4;
125 desc.n0 = 4;
126 desc.k0 = 4;
127 }
128 }
129 }
130
131 return desc;
132}
133
134DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G78_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
135{
136 DirectConvComputeKernelInfo desc;
137
138 if(src->data_layout() == DataLayout::NHWC)
139 {
140 // Get the output shape
Gian Marco Iodicead9a7ed2022-09-16 14:14:21 +0100141 const TensorShape wei_shape = wei->tensor_shape();
142 const TensorShape dst_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
143 const bool export_weights_to_cl_image = export_to_cl_image(wei);
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100144
145 const int32_t ofm = dst_shape[0];
146 const int32_t m = dst_shape[1] * dst_shape[2];
147 const bool is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1;
148
Gian Marco Iodicead9a7ed2022-09-16 14:14:21 +0100149 desc.export_weights_to_cl_image = export_weights_to_cl_image;
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100150
151 if(dst_shape[0] <= 4)
152 {
153 if(is_pointwise)
154 {
155 if(ofm == 4)
156 {
157 desc.m0 = 1;
158 desc.n0 = 4;
159 desc.k0 = 16;
160 }
161 else
162 {
163 desc.m0 = 1;
164 desc.n0 = 1;
165 desc.k0 = 16;
166 }
167 }
168 else
169 {
170 desc.m0 = 1;
171 desc.n0 = dst_shape[0];
172 desc.k0 = 16;
173 }
174 }
175 else
176 {
177 if(m < 64)
178 {
179 desc.m0 = 1;
180 desc.n0 = 1;
181 desc.k0 = 16;
182 }
183 else
184 {
185 if(ofm > 16)
186 {
187 desc.m0 = 4;
188 desc.n0 = 4;
189 desc.k0 = 8;
190 }
191 else
192 {
193 desc.m0 = 4;
194 desc.n0 = 4;
195 desc.k0 = 16;
196 }
197 }
198 }
199 }
200
201 return desc;
202}
203
204DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G78_u8(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
205{
206 DirectConvComputeKernelInfo desc;
207
208 if(src->data_layout() == DataLayout::NHWC)
209 {
210 // Get the output shape
211 TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
212
213 desc.n0 = 4;
214
215 if(output_shape[0] > 16)
216 {
217 desc.m0 = 4;
218 }
219
220 desc.k0 = 16;
221
222 desc.export_weights_to_cl_image = false;
223 }
224
225 return desc;
226}
227
228DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G57_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
229{
230 DirectConvComputeKernelInfo desc;
231
232 if(src->data_layout() == DataLayout::NHWC)
233 {
234 // Get the output shape
Gian Marco Iodicead9a7ed2022-09-16 14:14:21 +0100235 const TensorShape wei_shape = wei->tensor_shape();
236 const TensorShape dst_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
237 const bool export_weights_to_cl_image = export_to_cl_image(wei);
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100238
239 const int32_t m = dst_shape[1] * dst_shape[2];
240 const bool is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1;
241
Gian Marco Iodicead9a7ed2022-09-16 14:14:21 +0100242 desc.export_weights_to_cl_image = export_weights_to_cl_image;
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100243
244 if(dst_shape[0] <= 4)
245 {
246 if(is_pointwise)
247 {
248 desc.m0 = 1;
249 desc.n0 = 1;
250 desc.k0 = 16;
251 }
252 else
253 {
254 desc.m0 = 1;
255 desc.n0 = dst_shape[0];
256 desc.k0 = 16;
257 }
258 }
259 else
260 {
261 if(m < 64)
262 {
263 if(m == 1)
264 {
265 desc.m0 = 1;
266 desc.n0 = 1;
267 desc.k0 = 16;
268 }
269 else
270 {
271 desc.m0 = 4;
272 desc.n0 = 2;
273 desc.k0 = 8;
274 }
275 }
276 else
277 {
278 desc.m0 = 4;
279 desc.n0 = 4;
280 desc.k0 = 4;
281 }
282 }
283 }
284
285 return desc;
286}
287
288DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G57_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
289{
290 DirectConvComputeKernelInfo desc;
291
292 if(src->data_layout() == DataLayout::NHWC)
293 {
294 // Get the output shape
Gian Marco Iodicead9a7ed2022-09-16 14:14:21 +0100295 const TensorShape wei_shape = wei->tensor_shape();
296 const TensorShape dst_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
297 const bool export_weights_to_cl_image = export_to_cl_image(wei);
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100298
299 const int32_t ofm = dst_shape[0];
300 const int32_t m = dst_shape[1] * dst_shape[2];
301 const bool is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1;
302
Gian Marco Iodicead9a7ed2022-09-16 14:14:21 +0100303 desc.export_weights_to_cl_image = export_weights_to_cl_image;
Gian Marco Iodice2cc50b32022-05-30 14:41:49 +0100304
305 if(dst_shape[0] <= 4)
306 {
307 if(is_pointwise)
308 {
309 desc.m0 = 2;
310 desc.n0 = 1;
311 desc.k0 = 16;
312 }
313 else
314 {
315 desc.m0 = 1;
316 desc.n0 = dst_shape[0];
317 desc.k0 = 16;
318 }
319 }
320 else
321 {
322 if(m < 64)
323 {
324 if(m == 1)
325 {
326 desc.m0 = 1;
327 desc.n0 = 1;
328 desc.k0 = 16;
329 }
330 else
331 {
332 desc.m0 = 4;
333 desc.n0 = 2;
334 desc.k0 = 8;
335 }
336 }
337 else
338 {
339 if(ofm > 16)
340 {
341 desc.m0 = 4;
342 desc.n0 = 8;
343 desc.k0 = 8;
344 }
345 else
346 {
347 desc.m0 = 8;
348 desc.n0 = 4;
349 desc.k0 = 4;
350 }
351 }
352 }
353 }
354
355 return desc;
356}
} // namespace cl_direct_conv
358} // namespace arm_compute