/*
 * Copyright (c) 2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.h"
#include "src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.h"

#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/GPUTarget.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"

namespace arm_compute
{
namespace cl_dwc
{
ClDWCNativeDefaultConfigValhall::ClDWCNativeDefaultConfigValhall(GPUTarget gpu)
    : IClDWCNativeKernelConfig(gpu)
{
}

DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
                                                                unsigned int depth_multiplier)
{
    using ConfigurationFunctionExecutorPtr = DWCComputeKernelInfo (ClDWCNativeDefaultConfigValhall::*)(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
                                                                                                       unsigned int depth_multiplier);

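    // Lookup tables mapping the input data type (F32, F16, quantized) to the heuristic
    // implemented for each GPU target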
    ClDWCNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G78(&ClDWCNativeDefaultConfigValhall::configure_G78_f32,
                                                                         &ClDWCNativeDefaultConfigValhall::configure_G78_f16,
                                                                         &ClDWCNativeDefaultConfigValhall::configure_G78_u8);

    ClDWCNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G77(&ClDWCNativeDefaultConfigValhall::configure_G78_f32,
                                                                         &ClDWCNativeDefaultConfigValhall::configure_G77_f16,
                                                                         &ClDWCNativeDefaultConfigValhall::configure_G78_u8);

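    // Pick the table for the detected GPU; any other Valhall target falls back to the
    // G78 heuristics (default case below)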
    ConfigurationFunctionExecutorPtr func = nullptr;
    switch(_target)
    {
        case GPUTarget::G77:
            func = configs_G77.get_function(src->data_type());
            break;
        case GPUTarget::G78:
        default:
            func = configs_G78.get_function(src->data_type());
            break;
    }

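    // Reject unsupported data types, then run the heuristic selected above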
    ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not supported for depthwise convolution");
    return (this->*func)(src, wei, conv_info, dilation, depth_multiplier);
}

DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
                                                                        unsigned int depth_multiplier)
{
    DWCComputeKernelInfo desc;

    if(src->data_layout() == DataLayout::NHWC)
    {
        const size_t idx_c = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL);
        const size_t idx_w = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH);
        const TensorShape wei_shape = wei->tensor_shape();
        const size_t kernel_c = wei_shape[idx_c];
        const size_t kernel_w = wei_shape[idx_w];

        desc.export_input_to_cl_image   = false;
        desc.export_weights_to_cl_image = use_cl_image_for_weights(wei, depth_multiplier);

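        // n0 is the channel block size processed per work-item: use the widest of
        // {4, 2, 1} that evenly divides depth_multiplier (4 when the multiplier is 1)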
        if(depth_multiplier == 1)
        {
            desc.n0 = 4;
        }
        else
        {
            if((depth_multiplier % 4) == 0)
            {
                desc.n0 = 4;
            }
            else if((depth_multiplier % 2) == 0)
            {
                desc.n0 = 2;
            }
            else
            {
                desc.n0 = 1;
            }
        }

        // Note: if adjust_vec_size() has to shrink n0, the weights must not be exported to cl_image
        ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) && (desc.export_weights_to_cl_image == true));

        desc.n0 = adjust_vec_size(desc.n0, kernel_c);

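        // m0 is the number of output values computed per work-item along the width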
        // Set m0 only if stride_x == 1 and dilation_x == 1
        if(conv_info.stride().first == 1 && dilation.x() == 1)
        {
            if((kernel_w >= 9) || (kernel_w == 1))
            {
                desc.m0 = 1;
            }
            else
            {
                desc.m0 = 2;
            }
        }
        else
        {
            desc.m0 = 1;
        }
    }

    return desc;
}

DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
                                                                        unsigned int depth_multiplier)
{
    DWCComputeKernelInfo desc;

    if(src->data_layout() == DataLayout::NHWC)
    {
        // Src and weights have the same dimension indices
        const size_t idx_c = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL);
        const size_t idx_w = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH);
        const TensorShape src_shape = src->tensor_shape();
        const TensorShape wei_shape = wei->tensor_shape();
        const size_t src_w = src_shape[idx_w];
        const size_t kernel_c = wei_shape[idx_c];
        const size_t kernel_w = wei_shape[idx_w];

        desc.export_input_to_cl_image   = false;
        desc.export_weights_to_cl_image = use_cl_image_for_weights(wei, depth_multiplier);

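        // With F16 data, buffer loads allow a channel block (n0) of 8; cl_image reads
        // return 4-element texels, so n0 is capped at 4 when the weights are exported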
        if(depth_multiplier == 1)
        {
            if(desc.export_weights_to_cl_image == false)
            {
                desc.n0 = 8;
            }
            else
            {
                desc.n0 = 4;
            }
        }
        else
        {
            if((depth_multiplier % 4) == 0)
            {
                desc.n0 = 4;
            }
            else if((depth_multiplier % 2) == 0)
            {
                desc.n0 = 2;
            }
            else
            {
                desc.n0 = 1;
            }
        }

        // Note: if adjust_vec_size() has to shrink n0, the weights must not be exported to cl_image
        ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) && (desc.export_weights_to_cl_image == true));

        desc.n0 = adjust_vec_size(desc.n0, kernel_c);

        // Set m0 only if stride_x == 1 and dilation_x == 1
        if(conv_info.stride().first == 1 && dilation.x() == 1)
        {
            if((kernel_w >= 9) || (kernel_w == 1))
            {
                desc.m0 = 1;
            }
            else
            {
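                // Use a width block of 5 when the input width is a multiple of 5
                // (no partial block); otherwise fall back to 4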
                if((src_w % 5) == 0)
                {
                    desc.m0 = 5;
                }
                else
                {
                    desc.m0 = 4;
                }
            }
        }
        else
        {
            desc.m0 = 1;
        }
    }

    return desc;
}

DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_u8(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
                                                                       unsigned int depth_multiplier)
{
    ARM_COMPUTE_UNUSED(wei);

    DWCComputeKernelInfo desc;

    if(src->data_layout() == DataLayout::NHWC)
    {
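        // Quantized path: cl_image export is not used and the blocks are kept smaller
        // than in the floating-point heuristics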
        desc.export_input_to_cl_image   = false;
        desc.export_weights_to_cl_image = false;
        desc.n0                         = (depth_multiplier == 1) ? 4 : 1;
        if(conv_info.stride().first == 1 && dilation.x() == 1 && depth_multiplier == 1)
        {
            desc.m0 = 2;
        }
        else
        {
            desc.m0 = 1;
        }
    }

    return desc;
}

DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G77_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
                                                                        unsigned int depth_multiplier)
{
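    // Same scheme as configure_G78_f16, except that the width block (m0) is capped
    // at 2 and the input width is not consulted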
    DWCComputeKernelInfo desc;

    if(src->data_layout() == DataLayout::NHWC)
    {
        const size_t idx_c = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL);
        const size_t idx_w = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH);
        const TensorShape wei_shape = wei->tensor_shape();
        const size_t kernel_c = wei_shape[idx_c];
        const size_t kernel_w = wei_shape[idx_w];

        desc.export_input_to_cl_image   = false;
        desc.export_weights_to_cl_image = use_cl_image_for_weights(wei, depth_multiplier);

        if(depth_multiplier == 1)
        {
            if(desc.export_weights_to_cl_image == false)
            {
                desc.n0 = 8;
            }
            else
            {
                desc.n0 = 4;
            }
        }
        else
        {
            if((depth_multiplier % 4) == 0)
            {
                desc.n0 = 4;
            }
            else if((depth_multiplier % 2) == 0)
            {
                desc.n0 = 2;
            }
            else
            {
                desc.n0 = 1;
            }
        }

        // Note: if adjust_vec_size() has to shrink n0, the weights must not be exported to cl_image
        ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) && (desc.export_weights_to_cl_image == true));

        desc.n0 = adjust_vec_size(desc.n0, kernel_c);

        // Set m0 only if stride_x == 1 and dilation_x == 1
        if(conv_info.stride().first == 1 && dilation.x() == 1)
        {
            if((kernel_w >= 9) || (kernel_w == 1))
            {
                desc.m0 = 1;
            }
            else
            {
                desc.m0 = 2;
            }
        }
        else
        {
            desc.m0 = 1;
        }
    }

    return desc;
}
} // namespace cl_dwc
} // namespace arm_compute