blob: f55685ee499712ece027e0fa9ca959c3c7228b52 [file] [log] [blame]
Gian Marco Iodice9d3bd412022-12-30 09:45:00 +00001/*
2 * Copyright (c) 2022 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.h"
25#include "src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.h"
26
27#include "arm_compute/core/CL/CLHelpers.h"
28#include "arm_compute/core/GPUTarget.h"
29#include "arm_compute/core/TensorInfo.h"
30#include "arm_compute/core/TensorShape.h"
31
32namespace arm_compute
33{
34namespace cl_dwc
35{
36namespace
37{
38DWCComputeKernelInfo configure_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
39 unsigned int depth_multiplier, bool is_g71)
40{
41 DWCComputeKernelInfo desc;
42
43 if(src->data_layout() == DataLayout::NHWC)
44 {
45 const size_t idx_c = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL);
46 const size_t idx_w = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH);
47 const TensorShape wei_shape = wei->tensor_shape();
48 const size_t kernel_c = wei_shape[idx_c];
49 const size_t kernel_w = wei_shape[idx_w];
50
51 desc.export_input_to_cl_image = false;
52
53 if(is_g71)
54 {
55 desc.export_weights_to_cl_image = false;
56 }
57 else
58 {
59 desc.export_weights_to_cl_image = use_cl_image_for_weights(wei, depth_multiplier);
60 }
61
62 if(depth_multiplier == 1)
63 {
64 desc.n0 = 4;
65 }
66 else
67 {
68 if((depth_multiplier % 4) == 0)
69 {
70 desc.n0 = 4;
71 }
72 else if((depth_multiplier % 2) == 0)
73 {
74 desc.n0 = 2;
75 }
76 else
77 {
78 desc.n0 = 1;
79 }
80 }
81
82 // Note: If we reduce n0, export to cl_image must be false
83 ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) && (desc.export_weights_to_cl_image == true));
84
85 desc.n0 = adjust_vec_size(desc.n0, kernel_c);
86
87 // Set m0 only if stride_x == 1 and dilation_x == 1
88 if(conv_info.stride().first == 1 && dilation.x() == 1)
89 {
90 if((kernel_w >= 9) || (kernel_w == 1))
91 {
92 desc.m0 = 1;
93 }
94 else
95 {
96 desc.m0 = 2;
97 }
98 }
99 else
100 {
101 desc.m0 = 1;
102 }
103 }
104
105 return desc;
106}
107
108DWCComputeKernelInfo configure_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
109 unsigned int depth_multiplier, bool is_g71)
110{
111 DWCComputeKernelInfo desc;
112
113 if(src->data_layout() == DataLayout::NHWC)
114 {
115 // Src and weights have the same dimension indices
116 const size_t idx_c = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL);
117 const size_t idx_w = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH);
118 const TensorShape src_shape = src->tensor_shape();
119 const TensorShape wei_shape = wei->tensor_shape();
120 const size_t src_w = src_shape[idx_w];
121 const size_t kernel_c = wei_shape[idx_c];
122 const size_t kernel_w = wei_shape[idx_w];
123
124 desc.export_input_to_cl_image = false;
125
126 if(is_g71)
127 {
128 desc.export_weights_to_cl_image = false;
129 }
130 else
131 {
132 desc.export_weights_to_cl_image = use_cl_image_for_weights(wei, depth_multiplier);
133 }
134
135 if(depth_multiplier == 1)
136 {
137 if(desc.export_weights_to_cl_image == false)
138 {
139 desc.n0 = 8;
140 }
141 else
142 {
143 desc.n0 = 4;
144 }
145 }
146 else
147 {
148 if((depth_multiplier % 4) == 0)
149 {
150 desc.n0 = 4;
151 }
152 else if((depth_multiplier % 2) == 0)
153 {
154 desc.n0 = 2;
155 }
156 else
157 {
158 desc.n0 = 1;
159 }
160 }
161
162 // Note: If we reduce n0, export to cl_image must be false
163 ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) && (desc.export_weights_to_cl_image == true));
164
165 desc.n0 = adjust_vec_size(desc.n0, kernel_c);
166
167 // Set m0 only if stride_x == 1 and dilation_x == 1
168 if(conv_info.stride().first == 1 && dilation.x() == 1)
169 {
170 if((kernel_w >= 9) || (kernel_w == 1))
171 {
172 desc.m0 = 1;
173 }
174 else
175 {
176 if((src_w % 5) == 0)
177 {
178 desc.m0 = 5;
179 }
180 else
181 {
182 desc.m0 = 4;
183 }
184 }
185 }
186 else
187 {
188 desc.m0 = 1;
189 }
190 }
191
192 return desc;
193}
194} // namespace
195
196ClDWCNativeDefaultConfigBifrost::ClDWCNativeDefaultConfigBifrost(GPUTarget gpu)
197 : IClDWCNativeKernelConfig(gpu)
198{
199}
200
201DWCComputeKernelInfo ClDWCNativeDefaultConfigBifrost::configure(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
202 unsigned int depth_multiplier)
203{
204 using ConfigurationFunctionExecutorPtr = DWCComputeKernelInfo (ClDWCNativeDefaultConfigBifrost::*)(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
205 unsigned int depth_multiplier);
206
207 ClDWCNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G71(&ClDWCNativeDefaultConfigBifrost::configure_G71_f32,
208 &ClDWCNativeDefaultConfigBifrost::configure_G71_f16,
209 &ClDWCNativeDefaultConfigBifrost::configure_G7x_u8);
210
211 ClDWCNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G7x(&ClDWCNativeDefaultConfigBifrost::configure_G7x_f32,
212 &ClDWCNativeDefaultConfigBifrost::configure_G7x_f16,
213 &ClDWCNativeDefaultConfigBifrost::configure_G7x_u8);
214
215 ConfigurationFunctionExecutorPtr func = nullptr;
216 switch(_target)
217 {
218 case GPUTarget::G71:
219 func = configs_G71.get_function(src->data_type());
220 break;
221 default:
222 func = configs_G7x.get_function(src->data_type());
223 break;
224 }
225
226 ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not supported for depthwise convolution");
227 return (this->*func)(src, wei, conv_info, dilation, depth_multiplier);
228}
229
230DWCComputeKernelInfo ClDWCNativeDefaultConfigBifrost::configure_G71_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
231 unsigned int depth_multiplier)
232{
233 return configure_f32(src, wei, conv_info, dilation, depth_multiplier, true);
234}
235
236DWCComputeKernelInfo ClDWCNativeDefaultConfigBifrost::configure_G71_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
237 unsigned int depth_multiplier)
238{
239 return configure_f16(src, wei, conv_info, dilation, depth_multiplier, true);
240}
241
242DWCComputeKernelInfo ClDWCNativeDefaultConfigBifrost::configure_G7x_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
243 unsigned int depth_multiplier)
244{
245 return configure_f32(src, wei, conv_info, dilation, depth_multiplier, false);
246}
247
248DWCComputeKernelInfo ClDWCNativeDefaultConfigBifrost::configure_G7x_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
249 unsigned int depth_multiplier)
250{
251 return configure_f16(src, wei, conv_info, dilation, depth_multiplier, false);
252}
253
254DWCComputeKernelInfo ClDWCNativeDefaultConfigBifrost::configure_G7x_u8(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
255 unsigned int depth_multiplier)
256{
257 ARM_COMPUTE_UNUSED(wei);
258
259 DWCComputeKernelInfo desc;
260
261 if(src->data_layout() == DataLayout::NHWC)
262 {
263 desc.export_input_to_cl_image = false;
264 desc.export_weights_to_cl_image = false;
265 desc.n0 = (depth_multiplier == 1) ? 4 : 1;
266 if(conv_info.stride().first == 1 && dilation.x() == 1 && depth_multiplier == 1)
267 {
268 desc.m0 = 2;
269 }
270 else
271 {
272 desc.m0 = 1;
273 }
274 }
275
276 return desc;
277}
278} // namespace cl_dwc
279} // namespace arm_compute