blob: 8936db6abe09125797843c749fc320250419b548 [file] [log] [blame]
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +00001/*
Gunes Bayir2b9fa592024-01-17 16:07:03 +00002 * Copyright (c) 2023-2024 Arm Limited.
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "ClTemplatePool2d.h"
25
Matthew Bentham314d3e22023-06-23 10:53:52 +000026#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010027#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Matthew Bentham314d3e22023-06-23 10:53:52 +000028#include "arm_compute/core/utils/StringUtils.h"
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +000029
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010030#include "src/core/helpers/WindowHelpers.h"
31#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
32#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +000033#include "support/StringSupport.h"
34
35namespace arm_compute
36{
37namespace experimental
38{
39namespace dynamic_fusion
40{
41namespace
42{
43// Shape indexes for NHWC Datalayout
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +000044constexpr static int32_t height_idx = 2;
45constexpr static int32_t width_idx = 1;
46constexpr static int32_t channel_idx = 0;
Omar Al Khatib3c7c1fa2023-03-07 09:57:49 +000047} // namespace
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +000048ClTemplatePool2d::ClTemplatePool2d(ComponentId id,
49 const ArgumentPack<ITensorInfo> &tensors,
50 const Attributes &attributes,
51 const Settings &settings)
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010052 : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes}, _settings{settings}
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +000053{
54 _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
55 _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
56 ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
57}
58
59std::string ClTemplatePool2d::get_name() const
60{
61 return "pool2d";
62}
63
64std::string ClTemplatePool2d::get_component_code(const ComponentGroup &comp_group) const
65{
66 ARM_COMPUTE_UNUSED(comp_group);
67
68 // Condition to use 2x2 optimized kernel
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010069 if (_attributes.pool_size() == Size2D(2, 2))
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +000070 {
71 return get_2x2_kernel_code();
72 }
73 else
74 {
75 return get_MxN_kernel_code();
76 }
77}
78
79std::string ClTemplatePool2d::get_MxN_kernel_code() const
80{
Gunes Bayir2b9fa592024-01-17 16:07:03 +000081 const auto pool_type = _attributes.pool_type();
82 const bool fp_mixed_precision = (_src->data_type() == DataType::F16) && pool_type != PoolingType::MAX;
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +000083
84 // Define pool op macro.
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010085 std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_"
86 : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_";
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +000087
88 // Kernel start
89 // Note: If C is not multiple of N0, we shift back of PARTIAL_N0 elements to compute the leftover elements for get_global_id(0) == 0
90 // Note: If C is less than N0, N0 should be SHRINKED to the closest smaller N0. This operation is performed on the host side
91 std::string code = R"_(
92//------------------ START KERNEL {{meta_kernel_id}} ---------------------
93// IN_0(src) {{src}}
94// OUT(dst, accum) {{dst}}
95
96{
97 const int idx_out_c = g_ind_0;
98 const int idx_out_w = g_ind_1;
99)_";
100
101 // Add macro for POOL_OP
102 code += "\n" + pool_op + "\n";
103
104 code += R"_(
105 const int idx_out_h = g_ind_2 % {{DST_HEIGHT}};
106 const int idx_out_n = g_ind_2 / {{DST_HEIGHT}};
107)_";
108
109 // Define common variables.
110 code += R"_(
111 __global unsigned char *in_base_ptr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_n * {{src}}_stride_w;
112
113 __global unsigned char *out_base_ptr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_w * {{dst}}_stride_y + idx_out_h * {{dst}}_stride_z + idx_out_n * {{dst}}_stride_w;
114
115 VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)
116 res0 = {{INITIAL_VALUE}};
117
118 const int idx_in_w = idx_out_w * {{STRIDE_X}} - {{PAD_X}};
119 const int idx_in_h = idx_out_h * {{STRIDE_Y}} - {{PAD_Y}};
120
121 const int pool_x_s = max((int)0, -idx_in_w);
122 const int pool_x_e = min((int){{POOL_SIZE_X}}, (int){{SRC_WIDTH}} - idx_in_w);
123 const int pool_y_s = max((int)0, -idx_in_h);
124 const int pool_y_e = min((int){{POOL_SIZE_Y}}, (int){{SRC_HEIGHT}} - idx_in_h);
125)_";
126
127 // Determine filter size depending on if padding is excluded or not
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100128 if (_attributes.exclude_padding())
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +0000129 {
130 code += R"_(
131 const int filter_size = (pool_y_e - pool_y_s) * (pool_x_e - pool_x_s);
132)_";
133 }
134 else
135 {
136 code += R"_(
137 const int filter_size = {{POOL_SIZE_X}} * {{POOL_SIZE_Y}};
138)_";
139 }
140
141 // Loop through pool size
142 // if global pooling
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100143 if (_attributes.pool_size().x() == _src->dimension(width_idx) &&
144 _attributes.pool_size().y() == _src->dimension(height_idx))
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +0000145 {
146 // Begin loop
147 code += R"_(
148 // Global pooling path
149 for(int y = 0; y < {{POOL_SIZE_Y}}; ++y)
150 {
151 #pragma unroll 8
152 for(int x = 0; x < {{POOL_SIZE_X}}; ++x)
153 {
154 VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)
155 data0;
156)_";
157 }
158 else // if local pooling size
159 {
160 code += R"_(
161 for(int y = pool_y_s; y < pool_y_e; ++y)
162 {
163 #pragma unroll 8
164 for(int x = pool_x_s; x < pool_x_e; ++x)
165 {
166 VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)
167 data0;
168)_";
169 } // end else
170
171 // if condition inside loop - use 32bit acc if mixed_precision.
172 // End loop through pooling section.
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100173 if (fp_mixed_precision)
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +0000174 {
175 // In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE
176 code += R"_(
177 data0 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + (x + idx_in_w) * {{src}}_stride_y + (y + idx_in_h) * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0));
178 res0 = POOL_OP(res0, data0);
179 }
180 }
181)_";
182 }
183 else // load data, compute result and end loop
184 {
185 code += R"_(
186 data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + (x + idx_in_w) * {{src}}_stride_y + (y + idx_in_h) * {{src}}_stride_z));
187 res0 = POOL_OP(res0, data0);
188 }
189 }
190)_";
191 }
192
193 // For Pool AVG ONLY, divide pool output by filter size
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100194 if (pool_type == PoolingType::AVG)
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +0000195 {
196 code += R"_(
197 res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))filter_size;
198)_";
199 }
200
201 // If mixed precision convert datatype before storing. Then end kernel.
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100202 if (fp_mixed_precision)
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +0000203 {
204 code += R"_(
205 VEC_DATA_TYPE({{DATA_TYPE}}, N0)
206 res_converted0 = CONVERT(res0, VEC_DATA_TYPE({{DATA_TYPE}}, N0));
207 STORE_VECTOR_SELECT(res_converted, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0);
208)_";
209 }
210 else
211 {
212 // Store data
213 code += R"_(
214 STORE_VECTOR_SELECT(res, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0);
215)_";
216 }
217
218 code += R"_(
219//------------------ END KERNEL {{meta_kernel_id}} ---------------------
220}
221)_";
222
223 return code;
224}
225
226std::string ClTemplatePool2d::get_2x2_kernel_code() const
227{
Gunes Bayir2b9fa592024-01-17 16:07:03 +0000228 const auto pool_type = _attributes.pool_type();
229 const bool fp_mixed_precision = (_src->data_type() == DataType::F16) && pool_type != PoolingType::MAX;
230 std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_"
231 : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_";
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +0000232
233 std::string code = R"_(
234//------------------ START KERNEL {{meta_kernel_id}} ---------------------
235// IN_0(src) {{src}}
236// OUT(dst, accum) {{dst}}
237
238#define SELECT_TYPE SELECT_VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)
239
240{
241 const int idx_out_c = g_ind_0;
242 const int idx_out_w = g_ind_1;
243)_";
244
245 // Add pool op macro
246 code += "\n" + pool_op + "\n";
247
248 // If batch size != 1, the batch size dimension is collapsed over the height dimension
249 code += R"_(
250 const int idx_out_h = g_ind_2 % {{DST_HEIGHT}};
251 const int idx_out_n = g_ind_2 / {{DST_HEIGHT}};
252)_";
253
254 code += R"_(
255 const int idx_in_w = idx_out_w * {{STRIDE_X}} - {{PAD_X}};
256 const int idx_in_h = idx_out_h * {{STRIDE_Y}} - {{PAD_Y}};
257
258 __global unsigned char *in_base_ptr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_n * {{src}}_stride_w;
259 __global unsigned char *out_base_ptr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_w * {{dst}}_stride_y + idx_out_h * {{dst}}_stride_z + idx_out_n *
260 {{dst}}_stride_w;
261 const int pool_x_s = max((int)0, -idx_in_w);
262 const int pool_x_e = min((int)2, (int){{SRC_WIDTH}} - idx_in_w);
263 const int pool_y_s = max((int)0, -idx_in_h);
264 const int pool_y_e = min((int)2, (int){{SRC_HEIGHT}} - idx_in_h);
265
266 const int filter_size = (pool_x_e - pool_x_s) * (pool_y_e - pool_y_s);
267 const int x0 = pool_x_s + idx_in_w;
268 const int y0 = pool_y_s + idx_in_h;
269 const int x1 = pool_x_e - 1 + idx_in_w;
270 const int y1 = pool_y_e - 1 + idx_in_h;
271
272 REPEAT_VAR_INIT_TO_CONST(4, VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0), data, 0);
273)_";
274
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100275 if (fp_mixed_precision)
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +0000276 {
277 // In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE
278 code += R"_(
279 data0 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y0 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0));
280 data1 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y0 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0));
281 data2 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y1 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0));
282 data3 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y1 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0));
283)_";
284 }
285 else
286 {
287 code += R"_(
288 data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y0 * {{src}}_stride_z));
289 data1 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y0 * {{src}}_stride_z));
290 data2 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y1 * {{src}}_stride_z));
291 data3 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y1 * {{src}}_stride_z));
292)_";
293 }
294
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100295 if (pool_type != PoolingType::MAX)
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +0000296 {
297 // Make invalid the values loaded if the x or y coordinate was clamped (out-of-bound)
298 code += R"_(
299 if(filter_size != 4)
300 {
301 SELECT_TYPE cond_w_s = (SELECT_TYPE)idx_in_w < (SELECT_TYPE)0;
302 SELECT_TYPE cond_w_e = (SELECT_TYPE)idx_in_w >= (SELECT_TYPE)({{SRC_WIDTH}} - 1);
303 SELECT_TYPE cond_h_s = (SELECT_TYPE)idx_in_h < (SELECT_TYPE)0;
304 SELECT_TYPE cond_h_e = (SELECT_TYPE)idx_in_h >= (SELECT_TYPE)({{SRC_HEIGHT}} - 1);
305
306 data0 = select(data0, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_s | cond_h_s));
307 data1 = select(data1, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_e | cond_h_s));
308 data2 = select(data2, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_s | cond_h_e));
309 data3 = select(data3, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_e | cond_h_e));
310 }
311)_";
312 }
313
314 code += R"_(
315 VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)
316 res0 = data0;
317 res0 = POOL_OP(res0, data1);
318 res0 = POOL_OP(res0, data2);
319 res0 = POOL_OP(res0, data3);
320)_";
321
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100322 if (pool_type == PoolingType::AVG)
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +0000323 {
324 // If avg pooling divide result accordingly.
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100325 if (_attributes.exclude_padding())
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +0000326 {
327 code += R"_(
328 res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))filter_size;
329)_";
330 }
331 else
332 {
333 code += R"_(
334 res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))4;
335)_";
336 }
337 }
338
339 // Store result
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100340 if (fp_mixed_precision)
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +0000341 {
342 code += R"_(
343 VEC_DATA_TYPE({{DATA_TYPE}}, N0)
344 res_converted0 = CONVERT(res0, VEC_DATA_TYPE({{DATA_TYPE}}, N0));
345 STORE_VECTOR_SELECT(res_converted, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0);
346)_";
347 }
348 else
349 {
350 code += R"_(
351 STORE_VECTOR_SELECT(res, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0);
352)_";
353 }
354
355 code += R"_(
356 //------------------ END KERNEL {{meta_kernel_id}} ---------------------
357}
358#undef SELECT_TYPE
359)_";
360
361 return code;
362}
363
364void ClTemplatePool2d::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
365{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100366 vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
367 "src");
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +0000368
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100369 vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
370 "dst");
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +0000371}
372
373TagLUT ClTemplatePool2d::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
374{
375 ARM_COMPUTE_UNUSED(comp_group);
376
377 TagLUT lut{};
378 // Arguments and global shared variables
379 lut["src"] = vtable.get_variable(_src);
380 lut["dst"] = vtable.get_variable(_dst);
381
382 // Local build options
383 lut["meta_kernel_id"] = id();
384
385 // Retrieve relevant data
Gunes Bayir2b9fa592024-01-17 16:07:03 +0000386 const auto padding = _attributes.pad();
387 const auto stride = _attributes.stride();
388 const auto pool_size = _attributes.pool_size();
389 const auto data_type = _src->data_type();
390 const auto use_fp_mixed_precision =
391 (_src->data_type() == DataType::F16) && _attributes.pool_type() != PoolingType::MAX;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100392 const std::string max_initial_value =
393 _settings.use_inf_as_limit() ? "(-INFINITY)"
394 : float_to_string_with_full_precision(std::numeric_limits<float>::lowest());
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +0000395
396 // pool specific
397 lut["STRIDE_X"] = stride.x();
398 lut["STRIDE_Y"] = stride.y();
399 lut["PAD_X"] = padding.left;
400 lut["PAD_Y"] = padding.top;
401 lut["POOL_SIZE_X"] = pool_size.width;
402 lut["POOL_SIZE_Y"] = pool_size.height;
403
404 // Datatypes and variables
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100405 lut["ACC_DATA_TYPE"] = get_cl_type_from_data_type(
406 (use_fp_mixed_precision) ? (DataType::F32) : (data_type)); // Type of accumulators to use.
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +0000407 lut["DATA_TYPE"] = get_cl_type_from_data_type(data_type);
408 lut["SRC_WIDTH"] = _src->dimension(width_idx);
409 lut["SRC_HEIGHT"] = _src->dimension(height_idx);
Adnan AlSinan227db8d2023-02-14 14:24:09 +0000410 lut["INITIAL_VALUE"] = (_attributes.pool_type() == PoolingType::MAX) ? max_initial_value : std::string("0");
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +0000411
412 // Tensor specific data
413 lut["DST_HEIGHT"] = _dst->dimension(height_idx);
414
415 return lut;
416}
417
418CLBuildOptions ClTemplatePool2d::get_build_options(const ComponentGroup &comp_group) const
419{
420 const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
421 const unsigned int n0 = root_window.x().step();
422 const unsigned int partial_store_n0 = _dst->dimension(0) % n0;
423
424 CLBuildOptions build_opts{};
425 build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
426 build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
427
428 return build_opts;
429}
430
431std::string ClTemplatePool2d::get_config_id() const
432{
433 const DataType data_type = _src->data_type();
434 const DataLayout data_layout = _src->data_layout();
435
436 std::string config_id{};
437 config_id += "pooling_layer_2d_";
438 config_id += lower_string(string_from_data_type(data_type));
439 config_id += "_";
440 config_id += lower_string(string_from_data_layout(data_layout));
441 config_id += "_";
442 config_id += support::cpp11::to_string(_dst->dimension(width_idx));
443 config_id += "_";
444 config_id += support::cpp11::to_string(_dst->dimension(height_idx));
445 config_id += "_";
446 config_id += support::cpp11::to_string(_dst->dimension(channel_idx));
447
448 return config_id;
449}
450
451std::set<std::string> ClTemplatePool2d::get_headers_list() const
452{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100453 return std::set<std::string>{"helpers.h", "tile_helpers.h", "repeat.h"};
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +0000454}
455
456Window ClTemplatePool2d::get_window() const
457{
458 ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
459 const auto output_shape = _dst->tensor_shape();
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100460 const unsigned int vec_size = adjust_vec_size(((_dst->data_type() == DataType::F32) ? 2 : 4), _dst->dimension(0));
Mohammed Suhail Munshia18d85c2023-01-03 10:16:16 +0000461
462 // Create and configure kernel window
463 auto win = calculate_max_window(output_shape, Steps(vec_size));
464 win = win.collapse_if_possible(win, Window::DimZ); // collapse window on batch size.
465 return win;
466}
467
468} // namespace dynamic_fusion
469} // namespace experimental
470} // namespace arm_compute