blob: 59c35431710c4c664fedc38851372a75a603309a [file] [log] [blame]
/*
 * Copyright (c) 2017-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h"
25
26#include "arm_compute/core/AccessWindowStatic.h"
Anthony Barbiereaefd002018-07-20 17:49:35 +010027#include "arm_compute/core/CPP/Validate.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010028#include "arm_compute/core/Error.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010029#include "arm_compute/core/Helpers.h"
30#include "arm_compute/core/ITensor.h"
Georgios Pinitas55186712018-01-08 17:37:12 +000031#include "arm_compute/core/NEON/NEAsymm.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010032#include "arm_compute/core/NEON/NEFixedPoint.h"
Georgios Pinitascdf51452017-08-31 14:21:36 +010033#include "arm_compute/core/NEON/NEMath.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010034#include "arm_compute/core/TensorInfo.h"
35#include "arm_compute/core/Utils.h"
36#include "arm_compute/core/Validate.h"
37#include "arm_compute/core/Window.h"
Giorgio Arena9fb6c7e2018-08-22 12:15:25 +010038#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010039
Georgios Pinitas55186712018-01-08 17:37:12 +000040#include "support/ToolchainSupport.h"
41
Anthony Barbier6ff3b192017-09-04 18:44:23 +010042#include <algorithm>
43#include <arm_neon.h>
Georgios Pinitascdf51452017-08-31 14:21:36 +010044#include <cmath>
Anthony Barbier6ff3b192017-09-04 18:44:23 +010045#include <limits>
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +010046#include <set>
Anthony Barbier6ff3b192017-09-04 18:44:23 +010047#include <string>
48#include <tuple>
49
50using namespace arm_compute;
Giorgio Arena9fb6c7e2018-08-22 12:15:25 +010051using namespace misc::shape_calculator;
Anthony Barbier6ff3b192017-09-04 18:44:23 +010052
53namespace
54{
Pablo Tello77e6c552018-12-04 15:33:49 +000055inline float calculate_avg_scale(bool exclude_padding, DataLayout data_layout, const Coordinates &id, const int pool_size_x, const int pool_size_y, const int upper_bound_w, const int upper_bound_h,
Anthony Barbier6ff3b192017-09-04 18:44:23 +010056 const int pad_x, const int pad_y, const int stride_x, const int stride_y)
57{
Michalis Spyrou57dac842018-03-01 16:03:50 +000058 const unsigned int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
59 const unsigned int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
60
61 int start_x = id[idx_width] * stride_x - pad_x;
62 int start_y = id[idx_height] * stride_y - pad_y;
63
64 const int end_x = std::min(start_x + pool_size_x, upper_bound_w);
65 const int end_y = std::min(start_y + pool_size_y, upper_bound_h);
Georgios Pinitasadaae7e2017-10-30 15:56:32 +000066 if(exclude_padding)
67 {
68 start_x = std::max(0, start_x);
69 start_y = std::max(0, start_y);
70 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +010071 return 1.f / ((end_y - start_y) * (end_x - start_x));
72}
73
Pablo Tello77e6c552018-12-04 15:33:49 +000074inline void scale_vector_s16x8(bool exclude_padding, uint16x8_t &v, const Coordinates &id, int id_offset, int step,
Georgios Pinitas55186712018-01-08 17:37:12 +000075 const int pool_size, const int upper_bound_w, const int upper_bound_h,
76 const int pad_x, const int pad_y, const int stride_x, const int stride_y)
77{
78 int start_x = (id.x() + id_offset) * stride_x - pad_x;
79 int start_y = id.y() * stride_y - pad_y;
80 const int end_y = std::min(start_y + pool_size, upper_bound_h);
81 if(exclude_padding)
82 {
83 start_y = std::max(0, start_y);
84 }
85
86 std::array<uint16_t, 8> elems =
87 {
88 {
89 vgetq_lane_u16(v, 0),
90 vgetq_lane_u16(v, 1),
91 vgetq_lane_u16(v, 2),
92 vgetq_lane_u16(v, 3),
93 vgetq_lane_u16(v, 4),
94 vgetq_lane_u16(v, 5),
95 vgetq_lane_u16(v, 6),
96 vgetq_lane_u16(v, 7),
97 }
98 };
99
100 for(auto &el : elems)
101 {
102 int c_start_x = start_x;
103 const int end_x = std::min(c_start_x + pool_size, upper_bound_w);
104 if(exclude_padding)
105 {
106 c_start_x = std::max(0, c_start_x);
107 }
108 float scale = 1.f / ((end_y - start_y) * (end_x - c_start_x));
109 el *= scale;
110 start_x += step * stride_x;
111 }
112
113 v = vsetq_lane_u16(elems[0], v, 0);
114 v = vsetq_lane_u16(elems[1], v, 1);
115 v = vsetq_lane_u16(elems[2], v, 2);
116 v = vsetq_lane_u16(elems[3], v, 3);
117 v = vsetq_lane_u16(elems[4], v, 4);
118 v = vsetq_lane_u16(elems[5], v, 5);
119 v = vsetq_lane_u16(elems[6], v, 6);
120 v = vsetq_lane_u16(elems[7], v, 7);
121}
122
Georgios Pinitas13d96e02018-08-23 11:20:23 +0100123Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, unsigned int &pooled_w, unsigned int pooled_h)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100124{
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000125 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100126
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000127 int pool_stride_x = 0;
128 int pool_stride_y = 0;
129 PoolingType pool_type = pool_info.pool_type();
130 const PadStrideInfo pad_stride_info = pool_info.pad_stride_info();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100131 std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100132
Anthony Barbiereaefd002018-07-20 17:49:35 +0100133 ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
Vidhya Sudhan Loganathan7485d5a2018-07-04 09:34:00 +0100134 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
Georgios Pinitas55186712018-01-08 17:37:12 +0000135 ARM_COMPUTE_RETURN_ERROR_ON(pool_type == PoolingType::L2 && is_data_type_quantized(input->data_type()));
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000136
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000137 if(output->total_size() != 0)
Georgios Pinitas1dad50e2017-07-03 17:51:34 +0100138 {
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000139 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
Michalis Spyrou57dac842018-03-01 16:03:50 +0000140 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
141 ARM_COMPUTE_RETURN_ERROR_ON((output->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH)) != pooled_w)
142 || (output->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT)) != pooled_h));
Georgios Pinitas1dad50e2017-07-03 17:51:34 +0100143 }
144
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000145 return Status{};
146}
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100147
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000148Status validate_arguments_pool_info(const unsigned int pool_size_x, const unsigned int pool_size_y)
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000149{
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000150 ARM_COMPUTE_RETURN_ERROR_ON(pool_size_x == 0);
151 ARM_COMPUTE_RETURN_ERROR_ON(pool_size_y == 0);
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000152
153 return Status{};
154}
155
156std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const PoolingLayerInfo &pool_info, unsigned int &num_elems_processed_per_iteration,
157 BorderSize &border_size,
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000158 unsigned int pooled_w, unsigned int pooled_h, int pool_size_x, int pool_size_y)
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000159{
Giorgio Arena9fb6c7e2018-08-22 12:15:25 +0100160 // Output auto inizialitation if not yet initialized
161 auto_init_if_empty(*output, input->clone()->set_tensor_shape(compute_pool_shape(*input, pool_info)));
162
Michalis Spyrou57dac842018-03-01 16:03:50 +0000163 DataLayout data_layout = input->data_layout();
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000164 unsigned int num_elems_read_per_iteration = 0;
165 unsigned int num_elems_horizontal_window = 0;
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000166 int pool_stride_x = 0;
167 int pool_stride_y = 0;
Michalis Spyrou57dac842018-03-01 16:03:50 +0000168 const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
169 const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
170 const int input_width = input->dimension(idx_width);
171 const int input_height = input->dimension(idx_height);
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000172 const PadStrideInfo pad_stride_info = pool_info.pad_stride_info();
173 std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000174 const int pool_pad_right = pad_stride_info.pad_right();
175 const int pool_pad_top = pad_stride_info.pad_top();
176 const int pool_pad_left = pad_stride_info.pad_left();
177 const int pool_pad_bottom = pad_stride_info.pad_bottom();
178 const bool is_square = pool_size_x == pool_size_y;
Michalis Spyrou57dac842018-03-01 16:03:50 +0000179
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000180 // Check output dimensions
Michalis Spyrou57dac842018-03-01 16:03:50 +0000181 std::tie(pooled_w, pooled_h) = scaled_dimensions(input->dimension(idx_width),
182 input->dimension(idx_height),
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000183 pool_size_x,
184 pool_size_y,
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000185 pad_stride_info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100186
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000187 //If it's not squared and optimized will be executed the MxN
188 num_elems_read_per_iteration = 1;
189 num_elems_processed_per_iteration = 1;
190 num_elems_horizontal_window = 1;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100191
Michalis Spyrou57dac842018-03-01 16:03:50 +0000192 const bool is_nhwc = data_layout == DataLayout::NHWC;
193
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000194 if(is_square)
195 {
196 switch(input->data_type())
197 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000198 case DataType::QASYMM8:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000199 if(is_nhwc)
200 {
Michalis Spyrouced25572018-10-01 16:26:20 +0100201 num_elems_processed_per_iteration = 16;
Michalis Spyrou57dac842018-03-01 16:03:50 +0000202 break;
203 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000204 switch(pool_size_x)
205 {
206 case 2:
207 num_elems_read_per_iteration = 16;
208 num_elems_processed_per_iteration = (pool_stride_x == 2) ? 8 : 15;
209 num_elems_horizontal_window = (pool_stride_x == 2) ? 8 : 16;
210 break;
211 case 3:
212 num_elems_read_per_iteration = 16;
213 num_elems_processed_per_iteration = (pool_stride_x == 2) ? 7 : 14;
214 num_elems_horizontal_window = (pool_stride_x == 2) ? 8 : 16;
215 break;
216 default:
217 break;
218 }
219 break;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000220#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
221 case DataType::F16:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000222 if(is_nhwc)
223 {
224 num_elems_processed_per_iteration = 8;
225 break;
226 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000227 switch(pool_size_x)
228 {
229 case 2:
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000230 case 3:
231 num_elems_read_per_iteration = 4;
232 num_elems_processed_per_iteration = 1;
233 num_elems_horizontal_window = 1;
234 break;
235 default:
236 break;
237 }
238 break;
239#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
240 case DataType::F32:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000241 if(is_nhwc)
242 {
Georgios Pinitas64f1a902018-09-18 13:42:51 +0100243 num_elems_processed_per_iteration = 4;
Michalis Spyrou57dac842018-03-01 16:03:50 +0000244 break;
245 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000246 switch(pool_size_x)
247 {
248 case 2:
249 num_elems_read_per_iteration = 2;
250 break;
251 case 3:
252 num_elems_read_per_iteration = 4; // We use vload4 for pooling3
253 break;
254 case 7:
255 num_elems_read_per_iteration = 8; // We use vload8 for pooling7
256 break;
257 default:
258 break;
259 }
260 num_elems_processed_per_iteration = 1;
261 num_elems_horizontal_window = 1;
262 break;
263 default:
264 ARM_COMPUTE_ERROR("Element size not supported");
265 break;
266 }
267 }
Michalis Spyrou57dac842018-03-01 16:03:50 +0000268 else
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000269 {
Michalis Spyrou57dac842018-03-01 16:03:50 +0000270 if(is_nhwc)
271 {
Michalis Spyrouced25572018-10-01 16:26:20 +0100272 num_elems_processed_per_iteration = 16 / input->element_size();
Michalis Spyrou57dac842018-03-01 16:03:50 +0000273 }
274 }
275
276 bool window_changed = false;
277 Window win{};
278 if(data_layout == DataLayout::NCHW)
279 {
280 // Number of iterations in X dimension
281 const int num_iterations_x = (pooled_w + num_elems_processed_per_iteration - 1) / num_elems_processed_per_iteration;
282
283 // Upper limit for the number of right/bottom border elements that are accessed
284 const int upper_bound_w = ((num_iterations_x - 1) * num_elems_processed_per_iteration * pool_stride_x - pool_pad_left + num_elems_read_per_iteration) - input_width;
285 const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_top + pool_size_y) - input_height;
286
287 border_size = BorderSize(pool_pad_top, pool_pad_right, pool_pad_bottom, pool_pad_left);
288 border_size.right = std::max(upper_bound_w, pool_pad_right);
289 border_size.bottom = std::max(upper_bound_h, pool_pad_bottom);
290
291 TensorShape output_shape{ input->tensor_shape() };
292 output_shape.set(0, pooled_w);
293 output_shape.set(1, pooled_h);
294 TensorInfo output_info(input->clone()->set_tensor_shape(output_shape));
295
296 win = calculate_max_window(output_info, Steps(num_elems_processed_per_iteration));
297 AccessWindowStatic input_access(input, -pool_pad_left, -pool_pad_top, input_width + border_size.right, input_height + border_size.bottom);
298
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000299 AccessWindowHorizontal output_access(output, 0, num_elems_horizontal_window);
300 window_changed = update_window_and_padding(win, input_access, output_access);
301 output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
302 }
303 else
304 {
Michalis Spyrou57dac842018-03-01 16:03:50 +0000305 TensorShape output_shape{ input->tensor_shape() };
306 output_shape.set(1, pooled_w);
307 output_shape.set(2, pooled_h);
308 TensorInfo output_info(input->clone()->set_tensor_shape(output_shape));
309
310 win = calculate_max_window(output_info, Steps(num_elems_processed_per_iteration));
311 AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
312
313 AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
314 window_changed = update_window_and_padding(win, input_access, output_access);
315 output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000316 }
317
318 Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
319 return std::make_pair(err, win);
320}
321} // namespace
322
323NEPoolingLayerKernel::NEPoolingLayerKernel()
Georgios Pinitas14d9d982019-12-13 12:33:09 +0000324 : _func(nullptr), _input(nullptr), _output(nullptr), _pool_info(), _data_layout(DataLayout::UNKNOWN), _num_elems_processed_per_iteration(0), _border_size(0), _is_square(false)
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000325{
326}
327
328BorderSize NEPoolingLayerKernel::border_size() const
329{
330 return _border_size;
331}
332
333void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info)
334{
335 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
336
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000337 const PadStrideInfo pad_stride_info = pool_info.pad_stride_info();
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000338 const bool is_global_pooling = pool_info.is_global_pooling();
Diego Lopez Recas61ef5bf2017-12-11 12:36:55 +0000339 const int pool_stride_x = pad_stride_info.stride().first;
Michalis Spyrou57dac842018-03-01 16:03:50 +0000340
341 // Get data layout
342 const DataLayout data_layout = input->info()->data_layout();
343 const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
344 const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000345
346 // Update pool size in case of global pooling
Pablo Tello77e6c552018-12-04 15:33:49 +0000347 const Size2D pool_size(
348 is_global_pooling ? input->info()->dimension(idx_width) : pool_info.pool_size().width,
349 is_global_pooling ? input->info()->dimension(idx_height) : pool_info.pool_size().height);
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000350
351 // Validate pool info before calling scaled_dimensions
Pablo Tello77e6c552018-12-04 15:33:49 +0000352 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_pool_info(pool_size.x(), pool_size.y()));
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000353
354 // Check output dimensions
Michalis Spyroubcfd09a2019-05-01 13:03:59 +0100355 unsigned int pooled_w;
356 unsigned int pooled_h;
Michalis Spyrou57dac842018-03-01 16:03:50 +0000357 std::tie(pooled_w, pooled_h) = scaled_dimensions(input->info()->dimension(idx_width),
358 input->info()->dimension(idx_height),
Pablo Tello77e6c552018-12-04 15:33:49 +0000359 pool_size.x(),
360 pool_size.y(),
Diego Lopez Recas61ef5bf2017-12-11 12:36:55 +0000361 pad_stride_info);
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000362
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000363 // Perform validation step
Georgios Pinitas13d96e02018-08-23 11:20:23 +0100364 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info, pooled_w, pooled_h));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100365
366 // Set instance variables
Georgios Pinitas14d9d982019-12-13 12:33:09 +0000367 _input = input;
368 _output = output;
369 _pool_info = pool_info;
370 _data_layout = input->info()->data_layout();
371 _is_square = (pool_size.x() == pool_size.y());
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100372
Georgios Pinitas55186712018-01-08 17:37:12 +0000373 // Get data type
374 const DataType data_type = input->info()->data_type();
Georgios Pinitas14d9d982019-12-13 12:33:09 +0000375 const bool is_nchw = _data_layout == DataLayout::NCHW;
Georgios Pinitas55186712018-01-08 17:37:12 +0000376
Vidhya Sudhan Loganathan7485d5a2018-07-04 09:34:00 +0100377 if(data_type == DataType::QASYMM8)
Georgios Pinitas55186712018-01-08 17:37:12 +0000378 {
Pablo Tello77e6c552018-12-04 15:33:49 +0000379 if(pool_size.x() == 2 && pool_stride_x < 3 && _is_square)
Georgios Pinitas55186712018-01-08 17:37:12 +0000380 {
Pablo Tello77e6c552018-12-04 15:33:49 +0000381 if(is_nchw)
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100382 {
Pablo Tello77e6c552018-12-04 15:33:49 +0000383 _func = &NEPoolingLayerKernel::pooling2_qasymm8_nchw;
384 }
385 else
386 {
387 _func = &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc;
Georgios Pinitas55186712018-01-08 17:37:12 +0000388 }
389 }
Pablo Tello77e6c552018-12-04 15:33:49 +0000390 else if(pool_size.x() == 3 && pool_stride_x < 3 && _is_square)
Georgios Pinitas55186712018-01-08 17:37:12 +0000391 {
Pablo Tello77e6c552018-12-04 15:33:49 +0000392 if(is_nchw)
Georgios Pinitas55186712018-01-08 17:37:12 +0000393 {
Pablo Tello77e6c552018-12-04 15:33:49 +0000394 _func = &NEPoolingLayerKernel::pooling3_qasymm8_nchw;
395 }
396 else
397 {
398 _func = &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc;
Georgios Pinitas55186712018-01-08 17:37:12 +0000399 }
400 }
401 else
402 {
Pablo Tello77e6c552018-12-04 15:33:49 +0000403 if(is_nchw)
Georgios Pinitas55186712018-01-08 17:37:12 +0000404 {
Pablo Tello77e6c552018-12-04 15:33:49 +0000405 _func = &NEPoolingLayerKernel::poolingMxN_qasymm8_nchw;
406 }
407 else
408 {
409 _func = &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc;
Georgios Pinitas55186712018-01-08 17:37:12 +0000410 }
411 }
412 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000413 else if(data_type == DataType::F16)
414 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000415 if(_is_square)
Georgios Pinitas55186712018-01-08 17:37:12 +0000416 {
Pablo Tello77e6c552018-12-04 15:33:49 +0000417 switch(pool_size.x())
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000418 {
419 case 2:
Pablo Tello77e6c552018-12-04 15:33:49 +0000420 {
421 if(is_nchw)
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000422 {
Pablo Tello77e6c552018-12-04 15:33:49 +0000423 _func = &NEPoolingLayerKernel::pooling2_f16_nchw;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000424 }
Pablo Tello77e6c552018-12-04 15:33:49 +0000425 else
426 {
427 _func = &NEPoolingLayerKernel::poolingMxN_f16_nhwc;
428 }
429 }
430 break;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000431 case 3:
Pablo Tello77e6c552018-12-04 15:33:49 +0000432 {
433 if(is_nchw)
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000434 {
Pablo Tello77e6c552018-12-04 15:33:49 +0000435 _func = &NEPoolingLayerKernel::pooling3_f16_nchw;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000436 }
Pablo Tello77e6c552018-12-04 15:33:49 +0000437 else
438 {
439 _func = &NEPoolingLayerKernel::poolingMxN_f16_nhwc;
440 }
441 }
442 break;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000443 default:
Pablo Tello77e6c552018-12-04 15:33:49 +0000444 {
445 if(is_nchw)
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000446 {
Pablo Tello77e6c552018-12-04 15:33:49 +0000447 _func = &NEPoolingLayerKernel::poolingMxN_f16_nchw;
448 }
449 else
450 {
451 _func = &NEPoolingLayerKernel::poolingMxN_f16_nhwc;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000452 }
453 break;
Pablo Tello77e6c552018-12-04 15:33:49 +0000454 }
455 break;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000456 }
457 }
458 else
459 {
Pablo Tello77e6c552018-12-04 15:33:49 +0000460 if(is_nchw)
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000461 {
Pablo Tello77e6c552018-12-04 15:33:49 +0000462 _func = &NEPoolingLayerKernel::poolingMxN_f16_nchw;
463 }
464 else
465 {
466 _func = &NEPoolingLayerKernel::poolingMxN_f16_nhwc;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000467 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000468 }
469 }
470 else if(data_type == DataType::F32)
471 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000472 if(_is_square)
Georgios Pinitas55186712018-01-08 17:37:12 +0000473 {
Pablo Tello77e6c552018-12-04 15:33:49 +0000474 switch(pool_size.x())
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000475 {
476 case 2:
Pablo Tello77e6c552018-12-04 15:33:49 +0000477 {
478 if(is_nchw)
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000479 {
Pablo Tello77e6c552018-12-04 15:33:49 +0000480 _func = &NEPoolingLayerKernel::pooling2_f32_nchw;
481 }
482 else
483 {
484 _func = &NEPoolingLayerKernel::poolingMxN_f32_nhwc;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000485 }
486 break;
Pablo Tello77e6c552018-12-04 15:33:49 +0000487 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000488 case 3:
Pablo Tello77e6c552018-12-04 15:33:49 +0000489 {
490 if(is_nchw)
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000491 {
Pablo Tello77e6c552018-12-04 15:33:49 +0000492 _func = &NEPoolingLayerKernel::pooling3_f32_nchw;
493 }
494 else
495 {
496 _func = &NEPoolingLayerKernel::poolingMxN_f32_nhwc;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000497 }
498 break;
Pablo Tello77e6c552018-12-04 15:33:49 +0000499 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000500 case 7:
Pablo Tello77e6c552018-12-04 15:33:49 +0000501 {
502 if(is_nchw)
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000503 {
Pablo Tello77e6c552018-12-04 15:33:49 +0000504 _func = &NEPoolingLayerKernel::pooling7_f32_nchw;
505 }
506 else
507 {
508 _func = &NEPoolingLayerKernel::poolingMxN_f32_nhwc;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000509 }
510 break;
Pablo Tello77e6c552018-12-04 15:33:49 +0000511 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000512 default:
Pablo Tello77e6c552018-12-04 15:33:49 +0000513 {
514 if(is_nchw)
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000515 {
Pablo Tello77e6c552018-12-04 15:33:49 +0000516 _func = &NEPoolingLayerKernel::poolingMxN_f32_nchw;
517 }
518 else
519 {
520 _func = &NEPoolingLayerKernel::poolingMxN_f32_nhwc;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000521 }
522 break;
Pablo Tello77e6c552018-12-04 15:33:49 +0000523 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000524 }
525 }
526 else
527 {
Pablo Tello77e6c552018-12-04 15:33:49 +0000528 if(is_nchw)
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000529 {
Pablo Tello77e6c552018-12-04 15:33:49 +0000530 _func = &NEPoolingLayerKernel::poolingMxN_f32_nchw;
531 }
532 else
533 {
534 _func = &NEPoolingLayerKernel::poolingMxN_f32_nhwc;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000535 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000536 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100537 }
538
539 // Configure kernel window
Pablo Tello77e6c552018-12-04 15:33:49 +0000540 auto win_config = validate_and_configure_window(input->info(), output->info(), pool_info, _num_elems_processed_per_iteration, _border_size, pooled_w, pooled_h, pool_size.x(), pool_size.y());
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000541 ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
542 INEKernel::configure(win_config.second);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100543}
544
Pablo Tello77e6c552018-12-04 15:33:49 +0000545void NEPoolingLayerKernel::pooling2_qasymm8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding)
Georgios Pinitas55186712018-01-08 17:37:12 +0000546{
547 Iterator input(_input, window_input);
548 Iterator output(_output, window);
549
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000550 constexpr int pool_size = 2;
551 int pool_stride_x = 0;
552 int pool_stride_y = 0;
553 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
554 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
555 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
556 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
Georgios Pinitas55186712018-01-08 17:37:12 +0000557 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000558 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
559 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Georgios Pinitas55186712018-01-08 17:37:12 +0000560
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000561 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
562 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
Georgios Pinitas55186712018-01-08 17:37:12 +0000563
564 const int scale_step_x = (pool_stride_x == 1) ? 2 : 1;
565
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100566 const UniformQuantizationInfo input_qinfo = _input->info()->quantization_info().uniform();
567 const UniformQuantizationInfo output_qinfo = _output->info()->quantization_info().uniform();
568 const bool have_different_qinfo = input_qinfo != output_qinfo;
569
Georgios Pinitas55186712018-01-08 17:37:12 +0000570 execute_window_loop(window, [&](const Coordinates & id)
571 {
572 const auto top_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_top_ptr + input.offset()));
573 const auto bottom_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_bottom_ptr + input.offset()));
574 uint8x8_t lower_res = {};
575 uint8x8_t upper_res = {};
576
577 if(pooling_type != PoolingType::MAX)
578 {
579 const uint16x8x2_t top_data_u16 = { { vmovl_u8(vget_low_u8(top_data)), vmovl_u8(vget_high_u8(top_data)) } };
580 const uint16x8x2_t bottom_data_u16 = { { vmovl_u8(vget_low_u8(bottom_data)), vmovl_u8(vget_high_u8(bottom_data)) } };
581
582 // Add rows
583 const uint16x8x2_t vrsum =
584 {
585 {
586 vaddq_u16(top_data_u16.val[0], bottom_data_u16.val[0]),
587 vaddq_u16(top_data_u16.val[1], bottom_data_u16.val[1]),
588 }
589 };
590
591 // Pair-wise add row data
592 const uint16x4x2_t vpsum =
593 {
594 {
595 vpadd_u16(vget_low_u16(vrsum.val[0]), vget_high_u16(vrsum.val[0])),
596 vpadd_u16(vget_low_u16(vrsum.val[1]), vget_high_u16(vrsum.val[1])),
597 }
598 };
599
600 uint16x8_t res_lower = vcombine_u16(vpsum.val[0], vpsum.val[1]);
601
602 // Scale lower result
Pablo Tello77e6c552018-12-04 15:33:49 +0000603 scale_vector_s16x8(exclude_padding, res_lower, id, 0, scale_step_x,
604 pool_size, upper_bound_w, upper_bound_h,
605 pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitas55186712018-01-08 17:37:12 +0000606 lower_res = vmovn_u16(res_lower);
607
608 // Compute upper result for stride_x == 1
609 if(pool_stride_x == 1)
610 {
611 // Shifted row sum
612 const uint16x8x2_t vrsum_shifted =
613 {
614 {
615 vextq_u16(vrsum.val[0], vrsum.val[1], 1),
616 vextq_u16(vrsum.val[1], vrsum.val[1], 1)
617 }
618 };
619
620 // Pair-wise add shifted row
621 const uint16x4x2_t vpsum_shifted =
622 {
623 {
624 vpadd_u16(vget_low_u16(vrsum_shifted.val[0]), vget_high_u16(vrsum_shifted.val[0])),
625 vpadd_u16(vget_low_u16(vrsum_shifted.val[1]), vget_high_u16(vrsum_shifted.val[1])),
626 }
627 };
628 uint16x8_t res_upper = vcombine_u16(vpsum_shifted.val[0], vpsum_shifted.val[1]);
629
630 // Scale lower result
Pablo Tello77e6c552018-12-04 15:33:49 +0000631 scale_vector_s16x8(exclude_padding, res_upper, id, 1, 2,
632 pool_size, upper_bound_w, upper_bound_h,
633 pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitas55186712018-01-08 17:37:12 +0000634 upper_res = vmovn_u16(res_upper);
635 }
636 }
637 else
638 {
639 const uint8x16_t max_data = vmaxq_u8(top_data, bottom_data);
640 lower_res = vpmax_u8(vget_low_u8(max_data), vget_high_u8(max_data));
641 if(pool_stride_x == 1)
642 {
643 const uint8x16_t max_data_shifted = vextq_u8(max_data, max_data, 1);
644 upper_res = vpmax_u8(vget_low_u8(max_data_shifted), vget_high_u8(max_data_shifted));
645 }
646 }
647
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100648 if(have_different_qinfo)
Pablo Telloa52e4cf2019-04-01 14:55:18 +0100649 {
650 const auto requantized_output = vquantize(vdequantize(vcombine_u8(lower_res, upper_res), input_qinfo), output_qinfo);
651 lower_res = vget_low_u8(requantized_output);
652 upper_res = vget_high_u8(requantized_output);
653 }
654
Georgios Pinitas55186712018-01-08 17:37:12 +0000655 // Store result
656 if(pool_stride_x == 1)
657 {
658 const uint8x8x2_t res = { { lower_res, upper_res } };
659 vst2_u8(reinterpret_cast<uint8_t *>(output.ptr()), res);
660 }
661 else
662 {
663 vst1_u8(reinterpret_cast<uint8_t *>(output.ptr()), lower_res);
664 }
665 },
666 input, output);
667}
668
Pablo Tello77e6c552018-12-04 15:33:49 +0000669void NEPoolingLayerKernel::pooling3_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding)
Pablo Tello0c34fe22017-06-26 17:17:42 +0100670{
Pablo Tello77e6c552018-12-04 15:33:49 +0000671 ARM_COMPUTE_UNUSED(pooling_type);
672 ARM_COMPUTE_UNUSED(exclude_padding);
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000673#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Pablo Tello0c34fe22017-06-26 17:17:42 +0100674 Iterator input(_input, window_input);
675 Iterator output(_output, window);
676
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000677 constexpr const int pool_size = 3;
678 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
679 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
680 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
681 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
682 int pool_stride_x = 0;
683 int pool_stride_y = 0;
Pablo Tello0c34fe22017-06-26 17:17:42 +0100684 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000685 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
686 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Pablo Tello0c34fe22017-06-26 17:17:42 +0100687
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000688 const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
689 const unsigned char *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
690 const unsigned char *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 2));
Pablo Tello0c34fe22017-06-26 17:17:42 +0100691
692 execute_window_loop(window, [&](const Coordinates & id)
693 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100694 float16x4_t top_data = vld1_f16(reinterpret_cast<const float16_t *>(input_top_ptr + input.offset()));
695 float16x4_t middle_data = vld1_f16(reinterpret_cast<const float16_t *>(input_middle_ptr + input.offset()));
696 float16x4_t bottom_data = vld1_f16(reinterpret_cast<const float16_t *>(input_bottom_ptr + input.offset()));
697 float16x4_t res = {};
698
699 // Get power of 2 in case of l2 pooling
700 if(pooling_type == PoolingType::L2)
701 {
702 top_data = vmul_f16(top_data, top_data);
703 middle_data = vmul_f16(middle_data, middle_data);
704 bottom_data = vmul_f16(bottom_data, bottom_data);
705 }
706
707 if(pooling_type != PoolingType::MAX)
Pablo Tello0c34fe22017-06-26 17:17:42 +0100708 {
709 // Calculate scale
Pablo Tello77e6c552018-12-04 15:33:49 +0000710 const float scale = calculate_avg_scale(exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Pablo Tello0c34fe22017-06-26 17:17:42 +0100711 const float16x4_t scale_v = vdup_n_f16(scale);
712 // Perform pooling
713 const float16x4_t sum_data = vadd_f16(vadd_f16(top_data, bottom_data), middle_data);
714 res = vpadd_f16(vset_lane_f16(0.f, sum_data, 3), sum_data);
715 res = vmul_f16(vpadd_f16(res, res), scale_v);
716 }
717 else
718 {
719 const float16x4_t max_data = vmax_f16(vmax_f16(top_data, bottom_data), middle_data);
720 res = vpmax_f16(vset_lane_f16(-std::numeric_limits<float>::max(), max_data, 3), max_data);
721 res = vpmax_f16(res, res);
722 }
Georgios Pinitascdf51452017-08-31 14:21:36 +0100723
724 // Calculate square-root in case of l2 pooling
725 if(pooling_type == PoolingType::L2)
726 {
727 res = vinv_f16(vinvsqrt_f16(res));
728 }
729
Pablo Tello0c34fe22017-06-26 17:17:42 +0100730 *(reinterpret_cast<float16_t *>(output.ptr())) = vget_lane_f16(res, 0);
731 },
732 input, output);
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000733#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tello0c34fe22017-06-26 17:17:42 +0100734 ARM_COMPUTE_UNUSED(window_input);
735 ARM_COMPUTE_UNUSED(window);
736 ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000737#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tello0c34fe22017-06-26 17:17:42 +0100738}
739
Pablo Tello77e6c552018-12-04 15:33:49 +0000740void NEPoolingLayerKernel::pooling2_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding)
Pablo Tello0c34fe22017-06-26 17:17:42 +0100741{
Pablo Tello77e6c552018-12-04 15:33:49 +0000742 ARM_COMPUTE_UNUSED(pooling_type);
743 ARM_COMPUTE_UNUSED(exclude_padding);
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000744#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Pablo Tello0c34fe22017-06-26 17:17:42 +0100745 Iterator input(_input, window_input);
746 Iterator output(_output, window);
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000747 constexpr int pool_size = 2;
748 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
749 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
750 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
751 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
752 int pool_stride_x, pool_stride_y = 0;
753 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
754 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
755 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Pablo Tello0c34fe22017-06-26 17:17:42 +0100756
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000757 const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
758 const unsigned char *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
Pablo Tello0c34fe22017-06-26 17:17:42 +0100759
760 execute_window_loop(window, [&](const Coordinates & id)
761 {
Georgios Pinitas13d96e02018-08-23 11:20:23 +0100762 float16x4_t top_data = vld1_f16(reinterpret_cast<const float16_t *>(input_top_ptr + input.offset()));
763 float16x4_t bottom_data = vld1_f16(reinterpret_cast<const float16_t *>(input_bottom_ptr + input.offset()));
764 float16x4_t res = {};
Pablo Tello0c34fe22017-06-26 17:17:42 +0100765
Georgios Pinitascdf51452017-08-31 14:21:36 +0100766 // Get power of 2 in case of l2 pooling
767 if(pooling_type == PoolingType::L2)
768 {
Georgios Pinitas13d96e02018-08-23 11:20:23 +0100769 top_data = vmul_f16(top_data, top_data);
770 bottom_data = vmul_f16(bottom_data, bottom_data);
Georgios Pinitascdf51452017-08-31 14:21:36 +0100771 }
772
773 if(pooling_type != PoolingType::MAX)
Pablo Tello0c34fe22017-06-26 17:17:42 +0100774 {
Pablo Tello77e6c552018-12-04 15:33:49 +0000775 const float scale = calculate_avg_scale(exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitas13d96e02018-08-23 11:20:23 +0100776 const float16x4_t scale_v = vdup_n_f16(scale);
777
778 const float16x4_t sum_data = vadd_f16(top_data, bottom_data);
779 res = vmul_f16(vpadd_f16(sum_data, sum_data), scale_v);
Pablo Tello0c34fe22017-06-26 17:17:42 +0100780 }
781 else
782 {
Georgios Pinitas13d96e02018-08-23 11:20:23 +0100783 const float16x4_t max_data = vmax_f16(top_data, bottom_data);
784 res = vpmax_f16(max_data, max_data);
Pablo Tello0c34fe22017-06-26 17:17:42 +0100785 }
Georgios Pinitascdf51452017-08-31 14:21:36 +0100786
787 // Calculate square-root in case of l2 pooling
788 if(pooling_type == PoolingType::L2)
789 {
Georgios Pinitas13d96e02018-08-23 11:20:23 +0100790 res = vinv_f16(vinvsqrt_f16(res));
Georgios Pinitascdf51452017-08-31 14:21:36 +0100791 }
792
793 // Store result
Georgios Pinitas13d96e02018-08-23 11:20:23 +0100794 *(reinterpret_cast<float16_t *>(output.ptr())) = vget_lane_f16(res, 0);
Pablo Tello0c34fe22017-06-26 17:17:42 +0100795 },
796 input, output);
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000797#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tello0c34fe22017-06-26 17:17:42 +0100798 ARM_COMPUTE_UNUSED(window_input);
799 ARM_COMPUTE_UNUSED(window);
800 ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000801#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tello0c34fe22017-06-26 17:17:42 +0100802}
803
Pablo Tello77e6c552018-12-04 15:33:49 +0000804void NEPoolingLayerKernel::pooling3_qasymm8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding)
Georgios Pinitas55186712018-01-08 17:37:12 +0000805{
806 Iterator input(_input, window_input);
807 Iterator output(_output, window);
808
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000809 constexpr int pool_size = 3;
810 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
811 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
812 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
813 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
814 int pool_stride_x = 0;
815 int pool_stride_y = 0;
Georgios Pinitas55186712018-01-08 17:37:12 +0000816 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000817 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
818 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Georgios Pinitas55186712018-01-08 17:37:12 +0000819
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100820 const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform();
821 const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform();
Georgios Pinitasd66094e2019-04-15 15:44:17 +0100822
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000823 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
824 const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
825 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 2));
Georgios Pinitas55186712018-01-08 17:37:12 +0000826
827 execute_window_loop(window, [&](const Coordinates & id)
828 {
829 const auto top_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_top_ptr + input.offset()));
830 const auto middle_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_middle_ptr + input.offset()));
831 const auto bottom_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_bottom_ptr + input.offset()));
Georgios Pinitasd66094e2019-04-15 15:44:17 +0100832 uint8x8_t fres = {};
833 uint8x16_t fqres = {};
Georgios Pinitas55186712018-01-08 17:37:12 +0000834
835 if(pooling_type == PoolingType::AVG)
836 {
837 // Convert data to u16
838 const uint16x8x2_t top_data_u16 = { { vmovl_u8(vget_low_u8(top_data)), vmovl_u8(vget_high_u8(top_data)) } };
839 const uint16x8x2_t middle_data_u16 = { { vmovl_u8(vget_low_u8(middle_data)), vmovl_u8(vget_high_u8(middle_data)) } };
840 const uint16x8x2_t bottom_data_u16 = { { vmovl_u8(vget_low_u8(bottom_data)), vmovl_u8(vget_high_u8(bottom_data)) } };
841
842 // Calculate row sums
843 const uint16x8x2_t vrsum =
844 {
845 {
846 vaddq_u16(vaddq_u16(top_data_u16.val[0], bottom_data_u16.val[0]), middle_data_u16.val[0]),
847 vaddq_u16(vaddq_u16(top_data_u16.val[1], bottom_data_u16.val[1]), middle_data_u16.val[1]),
848 }
849 };
850 const uint16x8x2_t vrsum_shifted_1 =
851 {
852 {
853 vextq_u16(vrsum.val[0], vrsum.val[1], 1),
854 vextq_u16(vrsum.val[1], vrsum.val[1], 1)
855 }
856 };
857 const uint16x8x2_t vrsum_shifted_2 =
858 {
859 {
860 vextq_u16(vrsum.val[0], vrsum.val[1], 2),
861 vextq_u16(vrsum.val[1], vrsum.val[1], 2)
862 }
863 };
864 // Calculate final sum
865 uint16x8x2_t final_sum =
866 {
867 {
868 vaddq_u16(vaddq_u16(vrsum.val[0], vrsum_shifted_1.val[0]), vrsum_shifted_2.val[0]),
869 vaddq_u16(vaddq_u16(vrsum.val[1], vrsum_shifted_1.val[1]), vrsum_shifted_2.val[1]),
870 }
871 };
872 if(pool_stride_x == 2)
873 {
874 uint16x8_t res =
875 {
876 vgetq_lane_u16(final_sum.val[0], 0),
877 vgetq_lane_u16(final_sum.val[0], 2),
878 vgetq_lane_u16(final_sum.val[0], 4),
879 vgetq_lane_u16(final_sum.val[0], 6),
880 vgetq_lane_u16(final_sum.val[1], 0),
881 vgetq_lane_u16(final_sum.val[1], 2),
882 vgetq_lane_u16(final_sum.val[1], 4),
883 vgetq_lane_u16(final_sum.val[1], 6),
884 };
885
Pablo Tello77e6c552018-12-04 15:33:49 +0000886 scale_vector_s16x8(exclude_padding, res, id, 0, 1,
887 pool_size, upper_bound_w, upper_bound_h,
888 pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitasd66094e2019-04-15 15:44:17 +0100889 fres = vmovn_u16(res);
Georgios Pinitas55186712018-01-08 17:37:12 +0000890 }
891 else
892 {
893 // Scale lower result
Pablo Tello77e6c552018-12-04 15:33:49 +0000894 scale_vector_s16x8(exclude_padding, final_sum.val[0], id, 0, 1,
895 pool_size, upper_bound_w, upper_bound_h,
896 pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitas55186712018-01-08 17:37:12 +0000897 // Scale lower result
Pablo Tello77e6c552018-12-04 15:33:49 +0000898 scale_vector_s16x8(exclude_padding, final_sum.val[1], id, 8, 1,
899 pool_size, upper_bound_w, upper_bound_h,
900 pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitasd66094e2019-04-15 15:44:17 +0100901 fqres = vcombine_u8(vmovn_u16(final_sum.val[0]), vmovn_u16(final_sum.val[1]));
Georgios Pinitas55186712018-01-08 17:37:12 +0000902 }
903 }
904 else
905 {
906 const uint8x16_t max_data = vmaxq_u8(vmaxq_u8(top_data, bottom_data), middle_data);
907 const uint8x16_t max_data_shift1 = vextq_u8(max_data, max_data, 1);
908 const uint8x16_t max_data_shift2 = vextq_u8(max_data, max_data, 2);
909 const uint8x16_t final_max = vmaxq_u8(vmaxq_u8(max_data, max_data_shift1), max_data_shift2);
910
911 if(pool_stride_x == 2)
912 {
913 const uint8x8x2_t table = { { vget_low_u8(final_max), vget_high_u8(final_max) } };
914 static const uint8x8_t lookup_val = { 0, 2, 4, 6, 8, 10, 12, 14 };
Georgios Pinitasd66094e2019-04-15 15:44:17 +0100915 fres = vtbl2_u8(table, lookup_val);
Georgios Pinitas55186712018-01-08 17:37:12 +0000916 }
917 else
918 {
Georgios Pinitasd66094e2019-04-15 15:44:17 +0100919 fqres = final_max;
Georgios Pinitas55186712018-01-08 17:37:12 +0000920 }
921 }
Georgios Pinitasd66094e2019-04-15 15:44:17 +0100922
923 // Store result
924 if(pool_stride_x == 1)
925 {
926 if(input_qinfo != output_qinfo)
927 {
928 fqres = vquantize(vdequantize(fqres, input_qinfo), output_qinfo);
929 }
930 vst1q_u8(reinterpret_cast<uint8_t *>(output.ptr()), fqres);
931 }
932 else
933 {
934 if(input_qinfo != output_qinfo)
935 {
936 fres = vquantize(vdequantize(fres, input_qinfo), output_qinfo);
937 }
938 vst1_u8(reinterpret_cast<uint8_t *>(output.ptr()), fres);
939 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000940 },
941 input, output);
942}
943
Pablo Tello77e6c552018-12-04 15:33:49 +0000944void NEPoolingLayerKernel::poolingMxN_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100945{
Pablo Tello77e6c552018-12-04 15:33:49 +0000946 ARM_COMPUTE_UNUSED(pooling_type);
947 ARM_COMPUTE_UNUSED(exclude_padding);
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000948#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
949 Iterator input(_input, window_input);
950 Iterator output(_output, window);
951
952 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().x() : _pool_info.pool_size().width;
953 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().height;
954 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
955 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
956 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
957 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
958 int pool_stride_x = 0;
959 int pool_stride_y = 0;
960 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
961 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
962 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
963
964 execute_window_loop(window, [&](const Coordinates & id)
965 {
966 float16_t res = 0.0f;
967 float16x8_t vres = vdupq_n_f16(0.0f);
968
969 if(pooling_type != PoolingType::MAX)
970 {
971 // Calculate scale
Pablo Tello77e6c552018-12-04 15:33:49 +0000972 const float scale = calculate_avg_scale(exclude_padding, DataLayout::NCHW, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000973
974 // Perform pooling
975
976 for(int y = 0; y < pool_size_y; ++y)
977 {
978 int x = 0;
979 for(; x <= (pool_size_x - 8); x += 8)
980 {
Georgios Pinitas0922dbb2019-11-25 18:39:27 +0000981 const float16x8_t data = vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * static_cast<int>(_input->info()->strides_in_bytes().x()) +
982 (y - pool_pad_top) * static_cast<int>(_input->info()->strides_in_bytes().y())));
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000983
984 // Get power of 2 in case of l2 pooling and accumulate
985 if(pooling_type == PoolingType::L2)
986 {
987 vres = vaddq_f16(vres, vmulq_f16(data, data));
988 }
989 else
990 {
991 vres = vaddq_f16(vres, data);
992 }
993 }
994
995 // Leftover for loop
996 for(; x < pool_size_x; ++x)
997 {
Georgios Pinitas0922dbb2019-11-25 18:39:27 +0000998 float16_t data = *(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * static_cast<int>(_input->info()->strides_in_bytes().x())
999 + (y - pool_pad_top) * static_cast<int>(_input->info()->strides_in_bytes().y())));
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001000
1001 // Get power of 2 in case of l2 pooling
1002 if(pooling_type == PoolingType::L2)
1003 {
1004 data *= data;
1005 }
1006
1007 res += data;
1008 }
1009 }
1010
1011 // Reduction
1012 float16x4_t tmp = vpadd_f16(vget_high_f16(vres), vget_low_f16(vres));
1013 res += vget_lane_f16(tmp, 0);
1014 res += vget_lane_f16(tmp, 1);
1015 res += vget_lane_f16(tmp, 2);
1016 res += vget_lane_f16(tmp, 3);
1017
1018 // Divide by scale
1019 res *= scale;
1020 }
1021 else
1022 {
1023 float16x8_t vres = vdupq_n_f16(std::numeric_limits<float>::lowest());
1024 res = std::numeric_limits<float>::lowest();
1025
1026 for(int y = 0; y < pool_size_y; ++y)
1027 {
1028 int x = 0;
1029 for(; x <= (pool_size_x - 8); x += 8)
1030 {
Georgios Pinitas0922dbb2019-11-25 18:39:27 +00001031 const float16x8_t data = vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * static_cast<int>(_input->info()->strides_in_bytes().x()) +
1032 (y - pool_pad_top) * static_cast<int>(_input->info()->strides_in_bytes().y())));
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001033 vres = vmaxq_f16(vres, data);
1034 }
1035
1036 // Leftover for loop
1037 for(; x < pool_size_x; ++x)
1038 {
Georgios Pinitas0922dbb2019-11-25 18:39:27 +00001039 const float16_t data = *(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * static_cast<int>(_input->info()->strides_in_bytes().x())
1040 + (y - pool_pad_top) * static_cast<int>(_input->info()->strides_in_bytes().y())));
1041 res = std::max(res, data);
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001042 }
1043 }
1044
1045 float16x4_t tmp = vpmax_f16(vget_high_f16(vres), vget_low_f16(vres));
1046 res = std::max(res, vget_lane_f16(tmp, 0));
1047 res = std::max(res, vget_lane_f16(tmp, 1));
1048 res = std::max(res, vget_lane_f16(tmp, 2));
1049 res = std::max(res, vget_lane_f16(tmp, 3));
1050 }
1051
1052 // Calculate square-root in case of l2 pooling
1053 if(pooling_type == PoolingType::L2)
1054 {
1055 res = std::sqrt(res);
1056 }
1057
1058 // Store result
1059 *(reinterpret_cast<float16_t *>(output.ptr())) = res;
1060 },
1061 input, output);
1062
1063#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
1064 ARM_COMPUTE_UNUSED(window_input);
1065 ARM_COMPUTE_UNUSED(window);
1066 ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
1067#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
1068}
1069
Pablo Tello77e6c552018-12-04 15:33:49 +00001070void NEPoolingLayerKernel::poolingMxN_f16_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding)
Michalis Spyrou57dac842018-03-01 16:03:50 +00001071{
Pablo Tello77e6c552018-12-04 15:33:49 +00001072 ARM_COMPUTE_UNUSED(pooling_type);
1073 ARM_COMPUTE_UNUSED(exclude_padding);
Michalis Spyrou57dac842018-03-01 16:03:50 +00001074#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
1075 Iterator input(_input, window_input);
1076 Iterator output(_output, window);
1077
1078 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().width;
1079 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().z() : _pool_info.pool_size().height;
1080 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1081 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1082 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1083 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1084 int pool_stride_x = 0;
1085 int pool_stride_y = 0;
1086 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
1087 const int upper_bound_w = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_right);
1088 const int upper_bound_h = _input->info()->dimension(2) + (exclude_padding ? 0 : pool_pad_bottom);
1089
1090 float16x8_t vres;
1091
1092 execute_window_loop(window, [&](const Coordinates & id)
1093 {
Michalis Spyrouced25572018-10-01 16:26:20 +01001094 const int idx_width = id.y() * pool_stride_x;
1095 const int idx_height = id.z() * pool_stride_y;
1096 const int pool_limit_y = pool_pad_top - idx_height;
1097 const int pool_limit_x = pool_pad_left - idx_width;
1098
1099 const int pool_start_y = std::max(0, window_input.z().start() + pool_limit_y);
1100 const int pool_end_y = std::min(pool_size_y, window_input.z().end() + pool_limit_y);
1101 const int pool_start_x = std::max(0, window_input.y().start() + pool_limit_x);
1102 const int pool_end_x = std::min(pool_size_x, window_input.y().end() + pool_limit_x);
1103
Michalis Spyrou57dac842018-03-01 16:03:50 +00001104 if(pooling_type != PoolingType::MAX)
1105 {
1106 // Calculate scale
Pablo Tello77e6c552018-12-04 15:33:49 +00001107 const float scale = calculate_avg_scale(exclude_padding, DataLayout::NHWC, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
1108 pool_stride_y);
Michalis Spyrou57dac842018-03-01 16:03:50 +00001109 const float16x8_t scale_v = vdupq_n_f16(scale);
1110
1111 // Perform pooling
1112 vres = vdupq_n_f16(0.0f);
Michalis Spyrouced25572018-10-01 16:26:20 +01001113 for(int y = pool_start_y; y < pool_end_y; ++y)
Michalis Spyrou57dac842018-03-01 16:03:50 +00001114 {
Michalis Spyrouced25572018-10-01 16:26:20 +01001115 for(int x = pool_start_x; x < pool_end_x; ++x)
Michalis Spyrou57dac842018-03-01 16:03:50 +00001116 {
Georgios Pinitas0922dbb2019-11-25 18:39:27 +00001117 const float16x8_t data = vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * static_cast<int>(_input->info()->strides_in_bytes().y()) +
1118 (y - pool_pad_top) * static_cast<int>(_input->info()->strides_in_bytes().z())));
Michalis Spyrou57dac842018-03-01 16:03:50 +00001119
1120 // Get power of 2 in case of l2 pooling and accumulate
1121 if(pooling_type == PoolingType::L2)
1122 {
1123 vres = vaddq_f16(vres, vmulq_f16(data, data));
1124 }
1125 else
1126 {
1127 vres = vaddq_f16(vres, data);
1128 }
1129 }
1130 }
1131 // Divide by scale
1132 vres = vmulq_f16(vres, scale_v);
1133 }
1134 else
1135 {
1136 vres = vdupq_n_f16(std::numeric_limits<float>::lowest());
Michalis Spyrouced25572018-10-01 16:26:20 +01001137
1138 for(int y = pool_start_y; y < pool_end_y; ++y)
Michalis Spyrou57dac842018-03-01 16:03:50 +00001139 {
Michalis Spyrouced25572018-10-01 16:26:20 +01001140 for(int x = pool_start_x; x < pool_end_x; ++x)
Michalis Spyrou57dac842018-03-01 16:03:50 +00001141 {
Georgios Pinitas0922dbb2019-11-25 18:39:27 +00001142 const float16x8_t data = vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * static_cast<int>(_input->info()->strides_in_bytes().y()) +
1143 (y - pool_pad_top) * static_cast<int>(_input->info()->strides_in_bytes().z())));
Michalis Spyrou57dac842018-03-01 16:03:50 +00001144 vres = vmaxq_f16(vres, data);
1145 }
1146 }
1147 }
1148
1149 // Calculate square-root in case of l2 pooling
1150 if(pooling_type == PoolingType::L2)
1151 {
1152 float16x8_t sqrt_reciprocal = vrsqrteq_f16(vres);
1153 vres = vmulq_f16(vres, vmulq_f16(vrsqrtsq_f16(vmulq_f16(vres, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal));
1154 }
1155
1156 // Store result
1157 vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()), vres);
1158 },
1159 input, output);
1160
1161#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
1162 ARM_COMPUTE_UNUSED(window_input);
1163 ARM_COMPUTE_UNUSED(window);
1164 ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
1165#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
1166}
1167
Pablo Tello77e6c552018-12-04 15:33:49 +00001168void NEPoolingLayerKernel::poolingMxN_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding)
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001169{
1170 Iterator input(_input, window_input);
1171 Iterator output(_output, window);
1172
1173 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().x() : _pool_info.pool_size().width;
1174 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().height;
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001175 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1176 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1177 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1178 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1179 int pool_stride_x = 0;
1180 int pool_stride_y = 0;
Gian Marco Iodice16824302017-09-28 15:41:37 +01001181 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001182 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1183 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Gian Marco Iodice16824302017-09-28 15:41:37 +01001184
1185 execute_window_loop(window, [&](const Coordinates & id)
1186 {
1187 float res = 0.0f;
1188
1189 if(pooling_type != PoolingType::MAX)
1190 {
1191 // Calculate scale
Pablo Tello77e6c552018-12-04 15:33:49 +00001192 const float scale = calculate_avg_scale(exclude_padding, DataLayout::NCHW, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Gian Marco Iodice16824302017-09-28 15:41:37 +01001193
1194 // Perform pooling
1195 float32x4_t vres = vdupq_n_f32(0.0f);
1196
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001197 for(int y = 0; y < pool_size_y; ++y)
Gian Marco Iodice16824302017-09-28 15:41:37 +01001198 {
1199 int x = 0;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001200 for(; x <= (pool_size_x - 4); x += 4)
Gian Marco Iodice16824302017-09-28 15:41:37 +01001201 {
Michalis Spyrou7c60c992019-10-10 14:33:47 +01001202 const float32x4_t data = vld1q_f32(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_left) * static_cast<int>(_input->info()->strides_in_bytes().x()) + (y - pool_pad_top) * static_cast<int>
1203 (_input->info()->strides_in_bytes().y())));
Gian Marco Iodice16824302017-09-28 15:41:37 +01001204
1205 // Get power of 2 in case of l2 pooling and accumulate
1206 if(pooling_type == PoolingType::L2)
1207 {
1208 vres = vmlaq_f32(vres, data, data);
1209 }
1210 else
1211 {
1212 vres = vaddq_f32(vres, data);
1213 }
1214 }
1215
1216 // Leftover for loop
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001217 for(; x < pool_size_x; ++x)
Gian Marco Iodice16824302017-09-28 15:41:37 +01001218 {
Michalis Spyrou7c60c992019-10-10 14:33:47 +01001219 float data = *(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_left) * static_cast<int>(_input->info()->strides_in_bytes().x()) + (y - pool_pad_top) * static_cast<int>
1220 (_input->info()->strides_in_bytes().y())));
Gian Marco Iodice16824302017-09-28 15:41:37 +01001221
1222 // Get power of 2 in case of l2 pooling
1223 if(pooling_type == PoolingType::L2)
1224 {
1225 data *= data;
1226 }
1227
1228 res += data;
1229 }
1230 }
1231
1232#if defined(__aarch64__)
1233 // Reduction operation available on 64 bit architectures only
1234 res += vaddvq_f32(vres);
1235#else // __aarch64__
1236 // Reduction
1237 float32x2_t tmp = vpadd_f32(vget_high_f32(vres), vget_low_f32(vres));
1238 tmp = vpadd_f32(tmp, tmp);
1239
1240 res += vget_lane_f32(tmp, 0);
1241#endif // __aarch64__
1242 // Divide by scale
1243 res *= scale;
1244 }
1245 else
1246 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001247 float32x4_t vres = vdupq_n_f32(std::numeric_limits<float>::lowest());
1248 res = std::numeric_limits<float>::lowest();
Gian Marco Iodice16824302017-09-28 15:41:37 +01001249
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001250 for(int y = 0; y < pool_size_y; ++y)
Gian Marco Iodice16824302017-09-28 15:41:37 +01001251 {
1252 int x = 0;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001253 for(; x <= (pool_size_x - 4); x += 4)
Gian Marco Iodice16824302017-09-28 15:41:37 +01001254 {
Michalis Spyrou7c60c992019-10-10 14:33:47 +01001255 const float32x4_t data = vld1q_f32(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_left) * static_cast<int>(_input->info()->strides_in_bytes().x()) + (y - pool_pad_top) * static_cast<int>
1256 (_input->info()->strides_in_bytes().y())));
Gian Marco Iodice16824302017-09-28 15:41:37 +01001257 vres = vmaxq_f32(vres, data);
1258 }
1259
1260 // Leftover for loop
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001261 for(; x < pool_size_x; ++x)
Gian Marco Iodice16824302017-09-28 15:41:37 +01001262 {
Michalis Spyrou7c60c992019-10-10 14:33:47 +01001263 const float data = *(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_left) * static_cast<int>(_input->info()->strides_in_bytes().x()) + (y - pool_pad_top) * static_cast<int>
1264 (_input->info()->strides_in_bytes().y())));
Gian Marco Iodice16824302017-09-28 15:41:37 +01001265 res = std::max(res, data);
1266 }
1267 }
1268
1269#if defined(__aarch64__)
1270 // Reduction operation available on 64 bit architectures only
1271 res = std::max(vmaxvq_f32(vres), res);
1272#else // __aarch64__
1273 float32x2_t tmp = vpmax_f32(vget_high_f32(vres), vget_low_f32(vres));
1274 tmp = vpmax_f32(tmp, tmp);
1275
1276 res = std::max(res, vget_lane_f32(tmp, 0));
1277#endif // __aarch64__
1278 }
1279
1280 // Calculate square-root in case of l2 pooling
1281 if(pooling_type == PoolingType::L2)
1282 {
1283 res = std::sqrt(res);
1284 }
1285
1286 // Store result
1287 *(reinterpret_cast<float *>(output.ptr())) = res;
1288 },
1289 input, output);
1290}
1291
Pablo Tello77e6c552018-12-04 15:33:49 +00001292void NEPoolingLayerKernel::pooling2_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding)
1293{
1294 Iterator input(_input, window_input);
1295 Iterator output(_output, window);
1296
1297 constexpr int pool_size = 2;
1298 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1299 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1300 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1301 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1302 int pool_stride_x = 0;
1303 int pool_stride_y = 0;
1304 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
1305 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1306 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
1307
1308 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
1309 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
1310
1311 execute_window_loop(window, [&](const Coordinates & id)
1312 {
1313 float32x2_t top_data = vld1_f32(reinterpret_cast<const float *>(input_top_ptr + input.offset()));
1314 float32x2_t bottom_data = vld1_f32(reinterpret_cast<const float *>(input_bottom_ptr + input.offset()));
1315 float32x2_t res = {};
1316 float final_res = 0;
1317
1318 // Get power of 2 in case of l2 pooling
1319 if(pooling_type == PoolingType::L2)
1320 {
1321 top_data = vmul_f32(top_data, top_data);
1322 bottom_data = vmul_f32(bottom_data, bottom_data);
1323 }
1324
1325 if(pooling_type != PoolingType::MAX)
1326 {
1327 // Calculate scale
1328 float scale = calculate_avg_scale(exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
1329 const float32x2_t scale_v = vdup_n_f32(scale);
1330
1331 // Perform pooling
1332 const float32x2_t sum_data = vadd_f32(top_data, bottom_data);
1333 res = vmul_f32(vpadd_f32(sum_data, sum_data), scale_v);
1334 }
1335 else
1336 {
1337 const float32x2_t max_data = vmax_f32(top_data, bottom_data);
1338 res = vpmax_f32(max_data, max_data);
1339 }
1340 final_res = vget_lane_f32(res, 0);
1341
1342 // Calculate square-root in case of l2 pooling
1343 if(pooling_type == PoolingType::L2)
1344 {
1345 final_res = sqrt(final_res);
1346 }
1347
1348 // Store result
1349 *(reinterpret_cast<float *>(output.ptr())) = final_res;
1350 },
1351 input, output);
1352}
1353
1354void NEPoolingLayerKernel::pooling3_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding)
1355{
1356 Iterator input(_input, window_input);
1357 Iterator output(_output, window);
1358
1359 constexpr const int pool_size = 3;
1360 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1361 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1362 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1363 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1364 int pool_stride_x = 0;
1365 int pool_stride_y = 0;
1366 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
1367 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1368 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
1369
1370 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
1371 const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
1372 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 2));
1373
1374 execute_window_loop(window, [&](const Coordinates & id)
1375 {
1376 float32x4_t top_data = vld1q_f32(reinterpret_cast<const float *>(input_top_ptr + input.offset()));
1377 float32x4_t middle_data = vld1q_f32(reinterpret_cast<const float *>(input_middle_ptr + input.offset()));
1378 float32x4_t bottom_data = vld1q_f32(reinterpret_cast<const float *>(input_bottom_ptr + input.offset()));
1379 float32x2_t res = {};
1380 float final_res = 0;
1381
1382 // Get power of 2 in case of l2 pooling
1383 if(pooling_type == PoolingType::L2)
1384 {
1385 top_data = vmulq_f32(top_data, top_data);
1386 middle_data = vmulq_f32(middle_data, middle_data);
1387 bottom_data = vmulq_f32(bottom_data, bottom_data);
1388 }
1389
1390 if(pooling_type != PoolingType::MAX)
1391 {
1392 // Calculate scale
1393 float scale = calculate_avg_scale(exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
1394 const float32x2_t scale_v = vdup_n_f32(scale);
1395
1396 // Perform pooling
1397 const float32x4_t sum_data = vaddq_f32(vaddq_f32(top_data, bottom_data), middle_data);
1398 res = vpadd_f32(vget_high_f32(vsetq_lane_f32(0.f, sum_data, 3)), vget_low_f32(sum_data));
1399 res = vmul_f32(vpadd_f32(res, res), scale_v);
1400 }
1401 else
1402 {
1403 const float32x4_t max_data = vmaxq_f32(vmaxq_f32(top_data, bottom_data), middle_data);
1404 res = vpmax_f32(vget_high_f32(vsetq_lane_f32(-std::numeric_limits<float>::max(), max_data, 3)), vget_low_f32(max_data));
1405 res = vpmax_f32(res, res);
1406 }
1407 final_res = vget_lane_f32(res, 0);
1408
1409 // Calculate square-root in case of l2 pooling
1410 if(pooling_type == PoolingType::L2)
1411 {
1412 final_res = sqrt(final_res);
1413 }
1414
1415 // Store result
1416 *(reinterpret_cast<float *>(output.ptr())) = final_res;
1417 },
1418 input, output);
1419}
1420
1421void NEPoolingLayerKernel::pooling7_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding)
1422{
1423 Iterator input(_input, window_input);
1424 Iterator output(_output, window);
1425
1426 constexpr const int pool_size = 7;
1427 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1428 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1429 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1430 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1431 int pool_stride_x = 0;
1432 int pool_stride_y = 0;
1433 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
1434 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1435 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
1436
1437 std::array<const uint8_t *, pool_size> input_ptrs{ {} };
1438 for(int i = 0; i < pool_size; ++i)
1439 {
1440 input_ptrs[i] = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + i));
1441 }
1442
1443 execute_window_loop(window, [&](const Coordinates & id)
1444 {
1445 float32x2_t res = {};
1446 float final_res = 0.f;
1447 if(pooling_type != PoolingType::MAX)
1448 {
1449 // Calculate scale
1450 float scale = calculate_avg_scale(exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
1451 const float32x2_t scale_v = vdup_n_f32(scale);
1452
1453 // Perform pooling
1454 float32x4x2_t data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[0] + input.offset()));
1455 // Get power of 2 in case of l2 pooling
1456 if(pooling_type == PoolingType::L2)
1457 {
1458 data.val[0] = vmulq_f32(data.val[0], data.val[0]);
1459 data.val[1] = vmulq_f32(data.val[1], data.val[1]);
1460 }
1461 float32x4_t sum_data = vaddq_f32(data.val[0], vsetq_lane_f32(0.f, data.val[1], 3));
1462 for(int i = 1; i < pool_size; ++i)
1463 {
1464 data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[i] + input.offset()));
1465 // Get power of 2 in case of l2 pooling
1466 if(pooling_type == PoolingType::L2)
1467 {
1468 data.val[0] = vmulq_f32(data.val[0], data.val[0]);
1469 data.val[1] = vmulq_f32(data.val[1], data.val[1]);
1470 }
1471 sum_data = vaddq_f32(sum_data, data.val[0]);
1472 sum_data = vaddq_f32(sum_data, vsetq_lane_f32(0.f, data.val[1], 3));
1473 }
1474 res = vpadd_f32(vget_high_f32(sum_data), vget_low_f32(sum_data));
1475 res = vmul_f32(vpadd_f32(res, res), scale_v);
1476 }
1477 else
1478 {
1479 float32x4x2_t max_data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[0] + input.offset()));
1480 for(int i = 1; i < pool_size; ++i)
1481 {
1482 const float32x4x2_t data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[i] + input.offset()));
1483 max_data = vmax2q_f32(max_data, data);
1484 }
1485 res = vpmax_f32(vget_high_f32(vsetq_lane_f32(-std::numeric_limits<float>::max(), max_data.val[1], 3)), vget_low_f32(max_data.val[1]));
1486 res = vpmax_f32(res, vpmax_f32(vget_high_f32(max_data.val[0]), vget_low_f32(max_data.val[0])));
1487 res = vpmax_f32(res, res);
1488 }
1489 final_res = vget_lane_f32(res, 0);
1490
1491 // Calculate square-root in case of l2 pooling
1492 if(pooling_type == PoolingType::L2)
1493 {
1494 final_res = sqrt(final_res);
1495 }
1496
1497 // Store result
1498 *(reinterpret_cast<float *>(output.ptr())) = final_res;
1499 },
1500 input, output);
1501}
1502
1503void NEPoolingLayerKernel::poolingMxN_f32_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding)
Michalis Spyrou57dac842018-03-01 16:03:50 +00001504{
1505 Iterator input(_input, window_input);
1506 Iterator output(_output, window);
1507
1508 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().width;
1509 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().z() : _pool_info.pool_size().height;
1510 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1511 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1512 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1513 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1514 int pool_stride_x = 0;
1515 int pool_stride_y = 0;
1516 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
1517 const int upper_bound_w = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_right);
1518 const int upper_bound_h = _input->info()->dimension(2) + (exclude_padding ? 0 : pool_pad_bottom);
1519
1520 float32x4_t vres;
1521
1522 execute_window_loop(window, [&](const Coordinates & id)
1523 {
Michalis Spyrouced25572018-10-01 16:26:20 +01001524 const int idx_width = id.y() * pool_stride_x;
1525 const int idx_height = id.z() * pool_stride_y;
1526 const int pool_limit_y = pool_pad_top - idx_height;
1527 const int pool_limit_x = pool_pad_left - idx_width;
1528
1529 const int pool_start_y = std::max(0, window_input.z().start() + pool_limit_y);
1530 const int pool_end_y = std::min(pool_size_y, window_input.z().end() + pool_limit_y);
1531 const int pool_start_x = std::max(0, window_input.y().start() + pool_limit_x);
1532 const int pool_end_x = std::min(pool_size_x, window_input.y().end() + pool_limit_x);
1533
Michalis Spyrou57dac842018-03-01 16:03:50 +00001534 if(pooling_type != PoolingType::MAX)
1535 {
1536 // Calculate scale
Pablo Tello77e6c552018-12-04 15:33:49 +00001537 const float scale = calculate_avg_scale(exclude_padding, DataLayout::NHWC, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
1538 pool_stride_y);
Michalis Spyrou57dac842018-03-01 16:03:50 +00001539 const float32x4_t scale_v = vdupq_n_f32(scale);
1540
1541 // Perform pooling
1542 vres = vdupq_n_f32(0.0f);
1543
Michalis Spyrouced25572018-10-01 16:26:20 +01001544 for(int y = pool_start_y; y < pool_end_y; ++y)
Michalis Spyrou57dac842018-03-01 16:03:50 +00001545 {
Michalis Spyrouced25572018-10-01 16:26:20 +01001546 for(int x = pool_start_x; x < pool_end_x; ++x)
Michalis Spyrou57dac842018-03-01 16:03:50 +00001547 {
Michalis Spyrou7c60c992019-10-10 14:33:47 +01001548 const float32x4_t data = vld1q_f32(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_left) * static_cast<int>(_input->info()->strides_in_bytes().y()) + (y - pool_pad_top) * static_cast<int>
1549 (_input->info()->strides_in_bytes().z())));
Michalis Spyrou57dac842018-03-01 16:03:50 +00001550
1551 // Get power of 2 in case of l2 pooling and accumulate
1552 if(pooling_type == PoolingType::L2)
1553 {
1554 vres = vmlaq_f32(vres, data, data);
1555 }
1556 else
1557 {
1558 vres = vaddq_f32(vres, data);
1559 }
1560 }
1561 }
1562 // Divide by scale
1563 vres = vmulq_f32(vres, scale_v);
1564 }
1565 else
1566 {
1567 vres = vdupq_n_f32(std::numeric_limits<float>::lowest());
Michalis Spyrouced25572018-10-01 16:26:20 +01001568 for(int y = pool_start_y; y < pool_end_y; ++y)
Michalis Spyrou57dac842018-03-01 16:03:50 +00001569 {
Michalis Spyrouced25572018-10-01 16:26:20 +01001570 for(int x = pool_start_x; x < pool_end_x; ++x)
Michalis Spyrou57dac842018-03-01 16:03:50 +00001571 {
Michalis Spyrou7c60c992019-10-10 14:33:47 +01001572 const float32x4_t data = vld1q_f32(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_left) * static_cast<int>(_input->info()->strides_in_bytes().y()) + (y - pool_pad_top) * static_cast<int>
1573 (_input->info()->strides_in_bytes().z())));
Michalis Spyrou57dac842018-03-01 16:03:50 +00001574 vres = vmaxq_f32(vres, data);
1575 }
1576 }
1577 }
1578
1579 // Calculate square-root in case of l2 pooling
1580 if(pooling_type == PoolingType::L2)
1581 {
Georgios Pinitas27f223d2019-12-16 19:23:02 +00001582 float32x4_t l2_res = { static_cast<float>(sqrt(vgetq_lane_f32(vres, 0))),
1583 static_cast<float>(sqrt(vgetq_lane_f32(vres, 1))),
1584 static_cast<float>(sqrt(vgetq_lane_f32(vres, 2))),
1585 static_cast<float>(sqrt(vgetq_lane_f32(vres, 3)))
1586 };
1587 vres = l2_res;
Michalis Spyrou57dac842018-03-01 16:03:50 +00001588 }
1589
1590 // Store result
1591 vst1q_f32(reinterpret_cast<float *>(output.ptr()), vres);
1592 },
1593 input, output);
1594}
1595
Pablo Tello77e6c552018-12-04 15:33:49 +00001596void NEPoolingLayerKernel::poolingMxN_qasymm8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding)
Georgios Pinitas55186712018-01-08 17:37:12 +00001597{
1598 Iterator input(_input, window_input);
1599 Iterator output(_output, window);
1600
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001601 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().x() : _pool_info.pool_size().width;
1602 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().height;
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001603 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1604 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1605 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1606 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1607 int pool_stride_x = 0;
1608 int pool_stride_y = 0;
Georgios Pinitas55186712018-01-08 17:37:12 +00001609 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001610 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1611 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Georgios Pinitas55186712018-01-08 17:37:12 +00001612
Georgios Pinitas4c5469b2019-05-21 13:32:43 +01001613 const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform();
1614 const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform();
1615
Georgios Pinitas55186712018-01-08 17:37:12 +00001616 execute_window_loop(window, [&](const Coordinates & id)
1617 {
1618 uint8_t res = 0;
1619
1620 if(pooling_type != PoolingType::MAX)
1621 {
1622 uint32x4_t vres = vdupq_n_u32(0);
1623 uint32_t sres = 0;
1624
1625 // Calculate scale
Pablo Tello77e6c552018-12-04 15:33:49 +00001626 const float scale = calculate_avg_scale(exclude_padding, DataLayout::NCHW, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitas55186712018-01-08 17:37:12 +00001627
1628 // Perform pooling
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001629 for(int y = 0; y < pool_size_y; ++y)
Georgios Pinitas55186712018-01-08 17:37:12 +00001630 {
1631 int x = 0;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001632 for(; x <= (pool_size_x - 8); x += 8)
Georgios Pinitas55186712018-01-08 17:37:12 +00001633 {
Michalis Spyrou7c60c992019-10-10 14:33:47 +01001634 const uint8x8_t data = vld1_u8(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_left) * static_cast<int>(_input->info()->strides_in_bytes().x()) + (y - pool_pad_top) * static_cast<int>
1635 (_input->info()->strides_in_bytes().y())));
Georgios Pinitas55186712018-01-08 17:37:12 +00001636
1637 const uint16x8_t data_u16 = vmovl_u8(data);
1638 vres = vaddq_u32(vres, vaddl_u16(vget_high_u16(data_u16), vget_low_u16(data_u16)));
1639 }
1640
1641 // Leftover for loop
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001642 for(; x < pool_size_x; ++x)
Georgios Pinitas55186712018-01-08 17:37:12 +00001643 {
Michalis Spyrou7c60c992019-10-10 14:33:47 +01001644 uint8_t data = *(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_left) * static_cast<int>(_input->info()->strides_in_bytes().x()) + (y - pool_pad_top) * static_cast<int>
1645 (_input->info()->strides_in_bytes().y())));
Georgios Pinitas55186712018-01-08 17:37:12 +00001646 sres += data;
1647 }
1648 }
1649
1650 // Reduction
1651 const auto tmp = vpadd_u32(vget_high_u32(vres), vget_low_u32(vres));
1652 sres += vget_lane_u32(tmp, 0) + vget_lane_u32(tmp, 1);
1653
1654 // Divide by scale
1655 res = static_cast<uint8_t>(support::cpp11::round(sres * scale));
1656 }
1657 else
1658 {
1659 uint8x8_t vres = vdup_n_u8(0);
1660 res = 0;
1661
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001662 for(int y = 0; y < pool_size_y; ++y)
Georgios Pinitas55186712018-01-08 17:37:12 +00001663 {
1664 int x = 0;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001665 for(; x <= (pool_size_x - 8); x += 8)
Georgios Pinitas55186712018-01-08 17:37:12 +00001666 {
Michalis Spyrou7c60c992019-10-10 14:33:47 +01001667 const uint8x8_t data = vld1_u8(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_left) * static_cast<int>(_input->info()->strides_in_bytes().x()) + (y - pool_pad_top) * static_cast<int>
1668 (_input->info()->strides_in_bytes().y())));
Georgios Pinitas55186712018-01-08 17:37:12 +00001669 vres = vmax_u8(vres, data);
1670 }
1671
1672 // Leftover for loop
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001673 for(; x < pool_size_x; ++x)
Georgios Pinitas55186712018-01-08 17:37:12 +00001674 {
Michalis Spyrou7c60c992019-10-10 14:33:47 +01001675 const uint8_t data = *(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_left) * static_cast<int>(_input->info()->strides_in_bytes().x()) + (y - pool_pad_top) * static_cast<int>
1676 (_input->info()->strides_in_bytes().y())));
Georgios Pinitas55186712018-01-08 17:37:12 +00001677 res = std::max(res, data);
1678 }
1679 }
1680
1681 // Reduce max
1682 vres = vpmax_u8(vres, vres);
1683 vres = vpmax_u8(vres, vres);
1684 vres = vpmax_u8(vres, vres);
1685
1686 // Get max value
1687 res = std::max(res, vget_lane_u8(vres, 0));
1688 }
1689
1690 // Store result
Georgios Pinitas4c5469b2019-05-21 13:32:43 +01001691 res = (input_qinfo != output_qinfo) ? quantize_qasymm8(dequantize_qasymm8(res, input_qinfo), output_qinfo) : res;
Georgios Pinitas55186712018-01-08 17:37:12 +00001692 *(reinterpret_cast<uint8_t *>(output.ptr())) = res;
1693 },
1694 input, output);
1695}
1696
Pablo Tello77e6c552018-12-04 15:33:49 +00001697void NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding)
Michalis Spyrou57dac842018-03-01 16:03:50 +00001698{
1699 Iterator input(_input, window_input);
1700 Iterator output(_output, window);
1701
1702 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().width;
1703 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().z() : _pool_info.pool_size().height;
1704 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1705 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1706 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1707 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1708 int pool_stride_x = 0;
1709 int pool_stride_y = 0;
1710 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
1711 const int upper_bound_w = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_right);
1712 const int upper_bound_h = _input->info()->dimension(2) + (exclude_padding ? 0 : pool_pad_bottom);
1713
Georgios Pinitas4c5469b2019-05-21 13:32:43 +01001714 const float32x4_t half_scale_v = vdupq_n_f32(0.5f);
1715 const UniformQuantizationInfo input_qinfo = _input->info()->quantization_info().uniform();
1716 const UniformQuantizationInfo output_qinfo = _output->info()->quantization_info().uniform();
Georgios Pinitas283fc602018-11-09 10:46:43 +00001717
Michalis Spyrou57dac842018-03-01 16:03:50 +00001718 execute_window_loop(window, [&](const Coordinates & id)
1719 {
Michalis Spyrouced25572018-10-01 16:26:20 +01001720 const int idx_width = id.y() * pool_stride_x;
1721 const int idx_height = id.z() * pool_stride_y;
1722 const int pool_limit_y = pool_pad_top - idx_height;
1723 const int pool_limit_x = pool_pad_left - idx_width;
1724
1725 const int pool_start_y = std::max(0, window_input.z().start() + pool_limit_y);
1726 const int pool_end_y = std::min(pool_size_y, window_input.z().end() + pool_limit_y);
1727 const int pool_start_x = std::max(0, window_input.y().start() + pool_limit_x);
1728 const int pool_end_x = std::min(pool_size_x, window_input.y().end() + pool_limit_x);
1729
Michalis Spyrou57dac842018-03-01 16:03:50 +00001730 if(pooling_type != PoolingType::MAX)
1731 {
1732 uint32x4_t vres1 = vdupq_n_u32(0);
1733 uint32x4_t vres2 = vdupq_n_u32(0);
Michalis Spyrouced25572018-10-01 16:26:20 +01001734 uint32x4_t vres3 = vdupq_n_u32(0);
1735 uint32x4_t vres4 = vdupq_n_u32(0);
Michalis Spyrou57dac842018-03-01 16:03:50 +00001736
1737 // Calculate scale
Pablo Tello77e6c552018-12-04 15:33:49 +00001738 const float scale = calculate_avg_scale(exclude_padding, DataLayout::NHWC, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
1739 pool_stride_y);
Michalis Spyrou57dac842018-03-01 16:03:50 +00001740 const float32x4_t scale_v = vdupq_n_f32(scale);
1741
1742 // Perform pooling
Michalis Spyrouced25572018-10-01 16:26:20 +01001743 for(int y = pool_start_y; y < pool_end_y; ++y)
Michalis Spyrou57dac842018-03-01 16:03:50 +00001744 {
Michalis Spyrouced25572018-10-01 16:26:20 +01001745 for(int x = pool_start_x; x < pool_end_x; ++x)
Michalis Spyrou57dac842018-03-01 16:03:50 +00001746 {
Michalis Spyrou7c60c992019-10-10 14:33:47 +01001747 const uint8x16_t data = vld1q_u8(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_left) * static_cast<int>(_input->info()->strides_in_bytes().y()) + (y - pool_pad_top) * static_cast<int>
1748 (_input->info()->strides_in_bytes().z())));
Michalis Spyrou57dac842018-03-01 16:03:50 +00001749
Michalis Spyrouced25572018-10-01 16:26:20 +01001750 const uint16x8_t data_u16 = vmovl_u8(vget_low_u8(data));
1751 const uint16x8_t data2_u16 = vmovl_u8(vget_high_u8(data));
1752 vres1 = vaddq_u32(vres1, vmovl_u16(vget_low_u16(data_u16)));
1753 vres2 = vaddq_u32(vres2, vmovl_u16(vget_high_u16(data_u16)));
1754 vres3 = vaddq_u32(vres3, vmovl_u16(vget_low_u16(data2_u16)));
1755 vres4 = vaddq_u32(vres4, vmovl_u16(vget_high_u16(data2_u16)));
Michalis Spyrou57dac842018-03-01 16:03:50 +00001756 }
1757 }
Georgios Pinitas283fc602018-11-09 10:46:43 +00001758 // Divide by scale and add 0.5f to round to nearest instead of rounding towards zero
1759 vres1 = vcvtq_u32_f32(vmlaq_f32(half_scale_v, vcvtq_f32_u32(vres1), scale_v));
1760 vres2 = vcvtq_u32_f32(vmlaq_f32(half_scale_v, vcvtq_f32_u32(vres2), scale_v));
1761 vres3 = vcvtq_u32_f32(vmlaq_f32(half_scale_v, vcvtq_f32_u32(vres3), scale_v));
1762 vres4 = vcvtq_u32_f32(vmlaq_f32(half_scale_v, vcvtq_f32_u32(vres4), scale_v));
Michalis Spyrou57dac842018-03-01 16:03:50 +00001763
Michalis Spyrouced25572018-10-01 16:26:20 +01001764 uint8x8_t res1 = vmovn_u16(vcombine_u16(vmovn_u32(vres1), vmovn_u32(vres2)));
1765 uint8x8_t res2 = vmovn_u16(vcombine_u16(vmovn_u32(vres3), vmovn_u32(vres4)));
Pablo Telloa52e4cf2019-04-01 14:55:18 +01001766 if(input_qinfo != output_qinfo)
1767 {
1768 const auto requantized_output = vquantize(vdequantize(vcombine_u8(res1, res2), input_qinfo), output_qinfo);
1769 res1 = vget_low_u8(requantized_output);
1770 res2 = vget_high_u8(requantized_output);
1771 }
Michalis Spyrou57dac842018-03-01 16:03:50 +00001772
1773 // Store result
Michalis Spyrouced25572018-10-01 16:26:20 +01001774 vst1_u8(output.ptr(), res1);
1775 vst1_u8(output.ptr() + 8, res2);
Michalis Spyrou57dac842018-03-01 16:03:50 +00001776 }
1777 else
1778 {
Michalis Spyrouced25572018-10-01 16:26:20 +01001779 uint8x16_t vres = vdupq_n_u8(0);
Michalis Spyrou57dac842018-03-01 16:03:50 +00001780
Michalis Spyrouced25572018-10-01 16:26:20 +01001781 for(int y = pool_start_y; y < pool_end_y; ++y)
Michalis Spyrou57dac842018-03-01 16:03:50 +00001782 {
Michalis Spyrouced25572018-10-01 16:26:20 +01001783 for(int x = pool_start_x; x < pool_end_x; ++x)
Michalis Spyrou57dac842018-03-01 16:03:50 +00001784 {
Michalis Spyrou7c60c992019-10-10 14:33:47 +01001785 const uint8x16_t data = vld1q_u8(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_left) * static_cast<int>(_input->info()->strides_in_bytes().y()) + (y - pool_pad_top) * static_cast<int>
1786 (_input->info()->strides_in_bytes().z())));
Michalis Spyrouced25572018-10-01 16:26:20 +01001787 vres = vmaxq_u8(vres, data);
Michalis Spyrou57dac842018-03-01 16:03:50 +00001788 }
1789 }
1790
1791 // Store result
Pablo Telloa52e4cf2019-04-01 14:55:18 +01001792 vst1q_u8(output.ptr(), (input_qinfo != output_qinfo) ? vquantize(vdequantize(vres, input_qinfo), output_qinfo) : vres);
Michalis Spyrou57dac842018-03-01 16:03:50 +00001793 }
1794 },
1795 input, output);
1796}
1797
Michalis Spyrouafa5d812017-11-30 14:25:57 +00001798Status NEPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
1799{
1800 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
1801
1802 unsigned int pooled_w = 0;
1803 unsigned int pooled_h = 0;
1804 unsigned int num_elems_processed_per_iteration = 0;
1805 BorderSize border_size(0);
1806
Michalis Spyrou57dac842018-03-01 16:03:50 +00001807 const bool is_global_pooling = pool_info.is_global_pooling();
1808 unsigned int pool_size_x = 0;
1809 unsigned int pool_size_y = 0;
1810
1811 // Get data layout
1812 const DataLayout data_layout = input->data_layout();
1813 const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
1814 const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
1815
1816 pool_size_x = is_global_pooling ? input->dimension(idx_width) : pool_info.pool_size().width;
1817 pool_size_y = is_global_pooling ? input->dimension(idx_height) : pool_info.pool_size().height;
Michalis Spyrouafa5d812017-11-30 14:25:57 +00001818
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001819 // Validate pool info before calling scaled_dimensions
1820 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_pool_info(pool_size_x, pool_size_y));
Michalis Spyrouafa5d812017-11-30 14:25:57 +00001821
1822 // Check output dimensions
Michalis Spyrou57dac842018-03-01 16:03:50 +00001823 std::tie(pooled_w, pooled_h) = scaled_dimensions(input->dimension(idx_width),
1824 input->dimension(idx_height),
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001825 pool_size_x,
1826 pool_size_y,
Michalis Spyrouafa5d812017-11-30 14:25:57 +00001827 pool_info.pad_stride_info());
1828
Georgios Pinitas13d96e02018-08-23 11:20:23 +01001829 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, pool_info, pooled_w, pooled_h));
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001830 ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), pool_info, num_elems_processed_per_iteration, border_size, pooled_w, pooled_h,
1831 pool_size_x, pool_size_y)
1832 .first);
Michalis Spyrouafa5d812017-11-30 14:25:57 +00001833
1834 return Status{};
1835}
1836
Moritz Pflanzerc186b572017-09-07 09:48:04 +01001837void NEPoolingLayerKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001838{
Moritz Pflanzerc186b572017-09-07 09:48:04 +01001839 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001840 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
1841 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
1842 ARM_COMPUTE_ERROR_ON(_func == nullptr);
1843
Pablo Tello77e6c552018-12-04 15:33:49 +00001844 const unsigned int pool_stride_x = _pool_info.pad_stride_info().stride().first;
1845 const unsigned int pool_stride_y = _pool_info.pad_stride_info().stride().second;
1846 const unsigned int pool_size = _pool_info.pool_size().width;
1847 const bool exclude_padding = _pool_info.exclude_padding();
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001848
Michalis Spyrou57dac842018-03-01 16:03:50 +00001849 Window window_input(window);
Georgios Pinitas14d9d982019-12-13 12:33:09 +00001850 if(_data_layout == DataLayout::NCHW)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001851 {
Michalis Spyrou57dac842018-03-01 16:03:50 +00001852 // Set step for input in x and y direction for the input
1853 unsigned int window_x_inc = 0;
1854 switch(_input->info()->data_type())
Pablo Tello0c34fe22017-06-26 17:17:42 +01001855 {
Michalis Spyrou57dac842018-03-01 16:03:50 +00001856 case DataType::QASYMM8:
1857 {
1858 window_x_inc = pool_stride_x;
1859 if((pool_size == 2 || pool_size == 3) && pool_stride_x < 3)
1860 {
1861 window_x_inc = (pool_stride_x == 2) ? _num_elems_processed_per_iteration * 2 : _num_elems_processed_per_iteration;
1862 }
1863 break;
1864 }
Pablo Tello77e6c552018-12-04 15:33:49 +00001865
Georgios Pinitas13d96e02018-08-23 11:20:23 +01001866 case DataType::F16:
Michalis Spyrou57dac842018-03-01 16:03:50 +00001867 case DataType::F32:
1868 {
1869 window_x_inc = pool_stride_x;
1870 break;
1871 }
1872 default:
1873 {
1874 ARM_COMPUTE_ERROR("Not supported");
1875 }
Georgios Pinitas55186712018-01-08 17:37:12 +00001876 }
Michalis Spyrou57dac842018-03-01 16:03:50 +00001877 window_input.set(Window::DimX, Window::Dimension(window.x().start() * pool_stride_x, window.x().end() * pool_stride_x, window_x_inc));
1878 window_input.set(Window::DimY, Window::Dimension(window.y().start() * pool_stride_y, window.y().end() * pool_stride_y, pool_stride_y));
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001879 }
Michalis Spyrou57dac842018-03-01 16:03:50 +00001880 else
1881 {
Georgios Pinitascac13b12018-04-27 19:07:19 +01001882 window_input.set(Window::DimX, Window::Dimension(window.x().start(), window.x().end(), _num_elems_processed_per_iteration));
Michalis Spyrou57dac842018-03-01 16:03:50 +00001883 window_input.set(Window::DimY, Window::Dimension(0, _input->info()->dimension(1), pool_stride_x));
1884 window_input.set(Window::DimZ, Window::Dimension(0, _input->info()->dimension(2), pool_stride_y));
1885 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001886
1887 // Run function
Pablo Tello77e6c552018-12-04 15:33:49 +00001888 (this->*_func)(window_input, window, _pool_info.pool_type(), exclude_padding);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001889}