blob: ffb6d08993889ebfa1487212a902403c870332ab [file] [log] [blame]
/*
 * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h"
25
26#include "arm_compute/core/AccessWindowStatic.h"
27#include "arm_compute/core/Error.h"
28#include "arm_compute/core/FixedPoint.h"
29#include "arm_compute/core/Helpers.h"
30#include "arm_compute/core/ITensor.h"
Georgios Pinitas55186712018-01-08 17:37:12 +000031#include "arm_compute/core/NEON/NEAsymm.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010032#include "arm_compute/core/NEON/NEFixedPoint.h"
Georgios Pinitascdf51452017-08-31 14:21:36 +010033#include "arm_compute/core/NEON/NEMath.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010034#include "arm_compute/core/TensorInfo.h"
35#include "arm_compute/core/Utils.h"
36#include "arm_compute/core/Validate.h"
37#include "arm_compute/core/Window.h"
38
Georgios Pinitas55186712018-01-08 17:37:12 +000039#include "support/ToolchainSupport.h"
40
Anthony Barbier6ff3b192017-09-04 18:44:23 +010041#include <algorithm>
42#include <arm_neon.h>
Georgios Pinitascdf51452017-08-31 14:21:36 +010043#include <cmath>
Anthony Barbier6ff3b192017-09-04 18:44:23 +010044#include <limits>
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +010045#include <set>
Anthony Barbier6ff3b192017-09-04 18:44:23 +010046#include <string>
47#include <tuple>
48
49using namespace arm_compute;
50
51namespace
52{
Michalis Spyrouafa5d812017-11-30 14:25:57 +000053void auto_init(const ITensorInfo *input, ITensorInfo *output, unsigned int pooled_w, unsigned int pooled_h)
54{
55 TensorShape output_shape{ input->tensor_shape() };
Michalis Spyrou57dac842018-03-01 16:03:50 +000056 output_shape.set(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH), pooled_w);
57 output_shape.set(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT), pooled_h);
Michalis Spyrouafa5d812017-11-30 14:25:57 +000058
59 auto_init_if_empty(*output, input->clone()->set_tensor_shape(output_shape));
60}
61
Michalis Spyrou57dac842018-03-01 16:03:50 +000062template <bool exclude_padding, DataLayout data_layout>
Isabella Gottardi7567f5f2018-01-30 15:26:00 +000063inline float calculate_avg_scale(const Coordinates &id, const int pool_size_x, const int pool_size_y, const int upper_bound_w, const int upper_bound_h,
Anthony Barbier6ff3b192017-09-04 18:44:23 +010064 const int pad_x, const int pad_y, const int stride_x, const int stride_y)
65{
Michalis Spyrou57dac842018-03-01 16:03:50 +000066 const unsigned int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
67 const unsigned int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
68
69 int start_x = id[idx_width] * stride_x - pad_x;
70 int start_y = id[idx_height] * stride_y - pad_y;
71
72 const int end_x = std::min(start_x + pool_size_x, upper_bound_w);
73 const int end_y = std::min(start_y + pool_size_y, upper_bound_h);
Georgios Pinitasadaae7e2017-10-30 15:56:32 +000074 if(exclude_padding)
75 {
76 start_x = std::max(0, start_x);
77 start_y = std::max(0, start_y);
78 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +010079 return 1.f / ((end_y - start_y) * (end_x - start_x));
80}
81
82inline qint8_t calculate_avg_scale_q8(const Coordinates &id, int pool_size, int upper_bound_w, int upper_bound_h,
83 int pad_x, int pad_y, int stride_x, int stride_y, int fixed_point_position)
84{
Pablo Tello0c34fe22017-06-26 17:17:42 +010085 static const std::array<qint8_t, 10> scale_values_q8 =
Anthony Barbier6ff3b192017-09-04 18:44:23 +010086 { { 0x0, 0x0, 0x40, 0x2A, 0x20, 0x19, 0x15, 0x12, 0x10, 0xE } };
87 const int start_x = id.x() * stride_x - pad_x;
88 const int start_y = id.y() * stride_y - pad_y;
89 const int end_x = std::min(start_x + pool_size, upper_bound_w);
90 const int end_y = std::min(start_y + pool_size, upper_bound_h);
91 const int val = ((end_y - start_y) * (end_x - start_x));
Michalis Spyroubbd9fb92017-06-22 12:57:51 +010092 return sshr_qs8(scale_values_q8[val], (7 - fixed_point_position));
93}
94
95inline qint16_t calculate_avg_scale_q16(const Coordinates &id, int pool_size, int upper_bound_w, int upper_bound_h,
96 int pad_x, int pad_y, int stride_x, int stride_y, int fixed_point_position)
97{
98 static std::array<qint16_t, 10> scale_values_q16 =
99 { { 0x0, 0x0, 0x4000, 0x2AAB, 0x2000, 0x199A, 0x1555, 0x1249, 0x1000, 0xE38 } };
100 const int start_x = id.x() * stride_x - pad_x;
101 const int start_y = id.y() * stride_y - pad_y;
102 const int end_x = std::min(start_x + pool_size, upper_bound_w);
103 const int end_y = std::min(start_y + pool_size, upper_bound_h);
104 const int val = ((end_y - start_y) * (end_x - start_x));
105 return sshr_qs16(scale_values_q16[val], (15 - fixed_point_position));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100106}
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100107
Georgios Pinitas55186712018-01-08 17:37:12 +0000108template <bool exclude_padding>
109inline void scale_vector_s16x8(uint16x8_t &v, const Coordinates &id, int id_offset, int step,
110 const int pool_size, const int upper_bound_w, const int upper_bound_h,
111 const int pad_x, const int pad_y, const int stride_x, const int stride_y)
112{
113 int start_x = (id.x() + id_offset) * stride_x - pad_x;
114 int start_y = id.y() * stride_y - pad_y;
115 const int end_y = std::min(start_y + pool_size, upper_bound_h);
116 if(exclude_padding)
117 {
118 start_y = std::max(0, start_y);
119 }
120
121 std::array<uint16_t, 8> elems =
122 {
123 {
124 vgetq_lane_u16(v, 0),
125 vgetq_lane_u16(v, 1),
126 vgetq_lane_u16(v, 2),
127 vgetq_lane_u16(v, 3),
128 vgetq_lane_u16(v, 4),
129 vgetq_lane_u16(v, 5),
130 vgetq_lane_u16(v, 6),
131 vgetq_lane_u16(v, 7),
132 }
133 };
134
135 for(auto &el : elems)
136 {
137 int c_start_x = start_x;
138 const int end_x = std::min(c_start_x + pool_size, upper_bound_w);
139 if(exclude_padding)
140 {
141 c_start_x = std::max(0, c_start_x);
142 }
143 float scale = 1.f / ((end_y - start_y) * (end_x - c_start_x));
144 el *= scale;
145 start_x += step * stride_x;
146 }
147
148 v = vsetq_lane_u16(elems[0], v, 0);
149 v = vsetq_lane_u16(elems[1], v, 1);
150 v = vsetq_lane_u16(elems[2], v, 2);
151 v = vsetq_lane_u16(elems[3], v, 3);
152 v = vsetq_lane_u16(elems[4], v, 4);
153 v = vsetq_lane_u16(elems[5], v, 5);
154 v = vsetq_lane_u16(elems[6], v, 6);
155 v = vsetq_lane_u16(elems[7], v, 7);
156}
157
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000158Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, unsigned int &pooled_w, unsigned int pooled_h, int pool_size_x)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100159{
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000160 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100161
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000162 int pool_stride_x = 0;
163 int pool_stride_y = 0;
164 PoolingType pool_type = pool_info.pool_type();
165 const PadStrideInfo pad_stride_info = pool_info.pad_stride_info();
166 const bool exclude_padding = pool_info.exclude_padding();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100167 std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
Gian Marco Iodice16824302017-09-28 15:41:37 +0100168 static const std::set<int> supported_pool_sizes = { 2, 3 };
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100169
Georgios Pinitas55186712018-01-08 17:37:12 +0000170 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32);
171 ARM_COMPUTE_RETURN_ERROR_ON(pool_type == PoolingType::L2 && is_data_type_quantized(input->data_type()));
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000172
173 ARM_COMPUTE_RETURN_ERROR_ON((supported_pool_sizes.find(pool_size_x) == supported_pool_sizes.end()) && ((input->data_type() != DataType::F32) && (input->data_type() != DataType::QASYMM8))
174 && (pool_type != PoolingType::MAX));
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000175 ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_fixed_point(input->data_type()) && pool_stride_x > 2);
176 ARM_COMPUTE_RETURN_ERROR_ON(exclude_padding && is_data_type_fixed_point(input->data_type()));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100177
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000178 if(output->total_size() != 0)
Georgios Pinitas1dad50e2017-07-03 17:51:34 +0100179 {
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000180 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
181 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
Michalis Spyrou57dac842018-03-01 16:03:50 +0000182 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
183 ARM_COMPUTE_RETURN_ERROR_ON((output->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH)) != pooled_w)
184 || (output->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT)) != pooled_h));
Georgios Pinitas1dad50e2017-07-03 17:51:34 +0100185 }
186
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000187 return Status{};
188}
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100189
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000190Status validate_arguments_pool_info(const unsigned int pool_size_x, const unsigned int pool_size_y)
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000191{
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000192 ARM_COMPUTE_RETURN_ERROR_ON(pool_size_x == 0);
193 ARM_COMPUTE_RETURN_ERROR_ON(pool_size_y == 0);
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000194
195 return Status{};
196}
197
198std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const PoolingLayerInfo &pool_info, unsigned int &num_elems_processed_per_iteration,
199 BorderSize &border_size,
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000200 unsigned int pooled_w, unsigned int pooled_h, int pool_size_x, int pool_size_y)
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000201{
Michalis Spyrou57dac842018-03-01 16:03:50 +0000202 // Get data layout
203 DataLayout data_layout = input->data_layout();
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000204 unsigned int num_elems_read_per_iteration = 0;
205 unsigned int num_elems_horizontal_window = 0;
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000206 int pool_stride_x = 0;
207 int pool_stride_y = 0;
Michalis Spyrou57dac842018-03-01 16:03:50 +0000208 const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
209 const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
210 const int input_width = input->dimension(idx_width);
211 const int input_height = input->dimension(idx_height);
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000212 const PadStrideInfo pad_stride_info = pool_info.pad_stride_info();
213 std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000214 const int pool_pad_right = pad_stride_info.pad_right();
215 const int pool_pad_top = pad_stride_info.pad_top();
216 const int pool_pad_left = pad_stride_info.pad_left();
217 const int pool_pad_bottom = pad_stride_info.pad_bottom();
218 const bool is_square = pool_size_x == pool_size_y;
Michalis Spyrou57dac842018-03-01 16:03:50 +0000219
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000220 // Check output dimensions
Michalis Spyrou57dac842018-03-01 16:03:50 +0000221 std::tie(pooled_w, pooled_h) = scaled_dimensions(input->dimension(idx_width),
222 input->dimension(idx_height),
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000223 pool_size_x,
224 pool_size_y,
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000225 pad_stride_info);
Michalis Spyrou57dac842018-03-01 16:03:50 +0000226 auto_init(input, output, pooled_w, pooled_h);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100227
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000228 //If it's not squared and optimized will be executed the MxN
229 num_elems_read_per_iteration = 1;
230 num_elems_processed_per_iteration = 1;
231 num_elems_horizontal_window = 1;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100232
Michalis Spyrou57dac842018-03-01 16:03:50 +0000233 const bool is_nhwc = data_layout == DataLayout::NHWC;
234
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000235 if(is_square)
236 {
237 switch(input->data_type())
238 {
239 case DataType::QS8:
240 num_elems_read_per_iteration = 16;
241 switch(pool_size_x)
242 {
243 case 2:
244 num_elems_horizontal_window = (pool_stride_x == 2) ? 8 : 16;
245 num_elems_processed_per_iteration = (pool_stride_x == 2) ? 8 : 15;
246 break;
247 case 3:
248 num_elems_horizontal_window = (pool_stride_x == 2) ? 8 : 16;
249 num_elems_processed_per_iteration = (pool_stride_x == 2) ? 7 : 14;
250 break;
251 default:
252 break;
253 }
254 break;
255 case DataType::QASYMM8:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000256 if(is_nhwc)
257 {
258 num_elems_processed_per_iteration = 8;
259 break;
260 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000261 switch(pool_size_x)
262 {
263 case 2:
264 num_elems_read_per_iteration = 16;
265 num_elems_processed_per_iteration = (pool_stride_x == 2) ? 8 : 15;
266 num_elems_horizontal_window = (pool_stride_x == 2) ? 8 : 16;
267 break;
268 case 3:
269 num_elems_read_per_iteration = 16;
270 num_elems_processed_per_iteration = (pool_stride_x == 2) ? 7 : 14;
271 num_elems_horizontal_window = (pool_stride_x == 2) ? 8 : 16;
272 break;
273 default:
274 break;
275 }
276 break;
277 case DataType::QS16:
278 num_elems_read_per_iteration = 8;
279 switch(pool_size_x)
280 {
281 case 2:
282 num_elems_horizontal_window = (pool_stride_x == 2) ? 4 : 8;
283 num_elems_processed_per_iteration = (pool_stride_x == 2) ? 4 : 7;
284 break;
285 case 3:
286 num_elems_horizontal_window = (pool_stride_x == 2) ? 4 : 8;
287 num_elems_processed_per_iteration = (pool_stride_x == 2) ? 3 : 6;
288 break;
289 default:
290 break;
291 }
292 break;
293#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
294 case DataType::F16:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000295 if(is_nhwc)
296 {
297 num_elems_processed_per_iteration = 8;
298 break;
299 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000300 switch(pool_size_x)
301 {
302 case 2:
303 num_elems_read_per_iteration = 16;
304 num_elems_processed_per_iteration = 8;
305 num_elems_horizontal_window = 8;
306 break;
307 case 3:
308 num_elems_read_per_iteration = 4;
309 num_elems_processed_per_iteration = 1;
310 num_elems_horizontal_window = 1;
311 break;
312 default:
313 break;
314 }
315 break;
316#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
317 case DataType::F32:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000318 if(is_nhwc)
319 {
320 num_elems_processed_per_iteration = 4;
321 break;
322 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000323 switch(pool_size_x)
324 {
325 case 2:
326 num_elems_read_per_iteration = 2;
327 break;
328 case 3:
329 num_elems_read_per_iteration = 4; // We use vload4 for pooling3
330 break;
331 case 7:
332 num_elems_read_per_iteration = 8; // We use vload8 for pooling7
333 break;
334 default:
335 break;
336 }
337 num_elems_processed_per_iteration = 1;
338 num_elems_horizontal_window = 1;
339 break;
340 default:
341 ARM_COMPUTE_ERROR("Element size not supported");
342 break;
343 }
344 }
Michalis Spyrou57dac842018-03-01 16:03:50 +0000345 else
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000346 {
Michalis Spyrou57dac842018-03-01 16:03:50 +0000347 if(is_nhwc)
348 {
349 if(DataType::QASYMM8 == input->data_type())
350 {
351 num_elems_processed_per_iteration = 8;
352 }
353 else
354 {
355 num_elems_processed_per_iteration = 4;
356 }
357 }
358 }
359
360 bool window_changed = false;
361 Window win{};
362 if(data_layout == DataLayout::NCHW)
363 {
364 // Number of iterations in X dimension
365 const int num_iterations_x = (pooled_w + num_elems_processed_per_iteration - 1) / num_elems_processed_per_iteration;
366
367 // Upper limit for the number of right/bottom border elements that are accessed
368 const int upper_bound_w = ((num_iterations_x - 1) * num_elems_processed_per_iteration * pool_stride_x - pool_pad_left + num_elems_read_per_iteration) - input_width;
369 const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_top + pool_size_y) - input_height;
370
371 border_size = BorderSize(pool_pad_top, pool_pad_right, pool_pad_bottom, pool_pad_left);
372 border_size.right = std::max(upper_bound_w, pool_pad_right);
373 border_size.bottom = std::max(upper_bound_h, pool_pad_bottom);
374
375 TensorShape output_shape{ input->tensor_shape() };
376 output_shape.set(0, pooled_w);
377 output_shape.set(1, pooled_h);
378 TensorInfo output_info(input->clone()->set_tensor_shape(output_shape));
379
380 win = calculate_max_window(output_info, Steps(num_elems_processed_per_iteration));
381 AccessWindowStatic input_access(input, -pool_pad_left, -pool_pad_top, input_width + border_size.right, input_height + border_size.bottom);
382
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000383 AccessWindowHorizontal output_access(output, 0, num_elems_horizontal_window);
384 window_changed = update_window_and_padding(win, input_access, output_access);
385 output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
386 }
387 else
388 {
Michalis Spyrou57dac842018-03-01 16:03:50 +0000389 TensorShape output_shape{ input->tensor_shape() };
390 output_shape.set(1, pooled_w);
391 output_shape.set(2, pooled_h);
392 TensorInfo output_info(input->clone()->set_tensor_shape(output_shape));
393
394 win = calculate_max_window(output_info, Steps(num_elems_processed_per_iteration));
395 AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
396
397 AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
398 window_changed = update_window_and_padding(win, input_access, output_access);
399 output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000400 }
401
402 Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
403 return std::make_pair(err, win);
404}
405} // namespace
406
407NEPoolingLayerKernel::NEPoolingLayerKernel()
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000408 : _func(nullptr), _input(nullptr), _output(nullptr), _pool_info(), _num_elems_processed_per_iteration(0), _border_size(0), _is_square(false)
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000409{
410}
411
412BorderSize NEPoolingLayerKernel::border_size() const
413{
414 return _border_size;
415}
416
417void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info)
418{
419 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
420
Diego Lopez Recas61ef5bf2017-12-11 12:36:55 +0000421 const PoolingType pool_type = pool_info.pool_type();
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000422 const PadStrideInfo pad_stride_info = pool_info.pad_stride_info();
423 const bool exclude_padding = pool_info.exclude_padding();
424 const bool is_global_pooling = pool_info.is_global_pooling();
Diego Lopez Recas61ef5bf2017-12-11 12:36:55 +0000425 const int pool_stride_x = pad_stride_info.stride().first;
Michalis Spyrou57dac842018-03-01 16:03:50 +0000426 unsigned int pool_size_x = 0;
427 unsigned int pool_size_y = 0;
428
429 // Get data layout
430 const DataLayout data_layout = input->info()->data_layout();
431 const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
432 const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000433
434 // Update pool size in case of global pooling
Michalis Spyrou57dac842018-03-01 16:03:50 +0000435 pool_size_x = is_global_pooling ? input->info()->dimension(idx_width) : pool_info.pool_size().width;
436 pool_size_y = is_global_pooling ? input->info()->dimension(idx_height) : pool_info.pool_size().height;
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000437
438 // Validate pool info before calling scaled_dimensions
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000439 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_pool_info(pool_size_x, pool_size_y));
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000440
441 // Check output dimensions
Diego Lopez Recas61ef5bf2017-12-11 12:36:55 +0000442 unsigned int pooled_w, pooled_h;
Michalis Spyrou57dac842018-03-01 16:03:50 +0000443 std::tie(pooled_w, pooled_h) = scaled_dimensions(input->info()->dimension(idx_width),
444 input->info()->dimension(idx_height),
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000445 pool_size_x,
446 pool_size_y,
Diego Lopez Recas61ef5bf2017-12-11 12:36:55 +0000447 pad_stride_info);
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000448
449 // Output auto initialization if not yet initialized
450 auto_init(input->info(), output->info(), pooled_w, pooled_h);
451
452 // Perform validation step
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000453 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info, pooled_w, pooled_h, pool_size_x));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100454
455 // Set instance variables
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000456 _input = input;
457 _output = output;
458 _pool_info = pool_info;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000459 _is_square = (pool_size_x == pool_size_y);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100460
Georgios Pinitas55186712018-01-08 17:37:12 +0000461 // Get data type
462 const DataType data_type = input->info()->data_type();
Michalis Spyrou57dac842018-03-01 16:03:50 +0000463 const bool is_nchw = data_layout == DataLayout::NCHW;
Georgios Pinitas55186712018-01-08 17:37:12 +0000464
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100465 // Select appropriate function
Georgios Pinitas55186712018-01-08 17:37:12 +0000466 if(data_type == DataType::QS8)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100467 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000468 if(_is_square)
Georgios Pinitas55186712018-01-08 17:37:12 +0000469 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000470 switch(pool_size_x)
471 {
472 case 2:
473 switch(pool_type)
474 {
475 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000476 _func = &NEPoolingLayerKernel::pooling2_q8_nchw<PoolingType::AVG>;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000477 break;
478 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000479 _func = &NEPoolingLayerKernel::pooling2_q8_nchw<PoolingType::MAX>;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000480 break;
481 default:
482 ARM_COMPUTE_ERROR("Unsupported pooling type!");
483 }
484 break;
485 case 3:
486 switch(pool_type)
487 {
488 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000489 _func = &NEPoolingLayerKernel::pooling3_q8_nchw<PoolingType::AVG>;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000490 break;
491 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000492 _func = &NEPoolingLayerKernel::pooling3_q8_nchw<PoolingType::MAX>;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000493 break;
494 default:
495 ARM_COMPUTE_ERROR("Unsupported pooling type!");
496 }
497 break;
498 default:
499 switch(pool_type)
500 {
501 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000502 _func = &NEPoolingLayerKernel::poolingMxN_q8_nchw<PoolingType::MAX>;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000503 break;
504 default:
505 ARM_COMPUTE_ERROR("Unsupported pooling type!");
506 }
507 break;
508 }
509 }
510 else
511 {
512 switch(pool_type)
513 {
514 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000515 _func = &NEPoolingLayerKernel::poolingMxN_q8_nchw<PoolingType::MAX>;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000516 break;
517 default:
518 ARM_COMPUTE_ERROR("Unsupported pooling type!");
519 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000520 }
521 }
522 else if(data_type == DataType::QASYMM8)
523 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000524 if(pool_size_x == 2 && pool_stride_x < 3 && _is_square)
Georgios Pinitas55186712018-01-08 17:37:12 +0000525 {
526 switch(pool_type)
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100527 {
Georgios Pinitas55186712018-01-08 17:37:12 +0000528 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000529 if(is_nchw)
530 {
531 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_qasymm8_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling2_qasymm8_nchw<PoolingType::AVG, false>;
532 }
533 else
534 {
535 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::AVG, false>;
536 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000537 break;
538 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000539 if(is_nchw)
540 {
541 _func = &NEPoolingLayerKernel::pooling2_qasymm8_nchw<PoolingType::MAX>;
542 }
543 else
544 {
545 _func = &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::MAX>;
546 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000547 break;
548 default:
549 ARM_COMPUTE_ERROR("Unsupported pooling type!");
550 }
551 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000552 else if(pool_size_x == 3 && pool_stride_x < 3 && _is_square)
Georgios Pinitas55186712018-01-08 17:37:12 +0000553 {
554 switch(pool_type)
555 {
556 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000557 if(is_nchw)
558 {
559 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_qasymm8_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling3_qasymm8_nchw<PoolingType::AVG, false>;
560 }
561 else
562 {
563 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::AVG, false>;
564 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000565 break;
566 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000567 if(is_nchw)
568 {
569 _func = &NEPoolingLayerKernel::pooling3_qasymm8_nchw<PoolingType::MAX>;
570 }
571 else
572 {
573 _func = &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::MAX>;
574 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000575 break;
576 default:
577 ARM_COMPUTE_ERROR("Unsupported pooling type!");
578 }
579 }
580 else
581 {
582 switch(pool_type)
583 {
584 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000585 if(is_nchw)
586 {
587 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_qasymm8_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_qasymm8_nchw<PoolingType::AVG, false>;
588 }
589 else
590 {
591 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::AVG, false>;
592 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000593 break;
594 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000595 if(is_nchw)
596 {
597 _func = &NEPoolingLayerKernel::poolingMxN_qasymm8_nchw<PoolingType::MAX>;
598 }
599 else
600 {
601 _func = &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::MAX>;
602 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000603 break;
604 default:
605 ARM_COMPUTE_ERROR("Unsupported pooling type!");
606 }
607 }
608 }
609 else if(data_type == DataType::QS16)
610 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000611 if(_is_square)
Georgios Pinitas55186712018-01-08 17:37:12 +0000612 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000613 switch(pool_size_x)
614 {
615 case 2:
616 switch(pool_type)
617 {
618 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000619 _func = &NEPoolingLayerKernel::pooling2_q16_nchw<PoolingType::AVG>;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000620 break;
621 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000622 _func = &NEPoolingLayerKernel::pooling2_q16_nchw<PoolingType::MAX>;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000623 break;
624 default:
625 ARM_COMPUTE_ERROR("Unsupported pooling type!");
626 }
627 break;
628 case 3:
629 switch(pool_type)
630 {
631 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000632 _func = &NEPoolingLayerKernel::pooling3_q16_nchw<PoolingType::AVG>;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000633 break;
634 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000635 _func = &NEPoolingLayerKernel::pooling3_q16_nchw<PoolingType::MAX>;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000636 break;
637 default:
638 ARM_COMPUTE_ERROR("Unsupported pooling type!");
639 }
640 break;
641 default:
642 switch(pool_type)
643 {
644 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000645 _func = &NEPoolingLayerKernel::poolingMxN_q16_nchw<PoolingType::MAX>;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000646 break;
647 default:
648 ARM_COMPUTE_ERROR("Unsupported pooling type!");
649 }
650 break;
651 }
652 }
653 else
654 {
655 switch(pool_type)
656 {
657 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000658 _func = &NEPoolingLayerKernel::poolingMxN_q16_nchw<PoolingType::MAX>;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000659 break;
660 default:
661 ARM_COMPUTE_ERROR("Unsupported pooling type!");
662 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000663 }
664 }
665 else if(data_type == DataType::F16)
666 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000667 if(_is_square)
Georgios Pinitas55186712018-01-08 17:37:12 +0000668 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000669 switch(pool_size_x)
670 {
671 case 2:
672 switch(pool_type)
673 {
674 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000675 if(is_nchw)
676 {
677 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_f16_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling2_f16_nchw<PoolingType::AVG, false>;
678 }
679 else
680 {
681 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::AVG, false>;
682 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000683 break;
684 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000685 if(is_nchw)
686 {
687 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_f16_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::pooling2_f16_nchw<PoolingType::L2, false>;
688 }
689 else
690 {
691 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::L2, false>;
692 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000693 break;
694 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000695 if(is_nchw)
696 {
697 _func = &NEPoolingLayerKernel::pooling2_f16_nchw<PoolingType::MAX, false>;
698 }
699 else
700 {
701 _func = &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::MAX, false>;
702 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000703 break;
704 default:
705 ARM_COMPUTE_ERROR("Unsupported pooling type!");
706 }
707 break;
708 case 3:
709 switch(pool_type)
710 {
711 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000712 if(is_nchw)
713 {
714 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f16_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling3_f16_nchw<PoolingType::AVG, false>;
715 }
716 else
717 {
718 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::AVG, false>;
719 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000720 break;
721 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000722 if(is_nchw)
723 {
724 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f16_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::pooling3_f16_nchw<PoolingType::L2, false>;
725 }
726 else
727 {
728 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::L2, false>;
729 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000730 break;
731 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000732 if(is_nchw)
733 {
734 _func = &NEPoolingLayerKernel::pooling3_f16_nchw<PoolingType::MAX, false>;
735 }
736 else
737 {
738 _func = &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::MAX, false>;
739 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000740 break;
741 default:
742 ARM_COMPUTE_ERROR("Unsupported pooling type!");
743 }
744 break;
745 default:
746 switch(pool_type)
747 {
748 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000749 if(is_nchw)
750 {
751 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::AVG, false>;
752 }
753 else
754 {
755 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::AVG, false>;
756 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000757 break;
758 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000759 if(is_nchw)
760 {
761 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::L2, false>;
762 }
763 else
764 {
765 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::L2, false>;
766 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000767 break;
768 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000769 if(is_nchw)
770 {
771 _func = &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::MAX, false>;
772 }
773 else
774 {
775 _func = &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::MAX, false>;
776 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000777 break;
778 default:
779 ARM_COMPUTE_ERROR("Unsupported pooling type!");
780 }
781 break;
782 }
783 }
784 else
785 {
786 switch(pool_type)
787 {
788 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000789 if(is_nchw)
790 {
791 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::AVG, false>;
792 }
793 else
794 {
795 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::AVG, false>;
796 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000797 break;
798 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000799 if(is_nchw)
800 {
801 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::L2, false>;
802 }
803 else
804 {
805 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::L2, false>;
806 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000807 break;
808 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000809 if(is_nchw)
810 {
811 _func = &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::MAX, false>;
812 }
813 else
814 {
815 _func = &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::MAX, false>;
816 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000817 break;
818 default:
819 ARM_COMPUTE_ERROR("Unsupported pooling type!");
820 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000821 }
822 }
823 else if(data_type == DataType::F32)
824 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000825 if(_is_square)
Georgios Pinitas55186712018-01-08 17:37:12 +0000826 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000827 switch(pool_size_x)
828 {
829 case 2:
830 switch(pool_type)
831 {
832 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000833 if(is_nchw)
834 {
835 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_f32_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling2_f32_nchw<PoolingType::AVG, false>;
836 }
837 else
838 {
839 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, false>;
840 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000841 break;
842 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000843 if(is_nchw)
844 {
845 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_f32_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::pooling2_f32_nchw<PoolingType::L2, false>;
846 }
847 else
848 {
849 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, false>;
850 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000851 break;
852 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000853 if(is_nchw)
854 {
855 _func = &NEPoolingLayerKernel::pooling2_f32_nchw<PoolingType::MAX, false>;
856 }
857 else
858 {
859 _func = &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::MAX, false>;
860 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000861 break;
862 default:
863 ARM_COMPUTE_ERROR("Unsupported pooling type!");
864 }
865 break;
866 case 3:
867 switch(pool_type)
868 {
869 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000870 if(is_nchw)
871 {
872 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f32_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling3_f32_nchw<PoolingType::AVG, false>;
873 }
874 else
875 {
876 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, false>;
877 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000878 break;
879 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000880 if(is_nchw)
881 {
882 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f32_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::pooling3_f32_nchw<PoolingType::L2, false>;
883 }
884 else
885 {
886 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, false>;
887 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000888 break;
889 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000890 if(is_nchw)
891 {
892 _func = &NEPoolingLayerKernel::pooling3_f32_nchw<PoolingType::MAX, false>;
893 }
894 else
895 {
896 _func = &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::MAX, false>;
897 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000898 break;
899 default:
900 ARM_COMPUTE_ERROR("Unsupported pooling type!");
901 }
902 break;
903 case 7:
904 switch(pool_type)
905 {
906 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000907 if(is_nchw)
908 {
909 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling7_f32_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling7_f32_nchw<PoolingType::AVG, false>;
910 }
911 else
912 {
913 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, false>;
914 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000915 break;
916 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000917 if(is_nchw)
918 {
919 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling7_f32_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::pooling7_f32_nchw<PoolingType::L2, false>;
920 }
921 else
922 {
923 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, false>;
924 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000925 break;
926 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000927 if(is_nchw)
928 {
929 _func = &NEPoolingLayerKernel::pooling7_f32_nchw<PoolingType::MAX, false>;
930 }
931 else
932 {
933 _func = &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::MAX, false>;
934 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000935 break;
936 default:
937 ARM_COMPUTE_ERROR("Unsupported pooling type!");
938 }
939 break;
940 default:
941 switch(pool_type)
942 {
943 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000944 if(is_nchw)
945 {
946 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::AVG, false>;
947 }
948 else
949 {
950 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, false>;
951 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000952 break;
953 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000954 if(is_nchw)
955 {
956 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::L2, false>;
957 }
958 else
959 {
960 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, false>;
961 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000962 break;
963 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000964 if(is_nchw)
965 {
966 _func = &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::MAX, false>;
967 }
968 else
969 {
970 _func = &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::MAX, false>;
971 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000972 break;
973 default:
974 ARM_COMPUTE_ERROR("Unsupported pooling type!");
975 }
976 break;
977 }
978 }
979 else
980 {
981 switch(pool_type)
982 {
983 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000984 if(is_nchw)
985 {
986 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::AVG, false>;
987 }
988 else
989 {
990 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, false>;
991 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000992 break;
993 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000994 if(is_nchw)
995 {
996 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::L2, false>;
997 }
998 else
999 {
1000 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, false>;
1001 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001002 break;
1003 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +00001004 if(is_nchw)
1005 {
1006 _func = &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::MAX, false>;
1007 }
1008 else
1009 {
1010 _func = &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::MAX, false>;
1011 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001012 break;
1013 default:
1014 ARM_COMPUTE_ERROR("Unsupported pooling type!");
1015 }
Georgios Pinitas55186712018-01-08 17:37:12 +00001016 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001017 }
1018
1019 // Configure kernel window
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001020 auto win_config = validate_and_configure_window(input->info(), output->info(), pool_info, _num_elems_processed_per_iteration, _border_size, pooled_w, pooled_h, pool_size_x, pool_size_y);
Michalis Spyrouafa5d812017-11-30 14:25:57 +00001021 ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
1022 INEKernel::configure(win_config.second);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001023}
1024
1025template <PoolingType pooling_type>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001026void NEPoolingLayerKernel::pooling2_q8_nchw(const Window &window_input, const Window &window)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001027{
1028 Iterator input(_input, window_input);
1029 Iterator output(_output, window);
1030
1031 const int fixed_point_position = _input->info()->fixed_point_position();
1032 constexpr int pool_size = 2;
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001033 int pool_stride_x = 0;
1034 int pool_stride_y = 0;
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001035 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1036 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1037 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1038 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001039 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001040 const int upper_bound_w = _input->info()->dimension(0) + pool_pad_right;
1041 const int upper_bound_h = _input->info()->dimension(1) + pool_pad_bottom;
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001042
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001043 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
1044 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001045
1046 execute_window_loop(window, [&](const Coordinates & id)
1047 {
1048 const auto top_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_top_ptr + input.offset()));
1049 const auto bottom_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_bottom_ptr + input.offset()));
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001050 qint8x8_t lower_res = {};
1051 qint8x8_t upper_res = {};
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001052 if(pooling_type == PoolingType::AVG)
1053 {
1054 // Calculate scale
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001055 const qint8_t scale = calculate_avg_scale_q8(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y, fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001056 const qint8x8_t scale_vec = vdup_n_qs8(scale);
1057
1058 // Perform pooling
1059 const qint8x16_t sum_data = vqaddq_qs8(top_data, bottom_data);
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001060 lower_res = vqmul_qs8(vpadd_s8(vget_low_s8(sum_data), vget_high_s8(sum_data)), scale_vec, fixed_point_position);
1061 if(pool_stride_x == 1)
1062 {
1063 const qint8x16_t sum_data_shifted = vextq_s8(sum_data, sum_data, 1);
1064 upper_res = vqmul_qs8(vpadd_s8(vget_low_s8(sum_data_shifted), vget_high_s8(sum_data_shifted)), scale_vec, fixed_point_position);
1065 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001066 }
1067 else
1068 {
1069 const qint8x16_t max_data = vmaxq_s8(top_data, bottom_data);
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001070 lower_res = vpmax_s8(vget_low_s8(max_data), vget_high_s8(max_data));
1071 if(pool_stride_x == 1)
1072 {
1073 const qint8x16_t max_data_shifted = vextq_s8(max_data, max_data, 1);
1074 upper_res = vpmax_s8(vget_low_s8(max_data_shifted), vget_high_s8(max_data_shifted));
1075 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001076 }
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001077 if(pool_stride_x == 1)
1078 {
Georgios Pinitasdc460f12017-08-24 19:02:44 +01001079 const qint8x8x2_t res = { { lower_res, upper_res } };
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001080 vst2_s8(reinterpret_cast<qint8_t *>(output.ptr()), res);
1081 }
1082 else
1083 {
1084 vst1_qs8(reinterpret_cast<qint8_t *>(output.ptr()), lower_res);
1085 }
1086 },
1087 input, output);
1088}
1089
Georgios Pinitas55186712018-01-08 17:37:12 +00001090template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001091void NEPoolingLayerKernel::pooling2_qasymm8_nchw(const Window &window_input, const Window &window)
Georgios Pinitas55186712018-01-08 17:37:12 +00001092{
1093 Iterator input(_input, window_input);
1094 Iterator output(_output, window);
1095
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001096 constexpr int pool_size = 2;
1097 int pool_stride_x = 0;
1098 int pool_stride_y = 0;
1099 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1100 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1101 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1102 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
Georgios Pinitas55186712018-01-08 17:37:12 +00001103 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001104 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1105 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Georgios Pinitas55186712018-01-08 17:37:12 +00001106
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001107 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
1108 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
Georgios Pinitas55186712018-01-08 17:37:12 +00001109
1110 const int scale_step_x = (pool_stride_x == 1) ? 2 : 1;
1111
1112 execute_window_loop(window, [&](const Coordinates & id)
1113 {
1114 const auto top_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_top_ptr + input.offset()));
1115 const auto bottom_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_bottom_ptr + input.offset()));
1116 uint8x8_t lower_res = {};
1117 uint8x8_t upper_res = {};
1118
1119 if(pooling_type != PoolingType::MAX)
1120 {
1121 const uint16x8x2_t top_data_u16 = { { vmovl_u8(vget_low_u8(top_data)), vmovl_u8(vget_high_u8(top_data)) } };
1122 const uint16x8x2_t bottom_data_u16 = { { vmovl_u8(vget_low_u8(bottom_data)), vmovl_u8(vget_high_u8(bottom_data)) } };
1123
1124 // Add rows
1125 const uint16x8x2_t vrsum =
1126 {
1127 {
1128 vaddq_u16(top_data_u16.val[0], bottom_data_u16.val[0]),
1129 vaddq_u16(top_data_u16.val[1], bottom_data_u16.val[1]),
1130 }
1131 };
1132
1133 // Pair-wise add row data
1134 const uint16x4x2_t vpsum =
1135 {
1136 {
1137 vpadd_u16(vget_low_u16(vrsum.val[0]), vget_high_u16(vrsum.val[0])),
1138 vpadd_u16(vget_low_u16(vrsum.val[1]), vget_high_u16(vrsum.val[1])),
1139 }
1140 };
1141
1142 uint16x8_t res_lower = vcombine_u16(vpsum.val[0], vpsum.val[1]);
1143
1144 // Scale lower result
1145 scale_vector_s16x8<exclude_padding>(res_lower, id, 0, scale_step_x,
1146 pool_size, upper_bound_w, upper_bound_h,
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001147 pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitas55186712018-01-08 17:37:12 +00001148 lower_res = vmovn_u16(res_lower);
1149
1150 // Compute upper result for stride_x == 1
1151 if(pool_stride_x == 1)
1152 {
1153 // Shifted row sum
1154 const uint16x8x2_t vrsum_shifted =
1155 {
1156 {
1157 vextq_u16(vrsum.val[0], vrsum.val[1], 1),
1158 vextq_u16(vrsum.val[1], vrsum.val[1], 1)
1159 }
1160 };
1161
1162 // Pair-wise add shifted row
1163 const uint16x4x2_t vpsum_shifted =
1164 {
1165 {
1166 vpadd_u16(vget_low_u16(vrsum_shifted.val[0]), vget_high_u16(vrsum_shifted.val[0])),
1167 vpadd_u16(vget_low_u16(vrsum_shifted.val[1]), vget_high_u16(vrsum_shifted.val[1])),
1168 }
1169 };
1170 uint16x8_t res_upper = vcombine_u16(vpsum_shifted.val[0], vpsum_shifted.val[1]);
1171
1172 // Scale lower result
1173 scale_vector_s16x8<exclude_padding>(res_upper, id, 1, 2,
1174 pool_size, upper_bound_w, upper_bound_h,
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001175 pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitas55186712018-01-08 17:37:12 +00001176 upper_res = vmovn_u16(res_upper);
1177 }
1178 }
1179 else
1180 {
1181 const uint8x16_t max_data = vmaxq_u8(top_data, bottom_data);
1182 lower_res = vpmax_u8(vget_low_u8(max_data), vget_high_u8(max_data));
1183 if(pool_stride_x == 1)
1184 {
1185 const uint8x16_t max_data_shifted = vextq_u8(max_data, max_data, 1);
1186 upper_res = vpmax_u8(vget_low_u8(max_data_shifted), vget_high_u8(max_data_shifted));
1187 }
1188 }
1189
1190 // Store result
1191 if(pool_stride_x == 1)
1192 {
1193 const uint8x8x2_t res = { { lower_res, upper_res } };
1194 vst2_u8(reinterpret_cast<uint8_t *>(output.ptr()), res);
1195 }
1196 else
1197 {
1198 vst1_u8(reinterpret_cast<uint8_t *>(output.ptr()), lower_res);
1199 }
1200 },
1201 input, output);
1202}
1203
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001204template <PoolingType pooling_type>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001205void NEPoolingLayerKernel::pooling2_q16_nchw(const Window &window_input, const Window &window)
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001206{
1207 Iterator input(_input, window_input);
1208 Iterator output(_output, window);
1209
1210 const int fixed_point_position = _input->info()->fixed_point_position();
1211 constexpr int pool_size = 2;
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001212 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1213 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1214 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1215 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001216 int pool_stride_x = 0;
1217 int pool_stride_y = 0;
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001218 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001219 const int upper_bound_w = _input->info()->dimension(0) + pool_pad_right;
1220 const int upper_bound_h = _input->info()->dimension(1) + pool_pad_bottom;
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001221
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001222 const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
1223 const unsigned char *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001224
1225 execute_window_loop(window, [&](const Coordinates & id)
1226 {
1227 const auto top_data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input_top_ptr + input.offset()));
1228 const auto bottom_data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input_bottom_ptr + input.offset()));
1229 qint16x4_t lower_res = {};
1230 qint16x4_t upper_res = {};
1231 if(pooling_type == PoolingType::AVG)
1232 {
1233 // Calculate scale
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001234 const qint16_t scale = calculate_avg_scale_q16(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y, fixed_point_position);
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001235 const qint16x4_t scale_vec = vdup_n_qs16(scale);
1236
1237 // Perform pooling
1238 const qint16x8_t sum_data = vqaddq_qs16(top_data, bottom_data);
1239 lower_res = vqmul_qs16(vpadd_s16(vget_low_s16(sum_data), vget_high_s16(sum_data)), scale_vec, fixed_point_position);
1240 if(pool_stride_x == 1)
1241 {
1242 const qint16x8_t sum_data_shifted = vextq_s16(sum_data, sum_data, 1);
1243 upper_res = vqmul_qs16(vpadd_s16(vget_low_s16(sum_data_shifted), vget_high_s16(sum_data_shifted)), scale_vec, fixed_point_position);
1244 }
1245 }
1246 else
1247 {
1248 const qint16x8_t max_data = vmaxq_s16(top_data, bottom_data);
1249 lower_res = vpmax_s16(vget_low_s16(max_data), vget_high_s16(max_data));
1250 if(pool_stride_x == 1)
1251 {
1252 const qint16x8_t max_data_shifted = vextq_s16(max_data, max_data, 1);
1253 upper_res = vpmax_s16(vget_low_s16(max_data_shifted), vget_high_s16(max_data_shifted));
1254 }
1255 }
1256 if(pool_stride_x == 1)
1257 {
Georgios Pinitasdc460f12017-08-24 19:02:44 +01001258 const qint16x4x2_t res = { { lower_res, upper_res } };
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001259 vst2_s16(reinterpret_cast<qint16_t *>(output.ptr()), res);
1260 }
1261 else
1262 {
1263 vst1_qs16(reinterpret_cast<qint16_t *>(output.ptr()), lower_res);
1264 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001265 },
1266 input, output);
1267}
1268
Georgios Pinitasadaae7e2017-10-30 15:56:32 +00001269template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001270void NEPoolingLayerKernel::pooling3_f16_nchw(const Window &window_input, const Window &window)
Pablo Tello0c34fe22017-06-26 17:17:42 +01001271{
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +00001272#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Pablo Tello0c34fe22017-06-26 17:17:42 +01001273 Iterator input(_input, window_input);
1274 Iterator output(_output, window);
1275
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001276 constexpr const int pool_size = 3;
1277 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1278 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1279 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1280 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1281 int pool_stride_x = 0;
1282 int pool_stride_y = 0;
Pablo Tello0c34fe22017-06-26 17:17:42 +01001283 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001284 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1285 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Pablo Tello0c34fe22017-06-26 17:17:42 +01001286
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001287 const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
1288 const unsigned char *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
1289 const unsigned char *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 2));
Pablo Tello0c34fe22017-06-26 17:17:42 +01001290
1291 execute_window_loop(window, [&](const Coordinates & id)
1292 {
Georgios Pinitascdf51452017-08-31 14:21:36 +01001293 float16x4_t top_data = vld1_f16(reinterpret_cast<const float16_t *>(input_top_ptr + input.offset()));
1294 float16x4_t middle_data = vld1_f16(reinterpret_cast<const float16_t *>(input_middle_ptr + input.offset()));
1295 float16x4_t bottom_data = vld1_f16(reinterpret_cast<const float16_t *>(input_bottom_ptr + input.offset()));
1296 float16x4_t res = {};
1297
1298 // Get power of 2 in case of l2 pooling
1299 if(pooling_type == PoolingType::L2)
1300 {
1301 top_data = vmul_f16(top_data, top_data);
1302 middle_data = vmul_f16(middle_data, middle_data);
1303 bottom_data = vmul_f16(bottom_data, bottom_data);
1304 }
1305
1306 if(pooling_type != PoolingType::MAX)
Pablo Tello0c34fe22017-06-26 17:17:42 +01001307 {
1308 // Calculate scale
Michalis Spyrou57dac842018-03-01 16:03:50 +00001309 const float scale = calculate_avg_scale<exclude_padding, DataLayout::NCHW>(id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Pablo Tello0c34fe22017-06-26 17:17:42 +01001310 const float16x4_t scale_v = vdup_n_f16(scale);
1311 // Perform pooling
1312 const float16x4_t sum_data = vadd_f16(vadd_f16(top_data, bottom_data), middle_data);
1313 res = vpadd_f16(vset_lane_f16(0.f, sum_data, 3), sum_data);
1314 res = vmul_f16(vpadd_f16(res, res), scale_v);
1315 }
1316 else
1317 {
1318 const float16x4_t max_data = vmax_f16(vmax_f16(top_data, bottom_data), middle_data);
1319 res = vpmax_f16(vset_lane_f16(-std::numeric_limits<float>::max(), max_data, 3), max_data);
1320 res = vpmax_f16(res, res);
1321 }
Georgios Pinitascdf51452017-08-31 14:21:36 +01001322
1323 // Calculate square-root in case of l2 pooling
1324 if(pooling_type == PoolingType::L2)
1325 {
1326 res = vinv_f16(vinvsqrt_f16(res));
1327 }
1328
Pablo Tello0c34fe22017-06-26 17:17:42 +01001329 *(reinterpret_cast<float16_t *>(output.ptr())) = vget_lane_f16(res, 0);
1330 },
1331 input, output);
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +00001332#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tello0c34fe22017-06-26 17:17:42 +01001333 ARM_COMPUTE_UNUSED(window_input);
1334 ARM_COMPUTE_UNUSED(window);
1335 ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +00001336#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tello0c34fe22017-06-26 17:17:42 +01001337}
1338
Georgios Pinitasadaae7e2017-10-30 15:56:32 +00001339template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001340void NEPoolingLayerKernel::pooling2_f16_nchw(const Window &window_input, const Window &window)
Pablo Tello0c34fe22017-06-26 17:17:42 +01001341{
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +00001342#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Pablo Tello0c34fe22017-06-26 17:17:42 +01001343 Iterator input(_input, window_input);
1344 Iterator output(_output, window);
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001345 constexpr int pool_size = 2;
1346 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1347 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1348 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1349 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1350 int pool_stride_x, pool_stride_y = 0;
1351 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
1352 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1353 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Pablo Tello0c34fe22017-06-26 17:17:42 +01001354
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001355 const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
1356 const unsigned char *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
Pablo Tello0c34fe22017-06-26 17:17:42 +01001357
1358 execute_window_loop(window, [&](const Coordinates & id)
1359 {
Georgios Pinitascdf51452017-08-31 14:21:36 +01001360 auto top_data = vld2q_f16(reinterpret_cast<const float16_t *>(input_top_ptr + input.offset()));
1361 auto bottom_data = vld2q_f16(reinterpret_cast<const float16_t *>(input_bottom_ptr + input.offset()));
Pablo Tello0c34fe22017-06-26 17:17:42 +01001362 float16x8_t res = {};
1363
Georgios Pinitascdf51452017-08-31 14:21:36 +01001364 // Get power of 2 in case of l2 pooling
1365 if(pooling_type == PoolingType::L2)
1366 {
1367 top_data.val[0] = vmulq_f16(top_data.val[0], top_data.val[0]);
1368 top_data.val[1] = vmulq_f16(top_data.val[1], top_data.val[1]);
1369 bottom_data.val[0] = vmulq_f16(bottom_data.val[0], bottom_data.val[0]);
1370 bottom_data.val[1] = vmulq_f16(bottom_data.val[1], bottom_data.val[1]);
1371 }
1372
1373 if(pooling_type != PoolingType::MAX)
Pablo Tello0c34fe22017-06-26 17:17:42 +01001374 {
Michalis Spyrou57dac842018-03-01 16:03:50 +00001375 const float scale = calculate_avg_scale<exclude_padding, DataLayout::NCHW>(id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Pablo Tello0c34fe22017-06-26 17:17:42 +01001376 const float16x8_t scale_v = vdupq_n_f16(scale);
1377 res = vmulq_f16(scale_v, vaddq_f16(bottom_data.val[1], vaddq_f16(bottom_data.val[0], vaddq_f16(top_data.val[0], top_data.val[1]))));
1378 }
1379 else
1380 {
1381 res = vmaxq_f16(bottom_data.val[1], vmaxq_f16(bottom_data.val[0], vmaxq_f16(top_data.val[0], top_data.val[1])));
1382 }
Georgios Pinitascdf51452017-08-31 14:21:36 +01001383
1384 // Calculate square-root in case of l2 pooling
1385 if(pooling_type == PoolingType::L2)
1386 {
1387 res = vinvq_f16(vinvsqrtq_f16(res));
1388 }
1389
1390 // Store result
Pablo Tello0c34fe22017-06-26 17:17:42 +01001391 vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()), res);
1392 },
1393 input, output);
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +00001394#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tello0c34fe22017-06-26 17:17:42 +01001395 ARM_COMPUTE_UNUSED(window_input);
1396 ARM_COMPUTE_UNUSED(window);
1397 ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +00001398#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tello0c34fe22017-06-26 17:17:42 +01001399}
1400
Georgios Pinitasadaae7e2017-10-30 15:56:32 +00001401template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001402void NEPoolingLayerKernel::pooling2_f32_nchw(const Window &window_input, const Window &window)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001403{
1404 Iterator input(_input, window_input);
1405 Iterator output(_output, window);
1406
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001407 constexpr int pool_size = 2;
1408 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1409 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1410 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1411 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1412 int pool_stride_x = 0;
1413 int pool_stride_y = 0;
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001414 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001415 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1416 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001417
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001418 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
1419 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001420
1421 execute_window_loop(window, [&](const Coordinates & id)
1422 {
Georgios Pinitascdf51452017-08-31 14:21:36 +01001423 float32x2_t top_data = vld1_f32(reinterpret_cast<const float *>(input_top_ptr + input.offset()));
1424 float32x2_t bottom_data = vld1_f32(reinterpret_cast<const float *>(input_bottom_ptr + input.offset()));
1425 float32x2_t res = {};
1426 float final_res = 0;
1427
1428 // Get power of 2 in case of l2 pooling
1429 if(pooling_type == PoolingType::L2)
1430 {
1431 top_data = vmul_f32(top_data, top_data);
1432 bottom_data = vmul_f32(bottom_data, bottom_data);
1433 }
1434
1435 if(pooling_type != PoolingType::MAX)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001436 {
1437 // Calculate scale
Michalis Spyrou57dac842018-03-01 16:03:50 +00001438 float scale = calculate_avg_scale<exclude_padding, DataLayout::NCHW>(id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001439 const float32x2_t scale_v = vdup_n_f32(scale);
1440
1441 // Perform pooling
1442 const float32x2_t sum_data = vadd_f32(top_data, bottom_data);
1443 res = vmul_f32(vpadd_f32(sum_data, sum_data), scale_v);
1444 }
1445 else
1446 {
1447 const float32x2_t max_data = vmax_f32(top_data, bottom_data);
1448 res = vpmax_f32(max_data, max_data);
1449 }
Georgios Pinitascdf51452017-08-31 14:21:36 +01001450 final_res = vget_lane_f32(res, 0);
1451
1452 // Calculate square-root in case of l2 pooling
1453 if(pooling_type == PoolingType::L2)
1454 {
1455 final_res = sqrt(final_res);
1456 }
1457
1458 // Store result
1459 *(reinterpret_cast<float *>(output.ptr())) = final_res;
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001460 },
1461 input, output);
1462}
1463
1464template <PoolingType pooling_type>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001465void NEPoolingLayerKernel::pooling3_q8_nchw(const Window &window_input, const Window &window)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001466{
1467 Iterator input(_input, window_input);
1468 Iterator output(_output, window);
1469
1470 const int fixed_point_position = _input->info()->fixed_point_position();
1471 constexpr int pool_size = 3;
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001472 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1473 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1474 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1475 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001476 int pool_stride_x = 0;
1477 int pool_stride_y = 0;
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001478 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001479 const int upper_bound_w = _input->info()->dimension(0) + pool_pad_right;
1480 const int upper_bound_h = _input->info()->dimension(1) + pool_pad_bottom;
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001481
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001482 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
1483 const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
1484 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 2));
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001485
1486 execute_window_loop(window, [&](const Coordinates & id)
1487 {
1488 const auto top_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_top_ptr + input.offset()));
1489 const auto middle_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_middle_ptr + input.offset()));
1490 const auto bottom_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_bottom_ptr + input.offset()));
1491 qint8x8_t res = {};
1492 if(pooling_type == PoolingType::AVG)
1493 {
1494 // Calculate scale
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001495 const qint8_t scale = calculate_avg_scale_q8(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y, fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001496
1497 // Perform pooling for stride 2
1498 const qint8x16_t sum_data = vqaddq_qs8(vqaddq_qs8(top_data, bottom_data), middle_data);
1499 const qint8x16_t sum_data2 = vextq_s8(sum_data, sum_data, 1);
1500 const qint8x16_t sum_data3 = vextq_s8(sum_data, sum_data, 2);
1501 const qint8x16_t final_sum = vqaddq_qs8(vqaddq_qs8(sum_data, sum_data2), sum_data3);
1502 if(pool_stride_x == 2)
1503 {
1504 const qint8x8x2_t table = { { vget_low_s8(final_sum), vget_high_s8(final_sum) } };
1505 static const qint8x8_t lookup_val = { 0, 2, 4, 6, 8, 10, 12, 14 };
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001506 const qint8x8_t scale_vec = vdup_n_qs8(scale);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001507 res = vtbl2_s8(table, lookup_val);
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001508 res = vqmul_qs8(res, scale_vec, fixed_point_position);
1509 vst1_qs8(reinterpret_cast<qint8_t *>(output.ptr()), res);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001510 }
1511 else
1512 {
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001513 const qint8x16_t scale_vec = vdupq_n_qs8(scale);
1514 vst1q_qs8(reinterpret_cast<qint8_t *>(output.ptr()), vqmulq_qs8(final_sum, scale_vec, fixed_point_position));
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001515 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001516 }
1517 else
1518 {
1519 const qint8x16_t max_data = vmaxq_s8(vmaxq_s8(top_data, bottom_data), middle_data);
1520 const qint8x16_t max_data2 = vextq_s8(max_data, max_data, 1);
1521 const qint8x16_t max_data3 = vextq_s8(max_data, max_data, 2);
1522 const qint8x16_t final_max = vmaxq_s8(vmaxq_s8(max_data, max_data2), max_data3);
1523
1524 if(pool_stride_x == 2)
1525 {
1526 const qint8x8x2_t table = { { vget_low_s8(final_max), vget_high_s8(final_max) } };
1527 static const qint8x8_t lookup_val = { 0, 2, 4, 6, 8, 10, 12, 14 };
1528 res = vtbl2_s8(table, lookup_val);
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001529 vst1_qs8(reinterpret_cast<qint8_t *>(output.ptr()), res);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001530 }
1531 else
1532 {
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001533 vst1q_qs8(reinterpret_cast<qint8_t *>(output.ptr()), final_max);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001534 }
1535 }
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001536 },
1537 input, output);
1538}
1539
Georgios Pinitas55186712018-01-08 17:37:12 +00001540template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001541void NEPoolingLayerKernel::pooling3_qasymm8_nchw(const Window &window_input, const Window &window)
Georgios Pinitas55186712018-01-08 17:37:12 +00001542{
1543 Iterator input(_input, window_input);
1544 Iterator output(_output, window);
1545
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001546 constexpr int pool_size = 3;
1547 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1548 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1549 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1550 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1551 int pool_stride_x = 0;
1552 int pool_stride_y = 0;
Georgios Pinitas55186712018-01-08 17:37:12 +00001553 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001554 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1555 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Georgios Pinitas55186712018-01-08 17:37:12 +00001556
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001557 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
1558 const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
1559 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 2));
Georgios Pinitas55186712018-01-08 17:37:12 +00001560
1561 execute_window_loop(window, [&](const Coordinates & id)
1562 {
1563 const auto top_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_top_ptr + input.offset()));
1564 const auto middle_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_middle_ptr + input.offset()));
1565 const auto bottom_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_bottom_ptr + input.offset()));
1566
1567 if(pooling_type == PoolingType::AVG)
1568 {
1569 // Convert data to u16
1570 const uint16x8x2_t top_data_u16 = { { vmovl_u8(vget_low_u8(top_data)), vmovl_u8(vget_high_u8(top_data)) } };
1571 const uint16x8x2_t middle_data_u16 = { { vmovl_u8(vget_low_u8(middle_data)), vmovl_u8(vget_high_u8(middle_data)) } };
1572 const uint16x8x2_t bottom_data_u16 = { { vmovl_u8(vget_low_u8(bottom_data)), vmovl_u8(vget_high_u8(bottom_data)) } };
1573
1574 // Calculate row sums
1575 const uint16x8x2_t vrsum =
1576 {
1577 {
1578 vaddq_u16(vaddq_u16(top_data_u16.val[0], bottom_data_u16.val[0]), middle_data_u16.val[0]),
1579 vaddq_u16(vaddq_u16(top_data_u16.val[1], bottom_data_u16.val[1]), middle_data_u16.val[1]),
1580 }
1581 };
1582 const uint16x8x2_t vrsum_shifted_1 =
1583 {
1584 {
1585 vextq_u16(vrsum.val[0], vrsum.val[1], 1),
1586 vextq_u16(vrsum.val[1], vrsum.val[1], 1)
1587 }
1588 };
1589 const uint16x8x2_t vrsum_shifted_2 =
1590 {
1591 {
1592 vextq_u16(vrsum.val[0], vrsum.val[1], 2),
1593 vextq_u16(vrsum.val[1], vrsum.val[1], 2)
1594 }
1595 };
1596 // Calculate final sum
1597 uint16x8x2_t final_sum =
1598 {
1599 {
1600 vaddq_u16(vaddq_u16(vrsum.val[0], vrsum_shifted_1.val[0]), vrsum_shifted_2.val[0]),
1601 vaddq_u16(vaddq_u16(vrsum.val[1], vrsum_shifted_1.val[1]), vrsum_shifted_2.val[1]),
1602 }
1603 };
1604 if(pool_stride_x == 2)
1605 {
1606 uint16x8_t res =
1607 {
1608 vgetq_lane_u16(final_sum.val[0], 0),
1609 vgetq_lane_u16(final_sum.val[0], 2),
1610 vgetq_lane_u16(final_sum.val[0], 4),
1611 vgetq_lane_u16(final_sum.val[0], 6),
1612 vgetq_lane_u16(final_sum.val[1], 0),
1613 vgetq_lane_u16(final_sum.val[1], 2),
1614 vgetq_lane_u16(final_sum.val[1], 4),
1615 vgetq_lane_u16(final_sum.val[1], 6),
1616 };
1617
1618 scale_vector_s16x8<exclude_padding>(res, id, 0, 1,
1619 pool_size, upper_bound_w, upper_bound_h,
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001620 pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitas55186712018-01-08 17:37:12 +00001621 vst1_u8(reinterpret_cast<uint8_t *>(output.ptr()), vmovn_u16(res));
1622 }
1623 else
1624 {
1625 // Scale lower result
1626 scale_vector_s16x8<exclude_padding>(final_sum.val[0], id, 0, 1,
1627 pool_size, upper_bound_w, upper_bound_h,
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001628 pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitas55186712018-01-08 17:37:12 +00001629 // Scale lower result
1630 scale_vector_s16x8<exclude_padding>(final_sum.val[1], id, 8, 1,
1631 pool_size, upper_bound_w, upper_bound_h,
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001632 pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitas55186712018-01-08 17:37:12 +00001633 const uint8x16_t res = vcombine_u8(vmovn_u16(final_sum.val[0]), vmovn_u16(final_sum.val[1]));
1634 vst1q_u8(reinterpret_cast<uint8_t *>(output.ptr()), res);
1635 }
1636 }
1637 else
1638 {
1639 const uint8x16_t max_data = vmaxq_u8(vmaxq_u8(top_data, bottom_data), middle_data);
1640 const uint8x16_t max_data_shift1 = vextq_u8(max_data, max_data, 1);
1641 const uint8x16_t max_data_shift2 = vextq_u8(max_data, max_data, 2);
1642 const uint8x16_t final_max = vmaxq_u8(vmaxq_u8(max_data, max_data_shift1), max_data_shift2);
1643
1644 if(pool_stride_x == 2)
1645 {
1646 const uint8x8x2_t table = { { vget_low_u8(final_max), vget_high_u8(final_max) } };
1647 static const uint8x8_t lookup_val = { 0, 2, 4, 6, 8, 10, 12, 14 };
1648 const uint8x8_t res = vtbl2_u8(table, lookup_val);
1649 vst1_u8(reinterpret_cast<uint8_t *>(output.ptr()), res);
1650 }
1651 else
1652 {
1653 vst1q_u8(reinterpret_cast<uint8_t *>(output.ptr()), final_max);
1654 }
1655 }
1656 },
1657 input, output);
1658}
1659
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001660template <PoolingType pooling_type>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001661void NEPoolingLayerKernel::pooling3_q16_nchw(const Window &window_input, const Window &window)
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001662{
1663 Iterator input(_input, window_input);
1664 Iterator output(_output, window);
1665
1666 const int fixed_point_position = _input->info()->fixed_point_position();
1667 constexpr int pool_size = 3;
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001668 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1669 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1670 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1671 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001672 int pool_stride_x = 0;
1673 int pool_stride_y = 0;
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001674 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001675 const int upper_bound_w = _input->info()->dimension(0) + pool_pad_right;
1676 const int upper_bound_h = _input->info()->dimension(1) + pool_pad_bottom;
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001677
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001678 const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
1679 const unsigned char *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
1680 const unsigned char *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 2));
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001681
1682 execute_window_loop(window, [&](const Coordinates & id)
1683 {
1684 const auto top_data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input_top_ptr + input.offset()));
1685 const auto middle_data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input_middle_ptr + input.offset()));
1686 const auto bottom_data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input_bottom_ptr + input.offset()));
1687
1688 if(pooling_type == PoolingType::AVG)
1689 {
1690 // Calculate scale
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001691 const qint16_t scale = calculate_avg_scale_q16(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y, fixed_point_position);
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001692
1693 // Perform pooling for stride 2
1694 const qint16x8_t sum_data = vqaddq_qs16(vqaddq_qs16(top_data, bottom_data), middle_data);
1695 const qint16x8_t sum_data2 = vextq_s16(sum_data, sum_data, 1);
1696 const qint16x8_t sum_data3 = vextq_s16(sum_data, sum_data, 2);
1697 const qint16x8_t final_sum = vqaddq_qs16(vqaddq_qs16(sum_data, sum_data2), sum_data3);
1698 if(pool_stride_x == 2)
1699 {
1700 const qint16x4_t tmp = { vgetq_lane_s16(final_sum, 0), vgetq_lane_s16(final_sum, 2), vgetq_lane_s16(final_sum, 4), vgetq_lane_s16(final_sum, 6) };
1701 const qint16x4_t scale_vec = vdup_n_qs16(scale);
1702 vst1_qs16(reinterpret_cast<qint16_t *>(output.ptr()), vqmul_qs16(tmp, scale_vec, fixed_point_position));
1703 }
1704 else
1705 {
1706 const qint16x8_t scale_vec = vdupq_n_qs16(scale);
1707 vst1q_qs16(reinterpret_cast<qint16_t *>(output.ptr()), vqmulq_qs16(final_sum, scale_vec, fixed_point_position));
1708 }
1709 }
1710 else
1711 {
1712 const qint16x8_t max_data = vmaxq_s16(vmaxq_s16(top_data, bottom_data), middle_data);
1713 const qint16x8_t max_data2 = vextq_s16(max_data, max_data, 1);
1714 const qint16x8_t max_data3 = vextq_s16(max_data, max_data, 2);
1715 const qint16x8_t final_max = vmaxq_s16(vmaxq_s16(max_data, max_data2), max_data3);
1716
1717 if(pool_stride_x == 2)
1718 {
1719 const qint16x4_t tmp = { vgetq_lane_s16(final_max, 0), vgetq_lane_s16(final_max, 2), vgetq_lane_s16(final_max, 4), vgetq_lane_s16(final_max, 6) };
1720 vst1_qs16(reinterpret_cast<qint16_t *>(output.ptr()), tmp);
1721 }
1722 else
1723 {
1724 vst1q_qs16(reinterpret_cast<qint16_t *>(output.ptr()), final_max);
1725 }
1726 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001727 },
1728 input, output);
1729}
1730
Georgios Pinitasadaae7e2017-10-30 15:56:32 +00001731template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001732void NEPoolingLayerKernel::pooling3_f32_nchw(const Window &window_input, const Window &window)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001733{
1734 Iterator input(_input, window_input);
1735 Iterator output(_output, window);
1736
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001737 constexpr const int pool_size = 3;
1738 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1739 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1740 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1741 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1742 int pool_stride_x = 0;
1743 int pool_stride_y = 0;
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001744 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001745 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1746 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001747
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001748 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
1749 const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
1750 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 2));
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001751
1752 execute_window_loop(window, [&](const Coordinates & id)
1753 {
Georgios Pinitascdf51452017-08-31 14:21:36 +01001754 float32x4_t top_data = vld1q_f32(reinterpret_cast<const float *>(input_top_ptr + input.offset()));
1755 float32x4_t middle_data = vld1q_f32(reinterpret_cast<const float *>(input_middle_ptr + input.offset()));
1756 float32x4_t bottom_data = vld1q_f32(reinterpret_cast<const float *>(input_bottom_ptr + input.offset()));
1757 float32x2_t res = {};
1758 float final_res = 0;
1759
1760 // Get power of 2 in case of l2 pooling
1761 if(pooling_type == PoolingType::L2)
1762 {
1763 top_data = vmulq_f32(top_data, top_data);
1764 middle_data = vmulq_f32(middle_data, middle_data);
1765 bottom_data = vmulq_f32(bottom_data, bottom_data);
1766 }
1767
1768 if(pooling_type != PoolingType::MAX)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001769 {
1770 // Calculate scale
Michalis Spyrou57dac842018-03-01 16:03:50 +00001771 float scale = calculate_avg_scale<exclude_padding, DataLayout::NCHW>(id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001772 const float32x2_t scale_v = vdup_n_f32(scale);
1773
1774 // Perform pooling
1775 const float32x4_t sum_data = vaddq_f32(vaddq_f32(top_data, bottom_data), middle_data);
1776 res = vpadd_f32(vget_high_f32(vsetq_lane_f32(0.f, sum_data, 3)), vget_low_f32(sum_data));
1777 res = vmul_f32(vpadd_f32(res, res), scale_v);
1778 }
1779 else
1780 {
1781 const float32x4_t max_data = vmaxq_f32(vmaxq_f32(top_data, bottom_data), middle_data);
1782 res = vpmax_f32(vget_high_f32(vsetq_lane_f32(-std::numeric_limits<float>::max(), max_data, 3)), vget_low_f32(max_data));
1783 res = vpmax_f32(res, res);
1784 }
Georgios Pinitascdf51452017-08-31 14:21:36 +01001785 final_res = vget_lane_f32(res, 0);
1786
1787 // Calculate square-root in case of l2 pooling
1788 if(pooling_type == PoolingType::L2)
1789 {
1790 final_res = sqrt(final_res);
1791 }
1792
1793 // Store result
1794 *(reinterpret_cast<float *>(output.ptr())) = final_res;
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001795 },
1796 input, output);
1797}
1798
Georgios Pinitasadaae7e2017-10-30 15:56:32 +00001799template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001800void NEPoolingLayerKernel::pooling7_f32_nchw(const Window &window_input, const Window &window)
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001801{
1802 Iterator input(_input, window_input);
1803 Iterator output(_output, window);
1804
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001805 constexpr const int pool_size = 7;
1806 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1807 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1808 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1809 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1810 int pool_stride_x = 0;
1811 int pool_stride_y = 0;
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001812 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001813 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1814 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001815
1816 std::array<const uint8_t *, pool_size> input_ptrs{ {} };
1817 for(int i = 0; i < pool_size; ++i)
1818 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001819 input_ptrs[i] = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + i));
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001820 }
1821
1822 execute_window_loop(window, [&](const Coordinates & id)
1823 {
Georgios Pinitascdf51452017-08-31 14:21:36 +01001824 float32x2_t res = {};
1825 float final_res = 0.f;
1826 if(pooling_type != PoolingType::MAX)
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001827 {
1828 // Calculate scale
Michalis Spyrou57dac842018-03-01 16:03:50 +00001829 float scale = calculate_avg_scale<exclude_padding, DataLayout::NCHW>(id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001830 const float32x2_t scale_v = vdup_n_f32(scale);
1831
1832 // Perform pooling
Georgios Pinitascdf51452017-08-31 14:21:36 +01001833 float32x4x2_t data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[0] + input.offset()));
1834 // Get power of 2 in case of l2 pooling
1835 if(pooling_type == PoolingType::L2)
1836 {
1837 data.val[0] = vmulq_f32(data.val[0], data.val[0]);
1838 data.val[1] = vmulq_f32(data.val[1], data.val[1]);
1839 }
1840 float32x4_t sum_data = vaddq_f32(data.val[0], vsetq_lane_f32(0.f, data.val[1], 3));
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001841 for(int i = 1; i < pool_size; ++i)
1842 {
Georgios Pinitascdf51452017-08-31 14:21:36 +01001843 data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[i] + input.offset()));
1844 // Get power of 2 in case of l2 pooling
1845 if(pooling_type == PoolingType::L2)
1846 {
1847 data.val[0] = vmulq_f32(data.val[0], data.val[0]);
1848 data.val[1] = vmulq_f32(data.val[1], data.val[1]);
1849 }
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001850 sum_data = vaddq_f32(sum_data, data.val[0]);
1851 sum_data = vaddq_f32(sum_data, vsetq_lane_f32(0.f, data.val[1], 3));
1852 }
1853 res = vpadd_f32(vget_high_f32(sum_data), vget_low_f32(sum_data));
1854 res = vmul_f32(vpadd_f32(res, res), scale_v);
1855 }
1856 else
1857 {
1858 float32x4x2_t max_data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[0] + input.offset()));
1859 for(int i = 1; i < pool_size; ++i)
1860 {
1861 const float32x4x2_t data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[i] + input.offset()));
1862 max_data = vmax2q_f32(max_data, data);
1863 }
1864 res = vpmax_f32(vget_high_f32(vsetq_lane_f32(-std::numeric_limits<float>::max(), max_data.val[1], 3)), vget_low_f32(max_data.val[1]));
1865 res = vpmax_f32(res, vpmax_f32(vget_high_f32(max_data.val[0]), vget_low_f32(max_data.val[0])));
1866 res = vpmax_f32(res, res);
1867 }
Georgios Pinitascdf51452017-08-31 14:21:36 +01001868 final_res = vget_lane_f32(res, 0);
1869
1870 // Calculate square-root in case of l2 pooling
1871 if(pooling_type == PoolingType::L2)
1872 {
1873 final_res = sqrt(final_res);
1874 }
1875
1876 // Store result
1877 *(reinterpret_cast<float *>(output.ptr())) = final_res;
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001878 },
1879 input, output);
1880}
1881
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001882template <PoolingType pooling_type>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001883void NEPoolingLayerKernel::poolingMxN_q8_nchw(const Window &window_input, const Window &window)
Gian Marco Iodice16824302017-09-28 15:41:37 +01001884{
1885 Iterator input(_input, window_input);
1886 Iterator output(_output, window);
1887
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001888 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().x() : _pool_info.pool_size().width;
1889 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().height;
1890 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1891 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1892 int pool_stride_x = 0;
1893 int pool_stride_y = 0;
1894 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
1895
1896 execute_window_loop(window, [&](const Coordinates & id)
1897 {
1898 qint8x16_t vres = {};
1899 qint8_t res = {};
1900
1901 //PoolingType::MAX
1902 for(int y = 0; y < pool_size_y; ++y)
1903 {
1904 int x = 0;
1905 for(; x <= (pool_size_x - 16); x += 16)
1906 {
1907 const qint8x16_t data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() +
1908 (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
1909 vres = vmaxq_s8(vres, data);
1910 }
1911
1912 // Leftover for loop
1913 for(; x < pool_size_x; ++x)
1914 {
1915 qint8_t data = *(reinterpret_cast<const qint8_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
1916 res = std::max(res, data);
1917 }
1918 }
1919 //Reduce
1920 const qint8x8_t half_vres = vpmax_s8(vget_low_s8(vres), vget_high_s8(vres));
1921 res = std::max(res, vget_lane_s8(half_vres, 0));
1922 res = std::max(res, vget_lane_s8(half_vres, 1));
1923 res = std::max(res, vget_lane_s8(half_vres, 2));
1924 res = std::max(res, vget_lane_s8(half_vres, 3));
1925 res = std::max(res, vget_lane_s8(half_vres, 4));
1926 res = std::max(res, vget_lane_s8(half_vres, 5));
1927 res = std::max(res, vget_lane_s8(half_vres, 6));
1928 res = std::max(res, vget_lane_s8(half_vres, 7));
1929
1930 // Store result
1931 *(reinterpret_cast<qint8_t *>(output.ptr())) = res;
1932 },
1933 input, output);
1934}
1935
1936template <PoolingType pooling_type>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001937void NEPoolingLayerKernel::poolingMxN_q16_nchw(const Window &window_input, const Window &window)
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001938{
1939 Iterator input(_input, window_input);
1940 Iterator output(_output, window);
1941
1942 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().x() : _pool_info.pool_size().width;
1943 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().height;
1944 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1945 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1946 int pool_stride_x = 0;
1947 int pool_stride_y = 0;
1948 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
1949
1950 execute_window_loop(window, [&](const Coordinates & id)
1951 {
1952 qint16x8_t vres = {};
1953 qint16_t res = {};
1954
1955 //PoolingType::MAX
1956 for(int y = 0; y < pool_size_y; ++y)
1957 {
1958 int x = 0;
1959 for(; x <= (pool_size_x - 8); x += 8)
1960 {
1961 const qint16x8_t data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() +
1962 (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
1963 vres = vmaxq_s16(vres, data);
1964 }
1965
1966 // Leftover for loop
1967 for(; x < pool_size_x; ++x)
1968 {
1969 qint16_t data = *(reinterpret_cast<const qint16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
1970 res = std::max(res, data);
1971 }
1972 }
1973 //Reduce
1974 const qint16x4_t half_vres = vpmax_s16(vget_low_s16(vres), vget_high_s16(vres));
1975 res = std::max(res, vget_lane_s16(half_vres, 0));
1976 res = std::max(res, vget_lane_s16(half_vres, 1));
1977 res = std::max(res, vget_lane_s16(half_vres, 2));
1978 res = std::max(res, vget_lane_s16(half_vres, 3));
1979
1980 // Store result
1981 *(reinterpret_cast<qint16_t *>(output.ptr())) = res;
1982 },
1983 input, output);
1984}
1985
1986template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001987void NEPoolingLayerKernel::poolingMxN_f16_nchw(const Window &window_input, const Window &window)
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001988{
1989#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
1990 Iterator input(_input, window_input);
1991 Iterator output(_output, window);
1992
1993 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().x() : _pool_info.pool_size().width;
1994 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().height;
1995 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1996 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1997 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1998 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1999 int pool_stride_x = 0;
2000 int pool_stride_y = 0;
2001 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
2002 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
2003 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
2004
2005 execute_window_loop(window, [&](const Coordinates & id)
2006 {
2007 float16_t res = 0.0f;
2008 float16x8_t vres = vdupq_n_f16(0.0f);
2009
2010 if(pooling_type != PoolingType::MAX)
2011 {
2012 // Calculate scale
Michalis Spyrou57dac842018-03-01 16:03:50 +00002013 const float scale = calculate_avg_scale<exclude_padding, DataLayout::NCHW>(id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00002014
2015 // Perform pooling
2016
2017 for(int y = 0; y < pool_size_y; ++y)
2018 {
2019 int x = 0;
2020 for(; x <= (pool_size_x - 8); x += 8)
2021 {
2022 const float16x8_t data = vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() +
2023 (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
2024
2025 // Get power of 2 in case of l2 pooling and accumulate
2026 if(pooling_type == PoolingType::L2)
2027 {
2028 vres = vaddq_f16(vres, vmulq_f16(data, data));
2029 }
2030 else
2031 {
2032 vres = vaddq_f16(vres, data);
2033 }
2034 }
2035
2036 // Leftover for loop
2037 for(; x < pool_size_x; ++x)
2038 {
2039 float16_t data = *(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
2040
2041 // Get power of 2 in case of l2 pooling
2042 if(pooling_type == PoolingType::L2)
2043 {
2044 data *= data;
2045 }
2046
2047 res += data;
2048 }
2049 }
2050
2051 // Reduction
2052 float16x4_t tmp = vpadd_f16(vget_high_f16(vres), vget_low_f16(vres));
2053 res += vget_lane_f16(tmp, 0);
2054 res += vget_lane_f16(tmp, 1);
2055 res += vget_lane_f16(tmp, 2);
2056 res += vget_lane_f16(tmp, 3);
2057
2058 // Divide by scale
2059 res *= scale;
2060 }
2061 else
2062 {
2063 float16x8_t vres = vdupq_n_f16(std::numeric_limits<float>::lowest());
2064 res = std::numeric_limits<float>::lowest();
2065
2066 for(int y = 0; y < pool_size_y; ++y)
2067 {
2068 int x = 0;
2069 for(; x <= (pool_size_x - 8); x += 8)
2070 {
2071 const float16x8_t data = vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() +
2072 (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
2073 vres = vmaxq_f16(vres, data);
2074 }
2075
2076 // Leftover for loop
2077 for(; x < pool_size_x; ++x)
2078 {
2079 const float16_t data = *(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
2080 res = std::max(res, data);
2081 }
2082 }
2083
2084 float16x4_t tmp = vpmax_f16(vget_high_f16(vres), vget_low_f16(vres));
2085 res = std::max(res, vget_lane_f16(tmp, 0));
2086 res = std::max(res, vget_lane_f16(tmp, 1));
2087 res = std::max(res, vget_lane_f16(tmp, 2));
2088 res = std::max(res, vget_lane_f16(tmp, 3));
2089 }
2090
2091 // Calculate square-root in case of l2 pooling
2092 if(pooling_type == PoolingType::L2)
2093 {
2094 res = std::sqrt(res);
2095 }
2096
2097 // Store result
2098 *(reinterpret_cast<float16_t *>(output.ptr())) = res;
2099 },
2100 input, output);
2101
2102#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
2103 ARM_COMPUTE_UNUSED(window_input);
2104 ARM_COMPUTE_UNUSED(window);
2105 ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
2106#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
2107}
2108
2109template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00002110void NEPoolingLayerKernel::poolingMxN_f16_nhwc(const Window &window_input, const Window &window)
2111{
2112#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
2113 Iterator input(_input, window_input);
2114 Iterator output(_output, window);
2115
2116 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().width;
2117 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().z() : _pool_info.pool_size().height;
2118 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
2119 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
2120 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
2121 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
2122 int pool_stride_x = 0;
2123 int pool_stride_y = 0;
2124 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
2125 const int upper_bound_w = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_right);
2126 const int upper_bound_h = _input->info()->dimension(2) + (exclude_padding ? 0 : pool_pad_bottom);
2127
2128 float16x8_t vres;
2129
2130 execute_window_loop(window, [&](const Coordinates & id)
2131 {
2132 const int idx_width = id.y() * pool_stride_x;
2133 const int idx_height = id.z() * pool_stride_y;
2134 if(pooling_type != PoolingType::MAX)
2135 {
2136 // Calculate scale
2137 const float scale = calculate_avg_scale<exclude_padding, DataLayout::NHWC>(id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
2138 pool_stride_y);
2139 const float16x8_t scale_v = vdupq_n_f16(scale);
2140
2141 // Perform pooling
2142 vres = vdupq_n_f16(0.0f);
2143
2144 for(int y = 0; y < pool_size_y; ++y)
2145 {
2146 if(y + idx_height > window_input.z().end() || y + idx_height - pool_pad_top < window_input.z().start())
2147 {
2148 continue;
2149 }
2150
2151 for(int x = 0; x < pool_size_x; ++x)
2152 {
2153 if(x + idx_width > window_input.y().end() || x + idx_width - pool_pad_left < window_input.y().start())
2154 {
2155 continue;
2156 }
2157
2158 const float16x8_t data = vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().y() +
2159 (y - pool_pad_top) * _input->info()->strides_in_bytes().z()));
2160
2161 // Get power of 2 in case of l2 pooling and accumulate
2162 if(pooling_type == PoolingType::L2)
2163 {
2164 vres = vaddq_f16(vres, vmulq_f16(data, data));
2165 }
2166 else
2167 {
2168 vres = vaddq_f16(vres, data);
2169 }
2170 }
2171 }
2172 // Divide by scale
2173 vres = vmulq_f16(vres, scale_v);
2174 }
2175 else
2176 {
2177 vres = vdupq_n_f16(std::numeric_limits<float>::lowest());
2178 for(int y = 0; y < pool_size_y; ++y)
2179 {
2180 if(y + idx_height > window_input.z().end() || y + idx_height - pool_pad_top < window_input.z().start())
2181 {
2182 continue;
2183 }
2184
2185 for(int x = 0; x < pool_size_x; ++x)
2186 {
2187 if(x + idx_width > window_input.y().end() || x + idx_width - pool_pad_left < window_input.y().start())
2188 {
2189 continue;
2190 }
2191
2192 const float16x8_t data = vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().y() +
2193 (y - pool_pad_top) * _input->info()->strides_in_bytes().z()));
2194 vres = vmaxq_f16(vres, data);
2195 }
2196 }
2197 }
2198
2199 // Calculate square-root in case of l2 pooling
2200 if(pooling_type == PoolingType::L2)
2201 {
2202 float16x8_t sqrt_reciprocal = vrsqrteq_f16(vres);
2203 vres = vmulq_f16(vres, vmulq_f16(vrsqrtsq_f16(vmulq_f16(vres, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal));
2204 }
2205
2206 // Store result
2207 vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()), vres);
2208 },
2209 input, output);
2210
2211#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
2212 ARM_COMPUTE_UNUSED(window_input);
2213 ARM_COMPUTE_UNUSED(window);
2214 ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
2215#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
2216}
2217
2218template <PoolingType pooling_type, bool exclude_padding>
2219void NEPoolingLayerKernel::poolingMxN_f32_nchw(const Window &window_input, const Window &window)
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00002220{
2221 Iterator input(_input, window_input);
2222 Iterator output(_output, window);
2223
2224 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().x() : _pool_info.pool_size().width;
2225 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().height;
Michalis Spyroubd0e6122018-01-23 09:52:16 +00002226 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
2227 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
2228 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
2229 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
2230 int pool_stride_x = 0;
2231 int pool_stride_y = 0;
Gian Marco Iodice16824302017-09-28 15:41:37 +01002232 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00002233 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
2234 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Gian Marco Iodice16824302017-09-28 15:41:37 +01002235
2236 execute_window_loop(window, [&](const Coordinates & id)
2237 {
2238 float res = 0.0f;
2239
2240 if(pooling_type != PoolingType::MAX)
2241 {
2242 // Calculate scale
Michalis Spyrou57dac842018-03-01 16:03:50 +00002243 const float scale = calculate_avg_scale<exclude_padding, DataLayout::NCHW>(id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Gian Marco Iodice16824302017-09-28 15:41:37 +01002244
2245 // Perform pooling
2246 float32x4_t vres = vdupq_n_f32(0.0f);
2247
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00002248 for(int y = 0; y < pool_size_y; ++y)
Gian Marco Iodice16824302017-09-28 15:41:37 +01002249 {
2250 int x = 0;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00002251 for(; x <= (pool_size_x - 4); x += 4)
Gian Marco Iodice16824302017-09-28 15:41:37 +01002252 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00002253 const float32x4_t data = vld1q_f32(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() +
2254 (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
Gian Marco Iodice16824302017-09-28 15:41:37 +01002255
2256 // Get power of 2 in case of l2 pooling and accumulate
2257 if(pooling_type == PoolingType::L2)
2258 {
2259 vres = vmlaq_f32(vres, data, data);
2260 }
2261 else
2262 {
2263 vres = vaddq_f32(vres, data);
2264 }
2265 }
2266
2267 // Leftover for loop
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00002268 for(; x < pool_size_x; ++x)
Gian Marco Iodice16824302017-09-28 15:41:37 +01002269 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00002270 float data = *(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
Gian Marco Iodice16824302017-09-28 15:41:37 +01002271
2272 // Get power of 2 in case of l2 pooling
2273 if(pooling_type == PoolingType::L2)
2274 {
2275 data *= data;
2276 }
2277
2278 res += data;
2279 }
2280 }
2281
2282#if defined(__aarch64__)
2283 // Reduction operation available on 64 bit architectures only
2284 res += vaddvq_f32(vres);
2285#else // __aarch64__
2286 // Reduction
2287 float32x2_t tmp = vpadd_f32(vget_high_f32(vres), vget_low_f32(vres));
2288 tmp = vpadd_f32(tmp, tmp);
2289
2290 res += vget_lane_f32(tmp, 0);
2291#endif // __aarch64__
2292 // Divide by scale
2293 res *= scale;
2294 }
2295 else
2296 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00002297 float32x4_t vres = vdupq_n_f32(std::numeric_limits<float>::lowest());
2298 res = std::numeric_limits<float>::lowest();
Gian Marco Iodice16824302017-09-28 15:41:37 +01002299
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00002300 for(int y = 0; y < pool_size_y; ++y)
Gian Marco Iodice16824302017-09-28 15:41:37 +01002301 {
2302 int x = 0;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00002303 for(; x <= (pool_size_x - 4); x += 4)
Gian Marco Iodice16824302017-09-28 15:41:37 +01002304 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00002305 const float32x4_t data = vld1q_f32(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() +
2306 (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
Gian Marco Iodice16824302017-09-28 15:41:37 +01002307 vres = vmaxq_f32(vres, data);
2308 }
2309
2310 // Leftover for loop
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00002311 for(; x < pool_size_x; ++x)
Gian Marco Iodice16824302017-09-28 15:41:37 +01002312 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00002313 const float data = *(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
Gian Marco Iodice16824302017-09-28 15:41:37 +01002314 res = std::max(res, data);
2315 }
2316 }
2317
2318#if defined(__aarch64__)
2319 // Reduction operation available on 64 bit architectures only
2320 res = std::max(vmaxvq_f32(vres), res);
2321#else // __aarch64__
2322 float32x2_t tmp = vpmax_f32(vget_high_f32(vres), vget_low_f32(vres));
2323 tmp = vpmax_f32(tmp, tmp);
2324
2325 res = std::max(res, vget_lane_f32(tmp, 0));
2326#endif // __aarch64__
2327 }
2328
2329 // Calculate square-root in case of l2 pooling
2330 if(pooling_type == PoolingType::L2)
2331 {
2332 res = std::sqrt(res);
2333 }
2334
2335 // Store result
2336 *(reinterpret_cast<float *>(output.ptr())) = res;
2337 },
2338 input, output);
2339}
2340
Georgios Pinitas55186712018-01-08 17:37:12 +00002341template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00002342void NEPoolingLayerKernel::poolingMxN_f32_nhwc(const Window &window_input, const Window &window)
2343{
2344 Iterator input(_input, window_input);
2345 Iterator output(_output, window);
2346
2347 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().width;
2348 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().z() : _pool_info.pool_size().height;
2349 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
2350 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
2351 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
2352 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
2353 int pool_stride_x = 0;
2354 int pool_stride_y = 0;
2355 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
2356 const int upper_bound_w = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_right);
2357 const int upper_bound_h = _input->info()->dimension(2) + (exclude_padding ? 0 : pool_pad_bottom);
2358
2359 float32x4_t vres;
2360
2361 execute_window_loop(window, [&](const Coordinates & id)
2362 {
2363 const int idx_width = id.y() * pool_stride_x;
2364 const int idx_height = id.z() * pool_stride_y;
2365 if(pooling_type != PoolingType::MAX)
2366 {
2367 // Calculate scale
2368 const float scale = calculate_avg_scale<exclude_padding, DataLayout::NHWC>(id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
2369 pool_stride_y);
2370 const float32x4_t scale_v = vdupq_n_f32(scale);
2371
2372 // Perform pooling
2373 vres = vdupq_n_f32(0.0f);
2374
2375 for(int y = 0; y < pool_size_y; ++y)
2376 {
2377 if(y + idx_height > window_input.z().end() || y + idx_height - pool_pad_top < window_input.z().start())
2378 {
2379 continue;
2380 }
2381
2382 for(int x = 0; x < pool_size_x; ++x)
2383 {
2384 if(x + idx_width > window_input.y().end() || x + idx_width - pool_pad_left < window_input.y().start())
2385 {
2386 continue;
2387 }
2388
2389 const float32x4_t data = vld1q_f32(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().y() +
2390 (y - pool_pad_top) * _input->info()->strides_in_bytes().z()));
2391
2392 // Get power of 2 in case of l2 pooling and accumulate
2393 if(pooling_type == PoolingType::L2)
2394 {
2395 vres = vmlaq_f32(vres, data, data);
2396 }
2397 else
2398 {
2399 vres = vaddq_f32(vres, data);
2400 }
2401 }
2402 }
2403 // Divide by scale
2404 vres = vmulq_f32(vres, scale_v);
2405 }
2406 else
2407 {
2408 vres = vdupq_n_f32(std::numeric_limits<float>::lowest());
2409 for(int y = 0; y < pool_size_y; ++y)
2410 {
2411 if(y + idx_height > window_input.z().end() || y + idx_height - pool_pad_top < window_input.z().start())
2412 {
2413 continue;
2414 }
2415
2416 for(int x = 0; x < pool_size_x; ++x)
2417 {
2418 if(x + idx_width > window_input.y().end() || x + idx_width - pool_pad_left < window_input.y().start())
2419 {
2420 continue;
2421 }
2422
2423 const float32x4_t data = vld1q_f32(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().y() +
2424 (y - pool_pad_top) * _input->info()->strides_in_bytes().z()));
2425 vres = vmaxq_f32(vres, data);
2426 }
2427 }
2428 }
2429
2430 // Calculate square-root in case of l2 pooling
2431 if(pooling_type == PoolingType::L2)
2432 {
2433 float32x4_t sqrt_reciprocal = vrsqrteq_f32(vres);
2434 vres = vmulq_f32(vres, vmulq_f32(vrsqrtsq_f32(vmulq_f32(vres, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal));
2435 }
2436
2437 // Store result
2438 vst1q_f32(reinterpret_cast<float *>(output.ptr()), vres);
2439 },
2440 input, output);
2441}
2442
2443template <PoolingType pooling_type, bool exclude_padding>
2444void NEPoolingLayerKernel::poolingMxN_qasymm8_nchw(const Window &window_input, const Window &window)
Georgios Pinitas55186712018-01-08 17:37:12 +00002445{
2446 Iterator input(_input, window_input);
2447 Iterator output(_output, window);
2448
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00002449 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().x() : _pool_info.pool_size().width;
2450 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().height;
Michalis Spyroubd0e6122018-01-23 09:52:16 +00002451 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
2452 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
2453 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
2454 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
2455 int pool_stride_x = 0;
2456 int pool_stride_y = 0;
Georgios Pinitas55186712018-01-08 17:37:12 +00002457 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00002458 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
2459 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Georgios Pinitas55186712018-01-08 17:37:12 +00002460
2461 execute_window_loop(window, [&](const Coordinates & id)
2462 {
2463 uint8_t res = 0;
2464
2465 if(pooling_type != PoolingType::MAX)
2466 {
2467 uint32x4_t vres = vdupq_n_u32(0);
2468 uint32_t sres = 0;
2469
2470 // Calculate scale
Michalis Spyrou57dac842018-03-01 16:03:50 +00002471 const float scale = calculate_avg_scale<exclude_padding, DataLayout::NCHW>(id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitas55186712018-01-08 17:37:12 +00002472
2473 // Perform pooling
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00002474 for(int y = 0; y < pool_size_y; ++y)
Georgios Pinitas55186712018-01-08 17:37:12 +00002475 {
2476 int x = 0;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00002477 for(; x <= (pool_size_x - 8); x += 8)
Georgios Pinitas55186712018-01-08 17:37:12 +00002478 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00002479 const uint8x8_t data = vld1_u8(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() +
2480 (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
Georgios Pinitas55186712018-01-08 17:37:12 +00002481
2482 const uint16x8_t data_u16 = vmovl_u8(data);
2483 vres = vaddq_u32(vres, vaddl_u16(vget_high_u16(data_u16), vget_low_u16(data_u16)));
2484 }
2485
2486 // Leftover for loop
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00002487 for(; x < pool_size_x; ++x)
Georgios Pinitas55186712018-01-08 17:37:12 +00002488 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00002489 uint8_t data = *(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
Georgios Pinitas55186712018-01-08 17:37:12 +00002490 sres += data;
2491 }
2492 }
2493
2494 // Reduction
2495 const auto tmp = vpadd_u32(vget_high_u32(vres), vget_low_u32(vres));
2496 sres += vget_lane_u32(tmp, 0) + vget_lane_u32(tmp, 1);
2497
2498 // Divide by scale
2499 res = static_cast<uint8_t>(support::cpp11::round(sres * scale));
2500 }
2501 else
2502 {
2503 uint8x8_t vres = vdup_n_u8(0);
2504 res = 0;
2505
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00002506 for(int y = 0; y < pool_size_y; ++y)
Georgios Pinitas55186712018-01-08 17:37:12 +00002507 {
2508 int x = 0;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00002509 for(; x <= (pool_size_x - 8); x += 8)
Georgios Pinitas55186712018-01-08 17:37:12 +00002510 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00002511 const uint8x8_t data = vld1_u8(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() +
2512 (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
Georgios Pinitas55186712018-01-08 17:37:12 +00002513 vres = vmax_u8(vres, data);
2514 }
2515
2516 // Leftover for loop
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00002517 for(; x < pool_size_x; ++x)
Georgios Pinitas55186712018-01-08 17:37:12 +00002518 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00002519 const uint8_t data = *(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
Georgios Pinitas55186712018-01-08 17:37:12 +00002520 res = std::max(res, data);
2521 }
2522 }
2523
2524 // Reduce max
2525 vres = vpmax_u8(vres, vres);
2526 vres = vpmax_u8(vres, vres);
2527 vres = vpmax_u8(vres, vres);
2528
2529 // Get max value
2530 res = std::max(res, vget_lane_u8(vres, 0));
2531 }
2532
2533 // Store result
2534 *(reinterpret_cast<uint8_t *>(output.ptr())) = res;
2535 },
2536 input, output);
2537}
2538
Michalis Spyrou57dac842018-03-01 16:03:50 +00002539template <PoolingType pooling_type, bool exclude_padding>
2540void NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc(const Window &window_input, const Window &window)
2541{
2542 Iterator input(_input, window_input);
2543 Iterator output(_output, window);
2544
2545 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().width;
2546 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().z() : _pool_info.pool_size().height;
2547 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
2548 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
2549 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
2550 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
2551 int pool_stride_x = 0;
2552 int pool_stride_y = 0;
2553 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
2554 const int upper_bound_w = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_right);
2555 const int upper_bound_h = _input->info()->dimension(2) + (exclude_padding ? 0 : pool_pad_bottom);
2556
2557 execute_window_loop(window, [&](const Coordinates & id)
2558 {
2559 const int idx_width = id.y() * pool_stride_x;
2560 const int idx_height = id.z() * pool_stride_y;
2561 if(pooling_type != PoolingType::MAX)
2562 {
2563 uint32x4_t vres1 = vdupq_n_u32(0);
2564 uint32x4_t vres2 = vdupq_n_u32(0);
2565
2566 // Calculate scale
2567 const float scale = calculate_avg_scale<exclude_padding, DataLayout::NHWC>(id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
2568 pool_stride_y);
2569 const float32x4_t scale_v = vdupq_n_f32(scale);
2570
2571 // Perform pooling
2572 for(int y = 0; y < pool_size_y; ++y)
2573 {
2574 if(y + idx_height > window_input.z().end() || y + idx_height - pool_pad_top < window_input.z().start())
2575 {
2576 continue;
2577 }
2578
2579 for(int x = 0; x < pool_size_x; ++x)
2580 {
2581 if(x + idx_width > window_input.y().end() || x + idx_width - pool_pad_left < window_input.y().start())
2582 {
2583 continue;
2584 }
2585
2586 const uint8x8_t data = vld1_u8(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().y() +
2587 (y - pool_pad_top) * _input->info()->strides_in_bytes().z()));
2588
2589 const uint16x8_t data_u16 = vmovl_u8(data);
2590 vres1 = vaddq_u32(vres1, vmovl_u16(vget_low_u16(data_u16)));
2591 vres2 = vaddq_u32(vres2, vmovl_u16(vget_high_u16(data_u16)));
2592 }
2593 }
2594 // Divide by scale
2595 vres1 = vcvtq_u32_f32(vmulq_f32(vcvtq_f32_u32(vres1), scale_v));
2596 vres2 = vcvtq_u32_f32(vmulq_f32(vcvtq_f32_u32(vres2), scale_v));
2597
2598 uint8x8_t res = vmovn_u16(vcombine_u16(vmovn_u32(vres1), vmovn_u32(vres2)));
2599
2600 // Store result
2601 vst1_u8(output.ptr(), res);
2602 }
2603 else
2604 {
2605 uint8x8_t vres = vdup_n_u8(0);
2606
2607 for(int y = 0; y < pool_size_y; ++y)
2608 {
2609 if(y + idx_height > window_input.z().end() || y + idx_height - pool_pad_top < window_input.z().start())
2610 {
2611 continue;
2612 }
2613
2614 for(int x = 0; x < pool_size_x; ++x)
2615 {
2616 if(x + idx_width > window_input.y().end() || x + idx_width - pool_pad_left < window_input.y().start())
2617 {
2618 continue;
2619 }
2620
2621 const uint8x8_t data = vld1_u8(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().y() +
2622 (y - pool_pad_top) * _input->info()->strides_in_bytes().z()));
2623 vres = vmax_u8(vres, data);
2624 }
2625 }
2626
2627 // Store result
2628 vst1_u8(output.ptr(), vres);
2629 }
2630 },
2631 input, output);
2632}
2633
Michalis Spyrouafa5d812017-11-30 14:25:57 +00002634Status NEPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
2635{
2636 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
2637
2638 unsigned int pooled_w = 0;
2639 unsigned int pooled_h = 0;
2640 unsigned int num_elems_processed_per_iteration = 0;
2641 BorderSize border_size(0);
2642
Michalis Spyrou57dac842018-03-01 16:03:50 +00002643 const bool is_global_pooling = pool_info.is_global_pooling();
2644 unsigned int pool_size_x = 0;
2645 unsigned int pool_size_y = 0;
2646
2647 // Get data layout
2648 const DataLayout data_layout = input->data_layout();
2649 const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
2650 const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
2651
2652 pool_size_x = is_global_pooling ? input->dimension(idx_width) : pool_info.pool_size().width;
2653 pool_size_y = is_global_pooling ? input->dimension(idx_height) : pool_info.pool_size().height;
Michalis Spyrouafa5d812017-11-30 14:25:57 +00002654
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00002655 // Validate pool info before calling scaled_dimensions
2656 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_pool_info(pool_size_x, pool_size_y));
Michalis Spyrouafa5d812017-11-30 14:25:57 +00002657
2658 // Check output dimensions
Michalis Spyrou57dac842018-03-01 16:03:50 +00002659 std::tie(pooled_w, pooled_h) = scaled_dimensions(input->dimension(idx_width),
2660 input->dimension(idx_height),
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00002661 pool_size_x,
2662 pool_size_y,
Michalis Spyrouafa5d812017-11-30 14:25:57 +00002663 pool_info.pad_stride_info());
2664
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00002665 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, pool_info, pooled_w, pooled_h, pool_size_x));
2666 ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), pool_info, num_elems_processed_per_iteration, border_size, pooled_w, pooled_h,
2667 pool_size_x, pool_size_y)
2668 .first);
Michalis Spyrouafa5d812017-11-30 14:25:57 +00002669
2670 return Status{};
2671}
2672
Moritz Pflanzerc186b572017-09-07 09:48:04 +01002673void NEPoolingLayerKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01002674{
Moritz Pflanzerc186b572017-09-07 09:48:04 +01002675 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01002676 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
2677 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
2678 ARM_COMPUTE_ERROR_ON(_func == nullptr);
2679
Pablo Tello0c34fe22017-06-26 17:17:42 +01002680 const unsigned int pool_stride_x = _pool_info.pad_stride_info().stride().first;
2681 const unsigned int pool_stride_y = _pool_info.pad_stride_info().stride().second;
Isabella Gottardi6e464c32018-01-26 12:32:45 +00002682 const unsigned int pool_size = _pool_info.pool_size().width;
Anthony Barbier6ff3b192017-09-04 18:44:23 +01002683
Michalis Spyrou57dac842018-03-01 16:03:50 +00002684 Window window_input(window);
2685 if(_input->info()->data_layout() == DataLayout::NCHW)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01002686 {
Michalis Spyrou57dac842018-03-01 16:03:50 +00002687 // Set step for input in x and y direction for the input
2688 unsigned int window_x_inc = 0;
2689 switch(_input->info()->data_type())
Pablo Tello0c34fe22017-06-26 17:17:42 +01002690 {
Michalis Spyrou57dac842018-03-01 16:03:50 +00002691 case DataType::QS8:
2692 case DataType::QS16:
2693 case DataType::F16:
Georgios Pinitas55186712018-01-08 17:37:12 +00002694 {
2695 window_x_inc = (pool_stride_x == 2) ? _num_elems_processed_per_iteration * 2 : _num_elems_processed_per_iteration;
Michalis Spyrou57dac842018-03-01 16:03:50 +00002696 break;
Georgios Pinitas55186712018-01-08 17:37:12 +00002697 }
Michalis Spyrou57dac842018-03-01 16:03:50 +00002698 case DataType::QASYMM8:
2699 {
2700 window_x_inc = pool_stride_x;
2701 if((pool_size == 2 || pool_size == 3) && pool_stride_x < 3)
2702 {
2703 window_x_inc = (pool_stride_x == 2) ? _num_elems_processed_per_iteration * 2 : _num_elems_processed_per_iteration;
2704 }
2705 break;
2706 }
2707 case DataType::F32:
2708 {
2709 window_x_inc = pool_stride_x;
2710 break;
2711 }
2712 default:
2713 {
2714 ARM_COMPUTE_ERROR("Not supported");
2715 }
Georgios Pinitas55186712018-01-08 17:37:12 +00002716 }
Michalis Spyrou57dac842018-03-01 16:03:50 +00002717 window_input.set(Window::DimX, Window::Dimension(window.x().start() * pool_stride_x, window.x().end() * pool_stride_x, window_x_inc));
2718 window_input.set(Window::DimY, Window::Dimension(window.y().start() * pool_stride_y, window.y().end() * pool_stride_y, pool_stride_y));
Anthony Barbier6ff3b192017-09-04 18:44:23 +01002719 }
Michalis Spyrou57dac842018-03-01 16:03:50 +00002720 else
2721 {
2722 window_input.set(Window::DimX, Window::Dimension(0, _input->info()->dimension(0), _num_elems_processed_per_iteration));
2723 window_input.set(Window::DimY, Window::Dimension(0, _input->info()->dimension(1), pool_stride_x));
2724 window_input.set(Window::DimZ, Window::Dimension(0, _input->info()->dimension(2), pool_stride_y));
2725 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +01002726
2727 // Run function
2728 (this->*_func)(window_input, window);
2729}