blob: fdd3410b4095824766860b0c9edaf35a4d106505 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
Georgios Pinitas55186712018-01-08 17:37:12 +00002 * Copyright (c) 2017-2018 ARM Limited.
Anthony Barbier6ff3b192017-09-04 18:44:23 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h"
25
26#include "arm_compute/core/AccessWindowStatic.h"
Anthony Barbiereaefd002018-07-20 17:49:35 +010027#include "arm_compute/core/CPP/Validate.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010028#include "arm_compute/core/Error.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010029#include "arm_compute/core/Helpers.h"
30#include "arm_compute/core/ITensor.h"
Georgios Pinitas55186712018-01-08 17:37:12 +000031#include "arm_compute/core/NEON/NEAsymm.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010032#include "arm_compute/core/NEON/NEFixedPoint.h"
Georgios Pinitascdf51452017-08-31 14:21:36 +010033#include "arm_compute/core/NEON/NEMath.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010034#include "arm_compute/core/TensorInfo.h"
35#include "arm_compute/core/Utils.h"
36#include "arm_compute/core/Validate.h"
37#include "arm_compute/core/Window.h"
Giorgio Arena9fb6c7e2018-08-22 12:15:25 +010038#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010039
Georgios Pinitas55186712018-01-08 17:37:12 +000040#include "support/ToolchainSupport.h"
41
Anthony Barbier6ff3b192017-09-04 18:44:23 +010042#include <algorithm>
43#include <arm_neon.h>
Georgios Pinitascdf51452017-08-31 14:21:36 +010044#include <cmath>
Anthony Barbier6ff3b192017-09-04 18:44:23 +010045#include <limits>
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +010046#include <set>
Anthony Barbier6ff3b192017-09-04 18:44:23 +010047#include <string>
48#include <tuple>
49
50using namespace arm_compute;
Giorgio Arena9fb6c7e2018-08-22 12:15:25 +010051using namespace misc::shape_calculator;
Anthony Barbier6ff3b192017-09-04 18:44:23 +010052
53namespace
54{
Michalis Spyrou57dac842018-03-01 16:03:50 +000055template <bool exclude_padding, DataLayout data_layout>
Isabella Gottardi7567f5f2018-01-30 15:26:00 +000056inline float calculate_avg_scale(const Coordinates &id, const int pool_size_x, const int pool_size_y, const int upper_bound_w, const int upper_bound_h,
Anthony Barbier6ff3b192017-09-04 18:44:23 +010057 const int pad_x, const int pad_y, const int stride_x, const int stride_y)
58{
Michalis Spyrou57dac842018-03-01 16:03:50 +000059 const unsigned int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
60 const unsigned int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
61
62 int start_x = id[idx_width] * stride_x - pad_x;
63 int start_y = id[idx_height] * stride_y - pad_y;
64
65 const int end_x = std::min(start_x + pool_size_x, upper_bound_w);
66 const int end_y = std::min(start_y + pool_size_y, upper_bound_h);
Georgios Pinitasadaae7e2017-10-30 15:56:32 +000067 if(exclude_padding)
68 {
69 start_x = std::max(0, start_x);
70 start_y = std::max(0, start_y);
71 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +010072 return 1.f / ((end_y - start_y) * (end_x - start_x));
73}
74
Georgios Pinitas55186712018-01-08 17:37:12 +000075template <bool exclude_padding>
76inline void scale_vector_s16x8(uint16x8_t &v, const Coordinates &id, int id_offset, int step,
77 const int pool_size, const int upper_bound_w, const int upper_bound_h,
78 const int pad_x, const int pad_y, const int stride_x, const int stride_y)
79{
80 int start_x = (id.x() + id_offset) * stride_x - pad_x;
81 int start_y = id.y() * stride_y - pad_y;
82 const int end_y = std::min(start_y + pool_size, upper_bound_h);
83 if(exclude_padding)
84 {
85 start_y = std::max(0, start_y);
86 }
87
88 std::array<uint16_t, 8> elems =
89 {
90 {
91 vgetq_lane_u16(v, 0),
92 vgetq_lane_u16(v, 1),
93 vgetq_lane_u16(v, 2),
94 vgetq_lane_u16(v, 3),
95 vgetq_lane_u16(v, 4),
96 vgetq_lane_u16(v, 5),
97 vgetq_lane_u16(v, 6),
98 vgetq_lane_u16(v, 7),
99 }
100 };
101
102 for(auto &el : elems)
103 {
104 int c_start_x = start_x;
105 const int end_x = std::min(c_start_x + pool_size, upper_bound_w);
106 if(exclude_padding)
107 {
108 c_start_x = std::max(0, c_start_x);
109 }
110 float scale = 1.f / ((end_y - start_y) * (end_x - c_start_x));
111 el *= scale;
112 start_x += step * stride_x;
113 }
114
115 v = vsetq_lane_u16(elems[0], v, 0);
116 v = vsetq_lane_u16(elems[1], v, 1);
117 v = vsetq_lane_u16(elems[2], v, 2);
118 v = vsetq_lane_u16(elems[3], v, 3);
119 v = vsetq_lane_u16(elems[4], v, 4);
120 v = vsetq_lane_u16(elems[5], v, 5);
121 v = vsetq_lane_u16(elems[6], v, 6);
122 v = vsetq_lane_u16(elems[7], v, 7);
123}
124
Georgios Pinitas13d96e02018-08-23 11:20:23 +0100125Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, unsigned int &pooled_w, unsigned int pooled_h)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100126{
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000127 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100128
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000129 int pool_stride_x = 0;
130 int pool_stride_y = 0;
131 PoolingType pool_type = pool_info.pool_type();
132 const PadStrideInfo pad_stride_info = pool_info.pad_stride_info();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100133 std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100134
Anthony Barbiereaefd002018-07-20 17:49:35 +0100135 ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
Vidhya Sudhan Loganathan7485d5a2018-07-04 09:34:00 +0100136 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
Georgios Pinitas55186712018-01-08 17:37:12 +0000137 ARM_COMPUTE_RETURN_ERROR_ON(pool_type == PoolingType::L2 && is_data_type_quantized(input->data_type()));
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000138
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000139 if(output->total_size() != 0)
Georgios Pinitas1dad50e2017-07-03 17:51:34 +0100140 {
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000141 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
Michalis Spyrou57dac842018-03-01 16:03:50 +0000142 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
143 ARM_COMPUTE_RETURN_ERROR_ON((output->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH)) != pooled_w)
144 || (output->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT)) != pooled_h));
Georgios Pinitas1dad50e2017-07-03 17:51:34 +0100145 }
146
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000147 return Status{};
148}
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100149
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000150Status validate_arguments_pool_info(const unsigned int pool_size_x, const unsigned int pool_size_y)
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000151{
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000152 ARM_COMPUTE_RETURN_ERROR_ON(pool_size_x == 0);
153 ARM_COMPUTE_RETURN_ERROR_ON(pool_size_y == 0);
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000154
155 return Status{};
156}
157
158std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const PoolingLayerInfo &pool_info, unsigned int &num_elems_processed_per_iteration,
159 BorderSize &border_size,
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000160 unsigned int pooled_w, unsigned int pooled_h, int pool_size_x, int pool_size_y)
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000161{
Giorgio Arena9fb6c7e2018-08-22 12:15:25 +0100162 // Output auto inizialitation if not yet initialized
163 auto_init_if_empty(*output, input->clone()->set_tensor_shape(compute_pool_shape(*input, pool_info)));
164
Michalis Spyrou57dac842018-03-01 16:03:50 +0000165 DataLayout data_layout = input->data_layout();
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000166 unsigned int num_elems_read_per_iteration = 0;
167 unsigned int num_elems_horizontal_window = 0;
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000168 int pool_stride_x = 0;
169 int pool_stride_y = 0;
Michalis Spyrou57dac842018-03-01 16:03:50 +0000170 const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
171 const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
172 const int input_width = input->dimension(idx_width);
173 const int input_height = input->dimension(idx_height);
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000174 const PadStrideInfo pad_stride_info = pool_info.pad_stride_info();
175 std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000176 const int pool_pad_right = pad_stride_info.pad_right();
177 const int pool_pad_top = pad_stride_info.pad_top();
178 const int pool_pad_left = pad_stride_info.pad_left();
179 const int pool_pad_bottom = pad_stride_info.pad_bottom();
180 const bool is_square = pool_size_x == pool_size_y;
Michalis Spyrou57dac842018-03-01 16:03:50 +0000181
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000182 // Check output dimensions
Michalis Spyrou57dac842018-03-01 16:03:50 +0000183 std::tie(pooled_w, pooled_h) = scaled_dimensions(input->dimension(idx_width),
184 input->dimension(idx_height),
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000185 pool_size_x,
186 pool_size_y,
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000187 pad_stride_info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100188
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000189 //If it's not squared and optimized will be executed the MxN
190 num_elems_read_per_iteration = 1;
191 num_elems_processed_per_iteration = 1;
192 num_elems_horizontal_window = 1;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100193
Michalis Spyrou57dac842018-03-01 16:03:50 +0000194 const bool is_nhwc = data_layout == DataLayout::NHWC;
195
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000196 if(is_square)
197 {
198 switch(input->data_type())
199 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000200 case DataType::QASYMM8:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000201 if(is_nhwc)
202 {
Michalis Spyrouced25572018-10-01 16:26:20 +0100203 num_elems_processed_per_iteration = 16;
Michalis Spyrou57dac842018-03-01 16:03:50 +0000204 break;
205 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000206 switch(pool_size_x)
207 {
208 case 2:
209 num_elems_read_per_iteration = 16;
210 num_elems_processed_per_iteration = (pool_stride_x == 2) ? 8 : 15;
211 num_elems_horizontal_window = (pool_stride_x == 2) ? 8 : 16;
212 break;
213 case 3:
214 num_elems_read_per_iteration = 16;
215 num_elems_processed_per_iteration = (pool_stride_x == 2) ? 7 : 14;
216 num_elems_horizontal_window = (pool_stride_x == 2) ? 8 : 16;
217 break;
218 default:
219 break;
220 }
221 break;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000222#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
223 case DataType::F16:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000224 if(is_nhwc)
225 {
226 num_elems_processed_per_iteration = 8;
227 break;
228 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000229 switch(pool_size_x)
230 {
231 case 2:
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000232 case 3:
233 num_elems_read_per_iteration = 4;
234 num_elems_processed_per_iteration = 1;
235 num_elems_horizontal_window = 1;
236 break;
237 default:
238 break;
239 }
240 break;
241#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
242 case DataType::F32:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000243 if(is_nhwc)
244 {
Georgios Pinitas64f1a902018-09-18 13:42:51 +0100245 num_elems_processed_per_iteration = 4;
Michalis Spyrou57dac842018-03-01 16:03:50 +0000246 break;
247 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000248 switch(pool_size_x)
249 {
250 case 2:
251 num_elems_read_per_iteration = 2;
252 break;
253 case 3:
254 num_elems_read_per_iteration = 4; // We use vload4 for pooling3
255 break;
256 case 7:
257 num_elems_read_per_iteration = 8; // We use vload8 for pooling7
258 break;
259 default:
260 break;
261 }
262 num_elems_processed_per_iteration = 1;
263 num_elems_horizontal_window = 1;
264 break;
265 default:
266 ARM_COMPUTE_ERROR("Element size not supported");
267 break;
268 }
269 }
Michalis Spyrou57dac842018-03-01 16:03:50 +0000270 else
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000271 {
Michalis Spyrou57dac842018-03-01 16:03:50 +0000272 if(is_nhwc)
273 {
Michalis Spyrouced25572018-10-01 16:26:20 +0100274 num_elems_processed_per_iteration = 16 / input->element_size();
Michalis Spyrou57dac842018-03-01 16:03:50 +0000275 }
276 }
277
278 bool window_changed = false;
279 Window win{};
280 if(data_layout == DataLayout::NCHW)
281 {
282 // Number of iterations in X dimension
283 const int num_iterations_x = (pooled_w + num_elems_processed_per_iteration - 1) / num_elems_processed_per_iteration;
284
285 // Upper limit for the number of right/bottom border elements that are accessed
286 const int upper_bound_w = ((num_iterations_x - 1) * num_elems_processed_per_iteration * pool_stride_x - pool_pad_left + num_elems_read_per_iteration) - input_width;
287 const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_top + pool_size_y) - input_height;
288
289 border_size = BorderSize(pool_pad_top, pool_pad_right, pool_pad_bottom, pool_pad_left);
290 border_size.right = std::max(upper_bound_w, pool_pad_right);
291 border_size.bottom = std::max(upper_bound_h, pool_pad_bottom);
292
293 TensorShape output_shape{ input->tensor_shape() };
294 output_shape.set(0, pooled_w);
295 output_shape.set(1, pooled_h);
296 TensorInfo output_info(input->clone()->set_tensor_shape(output_shape));
297
298 win = calculate_max_window(output_info, Steps(num_elems_processed_per_iteration));
299 AccessWindowStatic input_access(input, -pool_pad_left, -pool_pad_top, input_width + border_size.right, input_height + border_size.bottom);
300
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000301 AccessWindowHorizontal output_access(output, 0, num_elems_horizontal_window);
302 window_changed = update_window_and_padding(win, input_access, output_access);
303 output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
304 }
305 else
306 {
Michalis Spyrou57dac842018-03-01 16:03:50 +0000307 TensorShape output_shape{ input->tensor_shape() };
308 output_shape.set(1, pooled_w);
309 output_shape.set(2, pooled_h);
310 TensorInfo output_info(input->clone()->set_tensor_shape(output_shape));
311
312 win = calculate_max_window(output_info, Steps(num_elems_processed_per_iteration));
313 AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
314
315 AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
316 window_changed = update_window_and_padding(win, input_access, output_access);
317 output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000318 }
319
320 Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
321 return std::make_pair(err, win);
322}
323} // namespace
324
325NEPoolingLayerKernel::NEPoolingLayerKernel()
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000326 : _func(nullptr), _input(nullptr), _output(nullptr), _pool_info(), _num_elems_processed_per_iteration(0), _border_size(0), _is_square(false)
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000327{
328}
329
330BorderSize NEPoolingLayerKernel::border_size() const
331{
332 return _border_size;
333}
334
335void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info)
336{
337 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
338
Diego Lopez Recas61ef5bf2017-12-11 12:36:55 +0000339 const PoolingType pool_type = pool_info.pool_type();
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000340 const PadStrideInfo pad_stride_info = pool_info.pad_stride_info();
341 const bool exclude_padding = pool_info.exclude_padding();
342 const bool is_global_pooling = pool_info.is_global_pooling();
Diego Lopez Recas61ef5bf2017-12-11 12:36:55 +0000343 const int pool_stride_x = pad_stride_info.stride().first;
Michalis Spyrou57dac842018-03-01 16:03:50 +0000344 unsigned int pool_size_x = 0;
345 unsigned int pool_size_y = 0;
346
347 // Get data layout
348 const DataLayout data_layout = input->info()->data_layout();
349 const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
350 const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000351
352 // Update pool size in case of global pooling
Michalis Spyrou57dac842018-03-01 16:03:50 +0000353 pool_size_x = is_global_pooling ? input->info()->dimension(idx_width) : pool_info.pool_size().width;
354 pool_size_y = is_global_pooling ? input->info()->dimension(idx_height) : pool_info.pool_size().height;
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000355
356 // Validate pool info before calling scaled_dimensions
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000357 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_pool_info(pool_size_x, pool_size_y));
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000358
359 // Check output dimensions
Diego Lopez Recas61ef5bf2017-12-11 12:36:55 +0000360 unsigned int pooled_w, pooled_h;
Michalis Spyrou57dac842018-03-01 16:03:50 +0000361 std::tie(pooled_w, pooled_h) = scaled_dimensions(input->info()->dimension(idx_width),
362 input->info()->dimension(idx_height),
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000363 pool_size_x,
364 pool_size_y,
Diego Lopez Recas61ef5bf2017-12-11 12:36:55 +0000365 pad_stride_info);
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000366
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000367 // Perform validation step
Georgios Pinitas13d96e02018-08-23 11:20:23 +0100368 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info, pooled_w, pooled_h));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100369
370 // Set instance variables
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000371 _input = input;
372 _output = output;
373 _pool_info = pool_info;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000374 _is_square = (pool_size_x == pool_size_y);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100375
Georgios Pinitas55186712018-01-08 17:37:12 +0000376 // Get data type
377 const DataType data_type = input->info()->data_type();
Michalis Spyrou57dac842018-03-01 16:03:50 +0000378 const bool is_nchw = data_layout == DataLayout::NCHW;
Georgios Pinitas55186712018-01-08 17:37:12 +0000379
Vidhya Sudhan Loganathan7485d5a2018-07-04 09:34:00 +0100380 if(data_type == DataType::QASYMM8)
Georgios Pinitas55186712018-01-08 17:37:12 +0000381 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000382 if(pool_size_x == 2 && pool_stride_x < 3 && _is_square)
Georgios Pinitas55186712018-01-08 17:37:12 +0000383 {
384 switch(pool_type)
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100385 {
Georgios Pinitas55186712018-01-08 17:37:12 +0000386 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000387 if(is_nchw)
388 {
389 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_qasymm8_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling2_qasymm8_nchw<PoolingType::AVG, false>;
390 }
391 else
392 {
393 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::AVG, false>;
394 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000395 break;
396 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000397 if(is_nchw)
398 {
399 _func = &NEPoolingLayerKernel::pooling2_qasymm8_nchw<PoolingType::MAX>;
400 }
401 else
402 {
403 _func = &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::MAX>;
404 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000405 break;
406 default:
407 ARM_COMPUTE_ERROR("Unsupported pooling type!");
408 }
409 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000410 else if(pool_size_x == 3 && pool_stride_x < 3 && _is_square)
Georgios Pinitas55186712018-01-08 17:37:12 +0000411 {
412 switch(pool_type)
413 {
414 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000415 if(is_nchw)
416 {
417 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_qasymm8_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling3_qasymm8_nchw<PoolingType::AVG, false>;
418 }
419 else
420 {
421 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::AVG, false>;
422 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000423 break;
424 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000425 if(is_nchw)
426 {
427 _func = &NEPoolingLayerKernel::pooling3_qasymm8_nchw<PoolingType::MAX>;
428 }
429 else
430 {
431 _func = &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::MAX>;
432 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000433 break;
434 default:
435 ARM_COMPUTE_ERROR("Unsupported pooling type!");
436 }
437 }
438 else
439 {
440 switch(pool_type)
441 {
442 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000443 if(is_nchw)
444 {
445 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_qasymm8_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_qasymm8_nchw<PoolingType::AVG, false>;
446 }
447 else
448 {
449 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::AVG, false>;
450 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000451 break;
452 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000453 if(is_nchw)
454 {
455 _func = &NEPoolingLayerKernel::poolingMxN_qasymm8_nchw<PoolingType::MAX>;
456 }
457 else
458 {
459 _func = &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::MAX>;
460 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000461 break;
462 default:
463 ARM_COMPUTE_ERROR("Unsupported pooling type!");
464 }
465 }
466 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000467 else if(data_type == DataType::F16)
468 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000469 if(_is_square)
Georgios Pinitas55186712018-01-08 17:37:12 +0000470 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000471 switch(pool_size_x)
472 {
473 case 2:
474 switch(pool_type)
475 {
476 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000477 if(is_nchw)
478 {
479 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_f16_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling2_f16_nchw<PoolingType::AVG, false>;
480 }
481 else
482 {
483 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::AVG, false>;
484 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000485 break;
486 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000487 if(is_nchw)
488 {
489 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_f16_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::pooling2_f16_nchw<PoolingType::L2, false>;
490 }
491 else
492 {
493 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::L2, false>;
494 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000495 break;
496 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000497 if(is_nchw)
498 {
499 _func = &NEPoolingLayerKernel::pooling2_f16_nchw<PoolingType::MAX, false>;
500 }
501 else
502 {
503 _func = &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::MAX, false>;
504 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000505 break;
506 default:
507 ARM_COMPUTE_ERROR("Unsupported pooling type!");
508 }
509 break;
510 case 3:
511 switch(pool_type)
512 {
513 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000514 if(is_nchw)
515 {
516 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f16_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling3_f16_nchw<PoolingType::AVG, false>;
517 }
518 else
519 {
520 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::AVG, false>;
521 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000522 break;
523 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000524 if(is_nchw)
525 {
526 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f16_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::pooling3_f16_nchw<PoolingType::L2, false>;
527 }
528 else
529 {
530 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::L2, false>;
531 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000532 break;
533 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000534 if(is_nchw)
535 {
536 _func = &NEPoolingLayerKernel::pooling3_f16_nchw<PoolingType::MAX, false>;
537 }
538 else
539 {
540 _func = &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::MAX, false>;
541 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000542 break;
543 default:
544 ARM_COMPUTE_ERROR("Unsupported pooling type!");
545 }
546 break;
547 default:
548 switch(pool_type)
549 {
550 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000551 if(is_nchw)
552 {
553 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::AVG, false>;
554 }
555 else
556 {
557 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::AVG, false>;
558 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000559 break;
560 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000561 if(is_nchw)
562 {
563 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::L2, false>;
564 }
565 else
566 {
567 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::L2, false>;
568 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000569 break;
570 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000571 if(is_nchw)
572 {
573 _func = &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::MAX, false>;
574 }
575 else
576 {
577 _func = &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::MAX, false>;
578 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000579 break;
580 default:
581 ARM_COMPUTE_ERROR("Unsupported pooling type!");
582 }
583 break;
584 }
585 }
586 else
587 {
588 switch(pool_type)
589 {
590 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000591 if(is_nchw)
592 {
593 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::AVG, false>;
594 }
595 else
596 {
597 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::AVG, false>;
598 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000599 break;
600 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000601 if(is_nchw)
602 {
603 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::L2, false>;
604 }
605 else
606 {
607 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::L2, false>;
608 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000609 break;
610 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000611 if(is_nchw)
612 {
613 _func = &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::MAX, false>;
614 }
615 else
616 {
617 _func = &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::MAX, false>;
618 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000619 break;
620 default:
621 ARM_COMPUTE_ERROR("Unsupported pooling type!");
622 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000623 }
624 }
625 else if(data_type == DataType::F32)
626 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000627 if(_is_square)
Georgios Pinitas55186712018-01-08 17:37:12 +0000628 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000629 switch(pool_size_x)
630 {
631 case 2:
632 switch(pool_type)
633 {
634 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000635 if(is_nchw)
636 {
637 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_f32_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling2_f32_nchw<PoolingType::AVG, false>;
638 }
639 else
640 {
641 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, false>;
642 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000643 break;
644 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000645 if(is_nchw)
646 {
647 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_f32_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::pooling2_f32_nchw<PoolingType::L2, false>;
648 }
649 else
650 {
651 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, false>;
652 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000653 break;
654 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000655 if(is_nchw)
656 {
657 _func = &NEPoolingLayerKernel::pooling2_f32_nchw<PoolingType::MAX, false>;
658 }
659 else
660 {
661 _func = &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::MAX, false>;
662 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000663 break;
664 default:
665 ARM_COMPUTE_ERROR("Unsupported pooling type!");
666 }
667 break;
668 case 3:
669 switch(pool_type)
670 {
671 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000672 if(is_nchw)
673 {
674 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f32_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling3_f32_nchw<PoolingType::AVG, false>;
675 }
676 else
677 {
678 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, false>;
679 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000680 break;
681 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000682 if(is_nchw)
683 {
684 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f32_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::pooling3_f32_nchw<PoolingType::L2, false>;
685 }
686 else
687 {
688 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, false>;
689 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000690 break;
691 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000692 if(is_nchw)
693 {
694 _func = &NEPoolingLayerKernel::pooling3_f32_nchw<PoolingType::MAX, false>;
695 }
696 else
697 {
698 _func = &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::MAX, false>;
699 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000700 break;
701 default:
702 ARM_COMPUTE_ERROR("Unsupported pooling type!");
703 }
704 break;
705 case 7:
706 switch(pool_type)
707 {
708 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000709 if(is_nchw)
710 {
711 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling7_f32_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling7_f32_nchw<PoolingType::AVG, false>;
712 }
713 else
714 {
715 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, false>;
716 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000717 break;
718 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000719 if(is_nchw)
720 {
721 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling7_f32_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::pooling7_f32_nchw<PoolingType::L2, false>;
722 }
723 else
724 {
725 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, false>;
726 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000727 break;
728 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000729 if(is_nchw)
730 {
731 _func = &NEPoolingLayerKernel::pooling7_f32_nchw<PoolingType::MAX, false>;
732 }
733 else
734 {
735 _func = &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::MAX, false>;
736 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000737 break;
738 default:
739 ARM_COMPUTE_ERROR("Unsupported pooling type!");
740 }
741 break;
742 default:
743 switch(pool_type)
744 {
745 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000746 if(is_nchw)
747 {
748 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::AVG, false>;
749 }
750 else
751 {
752 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, false>;
753 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000754 break;
755 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000756 if(is_nchw)
757 {
758 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::L2, false>;
759 }
760 else
761 {
762 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, false>;
763 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000764 break;
765 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000766 if(is_nchw)
767 {
768 _func = &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::MAX, false>;
769 }
770 else
771 {
772 _func = &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::MAX, false>;
773 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000774 break;
775 default:
776 ARM_COMPUTE_ERROR("Unsupported pooling type!");
777 }
778 break;
779 }
780 }
781 else
782 {
783 switch(pool_type)
784 {
785 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000786 if(is_nchw)
787 {
788 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::AVG, false>;
789 }
790 else
791 {
792 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, false>;
793 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000794 break;
795 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000796 if(is_nchw)
797 {
798 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::L2, false>;
799 }
800 else
801 {
802 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, false>;
803 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000804 break;
805 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000806 if(is_nchw)
807 {
808 _func = &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::MAX, false>;
809 }
810 else
811 {
812 _func = &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::MAX, false>;
813 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000814 break;
815 default:
816 ARM_COMPUTE_ERROR("Unsupported pooling type!");
817 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000818 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100819 }
820
821 // Configure kernel window
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000822 auto win_config = validate_and_configure_window(input->info(), output->info(), pool_info, _num_elems_processed_per_iteration, _border_size, pooled_w, pooled_h, pool_size_x, pool_size_y);
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000823 ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
824 INEKernel::configure(win_config.second);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100825}
826
Georgios Pinitas55186712018-01-08 17:37:12 +0000827template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +0000828void NEPoolingLayerKernel::pooling2_qasymm8_nchw(const Window &window_input, const Window &window)
Georgios Pinitas55186712018-01-08 17:37:12 +0000829{
830 Iterator input(_input, window_input);
831 Iterator output(_output, window);
832
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000833 constexpr int pool_size = 2;
834 int pool_stride_x = 0;
835 int pool_stride_y = 0;
836 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
837 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
838 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
839 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
Georgios Pinitas55186712018-01-08 17:37:12 +0000840 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000841 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
842 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Georgios Pinitas55186712018-01-08 17:37:12 +0000843
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000844 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
845 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
Georgios Pinitas55186712018-01-08 17:37:12 +0000846
847 const int scale_step_x = (pool_stride_x == 1) ? 2 : 1;
848
849 execute_window_loop(window, [&](const Coordinates & id)
850 {
851 const auto top_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_top_ptr + input.offset()));
852 const auto bottom_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_bottom_ptr + input.offset()));
853 uint8x8_t lower_res = {};
854 uint8x8_t upper_res = {};
855
856 if(pooling_type != PoolingType::MAX)
857 {
858 const uint16x8x2_t top_data_u16 = { { vmovl_u8(vget_low_u8(top_data)), vmovl_u8(vget_high_u8(top_data)) } };
859 const uint16x8x2_t bottom_data_u16 = { { vmovl_u8(vget_low_u8(bottom_data)), vmovl_u8(vget_high_u8(bottom_data)) } };
860
861 // Add rows
862 const uint16x8x2_t vrsum =
863 {
864 {
865 vaddq_u16(top_data_u16.val[0], bottom_data_u16.val[0]),
866 vaddq_u16(top_data_u16.val[1], bottom_data_u16.val[1]),
867 }
868 };
869
870 // Pair-wise add row data
871 const uint16x4x2_t vpsum =
872 {
873 {
874 vpadd_u16(vget_low_u16(vrsum.val[0]), vget_high_u16(vrsum.val[0])),
875 vpadd_u16(vget_low_u16(vrsum.val[1]), vget_high_u16(vrsum.val[1])),
876 }
877 };
878
879 uint16x8_t res_lower = vcombine_u16(vpsum.val[0], vpsum.val[1]);
880
881 // Scale lower result
882 scale_vector_s16x8<exclude_padding>(res_lower, id, 0, scale_step_x,
883 pool_size, upper_bound_w, upper_bound_h,
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000884 pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitas55186712018-01-08 17:37:12 +0000885 lower_res = vmovn_u16(res_lower);
886
887 // Compute upper result for stride_x == 1
888 if(pool_stride_x == 1)
889 {
890 // Shifted row sum
891 const uint16x8x2_t vrsum_shifted =
892 {
893 {
894 vextq_u16(vrsum.val[0], vrsum.val[1], 1),
895 vextq_u16(vrsum.val[1], vrsum.val[1], 1)
896 }
897 };
898
899 // Pair-wise add shifted row
900 const uint16x4x2_t vpsum_shifted =
901 {
902 {
903 vpadd_u16(vget_low_u16(vrsum_shifted.val[0]), vget_high_u16(vrsum_shifted.val[0])),
904 vpadd_u16(vget_low_u16(vrsum_shifted.val[1]), vget_high_u16(vrsum_shifted.val[1])),
905 }
906 };
907 uint16x8_t res_upper = vcombine_u16(vpsum_shifted.val[0], vpsum_shifted.val[1]);
908
909 // Scale lower result
910 scale_vector_s16x8<exclude_padding>(res_upper, id, 1, 2,
911 pool_size, upper_bound_w, upper_bound_h,
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000912 pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitas55186712018-01-08 17:37:12 +0000913 upper_res = vmovn_u16(res_upper);
914 }
915 }
916 else
917 {
918 const uint8x16_t max_data = vmaxq_u8(top_data, bottom_data);
919 lower_res = vpmax_u8(vget_low_u8(max_data), vget_high_u8(max_data));
920 if(pool_stride_x == 1)
921 {
922 const uint8x16_t max_data_shifted = vextq_u8(max_data, max_data, 1);
923 upper_res = vpmax_u8(vget_low_u8(max_data_shifted), vget_high_u8(max_data_shifted));
924 }
925 }
926
927 // Store result
928 if(pool_stride_x == 1)
929 {
930 const uint8x8x2_t res = { { lower_res, upper_res } };
931 vst2_u8(reinterpret_cast<uint8_t *>(output.ptr()), res);
932 }
933 else
934 {
935 vst1_u8(reinterpret_cast<uint8_t *>(output.ptr()), lower_res);
936 }
937 },
938 input, output);
939}
940
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000941template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +0000942void NEPoolingLayerKernel::pooling3_f16_nchw(const Window &window_input, const Window &window)
Pablo Tello0c34fe22017-06-26 17:17:42 +0100943{
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000944#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Pablo Tello0c34fe22017-06-26 17:17:42 +0100945 Iterator input(_input, window_input);
946 Iterator output(_output, window);
947
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000948 constexpr const int pool_size = 3;
949 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
950 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
951 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
952 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
953 int pool_stride_x = 0;
954 int pool_stride_y = 0;
Pablo Tello0c34fe22017-06-26 17:17:42 +0100955 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000956 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
957 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Pablo Tello0c34fe22017-06-26 17:17:42 +0100958
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000959 const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
960 const unsigned char *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
961 const unsigned char *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 2));
Pablo Tello0c34fe22017-06-26 17:17:42 +0100962
963 execute_window_loop(window, [&](const Coordinates & id)
964 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100965 float16x4_t top_data = vld1_f16(reinterpret_cast<const float16_t *>(input_top_ptr + input.offset()));
966 float16x4_t middle_data = vld1_f16(reinterpret_cast<const float16_t *>(input_middle_ptr + input.offset()));
967 float16x4_t bottom_data = vld1_f16(reinterpret_cast<const float16_t *>(input_bottom_ptr + input.offset()));
968 float16x4_t res = {};
969
970 // Get power of 2 in case of l2 pooling
971 if(pooling_type == PoolingType::L2)
972 {
973 top_data = vmul_f16(top_data, top_data);
974 middle_data = vmul_f16(middle_data, middle_data);
975 bottom_data = vmul_f16(bottom_data, bottom_data);
976 }
977
978 if(pooling_type != PoolingType::MAX)
Pablo Tello0c34fe22017-06-26 17:17:42 +0100979 {
980 // Calculate scale
Michalis Spyrou57dac842018-03-01 16:03:50 +0000981 const float scale = calculate_avg_scale<exclude_padding, DataLayout::NCHW>(id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Pablo Tello0c34fe22017-06-26 17:17:42 +0100982 const float16x4_t scale_v = vdup_n_f16(scale);
983 // Perform pooling
984 const float16x4_t sum_data = vadd_f16(vadd_f16(top_data, bottom_data), middle_data);
985 res = vpadd_f16(vset_lane_f16(0.f, sum_data, 3), sum_data);
986 res = vmul_f16(vpadd_f16(res, res), scale_v);
987 }
988 else
989 {
990 const float16x4_t max_data = vmax_f16(vmax_f16(top_data, bottom_data), middle_data);
991 res = vpmax_f16(vset_lane_f16(-std::numeric_limits<float>::max(), max_data, 3), max_data);
992 res = vpmax_f16(res, res);
993 }
Georgios Pinitascdf51452017-08-31 14:21:36 +0100994
995 // Calculate square-root in case of l2 pooling
996 if(pooling_type == PoolingType::L2)
997 {
998 res = vinv_f16(vinvsqrt_f16(res));
999 }
1000
Pablo Tello0c34fe22017-06-26 17:17:42 +01001001 *(reinterpret_cast<float16_t *>(output.ptr())) = vget_lane_f16(res, 0);
1002 },
1003 input, output);
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +00001004#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tello0c34fe22017-06-26 17:17:42 +01001005 ARM_COMPUTE_UNUSED(window_input);
1006 ARM_COMPUTE_UNUSED(window);
1007 ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +00001008#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tello0c34fe22017-06-26 17:17:42 +01001009}
1010
Georgios Pinitasadaae7e2017-10-30 15:56:32 +00001011template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001012void NEPoolingLayerKernel::pooling2_f16_nchw(const Window &window_input, const Window &window)
Pablo Tello0c34fe22017-06-26 17:17:42 +01001013{
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +00001014#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Pablo Tello0c34fe22017-06-26 17:17:42 +01001015 Iterator input(_input, window_input);
1016 Iterator output(_output, window);
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001017 constexpr int pool_size = 2;
1018 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1019 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1020 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1021 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1022 int pool_stride_x, pool_stride_y = 0;
1023 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
1024 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1025 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Pablo Tello0c34fe22017-06-26 17:17:42 +01001026
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001027 const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
1028 const unsigned char *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
Pablo Tello0c34fe22017-06-26 17:17:42 +01001029
1030 execute_window_loop(window, [&](const Coordinates & id)
1031 {
Georgios Pinitas13d96e02018-08-23 11:20:23 +01001032 float16x4_t top_data = vld1_f16(reinterpret_cast<const float16_t *>(input_top_ptr + input.offset()));
1033 float16x4_t bottom_data = vld1_f16(reinterpret_cast<const float16_t *>(input_bottom_ptr + input.offset()));
1034 float16x4_t res = {};
Pablo Tello0c34fe22017-06-26 17:17:42 +01001035
Georgios Pinitascdf51452017-08-31 14:21:36 +01001036 // Get power of 2 in case of l2 pooling
1037 if(pooling_type == PoolingType::L2)
1038 {
Georgios Pinitas13d96e02018-08-23 11:20:23 +01001039 top_data = vmul_f16(top_data, top_data);
1040 bottom_data = vmul_f16(bottom_data, bottom_data);
Georgios Pinitascdf51452017-08-31 14:21:36 +01001041 }
1042
1043 if(pooling_type != PoolingType::MAX)
Pablo Tello0c34fe22017-06-26 17:17:42 +01001044 {
Michalis Spyrou57dac842018-03-01 16:03:50 +00001045 const float scale = calculate_avg_scale<exclude_padding, DataLayout::NCHW>(id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitas13d96e02018-08-23 11:20:23 +01001046 const float16x4_t scale_v = vdup_n_f16(scale);
1047
1048 const float16x4_t sum_data = vadd_f16(top_data, bottom_data);
1049 res = vmul_f16(vpadd_f16(sum_data, sum_data), scale_v);
Pablo Tello0c34fe22017-06-26 17:17:42 +01001050 }
1051 else
1052 {
Georgios Pinitas13d96e02018-08-23 11:20:23 +01001053 const float16x4_t max_data = vmax_f16(top_data, bottom_data);
1054 res = vpmax_f16(max_data, max_data);
Pablo Tello0c34fe22017-06-26 17:17:42 +01001055 }
Georgios Pinitascdf51452017-08-31 14:21:36 +01001056
1057 // Calculate square-root in case of l2 pooling
1058 if(pooling_type == PoolingType::L2)
1059 {
Georgios Pinitas13d96e02018-08-23 11:20:23 +01001060 res = vinv_f16(vinvsqrt_f16(res));
Georgios Pinitascdf51452017-08-31 14:21:36 +01001061 }
1062
1063 // Store result
Georgios Pinitas13d96e02018-08-23 11:20:23 +01001064 *(reinterpret_cast<float16_t *>(output.ptr())) = vget_lane_f16(res, 0);
Pablo Tello0c34fe22017-06-26 17:17:42 +01001065 },
1066 input, output);
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +00001067#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tello0c34fe22017-06-26 17:17:42 +01001068 ARM_COMPUTE_UNUSED(window_input);
1069 ARM_COMPUTE_UNUSED(window);
1070 ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +00001071#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tello0c34fe22017-06-26 17:17:42 +01001072}
1073
Georgios Pinitasadaae7e2017-10-30 15:56:32 +00001074template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001075void NEPoolingLayerKernel::pooling2_f32_nchw(const Window &window_input, const Window &window)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001076{
1077 Iterator input(_input, window_input);
1078 Iterator output(_output, window);
1079
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001080 constexpr int pool_size = 2;
1081 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1082 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1083 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1084 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1085 int pool_stride_x = 0;
1086 int pool_stride_y = 0;
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001087 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001088 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1089 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001090
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001091 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
1092 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001093
1094 execute_window_loop(window, [&](const Coordinates & id)
1095 {
Georgios Pinitascdf51452017-08-31 14:21:36 +01001096 float32x2_t top_data = vld1_f32(reinterpret_cast<const float *>(input_top_ptr + input.offset()));
1097 float32x2_t bottom_data = vld1_f32(reinterpret_cast<const float *>(input_bottom_ptr + input.offset()));
1098 float32x2_t res = {};
1099 float final_res = 0;
1100
1101 // Get power of 2 in case of l2 pooling
1102 if(pooling_type == PoolingType::L2)
1103 {
1104 top_data = vmul_f32(top_data, top_data);
1105 bottom_data = vmul_f32(bottom_data, bottom_data);
1106 }
1107
1108 if(pooling_type != PoolingType::MAX)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001109 {
1110 // Calculate scale
Michalis Spyrou57dac842018-03-01 16:03:50 +00001111 float scale = calculate_avg_scale<exclude_padding, DataLayout::NCHW>(id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001112 const float32x2_t scale_v = vdup_n_f32(scale);
1113
1114 // Perform pooling
1115 const float32x2_t sum_data = vadd_f32(top_data, bottom_data);
1116 res = vmul_f32(vpadd_f32(sum_data, sum_data), scale_v);
1117 }
1118 else
1119 {
1120 const float32x2_t max_data = vmax_f32(top_data, bottom_data);
1121 res = vpmax_f32(max_data, max_data);
1122 }
Georgios Pinitascdf51452017-08-31 14:21:36 +01001123 final_res = vget_lane_f32(res, 0);
1124
1125 // Calculate square-root in case of l2 pooling
1126 if(pooling_type == PoolingType::L2)
1127 {
1128 final_res = sqrt(final_res);
1129 }
1130
1131 // Store result
1132 *(reinterpret_cast<float *>(output.ptr())) = final_res;
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001133 },
1134 input, output);
1135}
1136
Georgios Pinitas55186712018-01-08 17:37:12 +00001137template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001138void NEPoolingLayerKernel::pooling3_qasymm8_nchw(const Window &window_input, const Window &window)
Georgios Pinitas55186712018-01-08 17:37:12 +00001139{
1140 Iterator input(_input, window_input);
1141 Iterator output(_output, window);
1142
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001143 constexpr int pool_size = 3;
1144 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1145 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1146 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1147 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1148 int pool_stride_x = 0;
1149 int pool_stride_y = 0;
Georgios Pinitas55186712018-01-08 17:37:12 +00001150 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001151 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1152 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Georgios Pinitas55186712018-01-08 17:37:12 +00001153
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001154 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
1155 const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
1156 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 2));
Georgios Pinitas55186712018-01-08 17:37:12 +00001157
1158 execute_window_loop(window, [&](const Coordinates & id)
1159 {
1160 const auto top_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_top_ptr + input.offset()));
1161 const auto middle_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_middle_ptr + input.offset()));
1162 const auto bottom_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_bottom_ptr + input.offset()));
1163
1164 if(pooling_type == PoolingType::AVG)
1165 {
1166 // Convert data to u16
1167 const uint16x8x2_t top_data_u16 = { { vmovl_u8(vget_low_u8(top_data)), vmovl_u8(vget_high_u8(top_data)) } };
1168 const uint16x8x2_t middle_data_u16 = { { vmovl_u8(vget_low_u8(middle_data)), vmovl_u8(vget_high_u8(middle_data)) } };
1169 const uint16x8x2_t bottom_data_u16 = { { vmovl_u8(vget_low_u8(bottom_data)), vmovl_u8(vget_high_u8(bottom_data)) } };
1170
1171 // Calculate row sums
1172 const uint16x8x2_t vrsum =
1173 {
1174 {
1175 vaddq_u16(vaddq_u16(top_data_u16.val[0], bottom_data_u16.val[0]), middle_data_u16.val[0]),
1176 vaddq_u16(vaddq_u16(top_data_u16.val[1], bottom_data_u16.val[1]), middle_data_u16.val[1]),
1177 }
1178 };
1179 const uint16x8x2_t vrsum_shifted_1 =
1180 {
1181 {
1182 vextq_u16(vrsum.val[0], vrsum.val[1], 1),
1183 vextq_u16(vrsum.val[1], vrsum.val[1], 1)
1184 }
1185 };
1186 const uint16x8x2_t vrsum_shifted_2 =
1187 {
1188 {
1189 vextq_u16(vrsum.val[0], vrsum.val[1], 2),
1190 vextq_u16(vrsum.val[1], vrsum.val[1], 2)
1191 }
1192 };
1193 // Calculate final sum
1194 uint16x8x2_t final_sum =
1195 {
1196 {
1197 vaddq_u16(vaddq_u16(vrsum.val[0], vrsum_shifted_1.val[0]), vrsum_shifted_2.val[0]),
1198 vaddq_u16(vaddq_u16(vrsum.val[1], vrsum_shifted_1.val[1]), vrsum_shifted_2.val[1]),
1199 }
1200 };
1201 if(pool_stride_x == 2)
1202 {
1203 uint16x8_t res =
1204 {
1205 vgetq_lane_u16(final_sum.val[0], 0),
1206 vgetq_lane_u16(final_sum.val[0], 2),
1207 vgetq_lane_u16(final_sum.val[0], 4),
1208 vgetq_lane_u16(final_sum.val[0], 6),
1209 vgetq_lane_u16(final_sum.val[1], 0),
1210 vgetq_lane_u16(final_sum.val[1], 2),
1211 vgetq_lane_u16(final_sum.val[1], 4),
1212 vgetq_lane_u16(final_sum.val[1], 6),
1213 };
1214
1215 scale_vector_s16x8<exclude_padding>(res, id, 0, 1,
1216 pool_size, upper_bound_w, upper_bound_h,
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001217 pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitas55186712018-01-08 17:37:12 +00001218 vst1_u8(reinterpret_cast<uint8_t *>(output.ptr()), vmovn_u16(res));
1219 }
1220 else
1221 {
1222 // Scale lower result
1223 scale_vector_s16x8<exclude_padding>(final_sum.val[0], id, 0, 1,
1224 pool_size, upper_bound_w, upper_bound_h,
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001225 pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitas55186712018-01-08 17:37:12 +00001226 // Scale lower result
1227 scale_vector_s16x8<exclude_padding>(final_sum.val[1], id, 8, 1,
1228 pool_size, upper_bound_w, upper_bound_h,
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001229 pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitas55186712018-01-08 17:37:12 +00001230 const uint8x16_t res = vcombine_u8(vmovn_u16(final_sum.val[0]), vmovn_u16(final_sum.val[1]));
1231 vst1q_u8(reinterpret_cast<uint8_t *>(output.ptr()), res);
1232 }
1233 }
1234 else
1235 {
1236 const uint8x16_t max_data = vmaxq_u8(vmaxq_u8(top_data, bottom_data), middle_data);
1237 const uint8x16_t max_data_shift1 = vextq_u8(max_data, max_data, 1);
1238 const uint8x16_t max_data_shift2 = vextq_u8(max_data, max_data, 2);
1239 const uint8x16_t final_max = vmaxq_u8(vmaxq_u8(max_data, max_data_shift1), max_data_shift2);
1240
1241 if(pool_stride_x == 2)
1242 {
1243 const uint8x8x2_t table = { { vget_low_u8(final_max), vget_high_u8(final_max) } };
1244 static const uint8x8_t lookup_val = { 0, 2, 4, 6, 8, 10, 12, 14 };
1245 const uint8x8_t res = vtbl2_u8(table, lookup_val);
1246 vst1_u8(reinterpret_cast<uint8_t *>(output.ptr()), res);
1247 }
1248 else
1249 {
1250 vst1q_u8(reinterpret_cast<uint8_t *>(output.ptr()), final_max);
1251 }
1252 }
1253 },
1254 input, output);
1255}
1256
Georgios Pinitasadaae7e2017-10-30 15:56:32 +00001257template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001258void NEPoolingLayerKernel::pooling3_f32_nchw(const Window &window_input, const Window &window)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001259{
1260 Iterator input(_input, window_input);
1261 Iterator output(_output, window);
1262
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001263 constexpr const int pool_size = 3;
1264 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1265 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1266 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1267 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1268 int pool_stride_x = 0;
1269 int pool_stride_y = 0;
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001270 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001271 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1272 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001273
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001274 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
1275 const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
1276 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 2));
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001277
1278 execute_window_loop(window, [&](const Coordinates & id)
1279 {
Georgios Pinitascdf51452017-08-31 14:21:36 +01001280 float32x4_t top_data = vld1q_f32(reinterpret_cast<const float *>(input_top_ptr + input.offset()));
1281 float32x4_t middle_data = vld1q_f32(reinterpret_cast<const float *>(input_middle_ptr + input.offset()));
1282 float32x4_t bottom_data = vld1q_f32(reinterpret_cast<const float *>(input_bottom_ptr + input.offset()));
1283 float32x2_t res = {};
1284 float final_res = 0;
1285
1286 // Get power of 2 in case of l2 pooling
1287 if(pooling_type == PoolingType::L2)
1288 {
1289 top_data = vmulq_f32(top_data, top_data);
1290 middle_data = vmulq_f32(middle_data, middle_data);
1291 bottom_data = vmulq_f32(bottom_data, bottom_data);
1292 }
1293
1294 if(pooling_type != PoolingType::MAX)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001295 {
1296 // Calculate scale
Michalis Spyrou57dac842018-03-01 16:03:50 +00001297 float scale = calculate_avg_scale<exclude_padding, DataLayout::NCHW>(id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001298 const float32x2_t scale_v = vdup_n_f32(scale);
1299
1300 // Perform pooling
1301 const float32x4_t sum_data = vaddq_f32(vaddq_f32(top_data, bottom_data), middle_data);
1302 res = vpadd_f32(vget_high_f32(vsetq_lane_f32(0.f, sum_data, 3)), vget_low_f32(sum_data));
1303 res = vmul_f32(vpadd_f32(res, res), scale_v);
1304 }
1305 else
1306 {
1307 const float32x4_t max_data = vmaxq_f32(vmaxq_f32(top_data, bottom_data), middle_data);
1308 res = vpmax_f32(vget_high_f32(vsetq_lane_f32(-std::numeric_limits<float>::max(), max_data, 3)), vget_low_f32(max_data));
1309 res = vpmax_f32(res, res);
1310 }
Georgios Pinitascdf51452017-08-31 14:21:36 +01001311 final_res = vget_lane_f32(res, 0);
1312
1313 // Calculate square-root in case of l2 pooling
1314 if(pooling_type == PoolingType::L2)
1315 {
1316 final_res = sqrt(final_res);
1317 }
1318
1319 // Store result
1320 *(reinterpret_cast<float *>(output.ptr())) = final_res;
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001321 },
1322 input, output);
1323}
1324
Georgios Pinitasadaae7e2017-10-30 15:56:32 +00001325template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001326void NEPoolingLayerKernel::pooling7_f32_nchw(const Window &window_input, const Window &window)
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001327{
1328 Iterator input(_input, window_input);
1329 Iterator output(_output, window);
1330
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001331 constexpr const int pool_size = 7;
1332 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1333 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1334 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1335 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1336 int pool_stride_x = 0;
1337 int pool_stride_y = 0;
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001338 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001339 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1340 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001341
1342 std::array<const uint8_t *, pool_size> input_ptrs{ {} };
1343 for(int i = 0; i < pool_size; ++i)
1344 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001345 input_ptrs[i] = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + i));
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001346 }
1347
1348 execute_window_loop(window, [&](const Coordinates & id)
1349 {
Georgios Pinitascdf51452017-08-31 14:21:36 +01001350 float32x2_t res = {};
1351 float final_res = 0.f;
1352 if(pooling_type != PoolingType::MAX)
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001353 {
1354 // Calculate scale
Michalis Spyrou57dac842018-03-01 16:03:50 +00001355 float scale = calculate_avg_scale<exclude_padding, DataLayout::NCHW>(id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001356 const float32x2_t scale_v = vdup_n_f32(scale);
1357
1358 // Perform pooling
Georgios Pinitascdf51452017-08-31 14:21:36 +01001359 float32x4x2_t data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[0] + input.offset()));
1360 // Get power of 2 in case of l2 pooling
1361 if(pooling_type == PoolingType::L2)
1362 {
1363 data.val[0] = vmulq_f32(data.val[0], data.val[0]);
1364 data.val[1] = vmulq_f32(data.val[1], data.val[1]);
1365 }
1366 float32x4_t sum_data = vaddq_f32(data.val[0], vsetq_lane_f32(0.f, data.val[1], 3));
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001367 for(int i = 1; i < pool_size; ++i)
1368 {
Georgios Pinitascdf51452017-08-31 14:21:36 +01001369 data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[i] + input.offset()));
1370 // Get power of 2 in case of l2 pooling
1371 if(pooling_type == PoolingType::L2)
1372 {
1373 data.val[0] = vmulq_f32(data.val[0], data.val[0]);
1374 data.val[1] = vmulq_f32(data.val[1], data.val[1]);
1375 }
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001376 sum_data = vaddq_f32(sum_data, data.val[0]);
1377 sum_data = vaddq_f32(sum_data, vsetq_lane_f32(0.f, data.val[1], 3));
1378 }
1379 res = vpadd_f32(vget_high_f32(sum_data), vget_low_f32(sum_data));
1380 res = vmul_f32(vpadd_f32(res, res), scale_v);
1381 }
1382 else
1383 {
1384 float32x4x2_t max_data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[0] + input.offset()));
1385 for(int i = 1; i < pool_size; ++i)
1386 {
1387 const float32x4x2_t data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[i] + input.offset()));
1388 max_data = vmax2q_f32(max_data, data);
1389 }
1390 res = vpmax_f32(vget_high_f32(vsetq_lane_f32(-std::numeric_limits<float>::max(), max_data.val[1], 3)), vget_low_f32(max_data.val[1]));
1391 res = vpmax_f32(res, vpmax_f32(vget_high_f32(max_data.val[0]), vget_low_f32(max_data.val[0])));
1392 res = vpmax_f32(res, res);
1393 }
Georgios Pinitascdf51452017-08-31 14:21:36 +01001394 final_res = vget_lane_f32(res, 0);
1395
1396 // Calculate square-root in case of l2 pooling
1397 if(pooling_type == PoolingType::L2)
1398 {
1399 final_res = sqrt(final_res);
1400 }
1401
1402 // Store result
1403 *(reinterpret_cast<float *>(output.ptr())) = final_res;
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001404 },
1405 input, output);
1406}
1407
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001408template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001409void NEPoolingLayerKernel::poolingMxN_f16_nchw(const Window &window_input, const Window &window)
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001410{
1411#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
1412 Iterator input(_input, window_input);
1413 Iterator output(_output, window);
1414
1415 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().x() : _pool_info.pool_size().width;
1416 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().height;
1417 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1418 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1419 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1420 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1421 int pool_stride_x = 0;
1422 int pool_stride_y = 0;
1423 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
1424 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1425 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
1426
1427 execute_window_loop(window, [&](const Coordinates & id)
1428 {
1429 float16_t res = 0.0f;
1430 float16x8_t vres = vdupq_n_f16(0.0f);
1431
1432 if(pooling_type != PoolingType::MAX)
1433 {
1434 // Calculate scale
Michalis Spyrou57dac842018-03-01 16:03:50 +00001435 const float scale = calculate_avg_scale<exclude_padding, DataLayout::NCHW>(id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001436
1437 // Perform pooling
1438
1439 for(int y = 0; y < pool_size_y; ++y)
1440 {
1441 int x = 0;
1442 for(; x <= (pool_size_x - 8); x += 8)
1443 {
1444 const float16x8_t data = vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() +
1445 (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
1446
1447 // Get power of 2 in case of l2 pooling and accumulate
1448 if(pooling_type == PoolingType::L2)
1449 {
1450 vres = vaddq_f16(vres, vmulq_f16(data, data));
1451 }
1452 else
1453 {
1454 vres = vaddq_f16(vres, data);
1455 }
1456 }
1457
1458 // Leftover for loop
1459 for(; x < pool_size_x; ++x)
1460 {
1461 float16_t data = *(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
1462
1463 // Get power of 2 in case of l2 pooling
1464 if(pooling_type == PoolingType::L2)
1465 {
1466 data *= data;
1467 }
1468
1469 res += data;
1470 }
1471 }
1472
1473 // Reduction
1474 float16x4_t tmp = vpadd_f16(vget_high_f16(vres), vget_low_f16(vres));
1475 res += vget_lane_f16(tmp, 0);
1476 res += vget_lane_f16(tmp, 1);
1477 res += vget_lane_f16(tmp, 2);
1478 res += vget_lane_f16(tmp, 3);
1479
1480 // Divide by scale
1481 res *= scale;
1482 }
1483 else
1484 {
1485 float16x8_t vres = vdupq_n_f16(std::numeric_limits<float>::lowest());
1486 res = std::numeric_limits<float>::lowest();
1487
1488 for(int y = 0; y < pool_size_y; ++y)
1489 {
1490 int x = 0;
1491 for(; x <= (pool_size_x - 8); x += 8)
1492 {
1493 const float16x8_t data = vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() +
1494 (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
1495 vres = vmaxq_f16(vres, data);
1496 }
1497
1498 // Leftover for loop
1499 for(; x < pool_size_x; ++x)
1500 {
1501 const float16_t data = *(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
1502 res = std::max(res, data);
1503 }
1504 }
1505
1506 float16x4_t tmp = vpmax_f16(vget_high_f16(vres), vget_low_f16(vres));
1507 res = std::max(res, vget_lane_f16(tmp, 0));
1508 res = std::max(res, vget_lane_f16(tmp, 1));
1509 res = std::max(res, vget_lane_f16(tmp, 2));
1510 res = std::max(res, vget_lane_f16(tmp, 3));
1511 }
1512
1513 // Calculate square-root in case of l2 pooling
1514 if(pooling_type == PoolingType::L2)
1515 {
1516 res = std::sqrt(res);
1517 }
1518
1519 // Store result
1520 *(reinterpret_cast<float16_t *>(output.ptr())) = res;
1521 },
1522 input, output);
1523
1524#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
1525 ARM_COMPUTE_UNUSED(window_input);
1526 ARM_COMPUTE_UNUSED(window);
1527 ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
1528#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
1529}
1530
1531template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001532void NEPoolingLayerKernel::poolingMxN_f16_nhwc(const Window &window_input, const Window &window)
1533{
1534#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
1535 Iterator input(_input, window_input);
1536 Iterator output(_output, window);
1537
1538 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().width;
1539 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().z() : _pool_info.pool_size().height;
1540 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1541 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1542 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1543 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1544 int pool_stride_x = 0;
1545 int pool_stride_y = 0;
1546 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
1547 const int upper_bound_w = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_right);
1548 const int upper_bound_h = _input->info()->dimension(2) + (exclude_padding ? 0 : pool_pad_bottom);
1549
1550 float16x8_t vres;
1551
1552 execute_window_loop(window, [&](const Coordinates & id)
1553 {
Michalis Spyrouced25572018-10-01 16:26:20 +01001554 const int idx_width = id.y() * pool_stride_x;
1555 const int idx_height = id.z() * pool_stride_y;
1556 const int pool_limit_y = pool_pad_top - idx_height;
1557 const int pool_limit_x = pool_pad_left - idx_width;
1558
1559 const int pool_start_y = std::max(0, window_input.z().start() + pool_limit_y);
1560 const int pool_end_y = std::min(pool_size_y, window_input.z().end() + pool_limit_y);
1561 const int pool_start_x = std::max(0, window_input.y().start() + pool_limit_x);
1562 const int pool_end_x = std::min(pool_size_x, window_input.y().end() + pool_limit_x);
1563
Michalis Spyrou57dac842018-03-01 16:03:50 +00001564 if(pooling_type != PoolingType::MAX)
1565 {
1566 // Calculate scale
1567 const float scale = calculate_avg_scale<exclude_padding, DataLayout::NHWC>(id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
1568 pool_stride_y);
1569 const float16x8_t scale_v = vdupq_n_f16(scale);
1570
1571 // Perform pooling
1572 vres = vdupq_n_f16(0.0f);
Michalis Spyrouced25572018-10-01 16:26:20 +01001573 for(int y = pool_start_y; y < pool_end_y; ++y)
Michalis Spyrou57dac842018-03-01 16:03:50 +00001574 {
Michalis Spyrouced25572018-10-01 16:26:20 +01001575 for(int x = pool_start_x; x < pool_end_x; ++x)
Michalis Spyrou57dac842018-03-01 16:03:50 +00001576 {
Michalis Spyrou57dac842018-03-01 16:03:50 +00001577 const float16x8_t data = vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().y() +
1578 (y - pool_pad_top) * _input->info()->strides_in_bytes().z()));
1579
1580 // Get power of 2 in case of l2 pooling and accumulate
1581 if(pooling_type == PoolingType::L2)
1582 {
1583 vres = vaddq_f16(vres, vmulq_f16(data, data));
1584 }
1585 else
1586 {
1587 vres = vaddq_f16(vres, data);
1588 }
1589 }
1590 }
1591 // Divide by scale
1592 vres = vmulq_f16(vres, scale_v);
1593 }
1594 else
1595 {
1596 vres = vdupq_n_f16(std::numeric_limits<float>::lowest());
Michalis Spyrouced25572018-10-01 16:26:20 +01001597
1598 for(int y = pool_start_y; y < pool_end_y; ++y)
Michalis Spyrou57dac842018-03-01 16:03:50 +00001599 {
Michalis Spyrouced25572018-10-01 16:26:20 +01001600 for(int x = pool_start_x; x < pool_end_x; ++x)
Michalis Spyrou57dac842018-03-01 16:03:50 +00001601 {
Michalis Spyrou57dac842018-03-01 16:03:50 +00001602 const float16x8_t data = vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().y() +
1603 (y - pool_pad_top) * _input->info()->strides_in_bytes().z()));
1604 vres = vmaxq_f16(vres, data);
1605 }
1606 }
1607 }
1608
1609 // Calculate square-root in case of l2 pooling
1610 if(pooling_type == PoolingType::L2)
1611 {
1612 float16x8_t sqrt_reciprocal = vrsqrteq_f16(vres);
1613 vres = vmulq_f16(vres, vmulq_f16(vrsqrtsq_f16(vmulq_f16(vres, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal));
1614 }
1615
1616 // Store result
1617 vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()), vres);
1618 },
1619 input, output);
1620
1621#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
1622 ARM_COMPUTE_UNUSED(window_input);
1623 ARM_COMPUTE_UNUSED(window);
1624 ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
1625#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
1626}
1627
1628template <PoolingType pooling_type, bool exclude_padding>
1629void NEPoolingLayerKernel::poolingMxN_f32_nchw(const Window &window_input, const Window &window)
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001630{
1631 Iterator input(_input, window_input);
1632 Iterator output(_output, window);
1633
1634 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().x() : _pool_info.pool_size().width;
1635 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().height;
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001636 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1637 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1638 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1639 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1640 int pool_stride_x = 0;
1641 int pool_stride_y = 0;
Gian Marco Iodice16824302017-09-28 15:41:37 +01001642 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001643 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1644 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Gian Marco Iodice16824302017-09-28 15:41:37 +01001645
1646 execute_window_loop(window, [&](const Coordinates & id)
1647 {
1648 float res = 0.0f;
1649
1650 if(pooling_type != PoolingType::MAX)
1651 {
1652 // Calculate scale
Michalis Spyrou57dac842018-03-01 16:03:50 +00001653 const float scale = calculate_avg_scale<exclude_padding, DataLayout::NCHW>(id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Gian Marco Iodice16824302017-09-28 15:41:37 +01001654
1655 // Perform pooling
1656 float32x4_t vres = vdupq_n_f32(0.0f);
1657
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001658 for(int y = 0; y < pool_size_y; ++y)
Gian Marco Iodice16824302017-09-28 15:41:37 +01001659 {
1660 int x = 0;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001661 for(; x <= (pool_size_x - 4); x += 4)
Gian Marco Iodice16824302017-09-28 15:41:37 +01001662 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001663 const float32x4_t data = vld1q_f32(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() +
1664 (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
Gian Marco Iodice16824302017-09-28 15:41:37 +01001665
1666 // Get power of 2 in case of l2 pooling and accumulate
1667 if(pooling_type == PoolingType::L2)
1668 {
1669 vres = vmlaq_f32(vres, data, data);
1670 }
1671 else
1672 {
1673 vres = vaddq_f32(vres, data);
1674 }
1675 }
1676
1677 // Leftover for loop
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001678 for(; x < pool_size_x; ++x)
Gian Marco Iodice16824302017-09-28 15:41:37 +01001679 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001680 float data = *(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
Gian Marco Iodice16824302017-09-28 15:41:37 +01001681
1682 // Get power of 2 in case of l2 pooling
1683 if(pooling_type == PoolingType::L2)
1684 {
1685 data *= data;
1686 }
1687
1688 res += data;
1689 }
1690 }
1691
1692#if defined(__aarch64__)
1693 // Reduction operation available on 64 bit architectures only
1694 res += vaddvq_f32(vres);
1695#else // __aarch64__
1696 // Reduction
1697 float32x2_t tmp = vpadd_f32(vget_high_f32(vres), vget_low_f32(vres));
1698 tmp = vpadd_f32(tmp, tmp);
1699
1700 res += vget_lane_f32(tmp, 0);
1701#endif // __aarch64__
1702 // Divide by scale
1703 res *= scale;
1704 }
1705 else
1706 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001707 float32x4_t vres = vdupq_n_f32(std::numeric_limits<float>::lowest());
1708 res = std::numeric_limits<float>::lowest();
Gian Marco Iodice16824302017-09-28 15:41:37 +01001709
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001710 for(int y = 0; y < pool_size_y; ++y)
Gian Marco Iodice16824302017-09-28 15:41:37 +01001711 {
1712 int x = 0;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001713 for(; x <= (pool_size_x - 4); x += 4)
Gian Marco Iodice16824302017-09-28 15:41:37 +01001714 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001715 const float32x4_t data = vld1q_f32(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() +
1716 (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
Gian Marco Iodice16824302017-09-28 15:41:37 +01001717 vres = vmaxq_f32(vres, data);
1718 }
1719
1720 // Leftover for loop
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001721 for(; x < pool_size_x; ++x)
Gian Marco Iodice16824302017-09-28 15:41:37 +01001722 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001723 const float data = *(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
Gian Marco Iodice16824302017-09-28 15:41:37 +01001724 res = std::max(res, data);
1725 }
1726 }
1727
1728#if defined(__aarch64__)
1729 // Reduction operation available on 64 bit architectures only
1730 res = std::max(vmaxvq_f32(vres), res);
1731#else // __aarch64__
1732 float32x2_t tmp = vpmax_f32(vget_high_f32(vres), vget_low_f32(vres));
1733 tmp = vpmax_f32(tmp, tmp);
1734
1735 res = std::max(res, vget_lane_f32(tmp, 0));
1736#endif // __aarch64__
1737 }
1738
1739 // Calculate square-root in case of l2 pooling
1740 if(pooling_type == PoolingType::L2)
1741 {
1742 res = std::sqrt(res);
1743 }
1744
1745 // Store result
1746 *(reinterpret_cast<float *>(output.ptr())) = res;
1747 },
1748 input, output);
1749}
1750
Georgios Pinitas55186712018-01-08 17:37:12 +00001751template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001752void NEPoolingLayerKernel::poolingMxN_f32_nhwc(const Window &window_input, const Window &window)
1753{
1754 Iterator input(_input, window_input);
1755 Iterator output(_output, window);
1756
1757 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().width;
1758 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().z() : _pool_info.pool_size().height;
1759 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1760 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1761 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1762 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1763 int pool_stride_x = 0;
1764 int pool_stride_y = 0;
1765 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
1766 const int upper_bound_w = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_right);
1767 const int upper_bound_h = _input->info()->dimension(2) + (exclude_padding ? 0 : pool_pad_bottom);
1768
1769 float32x4_t vres;
1770
1771 execute_window_loop(window, [&](const Coordinates & id)
1772 {
Michalis Spyrouced25572018-10-01 16:26:20 +01001773 const int idx_width = id.y() * pool_stride_x;
1774 const int idx_height = id.z() * pool_stride_y;
1775 const int pool_limit_y = pool_pad_top - idx_height;
1776 const int pool_limit_x = pool_pad_left - idx_width;
1777
1778 const int pool_start_y = std::max(0, window_input.z().start() + pool_limit_y);
1779 const int pool_end_y = std::min(pool_size_y, window_input.z().end() + pool_limit_y);
1780 const int pool_start_x = std::max(0, window_input.y().start() + pool_limit_x);
1781 const int pool_end_x = std::min(pool_size_x, window_input.y().end() + pool_limit_x);
1782
Michalis Spyrou57dac842018-03-01 16:03:50 +00001783 if(pooling_type != PoolingType::MAX)
1784 {
1785 // Calculate scale
1786 const float scale = calculate_avg_scale<exclude_padding, DataLayout::NHWC>(id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
1787 pool_stride_y);
1788 const float32x4_t scale_v = vdupq_n_f32(scale);
1789
1790 // Perform pooling
1791 vres = vdupq_n_f32(0.0f);
1792
Michalis Spyrouced25572018-10-01 16:26:20 +01001793 for(int y = pool_start_y; y < pool_end_y; ++y)
Michalis Spyrou57dac842018-03-01 16:03:50 +00001794 {
Michalis Spyrouced25572018-10-01 16:26:20 +01001795 for(int x = pool_start_x; x < pool_end_x; ++x)
Michalis Spyrou57dac842018-03-01 16:03:50 +00001796 {
Michalis Spyrou57dac842018-03-01 16:03:50 +00001797 const float32x4_t data = vld1q_f32(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().y() +
1798 (y - pool_pad_top) * _input->info()->strides_in_bytes().z()));
1799
1800 // Get power of 2 in case of l2 pooling and accumulate
1801 if(pooling_type == PoolingType::L2)
1802 {
1803 vres = vmlaq_f32(vres, data, data);
1804 }
1805 else
1806 {
1807 vres = vaddq_f32(vres, data);
1808 }
1809 }
1810 }
1811 // Divide by scale
1812 vres = vmulq_f32(vres, scale_v);
1813 }
1814 else
1815 {
1816 vres = vdupq_n_f32(std::numeric_limits<float>::lowest());
Michalis Spyrouced25572018-10-01 16:26:20 +01001817 for(int y = pool_start_y; y < pool_end_y; ++y)
Michalis Spyrou57dac842018-03-01 16:03:50 +00001818 {
Michalis Spyrouced25572018-10-01 16:26:20 +01001819 for(int x = pool_start_x; x < pool_end_x; ++x)
Michalis Spyrou57dac842018-03-01 16:03:50 +00001820 {
Michalis Spyrou57dac842018-03-01 16:03:50 +00001821 const float32x4_t data = vld1q_f32(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().y() +
1822 (y - pool_pad_top) * _input->info()->strides_in_bytes().z()));
1823 vres = vmaxq_f32(vres, data);
1824 }
1825 }
1826 }
1827
1828 // Calculate square-root in case of l2 pooling
1829 if(pooling_type == PoolingType::L2)
1830 {
1831 float32x4_t sqrt_reciprocal = vrsqrteq_f32(vres);
1832 vres = vmulq_f32(vres, vmulq_f32(vrsqrtsq_f32(vmulq_f32(vres, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal));
1833 }
1834
1835 // Store result
1836 vst1q_f32(reinterpret_cast<float *>(output.ptr()), vres);
1837 },
1838 input, output);
1839}
1840
1841template <PoolingType pooling_type, bool exclude_padding>
1842void NEPoolingLayerKernel::poolingMxN_qasymm8_nchw(const Window &window_input, const Window &window)
Georgios Pinitas55186712018-01-08 17:37:12 +00001843{
1844 Iterator input(_input, window_input);
1845 Iterator output(_output, window);
1846
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001847 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().x() : _pool_info.pool_size().width;
1848 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().height;
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001849 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1850 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1851 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1852 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1853 int pool_stride_x = 0;
1854 int pool_stride_y = 0;
Georgios Pinitas55186712018-01-08 17:37:12 +00001855 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001856 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1857 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Georgios Pinitas55186712018-01-08 17:37:12 +00001858
1859 execute_window_loop(window, [&](const Coordinates & id)
1860 {
1861 uint8_t res = 0;
1862
1863 if(pooling_type != PoolingType::MAX)
1864 {
1865 uint32x4_t vres = vdupq_n_u32(0);
1866 uint32_t sres = 0;
1867
1868 // Calculate scale
Michalis Spyrou57dac842018-03-01 16:03:50 +00001869 const float scale = calculate_avg_scale<exclude_padding, DataLayout::NCHW>(id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitas55186712018-01-08 17:37:12 +00001870
1871 // Perform pooling
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001872 for(int y = 0; y < pool_size_y; ++y)
Georgios Pinitas55186712018-01-08 17:37:12 +00001873 {
1874 int x = 0;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001875 for(; x <= (pool_size_x - 8); x += 8)
Georgios Pinitas55186712018-01-08 17:37:12 +00001876 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001877 const uint8x8_t data = vld1_u8(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() +
1878 (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
Georgios Pinitas55186712018-01-08 17:37:12 +00001879
1880 const uint16x8_t data_u16 = vmovl_u8(data);
1881 vres = vaddq_u32(vres, vaddl_u16(vget_high_u16(data_u16), vget_low_u16(data_u16)));
1882 }
1883
1884 // Leftover for loop
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001885 for(; x < pool_size_x; ++x)
Georgios Pinitas55186712018-01-08 17:37:12 +00001886 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001887 uint8_t data = *(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
Georgios Pinitas55186712018-01-08 17:37:12 +00001888 sres += data;
1889 }
1890 }
1891
1892 // Reduction
1893 const auto tmp = vpadd_u32(vget_high_u32(vres), vget_low_u32(vres));
1894 sres += vget_lane_u32(tmp, 0) + vget_lane_u32(tmp, 1);
1895
1896 // Divide by scale
1897 res = static_cast<uint8_t>(support::cpp11::round(sres * scale));
1898 }
1899 else
1900 {
1901 uint8x8_t vres = vdup_n_u8(0);
1902 res = 0;
1903
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001904 for(int y = 0; y < pool_size_y; ++y)
Georgios Pinitas55186712018-01-08 17:37:12 +00001905 {
1906 int x = 0;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001907 for(; x <= (pool_size_x - 8); x += 8)
Georgios Pinitas55186712018-01-08 17:37:12 +00001908 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001909 const uint8x8_t data = vld1_u8(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() +
1910 (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
Georgios Pinitas55186712018-01-08 17:37:12 +00001911 vres = vmax_u8(vres, data);
1912 }
1913
1914 // Leftover for loop
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001915 for(; x < pool_size_x; ++x)
Georgios Pinitas55186712018-01-08 17:37:12 +00001916 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001917 const uint8_t data = *(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
Georgios Pinitas55186712018-01-08 17:37:12 +00001918 res = std::max(res, data);
1919 }
1920 }
1921
1922 // Reduce max
1923 vres = vpmax_u8(vres, vres);
1924 vres = vpmax_u8(vres, vres);
1925 vres = vpmax_u8(vres, vres);
1926
1927 // Get max value
1928 res = std::max(res, vget_lane_u8(vres, 0));
1929 }
1930
1931 // Store result
1932 *(reinterpret_cast<uint8_t *>(output.ptr())) = res;
1933 },
1934 input, output);
1935}
1936
Michalis Spyrou57dac842018-03-01 16:03:50 +00001937template <PoolingType pooling_type, bool exclude_padding>
1938void NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc(const Window &window_input, const Window &window)
1939{
1940 Iterator input(_input, window_input);
1941 Iterator output(_output, window);
1942
1943 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().width;
1944 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().z() : _pool_info.pool_size().height;
1945 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1946 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1947 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1948 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1949 int pool_stride_x = 0;
1950 int pool_stride_y = 0;
1951 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
1952 const int upper_bound_w = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_right);
1953 const int upper_bound_h = _input->info()->dimension(2) + (exclude_padding ? 0 : pool_pad_bottom);
1954
1955 execute_window_loop(window, [&](const Coordinates & id)
1956 {
Michalis Spyrouced25572018-10-01 16:26:20 +01001957 const int idx_width = id.y() * pool_stride_x;
1958 const int idx_height = id.z() * pool_stride_y;
1959 const int pool_limit_y = pool_pad_top - idx_height;
1960 const int pool_limit_x = pool_pad_left - idx_width;
1961
1962 const int pool_start_y = std::max(0, window_input.z().start() + pool_limit_y);
1963 const int pool_end_y = std::min(pool_size_y, window_input.z().end() + pool_limit_y);
1964 const int pool_start_x = std::max(0, window_input.y().start() + pool_limit_x);
1965 const int pool_end_x = std::min(pool_size_x, window_input.y().end() + pool_limit_x);
1966
Michalis Spyrou57dac842018-03-01 16:03:50 +00001967 if(pooling_type != PoolingType::MAX)
1968 {
1969 uint32x4_t vres1 = vdupq_n_u32(0);
1970 uint32x4_t vres2 = vdupq_n_u32(0);
Michalis Spyrouced25572018-10-01 16:26:20 +01001971 uint32x4_t vres3 = vdupq_n_u32(0);
1972 uint32x4_t vres4 = vdupq_n_u32(0);
Michalis Spyrou57dac842018-03-01 16:03:50 +00001973
1974 // Calculate scale
1975 const float scale = calculate_avg_scale<exclude_padding, DataLayout::NHWC>(id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
1976 pool_stride_y);
1977 const float32x4_t scale_v = vdupq_n_f32(scale);
1978
1979 // Perform pooling
Michalis Spyrouced25572018-10-01 16:26:20 +01001980 for(int y = pool_start_y; y < pool_end_y; ++y)
Michalis Spyrou57dac842018-03-01 16:03:50 +00001981 {
Michalis Spyrouced25572018-10-01 16:26:20 +01001982 for(int x = pool_start_x; x < pool_end_x; ++x)
Michalis Spyrou57dac842018-03-01 16:03:50 +00001983 {
Michalis Spyrouced25572018-10-01 16:26:20 +01001984 const uint8x16_t data = vld1q_u8(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().y() +
1985 (y - pool_pad_top) * _input->info()->strides_in_bytes().z()));
Michalis Spyrou57dac842018-03-01 16:03:50 +00001986
Michalis Spyrouced25572018-10-01 16:26:20 +01001987 const uint16x8_t data_u16 = vmovl_u8(vget_low_u8(data));
1988 const uint16x8_t data2_u16 = vmovl_u8(vget_high_u8(data));
1989 vres1 = vaddq_u32(vres1, vmovl_u16(vget_low_u16(data_u16)));
1990 vres2 = vaddq_u32(vres2, vmovl_u16(vget_high_u16(data_u16)));
1991 vres3 = vaddq_u32(vres3, vmovl_u16(vget_low_u16(data2_u16)));
1992 vres4 = vaddq_u32(vres4, vmovl_u16(vget_high_u16(data2_u16)));
Michalis Spyrou57dac842018-03-01 16:03:50 +00001993 }
1994 }
1995 // Divide by scale
1996 vres1 = vcvtq_u32_f32(vmulq_f32(vcvtq_f32_u32(vres1), scale_v));
1997 vres2 = vcvtq_u32_f32(vmulq_f32(vcvtq_f32_u32(vres2), scale_v));
Michalis Spyrouced25572018-10-01 16:26:20 +01001998 vres3 = vcvtq_u32_f32(vmulq_f32(vcvtq_f32_u32(vres3), scale_v));
1999 vres4 = vcvtq_u32_f32(vmulq_f32(vcvtq_f32_u32(vres4), scale_v));
Michalis Spyrou57dac842018-03-01 16:03:50 +00002000
Michalis Spyrouced25572018-10-01 16:26:20 +01002001 uint8x8_t res1 = vmovn_u16(vcombine_u16(vmovn_u32(vres1), vmovn_u32(vres2)));
2002 uint8x8_t res2 = vmovn_u16(vcombine_u16(vmovn_u32(vres3), vmovn_u32(vres4)));
Michalis Spyrou57dac842018-03-01 16:03:50 +00002003
2004 // Store result
Michalis Spyrouced25572018-10-01 16:26:20 +01002005 vst1_u8(output.ptr(), res1);
2006 vst1_u8(output.ptr() + 8, res2);
Michalis Spyrou57dac842018-03-01 16:03:50 +00002007 }
2008 else
2009 {
Michalis Spyrouced25572018-10-01 16:26:20 +01002010 uint8x16_t vres = vdupq_n_u8(0);
Michalis Spyrou57dac842018-03-01 16:03:50 +00002011
Michalis Spyrouced25572018-10-01 16:26:20 +01002012 for(int y = pool_start_y; y < pool_end_y; ++y)
Michalis Spyrou57dac842018-03-01 16:03:50 +00002013 {
Michalis Spyrouced25572018-10-01 16:26:20 +01002014 for(int x = pool_start_x; x < pool_end_x; ++x)
Michalis Spyrou57dac842018-03-01 16:03:50 +00002015 {
Michalis Spyrouced25572018-10-01 16:26:20 +01002016 const uint8x16_t data = vld1q_u8(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().y() +
2017 (y - pool_pad_top) * _input->info()->strides_in_bytes().z()));
2018 vres = vmaxq_u8(vres, data);
Michalis Spyrou57dac842018-03-01 16:03:50 +00002019 }
2020 }
2021
2022 // Store result
Michalis Spyrouced25572018-10-01 16:26:20 +01002023 vst1q_u8(output.ptr(), vres);
Michalis Spyrou57dac842018-03-01 16:03:50 +00002024 }
2025 },
2026 input, output);
2027}
2028
Michalis Spyrouafa5d812017-11-30 14:25:57 +00002029Status NEPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
2030{
2031 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
2032
2033 unsigned int pooled_w = 0;
2034 unsigned int pooled_h = 0;
2035 unsigned int num_elems_processed_per_iteration = 0;
2036 BorderSize border_size(0);
2037
Michalis Spyrou57dac842018-03-01 16:03:50 +00002038 const bool is_global_pooling = pool_info.is_global_pooling();
2039 unsigned int pool_size_x = 0;
2040 unsigned int pool_size_y = 0;
2041
2042 // Get data layout
2043 const DataLayout data_layout = input->data_layout();
2044 const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
2045 const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
2046
2047 pool_size_x = is_global_pooling ? input->dimension(idx_width) : pool_info.pool_size().width;
2048 pool_size_y = is_global_pooling ? input->dimension(idx_height) : pool_info.pool_size().height;
Michalis Spyrouafa5d812017-11-30 14:25:57 +00002049
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00002050 // Validate pool info before calling scaled_dimensions
2051 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_pool_info(pool_size_x, pool_size_y));
Michalis Spyrouafa5d812017-11-30 14:25:57 +00002052
2053 // Check output dimensions
Michalis Spyrou57dac842018-03-01 16:03:50 +00002054 std::tie(pooled_w, pooled_h) = scaled_dimensions(input->dimension(idx_width),
2055 input->dimension(idx_height),
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00002056 pool_size_x,
2057 pool_size_y,
Michalis Spyrouafa5d812017-11-30 14:25:57 +00002058 pool_info.pad_stride_info());
2059
Georgios Pinitas13d96e02018-08-23 11:20:23 +01002060 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, pool_info, pooled_w, pooled_h));
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00002061 ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), pool_info, num_elems_processed_per_iteration, border_size, pooled_w, pooled_h,
2062 pool_size_x, pool_size_y)
2063 .first);
Michalis Spyrouafa5d812017-11-30 14:25:57 +00002064
2065 return Status{};
2066}
2067
Moritz Pflanzerc186b572017-09-07 09:48:04 +01002068void NEPoolingLayerKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01002069{
Moritz Pflanzerc186b572017-09-07 09:48:04 +01002070 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01002071 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
2072 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
2073 ARM_COMPUTE_ERROR_ON(_func == nullptr);
2074
Pablo Tello0c34fe22017-06-26 17:17:42 +01002075 const unsigned int pool_stride_x = _pool_info.pad_stride_info().stride().first;
2076 const unsigned int pool_stride_y = _pool_info.pad_stride_info().stride().second;
Isabella Gottardi6e464c32018-01-26 12:32:45 +00002077 const unsigned int pool_size = _pool_info.pool_size().width;
Anthony Barbier6ff3b192017-09-04 18:44:23 +01002078
Michalis Spyrou57dac842018-03-01 16:03:50 +00002079 Window window_input(window);
2080 if(_input->info()->data_layout() == DataLayout::NCHW)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01002081 {
Michalis Spyrou57dac842018-03-01 16:03:50 +00002082 // Set step for input in x and y direction for the input
2083 unsigned int window_x_inc = 0;
2084 switch(_input->info()->data_type())
Pablo Tello0c34fe22017-06-26 17:17:42 +01002085 {
Michalis Spyrou57dac842018-03-01 16:03:50 +00002086 case DataType::QASYMM8:
2087 {
2088 window_x_inc = pool_stride_x;
2089 if((pool_size == 2 || pool_size == 3) && pool_stride_x < 3)
2090 {
2091 window_x_inc = (pool_stride_x == 2) ? _num_elems_processed_per_iteration * 2 : _num_elems_processed_per_iteration;
2092 }
2093 break;
2094 }
Georgios Pinitas13d96e02018-08-23 11:20:23 +01002095 case DataType::F16:
Michalis Spyrou57dac842018-03-01 16:03:50 +00002096 case DataType::F32:
2097 {
2098 window_x_inc = pool_stride_x;
2099 break;
2100 }
2101 default:
2102 {
2103 ARM_COMPUTE_ERROR("Not supported");
2104 }
Georgios Pinitas55186712018-01-08 17:37:12 +00002105 }
Michalis Spyrou57dac842018-03-01 16:03:50 +00002106 window_input.set(Window::DimX, Window::Dimension(window.x().start() * pool_stride_x, window.x().end() * pool_stride_x, window_x_inc));
2107 window_input.set(Window::DimY, Window::Dimension(window.y().start() * pool_stride_y, window.y().end() * pool_stride_y, pool_stride_y));
Anthony Barbier6ff3b192017-09-04 18:44:23 +01002108 }
Michalis Spyrou57dac842018-03-01 16:03:50 +00002109 else
2110 {
Georgios Pinitascac13b12018-04-27 19:07:19 +01002111 window_input.set(Window::DimX, Window::Dimension(window.x().start(), window.x().end(), _num_elems_processed_per_iteration));
Michalis Spyrou57dac842018-03-01 16:03:50 +00002112 window_input.set(Window::DimY, Window::Dimension(0, _input->info()->dimension(1), pool_stride_x));
2113 window_input.set(Window::DimZ, Window::Dimension(0, _input->info()->dimension(2), pool_stride_y));
2114 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +01002115
2116 // Run function
2117 (this->*_func)(window_input, window);
2118}