blob: 2ca6090674f6700a960c34d24b619bfe6aec1c5b [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
Georgios Pinitas55186712018-01-08 17:37:12 +00002 * Copyright (c) 2017-2018 ARM Limited.
Anthony Barbier6ff3b192017-09-04 18:44:23 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h"

#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/NEON/NEAsymm.h"
#include "arm_compute/core/NEON/NEFixedPoint.h"
#include "arm_compute/core/NEON/NEMath.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"

#include "support/ToolchainSupport.h"

#include <algorithm>
#include <arm_neon.h>
#include <array>
#include <cmath>
#include <limits>
#include <set>
#include <string>
#include <tuple>
#include <utility>
48
49using namespace arm_compute;
50
51namespace
52{
Michalis Spyrouafa5d812017-11-30 14:25:57 +000053void auto_init(const ITensorInfo *input, ITensorInfo *output, unsigned int pooled_w, unsigned int pooled_h)
54{
55 TensorShape output_shape{ input->tensor_shape() };
Michalis Spyrou57dac842018-03-01 16:03:50 +000056 output_shape.set(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH), pooled_w);
57 output_shape.set(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT), pooled_h);
Michalis Spyrouafa5d812017-11-30 14:25:57 +000058
59 auto_init_if_empty(*output, input->clone()->set_tensor_shape(output_shape));
60}
61
Michalis Spyrou57dac842018-03-01 16:03:50 +000062template <bool exclude_padding, DataLayout data_layout>
Isabella Gottardi7567f5f2018-01-30 15:26:00 +000063inline float calculate_avg_scale(const Coordinates &id, const int pool_size_x, const int pool_size_y, const int upper_bound_w, const int upper_bound_h,
Anthony Barbier6ff3b192017-09-04 18:44:23 +010064 const int pad_x, const int pad_y, const int stride_x, const int stride_y)
65{
Michalis Spyrou57dac842018-03-01 16:03:50 +000066 const unsigned int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
67 const unsigned int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
68
69 int start_x = id[idx_width] * stride_x - pad_x;
70 int start_y = id[idx_height] * stride_y - pad_y;
71
72 const int end_x = std::min(start_x + pool_size_x, upper_bound_w);
73 const int end_y = std::min(start_y + pool_size_y, upper_bound_h);
Georgios Pinitasadaae7e2017-10-30 15:56:32 +000074 if(exclude_padding)
75 {
76 start_x = std::max(0, start_x);
77 start_y = std::max(0, start_y);
78 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +010079 return 1.f / ((end_y - start_y) * (end_x - start_x));
80}
81
Georgios Pinitas55186712018-01-08 17:37:12 +000082template <bool exclude_padding>
83inline void scale_vector_s16x8(uint16x8_t &v, const Coordinates &id, int id_offset, int step,
84 const int pool_size, const int upper_bound_w, const int upper_bound_h,
85 const int pad_x, const int pad_y, const int stride_x, const int stride_y)
86{
87 int start_x = (id.x() + id_offset) * stride_x - pad_x;
88 int start_y = id.y() * stride_y - pad_y;
89 const int end_y = std::min(start_y + pool_size, upper_bound_h);
90 if(exclude_padding)
91 {
92 start_y = std::max(0, start_y);
93 }
94
95 std::array<uint16_t, 8> elems =
96 {
97 {
98 vgetq_lane_u16(v, 0),
99 vgetq_lane_u16(v, 1),
100 vgetq_lane_u16(v, 2),
101 vgetq_lane_u16(v, 3),
102 vgetq_lane_u16(v, 4),
103 vgetq_lane_u16(v, 5),
104 vgetq_lane_u16(v, 6),
105 vgetq_lane_u16(v, 7),
106 }
107 };
108
109 for(auto &el : elems)
110 {
111 int c_start_x = start_x;
112 const int end_x = std::min(c_start_x + pool_size, upper_bound_w);
113 if(exclude_padding)
114 {
115 c_start_x = std::max(0, c_start_x);
116 }
117 float scale = 1.f / ((end_y - start_y) * (end_x - c_start_x));
118 el *= scale;
119 start_x += step * stride_x;
120 }
121
122 v = vsetq_lane_u16(elems[0], v, 0);
123 v = vsetq_lane_u16(elems[1], v, 1);
124 v = vsetq_lane_u16(elems[2], v, 2);
125 v = vsetq_lane_u16(elems[3], v, 3);
126 v = vsetq_lane_u16(elems[4], v, 4);
127 v = vsetq_lane_u16(elems[5], v, 5);
128 v = vsetq_lane_u16(elems[6], v, 6);
129 v = vsetq_lane_u16(elems[7], v, 7);
130}
131
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000132Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, unsigned int &pooled_w, unsigned int pooled_h, int pool_size_x)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100133{
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000134 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100135
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000136 int pool_stride_x = 0;
137 int pool_stride_y = 0;
138 PoolingType pool_type = pool_info.pool_type();
139 const PadStrideInfo pad_stride_info = pool_info.pad_stride_info();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100140 std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
Gian Marco Iodice16824302017-09-28 15:41:37 +0100141 static const std::set<int> supported_pool_sizes = { 2, 3 };
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100142
Anthony Barbiereaefd002018-07-20 17:49:35 +0100143 ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
Vidhya Sudhan Loganathan7485d5a2018-07-04 09:34:00 +0100144 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
Georgios Pinitas55186712018-01-08 17:37:12 +0000145 ARM_COMPUTE_RETURN_ERROR_ON(pool_type == PoolingType::L2 && is_data_type_quantized(input->data_type()));
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000146
147 ARM_COMPUTE_RETURN_ERROR_ON((supported_pool_sizes.find(pool_size_x) == supported_pool_sizes.end()) && ((input->data_type() != DataType::F32) && (input->data_type() != DataType::QASYMM8))
148 && (pool_type != PoolingType::MAX));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100149
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000150 if(output->total_size() != 0)
Georgios Pinitas1dad50e2017-07-03 17:51:34 +0100151 {
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000152 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
Michalis Spyrou57dac842018-03-01 16:03:50 +0000153 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
154 ARM_COMPUTE_RETURN_ERROR_ON((output->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH)) != pooled_w)
155 || (output->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT)) != pooled_h));
Georgios Pinitas1dad50e2017-07-03 17:51:34 +0100156 }
157
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000158 return Status{};
159}
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100160
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000161Status validate_arguments_pool_info(const unsigned int pool_size_x, const unsigned int pool_size_y)
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000162{
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000163 ARM_COMPUTE_RETURN_ERROR_ON(pool_size_x == 0);
164 ARM_COMPUTE_RETURN_ERROR_ON(pool_size_y == 0);
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000165
166 return Status{};
167}
168
169std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const PoolingLayerInfo &pool_info, unsigned int &num_elems_processed_per_iteration,
170 BorderSize &border_size,
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000171 unsigned int pooled_w, unsigned int pooled_h, int pool_size_x, int pool_size_y)
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000172{
Michalis Spyrou57dac842018-03-01 16:03:50 +0000173 // Get data layout
174 DataLayout data_layout = input->data_layout();
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000175 unsigned int num_elems_read_per_iteration = 0;
176 unsigned int num_elems_horizontal_window = 0;
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000177 int pool_stride_x = 0;
178 int pool_stride_y = 0;
Michalis Spyrou57dac842018-03-01 16:03:50 +0000179 const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
180 const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
181 const int input_width = input->dimension(idx_width);
182 const int input_height = input->dimension(idx_height);
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000183 const PadStrideInfo pad_stride_info = pool_info.pad_stride_info();
184 std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000185 const int pool_pad_right = pad_stride_info.pad_right();
186 const int pool_pad_top = pad_stride_info.pad_top();
187 const int pool_pad_left = pad_stride_info.pad_left();
188 const int pool_pad_bottom = pad_stride_info.pad_bottom();
189 const bool is_square = pool_size_x == pool_size_y;
Michalis Spyrou57dac842018-03-01 16:03:50 +0000190
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000191 // Check output dimensions
Michalis Spyrou57dac842018-03-01 16:03:50 +0000192 std::tie(pooled_w, pooled_h) = scaled_dimensions(input->dimension(idx_width),
193 input->dimension(idx_height),
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000194 pool_size_x,
195 pool_size_y,
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000196 pad_stride_info);
Michalis Spyrou57dac842018-03-01 16:03:50 +0000197 auto_init(input, output, pooled_w, pooled_h);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100198
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000199 //If it's not squared and optimized will be executed the MxN
200 num_elems_read_per_iteration = 1;
201 num_elems_processed_per_iteration = 1;
202 num_elems_horizontal_window = 1;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100203
Michalis Spyrou57dac842018-03-01 16:03:50 +0000204 const bool is_nhwc = data_layout == DataLayout::NHWC;
205
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000206 if(is_square)
207 {
208 switch(input->data_type())
209 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000210 case DataType::QASYMM8:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000211 if(is_nhwc)
212 {
213 num_elems_processed_per_iteration = 8;
214 break;
215 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000216 switch(pool_size_x)
217 {
218 case 2:
219 num_elems_read_per_iteration = 16;
220 num_elems_processed_per_iteration = (pool_stride_x == 2) ? 8 : 15;
221 num_elems_horizontal_window = (pool_stride_x == 2) ? 8 : 16;
222 break;
223 case 3:
224 num_elems_read_per_iteration = 16;
225 num_elems_processed_per_iteration = (pool_stride_x == 2) ? 7 : 14;
226 num_elems_horizontal_window = (pool_stride_x == 2) ? 8 : 16;
227 break;
228 default:
229 break;
230 }
231 break;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000232#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
233 case DataType::F16:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000234 if(is_nhwc)
235 {
236 num_elems_processed_per_iteration = 8;
237 break;
238 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000239 switch(pool_size_x)
240 {
241 case 2:
242 num_elems_read_per_iteration = 16;
243 num_elems_processed_per_iteration = 8;
244 num_elems_horizontal_window = 8;
245 break;
246 case 3:
247 num_elems_read_per_iteration = 4;
248 num_elems_processed_per_iteration = 1;
249 num_elems_horizontal_window = 1;
250 break;
251 default:
252 break;
253 }
254 break;
255#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
256 case DataType::F32:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000257 if(is_nhwc)
258 {
259 num_elems_processed_per_iteration = 4;
260 break;
261 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000262 switch(pool_size_x)
263 {
264 case 2:
265 num_elems_read_per_iteration = 2;
266 break;
267 case 3:
268 num_elems_read_per_iteration = 4; // We use vload4 for pooling3
269 break;
270 case 7:
271 num_elems_read_per_iteration = 8; // We use vload8 for pooling7
272 break;
273 default:
274 break;
275 }
276 num_elems_processed_per_iteration = 1;
277 num_elems_horizontal_window = 1;
278 break;
279 default:
280 ARM_COMPUTE_ERROR("Element size not supported");
281 break;
282 }
283 }
Michalis Spyrou57dac842018-03-01 16:03:50 +0000284 else
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000285 {
Michalis Spyrou57dac842018-03-01 16:03:50 +0000286 if(is_nhwc)
287 {
288 if(DataType::QASYMM8 == input->data_type())
289 {
290 num_elems_processed_per_iteration = 8;
291 }
292 else
293 {
294 num_elems_processed_per_iteration = 4;
295 }
296 }
297 }
298
299 bool window_changed = false;
300 Window win{};
301 if(data_layout == DataLayout::NCHW)
302 {
303 // Number of iterations in X dimension
304 const int num_iterations_x = (pooled_w + num_elems_processed_per_iteration - 1) / num_elems_processed_per_iteration;
305
306 // Upper limit for the number of right/bottom border elements that are accessed
307 const int upper_bound_w = ((num_iterations_x - 1) * num_elems_processed_per_iteration * pool_stride_x - pool_pad_left + num_elems_read_per_iteration) - input_width;
308 const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_top + pool_size_y) - input_height;
309
310 border_size = BorderSize(pool_pad_top, pool_pad_right, pool_pad_bottom, pool_pad_left);
311 border_size.right = std::max(upper_bound_w, pool_pad_right);
312 border_size.bottom = std::max(upper_bound_h, pool_pad_bottom);
313
314 TensorShape output_shape{ input->tensor_shape() };
315 output_shape.set(0, pooled_w);
316 output_shape.set(1, pooled_h);
317 TensorInfo output_info(input->clone()->set_tensor_shape(output_shape));
318
319 win = calculate_max_window(output_info, Steps(num_elems_processed_per_iteration));
320 AccessWindowStatic input_access(input, -pool_pad_left, -pool_pad_top, input_width + border_size.right, input_height + border_size.bottom);
321
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000322 AccessWindowHorizontal output_access(output, 0, num_elems_horizontal_window);
323 window_changed = update_window_and_padding(win, input_access, output_access);
324 output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
325 }
326 else
327 {
Michalis Spyrou57dac842018-03-01 16:03:50 +0000328 TensorShape output_shape{ input->tensor_shape() };
329 output_shape.set(1, pooled_w);
330 output_shape.set(2, pooled_h);
331 TensorInfo output_info(input->clone()->set_tensor_shape(output_shape));
332
333 win = calculate_max_window(output_info, Steps(num_elems_processed_per_iteration));
334 AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
335
336 AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
337 window_changed = update_window_and_padding(win, input_access, output_access);
338 output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000339 }
340
341 Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
342 return std::make_pair(err, win);
343}
344} // namespace
345
346NEPoolingLayerKernel::NEPoolingLayerKernel()
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000347 : _func(nullptr), _input(nullptr), _output(nullptr), _pool_info(), _num_elems_processed_per_iteration(0), _border_size(0), _is_square(false)
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000348{
349}
350
351BorderSize NEPoolingLayerKernel::border_size() const
352{
353 return _border_size;
354}
355
356void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info)
357{
358 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
359
Diego Lopez Recas61ef5bf2017-12-11 12:36:55 +0000360 const PoolingType pool_type = pool_info.pool_type();
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000361 const PadStrideInfo pad_stride_info = pool_info.pad_stride_info();
362 const bool exclude_padding = pool_info.exclude_padding();
363 const bool is_global_pooling = pool_info.is_global_pooling();
Diego Lopez Recas61ef5bf2017-12-11 12:36:55 +0000364 const int pool_stride_x = pad_stride_info.stride().first;
Michalis Spyrou57dac842018-03-01 16:03:50 +0000365 unsigned int pool_size_x = 0;
366 unsigned int pool_size_y = 0;
367
368 // Get data layout
369 const DataLayout data_layout = input->info()->data_layout();
370 const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
371 const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000372
373 // Update pool size in case of global pooling
Michalis Spyrou57dac842018-03-01 16:03:50 +0000374 pool_size_x = is_global_pooling ? input->info()->dimension(idx_width) : pool_info.pool_size().width;
375 pool_size_y = is_global_pooling ? input->info()->dimension(idx_height) : pool_info.pool_size().height;
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000376
377 // Validate pool info before calling scaled_dimensions
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000378 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_pool_info(pool_size_x, pool_size_y));
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000379
380 // Check output dimensions
Diego Lopez Recas61ef5bf2017-12-11 12:36:55 +0000381 unsigned int pooled_w, pooled_h;
Michalis Spyrou57dac842018-03-01 16:03:50 +0000382 std::tie(pooled_w, pooled_h) = scaled_dimensions(input->info()->dimension(idx_width),
383 input->info()->dimension(idx_height),
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000384 pool_size_x,
385 pool_size_y,
Diego Lopez Recas61ef5bf2017-12-11 12:36:55 +0000386 pad_stride_info);
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000387
388 // Output auto initialization if not yet initialized
389 auto_init(input->info(), output->info(), pooled_w, pooled_h);
390
391 // Perform validation step
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000392 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info, pooled_w, pooled_h, pool_size_x));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100393
394 // Set instance variables
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000395 _input = input;
396 _output = output;
397 _pool_info = pool_info;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000398 _is_square = (pool_size_x == pool_size_y);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100399
Georgios Pinitas55186712018-01-08 17:37:12 +0000400 // Get data type
401 const DataType data_type = input->info()->data_type();
Michalis Spyrou57dac842018-03-01 16:03:50 +0000402 const bool is_nchw = data_layout == DataLayout::NCHW;
Georgios Pinitas55186712018-01-08 17:37:12 +0000403
Vidhya Sudhan Loganathan7485d5a2018-07-04 09:34:00 +0100404 if(data_type == DataType::QASYMM8)
Georgios Pinitas55186712018-01-08 17:37:12 +0000405 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000406 if(pool_size_x == 2 && pool_stride_x < 3 && _is_square)
Georgios Pinitas55186712018-01-08 17:37:12 +0000407 {
408 switch(pool_type)
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100409 {
Georgios Pinitas55186712018-01-08 17:37:12 +0000410 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000411 if(is_nchw)
412 {
413 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_qasymm8_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling2_qasymm8_nchw<PoolingType::AVG, false>;
414 }
415 else
416 {
417 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::AVG, false>;
418 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000419 break;
420 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000421 if(is_nchw)
422 {
423 _func = &NEPoolingLayerKernel::pooling2_qasymm8_nchw<PoolingType::MAX>;
424 }
425 else
426 {
427 _func = &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::MAX>;
428 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000429 break;
430 default:
431 ARM_COMPUTE_ERROR("Unsupported pooling type!");
432 }
433 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000434 else if(pool_size_x == 3 && pool_stride_x < 3 && _is_square)
Georgios Pinitas55186712018-01-08 17:37:12 +0000435 {
436 switch(pool_type)
437 {
438 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000439 if(is_nchw)
440 {
441 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_qasymm8_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling3_qasymm8_nchw<PoolingType::AVG, false>;
442 }
443 else
444 {
445 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::AVG, false>;
446 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000447 break;
448 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000449 if(is_nchw)
450 {
451 _func = &NEPoolingLayerKernel::pooling3_qasymm8_nchw<PoolingType::MAX>;
452 }
453 else
454 {
455 _func = &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::MAX>;
456 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000457 break;
458 default:
459 ARM_COMPUTE_ERROR("Unsupported pooling type!");
460 }
461 }
462 else
463 {
464 switch(pool_type)
465 {
466 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000467 if(is_nchw)
468 {
469 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_qasymm8_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_qasymm8_nchw<PoolingType::AVG, false>;
470 }
471 else
472 {
473 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::AVG, false>;
474 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000475 break;
476 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000477 if(is_nchw)
478 {
479 _func = &NEPoolingLayerKernel::poolingMxN_qasymm8_nchw<PoolingType::MAX>;
480 }
481 else
482 {
483 _func = &NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc<PoolingType::MAX>;
484 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000485 break;
486 default:
487 ARM_COMPUTE_ERROR("Unsupported pooling type!");
488 }
489 }
490 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000491 else if(data_type == DataType::F16)
492 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000493 if(_is_square)
Georgios Pinitas55186712018-01-08 17:37:12 +0000494 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000495 switch(pool_size_x)
496 {
497 case 2:
498 switch(pool_type)
499 {
500 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000501 if(is_nchw)
502 {
503 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_f16_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling2_f16_nchw<PoolingType::AVG, false>;
504 }
505 else
506 {
507 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::AVG, false>;
508 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000509 break;
510 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000511 if(is_nchw)
512 {
513 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_f16_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::pooling2_f16_nchw<PoolingType::L2, false>;
514 }
515 else
516 {
517 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::L2, false>;
518 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000519 break;
520 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000521 if(is_nchw)
522 {
523 _func = &NEPoolingLayerKernel::pooling2_f16_nchw<PoolingType::MAX, false>;
524 }
525 else
526 {
527 _func = &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::MAX, false>;
528 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000529 break;
530 default:
531 ARM_COMPUTE_ERROR("Unsupported pooling type!");
532 }
533 break;
534 case 3:
535 switch(pool_type)
536 {
537 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000538 if(is_nchw)
539 {
540 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f16_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling3_f16_nchw<PoolingType::AVG, false>;
541 }
542 else
543 {
544 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::AVG, false>;
545 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000546 break;
547 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000548 if(is_nchw)
549 {
550 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f16_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::pooling3_f16_nchw<PoolingType::L2, false>;
551 }
552 else
553 {
554 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::L2, false>;
555 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000556 break;
557 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000558 if(is_nchw)
559 {
560 _func = &NEPoolingLayerKernel::pooling3_f16_nchw<PoolingType::MAX, false>;
561 }
562 else
563 {
564 _func = &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::MAX, false>;
565 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000566 break;
567 default:
568 ARM_COMPUTE_ERROR("Unsupported pooling type!");
569 }
570 break;
571 default:
572 switch(pool_type)
573 {
574 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000575 if(is_nchw)
576 {
577 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::AVG, false>;
578 }
579 else
580 {
581 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::AVG, false>;
582 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000583 break;
584 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000585 if(is_nchw)
586 {
587 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::L2, false>;
588 }
589 else
590 {
591 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::L2, false>;
592 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000593 break;
594 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000595 if(is_nchw)
596 {
597 _func = &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::MAX, false>;
598 }
599 else
600 {
601 _func = &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::MAX, false>;
602 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000603 break;
604 default:
605 ARM_COMPUTE_ERROR("Unsupported pooling type!");
606 }
607 break;
608 }
609 }
610 else
611 {
612 switch(pool_type)
613 {
614 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000615 if(is_nchw)
616 {
617 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::AVG, false>;
618 }
619 else
620 {
621 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::AVG, false>;
622 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000623 break;
624 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000625 if(is_nchw)
626 {
627 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::L2, false>;
628 }
629 else
630 {
631 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::L2, false>;
632 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000633 break;
634 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000635 if(is_nchw)
636 {
637 _func = &NEPoolingLayerKernel::poolingMxN_f16_nchw<PoolingType::MAX, false>;
638 }
639 else
640 {
641 _func = &NEPoolingLayerKernel::poolingMxN_f16_nhwc<PoolingType::MAX, false>;
642 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000643 break;
644 default:
645 ARM_COMPUTE_ERROR("Unsupported pooling type!");
646 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000647 }
648 }
649 else if(data_type == DataType::F32)
650 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000651 if(_is_square)
Georgios Pinitas55186712018-01-08 17:37:12 +0000652 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000653 switch(pool_size_x)
654 {
655 case 2:
656 switch(pool_type)
657 {
658 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000659 if(is_nchw)
660 {
661 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_f32_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling2_f32_nchw<PoolingType::AVG, false>;
662 }
663 else
664 {
665 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, false>;
666 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000667 break;
668 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000669 if(is_nchw)
670 {
671 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_f32_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::pooling2_f32_nchw<PoolingType::L2, false>;
672 }
673 else
674 {
675 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, false>;
676 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000677 break;
678 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000679 if(is_nchw)
680 {
681 _func = &NEPoolingLayerKernel::pooling2_f32_nchw<PoolingType::MAX, false>;
682 }
683 else
684 {
685 _func = &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::MAX, false>;
686 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000687 break;
688 default:
689 ARM_COMPUTE_ERROR("Unsupported pooling type!");
690 }
691 break;
692 case 3:
693 switch(pool_type)
694 {
695 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000696 if(is_nchw)
697 {
698 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f32_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling3_f32_nchw<PoolingType::AVG, false>;
699 }
700 else
701 {
702 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, false>;
703 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000704 break;
705 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000706 if(is_nchw)
707 {
708 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f32_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::pooling3_f32_nchw<PoolingType::L2, false>;
709 }
710 else
711 {
712 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, false>;
713 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000714 break;
715 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000716 if(is_nchw)
717 {
718 _func = &NEPoolingLayerKernel::pooling3_f32_nchw<PoolingType::MAX, false>;
719 }
720 else
721 {
722 _func = &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::MAX, false>;
723 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000724 break;
725 default:
726 ARM_COMPUTE_ERROR("Unsupported pooling type!");
727 }
728 break;
729 case 7:
730 switch(pool_type)
731 {
732 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000733 if(is_nchw)
734 {
735 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling7_f32_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling7_f32_nchw<PoolingType::AVG, false>;
736 }
737 else
738 {
739 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, false>;
740 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000741 break;
742 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000743 if(is_nchw)
744 {
745 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling7_f32_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::pooling7_f32_nchw<PoolingType::L2, false>;
746 }
747 else
748 {
749 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, false>;
750 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000751 break;
752 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000753 if(is_nchw)
754 {
755 _func = &NEPoolingLayerKernel::pooling7_f32_nchw<PoolingType::MAX, false>;
756 }
757 else
758 {
759 _func = &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::MAX, false>;
760 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000761 break;
762 default:
763 ARM_COMPUTE_ERROR("Unsupported pooling type!");
764 }
765 break;
766 default:
767 switch(pool_type)
768 {
769 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000770 if(is_nchw)
771 {
772 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::AVG, false>;
773 }
774 else
775 {
776 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, false>;
777 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000778 break;
779 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000780 if(is_nchw)
781 {
782 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::L2, false>;
783 }
784 else
785 {
786 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, false>;
787 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000788 break;
789 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000790 if(is_nchw)
791 {
792 _func = &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::MAX, false>;
793 }
794 else
795 {
796 _func = &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::MAX, false>;
797 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000798 break;
799 default:
800 ARM_COMPUTE_ERROR("Unsupported pooling type!");
801 }
802 break;
803 }
804 }
805 else
806 {
807 switch(pool_type)
808 {
809 case PoolingType::AVG:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000810 if(is_nchw)
811 {
812 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::AVG, false>;
813 }
814 else
815 {
816 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::AVG, false>;
817 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000818 break;
819 case PoolingType::L2:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000820 if(is_nchw)
821 {
822 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::L2, false>;
823 }
824 else
825 {
826 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::L2, false>;
827 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000828 break;
829 case PoolingType::MAX:
Michalis Spyrou57dac842018-03-01 16:03:50 +0000830 if(is_nchw)
831 {
832 _func = &NEPoolingLayerKernel::poolingMxN_f32_nchw<PoolingType::MAX, false>;
833 }
834 else
835 {
836 _func = &NEPoolingLayerKernel::poolingMxN_f32_nhwc<PoolingType::MAX, false>;
837 }
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000838 break;
839 default:
840 ARM_COMPUTE_ERROR("Unsupported pooling type!");
841 }
Georgios Pinitas55186712018-01-08 17:37:12 +0000842 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100843 }
844
845 // Configure kernel window
Isabella Gottardi7567f5f2018-01-30 15:26:00 +0000846 auto win_config = validate_and_configure_window(input->info(), output->info(), pool_info, _num_elems_processed_per_iteration, _border_size, pooled_w, pooled_h, pool_size_x, pool_size_y);
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000847 ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
848 INEKernel::configure(win_config.second);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100849}
850
Georgios Pinitas55186712018-01-08 17:37:12 +0000851template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +0000852void NEPoolingLayerKernel::pooling2_qasymm8_nchw(const Window &window_input, const Window &window)
Georgios Pinitas55186712018-01-08 17:37:12 +0000853{
854 Iterator input(_input, window_input);
855 Iterator output(_output, window);
856
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000857 constexpr int pool_size = 2;
858 int pool_stride_x = 0;
859 int pool_stride_y = 0;
860 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
861 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
862 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
863 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
Georgios Pinitas55186712018-01-08 17:37:12 +0000864 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000865 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
866 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Georgios Pinitas55186712018-01-08 17:37:12 +0000867
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000868 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
869 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
Georgios Pinitas55186712018-01-08 17:37:12 +0000870
871 const int scale_step_x = (pool_stride_x == 1) ? 2 : 1;
872
873 execute_window_loop(window, [&](const Coordinates & id)
874 {
875 const auto top_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_top_ptr + input.offset()));
876 const auto bottom_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_bottom_ptr + input.offset()));
877 uint8x8_t lower_res = {};
878 uint8x8_t upper_res = {};
879
880 if(pooling_type != PoolingType::MAX)
881 {
882 const uint16x8x2_t top_data_u16 = { { vmovl_u8(vget_low_u8(top_data)), vmovl_u8(vget_high_u8(top_data)) } };
883 const uint16x8x2_t bottom_data_u16 = { { vmovl_u8(vget_low_u8(bottom_data)), vmovl_u8(vget_high_u8(bottom_data)) } };
884
885 // Add rows
886 const uint16x8x2_t vrsum =
887 {
888 {
889 vaddq_u16(top_data_u16.val[0], bottom_data_u16.val[0]),
890 vaddq_u16(top_data_u16.val[1], bottom_data_u16.val[1]),
891 }
892 };
893
894 // Pair-wise add row data
895 const uint16x4x2_t vpsum =
896 {
897 {
898 vpadd_u16(vget_low_u16(vrsum.val[0]), vget_high_u16(vrsum.val[0])),
899 vpadd_u16(vget_low_u16(vrsum.val[1]), vget_high_u16(vrsum.val[1])),
900 }
901 };
902
903 uint16x8_t res_lower = vcombine_u16(vpsum.val[0], vpsum.val[1]);
904
905 // Scale lower result
906 scale_vector_s16x8<exclude_padding>(res_lower, id, 0, scale_step_x,
907 pool_size, upper_bound_w, upper_bound_h,
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000908 pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitas55186712018-01-08 17:37:12 +0000909 lower_res = vmovn_u16(res_lower);
910
911 // Compute upper result for stride_x == 1
912 if(pool_stride_x == 1)
913 {
914 // Shifted row sum
915 const uint16x8x2_t vrsum_shifted =
916 {
917 {
918 vextq_u16(vrsum.val[0], vrsum.val[1], 1),
919 vextq_u16(vrsum.val[1], vrsum.val[1], 1)
920 }
921 };
922
923 // Pair-wise add shifted row
924 const uint16x4x2_t vpsum_shifted =
925 {
926 {
927 vpadd_u16(vget_low_u16(vrsum_shifted.val[0]), vget_high_u16(vrsum_shifted.val[0])),
928 vpadd_u16(vget_low_u16(vrsum_shifted.val[1]), vget_high_u16(vrsum_shifted.val[1])),
929 }
930 };
931 uint16x8_t res_upper = vcombine_u16(vpsum_shifted.val[0], vpsum_shifted.val[1]);
932
933 // Scale lower result
934 scale_vector_s16x8<exclude_padding>(res_upper, id, 1, 2,
935 pool_size, upper_bound_w, upper_bound_h,
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000936 pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitas55186712018-01-08 17:37:12 +0000937 upper_res = vmovn_u16(res_upper);
938 }
939 }
940 else
941 {
942 const uint8x16_t max_data = vmaxq_u8(top_data, bottom_data);
943 lower_res = vpmax_u8(vget_low_u8(max_data), vget_high_u8(max_data));
944 if(pool_stride_x == 1)
945 {
946 const uint8x16_t max_data_shifted = vextq_u8(max_data, max_data, 1);
947 upper_res = vpmax_u8(vget_low_u8(max_data_shifted), vget_high_u8(max_data_shifted));
948 }
949 }
950
951 // Store result
952 if(pool_stride_x == 1)
953 {
954 const uint8x8x2_t res = { { lower_res, upper_res } };
955 vst2_u8(reinterpret_cast<uint8_t *>(output.ptr()), res);
956 }
957 else
958 {
959 vst1_u8(reinterpret_cast<uint8_t *>(output.ptr()), lower_res);
960 }
961 },
962 input, output);
963}
964
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000965template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +0000966void NEPoolingLayerKernel::pooling3_f16_nchw(const Window &window_input, const Window &window)
Pablo Tello0c34fe22017-06-26 17:17:42 +0100967{
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000968#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Pablo Tello0c34fe22017-06-26 17:17:42 +0100969 Iterator input(_input, window_input);
970 Iterator output(_output, window);
971
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000972 constexpr const int pool_size = 3;
973 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
974 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
975 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
976 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
977 int pool_stride_x = 0;
978 int pool_stride_y = 0;
Pablo Tello0c34fe22017-06-26 17:17:42 +0100979 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000980 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
981 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Pablo Tello0c34fe22017-06-26 17:17:42 +0100982
Michalis Spyroubd0e6122018-01-23 09:52:16 +0000983 const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
984 const unsigned char *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
985 const unsigned char *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 2));
Pablo Tello0c34fe22017-06-26 17:17:42 +0100986
987 execute_window_loop(window, [&](const Coordinates & id)
988 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100989 float16x4_t top_data = vld1_f16(reinterpret_cast<const float16_t *>(input_top_ptr + input.offset()));
990 float16x4_t middle_data = vld1_f16(reinterpret_cast<const float16_t *>(input_middle_ptr + input.offset()));
991 float16x4_t bottom_data = vld1_f16(reinterpret_cast<const float16_t *>(input_bottom_ptr + input.offset()));
992 float16x4_t res = {};
993
994 // Get power of 2 in case of l2 pooling
995 if(pooling_type == PoolingType::L2)
996 {
997 top_data = vmul_f16(top_data, top_data);
998 middle_data = vmul_f16(middle_data, middle_data);
999 bottom_data = vmul_f16(bottom_data, bottom_data);
1000 }
1001
1002 if(pooling_type != PoolingType::MAX)
Pablo Tello0c34fe22017-06-26 17:17:42 +01001003 {
1004 // Calculate scale
Michalis Spyrou57dac842018-03-01 16:03:50 +00001005 const float scale = calculate_avg_scale<exclude_padding, DataLayout::NCHW>(id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Pablo Tello0c34fe22017-06-26 17:17:42 +01001006 const float16x4_t scale_v = vdup_n_f16(scale);
1007 // Perform pooling
1008 const float16x4_t sum_data = vadd_f16(vadd_f16(top_data, bottom_data), middle_data);
1009 res = vpadd_f16(vset_lane_f16(0.f, sum_data, 3), sum_data);
1010 res = vmul_f16(vpadd_f16(res, res), scale_v);
1011 }
1012 else
1013 {
1014 const float16x4_t max_data = vmax_f16(vmax_f16(top_data, bottom_data), middle_data);
1015 res = vpmax_f16(vset_lane_f16(-std::numeric_limits<float>::max(), max_data, 3), max_data);
1016 res = vpmax_f16(res, res);
1017 }
Georgios Pinitascdf51452017-08-31 14:21:36 +01001018
1019 // Calculate square-root in case of l2 pooling
1020 if(pooling_type == PoolingType::L2)
1021 {
1022 res = vinv_f16(vinvsqrt_f16(res));
1023 }
1024
Pablo Tello0c34fe22017-06-26 17:17:42 +01001025 *(reinterpret_cast<float16_t *>(output.ptr())) = vget_lane_f16(res, 0);
1026 },
1027 input, output);
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +00001028#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tello0c34fe22017-06-26 17:17:42 +01001029 ARM_COMPUTE_UNUSED(window_input);
1030 ARM_COMPUTE_UNUSED(window);
1031 ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +00001032#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tello0c34fe22017-06-26 17:17:42 +01001033}
1034
Georgios Pinitasadaae7e2017-10-30 15:56:32 +00001035template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001036void NEPoolingLayerKernel::pooling2_f16_nchw(const Window &window_input, const Window &window)
Pablo Tello0c34fe22017-06-26 17:17:42 +01001037{
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +00001038#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Pablo Tello0c34fe22017-06-26 17:17:42 +01001039 Iterator input(_input, window_input);
1040 Iterator output(_output, window);
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001041 constexpr int pool_size = 2;
1042 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1043 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1044 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1045 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1046 int pool_stride_x, pool_stride_y = 0;
1047 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
1048 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1049 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Pablo Tello0c34fe22017-06-26 17:17:42 +01001050
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001051 const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
1052 const unsigned char *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
Pablo Tello0c34fe22017-06-26 17:17:42 +01001053
1054 execute_window_loop(window, [&](const Coordinates & id)
1055 {
Georgios Pinitascdf51452017-08-31 14:21:36 +01001056 auto top_data = vld2q_f16(reinterpret_cast<const float16_t *>(input_top_ptr + input.offset()));
1057 auto bottom_data = vld2q_f16(reinterpret_cast<const float16_t *>(input_bottom_ptr + input.offset()));
Pablo Tello0c34fe22017-06-26 17:17:42 +01001058 float16x8_t res = {};
1059
Georgios Pinitascdf51452017-08-31 14:21:36 +01001060 // Get power of 2 in case of l2 pooling
1061 if(pooling_type == PoolingType::L2)
1062 {
1063 top_data.val[0] = vmulq_f16(top_data.val[0], top_data.val[0]);
1064 top_data.val[1] = vmulq_f16(top_data.val[1], top_data.val[1]);
1065 bottom_data.val[0] = vmulq_f16(bottom_data.val[0], bottom_data.val[0]);
1066 bottom_data.val[1] = vmulq_f16(bottom_data.val[1], bottom_data.val[1]);
1067 }
1068
1069 if(pooling_type != PoolingType::MAX)
Pablo Tello0c34fe22017-06-26 17:17:42 +01001070 {
Michalis Spyrou57dac842018-03-01 16:03:50 +00001071 const float scale = calculate_avg_scale<exclude_padding, DataLayout::NCHW>(id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Pablo Tello0c34fe22017-06-26 17:17:42 +01001072 const float16x8_t scale_v = vdupq_n_f16(scale);
1073 res = vmulq_f16(scale_v, vaddq_f16(bottom_data.val[1], vaddq_f16(bottom_data.val[0], vaddq_f16(top_data.val[0], top_data.val[1]))));
1074 }
1075 else
1076 {
1077 res = vmaxq_f16(bottom_data.val[1], vmaxq_f16(bottom_data.val[0], vmaxq_f16(top_data.val[0], top_data.val[1])));
1078 }
Georgios Pinitascdf51452017-08-31 14:21:36 +01001079
1080 // Calculate square-root in case of l2 pooling
1081 if(pooling_type == PoolingType::L2)
1082 {
1083 res = vinvq_f16(vinvsqrtq_f16(res));
1084 }
1085
1086 // Store result
Pablo Tello0c34fe22017-06-26 17:17:42 +01001087 vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()), res);
1088 },
1089 input, output);
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +00001090#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tello0c34fe22017-06-26 17:17:42 +01001091 ARM_COMPUTE_UNUSED(window_input);
1092 ARM_COMPUTE_UNUSED(window);
1093 ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +00001094#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tello0c34fe22017-06-26 17:17:42 +01001095}
1096
Georgios Pinitasadaae7e2017-10-30 15:56:32 +00001097template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001098void NEPoolingLayerKernel::pooling2_f32_nchw(const Window &window_input, const Window &window)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001099{
1100 Iterator input(_input, window_input);
1101 Iterator output(_output, window);
1102
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001103 constexpr int pool_size = 2;
1104 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1105 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1106 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1107 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1108 int pool_stride_x = 0;
1109 int pool_stride_y = 0;
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001110 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001111 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1112 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001113
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001114 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
1115 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001116
1117 execute_window_loop(window, [&](const Coordinates & id)
1118 {
Georgios Pinitascdf51452017-08-31 14:21:36 +01001119 float32x2_t top_data = vld1_f32(reinterpret_cast<const float *>(input_top_ptr + input.offset()));
1120 float32x2_t bottom_data = vld1_f32(reinterpret_cast<const float *>(input_bottom_ptr + input.offset()));
1121 float32x2_t res = {};
1122 float final_res = 0;
1123
1124 // Get power of 2 in case of l2 pooling
1125 if(pooling_type == PoolingType::L2)
1126 {
1127 top_data = vmul_f32(top_data, top_data);
1128 bottom_data = vmul_f32(bottom_data, bottom_data);
1129 }
1130
1131 if(pooling_type != PoolingType::MAX)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001132 {
1133 // Calculate scale
Michalis Spyrou57dac842018-03-01 16:03:50 +00001134 float scale = calculate_avg_scale<exclude_padding, DataLayout::NCHW>(id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001135 const float32x2_t scale_v = vdup_n_f32(scale);
1136
1137 // Perform pooling
1138 const float32x2_t sum_data = vadd_f32(top_data, bottom_data);
1139 res = vmul_f32(vpadd_f32(sum_data, sum_data), scale_v);
1140 }
1141 else
1142 {
1143 const float32x2_t max_data = vmax_f32(top_data, bottom_data);
1144 res = vpmax_f32(max_data, max_data);
1145 }
Georgios Pinitascdf51452017-08-31 14:21:36 +01001146 final_res = vget_lane_f32(res, 0);
1147
1148 // Calculate square-root in case of l2 pooling
1149 if(pooling_type == PoolingType::L2)
1150 {
1151 final_res = sqrt(final_res);
1152 }
1153
1154 // Store result
1155 *(reinterpret_cast<float *>(output.ptr())) = final_res;
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001156 },
1157 input, output);
1158}
1159
Georgios Pinitas55186712018-01-08 17:37:12 +00001160template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001161void NEPoolingLayerKernel::pooling3_qasymm8_nchw(const Window &window_input, const Window &window)
Georgios Pinitas55186712018-01-08 17:37:12 +00001162{
1163 Iterator input(_input, window_input);
1164 Iterator output(_output, window);
1165
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001166 constexpr int pool_size = 3;
1167 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1168 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1169 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1170 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1171 int pool_stride_x = 0;
1172 int pool_stride_y = 0;
Georgios Pinitas55186712018-01-08 17:37:12 +00001173 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001174 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1175 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Georgios Pinitas55186712018-01-08 17:37:12 +00001176
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001177 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
1178 const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
1179 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 2));
Georgios Pinitas55186712018-01-08 17:37:12 +00001180
1181 execute_window_loop(window, [&](const Coordinates & id)
1182 {
1183 const auto top_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_top_ptr + input.offset()));
1184 const auto middle_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_middle_ptr + input.offset()));
1185 const auto bottom_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_bottom_ptr + input.offset()));
1186
1187 if(pooling_type == PoolingType::AVG)
1188 {
1189 // Convert data to u16
1190 const uint16x8x2_t top_data_u16 = { { vmovl_u8(vget_low_u8(top_data)), vmovl_u8(vget_high_u8(top_data)) } };
1191 const uint16x8x2_t middle_data_u16 = { { vmovl_u8(vget_low_u8(middle_data)), vmovl_u8(vget_high_u8(middle_data)) } };
1192 const uint16x8x2_t bottom_data_u16 = { { vmovl_u8(vget_low_u8(bottom_data)), vmovl_u8(vget_high_u8(bottom_data)) } };
1193
1194 // Calculate row sums
1195 const uint16x8x2_t vrsum =
1196 {
1197 {
1198 vaddq_u16(vaddq_u16(top_data_u16.val[0], bottom_data_u16.val[0]), middle_data_u16.val[0]),
1199 vaddq_u16(vaddq_u16(top_data_u16.val[1], bottom_data_u16.val[1]), middle_data_u16.val[1]),
1200 }
1201 };
1202 const uint16x8x2_t vrsum_shifted_1 =
1203 {
1204 {
1205 vextq_u16(vrsum.val[0], vrsum.val[1], 1),
1206 vextq_u16(vrsum.val[1], vrsum.val[1], 1)
1207 }
1208 };
1209 const uint16x8x2_t vrsum_shifted_2 =
1210 {
1211 {
1212 vextq_u16(vrsum.val[0], vrsum.val[1], 2),
1213 vextq_u16(vrsum.val[1], vrsum.val[1], 2)
1214 }
1215 };
1216 // Calculate final sum
1217 uint16x8x2_t final_sum =
1218 {
1219 {
1220 vaddq_u16(vaddq_u16(vrsum.val[0], vrsum_shifted_1.val[0]), vrsum_shifted_2.val[0]),
1221 vaddq_u16(vaddq_u16(vrsum.val[1], vrsum_shifted_1.val[1]), vrsum_shifted_2.val[1]),
1222 }
1223 };
1224 if(pool_stride_x == 2)
1225 {
1226 uint16x8_t res =
1227 {
1228 vgetq_lane_u16(final_sum.val[0], 0),
1229 vgetq_lane_u16(final_sum.val[0], 2),
1230 vgetq_lane_u16(final_sum.val[0], 4),
1231 vgetq_lane_u16(final_sum.val[0], 6),
1232 vgetq_lane_u16(final_sum.val[1], 0),
1233 vgetq_lane_u16(final_sum.val[1], 2),
1234 vgetq_lane_u16(final_sum.val[1], 4),
1235 vgetq_lane_u16(final_sum.val[1], 6),
1236 };
1237
1238 scale_vector_s16x8<exclude_padding>(res, id, 0, 1,
1239 pool_size, upper_bound_w, upper_bound_h,
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001240 pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitas55186712018-01-08 17:37:12 +00001241 vst1_u8(reinterpret_cast<uint8_t *>(output.ptr()), vmovn_u16(res));
1242 }
1243 else
1244 {
1245 // Scale lower result
1246 scale_vector_s16x8<exclude_padding>(final_sum.val[0], id, 0, 1,
1247 pool_size, upper_bound_w, upper_bound_h,
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001248 pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitas55186712018-01-08 17:37:12 +00001249 // Scale lower result
1250 scale_vector_s16x8<exclude_padding>(final_sum.val[1], id, 8, 1,
1251 pool_size, upper_bound_w, upper_bound_h,
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001252 pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitas55186712018-01-08 17:37:12 +00001253 const uint8x16_t res = vcombine_u8(vmovn_u16(final_sum.val[0]), vmovn_u16(final_sum.val[1]));
1254 vst1q_u8(reinterpret_cast<uint8_t *>(output.ptr()), res);
1255 }
1256 }
1257 else
1258 {
1259 const uint8x16_t max_data = vmaxq_u8(vmaxq_u8(top_data, bottom_data), middle_data);
1260 const uint8x16_t max_data_shift1 = vextq_u8(max_data, max_data, 1);
1261 const uint8x16_t max_data_shift2 = vextq_u8(max_data, max_data, 2);
1262 const uint8x16_t final_max = vmaxq_u8(vmaxq_u8(max_data, max_data_shift1), max_data_shift2);
1263
1264 if(pool_stride_x == 2)
1265 {
1266 const uint8x8x2_t table = { { vget_low_u8(final_max), vget_high_u8(final_max) } };
1267 static const uint8x8_t lookup_val = { 0, 2, 4, 6, 8, 10, 12, 14 };
1268 const uint8x8_t res = vtbl2_u8(table, lookup_val);
1269 vst1_u8(reinterpret_cast<uint8_t *>(output.ptr()), res);
1270 }
1271 else
1272 {
1273 vst1q_u8(reinterpret_cast<uint8_t *>(output.ptr()), final_max);
1274 }
1275 }
1276 },
1277 input, output);
1278}
1279
Georgios Pinitasadaae7e2017-10-30 15:56:32 +00001280template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001281void NEPoolingLayerKernel::pooling3_f32_nchw(const Window &window_input, const Window &window)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001282{
1283 Iterator input(_input, window_input);
1284 Iterator output(_output, window);
1285
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001286 constexpr const int pool_size = 3;
1287 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1288 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1289 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1290 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1291 int pool_stride_x = 0;
1292 int pool_stride_y = 0;
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001293 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001294 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1295 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001296
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001297 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
1298 const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
1299 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 2));
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001300
1301 execute_window_loop(window, [&](const Coordinates & id)
1302 {
Georgios Pinitascdf51452017-08-31 14:21:36 +01001303 float32x4_t top_data = vld1q_f32(reinterpret_cast<const float *>(input_top_ptr + input.offset()));
1304 float32x4_t middle_data = vld1q_f32(reinterpret_cast<const float *>(input_middle_ptr + input.offset()));
1305 float32x4_t bottom_data = vld1q_f32(reinterpret_cast<const float *>(input_bottom_ptr + input.offset()));
1306 float32x2_t res = {};
1307 float final_res = 0;
1308
1309 // Get power of 2 in case of l2 pooling
1310 if(pooling_type == PoolingType::L2)
1311 {
1312 top_data = vmulq_f32(top_data, top_data);
1313 middle_data = vmulq_f32(middle_data, middle_data);
1314 bottom_data = vmulq_f32(bottom_data, bottom_data);
1315 }
1316
1317 if(pooling_type != PoolingType::MAX)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001318 {
1319 // Calculate scale
Michalis Spyrou57dac842018-03-01 16:03:50 +00001320 float scale = calculate_avg_scale<exclude_padding, DataLayout::NCHW>(id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001321 const float32x2_t scale_v = vdup_n_f32(scale);
1322
1323 // Perform pooling
1324 const float32x4_t sum_data = vaddq_f32(vaddq_f32(top_data, bottom_data), middle_data);
1325 res = vpadd_f32(vget_high_f32(vsetq_lane_f32(0.f, sum_data, 3)), vget_low_f32(sum_data));
1326 res = vmul_f32(vpadd_f32(res, res), scale_v);
1327 }
1328 else
1329 {
1330 const float32x4_t max_data = vmaxq_f32(vmaxq_f32(top_data, bottom_data), middle_data);
1331 res = vpmax_f32(vget_high_f32(vsetq_lane_f32(-std::numeric_limits<float>::max(), max_data, 3)), vget_low_f32(max_data));
1332 res = vpmax_f32(res, res);
1333 }
Georgios Pinitascdf51452017-08-31 14:21:36 +01001334 final_res = vget_lane_f32(res, 0);
1335
1336 // Calculate square-root in case of l2 pooling
1337 if(pooling_type == PoolingType::L2)
1338 {
1339 final_res = sqrt(final_res);
1340 }
1341
1342 // Store result
1343 *(reinterpret_cast<float *>(output.ptr())) = final_res;
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001344 },
1345 input, output);
1346}
1347
Georgios Pinitasadaae7e2017-10-30 15:56:32 +00001348template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001349void NEPoolingLayerKernel::pooling7_f32_nchw(const Window &window_input, const Window &window)
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001350{
1351 Iterator input(_input, window_input);
1352 Iterator output(_output, window);
1353
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001354 constexpr const int pool_size = 7;
1355 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1356 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1357 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1358 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1359 int pool_stride_x = 0;
1360 int pool_stride_y = 0;
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001361 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001362 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1363 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001364
1365 std::array<const uint8_t *, pool_size> input_ptrs{ {} };
1366 for(int i = 0; i < pool_size; ++i)
1367 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001368 input_ptrs[i] = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + i));
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001369 }
1370
1371 execute_window_loop(window, [&](const Coordinates & id)
1372 {
Georgios Pinitascdf51452017-08-31 14:21:36 +01001373 float32x2_t res = {};
1374 float final_res = 0.f;
1375 if(pooling_type != PoolingType::MAX)
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001376 {
1377 // Calculate scale
Michalis Spyrou57dac842018-03-01 16:03:50 +00001378 float scale = calculate_avg_scale<exclude_padding, DataLayout::NCHW>(id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001379 const float32x2_t scale_v = vdup_n_f32(scale);
1380
1381 // Perform pooling
Georgios Pinitascdf51452017-08-31 14:21:36 +01001382 float32x4x2_t data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[0] + input.offset()));
1383 // Get power of 2 in case of l2 pooling
1384 if(pooling_type == PoolingType::L2)
1385 {
1386 data.val[0] = vmulq_f32(data.val[0], data.val[0]);
1387 data.val[1] = vmulq_f32(data.val[1], data.val[1]);
1388 }
1389 float32x4_t sum_data = vaddq_f32(data.val[0], vsetq_lane_f32(0.f, data.val[1], 3));
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001390 for(int i = 1; i < pool_size; ++i)
1391 {
Georgios Pinitascdf51452017-08-31 14:21:36 +01001392 data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[i] + input.offset()));
1393 // Get power of 2 in case of l2 pooling
1394 if(pooling_type == PoolingType::L2)
1395 {
1396 data.val[0] = vmulq_f32(data.val[0], data.val[0]);
1397 data.val[1] = vmulq_f32(data.val[1], data.val[1]);
1398 }
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001399 sum_data = vaddq_f32(sum_data, data.val[0]);
1400 sum_data = vaddq_f32(sum_data, vsetq_lane_f32(0.f, data.val[1], 3));
1401 }
1402 res = vpadd_f32(vget_high_f32(sum_data), vget_low_f32(sum_data));
1403 res = vmul_f32(vpadd_f32(res, res), scale_v);
1404 }
1405 else
1406 {
1407 float32x4x2_t max_data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[0] + input.offset()));
1408 for(int i = 1; i < pool_size; ++i)
1409 {
1410 const float32x4x2_t data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[i] + input.offset()));
1411 max_data = vmax2q_f32(max_data, data);
1412 }
1413 res = vpmax_f32(vget_high_f32(vsetq_lane_f32(-std::numeric_limits<float>::max(), max_data.val[1], 3)), vget_low_f32(max_data.val[1]));
1414 res = vpmax_f32(res, vpmax_f32(vget_high_f32(max_data.val[0]), vget_low_f32(max_data.val[0])));
1415 res = vpmax_f32(res, res);
1416 }
Georgios Pinitascdf51452017-08-31 14:21:36 +01001417 final_res = vget_lane_f32(res, 0);
1418
1419 // Calculate square-root in case of l2 pooling
1420 if(pooling_type == PoolingType::L2)
1421 {
1422 final_res = sqrt(final_res);
1423 }
1424
1425 // Store result
1426 *(reinterpret_cast<float *>(output.ptr())) = final_res;
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001427 },
1428 input, output);
1429}
1430
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001431template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001432void NEPoolingLayerKernel::poolingMxN_f16_nchw(const Window &window_input, const Window &window)
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001433{
1434#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
1435 Iterator input(_input, window_input);
1436 Iterator output(_output, window);
1437
1438 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().x() : _pool_info.pool_size().width;
1439 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().height;
1440 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1441 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1442 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1443 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1444 int pool_stride_x = 0;
1445 int pool_stride_y = 0;
1446 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
1447 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1448 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
1449
1450 execute_window_loop(window, [&](const Coordinates & id)
1451 {
1452 float16_t res = 0.0f;
1453 float16x8_t vres = vdupq_n_f16(0.0f);
1454
1455 if(pooling_type != PoolingType::MAX)
1456 {
1457 // Calculate scale
Michalis Spyrou57dac842018-03-01 16:03:50 +00001458 const float scale = calculate_avg_scale<exclude_padding, DataLayout::NCHW>(id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001459
1460 // Perform pooling
1461
1462 for(int y = 0; y < pool_size_y; ++y)
1463 {
1464 int x = 0;
1465 for(; x <= (pool_size_x - 8); x += 8)
1466 {
1467 const float16x8_t data = vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() +
1468 (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
1469
1470 // Get power of 2 in case of l2 pooling and accumulate
1471 if(pooling_type == PoolingType::L2)
1472 {
1473 vres = vaddq_f16(vres, vmulq_f16(data, data));
1474 }
1475 else
1476 {
1477 vres = vaddq_f16(vres, data);
1478 }
1479 }
1480
1481 // Leftover for loop
1482 for(; x < pool_size_x; ++x)
1483 {
1484 float16_t data = *(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
1485
1486 // Get power of 2 in case of l2 pooling
1487 if(pooling_type == PoolingType::L2)
1488 {
1489 data *= data;
1490 }
1491
1492 res += data;
1493 }
1494 }
1495
1496 // Reduction
1497 float16x4_t tmp = vpadd_f16(vget_high_f16(vres), vget_low_f16(vres));
1498 res += vget_lane_f16(tmp, 0);
1499 res += vget_lane_f16(tmp, 1);
1500 res += vget_lane_f16(tmp, 2);
1501 res += vget_lane_f16(tmp, 3);
1502
1503 // Divide by scale
1504 res *= scale;
1505 }
1506 else
1507 {
1508 float16x8_t vres = vdupq_n_f16(std::numeric_limits<float>::lowest());
1509 res = std::numeric_limits<float>::lowest();
1510
1511 for(int y = 0; y < pool_size_y; ++y)
1512 {
1513 int x = 0;
1514 for(; x <= (pool_size_x - 8); x += 8)
1515 {
1516 const float16x8_t data = vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() +
1517 (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
1518 vres = vmaxq_f16(vres, data);
1519 }
1520
1521 // Leftover for loop
1522 for(; x < pool_size_x; ++x)
1523 {
1524 const float16_t data = *(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
1525 res = std::max(res, data);
1526 }
1527 }
1528
1529 float16x4_t tmp = vpmax_f16(vget_high_f16(vres), vget_low_f16(vres));
1530 res = std::max(res, vget_lane_f16(tmp, 0));
1531 res = std::max(res, vget_lane_f16(tmp, 1));
1532 res = std::max(res, vget_lane_f16(tmp, 2));
1533 res = std::max(res, vget_lane_f16(tmp, 3));
1534 }
1535
1536 // Calculate square-root in case of l2 pooling
1537 if(pooling_type == PoolingType::L2)
1538 {
1539 res = std::sqrt(res);
1540 }
1541
1542 // Store result
1543 *(reinterpret_cast<float16_t *>(output.ptr())) = res;
1544 },
1545 input, output);
1546
1547#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
1548 ARM_COMPUTE_UNUSED(window_input);
1549 ARM_COMPUTE_UNUSED(window);
1550 ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
1551#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
1552}
1553
1554template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001555void NEPoolingLayerKernel::poolingMxN_f16_nhwc(const Window &window_input, const Window &window)
1556{
1557#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
1558 Iterator input(_input, window_input);
1559 Iterator output(_output, window);
1560
1561 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().width;
1562 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().z() : _pool_info.pool_size().height;
1563 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1564 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1565 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1566 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1567 int pool_stride_x = 0;
1568 int pool_stride_y = 0;
1569 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
1570 const int upper_bound_w = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_right);
1571 const int upper_bound_h = _input->info()->dimension(2) + (exclude_padding ? 0 : pool_pad_bottom);
1572
1573 float16x8_t vres;
1574
1575 execute_window_loop(window, [&](const Coordinates & id)
1576 {
1577 const int idx_width = id.y() * pool_stride_x;
1578 const int idx_height = id.z() * pool_stride_y;
1579 if(pooling_type != PoolingType::MAX)
1580 {
1581 // Calculate scale
1582 const float scale = calculate_avg_scale<exclude_padding, DataLayout::NHWC>(id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
1583 pool_stride_y);
1584 const float16x8_t scale_v = vdupq_n_f16(scale);
1585
1586 // Perform pooling
1587 vres = vdupq_n_f16(0.0f);
1588
1589 for(int y = 0; y < pool_size_y; ++y)
1590 {
Georgios Pinitas7b94c3e2018-05-14 12:47:59 +01001591 if(y + idx_height - pool_pad_top >= window_input.z().end() || y + idx_height - pool_pad_top < window_input.z().start())
Michalis Spyrou57dac842018-03-01 16:03:50 +00001592 {
1593 continue;
1594 }
1595
1596 for(int x = 0; x < pool_size_x; ++x)
1597 {
Georgios Pinitas7b94c3e2018-05-14 12:47:59 +01001598 if(x + idx_width - pool_pad_left >= window_input.y().end() || x + idx_width - pool_pad_left < window_input.y().start())
Michalis Spyrou57dac842018-03-01 16:03:50 +00001599 {
1600 continue;
1601 }
1602
1603 const float16x8_t data = vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().y() +
1604 (y - pool_pad_top) * _input->info()->strides_in_bytes().z()));
1605
1606 // Get power of 2 in case of l2 pooling and accumulate
1607 if(pooling_type == PoolingType::L2)
1608 {
1609 vres = vaddq_f16(vres, vmulq_f16(data, data));
1610 }
1611 else
1612 {
1613 vres = vaddq_f16(vres, data);
1614 }
1615 }
1616 }
1617 // Divide by scale
1618 vres = vmulq_f16(vres, scale_v);
1619 }
1620 else
1621 {
1622 vres = vdupq_n_f16(std::numeric_limits<float>::lowest());
1623 for(int y = 0; y < pool_size_y; ++y)
1624 {
1625 if(y + idx_height > window_input.z().end() || y + idx_height - pool_pad_top < window_input.z().start())
1626 {
1627 continue;
1628 }
1629
1630 for(int x = 0; x < pool_size_x; ++x)
1631 {
1632 if(x + idx_width > window_input.y().end() || x + idx_width - pool_pad_left < window_input.y().start())
1633 {
1634 continue;
1635 }
1636
1637 const float16x8_t data = vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().y() +
1638 (y - pool_pad_top) * _input->info()->strides_in_bytes().z()));
1639 vres = vmaxq_f16(vres, data);
1640 }
1641 }
1642 }
1643
1644 // Calculate square-root in case of l2 pooling
1645 if(pooling_type == PoolingType::L2)
1646 {
1647 float16x8_t sqrt_reciprocal = vrsqrteq_f16(vres);
1648 vres = vmulq_f16(vres, vmulq_f16(vrsqrtsq_f16(vmulq_f16(vres, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal));
1649 }
1650
1651 // Store result
1652 vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()), vres);
1653 },
1654 input, output);
1655
1656#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
1657 ARM_COMPUTE_UNUSED(window_input);
1658 ARM_COMPUTE_UNUSED(window);
1659 ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
1660#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
1661}
1662
1663template <PoolingType pooling_type, bool exclude_padding>
1664void NEPoolingLayerKernel::poolingMxN_f32_nchw(const Window &window_input, const Window &window)
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001665{
1666 Iterator input(_input, window_input);
1667 Iterator output(_output, window);
1668
1669 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().x() : _pool_info.pool_size().width;
1670 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().height;
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001671 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1672 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1673 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1674 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1675 int pool_stride_x = 0;
1676 int pool_stride_y = 0;
Gian Marco Iodice16824302017-09-28 15:41:37 +01001677 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001678 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1679 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Gian Marco Iodice16824302017-09-28 15:41:37 +01001680
1681 execute_window_loop(window, [&](const Coordinates & id)
1682 {
1683 float res = 0.0f;
1684
1685 if(pooling_type != PoolingType::MAX)
1686 {
1687 // Calculate scale
Michalis Spyrou57dac842018-03-01 16:03:50 +00001688 const float scale = calculate_avg_scale<exclude_padding, DataLayout::NCHW>(id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Gian Marco Iodice16824302017-09-28 15:41:37 +01001689
1690 // Perform pooling
1691 float32x4_t vres = vdupq_n_f32(0.0f);
1692
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001693 for(int y = 0; y < pool_size_y; ++y)
Gian Marco Iodice16824302017-09-28 15:41:37 +01001694 {
1695 int x = 0;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001696 for(; x <= (pool_size_x - 4); x += 4)
Gian Marco Iodice16824302017-09-28 15:41:37 +01001697 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001698 const float32x4_t data = vld1q_f32(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() +
1699 (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
Gian Marco Iodice16824302017-09-28 15:41:37 +01001700
1701 // Get power of 2 in case of l2 pooling and accumulate
1702 if(pooling_type == PoolingType::L2)
1703 {
1704 vres = vmlaq_f32(vres, data, data);
1705 }
1706 else
1707 {
1708 vres = vaddq_f32(vres, data);
1709 }
1710 }
1711
1712 // Leftover for loop
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001713 for(; x < pool_size_x; ++x)
Gian Marco Iodice16824302017-09-28 15:41:37 +01001714 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001715 float data = *(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
Gian Marco Iodice16824302017-09-28 15:41:37 +01001716
1717 // Get power of 2 in case of l2 pooling
1718 if(pooling_type == PoolingType::L2)
1719 {
1720 data *= data;
1721 }
1722
1723 res += data;
1724 }
1725 }
1726
1727#if defined(__aarch64__)
1728 // Reduction operation available on 64 bit architectures only
1729 res += vaddvq_f32(vres);
1730#else // __aarch64__
1731 // Reduction
1732 float32x2_t tmp = vpadd_f32(vget_high_f32(vres), vget_low_f32(vres));
1733 tmp = vpadd_f32(tmp, tmp);
1734
1735 res += vget_lane_f32(tmp, 0);
1736#endif // __aarch64__
1737 // Divide by scale
1738 res *= scale;
1739 }
1740 else
1741 {
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001742 float32x4_t vres = vdupq_n_f32(std::numeric_limits<float>::lowest());
1743 res = std::numeric_limits<float>::lowest();
Gian Marco Iodice16824302017-09-28 15:41:37 +01001744
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001745 for(int y = 0; y < pool_size_y; ++y)
Gian Marco Iodice16824302017-09-28 15:41:37 +01001746 {
1747 int x = 0;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001748 for(; x <= (pool_size_x - 4); x += 4)
Gian Marco Iodice16824302017-09-28 15:41:37 +01001749 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001750 const float32x4_t data = vld1q_f32(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() +
1751 (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
Gian Marco Iodice16824302017-09-28 15:41:37 +01001752 vres = vmaxq_f32(vres, data);
1753 }
1754
1755 // Leftover for loop
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001756 for(; x < pool_size_x; ++x)
Gian Marco Iodice16824302017-09-28 15:41:37 +01001757 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001758 const float data = *(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
Gian Marco Iodice16824302017-09-28 15:41:37 +01001759 res = std::max(res, data);
1760 }
1761 }
1762
1763#if defined(__aarch64__)
1764 // Reduction operation available on 64 bit architectures only
1765 res = std::max(vmaxvq_f32(vres), res);
1766#else // __aarch64__
1767 float32x2_t tmp = vpmax_f32(vget_high_f32(vres), vget_low_f32(vres));
1768 tmp = vpmax_f32(tmp, tmp);
1769
1770 res = std::max(res, vget_lane_f32(tmp, 0));
1771#endif // __aarch64__
1772 }
1773
1774 // Calculate square-root in case of l2 pooling
1775 if(pooling_type == PoolingType::L2)
1776 {
1777 res = std::sqrt(res);
1778 }
1779
1780 // Store result
1781 *(reinterpret_cast<float *>(output.ptr())) = res;
1782 },
1783 input, output);
1784}
1785
Georgios Pinitas55186712018-01-08 17:37:12 +00001786template <PoolingType pooling_type, bool exclude_padding>
Michalis Spyrou57dac842018-03-01 16:03:50 +00001787void NEPoolingLayerKernel::poolingMxN_f32_nhwc(const Window &window_input, const Window &window)
1788{
1789 Iterator input(_input, window_input);
1790 Iterator output(_output, window);
1791
1792 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().width;
1793 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().z() : _pool_info.pool_size().height;
1794 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1795 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1796 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1797 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1798 int pool_stride_x = 0;
1799 int pool_stride_y = 0;
1800 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
1801 const int upper_bound_w = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_right);
1802 const int upper_bound_h = _input->info()->dimension(2) + (exclude_padding ? 0 : pool_pad_bottom);
1803
1804 float32x4_t vres;
1805
1806 execute_window_loop(window, [&](const Coordinates & id)
1807 {
1808 const int idx_width = id.y() * pool_stride_x;
1809 const int idx_height = id.z() * pool_stride_y;
1810 if(pooling_type != PoolingType::MAX)
1811 {
1812 // Calculate scale
1813 const float scale = calculate_avg_scale<exclude_padding, DataLayout::NHWC>(id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
1814 pool_stride_y);
1815 const float32x4_t scale_v = vdupq_n_f32(scale);
1816
1817 // Perform pooling
1818 vres = vdupq_n_f32(0.0f);
1819
1820 for(int y = 0; y < pool_size_y; ++y)
1821 {
Georgios Pinitas7b94c3e2018-05-14 12:47:59 +01001822 if(y + idx_height - pool_pad_top >= window_input.z().end() || y + idx_height - pool_pad_top < window_input.z().start())
Michalis Spyrou57dac842018-03-01 16:03:50 +00001823 {
1824 continue;
1825 }
1826
1827 for(int x = 0; x < pool_size_x; ++x)
1828 {
Georgios Pinitas7b94c3e2018-05-14 12:47:59 +01001829 if(x + idx_width - pool_pad_left >= window_input.y().end() || x + idx_width - pool_pad_left < window_input.y().start())
Michalis Spyrou57dac842018-03-01 16:03:50 +00001830 {
1831 continue;
1832 }
1833
1834 const float32x4_t data = vld1q_f32(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().y() +
1835 (y - pool_pad_top) * _input->info()->strides_in_bytes().z()));
1836
1837 // Get power of 2 in case of l2 pooling and accumulate
1838 if(pooling_type == PoolingType::L2)
1839 {
1840 vres = vmlaq_f32(vres, data, data);
1841 }
1842 else
1843 {
1844 vres = vaddq_f32(vres, data);
1845 }
1846 }
1847 }
1848 // Divide by scale
1849 vres = vmulq_f32(vres, scale_v);
1850 }
1851 else
1852 {
1853 vres = vdupq_n_f32(std::numeric_limits<float>::lowest());
1854 for(int y = 0; y < pool_size_y; ++y)
1855 {
Georgios Pinitas7b94c3e2018-05-14 12:47:59 +01001856 if(y + idx_height - pool_pad_top >= window_input.z().end() || y + idx_height - pool_pad_top < window_input.z().start())
Michalis Spyrou57dac842018-03-01 16:03:50 +00001857 {
1858 continue;
1859 }
1860
1861 for(int x = 0; x < pool_size_x; ++x)
1862 {
Georgios Pinitas7b94c3e2018-05-14 12:47:59 +01001863 if(x + idx_width - pool_pad_left >= window_input.y().end() || x + idx_width - pool_pad_left < window_input.y().start())
Michalis Spyrou57dac842018-03-01 16:03:50 +00001864 {
1865 continue;
1866 }
1867
1868 const float32x4_t data = vld1q_f32(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().y() +
1869 (y - pool_pad_top) * _input->info()->strides_in_bytes().z()));
1870 vres = vmaxq_f32(vres, data);
1871 }
1872 }
1873 }
1874
1875 // Calculate square-root in case of l2 pooling
1876 if(pooling_type == PoolingType::L2)
1877 {
1878 float32x4_t sqrt_reciprocal = vrsqrteq_f32(vres);
1879 vres = vmulq_f32(vres, vmulq_f32(vrsqrtsq_f32(vmulq_f32(vres, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal));
1880 }
1881
1882 // Store result
1883 vst1q_f32(reinterpret_cast<float *>(output.ptr()), vres);
1884 },
1885 input, output);
1886}
1887
1888template <PoolingType pooling_type, bool exclude_padding>
1889void NEPoolingLayerKernel::poolingMxN_qasymm8_nchw(const Window &window_input, const Window &window)
Georgios Pinitas55186712018-01-08 17:37:12 +00001890{
1891 Iterator input(_input, window_input);
1892 Iterator output(_output, window);
1893
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001894 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().x() : _pool_info.pool_size().width;
1895 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().height;
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001896 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1897 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1898 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1899 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1900 int pool_stride_x = 0;
1901 int pool_stride_y = 0;
Georgios Pinitas55186712018-01-08 17:37:12 +00001902 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001903 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
1904 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
Georgios Pinitas55186712018-01-08 17:37:12 +00001905
1906 execute_window_loop(window, [&](const Coordinates & id)
1907 {
1908 uint8_t res = 0;
1909
1910 if(pooling_type != PoolingType::MAX)
1911 {
1912 uint32x4_t vres = vdupq_n_u32(0);
1913 uint32_t sres = 0;
1914
1915 // Calculate scale
Michalis Spyrou57dac842018-03-01 16:03:50 +00001916 const float scale = calculate_avg_scale<exclude_padding, DataLayout::NCHW>(id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
Georgios Pinitas55186712018-01-08 17:37:12 +00001917
1918 // Perform pooling
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001919 for(int y = 0; y < pool_size_y; ++y)
Georgios Pinitas55186712018-01-08 17:37:12 +00001920 {
1921 int x = 0;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001922 for(; x <= (pool_size_x - 8); x += 8)
Georgios Pinitas55186712018-01-08 17:37:12 +00001923 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001924 const uint8x8_t data = vld1_u8(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() +
1925 (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
Georgios Pinitas55186712018-01-08 17:37:12 +00001926
1927 const uint16x8_t data_u16 = vmovl_u8(data);
1928 vres = vaddq_u32(vres, vaddl_u16(vget_high_u16(data_u16), vget_low_u16(data_u16)));
1929 }
1930
1931 // Leftover for loop
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001932 for(; x < pool_size_x; ++x)
Georgios Pinitas55186712018-01-08 17:37:12 +00001933 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001934 uint8_t data = *(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
Georgios Pinitas55186712018-01-08 17:37:12 +00001935 sres += data;
1936 }
1937 }
1938
1939 // Reduction
1940 const auto tmp = vpadd_u32(vget_high_u32(vres), vget_low_u32(vres));
1941 sres += vget_lane_u32(tmp, 0) + vget_lane_u32(tmp, 1);
1942
1943 // Divide by scale
1944 res = static_cast<uint8_t>(support::cpp11::round(sres * scale));
1945 }
1946 else
1947 {
1948 uint8x8_t vres = vdup_n_u8(0);
1949 res = 0;
1950
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001951 for(int y = 0; y < pool_size_y; ++y)
Georgios Pinitas55186712018-01-08 17:37:12 +00001952 {
1953 int x = 0;
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001954 for(; x <= (pool_size_x - 8); x += 8)
Georgios Pinitas55186712018-01-08 17:37:12 +00001955 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001956 const uint8x8_t data = vld1_u8(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() +
1957 (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
Georgios Pinitas55186712018-01-08 17:37:12 +00001958 vres = vmax_u8(vres, data);
1959 }
1960
1961 // Leftover for loop
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00001962 for(; x < pool_size_x; ++x)
Georgios Pinitas55186712018-01-08 17:37:12 +00001963 {
Michalis Spyroubd0e6122018-01-23 09:52:16 +00001964 const uint8_t data = *(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
Georgios Pinitas55186712018-01-08 17:37:12 +00001965 res = std::max(res, data);
1966 }
1967 }
1968
1969 // Reduce max
1970 vres = vpmax_u8(vres, vres);
1971 vres = vpmax_u8(vres, vres);
1972 vres = vpmax_u8(vres, vres);
1973
1974 // Get max value
1975 res = std::max(res, vget_lane_u8(vres, 0));
1976 }
1977
1978 // Store result
1979 *(reinterpret_cast<uint8_t *>(output.ptr())) = res;
1980 },
1981 input, output);
1982}
1983
Michalis Spyrou57dac842018-03-01 16:03:50 +00001984template <PoolingType pooling_type, bool exclude_padding>
1985void NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc(const Window &window_input, const Window &window)
1986{
1987 Iterator input(_input, window_input);
1988 Iterator output(_output, window);
1989
1990 const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().width;
1991 const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().z() : _pool_info.pool_size().height;
1992 const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
1993 const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
1994 const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
1995 const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
1996 int pool_stride_x = 0;
1997 int pool_stride_y = 0;
1998 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
1999 const int upper_bound_w = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_right);
2000 const int upper_bound_h = _input->info()->dimension(2) + (exclude_padding ? 0 : pool_pad_bottom);
2001
2002 execute_window_loop(window, [&](const Coordinates & id)
2003 {
2004 const int idx_width = id.y() * pool_stride_x;
2005 const int idx_height = id.z() * pool_stride_y;
2006 if(pooling_type != PoolingType::MAX)
2007 {
2008 uint32x4_t vres1 = vdupq_n_u32(0);
2009 uint32x4_t vres2 = vdupq_n_u32(0);
2010
2011 // Calculate scale
2012 const float scale = calculate_avg_scale<exclude_padding, DataLayout::NHWC>(id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
2013 pool_stride_y);
2014 const float32x4_t scale_v = vdupq_n_f32(scale);
2015
2016 // Perform pooling
2017 for(int y = 0; y < pool_size_y; ++y)
2018 {
Georgios Pinitas7b94c3e2018-05-14 12:47:59 +01002019 if(y + idx_height - pool_pad_top >= window_input.z().end() || y + idx_height - pool_pad_top < window_input.z().start())
Michalis Spyrou57dac842018-03-01 16:03:50 +00002020 {
2021 continue;
2022 }
2023
2024 for(int x = 0; x < pool_size_x; ++x)
2025 {
Georgios Pinitas7b94c3e2018-05-14 12:47:59 +01002026 if(x + idx_width - pool_pad_left >= window_input.y().end() || x + idx_width - pool_pad_left < window_input.y().start())
Michalis Spyrou57dac842018-03-01 16:03:50 +00002027 {
2028 continue;
2029 }
2030
2031 const uint8x8_t data = vld1_u8(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().y() +
2032 (y - pool_pad_top) * _input->info()->strides_in_bytes().z()));
2033
2034 const uint16x8_t data_u16 = vmovl_u8(data);
2035 vres1 = vaddq_u32(vres1, vmovl_u16(vget_low_u16(data_u16)));
2036 vres2 = vaddq_u32(vres2, vmovl_u16(vget_high_u16(data_u16)));
2037 }
2038 }
2039 // Divide by scale
2040 vres1 = vcvtq_u32_f32(vmulq_f32(vcvtq_f32_u32(vres1), scale_v));
2041 vres2 = vcvtq_u32_f32(vmulq_f32(vcvtq_f32_u32(vres2), scale_v));
2042
2043 uint8x8_t res = vmovn_u16(vcombine_u16(vmovn_u32(vres1), vmovn_u32(vres2)));
2044
2045 // Store result
2046 vst1_u8(output.ptr(), res);
2047 }
2048 else
2049 {
2050 uint8x8_t vres = vdup_n_u8(0);
2051
2052 for(int y = 0; y < pool_size_y; ++y)
2053 {
Georgios Pinitas7b94c3e2018-05-14 12:47:59 +01002054 if(y + idx_height - pool_pad_top >= window_input.z().end() || y + idx_height - pool_pad_top < window_input.z().start())
Michalis Spyrou57dac842018-03-01 16:03:50 +00002055 {
2056 continue;
2057 }
2058
2059 for(int x = 0; x < pool_size_x; ++x)
2060 {
Georgios Pinitas7b94c3e2018-05-14 12:47:59 +01002061 if(x + idx_width - pool_pad_left >= window_input.y().end() || x + idx_width - pool_pad_left < window_input.y().start())
Michalis Spyrou57dac842018-03-01 16:03:50 +00002062 {
2063 continue;
2064 }
2065
2066 const uint8x8_t data = vld1_u8(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().y() +
2067 (y - pool_pad_top) * _input->info()->strides_in_bytes().z()));
2068 vres = vmax_u8(vres, data);
2069 }
2070 }
2071
2072 // Store result
2073 vst1_u8(output.ptr(), vres);
2074 }
2075 },
2076 input, output);
2077}
2078
Michalis Spyrouafa5d812017-11-30 14:25:57 +00002079Status NEPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
2080{
2081 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
2082
2083 unsigned int pooled_w = 0;
2084 unsigned int pooled_h = 0;
2085 unsigned int num_elems_processed_per_iteration = 0;
2086 BorderSize border_size(0);
2087
Michalis Spyrou57dac842018-03-01 16:03:50 +00002088 const bool is_global_pooling = pool_info.is_global_pooling();
2089 unsigned int pool_size_x = 0;
2090 unsigned int pool_size_y = 0;
2091
2092 // Get data layout
2093 const DataLayout data_layout = input->data_layout();
2094 const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
2095 const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
2096
2097 pool_size_x = is_global_pooling ? input->dimension(idx_width) : pool_info.pool_size().width;
2098 pool_size_y = is_global_pooling ? input->dimension(idx_height) : pool_info.pool_size().height;
Michalis Spyrouafa5d812017-11-30 14:25:57 +00002099
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00002100 // Validate pool info before calling scaled_dimensions
2101 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_pool_info(pool_size_x, pool_size_y));
Michalis Spyrouafa5d812017-11-30 14:25:57 +00002102
2103 // Check output dimensions
Michalis Spyrou57dac842018-03-01 16:03:50 +00002104 std::tie(pooled_w, pooled_h) = scaled_dimensions(input->dimension(idx_width),
2105 input->dimension(idx_height),
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00002106 pool_size_x,
2107 pool_size_y,
Michalis Spyrouafa5d812017-11-30 14:25:57 +00002108 pool_info.pad_stride_info());
2109
Isabella Gottardi7567f5f2018-01-30 15:26:00 +00002110 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, pool_info, pooled_w, pooled_h, pool_size_x));
2111 ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), pool_info, num_elems_processed_per_iteration, border_size, pooled_w, pooled_h,
2112 pool_size_x, pool_size_y)
2113 .first);
Michalis Spyrouafa5d812017-11-30 14:25:57 +00002114
2115 return Status{};
2116}
2117
Moritz Pflanzerc186b572017-09-07 09:48:04 +01002118void NEPoolingLayerKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01002119{
Moritz Pflanzerc186b572017-09-07 09:48:04 +01002120 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01002121 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
2122 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
2123 ARM_COMPUTE_ERROR_ON(_func == nullptr);
2124
Pablo Tello0c34fe22017-06-26 17:17:42 +01002125 const unsigned int pool_stride_x = _pool_info.pad_stride_info().stride().first;
2126 const unsigned int pool_stride_y = _pool_info.pad_stride_info().stride().second;
Isabella Gottardi6e464c32018-01-26 12:32:45 +00002127 const unsigned int pool_size = _pool_info.pool_size().width;
Anthony Barbier6ff3b192017-09-04 18:44:23 +01002128
Michalis Spyrou57dac842018-03-01 16:03:50 +00002129 Window window_input(window);
2130 if(_input->info()->data_layout() == DataLayout::NCHW)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01002131 {
Michalis Spyrou57dac842018-03-01 16:03:50 +00002132 // Set step for input in x and y direction for the input
2133 unsigned int window_x_inc = 0;
2134 switch(_input->info()->data_type())
Pablo Tello0c34fe22017-06-26 17:17:42 +01002135 {
Michalis Spyrou57dac842018-03-01 16:03:50 +00002136 case DataType::F16:
Georgios Pinitas55186712018-01-08 17:37:12 +00002137 {
2138 window_x_inc = (pool_stride_x == 2) ? _num_elems_processed_per_iteration * 2 : _num_elems_processed_per_iteration;
Michalis Spyrou57dac842018-03-01 16:03:50 +00002139 break;
Georgios Pinitas55186712018-01-08 17:37:12 +00002140 }
Michalis Spyrou57dac842018-03-01 16:03:50 +00002141 case DataType::QASYMM8:
2142 {
2143 window_x_inc = pool_stride_x;
2144 if((pool_size == 2 || pool_size == 3) && pool_stride_x < 3)
2145 {
2146 window_x_inc = (pool_stride_x == 2) ? _num_elems_processed_per_iteration * 2 : _num_elems_processed_per_iteration;
2147 }
2148 break;
2149 }
2150 case DataType::F32:
2151 {
2152 window_x_inc = pool_stride_x;
2153 break;
2154 }
2155 default:
2156 {
2157 ARM_COMPUTE_ERROR("Not supported");
2158 }
Georgios Pinitas55186712018-01-08 17:37:12 +00002159 }
Michalis Spyrou57dac842018-03-01 16:03:50 +00002160 window_input.set(Window::DimX, Window::Dimension(window.x().start() * pool_stride_x, window.x().end() * pool_stride_x, window_x_inc));
2161 window_input.set(Window::DimY, Window::Dimension(window.y().start() * pool_stride_y, window.y().end() * pool_stride_y, pool_stride_y));
Anthony Barbier6ff3b192017-09-04 18:44:23 +01002162 }
Michalis Spyrou57dac842018-03-01 16:03:50 +00002163 else
2164 {
Georgios Pinitascac13b12018-04-27 19:07:19 +01002165 window_input.set(Window::DimX, Window::Dimension(window.x().start(), window.x().end(), _num_elems_processed_per_iteration));
Michalis Spyrou57dac842018-03-01 16:03:50 +00002166 window_input.set(Window::DimY, Window::Dimension(0, _input->info()->dimension(1), pool_stride_x));
2167 window_input.set(Window::DimZ, Window::Dimension(0, _input->info()->dimension(2), pool_stride_y));
2168 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +01002169
2170 // Run function
2171 (this->*_func)(window_input, window);
2172}