blob: 122540b07ed9d28676a183dd5d4ed8eafa9e82f9 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
2 * Copyright (c) 2017 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h"
25
26#include "arm_compute/core/AccessWindowStatic.h"
27#include "arm_compute/core/Error.h"
28#include "arm_compute/core/FixedPoint.h"
29#include "arm_compute/core/Helpers.h"
30#include "arm_compute/core/ITensor.h"
31#include "arm_compute/core/NEON/NEFixedPoint.h"
Georgios Pinitascdf51452017-08-31 14:21:36 +010032#include "arm_compute/core/NEON/NEMath.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010033#include "arm_compute/core/TensorInfo.h"
34#include "arm_compute/core/Utils.h"
35#include "arm_compute/core/Validate.h"
36#include "arm_compute/core/Window.h"
37
38#include <algorithm>
39#include <arm_neon.h>
Georgios Pinitascdf51452017-08-31 14:21:36 +010040#include <cmath>
Anthony Barbier6ff3b192017-09-04 18:44:23 +010041#include <limits>
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +010042#include <set>
Anthony Barbier6ff3b192017-09-04 18:44:23 +010043#include <string>
44#include <tuple>
45
46using namespace arm_compute;
47
48namespace
49{
Georgios Pinitasadaae7e2017-10-30 15:56:32 +000050template <bool exclude_padding>
Anthony Barbier6ff3b192017-09-04 18:44:23 +010051inline float calculate_avg_scale(const Coordinates &id, const int pool_size, const int upper_bound_w, const int upper_bound_h,
52 const int pad_x, const int pad_y, const int stride_x, const int stride_y)
53{
Georgios Pinitasadaae7e2017-10-30 15:56:32 +000054 int start_x = id.x() * stride_x - pad_x;
55 int start_y = id.y() * stride_y - pad_y;
Pablo Tello0c34fe22017-06-26 17:17:42 +010056 const int end_x = std::min(start_x + pool_size, upper_bound_w);
57 const int end_y = std::min(start_y + pool_size, upper_bound_h);
Georgios Pinitasadaae7e2017-10-30 15:56:32 +000058 if(exclude_padding)
59 {
60 start_x = std::max(0, start_x);
61 start_y = std::max(0, start_y);
62 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +010063 return 1.f / ((end_y - start_y) * (end_x - start_x));
64}
65
66inline qint8_t calculate_avg_scale_q8(const Coordinates &id, int pool_size, int upper_bound_w, int upper_bound_h,
67 int pad_x, int pad_y, int stride_x, int stride_y, int fixed_point_position)
68{
Pablo Tello0c34fe22017-06-26 17:17:42 +010069 static const std::array<qint8_t, 10> scale_values_q8 =
Anthony Barbier6ff3b192017-09-04 18:44:23 +010070 { { 0x0, 0x0, 0x40, 0x2A, 0x20, 0x19, 0x15, 0x12, 0x10, 0xE } };
71 const int start_x = id.x() * stride_x - pad_x;
72 const int start_y = id.y() * stride_y - pad_y;
73 const int end_x = std::min(start_x + pool_size, upper_bound_w);
74 const int end_y = std::min(start_y + pool_size, upper_bound_h);
75 const int val = ((end_y - start_y) * (end_x - start_x));
Michalis Spyroubbd9fb92017-06-22 12:57:51 +010076 return sshr_qs8(scale_values_q8[val], (7 - fixed_point_position));
77}
78
79inline qint16_t calculate_avg_scale_q16(const Coordinates &id, int pool_size, int upper_bound_w, int upper_bound_h,
80 int pad_x, int pad_y, int stride_x, int stride_y, int fixed_point_position)
81{
82 static std::array<qint16_t, 10> scale_values_q16 =
83 { { 0x0, 0x0, 0x4000, 0x2AAB, 0x2000, 0x199A, 0x1555, 0x1249, 0x1000, 0xE38 } };
84 const int start_x = id.x() * stride_x - pad_x;
85 const int start_y = id.y() * stride_y - pad_y;
86 const int end_x = std::min(start_x + pool_size, upper_bound_w);
87 const int end_y = std::min(start_y + pool_size, upper_bound_h);
88 const int val = ((end_y - start_y) * (end_x - start_x));
89 return sshr_qs16(scale_values_q16[val], (15 - fixed_point_position));
Anthony Barbier6ff3b192017-09-04 18:44:23 +010090}
91} // namespace
92
93NEPoolingLayerKernel::NEPoolingLayerKernel()
94 : _func(nullptr), _input(nullptr), _output(nullptr), _pool_info(), _num_elems_processed_per_iteration(0), _border_size(0)
95{
96}
97
98BorderSize NEPoolingLayerKernel::border_size() const
99{
100 return _border_size;
101}
102
103void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info)
104{
Gian Marco Iodice4e288692017-06-27 11:41:59 +0100105 int pool_pad_x = 0;
106 int pool_pad_y = 0;
107 int pool_stride_x = 0;
108 int pool_stride_y = 0;
109 unsigned int pooled_w = 0;
110 unsigned int pooled_h = 0;
111 PoolingType pool_type = pool_info.pool_type();
112 int pool_size = pool_info.pool_size();
113 const PadStrideInfo pad_stride_info = pool_info.pad_stride_info();
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000114 bool exclude_padding = pool_info.exclude_padding();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100115 std::tie(pool_pad_x, pool_pad_y) = pad_stride_info.pad();
116 std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
117
Gian Marco Iodice16824302017-09-28 15:41:37 +0100118 static const std::set<int> supported_pool_sizes = { 2, 3 };
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100119 ARM_COMPUTE_UNUSED(supported_pool_sizes);
120
Georgios Pinitas1dad50e2017-07-03 17:51:34 +0100121 ARM_COMPUTE_ERROR_ON_NULLPTR(output);
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100122 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
Georgios Pinitascdf51452017-08-31 14:21:36 +0100123 ARM_COMPUTE_ERROR_ON(pool_type == PoolingType::L2 && is_data_type_fixed_point(input->info()->data_type()));
Gian Marco Iodice16824302017-09-28 15:41:37 +0100124 ARM_COMPUTE_ERROR_ON((supported_pool_sizes.find(pool_size) == supported_pool_sizes.end()) && (input->info()->data_type() != DataType::F32));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100125 ARM_COMPUTE_ERROR_ON(pool_pad_x >= pool_size || pool_pad_y >= pool_size);
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100126 ARM_COMPUTE_ERROR_ON(is_data_type_fixed_point(input->info()->data_type()) && pool_stride_x > 2);
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000127 ARM_COMPUTE_ERROR_ON(exclude_padding && is_data_type_fixed_point(input->info()->data_type()));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100128
129 // Check output dimensions
130 std::tie(pooled_w, pooled_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1),
Gian Marco Iodice4e288692017-06-27 11:41:59 +0100131 pool_size, pool_size, pool_info.pad_stride_info());
Georgios Pinitas1dad50e2017-07-03 17:51:34 +0100132
133 // Output auto initialization if not yet initialized
134 {
135 TensorShape output_shape{ input->info()->tensor_shape() };
136 output_shape.set(0, pooled_w);
137 output_shape.set(1, pooled_h);
138
139 auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
140 }
141
142 ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
143 ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100144 ARM_COMPUTE_ERROR_ON((output->info()->dimension(0) != pooled_w) || (output->info()->dimension(1) != pooled_h));
145
146 unsigned int num_elems_read_per_iteration = 0;
147 unsigned int num_elems_processed_per_iteration = 0;
148 unsigned int num_elems_horizontal_window = 0;
149
150 // Select element size
151 switch(input->info()->data_type())
152 {
153 case DataType::QS8:
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100154 num_elems_read_per_iteration = 16;
155 switch(pool_size)
156 {
157 case 2:
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100158 num_elems_processed_per_iteration = (pool_stride_x == 2) ? 8 : 15;
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100159 break;
160 case 3:
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100161 num_elems_processed_per_iteration = (pool_stride_x == 2) ? 7 : 14;
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100162 break;
163 default:
164 ARM_COMPUTE_ERROR("Pooling size not supported");
Pablo Tello0c34fe22017-06-26 17:17:42 +0100165 break;
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100166 }
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100167 num_elems_horizontal_window = (pool_stride_x == 2) ? 8 : 16;
168 break;
169 case DataType::QS16:
170 num_elems_read_per_iteration = 8;
171 switch(pool_size)
172 {
173 case 2:
174 num_elems_processed_per_iteration = (pool_stride_x == 2) ? 4 : 7;
175 break;
176 case 3:
177 num_elems_processed_per_iteration = (pool_stride_x == 2) ? 3 : 6;
178 break;
179 default:
180 ARM_COMPUTE_ERROR("Pooling size not supported");
181 }
182 num_elems_horizontal_window = (pool_stride_x == 2) ? 4 : 8;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100183 break;
Ioan-Cristian Szabo33fd07b2017-10-26 15:42:24 +0100184#ifdef ARM_COMPUTE_AARCH64_V8_2
Pablo Tello0c34fe22017-06-26 17:17:42 +0100185 case DataType::F16:
186 switch(pool_size)
187 {
188 case 2:
189 num_elems_read_per_iteration = 16;
190 num_elems_processed_per_iteration = 8;
191 num_elems_horizontal_window = 8;
192 break;
193 case 3:
194 num_elems_read_per_iteration = 4;
195 num_elems_processed_per_iteration = 1;
196 num_elems_horizontal_window = 1;
197 break;
198 default:
199 ARM_COMPUTE_ERROR("Pooling size not supported");
200 break;
201 }
202 break;
Ioan-Cristian Szabo33fd07b2017-10-26 15:42:24 +0100203#endif /* ARM_COMPUTE_AARCH64_V8_2 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100204 case DataType::F32:
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100205 switch(pool_size)
206 {
207 case 2:
208 num_elems_read_per_iteration = 2;
209 break;
210 case 3:
211 num_elems_read_per_iteration = 4; // We use vload4 for pooling3
212 break;
213 case 7:
214 num_elems_read_per_iteration = 8; // We use vload8 for pooling7
215 break;
216 default:
Gian Marco Iodice16824302017-09-28 15:41:37 +0100217 num_elems_read_per_iteration = 1; // We use vload4 for poolingN but with a leftover for loop
Pablo Tello0c34fe22017-06-26 17:17:42 +0100218 break;
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100219 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100220 num_elems_processed_per_iteration = 1;
221 num_elems_horizontal_window = 1;
222 break;
223 default:
224 ARM_COMPUTE_ERROR("Element size not supported");
225 break;
226 }
227
228 _num_elems_processed_per_iteration = num_elems_processed_per_iteration;
229 const int input_width = input->info()->dimension(0);
230 const int input_height = input->info()->dimension(1);
231 const int upper_bound_w = ((pooled_w - 1) * pool_stride_x - pool_pad_x + num_elems_read_per_iteration) - input_width;
232 const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height;
233
234 // Set instance variables
235 _input = input;
236 _output = output;
237 _pool_info = pool_info;
238 _border_size = BorderSize(pool_pad_y, pool_pad_x);
239 _border_size.right = std::max(upper_bound_w, pool_pad_x);
240 _border_size.bottom = std::max(upper_bound_h, pool_pad_y);
241
242 // Select appropriate function
243 switch(pool_size)
244 {
245 case 2:
246 if(input->info()->data_type() == DataType::QS8)
247 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100248 switch(pool_type)
249 {
250 case PoolingType::AVG:
251 _func = &NEPoolingLayerKernel::pooling2_q8<PoolingType::AVG>;
252 break;
253 case PoolingType::MAX:
254 _func = &NEPoolingLayerKernel::pooling2_q8<PoolingType::MAX>;
255 break;
256 default:
257 ARM_COMPUTE_ERROR("Unsupported pooling type!");
258 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100259 }
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100260 else if(input->info()->data_type() == DataType::QS16)
261 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100262 switch(pool_type)
263 {
264 case PoolingType::AVG:
265 _func = &NEPoolingLayerKernel::pooling2_q16<PoolingType::AVG>;
266 break;
267 case PoolingType::MAX:
268 _func = &NEPoolingLayerKernel::pooling2_q16<PoolingType::MAX>;
269 break;
270 default:
271 ARM_COMPUTE_ERROR("Unsupported pooling type!");
272 }
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100273 }
Pablo Tello0c34fe22017-06-26 17:17:42 +0100274 else if(input->info()->data_type() == DataType::F16)
275 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100276 switch(pool_type)
277 {
278 case PoolingType::AVG:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000279 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_f16<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling2_f16<PoolingType::AVG, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100280 break;
281 case PoolingType::L2:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000282 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_f16<PoolingType::L2, true> : &NEPoolingLayerKernel::pooling2_f16<PoolingType::L2, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100283 break;
284 case PoolingType::MAX:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000285 _func = &NEPoolingLayerKernel::pooling2_f16<PoolingType::MAX, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100286 break;
287 default:
288 ARM_COMPUTE_ERROR("Unsupported pooling type!");
289 }
Pablo Tello0c34fe22017-06-26 17:17:42 +0100290 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100291 else if(input->info()->data_type() == DataType::F32)
292 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100293 switch(pool_type)
294 {
295 case PoolingType::AVG:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000296 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_f32<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling2_f32<PoolingType::AVG, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100297 break;
298 case PoolingType::L2:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000299 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_f32<PoolingType::L2, true> : &NEPoolingLayerKernel::pooling2_f32<PoolingType::L2, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100300 break;
301 case PoolingType::MAX:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000302 _func = &NEPoolingLayerKernel::pooling2_f32<PoolingType::MAX, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100303 break;
304 default:
305 ARM_COMPUTE_ERROR("Unsupported pooling type!");
306 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100307 }
308 break;
309 case 3:
310 if(input->info()->data_type() == DataType::QS8)
311 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100312 switch(pool_type)
313 {
314 case PoolingType::AVG:
315 _func = &NEPoolingLayerKernel::pooling3_q8<PoolingType::AVG>;
316 break;
317 case PoolingType::MAX:
318 _func = &NEPoolingLayerKernel::pooling3_q8<PoolingType::MAX>;
319 break;
320 default:
321 ARM_COMPUTE_ERROR("Unsupported pooling type!");
322 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100323 }
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100324 else if(input->info()->data_type() == DataType::QS16)
325 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100326 switch(pool_type)
327 {
328 case PoolingType::AVG:
329 _func = &NEPoolingLayerKernel::pooling3_q16<PoolingType::AVG>;
330 break;
331 case PoolingType::MAX:
332 _func = &NEPoolingLayerKernel::pooling3_q16<PoolingType::MAX>;
333 break;
334 default:
335 ARM_COMPUTE_ERROR("Unsupported pooling type!");
336 }
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100337 }
Pablo Tello0c34fe22017-06-26 17:17:42 +0100338 else if(input->info()->data_type() == DataType::F16)
339 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100340 switch(pool_type)
341 {
342 case PoolingType::AVG:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000343 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f16<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling3_f16<PoolingType::AVG, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100344 break;
345 case PoolingType::L2:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000346 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f16<PoolingType::L2, true> : &NEPoolingLayerKernel::pooling3_f16<PoolingType::L2, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100347 break;
348 case PoolingType::MAX:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000349 _func = &NEPoolingLayerKernel::pooling3_f16<PoolingType::MAX, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100350 break;
351 default:
352 ARM_COMPUTE_ERROR("Unsupported pooling type!");
353 }
Pablo Tello0c34fe22017-06-26 17:17:42 +0100354 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100355 else if(input->info()->data_type() == DataType::F32)
356 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100357 switch(pool_type)
358 {
359 case PoolingType::AVG:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000360 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f32<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling3_f32<PoolingType::AVG, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100361 break;
362 case PoolingType::L2:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000363 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f32<PoolingType::L2, true> : &NEPoolingLayerKernel::pooling3_f32<PoolingType::L2, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100364 break;
365 case PoolingType::MAX:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000366 _func = &NEPoolingLayerKernel::pooling3_f32<PoolingType::MAX, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100367 break;
368 default:
369 ARM_COMPUTE_ERROR("Unsupported pooling type!");
370 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100371 }
372 break;
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100373 case 7:
Georgios Pinitascdf51452017-08-31 14:21:36 +0100374 switch(pool_type)
375 {
376 case PoolingType::AVG:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000377 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling7_f32<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling7_f32<PoolingType::AVG, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100378 break;
379 case PoolingType::L2:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000380 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling7_f32<PoolingType::L2, true> : &NEPoolingLayerKernel::pooling7_f32<PoolingType::L2, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100381 break;
382 case PoolingType::MAX:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000383 _func = &NEPoolingLayerKernel::pooling7_f32<PoolingType::MAX, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100384 break;
385 default:
386 ARM_COMPUTE_ERROR("Unsupported pooling type!");
387 }
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100388 break;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100389 default:
Gian Marco Iodice16824302017-09-28 15:41:37 +0100390 switch(pool_type)
391 {
392 case PoolingType::AVG:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000393 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingN_f32<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingN_f32<PoolingType::AVG, false>;
Gian Marco Iodice16824302017-09-28 15:41:37 +0100394 break;
395 case PoolingType::L2:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000396 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingN_f32<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingN_f32<PoolingType::L2, false>;
Gian Marco Iodice16824302017-09-28 15:41:37 +0100397 break;
398 case PoolingType::MAX:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000399 _func = &NEPoolingLayerKernel::poolingN_f32<PoolingType::MAX, false>;
Gian Marco Iodice16824302017-09-28 15:41:37 +0100400 break;
401 default:
402 ARM_COMPUTE_ERROR("Unsupported pooling type!");
403 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100404 break;
405 }
406
407 // Configure kernel window
408 Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
409 AccessWindowStatic input_access(input->info(), -pool_pad_x, -pool_pad_y, input_width + _border_size.right, input_height + _border_size.bottom);
410 AccessWindowHorizontal output_access(output->info(), 0, num_elems_horizontal_window);
411 update_window_and_padding(win, input_access, output_access);
412 output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
413 INEKernel::configure(win);
414}
415
416template <PoolingType pooling_type>
417void NEPoolingLayerKernel::pooling2_q8(const Window &window_input, const Window &window)
418{
419 Iterator input(_input, window_input);
420 Iterator output(_output, window);
421
422 const int fixed_point_position = _input->info()->fixed_point_position();
423 constexpr int pool_size = 2;
424 int pool_pad_x = 0;
425 int pool_pad_y = 0;
426 int pool_stride_x = 0;
427 int pool_stride_y = 0;
428 std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();
429 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
430 const int upper_bound_w = _input->info()->dimension(0) + pool_pad_x;
431 const int upper_bound_h = _input->info()->dimension(1) + pool_pad_y;
432
433 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y)));
434 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 1));
435
436 execute_window_loop(window, [&](const Coordinates & id)
437 {
438 const auto top_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_top_ptr + input.offset()));
439 const auto bottom_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_bottom_ptr + input.offset()));
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100440 qint8x8_t lower_res = {};
441 qint8x8_t upper_res = {};
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100442 if(pooling_type == PoolingType::AVG)
443 {
444 // Calculate scale
445 const qint8_t scale = calculate_avg_scale_q8(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y, fixed_point_position);
446 const qint8x8_t scale_vec = vdup_n_qs8(scale);
447
448 // Perform pooling
449 const qint8x16_t sum_data = vqaddq_qs8(top_data, bottom_data);
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100450 lower_res = vqmul_qs8(vpadd_s8(vget_low_s8(sum_data), vget_high_s8(sum_data)), scale_vec, fixed_point_position);
451 if(pool_stride_x == 1)
452 {
453 const qint8x16_t sum_data_shifted = vextq_s8(sum_data, sum_data, 1);
454 upper_res = vqmul_qs8(vpadd_s8(vget_low_s8(sum_data_shifted), vget_high_s8(sum_data_shifted)), scale_vec, fixed_point_position);
455 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100456 }
457 else
458 {
459 const qint8x16_t max_data = vmaxq_s8(top_data, bottom_data);
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100460 lower_res = vpmax_s8(vget_low_s8(max_data), vget_high_s8(max_data));
461 if(pool_stride_x == 1)
462 {
463 const qint8x16_t max_data_shifted = vextq_s8(max_data, max_data, 1);
464 upper_res = vpmax_s8(vget_low_s8(max_data_shifted), vget_high_s8(max_data_shifted));
465 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100466 }
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100467 if(pool_stride_x == 1)
468 {
Georgios Pinitasdc460f12017-08-24 19:02:44 +0100469 const qint8x8x2_t res = { { lower_res, upper_res } };
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100470 vst2_s8(reinterpret_cast<qint8_t *>(output.ptr()), res);
471 }
472 else
473 {
474 vst1_qs8(reinterpret_cast<qint8_t *>(output.ptr()), lower_res);
475 }
476 },
477 input, output);
478}
479
480template <PoolingType pooling_type>
481void NEPoolingLayerKernel::pooling2_q16(const Window &window_input, const Window &window)
482{
483 Iterator input(_input, window_input);
484 Iterator output(_output, window);
485
486 const int fixed_point_position = _input->info()->fixed_point_position();
487 constexpr int pool_size = 2;
488 int pool_pad_x = 0;
489 int pool_pad_y = 0;
490 int pool_stride_x = 0;
491 int pool_stride_y = 0;
492 std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();
493 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
494 const int upper_bound_w = _input->info()->dimension(0) + pool_pad_x;
495 const int upper_bound_h = _input->info()->dimension(1) + pool_pad_y;
496
497 const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y)));
498 const unsigned char *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 1));
499
500 execute_window_loop(window, [&](const Coordinates & id)
501 {
502 const auto top_data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input_top_ptr + input.offset()));
503 const auto bottom_data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input_bottom_ptr + input.offset()));
504 qint16x4_t lower_res = {};
505 qint16x4_t upper_res = {};
506 if(pooling_type == PoolingType::AVG)
507 {
508 // Calculate scale
509 const qint16_t scale = calculate_avg_scale_q16(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y, fixed_point_position);
510 const qint16x4_t scale_vec = vdup_n_qs16(scale);
511
512 // Perform pooling
513 const qint16x8_t sum_data = vqaddq_qs16(top_data, bottom_data);
514 lower_res = vqmul_qs16(vpadd_s16(vget_low_s16(sum_data), vget_high_s16(sum_data)), scale_vec, fixed_point_position);
515 if(pool_stride_x == 1)
516 {
517 const qint16x8_t sum_data_shifted = vextq_s16(sum_data, sum_data, 1);
518 upper_res = vqmul_qs16(vpadd_s16(vget_low_s16(sum_data_shifted), vget_high_s16(sum_data_shifted)), scale_vec, fixed_point_position);
519 }
520 }
521 else
522 {
523 const qint16x8_t max_data = vmaxq_s16(top_data, bottom_data);
524 lower_res = vpmax_s16(vget_low_s16(max_data), vget_high_s16(max_data));
525 if(pool_stride_x == 1)
526 {
527 const qint16x8_t max_data_shifted = vextq_s16(max_data, max_data, 1);
528 upper_res = vpmax_s16(vget_low_s16(max_data_shifted), vget_high_s16(max_data_shifted));
529 }
530 }
531 if(pool_stride_x == 1)
532 {
Georgios Pinitasdc460f12017-08-24 19:02:44 +0100533 const qint16x4x2_t res = { { lower_res, upper_res } };
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100534 vst2_s16(reinterpret_cast<qint16_t *>(output.ptr()), res);
535 }
536 else
537 {
538 vst1_qs16(reinterpret_cast<qint16_t *>(output.ptr()), lower_res);
539 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100540 },
541 input, output);
542}
543
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000544template <PoolingType pooling_type, bool exclude_padding>
Pablo Tello0c34fe22017-06-26 17:17:42 +0100545void NEPoolingLayerKernel::pooling3_f16(const Window &window_input, const Window &window)
546{
Ioan-Cristian Szabo33fd07b2017-10-26 15:42:24 +0100547#ifdef ARM_COMPUTE_AARCH64_V8_2
Pablo Tello0c34fe22017-06-26 17:17:42 +0100548 Iterator input(_input, window_input);
549 Iterator output(_output, window);
550
551 constexpr const int pool_size = 3;
552 int pool_pad_x = 0;
553 int pool_pad_y = 0;
554 int pool_stride_x = 0;
555 int pool_stride_y = 0;
556 std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();
557 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000558 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_x);
559 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_y);
Pablo Tello0c34fe22017-06-26 17:17:42 +0100560
561 const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y)));
562 const unsigned char *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 1));
563 const unsigned char *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 2));
564
565 execute_window_loop(window, [&](const Coordinates & id)
566 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100567 float16x4_t top_data = vld1_f16(reinterpret_cast<const float16_t *>(input_top_ptr + input.offset()));
568 float16x4_t middle_data = vld1_f16(reinterpret_cast<const float16_t *>(input_middle_ptr + input.offset()));
569 float16x4_t bottom_data = vld1_f16(reinterpret_cast<const float16_t *>(input_bottom_ptr + input.offset()));
570 float16x4_t res = {};
571
572 // Get power of 2 in case of l2 pooling
573 if(pooling_type == PoolingType::L2)
574 {
575 top_data = vmul_f16(top_data, top_data);
576 middle_data = vmul_f16(middle_data, middle_data);
577 bottom_data = vmul_f16(bottom_data, bottom_data);
578 }
579
580 if(pooling_type != PoolingType::MAX)
Pablo Tello0c34fe22017-06-26 17:17:42 +0100581 {
582 // Calculate scale
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000583 const float scale = calculate_avg_scale<exclude_padding>(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y);
Pablo Tello0c34fe22017-06-26 17:17:42 +0100584 const float16x4_t scale_v = vdup_n_f16(scale);
585 // Perform pooling
586 const float16x4_t sum_data = vadd_f16(vadd_f16(top_data, bottom_data), middle_data);
587 res = vpadd_f16(vset_lane_f16(0.f, sum_data, 3), sum_data);
588 res = vmul_f16(vpadd_f16(res, res), scale_v);
589 }
590 else
591 {
592 const float16x4_t max_data = vmax_f16(vmax_f16(top_data, bottom_data), middle_data);
593 res = vpmax_f16(vset_lane_f16(-std::numeric_limits<float>::max(), max_data, 3), max_data);
594 res = vpmax_f16(res, res);
595 }
Georgios Pinitascdf51452017-08-31 14:21:36 +0100596
597 // Calculate square-root in case of l2 pooling
598 if(pooling_type == PoolingType::L2)
599 {
600 res = vinv_f16(vinvsqrt_f16(res));
601 }
602
Pablo Tello0c34fe22017-06-26 17:17:42 +0100603 *(reinterpret_cast<float16_t *>(output.ptr())) = vget_lane_f16(res, 0);
604 },
605 input, output);
Ioan-Cristian Szabo33fd07b2017-10-26 15:42:24 +0100606#else /* ARM_COMPUTE_AARCH64_V8_2 */
Pablo Tello0c34fe22017-06-26 17:17:42 +0100607 ARM_COMPUTE_UNUSED(window_input);
608 ARM_COMPUTE_UNUSED(window);
609 ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
Ioan-Cristian Szabo33fd07b2017-10-26 15:42:24 +0100610#endif /* ARM_COMPUTE_AARCH64_V8_2 */
Pablo Tello0c34fe22017-06-26 17:17:42 +0100611}
612
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000613template <PoolingType pooling_type, bool exclude_padding>
Pablo Tello0c34fe22017-06-26 17:17:42 +0100614void NEPoolingLayerKernel::pooling2_f16(const Window &window_input, const Window &window)
615{
Ioan-Cristian Szabo33fd07b2017-10-26 15:42:24 +0100616#ifdef ARM_COMPUTE_AARCH64_V8_2
Pablo Tello0c34fe22017-06-26 17:17:42 +0100617 Iterator input(_input, window_input);
618 Iterator output(_output, window);
619 constexpr int pool_size = 2;
620 int pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y = 0;
621 std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();
622 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000623 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_x);
624 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_y);
Pablo Tello0c34fe22017-06-26 17:17:42 +0100625
626 const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y)));
627 const unsigned char *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 1));
628
629 execute_window_loop(window, [&](const Coordinates & id)
630 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100631 auto top_data = vld2q_f16(reinterpret_cast<const float16_t *>(input_top_ptr + input.offset()));
632 auto bottom_data = vld2q_f16(reinterpret_cast<const float16_t *>(input_bottom_ptr + input.offset()));
Pablo Tello0c34fe22017-06-26 17:17:42 +0100633 float16x8_t res = {};
634
Georgios Pinitascdf51452017-08-31 14:21:36 +0100635 // Get power of 2 in case of l2 pooling
636 if(pooling_type == PoolingType::L2)
637 {
638 top_data.val[0] = vmulq_f16(top_data.val[0], top_data.val[0]);
639 top_data.val[1] = vmulq_f16(top_data.val[1], top_data.val[1]);
640 bottom_data.val[0] = vmulq_f16(bottom_data.val[0], bottom_data.val[0]);
641 bottom_data.val[1] = vmulq_f16(bottom_data.val[1], bottom_data.val[1]);
642 }
643
644 if(pooling_type != PoolingType::MAX)
Pablo Tello0c34fe22017-06-26 17:17:42 +0100645 {
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000646 const float scale = calculate_avg_scale<exclude_padding>(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y);
Pablo Tello0c34fe22017-06-26 17:17:42 +0100647 const float16x8_t scale_v = vdupq_n_f16(scale);
648 res = vmulq_f16(scale_v, vaddq_f16(bottom_data.val[1], vaddq_f16(bottom_data.val[0], vaddq_f16(top_data.val[0], top_data.val[1]))));
649 }
650 else
651 {
652 res = vmaxq_f16(bottom_data.val[1], vmaxq_f16(bottom_data.val[0], vmaxq_f16(top_data.val[0], top_data.val[1])));
653 }
Georgios Pinitascdf51452017-08-31 14:21:36 +0100654
655 // Calculate square-root in case of l2 pooling
656 if(pooling_type == PoolingType::L2)
657 {
658 res = vinvq_f16(vinvsqrtq_f16(res));
659 }
660
661 // Store result
Pablo Tello0c34fe22017-06-26 17:17:42 +0100662 vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()), res);
663 },
664 input, output);
Ioan-Cristian Szabo33fd07b2017-10-26 15:42:24 +0100665#else /* ARM_COMPUTE_AARCH64_V8_2 */
Pablo Tello0c34fe22017-06-26 17:17:42 +0100666 ARM_COMPUTE_UNUSED(window_input);
667 ARM_COMPUTE_UNUSED(window);
668 ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
Ioan-Cristian Szabo33fd07b2017-10-26 15:42:24 +0100669#endif /* ARM_COMPUTE_AARCH64_V8_2 */
Pablo Tello0c34fe22017-06-26 17:17:42 +0100670}
671
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000672template <PoolingType pooling_type, bool exclude_padding>
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100673void NEPoolingLayerKernel::pooling2_f32(const Window &window_input, const Window &window)
674{
675 Iterator input(_input, window_input);
676 Iterator output(_output, window);
677
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100678 constexpr int pool_size = 2;
679 int pool_pad_x = 0;
680 int pool_pad_y = 0;
681 int pool_stride_x = 0;
682 int pool_stride_y = 0;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100683 std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();
684 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000685 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_x);
686 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_y);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100687
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100688 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y)));
689 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 1));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100690
691 execute_window_loop(window, [&](const Coordinates & id)
692 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100693 float32x2_t top_data = vld1_f32(reinterpret_cast<const float *>(input_top_ptr + input.offset()));
694 float32x2_t bottom_data = vld1_f32(reinterpret_cast<const float *>(input_bottom_ptr + input.offset()));
695 float32x2_t res = {};
696 float final_res = 0;
697
698 // Get power of 2 in case of l2 pooling
699 if(pooling_type == PoolingType::L2)
700 {
701 top_data = vmul_f32(top_data, top_data);
702 bottom_data = vmul_f32(bottom_data, bottom_data);
703 }
704
705 if(pooling_type != PoolingType::MAX)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100706 {
707 // Calculate scale
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000708 float scale = calculate_avg_scale<exclude_padding>(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100709 const float32x2_t scale_v = vdup_n_f32(scale);
710
711 // Perform pooling
712 const float32x2_t sum_data = vadd_f32(top_data, bottom_data);
713 res = vmul_f32(vpadd_f32(sum_data, sum_data), scale_v);
714 }
715 else
716 {
717 const float32x2_t max_data = vmax_f32(top_data, bottom_data);
718 res = vpmax_f32(max_data, max_data);
719 }
Georgios Pinitascdf51452017-08-31 14:21:36 +0100720 final_res = vget_lane_f32(res, 0);
721
722 // Calculate square-root in case of l2 pooling
723 if(pooling_type == PoolingType::L2)
724 {
725 final_res = sqrt(final_res);
726 }
727
728 // Store result
729 *(reinterpret_cast<float *>(output.ptr())) = final_res;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100730 },
731 input, output);
732}
733
734template <PoolingType pooling_type>
735void NEPoolingLayerKernel::pooling3_q8(const Window &window_input, const Window &window)
736{
737 Iterator input(_input, window_input);
738 Iterator output(_output, window);
739
740 const int fixed_point_position = _input->info()->fixed_point_position();
741 constexpr int pool_size = 3;
742 int pool_pad_x = 0;
743 int pool_pad_y = 0;
744 int pool_stride_x = 0;
745 int pool_stride_y = 0;
746 std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();
747 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
748 const int upper_bound_w = _input->info()->dimension(0) + pool_pad_x;
749 const int upper_bound_h = _input->info()->dimension(1) + pool_pad_y;
750
751 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y)));
752 const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 1));
753 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 2));
754
755 execute_window_loop(window, [&](const Coordinates & id)
756 {
757 const auto top_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_top_ptr + input.offset()));
758 const auto middle_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_middle_ptr + input.offset()));
759 const auto bottom_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_bottom_ptr + input.offset()));
760 qint8x8_t res = {};
761 if(pooling_type == PoolingType::AVG)
762 {
763 // Calculate scale
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100764 const qint8_t scale = calculate_avg_scale_q8(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y, fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100765
766 // Perform pooling for stride 2
767 const qint8x16_t sum_data = vqaddq_qs8(vqaddq_qs8(top_data, bottom_data), middle_data);
768 const qint8x16_t sum_data2 = vextq_s8(sum_data, sum_data, 1);
769 const qint8x16_t sum_data3 = vextq_s8(sum_data, sum_data, 2);
770 const qint8x16_t final_sum = vqaddq_qs8(vqaddq_qs8(sum_data, sum_data2), sum_data3);
771 if(pool_stride_x == 2)
772 {
773 const qint8x8x2_t table = { { vget_low_s8(final_sum), vget_high_s8(final_sum) } };
774 static const qint8x8_t lookup_val = { 0, 2, 4, 6, 8, 10, 12, 14 };
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100775 const qint8x8_t scale_vec = vdup_n_qs8(scale);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100776 res = vtbl2_s8(table, lookup_val);
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100777 res = vqmul_qs8(res, scale_vec, fixed_point_position);
778 vst1_qs8(reinterpret_cast<qint8_t *>(output.ptr()), res);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100779 }
780 else
781 {
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100782 const qint8x16_t scale_vec = vdupq_n_qs8(scale);
783 vst1q_qs8(reinterpret_cast<qint8_t *>(output.ptr()), vqmulq_qs8(final_sum, scale_vec, fixed_point_position));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100784 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100785 }
786 else
787 {
788 const qint8x16_t max_data = vmaxq_s8(vmaxq_s8(top_data, bottom_data), middle_data);
789 const qint8x16_t max_data2 = vextq_s8(max_data, max_data, 1);
790 const qint8x16_t max_data3 = vextq_s8(max_data, max_data, 2);
791 const qint8x16_t final_max = vmaxq_s8(vmaxq_s8(max_data, max_data2), max_data3);
792
793 if(pool_stride_x == 2)
794 {
795 const qint8x8x2_t table = { { vget_low_s8(final_max), vget_high_s8(final_max) } };
796 static const qint8x8_t lookup_val = { 0, 2, 4, 6, 8, 10, 12, 14 };
797 res = vtbl2_s8(table, lookup_val);
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100798 vst1_qs8(reinterpret_cast<qint8_t *>(output.ptr()), res);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100799 }
800 else
801 {
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100802 vst1q_qs8(reinterpret_cast<qint8_t *>(output.ptr()), final_max);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100803 }
804 }
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100805 },
806 input, output);
807}
808
809template <PoolingType pooling_type>
810void NEPoolingLayerKernel::pooling3_q16(const Window &window_input, const Window &window)
811{
812 Iterator input(_input, window_input);
813 Iterator output(_output, window);
814
815 const int fixed_point_position = _input->info()->fixed_point_position();
816 constexpr int pool_size = 3;
817 int pool_pad_x = 0;
818 int pool_pad_y = 0;
819 int pool_stride_x = 0;
820 int pool_stride_y = 0;
821 std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();
822 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
823 const int upper_bound_w = _input->info()->dimension(0) + pool_pad_x;
824 const int upper_bound_h = _input->info()->dimension(1) + pool_pad_y;
825
826 const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y)));
827 const unsigned char *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 1));
828 const unsigned char *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 2));
829
830 execute_window_loop(window, [&](const Coordinates & id)
831 {
832 const auto top_data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input_top_ptr + input.offset()));
833 const auto middle_data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input_middle_ptr + input.offset()));
834 const auto bottom_data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input_bottom_ptr + input.offset()));
835
836 if(pooling_type == PoolingType::AVG)
837 {
838 // Calculate scale
839 const qint16_t scale = calculate_avg_scale_q16(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y, fixed_point_position);
840
841 // Perform pooling for stride 2
842 const qint16x8_t sum_data = vqaddq_qs16(vqaddq_qs16(top_data, bottom_data), middle_data);
843 const qint16x8_t sum_data2 = vextq_s16(sum_data, sum_data, 1);
844 const qint16x8_t sum_data3 = vextq_s16(sum_data, sum_data, 2);
845 const qint16x8_t final_sum = vqaddq_qs16(vqaddq_qs16(sum_data, sum_data2), sum_data3);
846 if(pool_stride_x == 2)
847 {
848 const qint16x4_t tmp = { vgetq_lane_s16(final_sum, 0), vgetq_lane_s16(final_sum, 2), vgetq_lane_s16(final_sum, 4), vgetq_lane_s16(final_sum, 6) };
849 const qint16x4_t scale_vec = vdup_n_qs16(scale);
850 vst1_qs16(reinterpret_cast<qint16_t *>(output.ptr()), vqmul_qs16(tmp, scale_vec, fixed_point_position));
851 }
852 else
853 {
854 const qint16x8_t scale_vec = vdupq_n_qs16(scale);
855 vst1q_qs16(reinterpret_cast<qint16_t *>(output.ptr()), vqmulq_qs16(final_sum, scale_vec, fixed_point_position));
856 }
857 }
858 else
859 {
860 const qint16x8_t max_data = vmaxq_s16(vmaxq_s16(top_data, bottom_data), middle_data);
861 const qint16x8_t max_data2 = vextq_s16(max_data, max_data, 1);
862 const qint16x8_t max_data3 = vextq_s16(max_data, max_data, 2);
863 const qint16x8_t final_max = vmaxq_s16(vmaxq_s16(max_data, max_data2), max_data3);
864
865 if(pool_stride_x == 2)
866 {
867 const qint16x4_t tmp = { vgetq_lane_s16(final_max, 0), vgetq_lane_s16(final_max, 2), vgetq_lane_s16(final_max, 4), vgetq_lane_s16(final_max, 6) };
868 vst1_qs16(reinterpret_cast<qint16_t *>(output.ptr()), tmp);
869 }
870 else
871 {
872 vst1q_qs16(reinterpret_cast<qint16_t *>(output.ptr()), final_max);
873 }
874 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100875 },
876 input, output);
877}
878
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000879template <PoolingType pooling_type, bool exclude_padding>
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100880void NEPoolingLayerKernel::pooling3_f32(const Window &window_input, const Window &window)
881{
882 Iterator input(_input, window_input);
883 Iterator output(_output, window);
884
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100885 constexpr const int pool_size = 3;
886 int pool_pad_x = 0;
887 int pool_pad_y = 0;
888 int pool_stride_x = 0;
889 int pool_stride_y = 0;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100890 std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();
891 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000892 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_x);
893 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_y);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100894
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100895 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y)));
896 const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 1));
897 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 2));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100898
899 execute_window_loop(window, [&](const Coordinates & id)
900 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100901 float32x4_t top_data = vld1q_f32(reinterpret_cast<const float *>(input_top_ptr + input.offset()));
902 float32x4_t middle_data = vld1q_f32(reinterpret_cast<const float *>(input_middle_ptr + input.offset()));
903 float32x4_t bottom_data = vld1q_f32(reinterpret_cast<const float *>(input_bottom_ptr + input.offset()));
904 float32x2_t res = {};
905 float final_res = 0;
906
907 // Get power of 2 in case of l2 pooling
908 if(pooling_type == PoolingType::L2)
909 {
910 top_data = vmulq_f32(top_data, top_data);
911 middle_data = vmulq_f32(middle_data, middle_data);
912 bottom_data = vmulq_f32(bottom_data, bottom_data);
913 }
914
915 if(pooling_type != PoolingType::MAX)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100916 {
917 // Calculate scale
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000918 float scale = calculate_avg_scale<exclude_padding>(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100919 const float32x2_t scale_v = vdup_n_f32(scale);
920
921 // Perform pooling
922 const float32x4_t sum_data = vaddq_f32(vaddq_f32(top_data, bottom_data), middle_data);
923 res = vpadd_f32(vget_high_f32(vsetq_lane_f32(0.f, sum_data, 3)), vget_low_f32(sum_data));
924 res = vmul_f32(vpadd_f32(res, res), scale_v);
925 }
926 else
927 {
928 const float32x4_t max_data = vmaxq_f32(vmaxq_f32(top_data, bottom_data), middle_data);
929 res = vpmax_f32(vget_high_f32(vsetq_lane_f32(-std::numeric_limits<float>::max(), max_data, 3)), vget_low_f32(max_data));
930 res = vpmax_f32(res, res);
931 }
Georgios Pinitascdf51452017-08-31 14:21:36 +0100932 final_res = vget_lane_f32(res, 0);
933
934 // Calculate square-root in case of l2 pooling
935 if(pooling_type == PoolingType::L2)
936 {
937 final_res = sqrt(final_res);
938 }
939
940 // Store result
941 *(reinterpret_cast<float *>(output.ptr())) = final_res;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100942 },
943 input, output);
944}
945
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000946template <PoolingType pooling_type, bool exclude_padding>
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100947void NEPoolingLayerKernel::pooling7_f32(const Window &window_input, const Window &window)
948{
949 Iterator input(_input, window_input);
950 Iterator output(_output, window);
951
952 constexpr const int pool_size = 7;
953 int pool_pad_x = 0;
954 int pool_pad_y = 0;
955 int pool_stride_x = 0;
956 int pool_stride_y = 0;
957 std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();
958 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000959 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_x);
960 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_y);
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100961
962 std::array<const uint8_t *, pool_size> input_ptrs{ {} };
963 for(int i = 0; i < pool_size; ++i)
964 {
965 input_ptrs[i] = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + i));
966 }
967
968 execute_window_loop(window, [&](const Coordinates & id)
969 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100970 float32x2_t res = {};
971 float final_res = 0.f;
972 if(pooling_type != PoolingType::MAX)
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100973 {
974 // Calculate scale
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000975 float scale = calculate_avg_scale<exclude_padding>(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y);
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100976 const float32x2_t scale_v = vdup_n_f32(scale);
977
978 // Perform pooling
Georgios Pinitascdf51452017-08-31 14:21:36 +0100979 float32x4x2_t data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[0] + input.offset()));
980 // Get power of 2 in case of l2 pooling
981 if(pooling_type == PoolingType::L2)
982 {
983 data.val[0] = vmulq_f32(data.val[0], data.val[0]);
984 data.val[1] = vmulq_f32(data.val[1], data.val[1]);
985 }
986 float32x4_t sum_data = vaddq_f32(data.val[0], vsetq_lane_f32(0.f, data.val[1], 3));
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100987 for(int i = 1; i < pool_size; ++i)
988 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100989 data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[i] + input.offset()));
990 // Get power of 2 in case of l2 pooling
991 if(pooling_type == PoolingType::L2)
992 {
993 data.val[0] = vmulq_f32(data.val[0], data.val[0]);
994 data.val[1] = vmulq_f32(data.val[1], data.val[1]);
995 }
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100996 sum_data = vaddq_f32(sum_data, data.val[0]);
997 sum_data = vaddq_f32(sum_data, vsetq_lane_f32(0.f, data.val[1], 3));
998 }
999 res = vpadd_f32(vget_high_f32(sum_data), vget_low_f32(sum_data));
1000 res = vmul_f32(vpadd_f32(res, res), scale_v);
1001 }
1002 else
1003 {
1004 float32x4x2_t max_data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[0] + input.offset()));
1005 for(int i = 1; i < pool_size; ++i)
1006 {
1007 const float32x4x2_t data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[i] + input.offset()));
1008 max_data = vmax2q_f32(max_data, data);
1009 }
1010 res = vpmax_f32(vget_high_f32(vsetq_lane_f32(-std::numeric_limits<float>::max(), max_data.val[1], 3)), vget_low_f32(max_data.val[1]));
1011 res = vpmax_f32(res, vpmax_f32(vget_high_f32(max_data.val[0]), vget_low_f32(max_data.val[0])));
1012 res = vpmax_f32(res, res);
1013 }
Georgios Pinitascdf51452017-08-31 14:21:36 +01001014 final_res = vget_lane_f32(res, 0);
1015
1016 // Calculate square-root in case of l2 pooling
1017 if(pooling_type == PoolingType::L2)
1018 {
1019 final_res = sqrt(final_res);
1020 }
1021
1022 // Store result
1023 *(reinterpret_cast<float *>(output.ptr())) = final_res;
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001024 },
1025 input, output);
1026}
1027
Georgios Pinitasadaae7e2017-10-30 15:56:32 +00001028template <PoolingType pooling_type, bool exclude_padding>
Gian Marco Iodice16824302017-09-28 15:41:37 +01001029void NEPoolingLayerKernel::poolingN_f32(const Window &window_input, const Window &window)
1030{
1031 Iterator input(_input, window_input);
1032 Iterator output(_output, window);
1033
1034 const int pool_size = _pool_info.pool_size();
1035 int pool_pad_x = 0;
1036 int pool_pad_y = 0;
1037 int pool_stride_x = 0;
1038 int pool_stride_y = 0;
1039 std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();
1040 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Georgios Pinitasadaae7e2017-10-30 15:56:32 +00001041 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_x);
1042 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_y);
Gian Marco Iodice16824302017-09-28 15:41:37 +01001043
1044 execute_window_loop(window, [&](const Coordinates & id)
1045 {
1046 float res = 0.0f;
1047
1048 if(pooling_type != PoolingType::MAX)
1049 {
1050 // Calculate scale
Georgios Pinitasadaae7e2017-10-30 15:56:32 +00001051 const float scale = calculate_avg_scale<exclude_padding>(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y);
Gian Marco Iodice16824302017-09-28 15:41:37 +01001052
1053 // Perform pooling
1054 float32x4_t vres = vdupq_n_f32(0.0f);
1055
1056 for(int y = 0; y < pool_size; ++y)
1057 {
1058 int x = 0;
1059 for(; x <= (pool_size - 4); x += 4)
1060 {
1061 const float32x4_t data = vld1q_f32(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_x) * _input->info()->strides_in_bytes().x() +
1062 (y - pool_pad_y) * _input->info()->strides_in_bytes().y()));
1063
1064 // Get power of 2 in case of l2 pooling and accumulate
1065 if(pooling_type == PoolingType::L2)
1066 {
1067 vres = vmlaq_f32(vres, data, data);
1068 }
1069 else
1070 {
1071 vres = vaddq_f32(vres, data);
1072 }
1073 }
1074
1075 // Leftover for loop
1076 for(; x < pool_size; ++x)
1077 {
1078 float data = *(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_x) * _input->info()->strides_in_bytes().x() + (y - pool_pad_y) * _input->info()->strides_in_bytes().y()));
1079
1080 // Get power of 2 in case of l2 pooling
1081 if(pooling_type == PoolingType::L2)
1082 {
1083 data *= data;
1084 }
1085
1086 res += data;
1087 }
1088 }
1089
1090#if defined(__aarch64__)
1091 // Reduction operation available on 64 bit architectures only
1092 res += vaddvq_f32(vres);
1093#else // __aarch64__
1094 // Reduction
1095 float32x2_t tmp = vpadd_f32(vget_high_f32(vres), vget_low_f32(vres));
1096 tmp = vpadd_f32(tmp, tmp);
1097
1098 res += vget_lane_f32(tmp, 0);
1099#endif // __aarch64__
1100 // Divide by scale
1101 res *= scale;
1102 }
1103 else
1104 {
1105 float32x4_t vres = vdupq_n_f32(std::numeric_limits<float>::min());
1106 res = std::numeric_limits<float>::min();
1107
1108 for(int y = 0; y < pool_size; ++y)
1109 {
1110 int x = 0;
1111 for(; x <= (pool_size - 4); x += 4)
1112 {
1113 const float32x4_t data = vld1q_f32(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_x) * _input->info()->strides_in_bytes().x() +
1114 (y - pool_pad_y) * _input->info()->strides_in_bytes().y()));
1115 vres = vmaxq_f32(vres, data);
1116 }
1117
1118 // Leftover for loop
1119 for(; x < pool_size; ++x)
1120 {
1121 const float data = *(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_x) * _input->info()->strides_in_bytes().x() + (y - pool_pad_y) * _input->info()->strides_in_bytes().y()));
1122 res = std::max(res, data);
1123 }
1124 }
1125
1126#if defined(__aarch64__)
1127 // Reduction operation available on 64 bit architectures only
1128 res = std::max(vmaxvq_f32(vres), res);
1129#else // __aarch64__
1130 float32x2_t tmp = vpmax_f32(vget_high_f32(vres), vget_low_f32(vres));
1131 tmp = vpmax_f32(tmp, tmp);
1132
1133 res = std::max(res, vget_lane_f32(tmp, 0));
1134#endif // __aarch64__
1135 }
1136
1137 // Calculate square-root in case of l2 pooling
1138 if(pooling_type == PoolingType::L2)
1139 {
1140 res = std::sqrt(res);
1141 }
1142
1143 // Store result
1144 *(reinterpret_cast<float *>(output.ptr())) = res;
1145 },
1146 input, output);
1147}
1148
Moritz Pflanzerc186b572017-09-07 09:48:04 +01001149void NEPoolingLayerKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001150{
Moritz Pflanzerc186b572017-09-07 09:48:04 +01001151 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001152 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
1153 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
1154 ARM_COMPUTE_ERROR_ON(_func == nullptr);
1155
Pablo Tello0c34fe22017-06-26 17:17:42 +01001156 const unsigned int pool_stride_x = _pool_info.pad_stride_info().stride().first;
1157 const unsigned int pool_stride_y = _pool_info.pad_stride_info().stride().second;
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001158
1159 // Set step for input in x and y direction for the input
1160 Window window_input(window);
1161 unsigned int window_x_inc = 0;
Pablo Tello0c34fe22017-06-26 17:17:42 +01001162 switch(_input->info()->data_type())
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001163 {
Pablo Tello0c34fe22017-06-26 17:17:42 +01001164 case DataType::QS8:
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001165 case DataType::QS16:
Pablo Tello0c34fe22017-06-26 17:17:42 +01001166 case DataType::F16:
1167 {
1168 window_x_inc = (pool_stride_x == 2) ? _num_elems_processed_per_iteration * 2 : _num_elems_processed_per_iteration;
1169 break;
1170 }
1171 case DataType::F32:
1172 {
1173 window_x_inc = pool_stride_x;
1174 break;
1175 }
1176 default:
1177 {
1178 ARM_COMPUTE_ERROR("Not supported");
1179 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001180 }
1181 window_input.set(Window::DimX, Window::Dimension(window.x().start() * pool_stride_x, window.x().end() * pool_stride_x, window_x_inc));
1182 window_input.set(Window::DimY, Window::Dimension(window.y().start() * pool_stride_y, window.y().end() * pool_stride_y, pool_stride_y));
1183
1184 // Run function
1185 (this->*_func)(window_input, window);
1186}