blob: 0e06704666c8163ca075ab9689c9bc64a2d1c10f [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
2 * Copyright (c) 2017 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h"
25
26#include "arm_compute/core/AccessWindowStatic.h"
27#include "arm_compute/core/Error.h"
28#include "arm_compute/core/FixedPoint.h"
29#include "arm_compute/core/Helpers.h"
30#include "arm_compute/core/ITensor.h"
31#include "arm_compute/core/NEON/NEFixedPoint.h"
Georgios Pinitascdf51452017-08-31 14:21:36 +010032#include "arm_compute/core/NEON/NEMath.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010033#include "arm_compute/core/TensorInfo.h"
34#include "arm_compute/core/Utils.h"
35#include "arm_compute/core/Validate.h"
36#include "arm_compute/core/Window.h"
37
38#include <algorithm>
39#include <arm_neon.h>
Georgios Pinitascdf51452017-08-31 14:21:36 +010040#include <cmath>
Anthony Barbier6ff3b192017-09-04 18:44:23 +010041#include <limits>
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +010042#include <set>
Anthony Barbier6ff3b192017-09-04 18:44:23 +010043#include <string>
44#include <tuple>
45
46using namespace arm_compute;
47
48namespace
49{
Georgios Pinitasadaae7e2017-10-30 15:56:32 +000050template <bool exclude_padding>
Anthony Barbier6ff3b192017-09-04 18:44:23 +010051inline float calculate_avg_scale(const Coordinates &id, const int pool_size, const int upper_bound_w, const int upper_bound_h,
52 const int pad_x, const int pad_y, const int stride_x, const int stride_y)
53{
Georgios Pinitasadaae7e2017-10-30 15:56:32 +000054 int start_x = id.x() * stride_x - pad_x;
55 int start_y = id.y() * stride_y - pad_y;
Pablo Tello0c34fe22017-06-26 17:17:42 +010056 const int end_x = std::min(start_x + pool_size, upper_bound_w);
57 const int end_y = std::min(start_y + pool_size, upper_bound_h);
Georgios Pinitasadaae7e2017-10-30 15:56:32 +000058 if(exclude_padding)
59 {
60 start_x = std::max(0, start_x);
61 start_y = std::max(0, start_y);
62 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +010063 return 1.f / ((end_y - start_y) * (end_x - start_x));
64}
65
66inline qint8_t calculate_avg_scale_q8(const Coordinates &id, int pool_size, int upper_bound_w, int upper_bound_h,
67 int pad_x, int pad_y, int stride_x, int stride_y, int fixed_point_position)
68{
Pablo Tello0c34fe22017-06-26 17:17:42 +010069 static const std::array<qint8_t, 10> scale_values_q8 =
Anthony Barbier6ff3b192017-09-04 18:44:23 +010070 { { 0x0, 0x0, 0x40, 0x2A, 0x20, 0x19, 0x15, 0x12, 0x10, 0xE } };
71 const int start_x = id.x() * stride_x - pad_x;
72 const int start_y = id.y() * stride_y - pad_y;
73 const int end_x = std::min(start_x + pool_size, upper_bound_w);
74 const int end_y = std::min(start_y + pool_size, upper_bound_h);
75 const int val = ((end_y - start_y) * (end_x - start_x));
Michalis Spyroubbd9fb92017-06-22 12:57:51 +010076 return sshr_qs8(scale_values_q8[val], (7 - fixed_point_position));
77}
78
79inline qint16_t calculate_avg_scale_q16(const Coordinates &id, int pool_size, int upper_bound_w, int upper_bound_h,
80 int pad_x, int pad_y, int stride_x, int stride_y, int fixed_point_position)
81{
82 static std::array<qint16_t, 10> scale_values_q16 =
83 { { 0x0, 0x0, 0x4000, 0x2AAB, 0x2000, 0x199A, 0x1555, 0x1249, 0x1000, 0xE38 } };
84 const int start_x = id.x() * stride_x - pad_x;
85 const int start_y = id.y() * stride_y - pad_y;
86 const int end_x = std::min(start_x + pool_size, upper_bound_w);
87 const int end_y = std::min(start_y + pool_size, upper_bound_h);
88 const int val = ((end_y - start_y) * (end_x - start_x));
89 return sshr_qs16(scale_values_q16[val], (15 - fixed_point_position));
Anthony Barbier6ff3b192017-09-04 18:44:23 +010090}
91} // namespace
92
93NEPoolingLayerKernel::NEPoolingLayerKernel()
94 : _func(nullptr), _input(nullptr), _output(nullptr), _pool_info(), _num_elems_processed_per_iteration(0), _border_size(0)
95{
96}
97
98BorderSize NEPoolingLayerKernel::border_size() const
99{
100 return _border_size;
101}
102
103void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info)
104{
Georgios Pinitas4c2dd542017-11-13 12:58:41 +0000105 int pool_pad_x = 0;
106 int pool_pad_y = 0;
107 int pool_stride_x = 0;
108 int pool_stride_y = 0;
109 unsigned int pooled_w = 0;
110 unsigned int pooled_h = 0;
111 PoolingType pool_type = pool_info.pool_type();
112 int pool_size = pool_info.pool_size();
113 const PadStrideInfo pad_stride_info = pool_info.pad_stride_info();
114 const bool exclude_padding = pool_info.exclude_padding();
115 const bool is_global_pooling = pool_info.is_global_pooling();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100116 std::tie(pool_pad_x, pool_pad_y) = pad_stride_info.pad();
117 std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
118
Gian Marco Iodice16824302017-09-28 15:41:37 +0100119 static const std::set<int> supported_pool_sizes = { 2, 3 };
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100120 ARM_COMPUTE_UNUSED(supported_pool_sizes);
121
Georgios Pinitas1dad50e2017-07-03 17:51:34 +0100122 ARM_COMPUTE_ERROR_ON_NULLPTR(output);
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100123 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
Georgios Pinitascdf51452017-08-31 14:21:36 +0100124 ARM_COMPUTE_ERROR_ON(pool_type == PoolingType::L2 && is_data_type_fixed_point(input->info()->data_type()));
Gian Marco Iodice16824302017-09-28 15:41:37 +0100125 ARM_COMPUTE_ERROR_ON((supported_pool_sizes.find(pool_size) == supported_pool_sizes.end()) && (input->info()->data_type() != DataType::F32));
Georgios Pinitas4c2dd542017-11-13 12:58:41 +0000126 ARM_COMPUTE_ERROR_ON(!is_global_pooling && (pool_pad_x >= pool_size || pool_pad_y >= pool_size));
127 ARM_COMPUTE_ERROR_ON(is_global_pooling && (input->info()->tensor_shape().x() != input->info()->tensor_shape().y()));
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100128 ARM_COMPUTE_ERROR_ON(is_data_type_fixed_point(input->info()->data_type()) && pool_stride_x > 2);
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000129 ARM_COMPUTE_ERROR_ON(exclude_padding && is_data_type_fixed_point(input->info()->data_type()));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100130
Georgios Pinitas4c2dd542017-11-13 12:58:41 +0000131 // Update pool size in case of global pooling
132 pool_size = is_global_pooling ? input->info()->dimension(0) : pool_size;
133
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100134 // Check output dimensions
Georgios Pinitas4c2dd542017-11-13 12:58:41 +0000135 std::tie(pooled_w, pooled_h) = scaled_dimensions(input->info()->dimension(0),
136 input->info()->dimension(1),
137 pool_size,
138 pool_size,
139 pool_info.pad_stride_info());
Georgios Pinitas1dad50e2017-07-03 17:51:34 +0100140
141 // Output auto initialization if not yet initialized
142 {
143 TensorShape output_shape{ input->info()->tensor_shape() };
144 output_shape.set(0, pooled_w);
145 output_shape.set(1, pooled_h);
146
147 auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
148 }
149
150 ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
151 ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100152 ARM_COMPUTE_ERROR_ON((output->info()->dimension(0) != pooled_w) || (output->info()->dimension(1) != pooled_h));
153
154 unsigned int num_elems_read_per_iteration = 0;
155 unsigned int num_elems_processed_per_iteration = 0;
156 unsigned int num_elems_horizontal_window = 0;
157
158 // Select element size
159 switch(input->info()->data_type())
160 {
161 case DataType::QS8:
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100162 num_elems_read_per_iteration = 16;
163 switch(pool_size)
164 {
165 case 2:
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100166 num_elems_processed_per_iteration = (pool_stride_x == 2) ? 8 : 15;
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100167 break;
168 case 3:
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100169 num_elems_processed_per_iteration = (pool_stride_x == 2) ? 7 : 14;
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100170 break;
171 default:
172 ARM_COMPUTE_ERROR("Pooling size not supported");
Pablo Tello0c34fe22017-06-26 17:17:42 +0100173 break;
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100174 }
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100175 num_elems_horizontal_window = (pool_stride_x == 2) ? 8 : 16;
176 break;
177 case DataType::QS16:
178 num_elems_read_per_iteration = 8;
179 switch(pool_size)
180 {
181 case 2:
182 num_elems_processed_per_iteration = (pool_stride_x == 2) ? 4 : 7;
183 break;
184 case 3:
185 num_elems_processed_per_iteration = (pool_stride_x == 2) ? 3 : 6;
186 break;
187 default:
188 ARM_COMPUTE_ERROR("Pooling size not supported");
189 }
190 num_elems_horizontal_window = (pool_stride_x == 2) ? 4 : 8;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100191 break;
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000192#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Pablo Tello0c34fe22017-06-26 17:17:42 +0100193 case DataType::F16:
194 switch(pool_size)
195 {
196 case 2:
197 num_elems_read_per_iteration = 16;
198 num_elems_processed_per_iteration = 8;
199 num_elems_horizontal_window = 8;
200 break;
201 case 3:
202 num_elems_read_per_iteration = 4;
203 num_elems_processed_per_iteration = 1;
204 num_elems_horizontal_window = 1;
205 break;
206 default:
207 ARM_COMPUTE_ERROR("Pooling size not supported");
208 break;
209 }
210 break;
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000211#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100212 case DataType::F32:
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100213 switch(pool_size)
214 {
215 case 2:
216 num_elems_read_per_iteration = 2;
217 break;
218 case 3:
219 num_elems_read_per_iteration = 4; // We use vload4 for pooling3
220 break;
221 case 7:
222 num_elems_read_per_iteration = 8; // We use vload8 for pooling7
223 break;
224 default:
Gian Marco Iodice16824302017-09-28 15:41:37 +0100225 num_elems_read_per_iteration = 1; // We use vload4 for poolingN but with a leftover for loop
Pablo Tello0c34fe22017-06-26 17:17:42 +0100226 break;
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100227 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100228 num_elems_processed_per_iteration = 1;
229 num_elems_horizontal_window = 1;
230 break;
231 default:
232 ARM_COMPUTE_ERROR("Element size not supported");
233 break;
234 }
235
236 _num_elems_processed_per_iteration = num_elems_processed_per_iteration;
237 const int input_width = input->info()->dimension(0);
238 const int input_height = input->info()->dimension(1);
239 const int upper_bound_w = ((pooled_w - 1) * pool_stride_x - pool_pad_x + num_elems_read_per_iteration) - input_width;
240 const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height;
241
242 // Set instance variables
243 _input = input;
244 _output = output;
245 _pool_info = pool_info;
246 _border_size = BorderSize(pool_pad_y, pool_pad_x);
247 _border_size.right = std::max(upper_bound_w, pool_pad_x);
248 _border_size.bottom = std::max(upper_bound_h, pool_pad_y);
249
250 // Select appropriate function
251 switch(pool_size)
252 {
253 case 2:
254 if(input->info()->data_type() == DataType::QS8)
255 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100256 switch(pool_type)
257 {
258 case PoolingType::AVG:
259 _func = &NEPoolingLayerKernel::pooling2_q8<PoolingType::AVG>;
260 break;
261 case PoolingType::MAX:
262 _func = &NEPoolingLayerKernel::pooling2_q8<PoolingType::MAX>;
263 break;
264 default:
265 ARM_COMPUTE_ERROR("Unsupported pooling type!");
266 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100267 }
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100268 else if(input->info()->data_type() == DataType::QS16)
269 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100270 switch(pool_type)
271 {
272 case PoolingType::AVG:
273 _func = &NEPoolingLayerKernel::pooling2_q16<PoolingType::AVG>;
274 break;
275 case PoolingType::MAX:
276 _func = &NEPoolingLayerKernel::pooling2_q16<PoolingType::MAX>;
277 break;
278 default:
279 ARM_COMPUTE_ERROR("Unsupported pooling type!");
280 }
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100281 }
Pablo Tello0c34fe22017-06-26 17:17:42 +0100282 else if(input->info()->data_type() == DataType::F16)
283 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100284 switch(pool_type)
285 {
286 case PoolingType::AVG:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000287 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_f16<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling2_f16<PoolingType::AVG, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100288 break;
289 case PoolingType::L2:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000290 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_f16<PoolingType::L2, true> : &NEPoolingLayerKernel::pooling2_f16<PoolingType::L2, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100291 break;
292 case PoolingType::MAX:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000293 _func = &NEPoolingLayerKernel::pooling2_f16<PoolingType::MAX, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100294 break;
295 default:
296 ARM_COMPUTE_ERROR("Unsupported pooling type!");
297 }
Pablo Tello0c34fe22017-06-26 17:17:42 +0100298 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100299 else if(input->info()->data_type() == DataType::F32)
300 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100301 switch(pool_type)
302 {
303 case PoolingType::AVG:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000304 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_f32<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling2_f32<PoolingType::AVG, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100305 break;
306 case PoolingType::L2:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000307 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_f32<PoolingType::L2, true> : &NEPoolingLayerKernel::pooling2_f32<PoolingType::L2, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100308 break;
309 case PoolingType::MAX:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000310 _func = &NEPoolingLayerKernel::pooling2_f32<PoolingType::MAX, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100311 break;
312 default:
313 ARM_COMPUTE_ERROR("Unsupported pooling type!");
314 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100315 }
316 break;
317 case 3:
318 if(input->info()->data_type() == DataType::QS8)
319 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100320 switch(pool_type)
321 {
322 case PoolingType::AVG:
323 _func = &NEPoolingLayerKernel::pooling3_q8<PoolingType::AVG>;
324 break;
325 case PoolingType::MAX:
326 _func = &NEPoolingLayerKernel::pooling3_q8<PoolingType::MAX>;
327 break;
328 default:
329 ARM_COMPUTE_ERROR("Unsupported pooling type!");
330 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100331 }
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100332 else if(input->info()->data_type() == DataType::QS16)
333 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100334 switch(pool_type)
335 {
336 case PoolingType::AVG:
337 _func = &NEPoolingLayerKernel::pooling3_q16<PoolingType::AVG>;
338 break;
339 case PoolingType::MAX:
340 _func = &NEPoolingLayerKernel::pooling3_q16<PoolingType::MAX>;
341 break;
342 default:
343 ARM_COMPUTE_ERROR("Unsupported pooling type!");
344 }
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100345 }
Pablo Tello0c34fe22017-06-26 17:17:42 +0100346 else if(input->info()->data_type() == DataType::F16)
347 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100348 switch(pool_type)
349 {
350 case PoolingType::AVG:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000351 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f16<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling3_f16<PoolingType::AVG, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100352 break;
353 case PoolingType::L2:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000354 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f16<PoolingType::L2, true> : &NEPoolingLayerKernel::pooling3_f16<PoolingType::L2, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100355 break;
356 case PoolingType::MAX:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000357 _func = &NEPoolingLayerKernel::pooling3_f16<PoolingType::MAX, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100358 break;
359 default:
360 ARM_COMPUTE_ERROR("Unsupported pooling type!");
361 }
Pablo Tello0c34fe22017-06-26 17:17:42 +0100362 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100363 else if(input->info()->data_type() == DataType::F32)
364 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100365 switch(pool_type)
366 {
367 case PoolingType::AVG:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000368 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f32<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling3_f32<PoolingType::AVG, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100369 break;
370 case PoolingType::L2:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000371 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f32<PoolingType::L2, true> : &NEPoolingLayerKernel::pooling3_f32<PoolingType::L2, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100372 break;
373 case PoolingType::MAX:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000374 _func = &NEPoolingLayerKernel::pooling3_f32<PoolingType::MAX, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100375 break;
376 default:
377 ARM_COMPUTE_ERROR("Unsupported pooling type!");
378 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100379 }
380 break;
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100381 case 7:
Georgios Pinitascdf51452017-08-31 14:21:36 +0100382 switch(pool_type)
383 {
384 case PoolingType::AVG:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000385 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling7_f32<PoolingType::AVG, true> : &NEPoolingLayerKernel::pooling7_f32<PoolingType::AVG, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100386 break;
387 case PoolingType::L2:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000388 _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling7_f32<PoolingType::L2, true> : &NEPoolingLayerKernel::pooling7_f32<PoolingType::L2, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100389 break;
390 case PoolingType::MAX:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000391 _func = &NEPoolingLayerKernel::pooling7_f32<PoolingType::MAX, false>;
Georgios Pinitascdf51452017-08-31 14:21:36 +0100392 break;
393 default:
394 ARM_COMPUTE_ERROR("Unsupported pooling type!");
395 }
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100396 break;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100397 default:
Gian Marco Iodice16824302017-09-28 15:41:37 +0100398 switch(pool_type)
399 {
400 case PoolingType::AVG:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000401 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingN_f32<PoolingType::AVG, true> : &NEPoolingLayerKernel::poolingN_f32<PoolingType::AVG, false>;
Gian Marco Iodice16824302017-09-28 15:41:37 +0100402 break;
403 case PoolingType::L2:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000404 _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingN_f32<PoolingType::L2, true> : &NEPoolingLayerKernel::poolingN_f32<PoolingType::L2, false>;
Gian Marco Iodice16824302017-09-28 15:41:37 +0100405 break;
406 case PoolingType::MAX:
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000407 _func = &NEPoolingLayerKernel::poolingN_f32<PoolingType::MAX, false>;
Gian Marco Iodice16824302017-09-28 15:41:37 +0100408 break;
409 default:
410 ARM_COMPUTE_ERROR("Unsupported pooling type!");
411 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100412 break;
413 }
414
415 // Configure kernel window
416 Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
417 AccessWindowStatic input_access(input->info(), -pool_pad_x, -pool_pad_y, input_width + _border_size.right, input_height + _border_size.bottom);
418 AccessWindowHorizontal output_access(output->info(), 0, num_elems_horizontal_window);
419 update_window_and_padding(win, input_access, output_access);
420 output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
421 INEKernel::configure(win);
422}
423
424template <PoolingType pooling_type>
425void NEPoolingLayerKernel::pooling2_q8(const Window &window_input, const Window &window)
426{
427 Iterator input(_input, window_input);
428 Iterator output(_output, window);
429
430 const int fixed_point_position = _input->info()->fixed_point_position();
431 constexpr int pool_size = 2;
432 int pool_pad_x = 0;
433 int pool_pad_y = 0;
434 int pool_stride_x = 0;
435 int pool_stride_y = 0;
436 std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();
437 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
438 const int upper_bound_w = _input->info()->dimension(0) + pool_pad_x;
439 const int upper_bound_h = _input->info()->dimension(1) + pool_pad_y;
440
441 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y)));
442 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 1));
443
444 execute_window_loop(window, [&](const Coordinates & id)
445 {
446 const auto top_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_top_ptr + input.offset()));
447 const auto bottom_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_bottom_ptr + input.offset()));
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100448 qint8x8_t lower_res = {};
449 qint8x8_t upper_res = {};
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100450 if(pooling_type == PoolingType::AVG)
451 {
452 // Calculate scale
453 const qint8_t scale = calculate_avg_scale_q8(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y, fixed_point_position);
454 const qint8x8_t scale_vec = vdup_n_qs8(scale);
455
456 // Perform pooling
457 const qint8x16_t sum_data = vqaddq_qs8(top_data, bottom_data);
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100458 lower_res = vqmul_qs8(vpadd_s8(vget_low_s8(sum_data), vget_high_s8(sum_data)), scale_vec, fixed_point_position);
459 if(pool_stride_x == 1)
460 {
461 const qint8x16_t sum_data_shifted = vextq_s8(sum_data, sum_data, 1);
462 upper_res = vqmul_qs8(vpadd_s8(vget_low_s8(sum_data_shifted), vget_high_s8(sum_data_shifted)), scale_vec, fixed_point_position);
463 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100464 }
465 else
466 {
467 const qint8x16_t max_data = vmaxq_s8(top_data, bottom_data);
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100468 lower_res = vpmax_s8(vget_low_s8(max_data), vget_high_s8(max_data));
469 if(pool_stride_x == 1)
470 {
471 const qint8x16_t max_data_shifted = vextq_s8(max_data, max_data, 1);
472 upper_res = vpmax_s8(vget_low_s8(max_data_shifted), vget_high_s8(max_data_shifted));
473 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100474 }
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100475 if(pool_stride_x == 1)
476 {
Georgios Pinitasdc460f12017-08-24 19:02:44 +0100477 const qint8x8x2_t res = { { lower_res, upper_res } };
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100478 vst2_s8(reinterpret_cast<qint8_t *>(output.ptr()), res);
479 }
480 else
481 {
482 vst1_qs8(reinterpret_cast<qint8_t *>(output.ptr()), lower_res);
483 }
484 },
485 input, output);
486}
487
488template <PoolingType pooling_type>
489void NEPoolingLayerKernel::pooling2_q16(const Window &window_input, const Window &window)
490{
491 Iterator input(_input, window_input);
492 Iterator output(_output, window);
493
494 const int fixed_point_position = _input->info()->fixed_point_position();
495 constexpr int pool_size = 2;
496 int pool_pad_x = 0;
497 int pool_pad_y = 0;
498 int pool_stride_x = 0;
499 int pool_stride_y = 0;
500 std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();
501 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
502 const int upper_bound_w = _input->info()->dimension(0) + pool_pad_x;
503 const int upper_bound_h = _input->info()->dimension(1) + pool_pad_y;
504
505 const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y)));
506 const unsigned char *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 1));
507
508 execute_window_loop(window, [&](const Coordinates & id)
509 {
510 const auto top_data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input_top_ptr + input.offset()));
511 const auto bottom_data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input_bottom_ptr + input.offset()));
512 qint16x4_t lower_res = {};
513 qint16x4_t upper_res = {};
514 if(pooling_type == PoolingType::AVG)
515 {
516 // Calculate scale
517 const qint16_t scale = calculate_avg_scale_q16(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y, fixed_point_position);
518 const qint16x4_t scale_vec = vdup_n_qs16(scale);
519
520 // Perform pooling
521 const qint16x8_t sum_data = vqaddq_qs16(top_data, bottom_data);
522 lower_res = vqmul_qs16(vpadd_s16(vget_low_s16(sum_data), vget_high_s16(sum_data)), scale_vec, fixed_point_position);
523 if(pool_stride_x == 1)
524 {
525 const qint16x8_t sum_data_shifted = vextq_s16(sum_data, sum_data, 1);
526 upper_res = vqmul_qs16(vpadd_s16(vget_low_s16(sum_data_shifted), vget_high_s16(sum_data_shifted)), scale_vec, fixed_point_position);
527 }
528 }
529 else
530 {
531 const qint16x8_t max_data = vmaxq_s16(top_data, bottom_data);
532 lower_res = vpmax_s16(vget_low_s16(max_data), vget_high_s16(max_data));
533 if(pool_stride_x == 1)
534 {
535 const qint16x8_t max_data_shifted = vextq_s16(max_data, max_data, 1);
536 upper_res = vpmax_s16(vget_low_s16(max_data_shifted), vget_high_s16(max_data_shifted));
537 }
538 }
539 if(pool_stride_x == 1)
540 {
Georgios Pinitasdc460f12017-08-24 19:02:44 +0100541 const qint16x4x2_t res = { { lower_res, upper_res } };
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100542 vst2_s16(reinterpret_cast<qint16_t *>(output.ptr()), res);
543 }
544 else
545 {
546 vst1_qs16(reinterpret_cast<qint16_t *>(output.ptr()), lower_res);
547 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100548 },
549 input, output);
550}
551
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000552template <PoolingType pooling_type, bool exclude_padding>
Pablo Tello0c34fe22017-06-26 17:17:42 +0100553void NEPoolingLayerKernel::pooling3_f16(const Window &window_input, const Window &window)
554{
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000555#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Pablo Tello0c34fe22017-06-26 17:17:42 +0100556 Iterator input(_input, window_input);
557 Iterator output(_output, window);
558
559 constexpr const int pool_size = 3;
560 int pool_pad_x = 0;
561 int pool_pad_y = 0;
562 int pool_stride_x = 0;
563 int pool_stride_y = 0;
564 std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();
565 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000566 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_x);
567 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_y);
Pablo Tello0c34fe22017-06-26 17:17:42 +0100568
569 const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y)));
570 const unsigned char *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 1));
571 const unsigned char *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 2));
572
573 execute_window_loop(window, [&](const Coordinates & id)
574 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100575 float16x4_t top_data = vld1_f16(reinterpret_cast<const float16_t *>(input_top_ptr + input.offset()));
576 float16x4_t middle_data = vld1_f16(reinterpret_cast<const float16_t *>(input_middle_ptr + input.offset()));
577 float16x4_t bottom_data = vld1_f16(reinterpret_cast<const float16_t *>(input_bottom_ptr + input.offset()));
578 float16x4_t res = {};
579
580 // Get power of 2 in case of l2 pooling
581 if(pooling_type == PoolingType::L2)
582 {
583 top_data = vmul_f16(top_data, top_data);
584 middle_data = vmul_f16(middle_data, middle_data);
585 bottom_data = vmul_f16(bottom_data, bottom_data);
586 }
587
588 if(pooling_type != PoolingType::MAX)
Pablo Tello0c34fe22017-06-26 17:17:42 +0100589 {
590 // Calculate scale
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000591 const float scale = calculate_avg_scale<exclude_padding>(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y);
Pablo Tello0c34fe22017-06-26 17:17:42 +0100592 const float16x4_t scale_v = vdup_n_f16(scale);
593 // Perform pooling
594 const float16x4_t sum_data = vadd_f16(vadd_f16(top_data, bottom_data), middle_data);
595 res = vpadd_f16(vset_lane_f16(0.f, sum_data, 3), sum_data);
596 res = vmul_f16(vpadd_f16(res, res), scale_v);
597 }
598 else
599 {
600 const float16x4_t max_data = vmax_f16(vmax_f16(top_data, bottom_data), middle_data);
601 res = vpmax_f16(vset_lane_f16(-std::numeric_limits<float>::max(), max_data, 3), max_data);
602 res = vpmax_f16(res, res);
603 }
Georgios Pinitascdf51452017-08-31 14:21:36 +0100604
605 // Calculate square-root in case of l2 pooling
606 if(pooling_type == PoolingType::L2)
607 {
608 res = vinv_f16(vinvsqrt_f16(res));
609 }
610
Pablo Tello0c34fe22017-06-26 17:17:42 +0100611 *(reinterpret_cast<float16_t *>(output.ptr())) = vget_lane_f16(res, 0);
612 },
613 input, output);
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000614#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tello0c34fe22017-06-26 17:17:42 +0100615 ARM_COMPUTE_UNUSED(window_input);
616 ARM_COMPUTE_UNUSED(window);
617 ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000618#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tello0c34fe22017-06-26 17:17:42 +0100619}
620
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000621template <PoolingType pooling_type, bool exclude_padding>
Pablo Tello0c34fe22017-06-26 17:17:42 +0100622void NEPoolingLayerKernel::pooling2_f16(const Window &window_input, const Window &window)
623{
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000624#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Pablo Tello0c34fe22017-06-26 17:17:42 +0100625 Iterator input(_input, window_input);
626 Iterator output(_output, window);
627 constexpr int pool_size = 2;
628 int pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y = 0;
629 std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();
630 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000631 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_x);
632 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_y);
Pablo Tello0c34fe22017-06-26 17:17:42 +0100633
634 const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y)));
635 const unsigned char *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 1));
636
637 execute_window_loop(window, [&](const Coordinates & id)
638 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100639 auto top_data = vld2q_f16(reinterpret_cast<const float16_t *>(input_top_ptr + input.offset()));
640 auto bottom_data = vld2q_f16(reinterpret_cast<const float16_t *>(input_bottom_ptr + input.offset()));
Pablo Tello0c34fe22017-06-26 17:17:42 +0100641 float16x8_t res = {};
642
Georgios Pinitascdf51452017-08-31 14:21:36 +0100643 // Get power of 2 in case of l2 pooling
644 if(pooling_type == PoolingType::L2)
645 {
646 top_data.val[0] = vmulq_f16(top_data.val[0], top_data.val[0]);
647 top_data.val[1] = vmulq_f16(top_data.val[1], top_data.val[1]);
648 bottom_data.val[0] = vmulq_f16(bottom_data.val[0], bottom_data.val[0]);
649 bottom_data.val[1] = vmulq_f16(bottom_data.val[1], bottom_data.val[1]);
650 }
651
652 if(pooling_type != PoolingType::MAX)
Pablo Tello0c34fe22017-06-26 17:17:42 +0100653 {
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000654 const float scale = calculate_avg_scale<exclude_padding>(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y);
Pablo Tello0c34fe22017-06-26 17:17:42 +0100655 const float16x8_t scale_v = vdupq_n_f16(scale);
656 res = vmulq_f16(scale_v, vaddq_f16(bottom_data.val[1], vaddq_f16(bottom_data.val[0], vaddq_f16(top_data.val[0], top_data.val[1]))));
657 }
658 else
659 {
660 res = vmaxq_f16(bottom_data.val[1], vmaxq_f16(bottom_data.val[0], vmaxq_f16(top_data.val[0], top_data.val[1])));
661 }
Georgios Pinitascdf51452017-08-31 14:21:36 +0100662
663 // Calculate square-root in case of l2 pooling
664 if(pooling_type == PoolingType::L2)
665 {
666 res = vinvq_f16(vinvsqrtq_f16(res));
667 }
668
669 // Store result
Pablo Tello0c34fe22017-06-26 17:17:42 +0100670 vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()), res);
671 },
672 input, output);
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000673#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tello0c34fe22017-06-26 17:17:42 +0100674 ARM_COMPUTE_UNUSED(window_input);
675 ARM_COMPUTE_UNUSED(window);
676 ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000677#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tello0c34fe22017-06-26 17:17:42 +0100678}
679
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000680template <PoolingType pooling_type, bool exclude_padding>
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100681void NEPoolingLayerKernel::pooling2_f32(const Window &window_input, const Window &window)
682{
683 Iterator input(_input, window_input);
684 Iterator output(_output, window);
685
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100686 constexpr int pool_size = 2;
687 int pool_pad_x = 0;
688 int pool_pad_y = 0;
689 int pool_stride_x = 0;
690 int pool_stride_y = 0;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100691 std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();
692 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000693 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_x);
694 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_y);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100695
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100696 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y)));
697 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 1));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100698
699 execute_window_loop(window, [&](const Coordinates & id)
700 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100701 float32x2_t top_data = vld1_f32(reinterpret_cast<const float *>(input_top_ptr + input.offset()));
702 float32x2_t bottom_data = vld1_f32(reinterpret_cast<const float *>(input_bottom_ptr + input.offset()));
703 float32x2_t res = {};
704 float final_res = 0;
705
706 // Get power of 2 in case of l2 pooling
707 if(pooling_type == PoolingType::L2)
708 {
709 top_data = vmul_f32(top_data, top_data);
710 bottom_data = vmul_f32(bottom_data, bottom_data);
711 }
712
713 if(pooling_type != PoolingType::MAX)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100714 {
715 // Calculate scale
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000716 float scale = calculate_avg_scale<exclude_padding>(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100717 const float32x2_t scale_v = vdup_n_f32(scale);
718
719 // Perform pooling
720 const float32x2_t sum_data = vadd_f32(top_data, bottom_data);
721 res = vmul_f32(vpadd_f32(sum_data, sum_data), scale_v);
722 }
723 else
724 {
725 const float32x2_t max_data = vmax_f32(top_data, bottom_data);
726 res = vpmax_f32(max_data, max_data);
727 }
Georgios Pinitascdf51452017-08-31 14:21:36 +0100728 final_res = vget_lane_f32(res, 0);
729
730 // Calculate square-root in case of l2 pooling
731 if(pooling_type == PoolingType::L2)
732 {
733 final_res = sqrt(final_res);
734 }
735
736 // Store result
737 *(reinterpret_cast<float *>(output.ptr())) = final_res;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100738 },
739 input, output);
740}
741
742template <PoolingType pooling_type>
743void NEPoolingLayerKernel::pooling3_q8(const Window &window_input, const Window &window)
744{
745 Iterator input(_input, window_input);
746 Iterator output(_output, window);
747
748 const int fixed_point_position = _input->info()->fixed_point_position();
749 constexpr int pool_size = 3;
750 int pool_pad_x = 0;
751 int pool_pad_y = 0;
752 int pool_stride_x = 0;
753 int pool_stride_y = 0;
754 std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();
755 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
756 const int upper_bound_w = _input->info()->dimension(0) + pool_pad_x;
757 const int upper_bound_h = _input->info()->dimension(1) + pool_pad_y;
758
759 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y)));
760 const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 1));
761 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 2));
762
763 execute_window_loop(window, [&](const Coordinates & id)
764 {
765 const auto top_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_top_ptr + input.offset()));
766 const auto middle_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_middle_ptr + input.offset()));
767 const auto bottom_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_bottom_ptr + input.offset()));
768 qint8x8_t res = {};
769 if(pooling_type == PoolingType::AVG)
770 {
771 // Calculate scale
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100772 const qint8_t scale = calculate_avg_scale_q8(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y, fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100773
774 // Perform pooling for stride 2
775 const qint8x16_t sum_data = vqaddq_qs8(vqaddq_qs8(top_data, bottom_data), middle_data);
776 const qint8x16_t sum_data2 = vextq_s8(sum_data, sum_data, 1);
777 const qint8x16_t sum_data3 = vextq_s8(sum_data, sum_data, 2);
778 const qint8x16_t final_sum = vqaddq_qs8(vqaddq_qs8(sum_data, sum_data2), sum_data3);
779 if(pool_stride_x == 2)
780 {
781 const qint8x8x2_t table = { { vget_low_s8(final_sum), vget_high_s8(final_sum) } };
782 static const qint8x8_t lookup_val = { 0, 2, 4, 6, 8, 10, 12, 14 };
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100783 const qint8x8_t scale_vec = vdup_n_qs8(scale);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100784 res = vtbl2_s8(table, lookup_val);
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100785 res = vqmul_qs8(res, scale_vec, fixed_point_position);
786 vst1_qs8(reinterpret_cast<qint8_t *>(output.ptr()), res);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100787 }
788 else
789 {
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100790 const qint8x16_t scale_vec = vdupq_n_qs8(scale);
791 vst1q_qs8(reinterpret_cast<qint8_t *>(output.ptr()), vqmulq_qs8(final_sum, scale_vec, fixed_point_position));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100792 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100793 }
794 else
795 {
796 const qint8x16_t max_data = vmaxq_s8(vmaxq_s8(top_data, bottom_data), middle_data);
797 const qint8x16_t max_data2 = vextq_s8(max_data, max_data, 1);
798 const qint8x16_t max_data3 = vextq_s8(max_data, max_data, 2);
799 const qint8x16_t final_max = vmaxq_s8(vmaxq_s8(max_data, max_data2), max_data3);
800
801 if(pool_stride_x == 2)
802 {
803 const qint8x8x2_t table = { { vget_low_s8(final_max), vget_high_s8(final_max) } };
804 static const qint8x8_t lookup_val = { 0, 2, 4, 6, 8, 10, 12, 14 };
805 res = vtbl2_s8(table, lookup_val);
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100806 vst1_qs8(reinterpret_cast<qint8_t *>(output.ptr()), res);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100807 }
808 else
809 {
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100810 vst1q_qs8(reinterpret_cast<qint8_t *>(output.ptr()), final_max);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100811 }
812 }
Michalis Spyroubbd9fb92017-06-22 12:57:51 +0100813 },
814 input, output);
815}
816
817template <PoolingType pooling_type>
818void NEPoolingLayerKernel::pooling3_q16(const Window &window_input, const Window &window)
819{
820 Iterator input(_input, window_input);
821 Iterator output(_output, window);
822
823 const int fixed_point_position = _input->info()->fixed_point_position();
824 constexpr int pool_size = 3;
825 int pool_pad_x = 0;
826 int pool_pad_y = 0;
827 int pool_stride_x = 0;
828 int pool_stride_y = 0;
829 std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();
830 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
831 const int upper_bound_w = _input->info()->dimension(0) + pool_pad_x;
832 const int upper_bound_h = _input->info()->dimension(1) + pool_pad_y;
833
834 const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y)));
835 const unsigned char *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 1));
836 const unsigned char *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 2));
837
838 execute_window_loop(window, [&](const Coordinates & id)
839 {
840 const auto top_data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input_top_ptr + input.offset()));
841 const auto middle_data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input_middle_ptr + input.offset()));
842 const auto bottom_data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input_bottom_ptr + input.offset()));
843
844 if(pooling_type == PoolingType::AVG)
845 {
846 // Calculate scale
847 const qint16_t scale = calculate_avg_scale_q16(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y, fixed_point_position);
848
849 // Perform pooling for stride 2
850 const qint16x8_t sum_data = vqaddq_qs16(vqaddq_qs16(top_data, bottom_data), middle_data);
851 const qint16x8_t sum_data2 = vextq_s16(sum_data, sum_data, 1);
852 const qint16x8_t sum_data3 = vextq_s16(sum_data, sum_data, 2);
853 const qint16x8_t final_sum = vqaddq_qs16(vqaddq_qs16(sum_data, sum_data2), sum_data3);
854 if(pool_stride_x == 2)
855 {
856 const qint16x4_t tmp = { vgetq_lane_s16(final_sum, 0), vgetq_lane_s16(final_sum, 2), vgetq_lane_s16(final_sum, 4), vgetq_lane_s16(final_sum, 6) };
857 const qint16x4_t scale_vec = vdup_n_qs16(scale);
858 vst1_qs16(reinterpret_cast<qint16_t *>(output.ptr()), vqmul_qs16(tmp, scale_vec, fixed_point_position));
859 }
860 else
861 {
862 const qint16x8_t scale_vec = vdupq_n_qs16(scale);
863 vst1q_qs16(reinterpret_cast<qint16_t *>(output.ptr()), vqmulq_qs16(final_sum, scale_vec, fixed_point_position));
864 }
865 }
866 else
867 {
868 const qint16x8_t max_data = vmaxq_s16(vmaxq_s16(top_data, bottom_data), middle_data);
869 const qint16x8_t max_data2 = vextq_s16(max_data, max_data, 1);
870 const qint16x8_t max_data3 = vextq_s16(max_data, max_data, 2);
871 const qint16x8_t final_max = vmaxq_s16(vmaxq_s16(max_data, max_data2), max_data3);
872
873 if(pool_stride_x == 2)
874 {
875 const qint16x4_t tmp = { vgetq_lane_s16(final_max, 0), vgetq_lane_s16(final_max, 2), vgetq_lane_s16(final_max, 4), vgetq_lane_s16(final_max, 6) };
876 vst1_qs16(reinterpret_cast<qint16_t *>(output.ptr()), tmp);
877 }
878 else
879 {
880 vst1q_qs16(reinterpret_cast<qint16_t *>(output.ptr()), final_max);
881 }
882 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100883 },
884 input, output);
885}
886
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000887template <PoolingType pooling_type, bool exclude_padding>
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100888void NEPoolingLayerKernel::pooling3_f32(const Window &window_input, const Window &window)
889{
890 Iterator input(_input, window_input);
891 Iterator output(_output, window);
892
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100893 constexpr const int pool_size = 3;
894 int pool_pad_x = 0;
895 int pool_pad_y = 0;
896 int pool_stride_x = 0;
897 int pool_stride_y = 0;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100898 std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();
899 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000900 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_x);
901 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_y);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100902
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100903 const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y)));
904 const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 1));
905 const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 2));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100906
907 execute_window_loop(window, [&](const Coordinates & id)
908 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100909 float32x4_t top_data = vld1q_f32(reinterpret_cast<const float *>(input_top_ptr + input.offset()));
910 float32x4_t middle_data = vld1q_f32(reinterpret_cast<const float *>(input_middle_ptr + input.offset()));
911 float32x4_t bottom_data = vld1q_f32(reinterpret_cast<const float *>(input_bottom_ptr + input.offset()));
912 float32x2_t res = {};
913 float final_res = 0;
914
915 // Get power of 2 in case of l2 pooling
916 if(pooling_type == PoolingType::L2)
917 {
918 top_data = vmulq_f32(top_data, top_data);
919 middle_data = vmulq_f32(middle_data, middle_data);
920 bottom_data = vmulq_f32(bottom_data, bottom_data);
921 }
922
923 if(pooling_type != PoolingType::MAX)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100924 {
925 // Calculate scale
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000926 float scale = calculate_avg_scale<exclude_padding>(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100927 const float32x2_t scale_v = vdup_n_f32(scale);
928
929 // Perform pooling
930 const float32x4_t sum_data = vaddq_f32(vaddq_f32(top_data, bottom_data), middle_data);
931 res = vpadd_f32(vget_high_f32(vsetq_lane_f32(0.f, sum_data, 3)), vget_low_f32(sum_data));
932 res = vmul_f32(vpadd_f32(res, res), scale_v);
933 }
934 else
935 {
936 const float32x4_t max_data = vmaxq_f32(vmaxq_f32(top_data, bottom_data), middle_data);
937 res = vpmax_f32(vget_high_f32(vsetq_lane_f32(-std::numeric_limits<float>::max(), max_data, 3)), vget_low_f32(max_data));
938 res = vpmax_f32(res, res);
939 }
Georgios Pinitascdf51452017-08-31 14:21:36 +0100940 final_res = vget_lane_f32(res, 0);
941
942 // Calculate square-root in case of l2 pooling
943 if(pooling_type == PoolingType::L2)
944 {
945 final_res = sqrt(final_res);
946 }
947
948 // Store result
949 *(reinterpret_cast<float *>(output.ptr())) = final_res;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100950 },
951 input, output);
952}
953
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000954template <PoolingType pooling_type, bool exclude_padding>
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100955void NEPoolingLayerKernel::pooling7_f32(const Window &window_input, const Window &window)
956{
957 Iterator input(_input, window_input);
958 Iterator output(_output, window);
959
960 constexpr const int pool_size = 7;
961 int pool_pad_x = 0;
962 int pool_pad_y = 0;
963 int pool_stride_x = 0;
964 int pool_stride_y = 0;
965 std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();
966 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000967 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_x);
968 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_y);
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100969
970 std::array<const uint8_t *, pool_size> input_ptrs{ {} };
971 for(int i = 0; i < pool_size; ++i)
972 {
973 input_ptrs[i] = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + i));
974 }
975
976 execute_window_loop(window, [&](const Coordinates & id)
977 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100978 float32x2_t res = {};
979 float final_res = 0.f;
980 if(pooling_type != PoolingType::MAX)
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100981 {
982 // Calculate scale
Georgios Pinitasadaae7e2017-10-30 15:56:32 +0000983 float scale = calculate_avg_scale<exclude_padding>(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y);
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100984 const float32x2_t scale_v = vdup_n_f32(scale);
985
986 // Perform pooling
Georgios Pinitascdf51452017-08-31 14:21:36 +0100987 float32x4x2_t data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[0] + input.offset()));
988 // Get power of 2 in case of l2 pooling
989 if(pooling_type == PoolingType::L2)
990 {
991 data.val[0] = vmulq_f32(data.val[0], data.val[0]);
992 data.val[1] = vmulq_f32(data.val[1], data.val[1]);
993 }
994 float32x4_t sum_data = vaddq_f32(data.val[0], vsetq_lane_f32(0.f, data.val[1], 3));
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100995 for(int i = 1; i < pool_size; ++i)
996 {
Georgios Pinitascdf51452017-08-31 14:21:36 +0100997 data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[i] + input.offset()));
998 // Get power of 2 in case of l2 pooling
999 if(pooling_type == PoolingType::L2)
1000 {
1001 data.val[0] = vmulq_f32(data.val[0], data.val[0]);
1002 data.val[1] = vmulq_f32(data.val[1], data.val[1]);
1003 }
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001004 sum_data = vaddq_f32(sum_data, data.val[0]);
1005 sum_data = vaddq_f32(sum_data, vsetq_lane_f32(0.f, data.val[1], 3));
1006 }
1007 res = vpadd_f32(vget_high_f32(sum_data), vget_low_f32(sum_data));
1008 res = vmul_f32(vpadd_f32(res, res), scale_v);
1009 }
1010 else
1011 {
1012 float32x4x2_t max_data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[0] + input.offset()));
1013 for(int i = 1; i < pool_size; ++i)
1014 {
1015 const float32x4x2_t data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[i] + input.offset()));
1016 max_data = vmax2q_f32(max_data, data);
1017 }
1018 res = vpmax_f32(vget_high_f32(vsetq_lane_f32(-std::numeric_limits<float>::max(), max_data.val[1], 3)), vget_low_f32(max_data.val[1]));
1019 res = vpmax_f32(res, vpmax_f32(vget_high_f32(max_data.val[0]), vget_low_f32(max_data.val[0])));
1020 res = vpmax_f32(res, res);
1021 }
Georgios Pinitascdf51452017-08-31 14:21:36 +01001022 final_res = vget_lane_f32(res, 0);
1023
1024 // Calculate square-root in case of l2 pooling
1025 if(pooling_type == PoolingType::L2)
1026 {
1027 final_res = sqrt(final_res);
1028 }
1029
1030 // Store result
1031 *(reinterpret_cast<float *>(output.ptr())) = final_res;
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001032 },
1033 input, output);
1034}
1035
Georgios Pinitasadaae7e2017-10-30 15:56:32 +00001036template <PoolingType pooling_type, bool exclude_padding>
Gian Marco Iodice16824302017-09-28 15:41:37 +01001037void NEPoolingLayerKernel::poolingN_f32(const Window &window_input, const Window &window)
1038{
1039 Iterator input(_input, window_input);
1040 Iterator output(_output, window);
1041
Georgios Pinitas4c2dd542017-11-13 12:58:41 +00001042 const int pool_size = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().x() : _pool_info.pool_size();
Gian Marco Iodice16824302017-09-28 15:41:37 +01001043 int pool_pad_x = 0;
1044 int pool_pad_y = 0;
1045 int pool_stride_x = 0;
1046 int pool_stride_y = 0;
1047 std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();
1048 std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
Georgios Pinitasadaae7e2017-10-30 15:56:32 +00001049 const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_x);
1050 const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_y);
Gian Marco Iodice16824302017-09-28 15:41:37 +01001051
1052 execute_window_loop(window, [&](const Coordinates & id)
1053 {
1054 float res = 0.0f;
1055
1056 if(pooling_type != PoolingType::MAX)
1057 {
1058 // Calculate scale
Georgios Pinitasadaae7e2017-10-30 15:56:32 +00001059 const float scale = calculate_avg_scale<exclude_padding>(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y);
Gian Marco Iodice16824302017-09-28 15:41:37 +01001060
1061 // Perform pooling
1062 float32x4_t vres = vdupq_n_f32(0.0f);
1063
1064 for(int y = 0; y < pool_size; ++y)
1065 {
1066 int x = 0;
1067 for(; x <= (pool_size - 4); x += 4)
1068 {
1069 const float32x4_t data = vld1q_f32(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_x) * _input->info()->strides_in_bytes().x() +
1070 (y - pool_pad_y) * _input->info()->strides_in_bytes().y()));
1071
1072 // Get power of 2 in case of l2 pooling and accumulate
1073 if(pooling_type == PoolingType::L2)
1074 {
1075 vres = vmlaq_f32(vres, data, data);
1076 }
1077 else
1078 {
1079 vres = vaddq_f32(vres, data);
1080 }
1081 }
1082
1083 // Leftover for loop
1084 for(; x < pool_size; ++x)
1085 {
1086 float data = *(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_x) * _input->info()->strides_in_bytes().x() + (y - pool_pad_y) * _input->info()->strides_in_bytes().y()));
1087
1088 // Get power of 2 in case of l2 pooling
1089 if(pooling_type == PoolingType::L2)
1090 {
1091 data *= data;
1092 }
1093
1094 res += data;
1095 }
1096 }
1097
1098#if defined(__aarch64__)
1099 // Reduction operation available on 64 bit architectures only
1100 res += vaddvq_f32(vres);
1101#else // __aarch64__
1102 // Reduction
1103 float32x2_t tmp = vpadd_f32(vget_high_f32(vres), vget_low_f32(vres));
1104 tmp = vpadd_f32(tmp, tmp);
1105
1106 res += vget_lane_f32(tmp, 0);
1107#endif // __aarch64__
1108 // Divide by scale
1109 res *= scale;
1110 }
1111 else
1112 {
1113 float32x4_t vres = vdupq_n_f32(std::numeric_limits<float>::min());
1114 res = std::numeric_limits<float>::min();
1115
1116 for(int y = 0; y < pool_size; ++y)
1117 {
1118 int x = 0;
1119 for(; x <= (pool_size - 4); x += 4)
1120 {
1121 const float32x4_t data = vld1q_f32(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_x) * _input->info()->strides_in_bytes().x() +
1122 (y - pool_pad_y) * _input->info()->strides_in_bytes().y()));
1123 vres = vmaxq_f32(vres, data);
1124 }
1125
1126 // Leftover for loop
1127 for(; x < pool_size; ++x)
1128 {
1129 const float data = *(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_x) * _input->info()->strides_in_bytes().x() + (y - pool_pad_y) * _input->info()->strides_in_bytes().y()));
1130 res = std::max(res, data);
1131 }
1132 }
1133
1134#if defined(__aarch64__)
1135 // Reduction operation available on 64 bit architectures only
1136 res = std::max(vmaxvq_f32(vres), res);
1137#else // __aarch64__
1138 float32x2_t tmp = vpmax_f32(vget_high_f32(vres), vget_low_f32(vres));
1139 tmp = vpmax_f32(tmp, tmp);
1140
1141 res = std::max(res, vget_lane_f32(tmp, 0));
1142#endif // __aarch64__
1143 }
1144
1145 // Calculate square-root in case of l2 pooling
1146 if(pooling_type == PoolingType::L2)
1147 {
1148 res = std::sqrt(res);
1149 }
1150
1151 // Store result
1152 *(reinterpret_cast<float *>(output.ptr())) = res;
1153 },
1154 input, output);
1155}
1156
Moritz Pflanzerc186b572017-09-07 09:48:04 +01001157void NEPoolingLayerKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001158{
Moritz Pflanzerc186b572017-09-07 09:48:04 +01001159 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001160 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
1161 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
1162 ARM_COMPUTE_ERROR_ON(_func == nullptr);
1163
Pablo Tello0c34fe22017-06-26 17:17:42 +01001164 const unsigned int pool_stride_x = _pool_info.pad_stride_info().stride().first;
1165 const unsigned int pool_stride_y = _pool_info.pad_stride_info().stride().second;
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001166
1167 // Set step for input in x and y direction for the input
1168 Window window_input(window);
1169 unsigned int window_x_inc = 0;
Pablo Tello0c34fe22017-06-26 17:17:42 +01001170 switch(_input->info()->data_type())
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001171 {
Pablo Tello0c34fe22017-06-26 17:17:42 +01001172 case DataType::QS8:
Michalis Spyroubbd9fb92017-06-22 12:57:51 +01001173 case DataType::QS16:
Pablo Tello0c34fe22017-06-26 17:17:42 +01001174 case DataType::F16:
1175 {
1176 window_x_inc = (pool_stride_x == 2) ? _num_elems_processed_per_iteration * 2 : _num_elems_processed_per_iteration;
1177 break;
1178 }
1179 case DataType::F32:
1180 {
1181 window_x_inc = pool_stride_x;
1182 break;
1183 }
1184 default:
1185 {
1186 ARM_COMPUTE_ERROR("Not supported");
1187 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001188 }
1189 window_input.set(Window::DimX, Window::Dimension(window.x().start() * pool_stride_x, window.x().end() * pool_stride_x, window_x_inc));
1190 window_input.set(Window::DimY, Window::Dimension(window.y().start() * pool_stride_y, window.y().end() * pool_stride_y, pool_stride_y));
1191
1192 // Run function
1193 (this->*_func)(window_input, window);
1194}