blob: 3b944ab8d01e2651fb6d3f5b03cbdbaddfd0948c [file] [log] [blame]
Pablo Tellod75f9e92019-08-23 16:26:26 +01001/*
2 * Copyright (c) 2019 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h"
25
26#include "arm_compute/core/AccessWindowStatic.h"
27#include "arm_compute/core/CPP/Validate.h"
28#include "arm_compute/core/Helpers.h"
29#include "arm_compute/core/TensorInfo.h"
30#include "arm_compute/core/Utils.h"
31#include "arm_compute/core/Window.h"
32#include "arm_compute/core/utils/misc/ShapeCalculator.h"
33#include "arm_compute/core/utils/misc/Utility.h"
34
35#include <arm_neon.h>
36
37using namespace arm_compute::misc::shape_calculator;
38
39namespace arm_compute
40{
41namespace
42{
43Status validate_arguments(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info)
44{
45 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, rois, output);
Pablo Tellod75f9e92019-08-23 16:26:26 +010046 ARM_COMPUTE_RETURN_ERROR_ON(rois->dimension(0) != 5);
47 ARM_COMPUTE_RETURN_ERROR_ON(rois->num_dimensions() > 2);
Pablo Telloebe2e8c2019-08-23 16:26:26 +010048 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32, DataType::F16);
Pablo Tellod75f9e92019-08-23 16:26:26 +010049 ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(input, DataLayout::NHWC, DataLayout::NCHW);
50 ARM_COMPUTE_RETURN_ERROR_ON((pool_info.pooled_width() == 0) || (pool_info.pooled_height() == 0));
51 ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
52
53 if(output->total_size() != 0)
54 {
55 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
56 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
57 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(compute_roi_align_shape(*input, *rois, pool_info), output->tensor_shape());
58 }
Pablo Telloebe2e8c2019-08-23 16:26:26 +010059
60 if(input->data_type() == DataType::QASYMM8)
61 {
62 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(rois, 1, DataType::QASYMM16);
63
64 const UniformQuantizationInfo rois_qinfo = rois->quantization_info().uniform();
65 ARM_COMPUTE_RETURN_ERROR_ON(rois_qinfo.scale != 0.125f);
66 ARM_COMPUTE_RETURN_ERROR_ON(rois_qinfo.offset != 0);
67 }
68 else
69 {
70 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, rois);
71 }
72
Pablo Tellod75f9e92019-08-23 16:26:26 +010073 return Status{};
74}
75
76std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info)
77{
78 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
79
80 // Output auto inizialitation if not yet initialized
81 const TensorShape output_shape = compute_roi_align_shape(*input, *rois, pool_info);
82 auto_init_if_empty((*output), output_shape, 1, input->data_type());
83 output->set_data_layout(input->data_layout());
84
85 const unsigned int num_rois = rois->dimension(1);
86 Window window;
87 window.set(Window::DimX, Window::Dimension(0, num_rois));
88 window.set(Window::DimY, Window::Dimension(0, 1));
89
90 AccessWindowStatic input_access(input,
91 input->valid_region().start(0),
92 input->valid_region().start(1),
93 input->valid_region().end(0),
94 input->valid_region().end(1));
95 AccessWindowStatic output_access(output, 0, 0, pool_info.pooled_width(), pool_info.pooled_height());
96
97 const bool window_changed = update_window_and_padding(window, input_access, output_access);
98 output_access.set_valid_region(window, ValidRegion(Coordinates(), output->tensor_shape()));
99
100 Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
101 return std::make_pair(err, window);
102}
103} // namespace
104
105NEROIAlignLayerKernel::NEROIAlignLayerKernel()
106 : _input(nullptr), _output(nullptr), _rois(nullptr), _pool_info(0, 0, 0.f)
107{
108}
109
110void NEROIAlignLayerKernel::configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info)
111{
112 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, rois);
113 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), rois->info(), output->info(), pool_info));
114 // Configure kernel window
115 auto win_config = validate_and_configure_window(input->info(), rois->info(), output->info(), pool_info);
116 ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
117
118 // Set instance variables
119 _input = input;
120 _rois = rois;
121 _output = output;
122 _pool_info = pool_info;
123
124 INEKernel::configure(win_config.second);
125}
126
127Status NEROIAlignLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info)
128{
129 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, rois, output, pool_info));
130 return Status{};
131}
132
133/** Average pooling over an aligned window */
Pablo Telloebe2e8c2019-08-23 16:26:26 +0100134template <typename input_data_type, DataLayout data_layout>
135inline input_data_type roi_align_1x1(const ITensor *input,
136 unsigned int roi_batch,
137 float region_start_x,
138 float bin_size_x,
139 int grid_size_x,
140 float region_end_x,
141 float region_start_y,
142 float bin_size_y,
143 int grid_size_y,
144 float region_end_y,
145 int pz)
Pablo Tellod75f9e92019-08-23 16:26:26 +0100146{
147 if((region_end_x <= region_start_x) || (region_end_y <= region_start_y))
148 {
Pablo Telloebe2e8c2019-08-23 16:26:26 +0100149 return input_data_type(0);
Pablo Tellod75f9e92019-08-23 16:26:26 +0100150 }
151 else
152 {
153 float avg = 0;
154 // Iterate through the aligned pooling region
155 for(int iy = 0; iy < grid_size_y; ++iy)
156 {
157 for(int ix = 0; ix < grid_size_x; ++ix)
158 {
159 // Align the window in the middle of every bin
160 float y = region_start_y + (iy + 0.5) * bin_size_y / float(grid_size_y);
161 float x = region_start_x + (ix + 0.5) * bin_size_x / float(grid_size_x);
162
163 // Interpolation in the [0,0] [0,1] [1,0] [1,1] square
164 const int y_low = y;
165 const int x_low = x;
166 const int y_high = y_low + 1;
167 const int x_high = x_low + 1;
168
169 const float ly = y - y_low;
170 const float lx = x - x_low;
171 const float hy = 1. - ly;
172 const float hx = 1. - lx;
173
174 const float w1 = hy * hx;
175 const float w2 = hy * lx;
176 const float w3 = ly * hx;
177 const float w4 = ly * lx;
178 if(data_layout == DataLayout::NCHW)
179 {
Pablo Telloebe2e8c2019-08-23 16:26:26 +0100180 const auto data1 = *reinterpret_cast<const input_data_type *>(input->ptr_to_element(Coordinates(x_low, y_low, pz, roi_batch)));
181 const auto data2 = *reinterpret_cast<const input_data_type *>(input->ptr_to_element(Coordinates(x_high, y_low, pz, roi_batch)));
182 const auto data3 = *reinterpret_cast<const input_data_type *>(input->ptr_to_element(Coordinates(x_low, y_high, pz, roi_batch)));
183 const auto data4 = *reinterpret_cast<const input_data_type *>(input->ptr_to_element(Coordinates(x_high, y_high, pz, roi_batch)));
Pablo Tellod75f9e92019-08-23 16:26:26 +0100184 avg += w1 * data1 + w2 * data2 + w3 * data3 + w4 * data4;
185 }
186 else
187 {
Pablo Telloebe2e8c2019-08-23 16:26:26 +0100188 const auto data1 = *reinterpret_cast<const input_data_type *>(input->ptr_to_element(Coordinates(pz, x_low, y_low, roi_batch)));
189 const auto data2 = *reinterpret_cast<const input_data_type *>(input->ptr_to_element(Coordinates(pz, x_high, y_low, roi_batch)));
190 const auto data3 = *reinterpret_cast<const input_data_type *>(input->ptr_to_element(Coordinates(pz, x_low, y_high, roi_batch)));
191 const auto data4 = *reinterpret_cast<const input_data_type *>(input->ptr_to_element(Coordinates(pz, x_high, y_high, roi_batch)));
192 avg += w1 * data1 + w2 * data2 + w3 * data3 + w4 * data4;
193 }
194 }
195 }
196
197 avg /= grid_size_x * grid_size_y;
198 return input_data_type(avg);
199 }
200}
201
202/** Average pooling over an aligned window */
203template <typename input_data_type, DataLayout data_layout>
204inline input_data_type roi_align_1x1_qasymm8(const ITensor *input,
205 unsigned int roi_batch,
206 float region_start_x,
207 float bin_size_x,
208 int grid_size_x,
209 float region_end_x,
210 float region_start_y,
211 float bin_size_y,
212 int grid_size_y,
213 float region_end_y,
214 int pz,
215 const QuantizationInfo &out_qinfo)
216{
217 if((region_end_x <= region_start_x) || (region_end_y <= region_start_y))
218 {
219 return input_data_type(out_qinfo.uniform().offset);
220 }
221 else
222 {
223 float avg = 0;
224 const UniformQuantizationInfo input_qinfo = input->info()->quantization_info().uniform();
225 // Iterate through the aligned pooling region
226 for(int iy = 0; iy < grid_size_y; ++iy)
227 {
228 for(int ix = 0; ix < grid_size_x; ++ix)
229 {
230 // Align the window in the middle of every bin
231 float y = region_start_y + (iy + 0.5) * bin_size_y / float(grid_size_y);
232 float x = region_start_x + (ix + 0.5) * bin_size_x / float(grid_size_x);
233
234 // Interpolation in the [0,0] [0,1] [1,0] [1,1] square
235 const int y_low = y;
236 const int x_low = x;
237 const int y_high = y_low + 1;
238 const int x_high = x_low + 1;
239
240 const float ly = y - y_low;
241 const float lx = x - x_low;
242 const float hy = 1. - ly;
243 const float hx = 1. - lx;
244
245 const float w1 = hy * hx;
246 const float w2 = hy * lx;
247 const float w3 = ly * hx;
248 const float w4 = ly * lx;
249
250 if(data_layout == DataLayout::NCHW)
251 {
252 float data1 = dequantize_qasymm8(*reinterpret_cast<const input_data_type *>(input->ptr_to_element(Coordinates(x_low, y_low, pz, roi_batch))), input_qinfo);
253 float data2 = dequantize_qasymm8(*reinterpret_cast<const input_data_type *>(input->ptr_to_element(Coordinates(x_high, y_low, pz, roi_batch))), input_qinfo);
254 float data3 = dequantize_qasymm8(*reinterpret_cast<const input_data_type *>(input->ptr_to_element(Coordinates(x_low, y_high, pz, roi_batch))), input_qinfo);
255 float data4 = dequantize_qasymm8(*reinterpret_cast<const input_data_type *>(input->ptr_to_element(Coordinates(x_high, y_high, pz, roi_batch))), input_qinfo);
256 avg += w1 * data1 + w2 * data2 + w3 * data3 + w4 * data4;
257 }
258 else
259 {
260 const auto data1 = dequantize_qasymm8(*reinterpret_cast<const input_data_type *>(input->ptr_to_element(Coordinates(pz, x_low, y_low, roi_batch))), input_qinfo);
261 const auto data2 = dequantize_qasymm8(*reinterpret_cast<const input_data_type *>(input->ptr_to_element(Coordinates(pz, x_high, y_low, roi_batch))), input_qinfo);
262 const auto data3 = dequantize_qasymm8(*reinterpret_cast<const input_data_type *>(input->ptr_to_element(Coordinates(pz, x_low, y_high, roi_batch))), input_qinfo);
263 const auto data4 = dequantize_qasymm8(*reinterpret_cast<const input_data_type *>(input->ptr_to_element(Coordinates(pz, x_high, y_high, roi_batch))), input_qinfo);
Pablo Tellod75f9e92019-08-23 16:26:26 +0100264 avg += w1 * data1 + w2 * data2 + w3 * data3 + w4 * data4;
265 }
266 }
267 }
268
269 avg /= grid_size_x * grid_size_y;
270
Pablo Telloebe2e8c2019-08-23 16:26:26 +0100271 return quantize_qasymm8(avg, out_qinfo);
Pablo Tellod75f9e92019-08-23 16:26:26 +0100272 }
273}
274
275inline float compute_region_coordinate(int p, float bin_size, float roi_anchor, float max_value)
276{
277 const float region_start = p * bin_size + roi_anchor;
278 return utility::clamp(region_start, 0.0f, max_value);
279}
280
281void NEROIAlignLayerKernel::run(const Window &window, const ThreadInfo &info)
282{
283 if(_input->info()->data_layout() == DataLayout::NCHW)
284 {
285 switch(_input->info()->data_type())
286 {
Pablo Telloebe2e8c2019-08-23 16:26:26 +0100287 case DataType::QASYMM8:
288 {
289 NEROIAlignLayerKernel::internal_run<DataLayout::NCHW, uint8_t, uint16_t>(window, info);
290 break;
291 }
Pablo Tellod75f9e92019-08-23 16:26:26 +0100292 case DataType::F32:
293 {
294 NEROIAlignLayerKernel::internal_run<DataLayout::NCHW, float>(window, info);
295 break;
296 }
297#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
298 case DataType::F16:
299 {
300 NEROIAlignLayerKernel::internal_run<DataLayout::NCHW, float16_t>(window, info);
301 break;
302 }
303#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
304 default:
305 {
306 ARM_COMPUTE_ERROR("DataType not supported");
307 break;
308 }
309 }
310 }
311 else if(_input->info()->data_layout() == DataLayout::NHWC)
312 {
313 switch(_input->info()->data_type())
314 {
Pablo Telloebe2e8c2019-08-23 16:26:26 +0100315 case DataType::QASYMM8:
316 {
317 NEROIAlignLayerKernel::internal_run<DataLayout::NHWC, uint8_t, uint16_t>(window, info);
318 break;
319 }
Pablo Tellod75f9e92019-08-23 16:26:26 +0100320 case DataType::F32:
321 {
322 NEROIAlignLayerKernel::internal_run<DataLayout::NHWC, float>(window, info);
323 break;
324 }
325#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
326 case DataType::F16:
327 {
328 NEROIAlignLayerKernel::internal_run<DataLayout::NHWC, float16_t>(window, info);
329 break;
330 }
331#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
332 default:
333 {
334 ARM_COMPUTE_ERROR("DataType not supported");
335 break;
336 }
337 }
338 }
339 else
340 {
341 ARM_COMPUTE_ERROR("Invalid layout");
342 }
343}
344
Pablo Telloebe2e8c2019-08-23 16:26:26 +0100345template <DataLayout data_layout, typename input_data_type, typename roi_data_type>
Pablo Tellod75f9e92019-08-23 16:26:26 +0100346void NEROIAlignLayerKernel::internal_run(const Window &window, const ThreadInfo &info)
347{
348 ARM_COMPUTE_UNUSED(info);
349 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
350 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
351
352 const size_t values_per_roi = _rois->info()->dimension(0);
353
354 const int roi_list_start = window.x().start();
355 const int roi_list_end = window.x().end();
356
357 const unsigned int idx_width = get_data_layout_dimension_index(_input->info()->data_layout(), DataLayoutDimension::WIDTH);
358 const unsigned int idx_height = get_data_layout_dimension_index(_input->info()->data_layout(), DataLayoutDimension::HEIGHT);
359 const unsigned int idx_depth = get_data_layout_dimension_index(_input->info()->data_layout(), DataLayoutDimension::CHANNEL);
360
361 const int input_width = _input->info()->dimension(idx_width);
362 const int input_height = _input->info()->dimension(idx_height);
363 const int input_chanels = _input->info()->dimension(idx_depth);
364 const int pooled_w = _pool_info.pooled_width();
365 const int pooled_h = _pool_info.pooled_height();
366
Pablo Telloebe2e8c2019-08-23 16:26:26 +0100367 const DataType data_type = _input->info()->data_type();
368 const bool is_qasymm = is_data_type_quantized_asymmetric(data_type);
Pablo Tellod75f9e92019-08-23 16:26:26 +0100369
Pablo Telloebe2e8c2019-08-23 16:26:26 +0100370 const auto *rois_ptr = reinterpret_cast<const roi_data_type *>(_rois->buffer());
371 const QuantizationInfo &rois_qinfo = _rois->info()->quantization_info();
Pablo Tellod75f9e92019-08-23 16:26:26 +0100372 for(int roi_indx = roi_list_start; roi_indx < roi_list_end; ++roi_indx)
373 {
374 const unsigned int roi_batch = rois_ptr[values_per_roi * roi_indx];
Pablo Tellod75f9e92019-08-23 16:26:26 +0100375
Pablo Telloebe2e8c2019-08-23 16:26:26 +0100376 roi_data_type qx1 = rois_ptr[values_per_roi * roi_indx + 1];
377 roi_data_type qy1 = rois_ptr[values_per_roi * roi_indx + 2];
378 roi_data_type qx2 = rois_ptr[values_per_roi * roi_indx + 3];
379 roi_data_type qy2 = rois_ptr[values_per_roi * roi_indx + 4];
380 float x1(qx1);
381 float x2(qx2);
382 float y1(qy1);
383 float y2(qy2);
384 if(is_qasymm)
385 {
386 x1 = dequantize_qasymm16(qx1, rois_qinfo);
387 x2 = dequantize_qasymm16(qx2, rois_qinfo);
388 y1 = dequantize_qasymm16(qy1, rois_qinfo);
389 y2 = dequantize_qasymm16(qy2, rois_qinfo);
390 }
Pablo Tellod75f9e92019-08-23 16:26:26 +0100391 const float roi_anchor_x = x1 * _pool_info.spatial_scale();
392 const float roi_anchor_y = y1 * _pool_info.spatial_scale();
393 const float roi_dims_x = std::max((x2 - x1) * _pool_info.spatial_scale(), 1.0f);
394 const float roi_dims_y = std::max((y2 - y1) * _pool_info.spatial_scale(), 1.0f);
395 float bin_size_x = roi_dims_x / _pool_info.pooled_width();
396 float bin_size_y = roi_dims_y / _pool_info.pooled_height();
397
398 // Iterate through all feature maps
399 for(int ch = 0; ch < input_chanels; ++ch)
400 {
401 // Iterate through all output pixels
402 for(int py = 0; py < pooled_h; ++py)
403 {
404 for(int px = 0; px < pooled_w; ++px)
405 {
Pablo Telloebe2e8c2019-08-23 16:26:26 +0100406 const float region_start_x = compute_region_coordinate(px, bin_size_x, roi_anchor_x, input_width);
407 const float region_start_y = compute_region_coordinate(py, bin_size_y, roi_anchor_y, input_height);
408 const float region_end_x = compute_region_coordinate(px + 1, bin_size_x, roi_anchor_x, input_width);
409 const float region_end_y = compute_region_coordinate(py + 1, bin_size_y, roi_anchor_y, input_height);
410 const int roi_bin_grid_x = (_pool_info.sampling_ratio() > 0) ? _pool_info.sampling_ratio() : int(ceil(bin_size_x));
411 const int roi_bin_grid_y = (_pool_info.sampling_ratio() > 0) ? _pool_info.sampling_ratio() : int(ceil(bin_size_y));
412 input_data_type out_val(0);
413 if(is_qasymm)
414 {
415 out_val = roi_align_1x1_qasymm8<input_data_type, data_layout>(
416 _input, roi_batch, region_start_x, bin_size_x,
417 roi_bin_grid_x, region_end_x, region_start_y, bin_size_y,
418 roi_bin_grid_y, region_end_y, ch, _output->info()->quantization_info());
419 }
420 else
421 {
422 out_val = roi_align_1x1<input_data_type, data_layout>(
423 _input, roi_batch, region_start_x, bin_size_x,
424 roi_bin_grid_x, region_end_x, region_start_y, bin_size_y,
425 roi_bin_grid_y, region_end_y, ch);
426 }
Pablo Tellod75f9e92019-08-23 16:26:26 +0100427
428 if(data_layout == DataLayout::NCHW)
429 {
Pablo Telloebe2e8c2019-08-23 16:26:26 +0100430 auto out_ptr = reinterpret_cast<input_data_type *>(_output->ptr_to_element(Coordinates(px, py, ch, roi_indx)));
Pablo Tellod75f9e92019-08-23 16:26:26 +0100431 *out_ptr = out_val;
432 }
433 else
434 {
Pablo Telloebe2e8c2019-08-23 16:26:26 +0100435 auto out_ptr = reinterpret_cast<input_data_type *>(_output->ptr_to_element(Coordinates(ch, px, py, roi_indx)));
Pablo Tellod75f9e92019-08-23 16:26:26 +0100436 *out_ptr = out_val;
437 }
438 }
439 }
440 }
441 }
442}
443} // namespace arm_compute