blob: ab8ab14ae5c045ee81a1315454280d0d06bfd50c [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
2 * Copyright (c) 2016, 2017 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/core/NEON/kernels/NEWarpKernel.h"
25
26#include "arm_compute/core/AccessWindowStatic.h"
27#include "arm_compute/core/Coordinates.h"
28#include "arm_compute/core/Error.h"
29#include "arm_compute/core/Helpers.h"
30#include "arm_compute/core/ITensor.h"
31#include "arm_compute/core/TensorInfo.h"
32#include "arm_compute/core/Validate.h"
33#include "arm_compute/core/Window.h"
34
35#include <cstddef>
36
37using namespace arm_compute;
38
39namespace
40{
41inline uint8_t nearest_interpolation(const uint8_t *in_ptr, int x, int y, size_t stride)
42{
43 return in_ptr[x + y * stride];
44}
45} // namespace
46
47INEWarpKernel::INEWarpKernel()
48 : _func(nullptr), _input(nullptr), _output(nullptr), _constant_border_value(0), _matrix(nullptr)
49{
50}
51
Isabella Gottardif9bae2e2017-07-28 17:24:08 +010052BorderSize INEWarpKernel::border_size() const
53{
54 return BorderSize(1);
55}
56
Moritz Pflanzerc186b572017-09-07 09:48:04 +010057void INEWarpKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010058{
Moritz Pflanzerc186b572017-09-07 09:48:04 +010059 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +010060 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
61 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
62 ARM_COMPUTE_ERROR_ON(_func == nullptr);
63
64 (this->*_func)(window);
65}
66
67void INEWarpKernel::configure(const ITensor *input, ITensor *output, const float *matrix, BorderMode border_mode, uint8_t constant_border_value)
68{
69 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
70 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
71 ARM_COMPUTE_ERROR_ON(nullptr == matrix);
72
73 _matrix = matrix;
74 _constant_border_value = constant_border_value;
75
76 switch(border_mode)
77 {
78 case BorderMode::UNDEFINED:
79 _func = &INEWarpKernel::warp_undefined;
80 break;
81 case BorderMode::CONSTANT:
82 _func = &INEWarpKernel::warp_constant;
83 break;
84 case BorderMode::REPLICATE:
85 _func = &INEWarpKernel::warp_replicate;
86 break;
87 default:
88 ARM_COMPUTE_ERROR("Border mode not supported");
89 break;
90 }
91
92 _input = input;
93 _output = output;
94
95 // Configure kernel window
96 Window win = calculate_max_window(*output->info(), Steps(1U));
97
98 const ValidRegion &input_valid_region = input->info()->valid_region();
99
100 // Reads can occur within the valid region of the input
101 AccessWindowStatic input_access(input->info(),
Isabella Gottardif9bae2e2017-07-28 17:24:08 +0100102 input_valid_region.anchor[0] - border_size().left, input_valid_region.anchor[1] - border_size().top,
103 input_valid_region.anchor[0] + input_valid_region.shape[0] + border_size().right,
104 input_valid_region.anchor[1] + input_valid_region.shape[1] + border_size().bottom);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100105 AccessWindowHorizontal output_access(output->info(), 0, 1);
106
107 update_window_and_padding(win, input_access, output_access);
108
109 output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
110
111 INEKernel::configure(win);
112}
113
114template <InterpolationPolicy interpolation>
115void NEWarpAffineKernel<interpolation>::warp_undefined(const Window &window)
116{
117 // Don't increment in X and Y direction for the input tensor
118 // A pointer to the start of this plane is needed as base for the precomputed offsets
119 Window win_in(window);
120 win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
121 win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
122
123 Iterator in(_input, win_in);
124 Iterator out(_output, window);
125
126 const int min_x = _input->info()->valid_region().anchor[0];
127 const int max_x = min_x + _input->info()->valid_region().shape[0];
128 const int min_y = _input->info()->valid_region().anchor[1];
129 const int max_y = min_y + _input->info()->valid_region().shape[1];
130 const size_t stride = _input->info()->strides_in_bytes()[1];
131
132 // x0 = M01 * x + M01 * y + M02
133 // y0 = M11 * x + M11 * y + M12
134 const float M00 = _matrix[0];
135 const float M10 = _matrix[1];
136 const float M01 = _matrix[0 + 1 * 2];
137 const float M11 = _matrix[1 + 1 * 2];
138 const float M02 = _matrix[0 + 2 * 2];
139 const float M12 = _matrix[1 + 2 * 2];
140
141 // "M00 * x" and "M10 * x", when x = window.x.start
142 const float start_x0 = M00 * window.x().start();
143 const float start_y0 = M10 * window.x().start();
144
145 // Current row
Isabella Gottardi83be7452017-08-29 13:47:03 +0100146 int y_cur = window.y().start();
147 int z_cur = window.z().start();
148 int d3_cur = window[3].start();
149 int d4_cur = window[4].start();
150 int d5_cur = window[5].start();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100151
152 // const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing
153 float const_x0 = M01 * y_cur + M02;
154 float const_y0 = M11 * y_cur + M12;
155
156 // Affine warp coordinates
157 float x0 = start_x0 + const_x0;
158 float y0 = start_y0 + const_y0;
159
160 execute_window_loop(window, [&](const Coordinates & id)
161 {
Isabella Gottardi83be7452017-08-29 13:47:03 +0100162 // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0
163 if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5]))
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100164 {
Isabella Gottardi83be7452017-08-29 13:47:03 +0100165 y_cur = id.y();
166 z_cur = id.z();
167 d3_cur = id[3];
168 d4_cur = id[4];
169 d5_cur = id[5];
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100170
171 const_x0 = M01 * y_cur + M02;
172 const_y0 = M11 * y_cur + M12;
173
174 x0 = start_x0 + const_x0;
175 y0 = start_y0 + const_y0;
176 }
177
178 // Only write to output if x0 and y0 are within the valid region.
179 // Otherwise the read value would be undefined.
180 if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x))
181 {
182 switch(interpolation)
183 {
184 case InterpolationPolicy::NEAREST_NEIGHBOR:
185 *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
186 break;
187 case InterpolationPolicy::BILINEAR:
Georgios Pinitas583137c2017-08-31 18:12:42 +0100188 *out.ptr() = pixel_bilinear_c1(in.ptr(), stride, x0, y0);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100189 break;
190 default:
191 ARM_COMPUTE_ERROR("Interpolation not supported");
192 }
193 }
194
195 x0 += M00;
196 y0 += M10;
197 },
198 in, out);
199}
200
201template <InterpolationPolicy interpolation>
202void NEWarpAffineKernel<interpolation>::warp_constant(const Window &window)
203{
204 // Don't increment in X and Y direction for the input tensor
205 // A pointer to the start of this plane is needed as base for the precomputed offsets
206 Window win_in(window);
207 win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
208 win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
209
210 Iterator in(_input, win_in);
211 Iterator out(_output, window);
212
213 const int min_x = _input->info()->valid_region().anchor[0];
214 const int max_x = min_x + _input->info()->valid_region().shape[0];
215 const int min_y = _input->info()->valid_region().anchor[1];
216 const int max_y = min_y + _input->info()->valid_region().shape[1];
217 const size_t stride = _input->info()->strides_in_bytes()[1];
218
219 // x0 = M01 * x + M01 * y + M02
220 // y0 = M11 * x + M11 * y + M12
221 const float M00 = _matrix[0];
222 const float M10 = _matrix[1];
223 const float M01 = _matrix[0 + 1 * 2];
224 const float M11 = _matrix[1 + 1 * 2];
225 const float M02 = _matrix[0 + 2 * 2];
226 const float M12 = _matrix[1 + 2 * 2];
227
228 // "M00 * x" and "M10 * x", when x = window.x.start
229 const float start_x0 = M00 * window.x().start();
230 const float start_y0 = M10 * window.x().start();
231
232 // Current row
Isabella Gottardi83be7452017-08-29 13:47:03 +0100233 int y_cur = window.y().start();
234 int z_cur = window.z().start();
235 int d3_cur = window[3].start();
236 int d4_cur = window[4].start();
237 int d5_cur = window[5].start();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100238
239 // const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing
240 float const_x0 = M01 * y_cur + M02;
241 float const_y0 = M11 * y_cur + M12;
242
243 // Affine warp coordinates
244 float x0 = start_x0 + const_x0;
245 float y0 = start_y0 + const_y0;
246
247 execute_window_loop(window, [&](const Coordinates & id)
248 {
Isabella Gottardi83be7452017-08-29 13:47:03 +0100249 // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0
250 if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5]))
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100251 {
Isabella Gottardi83be7452017-08-29 13:47:03 +0100252 y_cur = id.y();
253 z_cur = id.z();
254 d3_cur = id[3];
255 d4_cur = id[4];
256 d5_cur = id[5];
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100257
258 const_x0 = M01 * y_cur + M02;
259 const_y0 = M11 * y_cur + M12;
260
261 x0 = start_x0 + const_x0;
262 y0 = start_y0 + const_y0;
263 }
264
265 // Only use input values if x0 and y0 are within the valid region.
266 // Otherwise write the constant border value.
267 if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x))
268 {
269 switch(interpolation)
270 {
271 case InterpolationPolicy::NEAREST_NEIGHBOR:
272 *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
273 break;
274 case InterpolationPolicy::BILINEAR:
Georgios Pinitas583137c2017-08-31 18:12:42 +0100275 *out.ptr() = pixel_bilinear_c1(in.ptr(), stride, x0, y0);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100276 break;
277 default:
278 ARM_COMPUTE_ERROR("Interpolation not supported");
279 }
280 }
281 else
282 {
Isabella Gottardi83be7452017-08-29 13:47:03 +0100283 switch(interpolation)
284 {
285 case InterpolationPolicy::NEAREST_NEIGHBOR:
286 *out.ptr() = _constant_border_value;
287 break;
288 case InterpolationPolicy::BILINEAR:
289 {
290 const auto xi = clamp<int>(std::floor(x0), min_x - 1, max_x);
291 const auto yi = clamp<int>(std::floor(y0), min_y - 1, max_y);
292 const auto xi_1 = clamp<int>(std::floor(x0 + 1), min_x - 1, max_x);
293 const auto yi_1 = clamp<int>(std::floor(y0 + 1), min_y - 1, max_y);
294
295 const float dx = x0 - std::floor(x0);
296 const float dy = y0 - std::floor(y0);
297 const float dx1 = 1.0f - dx;
298 const float dy1 = 1.0f - dy;
299
300 const float a00 = *(in.ptr() + xi + yi * stride);
301 const float a01 = *(in.ptr() + xi_1 + yi * stride);
302 const float a10 = *(in.ptr() + xi + yi_1 * stride);
303 const float a11 = *(in.ptr() + xi_1 + yi_1 * stride);
304
305 *out.ptr() = a00 * (dx1 * dy1) + a01 * (dx * dy1) + a10 * (dx1 * dy) + a11 * (dx * dy);
306 }
307 break;
308 default:
309 ARM_COMPUTE_ERROR("Interpolation not supported");
310 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100311 }
312
313 x0 += M00;
314 y0 += M10;
315 },
316 in, out);
317}
318
319template <InterpolationPolicy interpolation>
320void NEWarpAffineKernel<interpolation>::warp_replicate(const Window &window)
321{
322 // Don't increment in X and Y direction for the input tensor
323 // A pointer to the start of this plane is needed as base for the precomputed offsets
324 Window win_in(window);
325 win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
326 win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
327
328 Iterator in(_input, win_in);
329 Iterator out(_output, window);
330
331 const int min_x = _input->info()->valid_region().anchor[0];
332 const int max_x = min_x + _input->info()->valid_region().shape[0];
333 const int min_y = _input->info()->valid_region().anchor[1];
334 const int max_y = min_y + _input->info()->valid_region().shape[1];
335 const size_t stride = _input->info()->strides_in_bytes()[1];
336
337 // Current row
Isabella Gottardi83be7452017-08-29 13:47:03 +0100338 int y_cur = window.y().start();
339 int z_cur = window.z().start();
340 int d3_cur = window[3].start();
341 int d4_cur = window[4].start();
342 int d5_cur = window[5].start();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100343
344 const float M00 = _matrix[0];
345 const float M10 = _matrix[1];
346 const float M01 = _matrix[0 + 1 * 2];
347 const float M11 = _matrix[1 + 1 * 2];
348 const float M02 = _matrix[0 + 2 * 2];
349 const float M12 = _matrix[1 + 2 * 2];
350
351 // "M00 * x" and "M10 * x", when x = window.x.start
352 const float start_x0 = M00 * window.x().start();
353 const float start_y0 = M10 * window.x().start();
354
355 // const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing
356 float const_x0 = M01 * y_cur + M02;
357 float const_y0 = M11 * y_cur + M12;
358
359 float x0 = start_x0 + const_x0;
360 float y0 = start_y0 + const_y0;
361
362 execute_window_loop(window, [&](const Coordinates & id)
363 {
Isabella Gottardi83be7452017-08-29 13:47:03 +0100364 // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0
365 if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5]))
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100366 {
Isabella Gottardi83be7452017-08-29 13:47:03 +0100367 y_cur = id.y();
368 z_cur = id.z();
369 d3_cur = id[3];
370 d4_cur = id[4];
371 d5_cur = id[5];
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100372
373 const_x0 = M01 * y_cur + M02;
374 const_y0 = M11 * y_cur + M12;
375
376 x0 = start_x0 + const_x0;
377 y0 = start_y0 + const_y0;
378 }
379
380 // Only load from (x0, y0) if the point is within the valid region.
381 // Otherwise load from the edge of the valid region.
382 if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x))
383 {
384 switch(interpolation)
385 {
386 case InterpolationPolicy::NEAREST_NEIGHBOR:
387 *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
388 break;
389 case InterpolationPolicy::BILINEAR:
Georgios Pinitas583137c2017-08-31 18:12:42 +0100390 *out.ptr() = pixel_bilinear_c1(in.ptr(), stride, x0, y0);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100391 break;
392 default:
393 ARM_COMPUTE_ERROR("Interpolation not supported");
394 }
395 }
396 else
397 {
398 // Clamp coordinates
Isabella Gottardi83be7452017-08-29 13:47:03 +0100399 const auto xi = clamp<int>(std::floor(x0), min_x, max_x - 1);
400 const auto yi = clamp<int>(std::floor(y0), min_y, max_y - 1);
401 switch(interpolation)
402 {
403 case InterpolationPolicy::NEAREST_NEIGHBOR:
404 *out.ptr() = *(in.ptr() + xi + yi * stride);
405 break;
406 case InterpolationPolicy::BILINEAR:
407 {
408 const auto xi_1 = clamp<int>(std::floor(x0 + 1), min_x, max_x - 1);
409 const auto yi_1 = clamp<int>(std::floor(y0 + 1), min_y, max_y - 1);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100410
Isabella Gottardi83be7452017-08-29 13:47:03 +0100411 const float dx = x0 - std::floor(x0);
412 const float dy = y0 - std::floor(y0);
413 const float dx1 = 1.0f - dx;
414 const float dy1 = 1.0f - dy;
415
416 const float a00 = *(in.ptr() + xi + yi * stride);
417 const float a01 = *(in.ptr() + xi_1 + yi * stride);
418 const float a10 = *(in.ptr() + xi + yi_1 * stride);
419 const float a11 = *(in.ptr() + xi_1 + yi_1 * stride);
420
421 *out.ptr() = a00 * (dx1 * dy1) + a01 * (dx * dy1) + a10 * (dx1 * dy) + a11 * (dx * dy);
422 }
423 break;
424 default:
425 ARM_COMPUTE_ERROR("Interpolation not supported");
426 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100427 }
428
429 x0 += M00;
430 y0 += M10;
431 },
432 in, out);
433}
434
435template <InterpolationPolicy interpolation>
436void NEWarpPerspectiveKernel<interpolation>::warp_undefined(const Window &window)
437{
438 // Don't increment in X and Y direction for the input tensor
439 // A pointer to the start of this plane is needed as base for the precomputed offsets
440 Window win_in(window);
441 win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
442 win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
443
444 Iterator in(_input, win_in);
445 Iterator out(_output, window);
446
447 const int min_x = _input->info()->valid_region().anchor[0];
448 const int max_x = min_x + _input->info()->valid_region().shape[0];
449 const int min_y = _input->info()->valid_region().anchor[1];
450 const int max_y = min_y + _input->info()->valid_region().shape[1];
451 const size_t stride = _input->info()->strides_in_bytes()[1];
452
453 // x0 = M00 * x + M01 * y + M02
454 // y0 = M10 * x + M11 * y + M12
455 // z0 = M20 * x + M21 * y + M22
456 // xn = x0 / z0
457 // yn = y0 / z0
458 const float M00 = _matrix[0];
459 const float M10 = _matrix[1];
460 const float M20 = _matrix[2];
461 const float M01 = _matrix[0 + 1 * 3];
462 const float M11 = _matrix[1 + 1 * 3];
463 const float M21 = _matrix[2 + 1 * 3];
464 const float M02 = _matrix[0 + 2 * 3];
465 const float M12 = _matrix[1 + 2 * 3];
466 const float M22 = _matrix[2 + 2 * 3];
467
468 // "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start
469 const float start_x0 = M00 * window.x().start();
470 const float start_y0 = M10 * window.x().start();
471 const float start_z0 = M20 * window.x().start();
472
473 // Current row
Isabella Gottardi40ff03b2017-08-10 16:44:42 +0100474 int y_cur = window.y().start();
475 int z_cur = window.z().start();
476 int d3_cur = window[3].start();
477 int d4_cur = window[4].start();
478 int d5_cur = window[5].start();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100479
480 // const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing
481 float const_x0 = M01 * y_cur + M02;
482 float const_y0 = M11 * y_cur + M12;
483 float const_z0 = M21 * y_cur + M22;
484
485 // Perspective warp coordinates
486 float x0 = start_x0 + const_x0;
487 float y0 = start_y0 + const_y0;
488 float z0 = start_z0 + const_z0;
489
490 execute_window_loop(window, [&](const Coordinates & id)
491 {
492 // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0
Isabella Gottardi40ff03b2017-08-10 16:44:42 +0100493 if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5]))
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100494 {
Isabella Gottardi40ff03b2017-08-10 16:44:42 +0100495 y_cur = id.y();
496 z_cur = id.z();
497 d3_cur = id[3];
498 d4_cur = id[4];
499 d5_cur = id[5];
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100500
501 const_x0 = M01 * y_cur + M02;
502 const_y0 = M11 * y_cur + M12;
503 const_z0 = M21 * y_cur + M22;
504
505 x0 = start_x0 + const_x0;
506 y0 = start_y0 + const_y0;
507 z0 = start_z0 + const_z0;
508 }
509
510 const float xn = x0 / z0;
511 const float yn = y0 / z0;
512
513 // Only write to output if xn and yn are within the valid region.
514 // Otherwise the read value would be undefined.
515 if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x))
516 {
517 switch(interpolation)
518 {
519 case InterpolationPolicy::NEAREST_NEIGHBOR:
520 *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
521 break;
522 case InterpolationPolicy::BILINEAR:
Georgios Pinitas583137c2017-08-31 18:12:42 +0100523 *out.ptr() = pixel_bilinear_c1(in.ptr(), stride, xn, yn);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100524 break;
525 default:
526 ARM_COMPUTE_ERROR("Interpolation not supported");
527 }
528 }
529
530 x0 += M00;
531 y0 += M10;
532 z0 += M20;
533 },
534 in, out);
535}
536
537template <InterpolationPolicy interpolation>
538void NEWarpPerspectiveKernel<interpolation>::warp_constant(const Window &window)
539{
540 // Don't increment in X and Y direction for the input tensor
541 // A pointer to the start of this plane is needed as base for the precomputed offsets
542 Window win_in(window);
543 win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
544 win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
545
546 Iterator in(_input, win_in);
547 Iterator out(_output, window);
548
549 const int min_x = _input->info()->valid_region().anchor[0];
550 const int max_x = min_x + _input->info()->valid_region().shape[0];
551 const int min_y = _input->info()->valid_region().anchor[1];
552 const int max_y = min_y + _input->info()->valid_region().shape[1];
553 const size_t stride = _input->info()->strides_in_bytes()[1];
554
555 // x0 = M00 * x + M01 * y + M02
556 // y0 = M10 * x + M11 * y + M12
557 // z0 = M20 * x + M21 * y + M22
558 // xn = x0 / z0
559 // yn = y0 / z0
560 const float M00 = _matrix[0];
561 const float M10 = _matrix[1];
562 const float M20 = _matrix[2];
563 const float M01 = _matrix[0 + 1 * 3];
564 const float M11 = _matrix[1 + 1 * 3];
565 const float M21 = _matrix[2 + 1 * 3];
566 const float M02 = _matrix[0 + 2 * 3];
567 const float M12 = _matrix[1 + 2 * 3];
568 const float M22 = _matrix[2 + 2 * 3];
569
570 // "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start
571 const float start_x0 = M00 * window.x().start();
572 const float start_y0 = M10 * window.x().start();
573 const float start_z0 = M20 * window.x().start();
574
575 // Current row
Isabella Gottardi40ff03b2017-08-10 16:44:42 +0100576 int y_cur = window.y().start();
577 int z_cur = window.z().start();
578 int d3_cur = window[3].start();
579 int d4_cur = window[4].start();
580 int d5_cur = window[5].start();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100581
582 // const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing
583 float const_x0 = M01 * y_cur + M02;
584 float const_y0 = M11 * y_cur + M12;
585 float const_z0 = M21 * y_cur + M22;
586
587 // Perspective warp coordinates
588 float x0 = start_x0 + const_x0;
589 float y0 = start_y0 + const_y0;
590 float z0 = start_z0 + const_z0;
591
592 execute_window_loop(window, [&](const Coordinates & id)
593 {
Isabella Gottardi40ff03b2017-08-10 16:44:42 +0100594 // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0
595 if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5]))
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100596 {
Isabella Gottardi40ff03b2017-08-10 16:44:42 +0100597 y_cur = id.y();
598 z_cur = id.z();
599 d3_cur = id[3];
600 d4_cur = id[4];
601 d5_cur = id[5];
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100602
603 const_x0 = M01 * y_cur + M02;
604 const_y0 = M11 * y_cur + M12;
605 const_z0 = M21 * y_cur + M22;
606
607 x0 = start_x0 + const_x0;
608 y0 = start_y0 + const_y0;
609 z0 = start_z0 + const_z0;
610 }
611
612 const float xn = x0 / z0;
613 const float yn = y0 / z0;
614
615 // Only use input values if xn and yn are within the valid region.
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100616 if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x))
617 {
618 switch(interpolation)
619 {
620 case InterpolationPolicy::NEAREST_NEIGHBOR:
621 *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
622 break;
623 case InterpolationPolicy::BILINEAR:
Georgios Pinitas583137c2017-08-31 18:12:42 +0100624 *out.ptr() = pixel_bilinear_c1(in.ptr(), stride, xn, yn);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100625 break;
626 default:
627 ARM_COMPUTE_ERROR("Interpolation not supported");
628 }
629 }
630 else
631 {
Isabella Gottardi62031532017-07-04 11:21:28 +0100632 switch(interpolation)
633 {
634 case InterpolationPolicy::NEAREST_NEIGHBOR:
635 *out.ptr() = _constant_border_value;
636 break;
637 case InterpolationPolicy::BILINEAR:
638 {
639 const auto xi = clamp<int>(std::floor(xn), min_x - 1, max_x);
640 const auto yi = clamp<int>(std::floor(yn), min_y - 1, max_y);
641 const auto xi_1 = clamp<int>(std::floor(xn + 1), min_x - 1, max_x);
642 const auto yi_1 = clamp<int>(std::floor(yn + 1), min_y - 1, max_y);
643
644 const float dx = xn - std::floor(xn);
645 const float dy = yn - std::floor(yn);
646 const float dx1 = 1.0f - dx;
647 const float dy1 = 1.0f - dy;
648
649 const float a00 = *(in.ptr() + xi + yi * stride);
650 const float a01 = *(in.ptr() + xi_1 + yi * stride);
651 const float a10 = *(in.ptr() + xi + yi_1 * stride);
652 const float a11 = *(in.ptr() + xi_1 + yi_1 * stride);
653
654 *out.ptr() = a00 * (dx1 * dy1) + a01 * (dx * dy1) + a10 * (dx1 * dy) + a11 * (dx * dy);
655 }
656 break;
657 default:
658 ARM_COMPUTE_ERROR("Interpolation not supported");
659 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100660 }
661
662 x0 += M00;
663 y0 += M10;
664 z0 += M20;
665 },
666 in, out);
667}
668
669template <InterpolationPolicy interpolation>
670void NEWarpPerspectiveKernel<interpolation>::warp_replicate(const Window &window)
671{
672 // Don't increment in X and Y direction for the input tensor
673 // A pointer to the start of this plane is needed as base for the precomputed offsets
674 Window win_in(window);
675 win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
676 win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
677
678 Iterator in(_input, win_in);
679 Iterator out(_output, window);
680
681 const int min_x = _input->info()->valid_region().anchor[0];
682 const int max_x = min_x + _input->info()->valid_region().shape[0];
683 const int min_y = _input->info()->valid_region().anchor[1];
684 const int max_y = min_y + _input->info()->valid_region().shape[1];
685 const size_t stride = _input->info()->strides_in_bytes()[1];
686
687 // Current row
Isabella Gottardi40ff03b2017-08-10 16:44:42 +0100688 int y_cur = window.y().start();
689 int z_cur = window.z().start();
690 int d3_cur = window[3].start();
691 int d4_cur = window[4].start();
692 int d5_cur = window[5].start();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100693
694 // x0 = M00 * x + M01 * y + M02
695 // y0 = M10 * x + M11 * y + M12
696 // z0 = M20 * x + M21 * y + M22
697 // xn = x0 / z0
698 // yn = y0 / z0
699 const float M00 = _matrix[0];
700 const float M10 = _matrix[1];
701 const float M20 = _matrix[2];
702 const float M01 = _matrix[0 + 1 * 3];
703 const float M11 = _matrix[1 + 1 * 3];
704 const float M21 = _matrix[2 + 1 * 3];
705 const float M02 = _matrix[0 + 2 * 3];
706 const float M12 = _matrix[1 + 2 * 3];
707 const float M22 = _matrix[2 + 2 * 3];
708
709 // "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start
710 const float start_x0 = M00 * window.x().start();
711 const float start_y0 = M10 * window.x().start();
712 const float start_z0 = M20 * window.x().start();
713
714 // const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing
715 float const_x0 = M01 * y_cur + M02;
716 float const_y0 = M11 * y_cur + M12;
717 float const_z0 = M21 * y_cur + M22;
718
719 // Perspective warp coordinates
720 float x0 = start_x0 + const_x0;
721 float y0 = start_y0 + const_y0;
722 float z0 = start_z0 + const_z0;
723
724 execute_window_loop(window, [&](const Coordinates & id)
725 {
Isabella Gottardi40ff03b2017-08-10 16:44:42 +0100726 // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0
727 if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5]))
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100728 {
Isabella Gottardi40ff03b2017-08-10 16:44:42 +0100729 y_cur = id.y();
730 z_cur = id.z();
731 d3_cur = id[3];
732 d4_cur = id[4];
733 d5_cur = id[5];
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100734
735 const_x0 = M01 * y_cur + M02;
736 const_y0 = M11 * y_cur + M12;
737 const_z0 = M21 * y_cur + M22;
738
739 x0 = start_x0 + const_x0;
740 y0 = start_y0 + const_y0;
741 z0 = start_z0 + const_z0;
742 }
743
744 const float xn = x0 / z0;
745 const float yn = y0 / z0;
746
747 // Only load from (x0, y0) if the point is within the valid region.
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100748 if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x))
749 {
750 switch(interpolation)
751 {
752 case InterpolationPolicy::NEAREST_NEIGHBOR:
753 *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
754 break;
755 case InterpolationPolicy::BILINEAR:
Georgios Pinitas583137c2017-08-31 18:12:42 +0100756 *out.ptr() = pixel_bilinear_c1(in.ptr(), stride, xn, yn);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100757 break;
758 default:
759 ARM_COMPUTE_ERROR("Interpolation not supported");
760 }
761 }
762 else
763 {
764 // Clamp coordinates
Isabella Gottardi62031532017-07-04 11:21:28 +0100765 const auto xi = clamp<int>(std::floor(xn), min_x, max_x - 1);
766 const auto yi = clamp<int>(std::floor(yn), min_y, max_y - 1);
767 switch(interpolation)
768 {
769 case InterpolationPolicy::NEAREST_NEIGHBOR:
770 *out.ptr() = *(in.ptr() + xi + yi * stride);
771 break;
772 case InterpolationPolicy::BILINEAR:
773 {
774 const auto xi_1 = clamp<int>(std::floor(xn + 1), min_x, max_x - 1);
775 const auto yi_1 = clamp<int>(std::floor(yn + 1), min_y, max_y - 1);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100776
Isabella Gottardi62031532017-07-04 11:21:28 +0100777 const float dx = xn - std::floor(xn);
778 const float dy = yn - std::floor(yn);
779 const float dx1 = 1.0f - dx;
780 const float dy1 = 1.0f - dy;
781
782 const float a00 = *(in.ptr() + xi + yi * stride);
783 const float a01 = *(in.ptr() + xi_1 + yi * stride);
784 const float a10 = *(in.ptr() + xi + yi_1 * stride);
785 const float a11 = *(in.ptr() + xi_1 + yi_1 * stride);
786
787 *out.ptr() = a00 * (dx1 * dy1) + a01 * (dx * dy1) + a10 * (dx1 * dy) + a11 * (dx * dy);
788 }
789 break;
790 default:
791 ARM_COMPUTE_ERROR("Interpolation not supported");
792 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100793 }
794
795 x0 += M00;
796 y0 += M10;
797 z0 += M20;
798 },
799 in, out);
800}
801
802template class arm_compute::NEWarpAffineKernel<InterpolationPolicy::NEAREST_NEIGHBOR>;
803template class arm_compute::NEWarpAffineKernel<InterpolationPolicy::BILINEAR>;
804template class arm_compute::NEWarpPerspectiveKernel<InterpolationPolicy::NEAREST_NEIGHBOR>;
805template class arm_compute::NEWarpPerspectiveKernel<InterpolationPolicy::BILINEAR>;