/*
 * Copyright (c) 2016, 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/NEON/kernels/NEWarpKernel.h"
25
26#include "arm_compute/core/AccessWindowStatic.h"
27#include "arm_compute/core/Coordinates.h"
28#include "arm_compute/core/Error.h"
29#include "arm_compute/core/Helpers.h"
30#include "arm_compute/core/ITensor.h"
31#include "arm_compute/core/TensorInfo.h"
32#include "arm_compute/core/Validate.h"
33#include "arm_compute/core/Window.h"
34
35#include <cstddef>
36
37using namespace arm_compute;
38
namespace
{
/** Fetch the 8-bit value stored at column @p x of row @p y.
 *
 * @param[in] in_ptr Pointer to the element addressed as (0, 0).
 * @param[in] x      Column of the element to read.
 * @param[in] y      Row of the element to read.
 * @param[in] stride Distance, in elements of @p in_ptr, between the starts of two consecutive rows.
 *
 * @return The value found at in_ptr[x + y * stride].
 */
inline uint8_t nearest_interpolation(const uint8_t *in_ptr, int x, int y, size_t stride)
{
    const auto offset = x + y * stride;
    return *(in_ptr + offset);
}
} // namespace
46
47INEWarpKernel::INEWarpKernel()
48 : _func(nullptr), _input(nullptr), _output(nullptr), _constant_border_value(0), _matrix(nullptr)
49{
50}
51
Isabella Gottardif9bae2e2017-07-28 17:24:08 +010052BorderSize INEWarpKernel::border_size() const
53{
54 return BorderSize(1);
55}
56
Anthony Barbier6ff3b192017-09-04 18:44:23 +010057void INEWarpKernel::run(const Window &window)
58{
59 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
60 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
61 ARM_COMPUTE_ERROR_ON(_func == nullptr);
62
63 (this->*_func)(window);
64}
65
66void INEWarpKernel::configure(const ITensor *input, ITensor *output, const float *matrix, BorderMode border_mode, uint8_t constant_border_value)
67{
68 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
69 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
70 ARM_COMPUTE_ERROR_ON(nullptr == matrix);
71
72 _matrix = matrix;
73 _constant_border_value = constant_border_value;
74
75 switch(border_mode)
76 {
77 case BorderMode::UNDEFINED:
78 _func = &INEWarpKernel::warp_undefined;
79 break;
80 case BorderMode::CONSTANT:
81 _func = &INEWarpKernel::warp_constant;
82 break;
83 case BorderMode::REPLICATE:
84 _func = &INEWarpKernel::warp_replicate;
85 break;
86 default:
87 ARM_COMPUTE_ERROR("Border mode not supported");
88 break;
89 }
90
91 _input = input;
92 _output = output;
93
94 // Configure kernel window
95 Window win = calculate_max_window(*output->info(), Steps(1U));
96
97 const ValidRegion &input_valid_region = input->info()->valid_region();
98
99 // Reads can occur within the valid region of the input
100 AccessWindowStatic input_access(input->info(),
Isabella Gottardif9bae2e2017-07-28 17:24:08 +0100101 input_valid_region.anchor[0] - border_size().left, input_valid_region.anchor[1] - border_size().top,
102 input_valid_region.anchor[0] + input_valid_region.shape[0] + border_size().right,
103 input_valid_region.anchor[1] + input_valid_region.shape[1] + border_size().bottom);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100104 AccessWindowHorizontal output_access(output->info(), 0, 1);
105
106 update_window_and_padding(win, input_access, output_access);
107
108 output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
109
110 INEKernel::configure(win);
111}
112
113template <InterpolationPolicy interpolation>
114void NEWarpAffineKernel<interpolation>::warp_undefined(const Window &window)
115{
116 // Don't increment in X and Y direction for the input tensor
117 // A pointer to the start of this plane is needed as base for the precomputed offsets
118 Window win_in(window);
119 win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
120 win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
121
122 Iterator in(_input, win_in);
123 Iterator out(_output, window);
124
125 const int min_x = _input->info()->valid_region().anchor[0];
126 const int max_x = min_x + _input->info()->valid_region().shape[0];
127 const int min_y = _input->info()->valid_region().anchor[1];
128 const int max_y = min_y + _input->info()->valid_region().shape[1];
129 const size_t stride = _input->info()->strides_in_bytes()[1];
130
131 // x0 = M01 * x + M01 * y + M02
132 // y0 = M11 * x + M11 * y + M12
133 const float M00 = _matrix[0];
134 const float M10 = _matrix[1];
135 const float M01 = _matrix[0 + 1 * 2];
136 const float M11 = _matrix[1 + 1 * 2];
137 const float M02 = _matrix[0 + 2 * 2];
138 const float M12 = _matrix[1 + 2 * 2];
139
140 // "M00 * x" and "M10 * x", when x = window.x.start
141 const float start_x0 = M00 * window.x().start();
142 const float start_y0 = M10 * window.x().start();
143
144 // Current row
145 int y_cur = window.y().start();
146
147 // const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing
148 float const_x0 = M01 * y_cur + M02;
149 float const_y0 = M11 * y_cur + M12;
150
151 // Affine warp coordinates
152 float x0 = start_x0 + const_x0;
153 float y0 = start_y0 + const_y0;
154
155 execute_window_loop(window, [&](const Coordinates & id)
156 {
157 // Check if we are processing a new row. If so, update the current row (y_cur), x0 and y0
158 if(y_cur != id.y())
159 {
160 y_cur = id.y();
161
162 const_x0 = M01 * y_cur + M02;
163 const_y0 = M11 * y_cur + M12;
164
165 x0 = start_x0 + const_x0;
166 y0 = start_y0 + const_y0;
167 }
168
169 // Only write to output if x0 and y0 are within the valid region.
170 // Otherwise the read value would be undefined.
171 if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x))
172 {
173 switch(interpolation)
174 {
175 case InterpolationPolicy::NEAREST_NEIGHBOR:
176 *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
177 break;
178 case InterpolationPolicy::BILINEAR:
179 *out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, x0, y0);
180 break;
181 default:
182 ARM_COMPUTE_ERROR("Interpolation not supported");
183 }
184 }
185
186 x0 += M00;
187 y0 += M10;
188 },
189 in, out);
190}
191
192template <InterpolationPolicy interpolation>
193void NEWarpAffineKernel<interpolation>::warp_constant(const Window &window)
194{
195 // Don't increment in X and Y direction for the input tensor
196 // A pointer to the start of this plane is needed as base for the precomputed offsets
197 Window win_in(window);
198 win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
199 win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
200
201 Iterator in(_input, win_in);
202 Iterator out(_output, window);
203
204 const int min_x = _input->info()->valid_region().anchor[0];
205 const int max_x = min_x + _input->info()->valid_region().shape[0];
206 const int min_y = _input->info()->valid_region().anchor[1];
207 const int max_y = min_y + _input->info()->valid_region().shape[1];
208 const size_t stride = _input->info()->strides_in_bytes()[1];
209
210 // x0 = M01 * x + M01 * y + M02
211 // y0 = M11 * x + M11 * y + M12
212 const float M00 = _matrix[0];
213 const float M10 = _matrix[1];
214 const float M01 = _matrix[0 + 1 * 2];
215 const float M11 = _matrix[1 + 1 * 2];
216 const float M02 = _matrix[0 + 2 * 2];
217 const float M12 = _matrix[1 + 2 * 2];
218
219 // "M00 * x" and "M10 * x", when x = window.x.start
220 const float start_x0 = M00 * window.x().start();
221 const float start_y0 = M10 * window.x().start();
222
223 // Current row
224 int y_cur = window.y().start();
225
226 // const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing
227 float const_x0 = M01 * y_cur + M02;
228 float const_y0 = M11 * y_cur + M12;
229
230 // Affine warp coordinates
231 float x0 = start_x0 + const_x0;
232 float y0 = start_y0 + const_y0;
233
234 execute_window_loop(window, [&](const Coordinates & id)
235 {
236 // Check if we are processing a new row. If so, update the current row (y_cur), x0 and y0
237 if(y_cur != id.y())
238 {
239 y_cur = id.y();
240
241 const_x0 = M01 * y_cur + M02;
242 const_y0 = M11 * y_cur + M12;
243
244 x0 = start_x0 + const_x0;
245 y0 = start_y0 + const_y0;
246 }
247
248 // Only use input values if x0 and y0 are within the valid region.
249 // Otherwise write the constant border value.
250 if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x))
251 {
252 switch(interpolation)
253 {
254 case InterpolationPolicy::NEAREST_NEIGHBOR:
255 *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
256 break;
257 case InterpolationPolicy::BILINEAR:
258 *out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, x0, y0);
259 break;
260 default:
261 ARM_COMPUTE_ERROR("Interpolation not supported");
262 }
263 }
264 else
265 {
266 *out.ptr() = _constant_border_value;
267 }
268
269 x0 += M00;
270 y0 += M10;
271 },
272 in, out);
273}
274
275template <InterpolationPolicy interpolation>
276void NEWarpAffineKernel<interpolation>::warp_replicate(const Window &window)
277{
278 // Don't increment in X and Y direction for the input tensor
279 // A pointer to the start of this plane is needed as base for the precomputed offsets
280 Window win_in(window);
281 win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
282 win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
283
284 Iterator in(_input, win_in);
285 Iterator out(_output, window);
286
287 const int min_x = _input->info()->valid_region().anchor[0];
288 const int max_x = min_x + _input->info()->valid_region().shape[0];
289 const int min_y = _input->info()->valid_region().anchor[1];
290 const int max_y = min_y + _input->info()->valid_region().shape[1];
291 const size_t stride = _input->info()->strides_in_bytes()[1];
292
293 // Current row
294 int y_cur = window.y().start();
295
296 const float M00 = _matrix[0];
297 const float M10 = _matrix[1];
298 const float M01 = _matrix[0 + 1 * 2];
299 const float M11 = _matrix[1 + 1 * 2];
300 const float M02 = _matrix[0 + 2 * 2];
301 const float M12 = _matrix[1 + 2 * 2];
302
303 // "M00 * x" and "M10 * x", when x = window.x.start
304 const float start_x0 = M00 * window.x().start();
305 const float start_y0 = M10 * window.x().start();
306
307 // const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing
308 float const_x0 = M01 * y_cur + M02;
309 float const_y0 = M11 * y_cur + M12;
310
311 float x0 = start_x0 + const_x0;
312 float y0 = start_y0 + const_y0;
313
314 execute_window_loop(window, [&](const Coordinates & id)
315 {
316 // Check if we are processing a new row. If so, update the current row (y_cur), x0 and y0
317 if(y_cur != id.y())
318 {
319 y_cur = id.y();
320
321 const_x0 = M01 * y_cur + M02;
322 const_y0 = M11 * y_cur + M12;
323
324 x0 = start_x0 + const_x0;
325 y0 = start_y0 + const_y0;
326 }
327
328 // Only load from (x0, y0) if the point is within the valid region.
329 // Otherwise load from the edge of the valid region.
330 if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x))
331 {
332 switch(interpolation)
333 {
334 case InterpolationPolicy::NEAREST_NEIGHBOR:
335 *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
336 break;
337 case InterpolationPolicy::BILINEAR:
338 *out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, x0, y0);
339 break;
340 default:
341 ARM_COMPUTE_ERROR("Interpolation not supported");
342 }
343 }
344 else
345 {
346 // Clamp coordinates
347 const auto xi = clamp<int>(x0, min_x, max_x - 1);
348 const auto yi = clamp<int>(y0, min_y, max_y - 1);
349
350 *out.ptr() = *(in.ptr() + xi + yi * stride);
351 }
352
353 x0 += M00;
354 y0 += M10;
355 },
356 in, out);
357}
358
359template <InterpolationPolicy interpolation>
360void NEWarpPerspectiveKernel<interpolation>::warp_undefined(const Window &window)
361{
362 // Don't increment in X and Y direction for the input tensor
363 // A pointer to the start of this plane is needed as base for the precomputed offsets
364 Window win_in(window);
365 win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
366 win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
367
368 Iterator in(_input, win_in);
369 Iterator out(_output, window);
370
371 const int min_x = _input->info()->valid_region().anchor[0];
372 const int max_x = min_x + _input->info()->valid_region().shape[0];
373 const int min_y = _input->info()->valid_region().anchor[1];
374 const int max_y = min_y + _input->info()->valid_region().shape[1];
375 const size_t stride = _input->info()->strides_in_bytes()[1];
376
377 // x0 = M00 * x + M01 * y + M02
378 // y0 = M10 * x + M11 * y + M12
379 // z0 = M20 * x + M21 * y + M22
380 // xn = x0 / z0
381 // yn = y0 / z0
382 const float M00 = _matrix[0];
383 const float M10 = _matrix[1];
384 const float M20 = _matrix[2];
385 const float M01 = _matrix[0 + 1 * 3];
386 const float M11 = _matrix[1 + 1 * 3];
387 const float M21 = _matrix[2 + 1 * 3];
388 const float M02 = _matrix[0 + 2 * 3];
389 const float M12 = _matrix[1 + 2 * 3];
390 const float M22 = _matrix[2 + 2 * 3];
391
392 // "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start
393 const float start_x0 = M00 * window.x().start();
394 const float start_y0 = M10 * window.x().start();
395 const float start_z0 = M20 * window.x().start();
396
397 // Current row
Isabella Gottardi40ff03b2017-08-10 16:44:42 +0100398 int y_cur = window.y().start();
399 int z_cur = window.z().start();
400 int d3_cur = window[3].start();
401 int d4_cur = window[4].start();
402 int d5_cur = window[5].start();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100403
404 // const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing
405 float const_x0 = M01 * y_cur + M02;
406 float const_y0 = M11 * y_cur + M12;
407 float const_z0 = M21 * y_cur + M22;
408
409 // Perspective warp coordinates
410 float x0 = start_x0 + const_x0;
411 float y0 = start_y0 + const_y0;
412 float z0 = start_z0 + const_z0;
413
414 execute_window_loop(window, [&](const Coordinates & id)
415 {
416 // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0
Isabella Gottardi40ff03b2017-08-10 16:44:42 +0100417 if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5]))
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100418 {
Isabella Gottardi40ff03b2017-08-10 16:44:42 +0100419 y_cur = id.y();
420 z_cur = id.z();
421 d3_cur = id[3];
422 d4_cur = id[4];
423 d5_cur = id[5];
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100424
425 const_x0 = M01 * y_cur + M02;
426 const_y0 = M11 * y_cur + M12;
427 const_z0 = M21 * y_cur + M22;
428
429 x0 = start_x0 + const_x0;
430 y0 = start_y0 + const_y0;
431 z0 = start_z0 + const_z0;
432 }
433
434 const float xn = x0 / z0;
435 const float yn = y0 / z0;
436
437 // Only write to output if xn and yn are within the valid region.
438 // Otherwise the read value would be undefined.
439 if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x))
440 {
441 switch(interpolation)
442 {
443 case InterpolationPolicy::NEAREST_NEIGHBOR:
444 *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
445 break;
446 case InterpolationPolicy::BILINEAR:
447 *out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, xn, yn);
448 break;
449 default:
450 ARM_COMPUTE_ERROR("Interpolation not supported");
451 }
452 }
453
454 x0 += M00;
455 y0 += M10;
456 z0 += M20;
457 },
458 in, out);
459}
460
461template <InterpolationPolicy interpolation>
462void NEWarpPerspectiveKernel<interpolation>::warp_constant(const Window &window)
463{
464 // Don't increment in X and Y direction for the input tensor
465 // A pointer to the start of this plane is needed as base for the precomputed offsets
466 Window win_in(window);
467 win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
468 win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
469
470 Iterator in(_input, win_in);
471 Iterator out(_output, window);
472
473 const int min_x = _input->info()->valid_region().anchor[0];
474 const int max_x = min_x + _input->info()->valid_region().shape[0];
475 const int min_y = _input->info()->valid_region().anchor[1];
476 const int max_y = min_y + _input->info()->valid_region().shape[1];
477 const size_t stride = _input->info()->strides_in_bytes()[1];
478
479 // x0 = M00 * x + M01 * y + M02
480 // y0 = M10 * x + M11 * y + M12
481 // z0 = M20 * x + M21 * y + M22
482 // xn = x0 / z0
483 // yn = y0 / z0
484 const float M00 = _matrix[0];
485 const float M10 = _matrix[1];
486 const float M20 = _matrix[2];
487 const float M01 = _matrix[0 + 1 * 3];
488 const float M11 = _matrix[1 + 1 * 3];
489 const float M21 = _matrix[2 + 1 * 3];
490 const float M02 = _matrix[0 + 2 * 3];
491 const float M12 = _matrix[1 + 2 * 3];
492 const float M22 = _matrix[2 + 2 * 3];
493
494 // "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start
495 const float start_x0 = M00 * window.x().start();
496 const float start_y0 = M10 * window.x().start();
497 const float start_z0 = M20 * window.x().start();
498
499 // Current row
Isabella Gottardi40ff03b2017-08-10 16:44:42 +0100500 int y_cur = window.y().start();
501 int z_cur = window.z().start();
502 int d3_cur = window[3].start();
503 int d4_cur = window[4].start();
504 int d5_cur = window[5].start();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100505
506 // const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing
507 float const_x0 = M01 * y_cur + M02;
508 float const_y0 = M11 * y_cur + M12;
509 float const_z0 = M21 * y_cur + M22;
510
511 // Perspective warp coordinates
512 float x0 = start_x0 + const_x0;
513 float y0 = start_y0 + const_y0;
514 float z0 = start_z0 + const_z0;
515
516 execute_window_loop(window, [&](const Coordinates & id)
517 {
Isabella Gottardi40ff03b2017-08-10 16:44:42 +0100518 // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0
519 if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5]))
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100520 {
Isabella Gottardi40ff03b2017-08-10 16:44:42 +0100521 y_cur = id.y();
522 z_cur = id.z();
523 d3_cur = id[3];
524 d4_cur = id[4];
525 d5_cur = id[5];
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100526
527 const_x0 = M01 * y_cur + M02;
528 const_y0 = M11 * y_cur + M12;
529 const_z0 = M21 * y_cur + M22;
530
531 x0 = start_x0 + const_x0;
532 y0 = start_y0 + const_y0;
533 z0 = start_z0 + const_z0;
534 }
535
536 const float xn = x0 / z0;
537 const float yn = y0 / z0;
538
539 // Only use input values if xn and yn are within the valid region.
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100540 if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x))
541 {
542 switch(interpolation)
543 {
544 case InterpolationPolicy::NEAREST_NEIGHBOR:
545 *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
546 break;
547 case InterpolationPolicy::BILINEAR:
548 *out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, xn, yn);
549 break;
550 default:
551 ARM_COMPUTE_ERROR("Interpolation not supported");
552 }
553 }
554 else
555 {
Isabella Gottardi62031532017-07-04 11:21:28 +0100556 switch(interpolation)
557 {
558 case InterpolationPolicy::NEAREST_NEIGHBOR:
559 *out.ptr() = _constant_border_value;
560 break;
561 case InterpolationPolicy::BILINEAR:
562 {
563 const auto xi = clamp<int>(std::floor(xn), min_x - 1, max_x);
564 const auto yi = clamp<int>(std::floor(yn), min_y - 1, max_y);
565 const auto xi_1 = clamp<int>(std::floor(xn + 1), min_x - 1, max_x);
566 const auto yi_1 = clamp<int>(std::floor(yn + 1), min_y - 1, max_y);
567
568 const float dx = xn - std::floor(xn);
569 const float dy = yn - std::floor(yn);
570 const float dx1 = 1.0f - dx;
571 const float dy1 = 1.0f - dy;
572
573 const float a00 = *(in.ptr() + xi + yi * stride);
574 const float a01 = *(in.ptr() + xi_1 + yi * stride);
575 const float a10 = *(in.ptr() + xi + yi_1 * stride);
576 const float a11 = *(in.ptr() + xi_1 + yi_1 * stride);
577
578 *out.ptr() = a00 * (dx1 * dy1) + a01 * (dx * dy1) + a10 * (dx1 * dy) + a11 * (dx * dy);
579 }
580 break;
581 default:
582 ARM_COMPUTE_ERROR("Interpolation not supported");
583 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100584 }
585
586 x0 += M00;
587 y0 += M10;
588 z0 += M20;
589 },
590 in, out);
591}
592
593template <InterpolationPolicy interpolation>
594void NEWarpPerspectiveKernel<interpolation>::warp_replicate(const Window &window)
595{
596 // Don't increment in X and Y direction for the input tensor
597 // A pointer to the start of this plane is needed as base for the precomputed offsets
598 Window win_in(window);
599 win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
600 win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
601
602 Iterator in(_input, win_in);
603 Iterator out(_output, window);
604
605 const int min_x = _input->info()->valid_region().anchor[0];
606 const int max_x = min_x + _input->info()->valid_region().shape[0];
607 const int min_y = _input->info()->valid_region().anchor[1];
608 const int max_y = min_y + _input->info()->valid_region().shape[1];
609 const size_t stride = _input->info()->strides_in_bytes()[1];
610
611 // Current row
Isabella Gottardi40ff03b2017-08-10 16:44:42 +0100612 int y_cur = window.y().start();
613 int z_cur = window.z().start();
614 int d3_cur = window[3].start();
615 int d4_cur = window[4].start();
616 int d5_cur = window[5].start();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100617
618 // x0 = M00 * x + M01 * y + M02
619 // y0 = M10 * x + M11 * y + M12
620 // z0 = M20 * x + M21 * y + M22
621 // xn = x0 / z0
622 // yn = y0 / z0
623 const float M00 = _matrix[0];
624 const float M10 = _matrix[1];
625 const float M20 = _matrix[2];
626 const float M01 = _matrix[0 + 1 * 3];
627 const float M11 = _matrix[1 + 1 * 3];
628 const float M21 = _matrix[2 + 1 * 3];
629 const float M02 = _matrix[0 + 2 * 3];
630 const float M12 = _matrix[1 + 2 * 3];
631 const float M22 = _matrix[2 + 2 * 3];
632
633 // "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start
634 const float start_x0 = M00 * window.x().start();
635 const float start_y0 = M10 * window.x().start();
636 const float start_z0 = M20 * window.x().start();
637
638 // const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing
639 float const_x0 = M01 * y_cur + M02;
640 float const_y0 = M11 * y_cur + M12;
641 float const_z0 = M21 * y_cur + M22;
642
643 // Perspective warp coordinates
644 float x0 = start_x0 + const_x0;
645 float y0 = start_y0 + const_y0;
646 float z0 = start_z0 + const_z0;
647
648 execute_window_loop(window, [&](const Coordinates & id)
649 {
Isabella Gottardi40ff03b2017-08-10 16:44:42 +0100650 // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0
651 if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5]))
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100652 {
Isabella Gottardi40ff03b2017-08-10 16:44:42 +0100653 y_cur = id.y();
654 z_cur = id.z();
655 d3_cur = id[3];
656 d4_cur = id[4];
657 d5_cur = id[5];
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100658
659 const_x0 = M01 * y_cur + M02;
660 const_y0 = M11 * y_cur + M12;
661 const_z0 = M21 * y_cur + M22;
662
663 x0 = start_x0 + const_x0;
664 y0 = start_y0 + const_y0;
665 z0 = start_z0 + const_z0;
666 }
667
668 const float xn = x0 / z0;
669 const float yn = y0 / z0;
670
671 // Only load from (x0, y0) if the point is within the valid region.
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100672 if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x))
673 {
674 switch(interpolation)
675 {
676 case InterpolationPolicy::NEAREST_NEIGHBOR:
677 *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
678 break;
679 case InterpolationPolicy::BILINEAR:
680 *out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, xn, yn);
681 break;
682 default:
683 ARM_COMPUTE_ERROR("Interpolation not supported");
684 }
685 }
686 else
687 {
688 // Clamp coordinates
Isabella Gottardi62031532017-07-04 11:21:28 +0100689 const auto xi = clamp<int>(std::floor(xn), min_x, max_x - 1);
690 const auto yi = clamp<int>(std::floor(yn), min_y, max_y - 1);
691 switch(interpolation)
692 {
693 case InterpolationPolicy::NEAREST_NEIGHBOR:
694 *out.ptr() = *(in.ptr() + xi + yi * stride);
695 break;
696 case InterpolationPolicy::BILINEAR:
697 {
698 const auto xi_1 = clamp<int>(std::floor(xn + 1), min_x, max_x - 1);
699 const auto yi_1 = clamp<int>(std::floor(yn + 1), min_y, max_y - 1);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100700
Isabella Gottardi62031532017-07-04 11:21:28 +0100701 const float dx = xn - std::floor(xn);
702 const float dy = yn - std::floor(yn);
703 const float dx1 = 1.0f - dx;
704 const float dy1 = 1.0f - dy;
705
706 const float a00 = *(in.ptr() + xi + yi * stride);
707 const float a01 = *(in.ptr() + xi_1 + yi * stride);
708 const float a10 = *(in.ptr() + xi + yi_1 * stride);
709 const float a11 = *(in.ptr() + xi_1 + yi_1 * stride);
710
711 *out.ptr() = a00 * (dx1 * dy1) + a01 * (dx * dy1) + a10 * (dx1 * dy) + a11 * (dx * dy);
712 }
713 break;
714 default:
715 ARM_COMPUTE_ERROR("Interpolation not supported");
716 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100717 }
718
719 x0 += M00;
720 y0 += M10;
721 z0 += M20;
722 },
723 in, out);
724}
725
726template class arm_compute::NEWarpAffineKernel<InterpolationPolicy::NEAREST_NEIGHBOR>;
727template class arm_compute::NEWarpAffineKernel<InterpolationPolicy::BILINEAR>;
728template class arm_compute::NEWarpPerspectiveKernel<InterpolationPolicy::NEAREST_NEIGHBOR>;
729template class arm_compute::NEWarpPerspectiveKernel<InterpolationPolicy::BILINEAR>;