// Source blob: 5ca1395b472b108e192e1f5ee6d9f4714319d356
/*
 * Copyright (c) 2016, 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/core/NEON/kernels/NEWarpKernel.h"

#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"

#include <cmath>
#include <cstddef>
#include <cstdint>

using namespace arm_compute;
38
namespace
{
/** Fetch the U8 value stored at integer coordinates (x, y) relative to a base pointer.
 *
 * @param[in] in_ptr Pointer to the element regarded as coordinate (0, 0).
 * @param[in] x      Horizontal coordinate, in elements. May be negative.
 * @param[in] y      Vertical coordinate, in rows. May be negative.
 * @param[in] stride Distance between two consecutive rows, in bytes.
 *
 * @return The value found at in_ptr + x + y * stride.
 */
inline uint8_t nearest_interpolation(const uint8_t *in_ptr, int x, int y, size_t stride)
{
    // Compute the offset with signed arithmetic. Mixing the int coordinates with the unsigned
    // stride would turn a negative coordinate into a huge wrapped-around unsigned index.
    const std::ptrdiff_t offset = static_cast<std::ptrdiff_t>(x) + static_cast<std::ptrdiff_t>(y) * static_cast<std::ptrdiff_t>(stride);

    return in_ptr[offset];
}
} // namespace
46
47INEWarpKernel::INEWarpKernel()
48 : _func(nullptr), _input(nullptr), _output(nullptr), _constant_border_value(0), _matrix(nullptr)
49{
50}
51
Isabella Gottardif9bae2e2017-07-28 17:24:08 +010052BorderSize INEWarpKernel::border_size() const
53{
54 return BorderSize(1);
55}
56
Anthony Barbier6ff3b192017-09-04 18:44:23 +010057void INEWarpKernel::run(const Window &window)
58{
59 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
60 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
61 ARM_COMPUTE_ERROR_ON(_func == nullptr);
62
63 (this->*_func)(window);
64}
65
66void INEWarpKernel::configure(const ITensor *input, ITensor *output, const float *matrix, BorderMode border_mode, uint8_t constant_border_value)
67{
68 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
69 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
70 ARM_COMPUTE_ERROR_ON(nullptr == matrix);
71
72 _matrix = matrix;
73 _constant_border_value = constant_border_value;
74
75 switch(border_mode)
76 {
77 case BorderMode::UNDEFINED:
78 _func = &INEWarpKernel::warp_undefined;
79 break;
80 case BorderMode::CONSTANT:
81 _func = &INEWarpKernel::warp_constant;
82 break;
83 case BorderMode::REPLICATE:
84 _func = &INEWarpKernel::warp_replicate;
85 break;
86 default:
87 ARM_COMPUTE_ERROR("Border mode not supported");
88 break;
89 }
90
91 _input = input;
92 _output = output;
93
94 // Configure kernel window
95 Window win = calculate_max_window(*output->info(), Steps(1U));
96
97 const ValidRegion &input_valid_region = input->info()->valid_region();
98
99 // Reads can occur within the valid region of the input
100 AccessWindowStatic input_access(input->info(),
Isabella Gottardif9bae2e2017-07-28 17:24:08 +0100101 input_valid_region.anchor[0] - border_size().left, input_valid_region.anchor[1] - border_size().top,
102 input_valid_region.anchor[0] + input_valid_region.shape[0] + border_size().right,
103 input_valid_region.anchor[1] + input_valid_region.shape[1] + border_size().bottom);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100104 AccessWindowHorizontal output_access(output->info(), 0, 1);
105
106 update_window_and_padding(win, input_access, output_access);
107
108 output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
109
110 INEKernel::configure(win);
111}
112
113template <InterpolationPolicy interpolation>
114void NEWarpAffineKernel<interpolation>::warp_undefined(const Window &window)
115{
116 // Don't increment in X and Y direction for the input tensor
117 // A pointer to the start of this plane is needed as base for the precomputed offsets
118 Window win_in(window);
119 win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
120 win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
121
122 Iterator in(_input, win_in);
123 Iterator out(_output, window);
124
125 const int min_x = _input->info()->valid_region().anchor[0];
126 const int max_x = min_x + _input->info()->valid_region().shape[0];
127 const int min_y = _input->info()->valid_region().anchor[1];
128 const int max_y = min_y + _input->info()->valid_region().shape[1];
129 const size_t stride = _input->info()->strides_in_bytes()[1];
130
131 // x0 = M01 * x + M01 * y + M02
132 // y0 = M11 * x + M11 * y + M12
133 const float M00 = _matrix[0];
134 const float M10 = _matrix[1];
135 const float M01 = _matrix[0 + 1 * 2];
136 const float M11 = _matrix[1 + 1 * 2];
137 const float M02 = _matrix[0 + 2 * 2];
138 const float M12 = _matrix[1 + 2 * 2];
139
140 // "M00 * x" and "M10 * x", when x = window.x.start
141 const float start_x0 = M00 * window.x().start();
142 const float start_y0 = M10 * window.x().start();
143
144 // Current row
145 int y_cur = window.y().start();
146
147 // const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing
148 float const_x0 = M01 * y_cur + M02;
149 float const_y0 = M11 * y_cur + M12;
150
151 // Affine warp coordinates
152 float x0 = start_x0 + const_x0;
153 float y0 = start_y0 + const_y0;
154
155 execute_window_loop(window, [&](const Coordinates & id)
156 {
157 // Check if we are processing a new row. If so, update the current row (y_cur), x0 and y0
158 if(y_cur != id.y())
159 {
160 y_cur = id.y();
161
162 const_x0 = M01 * y_cur + M02;
163 const_y0 = M11 * y_cur + M12;
164
165 x0 = start_x0 + const_x0;
166 y0 = start_y0 + const_y0;
167 }
168
169 // Only write to output if x0 and y0 are within the valid region.
170 // Otherwise the read value would be undefined.
171 if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x))
172 {
173 switch(interpolation)
174 {
175 case InterpolationPolicy::NEAREST_NEIGHBOR:
176 *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
177 break;
178 case InterpolationPolicy::BILINEAR:
179 *out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, x0, y0);
180 break;
181 default:
182 ARM_COMPUTE_ERROR("Interpolation not supported");
183 }
184 }
185
186 x0 += M00;
187 y0 += M10;
188 },
189 in, out);
190}
191
192template <InterpolationPolicy interpolation>
193void NEWarpAffineKernel<interpolation>::warp_constant(const Window &window)
194{
195 // Don't increment in X and Y direction for the input tensor
196 // A pointer to the start of this plane is needed as base for the precomputed offsets
197 Window win_in(window);
198 win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
199 win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
200
201 Iterator in(_input, win_in);
202 Iterator out(_output, window);
203
204 const int min_x = _input->info()->valid_region().anchor[0];
205 const int max_x = min_x + _input->info()->valid_region().shape[0];
206 const int min_y = _input->info()->valid_region().anchor[1];
207 const int max_y = min_y + _input->info()->valid_region().shape[1];
208 const size_t stride = _input->info()->strides_in_bytes()[1];
209
210 // x0 = M01 * x + M01 * y + M02
211 // y0 = M11 * x + M11 * y + M12
212 const float M00 = _matrix[0];
213 const float M10 = _matrix[1];
214 const float M01 = _matrix[0 + 1 * 2];
215 const float M11 = _matrix[1 + 1 * 2];
216 const float M02 = _matrix[0 + 2 * 2];
217 const float M12 = _matrix[1 + 2 * 2];
218
219 // "M00 * x" and "M10 * x", when x = window.x.start
220 const float start_x0 = M00 * window.x().start();
221 const float start_y0 = M10 * window.x().start();
222
223 // Current row
224 int y_cur = window.y().start();
225
226 // const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing
227 float const_x0 = M01 * y_cur + M02;
228 float const_y0 = M11 * y_cur + M12;
229
230 // Affine warp coordinates
231 float x0 = start_x0 + const_x0;
232 float y0 = start_y0 + const_y0;
233
234 execute_window_loop(window, [&](const Coordinates & id)
235 {
236 // Check if we are processing a new row. If so, update the current row (y_cur), x0 and y0
237 if(y_cur != id.y())
238 {
239 y_cur = id.y();
240
241 const_x0 = M01 * y_cur + M02;
242 const_y0 = M11 * y_cur + M12;
243
244 x0 = start_x0 + const_x0;
245 y0 = start_y0 + const_y0;
246 }
247
248 // Only use input values if x0 and y0 are within the valid region.
249 // Otherwise write the constant border value.
250 if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x))
251 {
252 switch(interpolation)
253 {
254 case InterpolationPolicy::NEAREST_NEIGHBOR:
255 *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
256 break;
257 case InterpolationPolicy::BILINEAR:
258 *out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, x0, y0);
259 break;
260 default:
261 ARM_COMPUTE_ERROR("Interpolation not supported");
262 }
263 }
264 else
265 {
266 *out.ptr() = _constant_border_value;
267 }
268
269 x0 += M00;
270 y0 += M10;
271 },
272 in, out);
273}
274
275template <InterpolationPolicy interpolation>
276void NEWarpAffineKernel<interpolation>::warp_replicate(const Window &window)
277{
278 // Don't increment in X and Y direction for the input tensor
279 // A pointer to the start of this plane is needed as base for the precomputed offsets
280 Window win_in(window);
281 win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
282 win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
283
284 Iterator in(_input, win_in);
285 Iterator out(_output, window);
286
287 const int min_x = _input->info()->valid_region().anchor[0];
288 const int max_x = min_x + _input->info()->valid_region().shape[0];
289 const int min_y = _input->info()->valid_region().anchor[1];
290 const int max_y = min_y + _input->info()->valid_region().shape[1];
291 const size_t stride = _input->info()->strides_in_bytes()[1];
292
293 // Current row
294 int y_cur = window.y().start();
295
296 const float M00 = _matrix[0];
297 const float M10 = _matrix[1];
298 const float M01 = _matrix[0 + 1 * 2];
299 const float M11 = _matrix[1 + 1 * 2];
300 const float M02 = _matrix[0 + 2 * 2];
301 const float M12 = _matrix[1 + 2 * 2];
302
303 // "M00 * x" and "M10 * x", when x = window.x.start
304 const float start_x0 = M00 * window.x().start();
305 const float start_y0 = M10 * window.x().start();
306
307 // const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing
308 float const_x0 = M01 * y_cur + M02;
309 float const_y0 = M11 * y_cur + M12;
310
311 float x0 = start_x0 + const_x0;
312 float y0 = start_y0 + const_y0;
313
314 execute_window_loop(window, [&](const Coordinates & id)
315 {
316 // Check if we are processing a new row. If so, update the current row (y_cur), x0 and y0
317 if(y_cur != id.y())
318 {
319 y_cur = id.y();
320
321 const_x0 = M01 * y_cur + M02;
322 const_y0 = M11 * y_cur + M12;
323
324 x0 = start_x0 + const_x0;
325 y0 = start_y0 + const_y0;
326 }
327
328 // Only load from (x0, y0) if the point is within the valid region.
329 // Otherwise load from the edge of the valid region.
330 if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x))
331 {
332 switch(interpolation)
333 {
334 case InterpolationPolicy::NEAREST_NEIGHBOR:
335 *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
336 break;
337 case InterpolationPolicy::BILINEAR:
338 *out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, x0, y0);
339 break;
340 default:
341 ARM_COMPUTE_ERROR("Interpolation not supported");
342 }
343 }
344 else
345 {
346 // Clamp coordinates
347 const auto xi = clamp<int>(x0, min_x, max_x - 1);
348 const auto yi = clamp<int>(y0, min_y, max_y - 1);
349
350 *out.ptr() = *(in.ptr() + xi + yi * stride);
351 }
352
353 x0 += M00;
354 y0 += M10;
355 },
356 in, out);
357}
358
359template <InterpolationPolicy interpolation>
360void NEWarpPerspectiveKernel<interpolation>::warp_undefined(const Window &window)
361{
362 // Don't increment in X and Y direction for the input tensor
363 // A pointer to the start of this plane is needed as base for the precomputed offsets
364 Window win_in(window);
365 win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
366 win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
367
368 Iterator in(_input, win_in);
369 Iterator out(_output, window);
370
371 const int min_x = _input->info()->valid_region().anchor[0];
372 const int max_x = min_x + _input->info()->valid_region().shape[0];
373 const int min_y = _input->info()->valid_region().anchor[1];
374 const int max_y = min_y + _input->info()->valid_region().shape[1];
375 const size_t stride = _input->info()->strides_in_bytes()[1];
376
377 // x0 = M00 * x + M01 * y + M02
378 // y0 = M10 * x + M11 * y + M12
379 // z0 = M20 * x + M21 * y + M22
380 // xn = x0 / z0
381 // yn = y0 / z0
382 const float M00 = _matrix[0];
383 const float M10 = _matrix[1];
384 const float M20 = _matrix[2];
385 const float M01 = _matrix[0 + 1 * 3];
386 const float M11 = _matrix[1 + 1 * 3];
387 const float M21 = _matrix[2 + 1 * 3];
388 const float M02 = _matrix[0 + 2 * 3];
389 const float M12 = _matrix[1 + 2 * 3];
390 const float M22 = _matrix[2 + 2 * 3];
391
392 // "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start
393 const float start_x0 = M00 * window.x().start();
394 const float start_y0 = M10 * window.x().start();
395 const float start_z0 = M20 * window.x().start();
396
397 // Current row
398 int y_cur = window.y().start();
399
400 // const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing
401 float const_x0 = M01 * y_cur + M02;
402 float const_y0 = M11 * y_cur + M12;
403 float const_z0 = M21 * y_cur + M22;
404
405 // Perspective warp coordinates
406 float x0 = start_x0 + const_x0;
407 float y0 = start_y0 + const_y0;
408 float z0 = start_z0 + const_z0;
409
410 execute_window_loop(window, [&](const Coordinates & id)
411 {
412 // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0
413 if(y_cur != id.y())
414 {
415 y_cur = id.y();
416
417 const_x0 = M01 * y_cur + M02;
418 const_y0 = M11 * y_cur + M12;
419 const_z0 = M21 * y_cur + M22;
420
421 x0 = start_x0 + const_x0;
422 y0 = start_y0 + const_y0;
423 z0 = start_z0 + const_z0;
424 }
425
426 const float xn = x0 / z0;
427 const float yn = y0 / z0;
428
429 // Only write to output if xn and yn are within the valid region.
430 // Otherwise the read value would be undefined.
431 if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x))
432 {
433 switch(interpolation)
434 {
435 case InterpolationPolicy::NEAREST_NEIGHBOR:
436 *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
437 break;
438 case InterpolationPolicy::BILINEAR:
439 *out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, xn, yn);
440 break;
441 default:
442 ARM_COMPUTE_ERROR("Interpolation not supported");
443 }
444 }
445
446 x0 += M00;
447 y0 += M10;
448 z0 += M20;
449 },
450 in, out);
451}
452
453template <InterpolationPolicy interpolation>
454void NEWarpPerspectiveKernel<interpolation>::warp_constant(const Window &window)
455{
456 // Don't increment in X and Y direction for the input tensor
457 // A pointer to the start of this plane is needed as base for the precomputed offsets
458 Window win_in(window);
459 win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
460 win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
461
462 Iterator in(_input, win_in);
463 Iterator out(_output, window);
464
465 const int min_x = _input->info()->valid_region().anchor[0];
466 const int max_x = min_x + _input->info()->valid_region().shape[0];
467 const int min_y = _input->info()->valid_region().anchor[1];
468 const int max_y = min_y + _input->info()->valid_region().shape[1];
469 const size_t stride = _input->info()->strides_in_bytes()[1];
470
471 // x0 = M00 * x + M01 * y + M02
472 // y0 = M10 * x + M11 * y + M12
473 // z0 = M20 * x + M21 * y + M22
474 // xn = x0 / z0
475 // yn = y0 / z0
476 const float M00 = _matrix[0];
477 const float M10 = _matrix[1];
478 const float M20 = _matrix[2];
479 const float M01 = _matrix[0 + 1 * 3];
480 const float M11 = _matrix[1 + 1 * 3];
481 const float M21 = _matrix[2 + 1 * 3];
482 const float M02 = _matrix[0 + 2 * 3];
483 const float M12 = _matrix[1 + 2 * 3];
484 const float M22 = _matrix[2 + 2 * 3];
485
486 // "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start
487 const float start_x0 = M00 * window.x().start();
488 const float start_y0 = M10 * window.x().start();
489 const float start_z0 = M20 * window.x().start();
490
491 // Current row
492 int y_cur = window.y().start();
493
494 // const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing
495 float const_x0 = M01 * y_cur + M02;
496 float const_y0 = M11 * y_cur + M12;
497 float const_z0 = M21 * y_cur + M22;
498
499 // Perspective warp coordinates
500 float x0 = start_x0 + const_x0;
501 float y0 = start_y0 + const_y0;
502 float z0 = start_z0 + const_z0;
503
504 execute_window_loop(window, [&](const Coordinates & id)
505 {
506 // Check if we are processing a new row. If so, update the current row (y_cur), x0, y0 and z0
507 if(y_cur != id.y())
508 {
509 y_cur = id.y();
510
511 const_x0 = M01 * y_cur + M02;
512 const_y0 = M11 * y_cur + M12;
513 const_z0 = M21 * y_cur + M22;
514
515 x0 = start_x0 + const_x0;
516 y0 = start_y0 + const_y0;
517 z0 = start_z0 + const_z0;
518 }
519
520 const float xn = x0 / z0;
521 const float yn = y0 / z0;
522
523 // Only use input values if xn and yn are within the valid region.
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100524 if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x))
525 {
526 switch(interpolation)
527 {
528 case InterpolationPolicy::NEAREST_NEIGHBOR:
529 *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
530 break;
531 case InterpolationPolicy::BILINEAR:
532 *out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, xn, yn);
533 break;
534 default:
535 ARM_COMPUTE_ERROR("Interpolation not supported");
536 }
537 }
538 else
539 {
Isabella Gottardi62031532017-07-04 11:21:28 +0100540 switch(interpolation)
541 {
542 case InterpolationPolicy::NEAREST_NEIGHBOR:
543 *out.ptr() = _constant_border_value;
544 break;
545 case InterpolationPolicy::BILINEAR:
546 {
547 const auto xi = clamp<int>(std::floor(xn), min_x - 1, max_x);
548 const auto yi = clamp<int>(std::floor(yn), min_y - 1, max_y);
549 const auto xi_1 = clamp<int>(std::floor(xn + 1), min_x - 1, max_x);
550 const auto yi_1 = clamp<int>(std::floor(yn + 1), min_y - 1, max_y);
551
552 const float dx = xn - std::floor(xn);
553 const float dy = yn - std::floor(yn);
554 const float dx1 = 1.0f - dx;
555 const float dy1 = 1.0f - dy;
556
557 const float a00 = *(in.ptr() + xi + yi * stride);
558 const float a01 = *(in.ptr() + xi_1 + yi * stride);
559 const float a10 = *(in.ptr() + xi + yi_1 * stride);
560 const float a11 = *(in.ptr() + xi_1 + yi_1 * stride);
561
562 *out.ptr() = a00 * (dx1 * dy1) + a01 * (dx * dy1) + a10 * (dx1 * dy) + a11 * (dx * dy);
563 }
564 break;
565 default:
566 ARM_COMPUTE_ERROR("Interpolation not supported");
567 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100568 }
569
570 x0 += M00;
571 y0 += M10;
572 z0 += M20;
573 },
574 in, out);
575}
576
577template <InterpolationPolicy interpolation>
578void NEWarpPerspectiveKernel<interpolation>::warp_replicate(const Window &window)
579{
580 // Don't increment in X and Y direction for the input tensor
581 // A pointer to the start of this plane is needed as base for the precomputed offsets
582 Window win_in(window);
583 win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
584 win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
585
586 Iterator in(_input, win_in);
587 Iterator out(_output, window);
588
589 const int min_x = _input->info()->valid_region().anchor[0];
590 const int max_x = min_x + _input->info()->valid_region().shape[0];
591 const int min_y = _input->info()->valid_region().anchor[1];
592 const int max_y = min_y + _input->info()->valid_region().shape[1];
593 const size_t stride = _input->info()->strides_in_bytes()[1];
594
595 // Current row
596 int y_cur = window.y().start();
597
598 // x0 = M00 * x + M01 * y + M02
599 // y0 = M10 * x + M11 * y + M12
600 // z0 = M20 * x + M21 * y + M22
601 // xn = x0 / z0
602 // yn = y0 / z0
603 const float M00 = _matrix[0];
604 const float M10 = _matrix[1];
605 const float M20 = _matrix[2];
606 const float M01 = _matrix[0 + 1 * 3];
607 const float M11 = _matrix[1 + 1 * 3];
608 const float M21 = _matrix[2 + 1 * 3];
609 const float M02 = _matrix[0 + 2 * 3];
610 const float M12 = _matrix[1 + 2 * 3];
611 const float M22 = _matrix[2 + 2 * 3];
612
613 // "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start
614 const float start_x0 = M00 * window.x().start();
615 const float start_y0 = M10 * window.x().start();
616 const float start_z0 = M20 * window.x().start();
617
618 // const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing
619 float const_x0 = M01 * y_cur + M02;
620 float const_y0 = M11 * y_cur + M12;
621 float const_z0 = M21 * y_cur + M22;
622
623 // Perspective warp coordinates
624 float x0 = start_x0 + const_x0;
625 float y0 = start_y0 + const_y0;
626 float z0 = start_z0 + const_z0;
627
628 execute_window_loop(window, [&](const Coordinates & id)
629 {
630 // Check if we are processing a new row. If so, update the current row (y_cur), x0, y0 and z0
631 if(y_cur != id.y())
632 {
633 y_cur = id.y();
634
635 const_x0 = M01 * y_cur + M02;
636 const_y0 = M11 * y_cur + M12;
637 const_z0 = M21 * y_cur + M22;
638
639 x0 = start_x0 + const_x0;
640 y0 = start_y0 + const_y0;
641 z0 = start_z0 + const_z0;
642 }
643
644 const float xn = x0 / z0;
645 const float yn = y0 / z0;
646
647 // Only load from (x0, y0) if the point is within the valid region.
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100648 if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x))
649 {
650 switch(interpolation)
651 {
652 case InterpolationPolicy::NEAREST_NEIGHBOR:
653 *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
654 break;
655 case InterpolationPolicy::BILINEAR:
656 *out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, xn, yn);
657 break;
658 default:
659 ARM_COMPUTE_ERROR("Interpolation not supported");
660 }
661 }
662 else
663 {
664 // Clamp coordinates
Isabella Gottardi62031532017-07-04 11:21:28 +0100665 const auto xi = clamp<int>(std::floor(xn), min_x, max_x - 1);
666 const auto yi = clamp<int>(std::floor(yn), min_y, max_y - 1);
667 switch(interpolation)
668 {
669 case InterpolationPolicy::NEAREST_NEIGHBOR:
670 *out.ptr() = *(in.ptr() + xi + yi * stride);
671 break;
672 case InterpolationPolicy::BILINEAR:
673 {
674 const auto xi_1 = clamp<int>(std::floor(xn + 1), min_x, max_x - 1);
675 const auto yi_1 = clamp<int>(std::floor(yn + 1), min_y, max_y - 1);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100676
Isabella Gottardi62031532017-07-04 11:21:28 +0100677 const float dx = xn - std::floor(xn);
678 const float dy = yn - std::floor(yn);
679 const float dx1 = 1.0f - dx;
680 const float dy1 = 1.0f - dy;
681
682 const float a00 = *(in.ptr() + xi + yi * stride);
683 const float a01 = *(in.ptr() + xi_1 + yi * stride);
684 const float a10 = *(in.ptr() + xi + yi_1 * stride);
685 const float a11 = *(in.ptr() + xi_1 + yi_1 * stride);
686
687 *out.ptr() = a00 * (dx1 * dy1) + a01 * (dx * dy1) + a10 * (dx1 * dy) + a11 * (dx * dy);
688 }
689 break;
690 default:
691 ARM_COMPUTE_ERROR("Interpolation not supported");
692 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100693 }
694
695 x0 += M00;
696 y0 += M10;
697 z0 += M20;
698 },
699 in, out);
700}
701
702template class arm_compute::NEWarpAffineKernel<InterpolationPolicy::NEAREST_NEIGHBOR>;
703template class arm_compute::NEWarpAffineKernel<InterpolationPolicy::BILINEAR>;
704template class arm_compute::NEWarpPerspectiveKernel<InterpolationPolicy::NEAREST_NEIGHBOR>;
705template class arm_compute::NEWarpPerspectiveKernel<InterpolationPolicy::BILINEAR>;