Blame - src/core/NEON/kernels/NEWarpKernel.cpp - ml/ComputeLibrary

blob: 6c90a334af4789422c6241781ed9b1a721ccc59a [file] [log] [blame]

Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1	/*
				2	* Copyright (c) 2016, 2017 ARM Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "arm_compute/core/NEON/kernels/NEWarpKernel.h"
				25
				26	#include "arm_compute/core/AccessWindowStatic.h"
				27	#include "arm_compute/core/Coordinates.h"
				28	#include "arm_compute/core/Error.h"
				29	#include "arm_compute/core/Helpers.h"
				30	#include "arm_compute/core/ITensor.h"
				31	#include "arm_compute/core/TensorInfo.h"
				32	#include "arm_compute/core/Validate.h"
				33	#include "arm_compute/core/Window.h"
				34
				35	#include <cstddef>
				36
				37	using namespace arm_compute;
				38
				39	namespace
				40	{
				41	inline uint8_t nearest_interpolation(const uint8_t *in_ptr, int x, int y, size_t stride)
				42	{
				43	return in_ptr[x + y * stride];
				44	}
				45	} // namespace
				46
				47	INEWarpKernel::INEWarpKernel()
				48	: _func(nullptr), _input(nullptr), _output(nullptr), _constant_border_value(0), _matrix(nullptr)
				49	{
				50	}
				51
				52	void INEWarpKernel::run(const Window &window)
				53	{
				54	ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
				55	ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
				56	ARM_COMPUTE_ERROR_ON(_func == nullptr);
				57
				58	(this->*_func)(window);
				59	}
				60
				61	void INEWarpKernel::configure(const ITensor input, ITensor output, const float *matrix, BorderMode border_mode, uint8_t constant_border_value)
				62	{
				63	ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
				64	ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
				65	ARM_COMPUTE_ERROR_ON(nullptr == matrix);
				66
				67	_matrix = matrix;
				68	_constant_border_value = constant_border_value;
				69
				70	switch(border_mode)
				71	{
				72	case BorderMode::UNDEFINED:
				73	_func = &INEWarpKernel::warp_undefined;
				74	break;
				75	case BorderMode::CONSTANT:
				76	_func = &INEWarpKernel::warp_constant;
				77	break;
				78	case BorderMode::REPLICATE:
				79	_func = &INEWarpKernel::warp_replicate;
				80	break;
				81	default:
				82	ARM_COMPUTE_ERROR("Border mode not supported");
				83	break;
				84	}
				85
				86	_input = input;
				87	_output = output;
				88
				89	// Configure kernel window
				90	Window win = calculate_max_window(*output->info(), Steps(1U));
				91
				92	const ValidRegion &input_valid_region = input->info()->valid_region();
				93
				94	// Reads can occur within the valid region of the input
				95	AccessWindowStatic input_access(input->info(),
				96	input_valid_region.anchor[0], input_valid_region.anchor[1],
				97	input_valid_region.anchor[0] + input_valid_region.shape[0],
				98	input_valid_region.anchor[1] + input_valid_region.shape[1]);
				99	AccessWindowHorizontal output_access(output->info(), 0, 1);
				100
				101	update_window_and_padding(win, input_access, output_access);
				102
				103	output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
				104
				105	INEKernel::configure(win);
				106	}
				107
				108	template <InterpolationPolicy interpolation>
				109	void NEWarpAffineKernel<interpolation>::warp_undefined(const Window &window)
				110	{
				111	// Don't increment in X and Y direction for the input tensor
				112	// A pointer to the start of this plane is needed as base for the precomputed offsets
				113	Window win_in(window);
				114	win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
				115	win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
				116
				117	Iterator in(_input, win_in);
				118	Iterator out(_output, window);
				119
				120	const int min_x = _input->info()->valid_region().anchor[0];
				121	const int max_x = min_x + _input->info()->valid_region().shape[0];
				122	const int min_y = _input->info()->valid_region().anchor[1];
				123	const int max_y = min_y + _input->info()->valid_region().shape[1];
				124	const size_t stride = _input->info()->strides_in_bytes()[1];
				125
				126	// x0 = M01 * x + M01 * y + M02
				127	// y0 = M11 * x + M11 * y + M12
				128	const float M00 = _matrix[0];
				129	const float M10 = _matrix[1];
				130	const float M01 = _matrix[0 + 1 * 2];
				131	const float M11 = _matrix[1 + 1 * 2];
				132	const float M02 = _matrix[0 + 2 * 2];
				133	const float M12 = _matrix[1 + 2 * 2];
				134
				135	// "M00 * x" and "M10 * x", when x = window.x.start
				136	const float start_x0 = M00 * window.x().start();
				137	const float start_y0 = M10 * window.x().start();
				138
				139	// Current row
				140	int y_cur = window.y().start();
				141
				142	// const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing
				143	float const_x0 = M01 * y_cur + M02;
				144	float const_y0 = M11 * y_cur + M12;
				145
				146	// Affine warp coordinates
				147	float x0 = start_x0 + const_x0;
				148	float y0 = start_y0 + const_y0;
				149
				150	execute_window_loop(window, [&](const Coordinates & id)
				151	{
				152	// Check if we are processing a new row. If so, update the current row (y_cur), x0 and y0
				153	if(y_cur != id.y())
				154	{
				155	y_cur = id.y();
				156
				157	const_x0 = M01 * y_cur + M02;
				158	const_y0 = M11 * y_cur + M12;
				159
				160	x0 = start_x0 + const_x0;
				161	y0 = start_y0 + const_y0;
				162	}
				163
				164	// Only write to output if x0 and y0 are within the valid region.
				165	// Otherwise the read value would be undefined.
				166	if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x))
				167	{
				168	switch(interpolation)
				169	{
				170	case InterpolationPolicy::NEAREST_NEIGHBOR:
				171	*out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
				172	break;
				173	case InterpolationPolicy::BILINEAR:
				174	*out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, x0, y0);
				175	break;
				176	default:
				177	ARM_COMPUTE_ERROR("Interpolation not supported");
				178	}
				179	}
				180
				181	x0 += M00;
				182	y0 += M10;
				183	},
				184	in, out);
				185	}
				186
				187	template <InterpolationPolicy interpolation>
				188	void NEWarpAffineKernel<interpolation>::warp_constant(const Window &window)
				189	{
				190	// Don't increment in X and Y direction for the input tensor
				191	// A pointer to the start of this plane is needed as base for the precomputed offsets
				192	Window win_in(window);
				193	win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
				194	win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
				195
				196	Iterator in(_input, win_in);
				197	Iterator out(_output, window);
				198
				199	const int min_x = _input->info()->valid_region().anchor[0];
				200	const int max_x = min_x + _input->info()->valid_region().shape[0];
				201	const int min_y = _input->info()->valid_region().anchor[1];
				202	const int max_y = min_y + _input->info()->valid_region().shape[1];
				203	const size_t stride = _input->info()->strides_in_bytes()[1];
				204
				205	// x0 = M01 * x + M01 * y + M02
				206	// y0 = M11 * x + M11 * y + M12
				207	const float M00 = _matrix[0];
				208	const float M10 = _matrix[1];
				209	const float M01 = _matrix[0 + 1 * 2];
				210	const float M11 = _matrix[1 + 1 * 2];
				211	const float M02 = _matrix[0 + 2 * 2];
				212	const float M12 = _matrix[1 + 2 * 2];
				213
				214	// "M00 * x" and "M10 * x", when x = window.x.start
				215	const float start_x0 = M00 * window.x().start();
				216	const float start_y0 = M10 * window.x().start();
				217
				218	// Current row
				219	int y_cur = window.y().start();
				220
				221	// const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing
				222	float const_x0 = M01 * y_cur + M02;
				223	float const_y0 = M11 * y_cur + M12;
				224
				225	// Affine warp coordinates
				226	float x0 = start_x0 + const_x0;
				227	float y0 = start_y0 + const_y0;
				228
				229	execute_window_loop(window, [&](const Coordinates & id)
				230	{
				231	// Check if we are processing a new row. If so, update the current row (y_cur), x0 and y0
				232	if(y_cur != id.y())
				233	{
				234	y_cur = id.y();
				235
				236	const_x0 = M01 * y_cur + M02;
				237	const_y0 = M11 * y_cur + M12;
				238
				239	x0 = start_x0 + const_x0;
				240	y0 = start_y0 + const_y0;
				241	}
				242
				243	// Only use input values if x0 and y0 are within the valid region.
				244	// Otherwise write the constant border value.
				245	if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x))
				246	{
				247	switch(interpolation)
				248	{
				249	case InterpolationPolicy::NEAREST_NEIGHBOR:
				250	*out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
				251	break;
				252	case InterpolationPolicy::BILINEAR:
				253	*out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, x0, y0);
				254	break;
				255	default:
				256	ARM_COMPUTE_ERROR("Interpolation not supported");
				257	}
				258	}
				259	else
				260	{
				261	*out.ptr() = _constant_border_value;
				262	}
				263
				264	x0 += M00;
				265	y0 += M10;
				266	},
				267	in, out);
				268	}
				269
				270	template <InterpolationPolicy interpolation>
				271	void NEWarpAffineKernel<interpolation>::warp_replicate(const Window &window)
				272	{
				273	// Don't increment in X and Y direction for the input tensor
				274	// A pointer to the start of this plane is needed as base for the precomputed offsets
				275	Window win_in(window);
				276	win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
				277	win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
				278
				279	Iterator in(_input, win_in);
				280	Iterator out(_output, window);
				281
				282	const int min_x = _input->info()->valid_region().anchor[0];
				283	const int max_x = min_x + _input->info()->valid_region().shape[0];
				284	const int min_y = _input->info()->valid_region().anchor[1];
				285	const int max_y = min_y + _input->info()->valid_region().shape[1];
				286	const size_t stride = _input->info()->strides_in_bytes()[1];
				287
				288	// Current row
				289	int y_cur = window.y().start();
				290
				291	const float M00 = _matrix[0];
				292	const float M10 = _matrix[1];
				293	const float M01 = _matrix[0 + 1 * 2];
				294	const float M11 = _matrix[1 + 1 * 2];
				295	const float M02 = _matrix[0 + 2 * 2];
				296	const float M12 = _matrix[1 + 2 * 2];
				297
				298	// "M00 * x" and "M10 * x", when x = window.x.start
				299	const float start_x0 = M00 * window.x().start();
				300	const float start_y0 = M10 * window.x().start();
				301
				302	// const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing
				303	float const_x0 = M01 * y_cur + M02;
				304	float const_y0 = M11 * y_cur + M12;
				305
				306	float x0 = start_x0 + const_x0;
				307	float y0 = start_y0 + const_y0;
				308
				309	execute_window_loop(window, [&](const Coordinates & id)
				310	{
				311	// Check if we are processing a new row. If so, update the current row (y_cur), x0 and y0
				312	if(y_cur != id.y())
				313	{
				314	y_cur = id.y();
				315
				316	const_x0 = M01 * y_cur + M02;
				317	const_y0 = M11 * y_cur + M12;
				318
				319	x0 = start_x0 + const_x0;
				320	y0 = start_y0 + const_y0;
				321	}
				322
				323	// Only load from (x0, y0) if the point is within the valid region.
				324	// Otherwise load from the edge of the valid region.
				325	if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x))
				326	{
				327	switch(interpolation)
				328	{
				329	case InterpolationPolicy::NEAREST_NEIGHBOR:
				330	*out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
				331	break;
				332	case InterpolationPolicy::BILINEAR:
				333	*out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, x0, y0);
				334	break;
				335	default:
				336	ARM_COMPUTE_ERROR("Interpolation not supported");
				337	}
				338	}
				339	else
				340	{
				341	// Clamp coordinates
				342	const auto xi = clamp<int>(x0, min_x, max_x - 1);
				343	const auto yi = clamp<int>(y0, min_y, max_y - 1);
				344
				345	out.ptr() = (in.ptr() + xi + yi * stride);
				346	}
				347
				348	x0 += M00;
				349	y0 += M10;
				350	},
				351	in, out);
				352	}
				353
				354	template <InterpolationPolicy interpolation>
				355	void NEWarpPerspectiveKernel<interpolation>::warp_undefined(const Window &window)
				356	{
				357	// Don't increment in X and Y direction for the input tensor
				358	// A pointer to the start of this plane is needed as base for the precomputed offsets
				359	Window win_in(window);
				360	win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
				361	win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
				362
				363	Iterator in(_input, win_in);
				364	Iterator out(_output, window);
				365
				366	const int min_x = _input->info()->valid_region().anchor[0];
				367	const int max_x = min_x + _input->info()->valid_region().shape[0];
				368	const int min_y = _input->info()->valid_region().anchor[1];
				369	const int max_y = min_y + _input->info()->valid_region().shape[1];
				370	const size_t stride = _input->info()->strides_in_bytes()[1];
				371
				372	// x0 = M00 * x + M01 * y + M02
				373	// y0 = M10 * x + M11 * y + M12
				374	// z0 = M20 * x + M21 * y + M22
				375	// xn = x0 / z0
				376	// yn = y0 / z0
				377	const float M00 = _matrix[0];
				378	const float M10 = _matrix[1];
				379	const float M20 = _matrix[2];
				380	const float M01 = _matrix[0 + 1 * 3];
				381	const float M11 = _matrix[1 + 1 * 3];
				382	const float M21 = _matrix[2 + 1 * 3];
				383	const float M02 = _matrix[0 + 2 * 3];
				384	const float M12 = _matrix[1 + 2 * 3];
				385	const float M22 = _matrix[2 + 2 * 3];
				386
				387	// "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start
				388	const float start_x0 = M00 * window.x().start();
				389	const float start_y0 = M10 * window.x().start();
				390	const float start_z0 = M20 * window.x().start();
				391
				392	// Current row
				393	int y_cur = window.y().start();
				394
				395	// const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing
				396	float const_x0 = M01 * y_cur + M02;
				397	float const_y0 = M11 * y_cur + M12;
				398	float const_z0 = M21 * y_cur + M22;
				399
				400	// Perspective warp coordinates
				401	float x0 = start_x0 + const_x0;
				402	float y0 = start_y0 + const_y0;
				403	float z0 = start_z0 + const_z0;
				404
				405	execute_window_loop(window, [&](const Coordinates & id)
				406	{
				407	// Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0
				408	if(y_cur != id.y())
				409	{
				410	y_cur = id.y();
				411
				412	const_x0 = M01 * y_cur + M02;
				413	const_y0 = M11 * y_cur + M12;
				414	const_z0 = M21 * y_cur + M22;
				415
				416	x0 = start_x0 + const_x0;
				417	y0 = start_y0 + const_y0;
				418	z0 = start_z0 + const_z0;
				419	}
				420
				421	const float xn = x0 / z0;
				422	const float yn = y0 / z0;
				423
				424	// Only write to output if xn and yn are within the valid region.
				425	// Otherwise the read value would be undefined.
				426	if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x))
				427	{
				428	switch(interpolation)
				429	{
				430	case InterpolationPolicy::NEAREST_NEIGHBOR:
				431	*out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
				432	break;
				433	case InterpolationPolicy::BILINEAR:
				434	*out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, xn, yn);
				435	break;
				436	default:
				437	ARM_COMPUTE_ERROR("Interpolation not supported");
				438	}
				439	}
				440
				441	x0 += M00;
				442	y0 += M10;
				443	z0 += M20;
				444	},
				445	in, out);
				446	}
				447
				448	template <InterpolationPolicy interpolation>
				449	void NEWarpPerspectiveKernel<interpolation>::warp_constant(const Window &window)
				450	{
				451	// Don't increment in X and Y direction for the input tensor
				452	// A pointer to the start of this plane is needed as base for the precomputed offsets
				453	Window win_in(window);
				454	win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
				455	win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
				456
				457	Iterator in(_input, win_in);
				458	Iterator out(_output, window);
				459
				460	const int min_x = _input->info()->valid_region().anchor[0];
				461	const int max_x = min_x + _input->info()->valid_region().shape[0];
				462	const int min_y = _input->info()->valid_region().anchor[1];
				463	const int max_y = min_y + _input->info()->valid_region().shape[1];
				464	const size_t stride = _input->info()->strides_in_bytes()[1];
				465
				466	// x0 = M00 * x + M01 * y + M02
				467	// y0 = M10 * x + M11 * y + M12
				468	// z0 = M20 * x + M21 * y + M22
				469	// xn = x0 / z0
				470	// yn = y0 / z0
				471	const float M00 = _matrix[0];
				472	const float M10 = _matrix[1];
				473	const float M20 = _matrix[2];
				474	const float M01 = _matrix[0 + 1 * 3];
				475	const float M11 = _matrix[1 + 1 * 3];
				476	const float M21 = _matrix[2 + 1 * 3];
				477	const float M02 = _matrix[0 + 2 * 3];
				478	const float M12 = _matrix[1 + 2 * 3];
				479	const float M22 = _matrix[2 + 2 * 3];
				480
				481	// "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start
				482	const float start_x0 = M00 * window.x().start();
				483	const float start_y0 = M10 * window.x().start();
				484	const float start_z0 = M20 * window.x().start();
				485
				486	// Current row
				487	int y_cur = window.y().start();
				488
				489	// const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing
				490	float const_x0 = M01 * y_cur + M02;
				491	float const_y0 = M11 * y_cur + M12;
				492	float const_z0 = M21 * y_cur + M22;
				493
				494	// Perspective warp coordinates
				495	float x0 = start_x0 + const_x0;
				496	float y0 = start_y0 + const_y0;
				497	float z0 = start_z0 + const_z0;
				498
				499	execute_window_loop(window, [&](const Coordinates & id)
				500	{
				501	// Check if we are processing a new row. If so, update the current row (y_cur), x0, y0 and z0
				502	if(y_cur != id.y())
				503	{
				504	y_cur = id.y();
				505
				506	const_x0 = M01 * y_cur + M02;
				507	const_y0 = M11 * y_cur + M12;
				508	const_z0 = M21 * y_cur + M22;
				509
				510	x0 = start_x0 + const_x0;
				511	y0 = start_y0 + const_y0;
				512	z0 = start_z0 + const_z0;
				513	}
				514
				515	const float xn = x0 / z0;
				516	const float yn = y0 / z0;
				517
				518	// Only use input values if xn and yn are within the valid region.
				519	// Otherwise write the constant border value.
				520	if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x))
				521	{
				522	switch(interpolation)
				523	{
				524	case InterpolationPolicy::NEAREST_NEIGHBOR:
				525	*out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
				526	break;
				527	case InterpolationPolicy::BILINEAR:
				528	*out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, xn, yn);
				529	break;
				530	default:
				531	ARM_COMPUTE_ERROR("Interpolation not supported");
				532	}
				533	}
				534	else
				535	{
				536	*out.ptr() = _constant_border_value;
				537	}
				538
				539	x0 += M00;
				540	y0 += M10;
				541	z0 += M20;
				542	},
				543	in, out);
				544	}
				545
				546	template <InterpolationPolicy interpolation>
				547	void NEWarpPerspectiveKernel<interpolation>::warp_replicate(const Window &window)
				548	{
				549	// Don't increment in X and Y direction for the input tensor
				550	// A pointer to the start of this plane is needed as base for the precomputed offsets
				551	Window win_in(window);
				552	win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
				553	win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
				554
				555	Iterator in(_input, win_in);
				556	Iterator out(_output, window);
				557
				558	const int min_x = _input->info()->valid_region().anchor[0];
				559	const int max_x = min_x + _input->info()->valid_region().shape[0];
				560	const int min_y = _input->info()->valid_region().anchor[1];
				561	const int max_y = min_y + _input->info()->valid_region().shape[1];
				562	const size_t stride = _input->info()->strides_in_bytes()[1];
				563
				564	// Current row
				565	int y_cur = window.y().start();
				566
				567	// x0 = M00 * x + M01 * y + M02
				568	// y0 = M10 * x + M11 * y + M12
				569	// z0 = M20 * x + M21 * y + M22
				570	// xn = x0 / z0
				571	// yn = y0 / z0
				572	const float M00 = _matrix[0];
				573	const float M10 = _matrix[1];
				574	const float M20 = _matrix[2];
				575	const float M01 = _matrix[0 + 1 * 3];
				576	const float M11 = _matrix[1 + 1 * 3];
				577	const float M21 = _matrix[2 + 1 * 3];
				578	const float M02 = _matrix[0 + 2 * 3];
				579	const float M12 = _matrix[1 + 2 * 3];
				580	const float M22 = _matrix[2 + 2 * 3];
				581
				582	// "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start
				583	const float start_x0 = M00 * window.x().start();
				584	const float start_y0 = M10 * window.x().start();
				585	const float start_z0 = M20 * window.x().start();
				586
				587	// const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing
				588	float const_x0 = M01 * y_cur + M02;
				589	float const_y0 = M11 * y_cur + M12;
				590	float const_z0 = M21 * y_cur + M22;
				591
				592	// Perspective warp coordinates
				593	float x0 = start_x0 + const_x0;
				594	float y0 = start_y0 + const_y0;
				595	float z0 = start_z0 + const_z0;
				596
				597	execute_window_loop(window, [&](const Coordinates & id)
				598	{
				599	// Check if we are processing a new row. If so, update the current row (y_cur), x0, y0 and z0
				600	if(y_cur != id.y())
				601	{
				602	y_cur = id.y();
				603
				604	const_x0 = M01 * y_cur + M02;
				605	const_y0 = M11 * y_cur + M12;
				606	const_z0 = M21 * y_cur + M22;
				607
				608	x0 = start_x0 + const_x0;
				609	y0 = start_y0 + const_y0;
				610	z0 = start_z0 + const_z0;
				611	}
				612
				613	const float xn = x0 / z0;
				614	const float yn = y0 / z0;
				615
				616	// Only load from (x0, y0) if the point is within the valid region.
				617	// Otherwise load from the edge of the valid region.
				618	if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x))
				619	{
				620	switch(interpolation)
				621	{
				622	case InterpolationPolicy::NEAREST_NEIGHBOR:
				623	*out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
				624	break;
				625	case InterpolationPolicy::BILINEAR:
				626	*out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, xn, yn);
				627	break;
				628	default:
				629	ARM_COMPUTE_ERROR("Interpolation not supported");
				630	}
				631	}
				632	else
				633	{
				634	// Clamp coordinates
				635	const auto xi = clamp<int>(x0, min_x, max_x - 1);
				636	const auto yi = clamp<int>(y0, min_y, max_y - 1);
				637
				638	out.ptr() = (in.ptr() + xi + yi * stride);
				639	}
				640
				641	x0 += M00;
				642	y0 += M10;
				643	z0 += M20;
				644	},
				645	in, out);
				646	}
				647
				648	template class arm_compute::NEWarpAffineKernel<InterpolationPolicy::NEAREST_NEIGHBOR>;
				649	template class arm_compute::NEWarpAffineKernel<InterpolationPolicy::BILINEAR>;
				650	template class arm_compute::NEWarpPerspectiveKernel<InterpolationPolicy::NEAREST_NEIGHBOR>;
				651	template class arm_compute::NEWarpPerspectiveKernel<InterpolationPolicy::BILINEAR>;