Blame - src/core/NEON/kernels/NEWarpKernel.cpp - ml/ComputeLibrary

blob: 5ca1395b472b108e192e1f5ee6d9f4714319d356 [file] [log] [blame]

Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1	/*
				2	* Copyright (c) 2016, 2017 ARM Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "arm_compute/core/NEON/kernels/NEWarpKernel.h"
				25
				26	#include "arm_compute/core/AccessWindowStatic.h"
				27	#include "arm_compute/core/Coordinates.h"
				28	#include "arm_compute/core/Error.h"
				29	#include "arm_compute/core/Helpers.h"
				30	#include "arm_compute/core/ITensor.h"
				31	#include "arm_compute/core/TensorInfo.h"
				32	#include "arm_compute/core/Validate.h"
				33	#include "arm_compute/core/Window.h"
				34
				35	#include <cstddef>
				36
				37	using namespace arm_compute;
				38
				namespace
				{
				/** Read the input pixel located at integer coordinates (x, y).
				 *
				 * The warp routines in this file pass floating-point coordinates, which are
				 * implicitly converted to int at the call site before this function runs.
				 *
				 * @param[in] in_ptr Pointer to the byte addressed as (0, 0).
				 * @param[in] x      Column of the pixel to read. Negative values address bytes before @p in_ptr.
				 * @param[in] y      Row of the pixel to read. Negative values address rows before @p in_ptr.
				 * @param[in] stride Distance in bytes between two consecutive rows.
				 *
				 * @return The byte stored at in_ptr + x + y * stride.
				 */
				inline uint8_t nearest_interpolation(const uint8_t *in_ptr, int x, int y, size_t stride)
				{
				    // Compute the offset in a signed type: mixing int with the unsigned stride would
				    // turn a negative row into a huge unsigned offset that only works through wrap-around
				    const std::ptrdiff_t offset = static_cast<std::ptrdiff_t>(y) * static_cast<std::ptrdiff_t>(stride) + x;
				    return in_ptr[offset];
				}
				} // namespace
				46
				47	INEWarpKernel::INEWarpKernel()
				48	: _func(nullptr), _input(nullptr), _output(nullptr), _constant_border_value(0), _matrix(nullptr)
				49	{
				50	}
				51
Isabella Gottardi	f9bae2e	2017-07-28 17:24:08 +0100	[diff] [blame]	52	BorderSize INEWarpKernel::border_size() const
				53	{
				54	return BorderSize(1);
				55	}
				56
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	57	void INEWarpKernel::run(const Window &window)
				58	{
				59	ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
				60	ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
				61	ARM_COMPUTE_ERROR_ON(_func == nullptr);
				62
				63	(this->*_func)(window);
				64	}
				65
				66	void INEWarpKernel::configure(const ITensor input, ITensor output, const float *matrix, BorderMode border_mode, uint8_t constant_border_value)
				67	{
				68	ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
				69	ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
				70	ARM_COMPUTE_ERROR_ON(nullptr == matrix);
				71
				72	_matrix = matrix;
				73	_constant_border_value = constant_border_value;
				74
				75	switch(border_mode)
				76	{
				77	case BorderMode::UNDEFINED:
				78	_func = &INEWarpKernel::warp_undefined;
				79	break;
				80	case BorderMode::CONSTANT:
				81	_func = &INEWarpKernel::warp_constant;
				82	break;
				83	case BorderMode::REPLICATE:
				84	_func = &INEWarpKernel::warp_replicate;
				85	break;
				86	default:
				87	ARM_COMPUTE_ERROR("Border mode not supported");
				88	break;
				89	}
				90
				91	_input = input;
				92	_output = output;
				93
				94	// Configure kernel window
				95	Window win = calculate_max_window(*output->info(), Steps(1U));
				96
				97	const ValidRegion &input_valid_region = input->info()->valid_region();
				98
				99	// Reads can occur within the valid region of the input
				100	AccessWindowStatic input_access(input->info(),
Isabella Gottardi	f9bae2e	2017-07-28 17:24:08 +0100	[diff] [blame]	101	input_valid_region.anchor[0] - border_size().left, input_valid_region.anchor[1] - border_size().top,
				102	input_valid_region.anchor[0] + input_valid_region.shape[0] + border_size().right,
				103	input_valid_region.anchor[1] + input_valid_region.shape[1] + border_size().bottom);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	104	AccessWindowHorizontal output_access(output->info(), 0, 1);
				105
				106	update_window_and_padding(win, input_access, output_access);
				107
				108	output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
				109
				110	INEKernel::configure(win);
				111	}
				112
				113	template <InterpolationPolicy interpolation>
				114	void NEWarpAffineKernel<interpolation>::warp_undefined(const Window &window)
				115	{
				116	// Don't increment in X and Y direction for the input tensor
				117	// A pointer to the start of this plane is needed as base for the precomputed offsets
				118	Window win_in(window);
				119	win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
				120	win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
				121
				122	Iterator in(_input, win_in);
				123	Iterator out(_output, window);
				124
				125	const int min_x = _input->info()->valid_region().anchor[0];
				126	const int max_x = min_x + _input->info()->valid_region().shape[0];
				127	const int min_y = _input->info()->valid_region().anchor[1];
				128	const int max_y = min_y + _input->info()->valid_region().shape[1];
				129	const size_t stride = _input->info()->strides_in_bytes()[1];
				130
				131	// x0 = M01 * x + M01 * y + M02
				132	// y0 = M11 * x + M11 * y + M12
				133	const float M00 = _matrix[0];
				134	const float M10 = _matrix[1];
				135	const float M01 = _matrix[0 + 1 * 2];
				136	const float M11 = _matrix[1 + 1 * 2];
				137	const float M02 = _matrix[0 + 2 * 2];
				138	const float M12 = _matrix[1 + 2 * 2];
				139
				140	// "M00 * x" and "M10 * x", when x = window.x.start
				141	const float start_x0 = M00 * window.x().start();
				142	const float start_y0 = M10 * window.x().start();
				143
				144	// Current row
				145	int y_cur = window.y().start();
				146
				147	// const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing
				148	float const_x0 = M01 * y_cur + M02;
				149	float const_y0 = M11 * y_cur + M12;
				150
				151	// Affine warp coordinates
				152	float x0 = start_x0 + const_x0;
				153	float y0 = start_y0 + const_y0;
				154
				155	execute_window_loop(window, [&](const Coordinates & id)
				156	{
				157	// Check if we are processing a new row. If so, update the current row (y_cur), x0 and y0
				158	if(y_cur != id.y())
				159	{
				160	y_cur = id.y();
				161
				162	const_x0 = M01 * y_cur + M02;
				163	const_y0 = M11 * y_cur + M12;
				164
				165	x0 = start_x0 + const_x0;
				166	y0 = start_y0 + const_y0;
				167	}
				168
				169	// Only write to output if x0 and y0 are within the valid region.
				170	// Otherwise the read value would be undefined.
				171	if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x))
				172	{
				173	switch(interpolation)
				174	{
				175	case InterpolationPolicy::NEAREST_NEIGHBOR:
				176	*out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
				177	break;
				178	case InterpolationPolicy::BILINEAR:
				179	*out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, x0, y0);
				180	break;
				181	default:
				182	ARM_COMPUTE_ERROR("Interpolation not supported");
				183	}
				184	}
				185
				186	x0 += M00;
				187	y0 += M10;
				188	},
				189	in, out);
				190	}
				191
				192	template <InterpolationPolicy interpolation>
				193	void NEWarpAffineKernel<interpolation>::warp_constant(const Window &window)
				194	{
				195	// Don't increment in X and Y direction for the input tensor
				196	// A pointer to the start of this plane is needed as base for the precomputed offsets
				197	Window win_in(window);
				198	win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
				199	win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
				200
				201	Iterator in(_input, win_in);
				202	Iterator out(_output, window);
				203
				204	const int min_x = _input->info()->valid_region().anchor[0];
				205	const int max_x = min_x + _input->info()->valid_region().shape[0];
				206	const int min_y = _input->info()->valid_region().anchor[1];
				207	const int max_y = min_y + _input->info()->valid_region().shape[1];
				208	const size_t stride = _input->info()->strides_in_bytes()[1];
				209
				210	// x0 = M01 * x + M01 * y + M02
				211	// y0 = M11 * x + M11 * y + M12
				212	const float M00 = _matrix[0];
				213	const float M10 = _matrix[1];
				214	const float M01 = _matrix[0 + 1 * 2];
				215	const float M11 = _matrix[1 + 1 * 2];
				216	const float M02 = _matrix[0 + 2 * 2];
				217	const float M12 = _matrix[1 + 2 * 2];
				218
				219	// "M00 * x" and "M10 * x", when x = window.x.start
				220	const float start_x0 = M00 * window.x().start();
				221	const float start_y0 = M10 * window.x().start();
				222
				223	// Current row
				224	int y_cur = window.y().start();
				225
				226	// const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing
				227	float const_x0 = M01 * y_cur + M02;
				228	float const_y0 = M11 * y_cur + M12;
				229
				230	// Affine warp coordinates
				231	float x0 = start_x0 + const_x0;
				232	float y0 = start_y0 + const_y0;
				233
				234	execute_window_loop(window, [&](const Coordinates & id)
				235	{
				236	// Check if we are processing a new row. If so, update the current row (y_cur), x0 and y0
				237	if(y_cur != id.y())
				238	{
				239	y_cur = id.y();
				240
				241	const_x0 = M01 * y_cur + M02;
				242	const_y0 = M11 * y_cur + M12;
				243
				244	x0 = start_x0 + const_x0;
				245	y0 = start_y0 + const_y0;
				246	}
				247
				248	// Only use input values if x0 and y0 are within the valid region.
				249	// Otherwise write the constant border value.
				250	if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x))
				251	{
				252	switch(interpolation)
				253	{
				254	case InterpolationPolicy::NEAREST_NEIGHBOR:
				255	*out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
				256	break;
				257	case InterpolationPolicy::BILINEAR:
				258	*out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, x0, y0);
				259	break;
				260	default:
				261	ARM_COMPUTE_ERROR("Interpolation not supported");
				262	}
				263	}
				264	else
				265	{
				266	*out.ptr() = _constant_border_value;
				267	}
				268
				269	x0 += M00;
				270	y0 += M10;
				271	},
				272	in, out);
				273	}
				274
				275	template <InterpolationPolicy interpolation>
				276	void NEWarpAffineKernel<interpolation>::warp_replicate(const Window &window)
				277	{
				278	// Don't increment in X and Y direction for the input tensor
				279	// A pointer to the start of this plane is needed as base for the precomputed offsets
				280	Window win_in(window);
				281	win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
				282	win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
				283
				284	Iterator in(_input, win_in);
				285	Iterator out(_output, window);
				286
				287	const int min_x = _input->info()->valid_region().anchor[0];
				288	const int max_x = min_x + _input->info()->valid_region().shape[0];
				289	const int min_y = _input->info()->valid_region().anchor[1];
				290	const int max_y = min_y + _input->info()->valid_region().shape[1];
				291	const size_t stride = _input->info()->strides_in_bytes()[1];
				292
				293	// Current row
				294	int y_cur = window.y().start();
				295
				296	const float M00 = _matrix[0];
				297	const float M10 = _matrix[1];
				298	const float M01 = _matrix[0 + 1 * 2];
				299	const float M11 = _matrix[1 + 1 * 2];
				300	const float M02 = _matrix[0 + 2 * 2];
				301	const float M12 = _matrix[1 + 2 * 2];
				302
				303	// "M00 * x" and "M10 * x", when x = window.x.start
				304	const float start_x0 = M00 * window.x().start();
				305	const float start_y0 = M10 * window.x().start();
				306
				307	// const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing
				308	float const_x0 = M01 * y_cur + M02;
				309	float const_y0 = M11 * y_cur + M12;
				310
				311	float x0 = start_x0 + const_x0;
				312	float y0 = start_y0 + const_y0;
				313
				314	execute_window_loop(window, [&](const Coordinates & id)
				315	{
				316	// Check if we are processing a new row. If so, update the current row (y_cur), x0 and y0
				317	if(y_cur != id.y())
				318	{
				319	y_cur = id.y();
				320
				321	const_x0 = M01 * y_cur + M02;
				322	const_y0 = M11 * y_cur + M12;
				323
				324	x0 = start_x0 + const_x0;
				325	y0 = start_y0 + const_y0;
				326	}
				327
				328	// Only load from (x0, y0) if the point is within the valid region.
				329	// Otherwise load from the edge of the valid region.
				330	if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x))
				331	{
				332	switch(interpolation)
				333	{
				334	case InterpolationPolicy::NEAREST_NEIGHBOR:
				335	*out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
				336	break;
				337	case InterpolationPolicy::BILINEAR:
				338	*out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, x0, y0);
				339	break;
				340	default:
				341	ARM_COMPUTE_ERROR("Interpolation not supported");
				342	}
				343	}
				344	else
				345	{
				346	// Clamp coordinates
				347	const auto xi = clamp<int>(x0, min_x, max_x - 1);
				348	const auto yi = clamp<int>(y0, min_y, max_y - 1);
				349
				350	out.ptr() = (in.ptr() + xi + yi * stride);
				351	}
				352
				353	x0 += M00;
				354	y0 += M10;
				355	},
				356	in, out);
				357	}
				358
				359	template <InterpolationPolicy interpolation>
				360	void NEWarpPerspectiveKernel<interpolation>::warp_undefined(const Window &window)
				361	{
				362	// Don't increment in X and Y direction for the input tensor
				363	// A pointer to the start of this plane is needed as base for the precomputed offsets
				364	Window win_in(window);
				365	win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
				366	win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
				367
				368	Iterator in(_input, win_in);
				369	Iterator out(_output, window);
				370
				371	const int min_x = _input->info()->valid_region().anchor[0];
				372	const int max_x = min_x + _input->info()->valid_region().shape[0];
				373	const int min_y = _input->info()->valid_region().anchor[1];
				374	const int max_y = min_y + _input->info()->valid_region().shape[1];
				375	const size_t stride = _input->info()->strides_in_bytes()[1];
				376
				377	// x0 = M00 * x + M01 * y + M02
				378	// y0 = M10 * x + M11 * y + M12
				379	// z0 = M20 * x + M21 * y + M22
				380	// xn = x0 / z0
				381	// yn = y0 / z0
				382	const float M00 = _matrix[0];
				383	const float M10 = _matrix[1];
				384	const float M20 = _matrix[2];
				385	const float M01 = _matrix[0 + 1 * 3];
				386	const float M11 = _matrix[1 + 1 * 3];
				387	const float M21 = _matrix[2 + 1 * 3];
				388	const float M02 = _matrix[0 + 2 * 3];
				389	const float M12 = _matrix[1 + 2 * 3];
				390	const float M22 = _matrix[2 + 2 * 3];
				391
				392	// "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start
				393	const float start_x0 = M00 * window.x().start();
				394	const float start_y0 = M10 * window.x().start();
				395	const float start_z0 = M20 * window.x().start();
				396
				397	// Current row
				398	int y_cur = window.y().start();
				399
				400	// const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing
				401	float const_x0 = M01 * y_cur + M02;
				402	float const_y0 = M11 * y_cur + M12;
				403	float const_z0 = M21 * y_cur + M22;
				404
				405	// Perspective warp coordinates
				406	float x0 = start_x0 + const_x0;
				407	float y0 = start_y0 + const_y0;
				408	float z0 = start_z0 + const_z0;
				409
				410	execute_window_loop(window, [&](const Coordinates & id)
				411	{
				412	// Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0
				413	if(y_cur != id.y())
				414	{
				415	y_cur = id.y();
				416
				417	const_x0 = M01 * y_cur + M02;
				418	const_y0 = M11 * y_cur + M12;
				419	const_z0 = M21 * y_cur + M22;
				420
				421	x0 = start_x0 + const_x0;
				422	y0 = start_y0 + const_y0;
				423	z0 = start_z0 + const_z0;
				424	}
				425
				426	const float xn = x0 / z0;
				427	const float yn = y0 / z0;
				428
				429	// Only write to output if xn and yn are within the valid region.
				430	// Otherwise the read value would be undefined.
				431	if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x))
				432	{
				433	switch(interpolation)
				434	{
				435	case InterpolationPolicy::NEAREST_NEIGHBOR:
				436	*out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
				437	break;
				438	case InterpolationPolicy::BILINEAR:
				439	*out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, xn, yn);
				440	break;
				441	default:
				442	ARM_COMPUTE_ERROR("Interpolation not supported");
				443	}
				444	}
				445
				446	x0 += M00;
				447	y0 += M10;
				448	z0 += M20;
				449	},
				450	in, out);
				451	}
				452
				453	template <InterpolationPolicy interpolation>
				454	void NEWarpPerspectiveKernel<interpolation>::warp_constant(const Window &window)
				455	{
				456	// Don't increment in X and Y direction for the input tensor
				457	// A pointer to the start of this plane is needed as base for the precomputed offsets
				458	Window win_in(window);
				459	win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
				460	win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
				461
				462	Iterator in(_input, win_in);
				463	Iterator out(_output, window);
				464
				465	const int min_x = _input->info()->valid_region().anchor[0];
				466	const int max_x = min_x + _input->info()->valid_region().shape[0];
				467	const int min_y = _input->info()->valid_region().anchor[1];
				468	const int max_y = min_y + _input->info()->valid_region().shape[1];
				469	const size_t stride = _input->info()->strides_in_bytes()[1];
				470
				471	// x0 = M00 * x + M01 * y + M02
				472	// y0 = M10 * x + M11 * y + M12
				473	// z0 = M20 * x + M21 * y + M22
				474	// xn = x0 / z0
				475	// yn = y0 / z0
				476	const float M00 = _matrix[0];
				477	const float M10 = _matrix[1];
				478	const float M20 = _matrix[2];
				479	const float M01 = _matrix[0 + 1 * 3];
				480	const float M11 = _matrix[1 + 1 * 3];
				481	const float M21 = _matrix[2 + 1 * 3];
				482	const float M02 = _matrix[0 + 2 * 3];
				483	const float M12 = _matrix[1 + 2 * 3];
				484	const float M22 = _matrix[2 + 2 * 3];
				485
				486	// "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start
				487	const float start_x0 = M00 * window.x().start();
				488	const float start_y0 = M10 * window.x().start();
				489	const float start_z0 = M20 * window.x().start();
				490
				491	// Current row
				492	int y_cur = window.y().start();
				493
				494	// const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing
				495	float const_x0 = M01 * y_cur + M02;
				496	float const_y0 = M11 * y_cur + M12;
				497	float const_z0 = M21 * y_cur + M22;
				498
				499	// Perspective warp coordinates
				500	float x0 = start_x0 + const_x0;
				501	float y0 = start_y0 + const_y0;
				502	float z0 = start_z0 + const_z0;
				503
				504	execute_window_loop(window, [&](const Coordinates & id)
				505	{
				506	// Check if we are processing a new row. If so, update the current row (y_cur), x0, y0 and z0
				507	if(y_cur != id.y())
				508	{
				509	y_cur = id.y();
				510
				511	const_x0 = M01 * y_cur + M02;
				512	const_y0 = M11 * y_cur + M12;
				513	const_z0 = M21 * y_cur + M22;
				514
				515	x0 = start_x0 + const_x0;
				516	y0 = start_y0 + const_y0;
				517	z0 = start_z0 + const_z0;
				518	}
				519
				520	const float xn = x0 / z0;
				521	const float yn = y0 / z0;
				522
				523	// Only use input values if xn and yn are within the valid region.
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	524	if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x))
				525	{
				526	switch(interpolation)
				527	{
				528	case InterpolationPolicy::NEAREST_NEIGHBOR:
				529	*out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
				530	break;
				531	case InterpolationPolicy::BILINEAR:
				532	*out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, xn, yn);
				533	break;
				534	default:
				535	ARM_COMPUTE_ERROR("Interpolation not supported");
				536	}
				537	}
				538	else
				539	{
Isabella Gottardi	6203153	2017-07-04 11:21:28 +0100	[diff] [blame^]	540	switch(interpolation)
				541	{
				542	case InterpolationPolicy::NEAREST_NEIGHBOR:
				543	*out.ptr() = _constant_border_value;
				544	break;
				545	case InterpolationPolicy::BILINEAR:
				546	{
				547	const auto xi = clamp<int>(std::floor(xn), min_x - 1, max_x);
				548	const auto yi = clamp<int>(std::floor(yn), min_y - 1, max_y);
				549	const auto xi_1 = clamp<int>(std::floor(xn + 1), min_x - 1, max_x);
				550	const auto yi_1 = clamp<int>(std::floor(yn + 1), min_y - 1, max_y);
				551
				552	const float dx = xn - std::floor(xn);
				553	const float dy = yn - std::floor(yn);
				554	const float dx1 = 1.0f - dx;
				555	const float dy1 = 1.0f - dy;
				556
				557	const float a00 = (in.ptr() + xi + yi stride);
				558	const float a01 = (in.ptr() + xi_1 + yi stride);
				559	const float a10 = (in.ptr() + xi + yi_1 stride);
				560	const float a11 = (in.ptr() + xi_1 + yi_1 stride);
				561
				562	out.ptr() = a00 (dx1 * dy1) + a01 * (dx * dy1) + a10 * (dx1 * dy) + a11 * (dx * dy);
				563	}
				564	break;
				565	default:
				566	ARM_COMPUTE_ERROR("Interpolation not supported");
				567	}
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	568	}
				569
				570	x0 += M00;
				571	y0 += M10;
				572	z0 += M20;
				573	},
				574	in, out);
				575	}
				576
				577	template <InterpolationPolicy interpolation>
				578	void NEWarpPerspectiveKernel<interpolation>::warp_replicate(const Window &window)
				579	{
				580	// Don't increment in X and Y direction for the input tensor
				581	// A pointer to the start of this plane is needed as base for the precomputed offsets
				582	Window win_in(window);
				583	win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
				584	win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
				585
				586	Iterator in(_input, win_in);
				587	Iterator out(_output, window);
				588
				589	const int min_x = _input->info()->valid_region().anchor[0];
				590	const int max_x = min_x + _input->info()->valid_region().shape[0];
				591	const int min_y = _input->info()->valid_region().anchor[1];
				592	const int max_y = min_y + _input->info()->valid_region().shape[1];
				593	const size_t stride = _input->info()->strides_in_bytes()[1];
				594
				595	// Current row
				596	int y_cur = window.y().start();
				597
				598	// x0 = M00 * x + M01 * y + M02
				599	// y0 = M10 * x + M11 * y + M12
				600	// z0 = M20 * x + M21 * y + M22
				601	// xn = x0 / z0
				602	// yn = y0 / z0
				603	const float M00 = _matrix[0];
				604	const float M10 = _matrix[1];
				605	const float M20 = _matrix[2];
				606	const float M01 = _matrix[0 + 1 * 3];
				607	const float M11 = _matrix[1 + 1 * 3];
				608	const float M21 = _matrix[2 + 1 * 3];
				609	const float M02 = _matrix[0 + 2 * 3];
				610	const float M12 = _matrix[1 + 2 * 3];
				611	const float M22 = _matrix[2 + 2 * 3];
				612
				613	// "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start
				614	const float start_x0 = M00 * window.x().start();
				615	const float start_y0 = M10 * window.x().start();
				616	const float start_z0 = M20 * window.x().start();
				617
				618	// const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing
				619	float const_x0 = M01 * y_cur + M02;
				620	float const_y0 = M11 * y_cur + M12;
				621	float const_z0 = M21 * y_cur + M22;
				622
				623	// Perspective warp coordinates
				624	float x0 = start_x0 + const_x0;
				625	float y0 = start_y0 + const_y0;
				626	float z0 = start_z0 + const_z0;
				627
				628	execute_window_loop(window, [&](const Coordinates & id)
				629	{
				630	// Check if we are processing a new row. If so, update the current row (y_cur), x0, y0 and z0
				631	if(y_cur != id.y())
				632	{
				633	y_cur = id.y();
				634
				635	const_x0 = M01 * y_cur + M02;
				636	const_y0 = M11 * y_cur + M12;
				637	const_z0 = M21 * y_cur + M22;
				638
				639	x0 = start_x0 + const_x0;
				640	y0 = start_y0 + const_y0;
				641	z0 = start_z0 + const_z0;
				642	}
				643
				644	const float xn = x0 / z0;
				645	const float yn = y0 / z0;
				646
				647	// Only load from (x0, y0) if the point is within the valid region.
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	648	if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x))
				649	{
				650	switch(interpolation)
				651	{
				652	case InterpolationPolicy::NEAREST_NEIGHBOR:
				653	*out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
				654	break;
				655	case InterpolationPolicy::BILINEAR:
				656	*out.ptr() = pixel_bilinear_c1u8(in.ptr(), stride, xn, yn);
				657	break;
				658	default:
				659	ARM_COMPUTE_ERROR("Interpolation not supported");
				660	}
				661	}
				662	else
				663	{
				664	// Clamp coordinates
Isabella Gottardi	6203153	2017-07-04 11:21:28 +0100	[diff] [blame^]	665	const auto xi = clamp<int>(std::floor(xn), min_x, max_x - 1);
				666	const auto yi = clamp<int>(std::floor(yn), min_y, max_y - 1);
				667	switch(interpolation)
				668	{
				669	case InterpolationPolicy::NEAREST_NEIGHBOR:
				670	out.ptr() = (in.ptr() + xi + yi * stride);
				671	break;
				672	case InterpolationPolicy::BILINEAR:
				673	{
				674	const auto xi_1 = clamp<int>(std::floor(xn + 1), min_x, max_x - 1);
				675	const auto yi_1 = clamp<int>(std::floor(yn + 1), min_y, max_y - 1);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	676
Isabella Gottardi	6203153	2017-07-04 11:21:28 +0100	[diff] [blame^]	677	const float dx = xn - std::floor(xn);
				678	const float dy = yn - std::floor(yn);
				679	const float dx1 = 1.0f - dx;
				680	const float dy1 = 1.0f - dy;
				681
				682	const float a00 = (in.ptr() + xi + yi stride);
				683	const float a01 = (in.ptr() + xi_1 + yi stride);
				684	const float a10 = (in.ptr() + xi + yi_1 stride);
				685	const float a11 = (in.ptr() + xi_1 + yi_1 stride);
				686
				687	out.ptr() = a00 (dx1 * dy1) + a01 * (dx * dy1) + a10 * (dx1 * dy) + a11 * (dx * dy);
				688	}
				689	break;
				690	default:
				691	ARM_COMPUTE_ERROR("Interpolation not supported");
				692	}
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	693	}
				694
				695	x0 += M00;
				696	y0 += M10;
				697	z0 += M20;
				698	},
				699	in, out);
				700	}
				701
				// Explicit instantiations of the affine and perspective warp kernels for the
				// NEAREST_NEIGHBOR and BILINEAR interpolation policies, so that the member
				// templates defined in this file are emitted for those combinations.
				702	template class arm_compute::NEWarpAffineKernel<InterpolationPolicy::NEAREST_NEIGHBOR>;
				703	template class arm_compute::NEWarpAffineKernel<InterpolationPolicy::BILINEAR>;
				704	template class arm_compute::NEWarpPerspectiveKernel<InterpolationPolicy::NEAREST_NEIGHBOR>;
				705	template class arm_compute::NEWarpPerspectiveKernel<InterpolationPolicy::BILINEAR>;