Blame - src/core/CL/cl_kernels/nhwc/scale.cl - ml/ComputeLibrary

blob: bccfd6543a6efb48b83998f74a8d634f8738b351 [file] [log] [blame]

Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	1	/*
				2	* Copyright (c) 2016-2021 Arm Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "helpers.h"
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	25	#include "tile_helpers.h"
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	26
Adnan AlSinan	17975a6	2021-11-08 17:46:39 +0000	[diff] [blame]	27	#if defined(SCALE_NEAREST_NEIGHBOUR)
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	28	//! @cond Doxygen_Suppress
				29	/** Performs scale on a tensor by interpolating with the NEAREAST NEIGHBOUR method. (NHWC)
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	30	*
				31	* @note Sampling policy to used is passed as -DSAMPLING_POLICY_(TYPE) e.g. -DSAMPLING_POLICY_TOP_LEFT
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	32	* @note The tensor type ("BUFFER" only is supported) of the source tensor must be passed at compile time using -DSRC_TENSOR_TYPE (e.g. -DSRC_TENSOR_TYPE=BUFFER)
				33	* @note The tensor type ("BUFFER" only is supported) of the destination tensor must be passed at compile time using -DDST_TENSOR_TYPE (e.g. -DDST_TENSOR_TYPE=BUFFER)
				34	* @note The data type of the source tensor must be passed at compile time using -DSRC_DATA_TYPE (e.g. -DSRC_DATA_TYPE=float)
				35	* @note The data type of the destination tensor must be passed at compile time using -DDST_DATA_TYPE (e.g. -DDST_DATA_TYPE=float)
				36	* @note The number of N0 output channels to process must be passed at compile time using -DN0 (e.g. -DN0=2)
				37	* @note The border value value must be passed at compile time using -DCONSTANT_VALUE (e.g. -DCONSTANT_VALUE=0)
				38	* @note In case of F32/F16, -DIS_FLOATING_POINT must be passed at compile time
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	39	* @note If the source tensor has more than 3 dimensions, -DBATCHED_EXECUTION must be passed at compile time
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	40	*
Adnan AlSinan	17975a6	2021-11-08 17:46:39 +0000	[diff] [blame]	41	* @param[in] src_ptr Pointer to the source tensor. Supported data types: U8/S16/F16/F32.
				42	* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
				43	* @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
				44	* @param[in] src_stride_w Stride of the source tensor in W dimension (in bytes)
				45	* @param[in] src_c The size of the channels dimension of the source tensor
				46	* @param[in] src_w The size of the width dimension of the source tensor
				47	* @param[in] src_h The size of the height dimension of the source tensor
				48	* @param[in] src_n The size of the batches dimension of the source tensor
				49	* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
				50	* @param[in] dst_ptr Pointer to the destination tensor. Supported data types: U8/S16/F16/F32.
				51	* @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
				52	* @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
				53	* @param[in] dst_stride_w Stride of the destination tensor in W dimension (in bytes)
				54	* @param[in] dst_c The size of the channels dimension of the destination tensor
				55	* @param[in] dst_w The size of the width dimension of the destination tensor
				56	* @param[in] dst_h The size of the height dimension of the destination tensor
				57	* @param[in] dst_n The size of the batches dimension of the destination tensor
				58	* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
				59	* @param[in] scale_x The scale value to apply on the source width
				60	* @param[in] scale_y The scale value to apply on the source height
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	61	*/
Adnan AlSinan	17975a6	2021-11-08 17:46:39 +0000	[diff] [blame]	62	//! @endcond
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	63	__kernel void scale_nearest_neighbour_nhwc(
Adnan AlSinan	17975a6	2021-11-08 17:46:39 +0000	[diff] [blame]	64	TENSOR4D_T(src, SRC_TENSOR_TYPE),
				65	TENSOR4D_T(dst, DST_TENSOR_TYPE),
				66	const float scale_x,
				67	const float scale_y)
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	68	{
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	69	const int cout = GET_SPATIAL_IDX(0, N0, PARTIAL_N0); // OFM
				70	const int xo = GET_SPATIAL_IDX(1, 1, 0); // WIDTH
				71	#if defined(BATCHED_EXECUTION)
Adnan AlSinan	17975a6	2021-11-08 17:46:39 +0000	[diff] [blame]	72	const int yo = GET_SPATIAL_IDX(2, 1, 0) % dst_h; // HEIGHT
				73	const int bout = GET_SPATIAL_IDX(2, 1, 0) / dst_h; // BATCH SIZE IDX
				74	#else // defined(BATCHED_EXECUTION)
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	75	const int yo = GET_SPATIAL_IDX(2, 1, 0); // HEIGHT
Adnan AlSinan	17975a6	2021-11-08 17:46:39 +0000	[diff] [blame]	76	const int bout = 0; // BATCH SIZE IDX
				77	#endif // defined(BATCHED_EXECUTION)
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	78
				79	#ifdef SAMPLING_POLICY_TOP_LEFT
Adnan AlSinan	17975a6	2021-11-08 17:46:39 +0000	[diff] [blame]	80	float xi_f = (xo * scale_x);
				81	float yi_f = (yo * scale_y);
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	82	#elif SAMPLING_POLICY_CENTER
Adnan AlSinan	17975a6	2021-11-08 17:46:39 +0000	[diff] [blame]	83	float xi_f = ((xo + 0.5f) * scale_x);
				84	float yi_f = ((yo + 0.5f) * scale_y);
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	85	#else // SAMPLING_POLICY
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	86	#error("Unsupported sampling policy");
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	87	#endif // SAMPLING_POLICY
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	88
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	89	#ifdef ALIGN_CORNERS
				90	xi_f = round(xi_f);
				91	yi_f = round(yi_f);
				92	#endif // ALIGN_CORNERS
				93
Adnan AlSinan	17975a6	2021-11-08 17:46:39 +0000	[diff] [blame]	94	const int xi0 = clamp((int)xi_f, 0, (int)src_w - 1);
				95	const int yi0 = clamp((int)yi_f, 0, (int)src_h - 1);
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	96
				97	TILE(SRC_DATA_TYPE, 1, N0, in00);
				98
Adnan AlSinan	17975a6	2021-11-08 17:46:39 +0000	[diff] [blame]	99	T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE, src, bout, yi0, xi0, cout, src_w, src_h, 1, 1, false, in00);
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	100
				101	TILE(uint, 1, 1, dst_indirect_y);
				102
				103	// Calculate the destination indirect Y
Adnan AlSinan	17975a6	2021-11-08 17:46:39 +0000	[diff] [blame]	104	dst_indirect_y[0].v = xo + (yo * (int)(dst_w)) + bout * (int)(dst_w * dst_h);
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	105
				106	bool x_cond = PARTIAL_N0 != 0 && get_global_id(0) == 0;
				107
				108	T_STORE_INDIRECT_WIDTH_SELECT(DST_DATA_TYPE, 1, N0, PARTIAL_N0, DST_TENSOR_TYPE, dst, cout, dst_stride_y, x_cond, in00, dst_indirect_y);
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	109	}
Adnan AlSinan	17975a6	2021-11-08 17:46:39 +0000	[diff] [blame]	110	#endif /* SCALE_NEAREST_NEIGHBOUR */
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	111
Adnan AlSinan	17975a6	2021-11-08 17:46:39 +0000	[diff] [blame]	112	#if defined(SCALE_BILINEAR)
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	113	//! @cond Doxygen_Suppress
				114	/** Performs scale on a tensor by interpolating with the BILINEAR method. (NHWC)
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	115	*
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	116	* @note If border mode replicate is used, is should be passed as -DBORDER_MODE_REPLICATE
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	117	* @note Sampling policy to used is passed as -DSAMPLING_POLICY_(TYPE) e.g. -DSAMPLING_POLICY_TOP_LEFT
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	118	* @note The tensor type ("BUFFER" only is supported) of the source tensor must be passed at compile time using -DSRC_TENSOR_TYPE (e.g. -DSRC_TENSOR_TYPE=BUFFER)
				119	* @note The tensor type ("BUFFER" only is supported) of the destination tensor must be passed at compile time using -DDST_TENSOR_TYPE (e.g. -DDST_TENSOR_TYPE=BUFFER)
				120	* @note The data type of the source tensor must be passed at compile time using -DSRC_DATA_TYPE (e.g. -DSRC_DATA_TYPE=float)
				121	* @note The data type of the destination tensor must be passed at compile time using -DDST_DATA_TYPE (e.g. -DDST_DATA_TYPE=float)
				122	* @note The number of N0 output channels to process must be passed at compile time using -DN0 (e.g. -DN0=2)
				123	* @note The border value value must be passed at compile time using -DCONSTANT_VALUE (e.g. -DCONSTANT_VALUE=0)
				124	* @note In case of F32/F16, -DIS_FLOATING_POINT must be passed at compile time
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	125	* @note If the source tensor has more than 3 dimensions, -DBATCHED_EXECUTION must be passed at compile time
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	126	*
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	127	* @note In case of QASYMM8, the following extra information must be passed at compile time:
				128	* - The source offset e.g. -DOFFSET=4
				129	* - The source scale e.g. -DSCALE=4
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	130	*
Adnan AlSinan	17975a6	2021-11-08 17:46:39 +0000	[diff] [blame]	131	* @param[in] src_ptr Pointer to the source tensor. Supported data types: U8/S16/F16/F32.
				132	* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
				133	* @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
				134	* @param[in] src_stride_w Stride of the source tensor in W dimension (in bytes)
				135	* @param[in] src_c The size of the channels dimension of the source tensor
				136	* @param[in] src_w The size of the width dimension of the source tensor
				137	* @param[in] src_h The size of the height dimension of the source tensor
				138	* @param[in] src_n The size of the batches dimension of the source tensor
				139	* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
				140	* @param[in] dst_ptr Pointer to the destination tensor. Supported data types: U8/S16/F16/F32.
				141	* @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
				142	* @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
				143	* @param[in] dst_stride_w Stride of the destination tensor in W dimension (in bytes)
				144	* @param[in] dst_c The size of the channels dimension of the destination tensor
				145	* @param[in] dst_w The size of the width dimension of the destination tensor
				146	* @param[in] dst_h The size of the height dimension of the destination tensor
				147	* @param[in] dst_n The size of the batches dimension of the destination tensor
				148	* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
				149	* @param[in] scale_x The scale value to apply on the source width
				150	* @param[in] scale_y The scale value to apply on the source height
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	151	*/
Adnan AlSinan	17975a6	2021-11-08 17:46:39 +0000	[diff] [blame]	152	//! @endcond
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	153	__kernel void scale_bilinear_nhwc(
Adnan AlSinan	17975a6	2021-11-08 17:46:39 +0000	[diff] [blame]	154	TENSOR4D_T(src, SRC_TENSOR_TYPE),
				155	TENSOR4D_T(dst, DST_TENSOR_TYPE),
				156	const float scale_x,
				157	const float scale_y)
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	158	{
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	159	const int cout = GET_SPATIAL_IDX(0, N0, PARTIAL_N0); // OFM
				160	const int xo = GET_SPATIAL_IDX(1, 1, 0); // WIDTH
				161	#if defined(BATCHED_EXECUTION)
Adnan AlSinan	17975a6	2021-11-08 17:46:39 +0000	[diff] [blame]	162	const int yo = GET_SPATIAL_IDX(2, 1, 0) % dst_h; // HEIGHT
				163	const int bout = GET_SPATIAL_IDX(2, 1, 0) / dst_h; // BATCH SIZE IDX
				164	#else // defined(BATCHED_EXECUTION)
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	165	const int yo = GET_SPATIAL_IDX(2, 1, 0); // HEIGHT
				166	const int bout = 0; // BATCH SIZE IDX
Adnan AlSinan	17975a6	2021-11-08 17:46:39 +0000	[diff] [blame]	167	#endif // defined(BATCHED_EXECUTION)
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	168
				169	#ifdef SAMPLING_POLICY_TOP_LEFT
Adnan AlSinan	17975a6	2021-11-08 17:46:39 +0000	[diff] [blame]	170	float xi_f = (xo * scale_x);
				171	float yi_f = (yo * scale_y);
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	172	#elif SAMPLING_POLICY_CENTER
Adnan AlSinan	17975a6	2021-11-08 17:46:39 +0000	[diff] [blame]	173	float xi_f = ((xo + 0.5f) * scale_x - 0.5f);
				174	float yi_f = ((yo + 0.5f) * scale_y - 0.5f);
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	175	#else // SAMPLING_POLICY
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	176	#error("Unsupported sampling policy");
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	177	#endif // SAMPLING_POLICY
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	178
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	179	const int xi = (int)floor(xi_f);
				180	const int yi = (int)floor(yi_f);
Giorgio Arena	511771f	2021-08-19 13:04:56 +0100	[diff] [blame]	181
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	182	TILE(SRC_DATA_TYPE, 1, N0, in00);
				183	TILE(SRC_DATA_TYPE, 1, N0, in01);
				184	TILE(SRC_DATA_TYPE, 1, N0, in10);
				185	TILE(SRC_DATA_TYPE, 1, N0, in11);
				186
				187	// Initialize the tiles to CONSTANT_VALUE
				188	in00[0].v = CONSTANT_VALUE;
				189	in01[0].v = CONSTANT_VALUE;
				190	in10[0].v = CONSTANT_VALUE;
				191	in11[0].v = CONSTANT_VALUE;
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	192
				193	#ifndef BORDER_MODE_REPLICATE
Adnan AlSinan	17975a6	2021-11-08 17:46:39 +0000	[diff] [blame]	194	T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE, src, bout, yi, xi, cout, src_w, src_h, 1, 1, true, in00);
				195	T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE, src, bout, yi, xi + 1, cout, src_w, src_h, 1, 1, true, in01);
				196	T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE, src, bout, yi + 1, xi, cout, src_w, src_h, 1, 1, true, in10);
				197	T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE, src, bout, yi + 1, xi + 1, cout, src_w, src_h, 1, 1, true, in11);
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	198	#else // BORDER_MODE_REPLICATE
Adnan AlSinan	17975a6	2021-11-08 17:46:39 +0000	[diff] [blame]	199	const int xi0 = clamp(xi, 0, (int)src_w - 1);
				200	const int yi0 = clamp(yi, 0, (int)src_h - 1);
				201	const int xi1 = clamp(xi + 1, 0, (int)src_w - 1);
				202	const int yi1 = clamp(yi + 1, 0, (int)src_h - 1);
Giorgio Arena	511771f	2021-08-19 13:04:56 +0100	[diff] [blame]	203
Adnan AlSinan	17975a6	2021-11-08 17:46:39 +0000	[diff] [blame]	204	T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE, src, bout, yi0, xi0, cout, src_w, src_h, 1, 1, false, in00);
				205	T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE, src, bout, yi0, xi1, cout, src_w, src_h, 1, 1, false, in01);
				206	T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE, src, bout, yi1, xi0, cout, src_w, src_h, 1, 1, false, in10);
				207	T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE, src, bout, yi1, xi1, cout, src_w, src_h, 1, 1, false, in11);
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	208	#endif // BORDER_MODE_REPLICATE
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	209
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	210	TILE(DST_DATA_TYPE, 1, N0, out);
Giorgio Arena	511771f	2021-08-19 13:04:56 +0100	[diff] [blame]	211
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	212	#if defined(IS_FLOATING_POINT)
				213	const SRC_DATA_TYPE a = (SRC_DATA_TYPE)(xi_f - (float)xi);
				214	const SRC_DATA_TYPE b = (SRC_DATA_TYPE)(1.f - a);
				215	const SRC_DATA_TYPE a1 = (SRC_DATA_TYPE)(yi_f - (float)yi);
				216	const SRC_DATA_TYPE b1 = (SRC_DATA_TYPE)(1.f - a1);
Giorgio Arena	511771f	2021-08-19 13:04:56 +0100	[diff] [blame]	217
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	218	// Calculate the output
				219	out[0].v = ((in00[0].v * b * b1) + (in01[0].v * a * b1) + (in10[0].v * b * a1) + (in11[0].v * a * a1));
				220	#else // defined(IS_FLOATING_POINT)
				221	TILE(float, 1, N0, out_f);
				222	TILE(float, 1, N0, in00_f);
				223	TILE(float, 1, N0, in01_f);
				224	TILE(float, 1, N0, in10_f);
				225	TILE(float, 1, N0, in11_f);
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	226
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	227	const float a = (xi_f - (float)xi);
				228	const float b = (1.f - a);
				229	const float a1 = (yi_f - (float)yi);
				230	const float b1 = (1.f - a1);
				231
				232	// Dequantize
				233	LOOP_UNROLLING(int, n0, 0, 1, N0,
				234	{
				235	in00_f[0].s[n0] = ((float)in00[0].s[n0] - (float)OFFSET) * (float)SCALE;
				236	in01_f[0].s[n0] = ((float)in01[0].s[n0] - (float)OFFSET) * (float)SCALE;
				237	in10_f[0].s[n0] = ((float)in10[0].s[n0] - (float)OFFSET) * (float)SCALE;
				238	in11_f[0].s[n0] = ((float)in11[0].s[n0] - (float)OFFSET) * (float)SCALE;
				239	})
				240
				241	// Calculate the output in the floating-point domain
				242	out_f[0].v = ((in00_f[0].v * b * b1) + (in01_f[0].v * a * b1) + (in10_f[0].v * b * a1) + (in11_f[0].v * a * a1));
				243
				244	// Quantize
				245	LOOP_UNROLLING(int, n0, 0, 1, N0,
				246	{
				247	out[0].s[n0] = CONVERT_SAT(out_f[0].s[n0] / (float)SCALE + (float)OFFSET, DST_DATA_TYPE);
				248	})
				249	#endif // defined(IS_FLOATING_POINT)
				250
				251	TILE(uint, 1, 1, dst_indirect_y);
				252
				253	// Calculate the destination indirect Y
Adnan AlSinan	17975a6	2021-11-08 17:46:39 +0000	[diff] [blame]	254	dst_indirect_y[0].v = xo + (yo * (int)(dst_w)) + bout * (int)(dst_w * dst_h);
Gian Marco Iodice	7bc1a77	2021-09-08 17:14:19 +0100	[diff] [blame]	255
				256	bool x_cond = PARTIAL_N0 != 0 && get_global_id(0) == 0;
				257
				258	T_STORE_INDIRECT_WIDTH_SELECT(DST_DATA_TYPE, 1, N0, PARTIAL_N0, DST_TENSOR_TYPE, dst, cout, dst_stride_y, x_cond, out, dst_indirect_y);
Adnan AlSinan	17975a6	2021-11-08 17:46:39 +0000	[diff] [blame]	259	}
				260	#endif /* SCALE_BILINEAR */