Blame - src/core/CL/cl_kernels/reduction_operation.cl - ml/ComputeLibrary

blob: b2e56928d07e231a5996eb281ec6b333906edb03 [file] [log] [blame]

Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	1	/*
Michele Di Giorgio	d9eaf61	2020-07-08 11:12:57 +0100	[diff] [blame]	2	* Copyright (c) 2016-2020 Arm Limited.
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "helpers.h"
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	25	#include "helpers_asymm.h"
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	26
Michalis Spyrou	7317e39	2020-01-17 11:27:49 +0000	[diff] [blame]	27	#if defined(FLOAT_DATA_TYPE)
				28	#define ISGREATER(x, y) isgreater(x, y)
				29	#define ISLESS(x, y) isless(x, y)
				30	#else // !FLOAT_DATA_TYPE
				31	#if defined(WIDTH)
				32	#define ISGREATER(x, y) (x > y) ? 1 : 0
				33	#define ISLESS(x, y) (x < y) ? 1 : 0
				34	#else // !defined(WIDTH)
Michalis Spyrou	6c89ffa	2020-01-24 12:05:05 +0000	[diff] [blame]	35	#define ISGREATER(x, y) select((int16)0, (int16)-1, x > y)
				36	#define ISLESS(x, y) select((int16)0, (int16)-1, x < y)
Michalis Spyrou	7317e39	2020-01-17 11:27:49 +0000	[diff] [blame]	37	#endif // defined(WIDTH)
				38	#endif // defined(FLOAT_DATA_TYPE)
				39
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	40	/** Calculate square sum of a vector
				41	*
				42	* @param[in] input Pointer to the first pixel.
				43	*
				44	* @return square sum of vector.
				45	*/
				46	inline DATA_TYPE square_sum(__global const DATA_TYPE *input)
				47	{
				48	VEC_DATA_TYPE(DATA_TYPE, 16)
				49	in = vload16(0, input);
				50
				51	in *= in;
				52
				53	in.s01234567 += in.s89ABCDEF;
				54	in.s0123 += in.s4567;
				55	in.s01 += in.s23;
				56
				57	return (in.s0 + in.s1);
				58	}
				59
				60	/** Calculate sum of a vector
				61	*
				62	* @param[in] input Pointer to the first pixel.
				63	*
				64	* @return sum of vector.
				65	*/
				66	inline DATA_TYPE sum(__global const DATA_TYPE *input)
				67	{
				68	VEC_DATA_TYPE(DATA_TYPE, 16)
				69	in = vload16(0, input);
				70
				71	in.s01234567 += in.s89ABCDEF;
				72	in.s0123 += in.s4567;
				73	in.s01 += in.s23;
				74
				75	return (in.s0 + in.s1);
				76	}
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	77
				78	/** Calculate product of a vector
				79	*
				80	* @param[in] input Pointer to the first pixel.
				81	*
				82	* @return product of vector.
				83	*/
				84	inline DATA_TYPE product(__global const DATA_TYPE *input)
				85	{
				86	VEC_DATA_TYPE(DATA_TYPE, 16)
				87	in = vload16(0, input);
				88
				89	in.s01234567 *= in.s89ABCDEF;
				90	in.s0123 *= in.s4567;
				91	in.s01 *= in.s23;
				92
				93	return (in.s0 * in.s1);
				94	}
Manuel Bottini	34f88dd	2019-10-18 10:37:46 +0000	[diff] [blame]	95	#if defined(OPERATION)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	96	/** This kernel performs parallel reduction given an operation on x-axis.
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	97	*
				98	* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	99	* @note The operation we want to perform must be passed at compile time using -DOPERATION e.g. -DOPERATION=square_sum
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	100	* @note The mean flag must be passed at compile time using -DMEAN if we want to compute the mean value
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	101	* @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	102	* @note The width size must be passed at compile time using -DWIDTH e.g. -DWIDTH=128 if we want to compute the mean value
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	103	*
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	104	* @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	105	* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
				106	* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	107	* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
				108	* @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	109	* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	110	* @param[in] partial_res_ptr The local buffer to hold partial result values. Supported data types: same as @p src_ptr
				111	* @param[in] partial_res_stride_x Stride of the output tensor in X dimension (in bytes)
				112	* @param[in] partial_res_step_x partial_res_stride_x * number of elements along X processed per workitem(in bytes)
				113	* @param[in] partial_res_stride_y Stride of the output tensor in Y dimension (in bytes)
				114	* @param[in] partial_res_step_y partial_res_stride_y * number of elements along Y processed per workitem(in bytes)
				115	* @param[in] partial_res_offset_first_element_in_bytes The offset of the first element in the source tensor
				116	* @param[in] local_results Local buffer for storing the partial result
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	117	*/
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	118	__kernel void reduction_operation_x(
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	119	IMAGE_DECLARATION(src),
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	120	IMAGE_DECLARATION(partial_res),
Manuel Bottini	34f88dd	2019-10-18 10:37:46 +0000	[diff] [blame]	121	__local DATA_TYPE *local_results)
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	122	{
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	123	Image src = CONVERT_TO_IMAGE_STRUCT(src);
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	124	Image partial_res = CONVERT_TO_IMAGE_STRUCT(partial_res);
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	125
				126	unsigned int lsize = get_local_size(0);
				127	unsigned int lid = get_local_id(0);
				128
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	129	for(unsigned int y = 0; y < get_local_size(1); ++y)
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	130	{
Manuel Bottini	34f88dd	2019-10-18 10:37:46 +0000	[diff] [blame]	131	local_results[lid] = OPERATION((__global DATA_TYPE *)offset(&src, 0, y));
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	132	barrier(CLK_LOCAL_MEM_FENCE);
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	133
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	134	// Perform parallel reduction
				135	for(unsigned int i = lsize >> 1; i > 0; i >>= 1)
				136	{
				137	if(lid < i)
				138	{
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	139	#if defined(PROD)
				140	local_results[lid] *= local_results[lid + i];
Manuel Bottini	34f88dd	2019-10-18 10:37:46 +0000	[diff] [blame]	141	#else // !defined(PROD)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	142	local_results[lid] += local_results[lid + i];
Manuel Bottini	34f88dd	2019-10-18 10:37:46 +0000	[diff] [blame]	143	#endif // defined(PROD)
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	144	}
				145	barrier(CLK_LOCAL_MEM_FENCE);
				146	}
				147
				148	if(lid == 0)
				149	{
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	150	#if defined(MEAN) && defined(WIDTH)
				151	if(y == get_local_size(1) - 1)
				152	{
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	153	local_results[0] /= WIDTH;
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	154	}
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	155	#endif // defined(MEAN) && defined(WIDTH)
Manuel Bottini	34f88dd	2019-10-18 10:37:46 +0000	[diff] [blame]	156	((__global DATA_TYPE *)offset(&partial_res, get_group_id(0), y))[0] = local_results[0];
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	157	}
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	158	}
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	159	}
Manuel Bottini	34f88dd	2019-10-18 10:37:46 +0000	[diff] [blame]	160	#endif // defined(OPERATION)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	161
				162	#if defined(WIDTH)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	163	/** This kernel performs reduction on x-axis. (Non parallel)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	164	*
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	165	* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	166	* @note The width size must be passed at compile time using -DWIDTH e.g. -DWIDTH=128
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	167	* @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	168	* @note In case of MIN and MAX the condition data type must be passed at compile time using -DCOND_DATA_TYPE e.g. -DCOND_DATA_TYPE=short
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	169	*
Michele Di Giorgio	f6f7876	2020-07-06 11:27:21 +0100	[diff] [blame]	170	* @param[in] src_ptr Pointer to the source tensor. Supported data types: S32/F16/F32 and QASYMM8/QASYMM8_SIGNED for operation MEAN
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	171	* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
				172	* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
				173	* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
Michele Di Giorgio	f6f7876	2020-07-06 11:27:21 +0100	[diff] [blame]	174	* @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p src_ptr
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	175	* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
				176	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				177	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
				178	*/
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	179	__kernel void reduction_operation_non_parallel_x(
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	180	VECTOR_DECLARATION(src),
				181	VECTOR_DECLARATION(output))
				182	{
				183	Vector src = CONVERT_TO_VECTOR_STRUCT(src);
				184	Vector output = CONVERT_TO_VECTOR_STRUCT(output);
				185
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	186	DATA_TYPE_PROMOTED res = CONVERT(((__global DATA_TYPE )vector_offset(&src, 0)), DATA_TYPE_PROMOTED);
				187
				188	// Convert input into F32 in order to perform quantized multiplication
				189	#if defined(PROD) && defined(OFFSET) && defined(SCALE)
				190	float res_f = DEQUANTIZE(res, OFFSET, SCALE, DATA_TYPE_PROMOTED, 1);
				191	#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	192
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	193	for(unsigned int x = 1; x < WIDTH; ++x)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	194	{
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	195	DATA_TYPE_PROMOTED in = CONVERT(((__global DATA_TYPE )vector_offset(&src, x)), DATA_TYPE_PROMOTED);
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	196	#if defined(MIN)
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame]	197	res = select(res, in, CONVERT(ISLESS(in, res), COND_DATA_TYPE));
				198	#elif defined(MAX)
Michele Di Giorgio	0333704	2020-02-26 17:47:55 +0000	[diff] [blame]	199	res = select(res, in, CONVERT(ISGREATER(in, res), COND_DATA_TYPE));
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	200	#elif defined(PROD)
				201	#if defined(OFFSET) && defined(SCALE)
				202	res_f *= DEQUANTIZE(in, OFFSET, SCALE, DATA_TYPE_PROMOTED, 1);
				203	#else // !(defined(OFFSET) && defined(SCALE))
				204	res *= in;
				205	#endif // defined(OFFSET) && defined(SCALE)
				206	#else // defined(SUM))
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	207	res += in;
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	208	#endif // defined(MAX) \|\| defined(MIN) \|\| defined(PROD)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	209	}
				210
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	211	// Store result
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	212	#if defined(MEAN)
				213	res /= WIDTH;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	214	#endif // defined(MEAN)
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	215
				216	// Subtract the offsets in case of quantized SUM
				217	#if defined(SUM) && defined(OFFSET) && defined(SCALE)
				218	res -= (WIDTH - 1) * OFFSET;
				219	#endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE)
				220
				221	// Re-quantize
				222	#if defined(PROD) && defined(OFFSET) && defined(SCALE)
				223	res = QUANTIZE(res_f, OFFSET, SCALE, DATA_TYPE_PROMOTED, 1);
				224	#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
				225
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	226	((__global DATA_TYPE )output.ptr) = CONVERT_SAT(res, DATA_TYPE);
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	227	}
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	228	#endif // defined(WIDTH)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	229
				230	#if defined(HEIGHT)
				231	/** This kernel performs reduction on y-axis.
				232	*
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	233	* @note The input data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	234	* @note The height size must be passed at compile time using -DHEIGHT e.g. -DHEIGHT=128
				235	*
Michele Di Giorgio	f6f7876	2020-07-06 11:27:21 +0100	[diff] [blame]	236	* @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	237	* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
				238	* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
				239	* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
				240	* @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
				241	* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
Michele Di Giorgio	f6f7876	2020-07-06 11:27:21 +0100	[diff] [blame]	242	* @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p src_ptr
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	243	* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
				244	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				245	* @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
				246	* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
				247	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
				248	*/
				249	__kernel void reduction_operation_y(
				250	IMAGE_DECLARATION(src),
				251	IMAGE_DECLARATION(output))
				252	{
				253	Image src = CONVERT_TO_IMAGE_STRUCT(src);
				254	Image output = CONVERT_TO_IMAGE_STRUCT(output);
				255
				256	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	257	res = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	258
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	259	// Convert input into F32 in order to perform quantized multiplication
				260	#if defined(PROD) && defined(OFFSET) && defined(SCALE)
				261	float16 res_f = DEQUANTIZE(res, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
				262	#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
				263
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	264	#if defined(SUM_SQUARE)
				265	res *= res;
				266	#endif // defined(SUM_SQUARE)
				267
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	268	for(unsigned int y = 1; y < HEIGHT; ++y)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	269	{
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	270	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
				271	in = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, y)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	272	#if defined(MIN)
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame]	273	res = select(res, in, ISLESS(in, res));
				274	#elif defined(MAX)
Michele Di Giorgio	0333704	2020-02-26 17:47:55 +0000	[diff] [blame]	275	res = select(res, in, ISGREATER(in, res));
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	276	#else // !(defined(MAX) \|\| defined(MIN))
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	277	#if defined(SUM_SQUARE)
				278	in *= in;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	279	#endif // defined(SUM_SQUARE)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	280	#if defined(PROD)
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	281
				282	#if defined(OFFSET) && defined(SCALE)
				283	res_f *= DEQUANTIZE(in, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
				284	#else // !(defined(OFFSET) && defined(SCALE))
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	285	res *= in;
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	286	#endif // defined(OFFSET) && defined(SCALE)
				287
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	288	#else // !defined(PROD)
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	289	res += in;
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	290	#endif // defined(PROD)
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	291	#endif // defined(MAX) \|\| defined(MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	292	}
				293
				294	#if defined(MEAN)
				295	res /= HEIGHT;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	296	#endif // defined(MEAN)
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	297
				298	// Subtract the offsets in case of quantized SUM
				299	#if defined(SUM) && defined(OFFSET) && defined(SCALE)
				300	res -= (HEIGHT - 1) * OFFSET;
				301	#endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE)
				302
				303	// Re-quantize
				304	#if defined(PROD) && defined(OFFSET) && defined(SCALE)
				305	res = QUANTIZE(res_f, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
				306	#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
				307
				308	// Store result
				309	vstore16(CONVERT_SAT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	310	}
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	311	#endif // defined(HEIGHT)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	312
				313	#if defined(DEPTH)
				314	/** This kernel performs reduction on z-axis.
				315	*
				316	* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
				317	* @note The depth size must be passed at compile time using -DDEPTH e.g. -DDEPTH=128
				318	*
Michele Di Giorgio	f6f7876	2020-07-06 11:27:21 +0100	[diff] [blame]	319	* @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	320	* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
				321	* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
				322	* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
				323	* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
				324	* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
				325	* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
				326	* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
Michele Di Giorgio	f6f7876	2020-07-06 11:27:21 +0100	[diff] [blame]	327	* @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptr
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	328	* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
				329	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				330	* @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
				331	* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
				332	* @param[in] output_stride_z Stride of the output tensor in Z dimension (in bytes)
				333	* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
				334	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
				335	*/
				336	__kernel void reduction_operation_z(
				337	TENSOR3D_DECLARATION(input),
				338	TENSOR3D_DECLARATION(output))
				339	{
				340	Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
				341	Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
				342
				343	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	344	res = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	345
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	346	// Convert input into F32 in order to perform quantized multiplication
				347	#if defined(PROD) && defined(OFFSET) && defined(SCALE)
				348	float16 res_f = DEQUANTIZE(res, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
				349	#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
				350
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	351	#if defined(COMPLEX)
				352	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
				353	res1 = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 8, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
				354	#endif // defined(COMPLEX)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	355	#if defined(SUM_SQUARE)
				356	res *= res;
				357	#endif // defined(SUM_SQUARE)
				358
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	359	for(unsigned int z = 1; z < DEPTH; ++z)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	360	{
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	361	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
				362	in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, z)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	363
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	364	#if defined(COMPLEX)
				365	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
				366	in1 = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 8, 0, z)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
				367	#endif // defined(COMPLEX)
				368
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	369	#if defined(MIN)
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame]	370	res = select(res, in, ISLESS(in, res));
				371	#elif defined(MAX)
Michele Di Giorgio	0333704	2020-02-26 17:47:55 +0000	[diff] [blame]	372	res = select(res, in, ISGREATER(in, res));
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	373	#else // !(defined(MAX) \|\| defined(MIN))
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	374	#if defined(SUM_SQUARE)
				375	in *= in;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	376	#endif // defined(SUM_SQUARE)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	377	#if defined(PROD)
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	378
				379	#if defined(OFFSET) && defined(SCALE)
				380	res_f *= DEQUANTIZE(in, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
				381	#else // !(defined(OFFSET) && defined(SCALE))
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	382	res *= in;
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	383	#endif // defined(OFFSET) && defined(SCALE)
				384
				385	#else // !defined(PROD)
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	386	res += in;
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	387	#if defined(COMPLEX)
				388	res1 += in1;
				389	#endif // defined(COMPLEX)
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	390	#endif // defined(PROD)
				391	#endif // defined(MAX) \|\| defined(MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	392	}
				393
				394	#if defined(MEAN)
				395	res /= DEPTH;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	396	#endif // defined(MEAN)
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	397
				398	// Subtract the offsets in case of quantized SUM
				399	#if defined(SUM) && defined(OFFSET) && defined(SCALE)
				400	res -= (DEPTH - 1) * OFFSET;
				401	#endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE)
				402
				403	// Re-quantize
				404	#if defined(PROD) && defined(OFFSET) && defined(SCALE)
				405	res = QUANTIZE(res_f, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
				406	#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
				407
				408	// Store result
				409	vstore16(CONVERT_SAT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	410	#if defined(COMPLEX)
				411	vstore16(CONVERT(res1, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)tensor3D_offset(&output, 8, 0, 0));
				412	#endif // defined(COMPLEX)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	413	}
				414	#endif /* defined(DEPTH) */
				415
				416	#if defined(BATCH) && defined(DEPTH)
				417	/** This kernel performs reduction on w-axis.
				418	*
				419	* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
				420	* @note The batch size must be passed at compile time using -DBATCH e.g. -DBATCH=128
Manuel Bottini	34f88dd	2019-10-18 10:37:46 +0000	[diff] [blame]	421	* @note The depth size must be passed at compile time using -DBATCH e.g. -DDEPTH=128
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	422	*
Michele Di Giorgio	f6f7876	2020-07-06 11:27:21 +0100	[diff] [blame]	423	* @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	424	* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
				425	* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
				426	* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
				427	* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
				428	* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
				429	* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
				430	* @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes)
				431	* @param[in] input_step_w input_stride_w * number of elements along W processed per workitem(in bytes)
				432	* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
Michele Di Giorgio	f6f7876	2020-07-06 11:27:21 +0100	[diff] [blame]	433	* @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptr
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	434	* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
				435	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				436	* @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
				437	* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
				438	* @param[in] output_stride_z Stride of the output tensor in Z dimension (in bytes)
				439	* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
				440	* @param[in] output_stride_w Stride of the output tensor in W dimension (in bytes)
				441	* @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
				442	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
				443	*/
				444	__kernel void reduction_operation_w(
				445	TENSOR4D_DECLARATION(input),
				446	TENSOR4D_DECLARATION(output))
				447	{
				448	Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT(input, DEPTH);
				449	Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH);
				450
				451	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	452	res = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	453
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	454	// Convert input into F32 in order to perform quantized multiplication
				455	#if defined(PROD) && defined(OFFSET) && defined(SCALE)
				456	float16 res_f = DEQUANTIZE(res, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
				457	#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
				458
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	459	#if defined(SUM_SQUARE)
				460	res *= res;
				461	#endif // defined(SUM_SQUARE)
				462
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	463	for(unsigned int w = 1; w < BATCH; ++w)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	464	{
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	465	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
				466	in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, w)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	467
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	468	#if defined(MIN)
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame]	469	res = select(res, in, ISLESS(in, res));
				470	#elif defined(MAX)
Michele Di Giorgio	0333704	2020-02-26 17:47:55 +0000	[diff] [blame]	471	res = select(res, in, ISGREATER(in, res));
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	472	#else // !(defined(MAX) \|\| defined(MIN))
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	473	#if defined(SUM_SQUARE)
				474	in *= in;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	475	#endif // defined(SUM_SQUARE)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	476	#if defined(PROD)
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	477
				478	#if defined(OFFSET) && defined(SCALE)
				479	res_f *= DEQUANTIZE(in, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
				480	#else // !(defined(OFFSET) && defined(SCALE))
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	481	res *= in;
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	482	#endif // defined(OFFSET) && defined(SCALE)
				483
				484	#else // !defined(PROD)
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	485	res += in;
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	486	#endif //defined(PROD)
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	487	#endif // defined(MAX) \|\| defined(MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	488	}
				489
				490	#if defined(MEAN)
				491	res /= BATCH;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	492	#endif // defined(MEAN)
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	493
				494	// Subtract the offsets in case of quantized SUM
				495	#if defined(SUM) && defined(OFFSET) && defined(SCALE)
				496	res -= (BATCH - 1) * OFFSET;
				497	#endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE)
				498
				499	// Re-quantize
				500	#if defined(PROD) && defined(OFFSET) && defined(SCALE)
				501	res = QUANTIZE(res_f, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
				502	#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
				503
				504	// Store result
				505	vstore16(CONVERT_SAT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	506	}
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	507	#endif /* defined(BATCH) && defined(DEPTH) */