Blame - src/core/CL/cl_kernels/reduction_operation.cl - ml/ComputeLibrary

blob: a5fd0b36221af98071823dcbd06af9644a619efe [file] [log] [blame]

Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	1	/*
Michalis Spyrou	6c89ffa	2020-01-24 12:05:05 +0000	[diff] [blame]	2	* Copyright (c) 2016-2020 ARM Limited.
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "helpers.h"
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame^]	25	#include "helpers_asymm.h"
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	26
// Comparison helpers used by the MIN/MAX reductions.
//  - FLOAT_DATA_TYPE defined: forward to the isgreater()/isless() built-ins.
//  - integer data with WIDTH defined: evaluate to 1 or 0.
//  - integer data without WIDTH: evaluate to select((int16)0, (int16)-1, cmp),
//    i.e. an int16 value suitable as the condition argument of select().
// Every argument and every conditional expansion is parenthesized so the
// macros stay correct when used with operator expressions or inside larger
// expressions.
#if defined(FLOAT_DATA_TYPE)
#define ISGREATER(x, y) isgreater((x), (y))
#define ISLESS(x, y) isless((x), (y))
#else // !defined(FLOAT_DATA_TYPE)
#if defined(WIDTH)
#define ISGREATER(x, y) (((x) > (y)) ? 1 : 0)
#define ISLESS(x, y) (((x) < (y)) ? 1 : 0)
#else // !defined(WIDTH)
#define ISGREATER(x, y) select((int16)0, (int16)-1, (x) > (y))
#define ISLESS(x, y) select((int16)0, (int16)-1, (x) < (y))
#endif // defined(WIDTH)
#endif // defined(FLOAT_DATA_TYPE)
				39
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	40	/** Calculate square sum of a vector
				41	*
				42	* @param[in] input Pointer to the first pixel.
				43	*
				44	* @return square sum of vector.
				45	*/
				46	inline DATA_TYPE square_sum(__global const DATA_TYPE *input)
				47	{
				48	VEC_DATA_TYPE(DATA_TYPE, 16)
				49	in = vload16(0, input);
				50
				51	in *= in;
				52
				53	in.s01234567 += in.s89ABCDEF;
				54	in.s0123 += in.s4567;
				55	in.s01 += in.s23;
				56
				57	return (in.s0 + in.s1);
				58	}
				59
				60	/** Calculate sum of a vector
				61	*
				62	* @param[in] input Pointer to the first pixel.
				63	*
				64	* @return sum of vector.
				65	*/
				66	inline DATA_TYPE sum(__global const DATA_TYPE *input)
				67	{
				68	VEC_DATA_TYPE(DATA_TYPE, 16)
				69	in = vload16(0, input);
				70
				71	in.s01234567 += in.s89ABCDEF;
				72	in.s0123 += in.s4567;
				73	in.s01 += in.s23;
				74
				75	return (in.s0 + in.s1);
				76	}
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	77
				78	/** Calculate product of a vector
				79	*
				80	* @param[in] input Pointer to the first pixel.
				81	*
				82	* @return product of vector.
				83	*/
				84	inline DATA_TYPE product(__global const DATA_TYPE *input)
				85	{
				86	VEC_DATA_TYPE(DATA_TYPE, 16)
				87	in = vload16(0, input);
				88
				89	in.s01234567 *= in.s89ABCDEF;
				90	in.s0123 *= in.s4567;
				91	in.s01 *= in.s23;
				92
				93	return (in.s0 * in.s1);
				94	}
Manuel Bottini	34f88dd	2019-10-18 10:37:46 +0000	[diff] [blame]	95	#if defined(OPERATION)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	96	/** This kernel performs parallel reduction given an operation on x-axis.
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	97	*
				98	* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	99	* @note The operation we want to perform must be passed at compile time using -DOPERATION e.g. -DOPERATION=square_sum
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	100	* @note The mean flag must be passed at compile time using -DMEAN if we want to compute the mean value
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	101	* @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	102	* @note The width size must be passed at compile time using -DWIDTH e.g. -DWIDTH=128 if we want to compute the mean value
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	103	*
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	104	* @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	105	* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
				106	* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	107	* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
				108	* @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	109	* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	110	* @param[in] partial_res_ptr The local buffer to hold partial result values. Supported data types: same as @p src_ptr
				111	* @param[in] partial_res_stride_x Stride of the output tensor in X dimension (in bytes)
				112	* @param[in] partial_res_step_x partial_res_stride_x * number of elements along X processed per workitem(in bytes)
				113	* @param[in] partial_res_stride_y Stride of the output tensor in Y dimension (in bytes)
				114	* @param[in] partial_res_step_y partial_res_stride_y * number of elements along Y processed per workitem(in bytes)
				115	* @param[in] partial_res_offset_first_element_in_bytes The offset of the first element in the source tensor
				116	* @param[in] local_results Local buffer for storing the partial result
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	117	*/
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	118	__kernel void reduction_operation_x(
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	119	IMAGE_DECLARATION(src),
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	120	IMAGE_DECLARATION(partial_res),
Manuel Bottini	34f88dd	2019-10-18 10:37:46 +0000	[diff] [blame]	121	__local DATA_TYPE *local_results)
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	122	{
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	123	Image src = CONVERT_TO_IMAGE_STRUCT(src);
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	124	Image partial_res = CONVERT_TO_IMAGE_STRUCT(partial_res);
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	125
				126	unsigned int lsize = get_local_size(0);
				127	unsigned int lid = get_local_id(0);
				128
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	129	for(unsigned int y = 0; y < get_local_size(1); ++y)
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	130	{
Manuel Bottini	34f88dd	2019-10-18 10:37:46 +0000	[diff] [blame]	131	local_results[lid] = OPERATION((__global DATA_TYPE *)offset(&src, 0, y));
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	132	barrier(CLK_LOCAL_MEM_FENCE);
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	133
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	134	// Perform parallel reduction
				135	for(unsigned int i = lsize >> 1; i > 0; i >>= 1)
				136	{
				137	if(lid < i)
				138	{
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	139	#if defined(PROD)
				140	local_results[lid] *= local_results[lid + i];
Manuel Bottini	34f88dd	2019-10-18 10:37:46 +0000	[diff] [blame]	141	#else // !defined(PROD)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	142	local_results[lid] += local_results[lid + i];
Manuel Bottini	34f88dd	2019-10-18 10:37:46 +0000	[diff] [blame]	143	#endif // defined(PROD)
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	144	}
				145	barrier(CLK_LOCAL_MEM_FENCE);
				146	}
				147
				148	if(lid == 0)
				149	{
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	150	#if defined(MEAN) && defined(WIDTH)
				151	if(y == get_local_size(1) - 1)
				152	{
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	153	local_results[0] /= WIDTH;
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	154	}
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	155	#endif // defined(MEAN) && defined(WIDTH)
Manuel Bottini	34f88dd	2019-10-18 10:37:46 +0000	[diff] [blame]	156	((__global DATA_TYPE *)offset(&partial_res, get_group_id(0), y))[0] = local_results[0];
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	157	}
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	158	}
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	159	}
Manuel Bottini	34f88dd	2019-10-18 10:37:46 +0000	[diff] [blame]	160	#endif // defined(OPERATION)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	161
				162	#if defined(WIDTH)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	163	/** This kernel performs reduction on x-axis. (Non parallel)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	164	*
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	165	* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	166	* @note The width size must be passed at compile time using -DWIDTH e.g. -DWIDTH=128
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	167	* @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	168	* @note In case of MIN and MAX the condition data type must be passed at compile time using -DCOND_DATA_TYPE e.g. -DCOND_DATA_TYPE=short
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	169	*
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	170	* @param[in] src_ptr Pointer to the source tensor. Supported data types: S32/F16/F32 and QASYMM8 for operation MEAN
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	171	* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
				172	* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
				173	* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
				174	* @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p src_ptt
				175	* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
				176	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				177	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
				178	*/
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	179	__kernel void reduction_operation_non_parallel_x(
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	180	VECTOR_DECLARATION(src),
				181	VECTOR_DECLARATION(output))
				182	{
				183	Vector src = CONVERT_TO_VECTOR_STRUCT(src);
				184	Vector output = CONVERT_TO_VECTOR_STRUCT(output);
				185
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame^]	186	DATA_TYPE_PROMOTED res = CONVERT(((__global DATA_TYPE )vector_offset(&src, 0)), DATA_TYPE_PROMOTED);
				187
				188	// Convert input into F32 in order to perform quantized multiplication
				189	#if defined(PROD) && defined(OFFSET) && defined(SCALE)
				190	float res_f = DEQUANTIZE(res, OFFSET, SCALE, DATA_TYPE_PROMOTED, 1);
				191	#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	192
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	193	for(unsigned int x = 1; x < WIDTH; ++x)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	194	{
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame^]	195	DATA_TYPE_PROMOTED in = CONVERT(((__global DATA_TYPE )vector_offset(&src, x)), DATA_TYPE_PROMOTED);
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	196	#if defined(MIN)
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame]	197	res = select(res, in, CONVERT(ISLESS(in, res), COND_DATA_TYPE));
				198	#elif defined(MAX)
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame^]	199	res = select(res, in, CONVERT(ISGREATER(in, res), COND_DATA_TYPE));
				200	#elif defined(PROD)
				201	#if defined(OFFSET) && defined(SCALE)
				202	res_f *= DEQUANTIZE(in, OFFSET, SCALE, DATA_TYPE_PROMOTED, 1);
				203	#else // !(defined(OFFSET) && defined(SCALE))
				204	res *= in;
				205	#endif // defined(OFFSET) && defined(SCALE)
				206	#else // defined(SUM))
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	207	res += in;
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame^]	208	#endif // defined(MAX) \|\| defined(MIN) \|\| defined(PROD)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	209	}
				210
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	211	// Store result
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	212	#if defined(MEAN)
				213	res /= WIDTH;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	214	#endif // defined(MEAN)
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame^]	215
				216	// Subtract the offsets in case of quantized SUM
				217	#if defined(SUM) && defined(OFFSET) && defined(SCALE)
				218	res -= (WIDTH - 1) * OFFSET;
				219	#endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE)
				220
				221	// Re-quantize
				222	#if defined(PROD) && defined(OFFSET) && defined(SCALE)
				223	res = QUANTIZE(res_f, OFFSET, SCALE, DATA_TYPE_PROMOTED, 1);
				224	#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
				225
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame]	226	#if defined(MIN) \|\| defined(MAX)
Usama Arif	b289050	2019-05-21 11:48:37 +0100	[diff] [blame]	227	((__global DATA_TYPE_PROMOTED )output.ptr) = res;
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame^]	228	#else // !(defined(MIN) \|\| defined(MAX))
				229	((__global DATA_TYPE )output.ptr) = CONVERT_SAT(res, DATA_TYPE);
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame]	230	#endif // defined(MIN) \|\| defined(MAX)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	231	}
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	232	#endif // defined(WIDTH)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	233
				234	#if defined(HEIGHT)
				235	/** This kernel performs reduction on y-axis.
				236	*
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	237	* @note The input data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	238	* @note The height size must be passed at compile time using -DHEIGHT e.g. -DHEIGHT=128
				239	*
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	240	* @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	241	* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
				242	* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
				243	* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
				244	* @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
				245	* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
				246	* @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p src_ptt
				247	* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
				248	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				249	* @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
				250	* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
				251	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
				252	*/
				253	__kernel void reduction_operation_y(
				254	IMAGE_DECLARATION(src),
				255	IMAGE_DECLARATION(output))
				256	{
				257	Image src = CONVERT_TO_IMAGE_STRUCT(src);
				258	Image output = CONVERT_TO_IMAGE_STRUCT(output);
				259
				260	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	261	res = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	262
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame^]	263	// Convert input into F32 in order to perform quantized multiplication
				264	#if defined(PROD) && defined(OFFSET) && defined(SCALE)
				265	float16 res_f = DEQUANTIZE(res, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
				266	#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
				267
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	268	#if defined(SUM_SQUARE)
				269	res *= res;
				270	#endif // defined(SUM_SQUARE)
				271
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	272	for(unsigned int y = 1; y < HEIGHT; ++y)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	273	{
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	274	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
				275	in = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, y)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	276	#if defined(MIN)
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame]	277	res = select(res, in, ISLESS(in, res));
				278	#elif defined(MAX)
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame^]	279	res = select(res, in, ISGREATER(in, res));
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	280	#else // !(defined(MAX) \|\| defined(MIN))
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	281	#if defined(SUM_SQUARE)
				282	in *= in;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	283	#endif // defined(SUM_SQUARE)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	284	#if defined(PROD)
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame^]	285
				286	#if defined(OFFSET) && defined(SCALE)
				287	res_f *= DEQUANTIZE(in, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
				288	#else // !(defined(OFFSET) && defined(SCALE))
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	289	res *= in;
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame^]	290	#endif // defined(OFFSET) && defined(SCALE)
				291
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	292	#else // !defined(PROD)
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	293	res += in;
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	294	#endif // defined(PROD)
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	295	#endif // defined(MAX) \|\| defined(MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	296	}
				297
				298	#if defined(MEAN)
				299	res /= HEIGHT;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	300	#endif // defined(MEAN)
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame^]	301
				302	// Subtract the offsets in case of quantized SUM
				303	#if defined(SUM) && defined(OFFSET) && defined(SCALE)
				304	res -= (HEIGHT - 1) * OFFSET;
				305	#endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE)
				306
				307	// Re-quantize
				308	#if defined(PROD) && defined(OFFSET) && defined(SCALE)
				309	res = QUANTIZE(res_f, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
				310	#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
				311
				312	// Store result
				313	vstore16(CONVERT_SAT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	314	}
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	315	#endif // defined(HEIGHT)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	316
				317	#if defined(DEPTH)
				318	/** This kernel performs reduction on z-axis.
				319	*
				320	* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
				321	* @note The depth size must be passed at compile time using -DDEPTH e.g. -DDEPTH=128
				322	*
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	323	* @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	324	* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
				325	* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
				326	* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
				327	* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
				328	* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
				329	* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
				330	* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
				331	* @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptt
				332	* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
				333	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				334	* @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
				335	* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
				336	* @param[in] output_stride_z Stride of the output tensor in Z dimension (in bytes)
				337	* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
				338	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
				339	*/
				340	__kernel void reduction_operation_z(
				341	TENSOR3D_DECLARATION(input),
				342	TENSOR3D_DECLARATION(output))
				343	{
				344	Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
				345	Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
				346
				347	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	348	res = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	349
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame^]	350	// Convert input into F32 in order to perform quantized multiplication
				351	#if defined(PROD) && defined(OFFSET) && defined(SCALE)
				352	float16 res_f = DEQUANTIZE(res, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
				353	#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
				354
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	355	#if defined(COMPLEX)
				356	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
				357	res1 = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 8, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
				358	#endif // defined(COMPLEX)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	359	#if defined(SUM_SQUARE)
				360	res *= res;
				361	#endif // defined(SUM_SQUARE)
				362
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	363	for(unsigned int z = 1; z < DEPTH; ++z)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	364	{
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	365	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
				366	in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, z)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	367
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	368	#if defined(COMPLEX)
				369	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
				370	in1 = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 8, 0, z)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
				371	#endif // defined(COMPLEX)
				372
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	373	#if defined(MIN)
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame]	374	res = select(res, in, ISLESS(in, res));
				375	#elif defined(MAX)
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame^]	376	res = select(res, in, ISGREATER(in, res));
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	377	#else // !(defined(MAX) \|\| defined(MIN))
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	378	#if defined(SUM_SQUARE)
				379	in *= in;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	380	#endif // defined(SUM_SQUARE)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	381	#if defined(PROD)
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame^]	382
				383	#if defined(OFFSET) && defined(SCALE)
				384	res_f *= DEQUANTIZE(in, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
				385	#else // !(defined(OFFSET) && defined(SCALE))
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	386	res *= in;
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame^]	387	#endif // defined(OFFSET) && defined(SCALE)
				388
				389	#else // !defined(PROD)
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	390	res += in;
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	391	#if defined(COMPLEX)
				392	res1 += in1;
				393	#endif // defined(COMPLEX)
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	394	#endif // defined(PROD)
				395	#endif // defined(MAX) \|\| defined(MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	396	}
				397
				398	#if defined(MEAN)
				399	res /= DEPTH;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	400	#endif // defined(MEAN)
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame^]	401
				402	// Subtract the offsets in case of quantized SUM
				403	#if defined(SUM) && defined(OFFSET) && defined(SCALE)
				404	res -= (DEPTH - 1) * OFFSET;
				405	#endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE)
				406
				407	// Re-quantize
				408	#if defined(PROD) && defined(OFFSET) && defined(SCALE)
				409	res = QUANTIZE(res_f, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
				410	#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
				411
				412	// Store result
				413	vstore16(CONVERT_SAT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	414	#if defined(COMPLEX)
				415	vstore16(CONVERT(res1, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)tensor3D_offset(&output, 8, 0, 0));
				416	#endif // defined(COMPLEX)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	417	}
				418	#endif /* defined(DEPTH) */
				419
				420	#if defined(BATCH) && defined(DEPTH)
				421	/** This kernel performs reduction on w-axis.
				422	*
				423	* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
				424	* @note The batch size must be passed at compile time using -DBATCH e.g. -DBATCH=128
Manuel Bottini	34f88dd	2019-10-18 10:37:46 +0000	[diff] [blame]	425	* @note The depth size must be passed at compile time using -DBATCH e.g. -DDEPTH=128
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	426	*
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	427	* @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	428	* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
				429	* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
				430	* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
				431	* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
				432	* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
				433	* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
				434	* @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes)
				435	* @param[in] input_step_w input_stride_w * number of elements along W processed per workitem(in bytes)
				436	* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
				437	* @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptt
				438	* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
				439	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				440	* @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
				441	* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
				442	* @param[in] output_stride_z Stride of the output tensor in Z dimension (in bytes)
				443	* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
				444	* @param[in] output_stride_w Stride of the output tensor in W dimension (in bytes)
				445	* @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
				446	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
				447	*/
				448	__kernel void reduction_operation_w(
				449	TENSOR4D_DECLARATION(input),
				450	TENSOR4D_DECLARATION(output))
				451	{
				452	Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT(input, DEPTH);
				453	Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH);
				454
				455	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	456	res = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	457
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame^]	458	// Convert input into F32 in order to perform quantized multiplication
				459	#if defined(PROD) && defined(OFFSET) && defined(SCALE)
				460	float16 res_f = DEQUANTIZE(res, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
				461	#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
				462
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	463	#if defined(SUM_SQUARE)
				464	res *= res;
				465	#endif // defined(SUM_SQUARE)
				466
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	467	for(unsigned int w = 1; w < BATCH; ++w)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	468	{
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	469	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
				470	in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, w)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	471
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	472	#if defined(MIN)
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame]	473	res = select(res, in, ISLESS(in, res));
				474	#elif defined(MAX)
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame^]	475	res = select(res, in, ISGREATER(in, res));
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	476	#else // !(defined(MAX) \|\| defined(MIN))
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	477	#if defined(SUM_SQUARE)
				478	in *= in;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	479	#endif // defined(SUM_SQUARE)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	480	#if defined(PROD)
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame^]	481
				482	#if defined(OFFSET) && defined(SCALE)
				483	res_f *= DEQUANTIZE(in, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
				484	#else // !(defined(OFFSET) && defined(SCALE))
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	485	res *= in;
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame^]	486	#endif // defined(OFFSET) && defined(SCALE)
				487
				488	#else // !defined(PROD)
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	489	res += in;
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	490	#endif //defined(PROD)
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	491	#endif // defined(MAX) \|\| defined(MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	492	}
				493
				494	#if defined(MEAN)
				495	res /= BATCH;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	496	#endif // defined(MEAN)
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame^]	497
				498	// Subtract the offsets in case of quantized SUM
				499	#if defined(SUM) && defined(OFFSET) && defined(SCALE)
				500	res -= (BATCH - 1) * OFFSET;
				501	#endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE)
				502
				503	// Re-quantize
				504	#if defined(PROD) && defined(OFFSET) && defined(SCALE)
				505	res = QUANTIZE(res_f, OFFSET, SCALE, DATA_TYPE_PROMOTED, 16);
				506	#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
				507
				508	// Store result
				509	vstore16(CONVERT_SAT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	510	}
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	511	#endif /* defined(BATCH) && defined(DEPTH) */