Blame - src/core/CL/cl_kernels/reduction_operation.cl - ml/ComputeLibrary

blob: 5a4bb9ff4ce2c46ea16badf52ad8a8149e31609f [file] [log] [blame]

Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	1	/*
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	2	* Copyright (c) 2016-2019 ARM Limited.
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "helpers.h"
				25
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	26	#if FLOAT_DATA_TYPE
				27	#define ISGREATER(x, y) isgreater(x, y)
				28	#define ISLESS(x, y) isless(x, y)
				29	#else // !FLOAT_DATA_TYPE
				30	#if defined(WIDTH)
				31	#define ISGREATER(x, y) (x > y) ? 1 : 0
				32	#define ISLESS(x, y) (x < y) ? 1 : 0
				33	#else // !defined(WIDTH)
				34	#define ISGREATER(x, y) select((int16)0, (int16)-1, x > y)
				35	#define ISLESS(x, y) select((int16)0, (int16)-1, x < y)
				36	#endif // defined(WIDTH)
				37	#endif // FLOAT_DATA_TYPE
				38
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	39	/** Calculate square sum of a vector
				40	*
				41	* @param[in] input Pointer to the first pixel.
				42	*
				43	* @return square sum of vector.
				44	*/
				45	inline DATA_TYPE square_sum(__global const DATA_TYPE *input)
				46	{
				47	VEC_DATA_TYPE(DATA_TYPE, 16)
				48	in = vload16(0, input);
				49
				50	in *= in;
				51
				52	in.s01234567 += in.s89ABCDEF;
				53	in.s0123 += in.s4567;
				54	in.s01 += in.s23;
				55
				56	return (in.s0 + in.s1);
				57	}
				58
				59	/** Calculate sum of a vector
				60	*
				61	* @param[in] input Pointer to the first pixel.
				62	*
				63	* @return sum of vector.
				64	*/
				65	inline DATA_TYPE sum(__global const DATA_TYPE *input)
				66	{
				67	VEC_DATA_TYPE(DATA_TYPE, 16)
				68	in = vload16(0, input);
				69
				70	in.s01234567 += in.s89ABCDEF;
				71	in.s0123 += in.s4567;
				72	in.s01 += in.s23;
				73
				74	return (in.s0 + in.s1);
				75	}
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	76
				77	/** Calculate product of a vector
				78	*
				79	* @param[in] input Pointer to the first pixel.
				80	*
				81	* @return product of vector.
				82	*/
				83	inline DATA_TYPE product(__global const DATA_TYPE *input)
				84	{
				85	VEC_DATA_TYPE(DATA_TYPE, 16)
				86	in = vload16(0, input);
				87
				88	in.s01234567 *= in.s89ABCDEF;
				89	in.s0123 *= in.s4567;
				90	in.s01 *= in.s23;
				91
				92	return (in.s0 * in.s1);
				93	}
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	94	#if defined(OPERATION)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	95	/** This kernel performs parallel reduction given an operation on x-axis.
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	96	*
				97	* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	98	* @note The operation we want to perform must be passed at compile time using -DOPERATION e.g. -DOPERATION=square_sum
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	99	* @note The mean flag must be passed at compile time using -DMEAN if we want to compute the mean value
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	100	* @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	101	* @note The width size must be passed at compile time using -DWIDTH e.g. -DWIDTH=128 if we want to compute the mean value
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	102	*
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	103	* @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	104	* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
				105	* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	106	* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
				107	* @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	108	* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	109	* @param[in] partial_res_ptr The local buffer to hold partial result values. Supported data types: same as @p src_ptr
				110	* @param[in] partial_res_stride_x Stride of the output tensor in X dimension (in bytes)
				111	* @param[in] partial_res_step_x partial_res_stride_x * number of elements along X processed per workitem(in bytes)
				112	* @param[in] partial_res_stride_y Stride of the output tensor in Y dimension (in bytes)
				113	* @param[in] partial_res_step_y partial_res_stride_y * number of elements along Y processed per workitem(in bytes)
				114	* @param[in] partial_res_offset_first_element_in_bytes The offset of the first element in the source tensor
				115	* @param[in] local_results Local buffer for storing the partial result
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	116	*/
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	117	__kernel void reduction_operation_x(
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	118	IMAGE_DECLARATION(src),
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	119	IMAGE_DECLARATION(partial_res),
				120	__local DATA_TYPE *local_results)
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	121	{
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	122	Image src = CONVERT_TO_IMAGE_STRUCT(src);
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	123	Image partial_res = CONVERT_TO_IMAGE_STRUCT(partial_res);
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	124
				125	unsigned int lsize = get_local_size(0);
				126	unsigned int lid = get_local_id(0);
				127
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	128	for(unsigned int y = 0; y < get_local_size(1); ++y)
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	129	{
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	130	local_results[lid] = OPERATION((__global DATA_TYPE *)offset(&src, 0, y));
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	131	barrier(CLK_LOCAL_MEM_FENCE);
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	132
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	133	// Perform parallel reduction
				134	for(unsigned int i = lsize >> 1; i > 0; i >>= 1)
				135	{
				136	if(lid < i)
				137	{
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	138	#if defined(PROD)
				139	local_results[lid] *= local_results[lid + i];
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	140	#else // !defined(PROD)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	141	local_results[lid] += local_results[lid + i];
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	142	#endif // defined(PROD)
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	143	}
				144	barrier(CLK_LOCAL_MEM_FENCE);
				145	}
				146
				147	if(lid == 0)
				148	{
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	149	#if defined(MEAN) && defined(WIDTH)
				150	if(y == get_local_size(1) - 1)
				151	{
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	152	local_results[0] /= WIDTH;
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	153	}
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	154	#endif // defined(MEAN) && defined(WIDTH)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	155	((__global DATA_TYPE *)offset(&partial_res, get_group_id(0), y))[0] = local_results[0];
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	156	}
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	157	}
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	158	}
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	159	#endif // defined(OPERATION)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	160
				161	#if defined(WIDTH)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	162	/** This kernel performs reduction on x-axis. (Non parallel)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	163	*
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	164	* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	165	* @note The width size must be passed at compile time using -DWIDTH e.g. -DWIDTH=128
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	166	* @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	167	* @note In case of ARG_MIN and ARG_MAX the condition data type must be passed at compile time using -DCOND_DATA_TYPE e.g. -DCOND_DATA_TYPE=short
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	168	*
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	169	* @param[in] src_ptr Pointer to the source tensor. Supported data types: S32/F16/F32 and QASYMM8 for operation MEAN
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	170	* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
				171	* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
				172	* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
				173	* @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p src_ptt
				174	* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
				175	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				176	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
				177	*/
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	178	__kernel void reduction_operation_non_parallel_x(
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	179	VECTOR_DECLARATION(src),
				180	VECTOR_DECLARATION(output))
				181	{
				182	Vector src = CONVERT_TO_VECTOR_STRUCT(src);
				183	Vector output = CONVERT_TO_VECTOR_STRUCT(output);
				184
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	185	DATA_TYPE_PROMOTED res = ((__global DATA_TYPE )vector_offset(&src, 0));
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	186
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	187	#if defined(ARG_MAX) \|\| defined(ARG_MIN)
				188	uint indx = 0;
				189	#endif // defined(ARG_MAX) \|\| defined(ARG_MIN)
				190
				191	for(unsigned int x = 1; x < WIDTH; ++x)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	192	{
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	193	DATA_TYPE_PROMOTED in = ((__global DATA_TYPE )vector_offset(&src, x));
				194	#if defined(ARG_MAX)
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	195	indx = select(indx, x, ISGREATER(in, res));
				196	res = select(res, in, CONVERT(ISGREATER(in, res), COND_DATA_TYPE));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	197	#elif defined(ARG_MIN)
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	198	indx = select(indx, x, ISLESS(in, res));
				199	res = select(res, in, CONVERT(ISLESS(in, res), COND_DATA_TYPE));
Usama Arif	b289050	2019-05-21 11:48:37 +0100	[diff] [blame]	200	#elif defined(MIN)
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame^]	201	res = select(res, in, CONVERT(ISLESS(in, res), COND_DATA_TYPE));
				202	#elif defined(MAX)
				203	res = select(res, in, CONVERT(ISGREATER(in, res), COND_DATA_TYPE));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	204	#else // !(defined(ARG_MAX) \|\| defined(ARG_MIN))
				205	res += in;
				206	#endif // defined(ARG_MAX) \|\| defined(ARG_MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	207	}
				208
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	209	// Store result
				210	#if defined(ARG_MAX) \|\| defined(ARG_MIN)
				211	((__global uint )output.ptr) = indx;
				212	#else // !(defined(ARG_MAX) \|\| defined(ARG_MIN))
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	213	#if defined(MEAN)
				214	res /= WIDTH;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	215	#endif // defined(MEAN)
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame^]	216	#if defined(MIN) \|\| defined(MAX)
Usama Arif	b289050	2019-05-21 11:48:37 +0100	[diff] [blame]	217	((__global DATA_TYPE_PROMOTED )output.ptr) = res;
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame^]	218	#else // defined(MIN) \|\| defined(MAX)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	219	((__global uchar )output.ptr) = convert_uchar(res);
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame^]	220	#endif // defined(MIN) \|\| defined(MAX)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	221	#endif // defined(ARG_MAX) \|\| defined(ARG_MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	222	}
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	223	#endif // defined(WIDTH)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	224
				225	#if defined(HEIGHT)
				226	/** This kernel performs reduction on y-axis.
				227	*
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	228	* @note The input data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	229	* @note The height size must be passed at compile time using -DHEIGHT e.g. -DHEIGHT=128
				230	*
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	231	* @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	232	* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
				233	* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
				234	* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
				235	* @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
				236	* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
				237	* @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p src_ptt
				238	* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
				239	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				240	* @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
				241	* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
				242	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
				243	*/
				244	__kernel void reduction_operation_y(
				245	IMAGE_DECLARATION(src),
				246	IMAGE_DECLARATION(output))
				247	{
				248	Image src = CONVERT_TO_IMAGE_STRUCT(src);
				249	Image output = CONVERT_TO_IMAGE_STRUCT(output);
				250
				251	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	252	res = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	253
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	254	#if defined(SUM_SQUARE)
				255	res *= res;
				256	#endif // defined(SUM_SQUARE)
				257
				258	#if defined(ARG_MAX) \|\| defined(ARG_MIN)
				259	uint16 indx = 0;
				260	#endif // defined(ARG_MAX) \|\| defined(ARG_MIN)
				261
				262	for(unsigned int y = 1; y < HEIGHT; ++y)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	263	{
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	264	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
				265	in = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, y)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	266	#if defined(ARG_MAX)
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	267	uint16 cond_conv = CONVERT(ISGREATER(in, res), uint16);
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	268	indx = select(indx, y, cond_conv);
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	269	res = select(res, in, ISGREATER(in, res));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	270	#elif defined(ARG_MIN)
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame^]	271	uint16 cond_conv = CONVERT(ISLESS(in, res), uint16);
				272	indx = select(indx, y, cond_conv);
				273	res = select(res, in, ISLESS(in, res));
Usama Arif	b289050	2019-05-21 11:48:37 +0100	[diff] [blame]	274	#elif defined(MIN)
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame^]	275	res = select(res, in, ISLESS(in, res));
				276	#elif defined(MAX)
				277	res = select(res, in, ISGREATER(in, res));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	278	#else // !(defined(ARG_MAX) \|\| defined(ARG_MIN))
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	279	#if defined(SUM_SQUARE)
				280	in *= in;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	281	#endif // defined(SUM_SQUARE)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	282	#if defined(PROD)
				283	res *= in;
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	284	#else // !defined(PROD)
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	285	res += in;
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	286	#endif // defined(PROD)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	287	#endif // defined(ARG_MAX) \|\| defined(ARG_MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	288	}
				289
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	290	// Store result
				291	#if defined(ARG_MAX) \|\| defined(ARG_MIN)
				292	vstore16(indx, 0, (__global uint *)output.ptr);
				293	#else // !(defined(ARG_MAX) \|\| defined(ARG_MIN))
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	294	#if defined(MEAN)
				295	res /= HEIGHT;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	296	#endif // defined(MEAN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	297	vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	298	#endif // defined(ARG_MAX) \|\| defined(ARG_MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	299	}
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	300	#endif // defined(HEIGHT)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	301
				302	#if defined(DEPTH)
				303	/** This kernel performs reduction on z-axis.
				304	*
				305	* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
				306	* @note The depth size must be passed at compile time using -DDEPTH e.g. -DDEPTH=128
				307	*
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	308	* @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	309	* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
				310	* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
				311	* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
				312	* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
				313	* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
				314	* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
				315	* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
				316	* @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptt
				317	* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
				318	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				319	* @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
				320	* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
				321	* @param[in] output_stride_z Stride of the output tensor in Z dimension (in bytes)
				322	* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
				323	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
				324	*/
				325	__kernel void reduction_operation_z(
				326	TENSOR3D_DECLARATION(input),
				327	TENSOR3D_DECLARATION(output))
				328	{
				329	Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
				330	Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
				331
				332	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	333	res = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	334
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	335	#if defined(COMPLEX)
				336	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
				337	res1 = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 8, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
				338	#endif // defined(COMPLEX)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	339	#if defined(SUM_SQUARE)
				340	res *= res;
				341	#endif // defined(SUM_SQUARE)
				342
				343	#if defined(ARG_MAX) \|\| defined(ARG_MIN)
				344	uint16 indx = 0;
				345	#endif // defined(ARG_MAX) \|\| defined(ARG_MIN)
				346
				347	for(unsigned int z = 1; z < DEPTH; ++z)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	348	{
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	349	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
				350	in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, z)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	351
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	352	#if defined(COMPLEX)
				353	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
				354	in1 = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 8, 0, z)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
				355	#endif // defined(COMPLEX)
				356
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	357	#if defined(ARG_MAX)
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	358	uint16 cond_conv = CONVERT(ISGREATER(in, res), uint16);
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	359	indx = select(indx, z, cond_conv);
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	360	res = select(res, in, ISGREATER(in, res));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	361	#elif defined(ARG_MIN)
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	362	uint16 cond_conv = CONVERT(ISLESS(in, res), uint16);
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	363	indx = select(indx, z, cond_conv);
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	364	res = select(res, in, ISLESS(in, res));
Usama Arif	b289050	2019-05-21 11:48:37 +0100	[diff] [blame]	365	#elif defined(MIN)
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame^]	366	res = select(res, in, ISLESS(in, res));
				367	#elif defined(MAX)
				368	res = select(res, in, ISGREATER(in, res));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	369	#else // !(defined(ARG_MAX) \|\| defined(ARG_MIN))
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	370	#if defined(SUM_SQUARE)
				371	in *= in;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	372	#endif // defined(SUM_SQUARE)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	373	#if defined(PROD)
				374	res *= in;
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	375	#else //!defined(PROD)
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	376	res += in;
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	377	#if defined(COMPLEX)
				378	res1 += in1;
				379	#endif // defined(COMPLEX)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	380	#endif //defined(PROD)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	381	#endif // defined(ARG_MAX) \|\| defined(ARG_MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	382	}
				383
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	384	// Store result
				385	#if defined(ARG_MAX) \|\| defined(ARG_MIN)
				386	vstore16(indx, 0, (__global uint *)output.ptr);
				387	#else // !(defined(ARG_MAX) \|\| defined(ARG_MIN))
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	388	#if defined(MEAN)
				389	res /= DEPTH;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	390	#endif // defined(MEAN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	391	vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	392	#if defined(COMPLEX)
				393	vstore16(CONVERT(res1, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)tensor3D_offset(&output, 8, 0, 0));
				394	#endif // defined(COMPLEX)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	395	#endif // defined(ARG_MAX) \|\| defined(ARG_MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	396	}
				397	#endif /* defined(DEPTH) */
				398
				399	#if defined(BATCH) && defined(DEPTH)
				400	/** This kernel performs reduction on w-axis.
				401	*
				402	* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
				403	* @note The batch size must be passed at compile time using -DBATCH e.g. -DBATCH=128
				404	* @note The depth size must be passed at compile time using -DBATCH e.g. -DDEPTH=128
				405	*
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	406	* @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	407	* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
				408	* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
				409	* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
				410	* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
				411	* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
				412	* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
				413	* @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes)
				414	* @param[in] input_step_w input_stride_w * number of elements along W processed per workitem(in bytes)
				415	* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
				416	* @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptt
				417	* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
				418	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				419	* @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
				420	* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
				421	* @param[in] output_stride_z Stride of the output tensor in Z dimension (in bytes)
				422	* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
				423	* @param[in] output_stride_w Stride of the output tensor in W dimension (in bytes)
				424	* @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
				425	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
				426	*/
				427	__kernel void reduction_operation_w(
				428	TENSOR4D_DECLARATION(input),
				429	TENSOR4D_DECLARATION(output))
				430	{
				431	Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT(input, DEPTH);
				432	Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH);
				433
				434	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	435	res = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	436
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	437	#if defined(SUM_SQUARE)
				438	res *= res;
				439	#endif // defined(SUM_SQUARE)
				440
				441	#if defined(ARG_MAX) \|\| defined(ARG_MIN)
				442	uint16 indx = 0;
				443	#endif // defined(ARG_MAX) \|\| defined(ARG_MIN)
				444
				445	for(unsigned int w = 1; w < BATCH; ++w)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	446	{
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	447	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
				448	in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, w)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	449
				450	#if defined(ARG_MAX)
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	451	uint16 cond_conv = CONVERT(ISGREATER(in, res), uint16);
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	452	indx = select(indx, w, cond_conv);
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	453	res = select(res, in, ISGREATER(in, res));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	454	#elif defined(ARG_MIN)
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	455	uint16 cond_conv = CONVERT(ISLESS(in, res), uint16);
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	456	indx = select(indx, w, cond_conv);
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	457	res = select(res, in, ISLESS(in, res));
Usama Arif	b289050	2019-05-21 11:48:37 +0100	[diff] [blame]	458	#elif defined(MIN)
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame^]	459	res = select(res, in, ISLESS(in, res));
				460	#elif defined(MAX)
				461	res = select(res, in, ISGREATER(in, res));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	462	#else // !(defined(ARG_MAX) \|\| defined(ARG_MIN))
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	463	#if defined(SUM_SQUARE)
				464	in *= in;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	465	#endif // defined(SUM_SQUARE)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	466	#if defined(PROD)
				467	res *= in;
				468	#else //!defined(PROD)
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	469	res += in;
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	470	#endif //defined(PROD)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	471	#endif // defined(ARG_MAX) \|\| defined(ARG_MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	472	}
				473
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	474	// Store result
				475	#if defined(ARG_MAX) \|\| defined(ARG_MIN)
				476	vstore16(indx, 0, (__global uint *)output.ptr);
				477	#else // !(defined(ARG_MAX) \|\| defined(ARG_MIN))
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	478	#if defined(MEAN)
				479	res /= BATCH;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	480	#endif // defined(MEAN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	481	vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	482	#endif // defined(ARG_MAX) \|\| defined(ARG_MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	483	}
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	484	#endif /* defined(BATCH) && defined(DEPTH) */