Blame - src/core/CL/cl_kernels/reduction_operation.cl - ml/ComputeLibrary

blob: 749e3cdaa326540c574004207b92b8a7ca6e24de [file] [log] [blame]

Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	1	/*
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	2	* Copyright (c) 2016-2019 ARM Limited.
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "helpers.h"
				25
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	26	#if FLOAT_DATA_TYPE
				27	#define ISGREATER(x, y) isgreater(x, y)
				28	#define ISLESS(x, y) isless(x, y)
				29	#else // !FLOAT_DATA_TYPE
				30	#if defined(WIDTH)
				31	#define ISGREATER(x, y) (x > y) ? 1 : 0
				32	#define ISLESS(x, y) (x < y) ? 1 : 0
				33	#else // !defined(WIDTH)
				34	#define ISGREATER(x, y) select((int16)0, (int16)-1, x > y)
				35	#define ISLESS(x, y) select((int16)0, (int16)-1, x < y)
				36	#endif // defined(WIDTH)
				37	#endif // FLOAT_DATA_TYPE
				38
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	39	/** Calculate square sum of a vector
				40	*
				41	* @param[in] input Pointer to the first pixel.
				42	*
				43	* @return square sum of vector.
				44	*/
				45	inline DATA_TYPE square_sum(__global const DATA_TYPE *input)
				46	{
				47	VEC_DATA_TYPE(DATA_TYPE, 16)
				48	in = vload16(0, input);
				49
				50	in *= in;
				51
				52	in.s01234567 += in.s89ABCDEF;
				53	in.s0123 += in.s4567;
				54	in.s01 += in.s23;
				55
				56	return (in.s0 + in.s1);
				57	}
				58
				59	/** Calculate sum of a vector
				60	*
				61	* @param[in] input Pointer to the first pixel.
				62	*
				63	* @return sum of vector.
				64	*/
				65	inline DATA_TYPE sum(__global const DATA_TYPE *input)
				66	{
				67	VEC_DATA_TYPE(DATA_TYPE, 16)
				68	in = vload16(0, input);
				69
				70	in.s01234567 += in.s89ABCDEF;
				71	in.s0123 += in.s4567;
				72	in.s01 += in.s23;
				73
				74	return (in.s0 + in.s1);
				75	}
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	76
				77	/** Calculate product of a vector
				78	*
				79	* @param[in] input Pointer to the first pixel.
				80	*
				81	* @return product of vector.
				82	*/
				83	inline DATA_TYPE product(__global const DATA_TYPE *input)
				84	{
				85	VEC_DATA_TYPE(DATA_TYPE, 16)
				86	in = vload16(0, input);
				87
				88	in.s01234567 *= in.s89ABCDEF;
				89	in.s0123 *= in.s4567;
				90	in.s01 *= in.s23;
				91
				92	return (in.s0 * in.s1);
				93	}
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	94	#if defined(OPERATION)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	95	/** This kernel performs parallel reduction given an operation on x-axis.
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	96	*
				97	* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	98	* @note The operation we want to perform must be passed at compile time using -DOPERATION e.g. -DOPERATION=square_sum
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	99	* @note The mean flag must be passed at compile time using -DMEAN if we want to compute the mean value
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	100	* @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	101	* @note The width size must be passed at compile time using -DWIDTH e.g. -DWIDTH=128 if we want to compute the mean value
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	102	*
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	103	* @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	104	* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
				105	* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	106	* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
				107	* @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	108	* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	109	* @param[in] partial_res_ptr The local buffer to hold partial result values. Supported data types: same as @p src_ptr
				110	* @param[in] partial_res_stride_x Stride of the output tensor in X dimension (in bytes)
				111	* @param[in] partial_res_step_x partial_res_stride_x * number of elements along X processed per workitem(in bytes)
				112	* @param[in] partial_res_stride_y Stride of the output tensor in Y dimension (in bytes)
				113	* @param[in] partial_res_step_y partial_res_stride_y * number of elements along Y processed per workitem(in bytes)
				114	* @param[in] partial_res_offset_first_element_in_bytes The offset of the first element in the source tensor
				115	* @param[in] local_results Local buffer for storing the partial result
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	116	*/
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	117	__kernel void reduction_operation_x(
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	118	IMAGE_DECLARATION(src),
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	119	IMAGE_DECLARATION(partial_res),
				120	__local DATA_TYPE *local_results)
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	121	{
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	122	Image src = CONVERT_TO_IMAGE_STRUCT(src);
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	123	Image partial_res = CONVERT_TO_IMAGE_STRUCT(partial_res);
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	124
				125	unsigned int lsize = get_local_size(0);
				126	unsigned int lid = get_local_id(0);
				127
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	128	for(unsigned int y = 0; y < get_local_size(1); ++y)
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	129	{
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	130	local_results[lid] = OPERATION((__global DATA_TYPE *)offset(&src, 0, y));
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	131	barrier(CLK_LOCAL_MEM_FENCE);
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	132
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	133	// Perform parallel reduction
				134	for(unsigned int i = lsize >> 1; i > 0; i >>= 1)
				135	{
				136	if(lid < i)
				137	{
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	138	#if defined(PROD)
				139	local_results[lid] *= local_results[lid + i];
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	140	#else // !defined(PROD)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	141	local_results[lid] += local_results[lid + i];
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	142	#endif // defined(PROD)
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	143	}
				144	barrier(CLK_LOCAL_MEM_FENCE);
				145	}
				146
				147	if(lid == 0)
				148	{
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	149	#if defined(MEAN) && defined(WIDTH)
				150	if(y == get_local_size(1) - 1)
				151	{
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	152	local_results[0] /= WIDTH;
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	153	}
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	154	#endif // defined(MEAN) && defined(WIDTH)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	155	((__global DATA_TYPE *)offset(&partial_res, get_group_id(0), y))[0] = local_results[0];
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	156	}
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	157	}
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	158	}
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	159	#endif // defined(OPERATION)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	160
				161	#if defined(WIDTH)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	162	/** This kernel performs reduction on x-axis. (Non parallel)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	163	*
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	164	* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	165	* @note The width size must be passed at compile time using -DWIDTH e.g. -DWIDTH=128
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	166	* @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	167	* @note In case of ARG_MIN and ARG_MAX the condition data type must be passed at compile time using -DCOND_DATA_TYPE e.g. -DCOND_DATA_TYPE=short
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	168	*
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	169	* @param[in] src_ptr Pointer to the source tensor. Supported data types: S32/F16/F32 and QASYMM8 for operation MEAN
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	170	* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
				171	* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
				172	* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
				173	* @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p src_ptt
				174	* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
				175	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				176	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
				177	*/
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	178	__kernel void reduction_operation_non_parallel_x(
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	179	VECTOR_DECLARATION(src),
				180	VECTOR_DECLARATION(output))
				181	{
				182	Vector src = CONVERT_TO_VECTOR_STRUCT(src);
				183	Vector output = CONVERT_TO_VECTOR_STRUCT(output);
				184
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	185	DATA_TYPE_PROMOTED res = ((__global DATA_TYPE )vector_offset(&src, 0));
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	186
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	187	#if defined(ARG_MAX) \|\| defined(ARG_MIN)
				188	uint indx = 0;
				189	#endif // defined(ARG_MAX) \|\| defined(ARG_MIN)
				190
				191	for(unsigned int x = 1; x < WIDTH; ++x)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	192	{
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	193	DATA_TYPE_PROMOTED in = ((__global DATA_TYPE )vector_offset(&src, x));
				194	#if defined(ARG_MAX)
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	195	indx = select(indx, x, ISGREATER(in, res));
				196	res = select(res, in, CONVERT(ISGREATER(in, res), COND_DATA_TYPE));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	197	#elif defined(ARG_MIN)
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	198	indx = select(indx, x, ISLESS(in, res));
				199	res = select(res, in, CONVERT(ISLESS(in, res), COND_DATA_TYPE));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	200	#else // !(defined(ARG_MAX) \|\| defined(ARG_MIN))
				201	res += in;
				202	#endif // defined(ARG_MAX) \|\| defined(ARG_MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	203	}
				204
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	205	// Store result
				206	#if defined(ARG_MAX) \|\| defined(ARG_MIN)
				207	((__global uint )output.ptr) = indx;
				208	#else // !(defined(ARG_MAX) \|\| defined(ARG_MIN))
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	209	#if defined(MEAN)
				210	res /= WIDTH;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	211	#endif // defined(MEAN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	212	((__global uchar )output.ptr) = convert_uchar(res);
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	213	#endif // defined(ARG_MAX) \|\| defined(ARG_MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	214	}
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	215	#endif // defined(WIDTH)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	216
				217	#if defined(HEIGHT)
				218	/** This kernel performs reduction on y-axis.
				219	*
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	220	* @note The input data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	221	* @note The height size must be passed at compile time using -DHEIGHT e.g. -DHEIGHT=128
				222	*
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	223	* @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	224	* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
				225	* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
				226	* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
				227	* @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
				228	* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
				229	* @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p src_ptt
				230	* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
				231	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				232	* @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
				233	* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
				234	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
				235	*/
				236	__kernel void reduction_operation_y(
				237	IMAGE_DECLARATION(src),
				238	IMAGE_DECLARATION(output))
				239	{
				240	Image src = CONVERT_TO_IMAGE_STRUCT(src);
				241	Image output = CONVERT_TO_IMAGE_STRUCT(output);
				242
				243	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	244	res = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	245
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	246	#if defined(SUM_SQUARE)
				247	res *= res;
				248	#endif // defined(SUM_SQUARE)
				249
				250	#if defined(ARG_MAX) \|\| defined(ARG_MIN)
				251	uint16 indx = 0;
				252	#endif // defined(ARG_MAX) \|\| defined(ARG_MIN)
				253
				254	for(unsigned int y = 1; y < HEIGHT; ++y)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	255	{
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	256	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
				257	in = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, y)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	258	#if defined(ARG_MAX)
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	259	uint16 cond_conv = CONVERT(ISGREATER(in, res), uint16);
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	260	indx = select(indx, y, cond_conv);
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	261	res = select(res, in, ISGREATER(in, res));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	262	#elif defined(ARG_MIN)
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	263	uint16 cond_conv = CONVERT(ISLESS(in, res), uint16);
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	264	indx = select(indx, y, cond_conv);
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	265	res = select(res, in, ISLESS(in, res));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	266	#else // !(defined(ARG_MAX) \|\| defined(ARG_MIN))
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	267	#if defined(SUM_SQUARE)
				268	in *= in;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	269	#endif // defined(SUM_SQUARE)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	270	#if defined(PROD)
				271	res *= in;
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	272	#else // !defined(PROD)
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	273	res += in;
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	274	#endif // defined(PROD)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	275	#endif // defined(ARG_MAX) \|\| defined(ARG_MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	276	}
				277
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	278	// Store result
				279	#if defined(ARG_MAX) \|\| defined(ARG_MIN)
				280	vstore16(indx, 0, (__global uint *)output.ptr);
				281	#else // !(defined(ARG_MAX) \|\| defined(ARG_MIN))
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	282	#if defined(MEAN)
				283	res /= HEIGHT;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	284	#endif // defined(MEAN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	285	vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	286	#endif // defined(ARG_MAX) \|\| defined(ARG_MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	287	}
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	288	#endif // defined(HEIGHT)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	289
				290	#if defined(DEPTH)
				291	/** This kernel performs reduction on z-axis.
				292	*
				293	* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
				294	* @note The depth size must be passed at compile time using -DDEPTH e.g. -DDEPTH=128
				295	*
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	296	* @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	297	* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
				298	* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
				299	* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
				300	* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
				301	* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
				302	* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
				303	* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
				304	* @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptt
				305	* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
				306	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				307	* @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
				308	* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
				309	* @param[in] output_stride_z Stride of the output tensor in Z dimension (in bytes)
				310	* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
				311	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
				312	*/
				313	__kernel void reduction_operation_z(
				314	TENSOR3D_DECLARATION(input),
				315	TENSOR3D_DECLARATION(output))
				316	{
				317	Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
				318	Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
				319
				320	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	321	res = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	322
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	323	#if defined(COMPLEX)
				324	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
				325	res1 = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 8, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
				326	#endif // defined(COMPLEX)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	327	#if defined(SUM_SQUARE)
				328	res *= res;
				329	#endif // defined(SUM_SQUARE)
				330
				331	#if defined(ARG_MAX) \|\| defined(ARG_MIN)
				332	uint16 indx = 0;
				333	#endif // defined(ARG_MAX) \|\| defined(ARG_MIN)
				334
				335	for(unsigned int z = 1; z < DEPTH; ++z)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	336	{
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	337	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
				338	in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, z)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	339
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	340	#if defined(COMPLEX)
				341	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
				342	in1 = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 8, 0, z)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
				343	#endif // defined(COMPLEX)
				344
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	345	#if defined(ARG_MAX)
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	346	uint16 cond_conv = CONVERT(ISGREATER(in, res), uint16);
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	347	indx = select(indx, z, cond_conv);
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	348	res = select(res, in, ISGREATER(in, res));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	349	#elif defined(ARG_MIN)
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	350	uint16 cond_conv = CONVERT(ISLESS(in, res), uint16);
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	351	indx = select(indx, z, cond_conv);
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	352	res = select(res, in, ISLESS(in, res));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	353	#else // !(defined(ARG_MAX) \|\| defined(ARG_MIN))
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	354	#if defined(SUM_SQUARE)
				355	in *= in;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	356	#endif // defined(SUM_SQUARE)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	357	#if defined(PROD)
				358	res *= in;
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	359	#else //!defined(PROD)
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	360	res += in;
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	361	#if defined(COMPLEX)
				362	res1 += in1;
				363	#endif // defined(COMPLEX)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	364	#endif //defined(PROD)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	365	#endif // defined(ARG_MAX) \|\| defined(ARG_MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	366	}
				367
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	368	// Store result
				369	#if defined(ARG_MAX) \|\| defined(ARG_MIN)
				370	vstore16(indx, 0, (__global uint *)output.ptr);
				371	#else // !(defined(ARG_MAX) \|\| defined(ARG_MIN))
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	372	#if defined(MEAN)
				373	res /= DEPTH;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	374	#endif // defined(MEAN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	375	vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	376	#if defined(COMPLEX)
				377	vstore16(CONVERT(res1, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)tensor3D_offset(&output, 8, 0, 0));
				378	#endif // defined(COMPLEX)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	379	#endif // defined(ARG_MAX) \|\| defined(ARG_MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	380	}
				381	#endif /* defined(DEPTH) */
				382
				383	#if defined(BATCH) && defined(DEPTH)
				384	/** This kernel performs reduction on w-axis.
				385	*
				386	* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
				387	* @note The batch size must be passed at compile time using -DBATCH e.g. -DBATCH=128
				388	* @note The depth size must be passed at compile time using -DBATCH e.g. -DDEPTH=128
				389	*
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	390	* @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	391	* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
				392	* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
				393	* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
				394	* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
				395	* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
				396	* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
				397	* @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes)
				398	* @param[in] input_step_w input_stride_w * number of elements along W processed per workitem(in bytes)
				399	* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
				400	* @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptt
				401	* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
				402	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				403	* @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
				404	* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
				405	* @param[in] output_stride_z Stride of the output tensor in Z dimension (in bytes)
				406	* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
				407	* @param[in] output_stride_w Stride of the output tensor in W dimension (in bytes)
				408	* @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
				409	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
				410	*/
				411	__kernel void reduction_operation_w(
				412	TENSOR4D_DECLARATION(input),
				413	TENSOR4D_DECLARATION(output))
				414	{
				415	Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT(input, DEPTH);
				416	Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH);
				417
				418	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	419	res = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	420
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	421	#if defined(SUM_SQUARE)
				422	res *= res;
				423	#endif // defined(SUM_SQUARE)
				424
				425	#if defined(ARG_MAX) \|\| defined(ARG_MIN)
				426	uint16 indx = 0;
				427	#endif // defined(ARG_MAX) \|\| defined(ARG_MIN)
				428
				429	for(unsigned int w = 1; w < BATCH; ++w)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	430	{
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	431	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
				432	in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, w)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	433
				434	#if defined(ARG_MAX)
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	435	uint16 cond_conv = CONVERT(ISGREATER(in, res), uint16);
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	436	indx = select(indx, w, cond_conv);
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	437	res = select(res, in, ISGREATER(in, res));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	438	#elif defined(ARG_MIN)
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	439	uint16 cond_conv = CONVERT(ISLESS(in, res), uint16);
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	440	indx = select(indx, w, cond_conv);
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame^]	441	res = select(res, in, ISLESS(in, res));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	442	#else // !(defined(ARG_MAX) \|\| defined(ARG_MIN))
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	443	#if defined(SUM_SQUARE)
				444	in *= in;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	445	#endif // defined(SUM_SQUARE)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	446	#if defined(PROD)
				447	res *= in;
				448	#else //!defined(PROD)
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	449	res += in;
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	450	#endif //defined(PROD)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	451	#endif // defined(ARG_MAX) \|\| defined(ARG_MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	452	}
				453
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	454	// Store result
				455	#if defined(ARG_MAX) \|\| defined(ARG_MIN)
				456	vstore16(indx, 0, (__global uint *)output.ptr);
				457	#else // !(defined(ARG_MAX) \|\| defined(ARG_MIN))
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	458	#if defined(MEAN)
				459	res /= BATCH;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	460	#endif // defined(MEAN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	461	vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	462	#endif // defined(ARG_MAX) \|\| defined(ARG_MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	463	}
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	464	#endif /* defined(BATCH) && defined(DEPTH) */