Blame - src/core/CL/cl_kernels/reduction_operation.cl - ml/ComputeLibrary

blob: 0c393345e2c7747b06669864311a2233c891cb6a [file] [log] [blame]

Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	1	/*
Michalis Spyrou	6c89ffa	2020-01-24 12:05:05 +0000	[diff] [blame^]	2	* Copyright (c) 2016-2020 ARM Limited.
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "helpers.h"
				25
/* Comparison helpers used by the MIN/MAX reductions in this file.
 *
 * - FLOAT_DATA_TYPE defined: forward to the OpenCL isgreater()/isless() built-ins.
 * - Otherwise, WIDTH defined: scalar comparison yielding 1 (true) or 0 (false).
 * - Otherwise: 16-lane comparison turned into an int16 mask via select()
 *   ((int16)-1 is chosen for lanes where the comparison holds, (int16)0 elsewhere).
 *
 * Arguments and the scalar expansion are fully parenthesized so the macros can be
 * embedded safely in larger expressions.
 */
#if defined(FLOAT_DATA_TYPE)
#define ISGREATER(x, y) isgreater((x), (y))
#define ISLESS(x, y) isless((x), (y))
#else // !defined(FLOAT_DATA_TYPE)
#if defined(WIDTH)
#define ISGREATER(x, y) (((x) > (y)) ? 1 : 0)
#define ISLESS(x, y) (((x) < (y)) ? 1 : 0)
#else // !defined(WIDTH)
#define ISGREATER(x, y) select((int16)0, (int16)-1, (x) > (y))
#define ISLESS(x, y) select((int16)0, (int16)-1, (x) < (y))
#endif // defined(WIDTH)
#endif // defined(FLOAT_DATA_TYPE)
				38
/** Compute the sum of the squares of 16 consecutive elements.
 *
 * @param[in] input Pointer to the first of the 16 elements that are loaded.
 *
 * @return Sum of the squared elements.
 */
inline DATA_TYPE square_sum(__global const DATA_TYPE *input)
{
    const VEC_DATA_TYPE(DATA_TYPE, 16) values  = vload16(0, input);
    const VEC_DATA_TYPE(DATA_TYPE, 16) squared = values * values;

    // Pairwise tree: fold the upper half onto the lower half until two lanes remain
    const VEC_DATA_TYPE(DATA_TYPE, 8) fold8 = squared.lo + squared.hi;
    const VEC_DATA_TYPE(DATA_TYPE, 4) fold4 = fold8.lo + fold8.hi;
    const VEC_DATA_TYPE(DATA_TYPE, 2) fold2 = fold4.lo + fold4.hi;

    return (fold2.s0 + fold2.s1);
}
				58
				59	/** Calculate sum of a vector
				60	*
				61	* @param[in] input Pointer to the first pixel.
				62	*
				63	* @return sum of vector.
				64	*/
				65	inline DATA_TYPE sum(__global const DATA_TYPE *input)
				66	{
				67	VEC_DATA_TYPE(DATA_TYPE, 16)
				68	in = vload16(0, input);
				69
				70	in.s01234567 += in.s89ABCDEF;
				71	in.s0123 += in.s4567;
				72	in.s01 += in.s23;
				73
				74	return (in.s0 + in.s1);
				75	}
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	76
				77	/** Calculate product of a vector
				78	*
				79	* @param[in] input Pointer to the first pixel.
				80	*
				81	* @return product of vector.
				82	*/
				83	inline DATA_TYPE product(__global const DATA_TYPE *input)
				84	{
				85	VEC_DATA_TYPE(DATA_TYPE, 16)
				86	in = vload16(0, input);
				87
				88	in.s01234567 *= in.s89ABCDEF;
				89	in.s0123 *= in.s4567;
				90	in.s01 *= in.s23;
				91
				92	return (in.s0 * in.s1);
				93	}
#if defined(OPERATION)
/** Parallel reduction along the x-axis: every work-item produces one value with OPERATION,
 *  the work-group combines those values in local memory, and work-item 0 writes the group result.
 *
 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
 * @note The per-work-item operation must be passed at compile time using -DOPERATION e.g. -DOPERATION=square_sum
 * @note Pass -DPROD to combine the per-work-item values by multiplication; otherwise they are added
 * @note Pass -DMEAN together with -DWIDTH (e.g. -DWIDTH=128) to divide the group result by WIDTH.
 *       The division is applied only while processing the last row index (get_local_size(1) - 1).
 *
 * @param[in]  src_ptr                                   Pointer to the source tensor. Supported data types: F16/F32
 * @param[in]  src_stride_x                              Stride of the source tensor in X dimension (in bytes)
 * @param[in]  src_step_x                                src_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  src_stride_y                              Stride of the source tensor in Y dimension (in bytes)
 * @param[in]  src_step_y                                src_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  src_offset_first_element_in_bytes         The offset of the first element in the source tensor
 * @param[out] partial_res_ptr                           Pointer to the tensor receiving one partial result per work-group and row. Supported data types: same as @p src_ptr
 * @param[in]  partial_res_stride_x                      Stride of the partial result tensor in X dimension (in bytes)
 * @param[in]  partial_res_step_x                        partial_res_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  partial_res_stride_y                      Stride of the partial result tensor in Y dimension (in bytes)
 * @param[in]  partial_res_step_y                        partial_res_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  partial_res_offset_first_element_in_bytes The offset of the first element in the partial result tensor
 * @param[in]  local_results                             Local-memory scratch buffer, indexed by get_local_id(0)
 */
__kernel void reduction_operation_x(
    IMAGE_DECLARATION(src),
    IMAGE_DECLARATION(partial_res),
    __local DATA_TYPE *local_results)
{
    Image src         = CONVERT_TO_IMAGE_STRUCT(src);
    Image partial_res = CONVERT_TO_IMAGE_STRUCT(partial_res);

    const unsigned int group_width = get_local_size(0);
    const unsigned int lane        = get_local_id(0);
    const size_t       row_count   = get_local_size(1);

    for(unsigned int row = 0; row < row_count; ++row)
    {
        // Seed this work-item's slot with its own OPERATION result for the current row
        local_results[lane] = OPERATION((__global DATA_TYPE *)offset(&src, 0, row));
        barrier(CLK_LOCAL_MEM_FENCE);

        // Tree reduction: halve the number of active slots on every step
        for(unsigned int stride = group_width >> 1; stride > 0; stride >>= 1)
        {
            if(lane < stride)
            {
#if defined(PROD)
                local_results[lane] *= local_results[lane + stride];
#else  // !defined(PROD)
                local_results[lane] += local_results[lane + stride];
#endif // defined(PROD)
            }
            // Every work-item must reach this barrier, including the idle ones
            barrier(CLK_LOCAL_MEM_FENCE);
        }

        // Slot 0 now holds the combined value; only work-item 0 publishes it
        if(lane == 0)
        {
#if defined(MEAN) && defined(WIDTH)
            if(row == row_count - 1)
            {
                local_results[0] /= WIDTH;
            }
#endif // defined(MEAN) && defined(WIDTH)
            ((__global DATA_TYPE *)offset(&partial_res, get_group_id(0), row))[0] = local_results[0];
        }
    }
}
#endif // defined(OPERATION)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	160
				161	#if defined(WIDTH)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	162	/** This kernel performs reduction on x-axis. (Non parallel)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	163	*
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	164	* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	165	* @note The width size must be passed at compile time using -DWIDTH e.g. -DWIDTH=128
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	166	* @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	167	* @note In case of MIN and MAX the condition data type must be passed at compile time using -DCOND_DATA_TYPE e.g. -DCOND_DATA_TYPE=short
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	168	*
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	169	* @param[in] src_ptr Pointer to the source tensor. Supported data types: S32/F16/F32 and QASYMM8 for operation MEAN
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	170	* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
				171	* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
				172	* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
				173	* @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p src_ptt
				174	* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
				175	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				176	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
				177	*/
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	178	__kernel void reduction_operation_non_parallel_x(
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	179	VECTOR_DECLARATION(src),
				180	VECTOR_DECLARATION(output))
				181	{
				182	Vector src = CONVERT_TO_VECTOR_STRUCT(src);
				183	Vector output = CONVERT_TO_VECTOR_STRUCT(output);
				184
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	185	DATA_TYPE_PROMOTED res = ((__global DATA_TYPE )vector_offset(&src, 0));
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	186
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	187	for(unsigned int x = 1; x < WIDTH; ++x)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	188	{
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	189	DATA_TYPE_PROMOTED in = ((__global DATA_TYPE )vector_offset(&src, x));
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	190	#if defined(MIN)
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame]	191	res = select(res, in, CONVERT(ISLESS(in, res), COND_DATA_TYPE));
				192	#elif defined(MAX)
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	193	res = select(res, in, CONVERT(ISGREATER(in, res), COND_DATA_TYPE));
				194	#else // !(defined(MAX) \|\| defined(MIN))
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	195	res += in;
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	196	#endif // defined(MAX) \|\| defined(MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	197	}
				198
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	199	// Store result
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	200	#if defined(MEAN)
				201	res /= WIDTH;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	202	#endif // defined(MEAN)
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame]	203	#if defined(MIN) \|\| defined(MAX)
Usama Arif	b289050	2019-05-21 11:48:37 +0100	[diff] [blame]	204	((__global DATA_TYPE_PROMOTED )output.ptr) = res;
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame]	205	#else // defined(MIN) \|\| defined(MAX)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	206	((__global uchar )output.ptr) = convert_uchar(res);
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame]	207	#endif // defined(MIN) \|\| defined(MAX)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	208	}
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	209	#endif // defined(WIDTH)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	210
				211	#if defined(HEIGHT)
				212	/** This kernel performs reduction on y-axis.
				213	*
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	214	* @note The input data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	215	* @note The height size must be passed at compile time using -DHEIGHT e.g. -DHEIGHT=128
				216	*
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	217	* @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	218	* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
				219	* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
				220	* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
				221	* @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
				222	* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
				223	* @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p src_ptt
				224	* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
				225	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				226	* @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
				227	* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
				228	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
				229	*/
				230	__kernel void reduction_operation_y(
				231	IMAGE_DECLARATION(src),
				232	IMAGE_DECLARATION(output))
				233	{
				234	Image src = CONVERT_TO_IMAGE_STRUCT(src);
				235	Image output = CONVERT_TO_IMAGE_STRUCT(output);
				236
				237	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	238	res = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	239
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	240	#if defined(SUM_SQUARE)
				241	res *= res;
				242	#endif // defined(SUM_SQUARE)
				243
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	244	for(unsigned int y = 1; y < HEIGHT; ++y)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	245	{
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	246	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
				247	in = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, y)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	248	#if defined(MIN)
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame]	249	res = select(res, in, ISLESS(in, res));
				250	#elif defined(MAX)
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	251	res = select(res, in, ISGREATER(in, res));
				252	#else // !(defined(MAX) \|\| defined(MIN))
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	253	#if defined(SUM_SQUARE)
				254	in *= in;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	255	#endif // defined(SUM_SQUARE)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	256	#if defined(PROD)
				257	res *= in;
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	258	#else // !defined(PROD)
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	259	res += in;
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	260	#endif // defined(PROD)
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	261	#endif // defined(MAX) \|\| defined(MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	262	}
				263
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	264	// Store result
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	265	#if defined(MEAN)
				266	res /= HEIGHT;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	267	#endif // defined(MEAN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	268	vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
				269	}
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	270	#endif // defined(HEIGHT)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	271
				272	#if defined(DEPTH)
				273	/** This kernel performs reduction on z-axis.
				274	*
				275	* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
				276	* @note The depth size must be passed at compile time using -DDEPTH e.g. -DDEPTH=128
				277	*
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	278	* @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	279	* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
				280	* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
				281	* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
				282	* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
				283	* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
				284	* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
				285	* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
				286	* @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptt
				287	* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
				288	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				289	* @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
				290	* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
				291	* @param[in] output_stride_z Stride of the output tensor in Z dimension (in bytes)
				292	* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
				293	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
				294	*/
				295	__kernel void reduction_operation_z(
				296	TENSOR3D_DECLARATION(input),
				297	TENSOR3D_DECLARATION(output))
				298	{
				299	Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
				300	Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
				301
				302	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	303	res = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	304
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	305	#if defined(COMPLEX)
				306	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
				307	res1 = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 8, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
				308	#endif // defined(COMPLEX)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	309	#if defined(SUM_SQUARE)
				310	res *= res;
				311	#endif // defined(SUM_SQUARE)
				312
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	313	for(unsigned int z = 1; z < DEPTH; ++z)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	314	{
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	315	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
				316	in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, z)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	317
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	318	#if defined(COMPLEX)
				319	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
				320	in1 = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 8, 0, z)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
				321	#endif // defined(COMPLEX)
				322
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	323	#if defined(MIN)
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame]	324	res = select(res, in, ISLESS(in, res));
				325	#elif defined(MAX)
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	326	res = select(res, in, ISGREATER(in, res));
				327	#else // !(defined(MAX) \|\| defined(MIN))
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	328	#if defined(SUM_SQUARE)
				329	in *= in;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	330	#endif // defined(SUM_SQUARE)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	331	#if defined(PROD)
				332	res *= in;
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	333	#else //!defined(PROD)
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	334	res += in;
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	335	#if defined(COMPLEX)
				336	res1 += in1;
				337	#endif // defined(COMPLEX)
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	338	#endif // defined(PROD)
				339	#endif // defined(MAX) \|\| defined(MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	340	}
				341
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	342	// Store result
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	343	#if defined(MEAN)
				344	res /= DEPTH;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	345	#endif // defined(MEAN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	346	vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	347	#if defined(COMPLEX)
				348	vstore16(CONVERT(res1, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)tensor3D_offset(&output, 8, 0, 0));
				349	#endif // defined(COMPLEX)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	350	}
				351	#endif /* defined(DEPTH) */
				352
				353	#if defined(BATCH) && defined(DEPTH)
				354	/** This kernel performs reduction on w-axis.
				355	*
				356	* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
				357	* @note The batch size must be passed at compile time using -DBATCH e.g. -DBATCH=128
Manuel Bottini	34f88dd	2019-10-18 10:37:46 +0000	[diff] [blame]	358	* @note The depth size must be passed at compile time using -DBATCH e.g. -DDEPTH=128
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	359	*
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	360	* @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	361	* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
				362	* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
				363	* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
				364	* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
				365	* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
				366	* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
				367	* @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes)
				368	* @param[in] input_step_w input_stride_w * number of elements along W processed per workitem(in bytes)
				369	* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
				370	* @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptt
				371	* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
				372	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				373	* @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
				374	* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
				375	* @param[in] output_stride_z Stride of the output tensor in Z dimension (in bytes)
				376	* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
				377	* @param[in] output_stride_w Stride of the output tensor in W dimension (in bytes)
				378	* @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
				379	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
				380	*/
				381	__kernel void reduction_operation_w(
				382	TENSOR4D_DECLARATION(input),
				383	TENSOR4D_DECLARATION(output))
				384	{
				385	Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT(input, DEPTH);
				386	Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH);
				387
				388	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	389	res = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	390
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	391	#if defined(SUM_SQUARE)
				392	res *= res;
				393	#endif // defined(SUM_SQUARE)
				394
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	395	for(unsigned int w = 1; w < BATCH; ++w)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	396	{
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	397	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
				398	in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, w)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	399
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	400	#if defined(MIN)
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame]	401	res = select(res, in, ISLESS(in, res));
				402	#elif defined(MAX)
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	403	res = select(res, in, ISGREATER(in, res));
				404	#else // !(defined(MAX) \|\| defined(MIN))
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	405	#if defined(SUM_SQUARE)
				406	in *= in;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	407	#endif // defined(SUM_SQUARE)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	408	#if defined(PROD)
				409	res *= in;
				410	#else //!defined(PROD)
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	411	res += in;
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	412	#endif //defined(PROD)
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	413	#endif // defined(MAX) \|\| defined(MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	414	}
				415
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	416	// Store result
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	417	#if defined(MEAN)
				418	res /= BATCH;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	419	#endif // defined(MEAN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	420	vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
				421	}
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	422	#endif /* defined(BATCH) && defined(DEPTH) */