Blame - src/core/CL/cl_kernels/reduction_operation.cl - ml/ComputeLibrary

blob: 9f2c6e23b579a154c3a7d692e7548766b3eff2ba [file] [log] [blame]

Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	1	/*
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	2	* Copyright (c) 2016-2021 Arm Limited.
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "helpers.h"
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	25	#include "helpers_asymm.h"
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	26
Michalis Spyrou	7317e39	2020-01-17 11:27:49 +0000	[diff] [blame]	27	#if defined(FLOAT_DATA_TYPE)
Giorgio Arena	1fac037	2021-04-30 15:09:46 +0100	[diff] [blame^]	28	#define ISGREATER(x, y) (SELECT_VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE))(isgreater(x, y))
				29	#define ISLESS(x, y) (SELECT_VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE))(isless(x, y))
				30	#define ISGREATER_SCALAR(x, y) (SELECT_DATA_TYPE(DATA_TYPE_PROMOTED))(isgreater(x, y))
				31	#define ISLESS_SCALAR(x, y) (SELECT_DATA_TYPE(DATA_TYPE_PROMOTED))(isless(x, y))
Michalis Spyrou	7317e39	2020-01-17 11:27:49 +0000	[diff] [blame]	32	#else // !FLOAT_DATA_TYPE
				33	#if defined(WIDTH)
				34	#define ISGREATER(x, y) (x > y) ? 1 : 0
				35	#define ISLESS(x, y) (x < y) ? 1 : 0
Giorgio Arena	1fac037	2021-04-30 15:09:46 +0100	[diff] [blame^]	36	#define ISGREATER_SCALAR ISGREATER
				37	#define ISLESS_SCALAR ISLESS
Michalis Spyrou	7317e39	2020-01-17 11:27:49 +0000	[diff] [blame]	38	#else // !defined(WIDTH)
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	39	#define ISGREATER(x, y) select((VEC_DATA_TYPE(int, VEC_SIZE))0, (VEC_DATA_TYPE(int, VEC_SIZE)) - 1, x > y)
				40	#define ISLESS(x, y) select((VEC_DATA_TYPE(int, VEC_SIZE))0, (VEC_DATA_TYPE(int, VEC_SIZE)) - 1, x < y)
Michalis Spyrou	7317e39	2020-01-17 11:27:49 +0000	[diff] [blame]	41	#endif // defined(WIDTH)
				42	#endif // defined(FLOAT_DATA_TYPE)
				43
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	44	#if defined(WIDTH)
Manuel Bottini	34f88dd	2019-10-18 10:37:46 +0000	[diff] [blame]	45	#if defined(OPERATION)
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	46
				47	#define sum(in0, in1, size) (in0 + SUM_REDUCE(in1, size))
				48	#define square_sum(in0, in1, size) (in0 + SUM_REDUCE((in1 * in1), size))
				49	#define product(in0, in1, size) (in0 * PROD_REDUCE(in1, size))
				50
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	51	/** This kernel performs parallel reduction given an operation on x-axis.
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	52	*
				53	* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	54	* @note The operation we want to perform must be passed at compile time using -DOPERATION e.g. -DOPERATION=square_sum
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	55	* @note The mean flag must be passed at compile time using -DMEAN if we want to compute the mean value
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	56	* @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	57	* @note The width size must be passed at compile time using -DWIDTH e.g. -DWIDTH=128 if we want to compute the mean value
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	58	*
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	59	* @param[in] input_ptr Pointer to the source tensor. Supported data types: F16/F32
				60	* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
				61	* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
				62	* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
				63	* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
				64	* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
				65	* @param[in] output_ptr Pointer to the destination tensor. Supported data types: same as @p input
				66	* @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
				67	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				68	* @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)
				69	* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
				70	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	71	*/
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	72	__kernel void reduction_operation_x(
Giorgio Arena	1fac037	2021-04-30 15:09:46 +0100	[diff] [blame^]	73	TENSOR3D_DECLARATION(input),
				74	TENSOR3D_DECLARATION(output))
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	75	{
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	76	int y = get_global_id(1);
Giorgio Arena	1fac037	2021-04-30 15:09:46 +0100	[diff] [blame^]	77	int z = get_global_id(2);
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	78
Giorgio Arena	1fac037	2021-04-30 15:09:46 +0100	[diff] [blame^]	79	__global uchar input_addr = input_ptr + input_offset_first_element_in_bytes + y input_stride_y + z * input_stride_z;
				80	__global uchar output_addr = output_ptr + output_offset_first_element_in_bytes + y output_stride_y + z * output_stride_z;
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	81
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	82	#if defined(PROD)
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	83	DATA_TYPE res = (DATA_TYPE)1;
				84	#else // defined(PROD)
				85	DATA_TYPE res = (DATA_TYPE)0;
Manuel Bottini	34f88dd	2019-10-18 10:37:46 +0000	[diff] [blame]	86	#endif // defined(PROD)
Michalis Spyrou	f6402dd	2018-01-26 15:06:19 +0000	[diff] [blame]	87
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	88	int x = 0;
				89
				90	for(; x <= (WIDTH - VEC_SIZE); x += VEC_SIZE)
				91	{
				92	VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
				93	vals = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE )(input_addr + x sizeof(DATA_TYPE)));
				94	res = OPERATION(res, vals, VEC_SIZE);
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	95	}
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	96
				97	#if(WIDTH % VEC_SIZE)
				98	_Pragma("unroll") for(; x < WIDTH; ++x)
				99	{
				100	DATA_TYPE val = ((__global DATA_TYPE )(input_addr + x * sizeof(DATA_TYPE)));
				101	res = OPERATION(res, val, 1);
				102	}
				103	#endif // (WIDTH % VEC_SIZE)
				104
				105	#if defined(MEAN)
				106	res /= WIDTH;
				107	#endif // defined(MEAN)
				108	((__global DATA_TYPE )output_addr) = res;
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	109	}
Manuel Bottini	34f88dd	2019-10-18 10:37:46 +0000	[diff] [blame]	110	#endif // defined(OPERATION)
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	111	/** This kernel performs reduction on x-axis. (Non parallel)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	112	*
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	113	* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	114	* @note The width size must be passed at compile time using -DWIDTH e.g. -DWIDTH=128
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	115	* @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	116	*
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	117	* @param[in] input_ptr Pointer to the source tensor. Supported data types: S32/F16/F32 and QASYMM8/QASYMM8_SIGNED for operation MEAN
				118	* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
				119	* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
				120	* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
				121	* @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptr
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	122	* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
				123	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				124	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
				125	*/
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	126	__kernel void reduction_operation_non_parallel_x(
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	127	VECTOR_DECLARATION(input),
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	128	VECTOR_DECLARATION(output))
				129	{
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	130	Vector input = CONVERT_TO_VECTOR_STRUCT(input);
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	131	Vector output = CONVERT_TO_VECTOR_STRUCT(output);
				132
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	133	DATA_TYPE_PROMOTED res = CONVERT(((__global DATA_TYPE )vector_offset(&input, 0)), DATA_TYPE_PROMOTED);
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	134
				135	// Convert input into F32 in order to perform quantized multiplication
				136	#if defined(PROD) && defined(OFFSET) && defined(SCALE)
				137	float res_f = DEQUANTIZE(res, OFFSET, SCALE, DATA_TYPE_PROMOTED, 1);
				138	#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	139
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	140	for(unsigned int x = 1; x < WIDTH; ++x)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	141	{
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	142	DATA_TYPE_PROMOTED in = CONVERT(((__global DATA_TYPE )vector_offset(&input, x)), DATA_TYPE_PROMOTED);
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	143	#if defined(MIN)
Giorgio Arena	1fac037	2021-04-30 15:09:46 +0100	[diff] [blame^]	144	res = select(res, in, ISLESS_SCALAR(in, res));
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame]	145	#elif defined(MAX)
Giorgio Arena	1fac037	2021-04-30 15:09:46 +0100	[diff] [blame^]	146	res = select(res, in, ISGREATER_SCALAR(in, res));
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	147	#elif defined(PROD)
				148	#if defined(OFFSET) && defined(SCALE)
				149	res_f *= DEQUANTIZE(in, OFFSET, SCALE, DATA_TYPE_PROMOTED, 1);
				150	#else // !(defined(OFFSET) && defined(SCALE))
				151	res *= in;
				152	#endif // defined(OFFSET) && defined(SCALE)
				153	#else // defined(SUM))
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	154	res += in;
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	155	#endif // defined(MAX) \|\| defined(MIN) \|\| defined(PROD)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	156	}
				157
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	158	// Store result
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	159	#if defined(MEAN)
				160	res /= WIDTH;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	161	#endif // defined(MEAN)
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	162
				163	// Subtract the offsets in case of quantized SUM
				164	#if defined(SUM) && defined(OFFSET) && defined(SCALE)
				165	res -= (WIDTH - 1) * OFFSET;
				166	#endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE)
				167
				168	// Re-quantize
				169	#if defined(PROD) && defined(OFFSET) && defined(SCALE)
				170	res = QUANTIZE(res_f, OFFSET, SCALE, DATA_TYPE_PROMOTED, 1);
				171	#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
				172
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	173	((__global DATA_TYPE )output.ptr) = CONVERT_SAT(res, DATA_TYPE);
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	174	}
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	175	#endif // defined(WIDTH)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	176
				177	#if defined(HEIGHT)
				178	/** This kernel performs reduction on y-axis.
				179	*
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	180	* @note The input data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	181	* @note The height size must be passed at compile time using -DHEIGHT e.g. -DHEIGHT=128
				182	*
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	183	* @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32
				184	* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
				185	* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
				186	* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
				187	* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
				188	* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
				189	* @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptr
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	190	* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
				191	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				192	* @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
				193	* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
				194	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
				195	*/
				196	__kernel void reduction_operation_y(
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	197	IMAGE_DECLARATION(input),
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	198	IMAGE_DECLARATION(output))
				199	{
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	200	int x = max((int)(get_global_id(0) * VEC_SIZE - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE), 0);
				201	int y = get_global_id(1);
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	202
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	203	__global uchar input_addr = input_ptr + input_offset_first_element_in_bytes + x sizeof(DATA_TYPE) + y * input_stride_y;
				204	__global uchar output_addr = output_ptr + output_offset_first_element_in_bytes + x sizeof(DATA_TYPE) + y * output_stride_y;
				205
				206	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE)
				207	res = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input_addr), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE));
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	208
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	209	// Convert input into F32 in order to perform quantized multiplication
				210	#if defined(PROD) && defined(OFFSET) && defined(SCALE)
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	211	VEC_DATA_TYPE(float, VEC_SIZE)
				212	res_f = DEQUANTIZE(res, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE);
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	213	#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
				214
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	215	#if defined(SUM_SQUARE)
				216	res *= res;
				217	#endif // defined(SUM_SQUARE)
				218
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	219	for(unsigned int y = 1; y < HEIGHT; ++y)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	220	{
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	221	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE)
				222	in = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE )(input_addr + y input_stride_y)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE));
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	223	#if defined(MIN)
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame]	224	res = select(res, in, ISLESS(in, res));
				225	#elif defined(MAX)
Michele Di Giorgio	0333704	2020-02-26 17:47:55 +0000	[diff] [blame]	226	res = select(res, in, ISGREATER(in, res));
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	227	#else // !(defined(MAX) \|\| defined(MIN))
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	228	#if defined(SUM_SQUARE)
				229	in *= in;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	230	#endif // defined(SUM_SQUARE)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	231	#if defined(PROD)
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	232
				233	#if defined(OFFSET) && defined(SCALE)
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	234	res_f *= DEQUANTIZE(in, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE);
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	235	#else // !(defined(OFFSET) && defined(SCALE))
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	236	res *= in;
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	237	#endif // defined(OFFSET) && defined(SCALE)
				238
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	239	#else // !defined(PROD)
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	240	res += in;
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	241	#endif // defined(PROD)
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	242	#endif // defined(MAX) \|\| defined(MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	243	}
				244
				245	#if defined(MEAN)
				246	res /= HEIGHT;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	247	#endif // defined(MEAN)
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	248
				249	// Subtract the offsets in case of quantized SUM
				250	#if defined(SUM) && defined(OFFSET) && defined(SCALE)
				251	res -= (HEIGHT - 1) * OFFSET;
				252	#endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE)
				253
				254	// Re-quantize
				255	#if defined(PROD) && defined(OFFSET) && defined(SCALE)
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	256	res = QUANTIZE(res_f, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE);
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	257	#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
				258
				259	// Store result
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	260	VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
				261	res0 = CONVERT_SAT(res, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE));
				262	STORE_VECTOR_SELECT(res, DATA_TYPE, output_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0);
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	263	}
Michalis Spyrou	b9626ab	2019-05-13 17:41:01 +0100	[diff] [blame]	264	#endif // defined(HEIGHT)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	265
				266	#if defined(DEPTH)
				267	/** This kernel performs reduction on z-axis.
				268	*
				269	* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
				270	* @note The depth size must be passed at compile time using -DDEPTH e.g. -DDEPTH=128
				271	*
Michele Di Giorgio	f6f7876	2020-07-06 11:27:21 +0100	[diff] [blame]	272	* @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	273	* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
				274	* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
				275	* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
				276	* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
				277	* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
				278	* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
				279	* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
Michele Di Giorgio	f6f7876	2020-07-06 11:27:21 +0100	[diff] [blame]	280	* @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptr
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	281	* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
				282	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				283	* @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
				284	* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
				285	* @param[in] output_stride_z Stride of the output tensor in Z dimension (in bytes)
				286	* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
				287	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
				288	*/
				289	__kernel void reduction_operation_z(
				290	TENSOR3D_DECLARATION(input),
				291	TENSOR3D_DECLARATION(output))
				292	{
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	293	int x = max((int)(get_global_id(0) * VEC_SIZE - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE), 0);
				294	int y = get_global_id(1);
				295	int z = get_global_id(2);
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	296
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	297	__global uchar input_addr = input_ptr + input_offset_first_element_in_bytes + x sizeof(DATA_TYPE) + y * input_stride_y + z * input_stride_z;
				298	__global uchar output_addr = output_ptr + output_offset_first_element_in_bytes + x sizeof(DATA_TYPE) + y * output_stride_y + z * output_stride_z;
				299
				300	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE)
				301	res = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input_addr), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE));
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	302
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	303	// Convert input into F32 in order to perform quantized multiplication
				304	#if defined(PROD) && defined(OFFSET) && defined(SCALE)
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	305	VEC_DATA_TYPE(float, VEC_SIZE)
				306	res_f = DEQUANTIZE(res, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE);
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	307	#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
				308
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	309	#if defined(SUM_SQUARE)
				310	res *= res;
				311	#endif // defined(SUM_SQUARE)
				312
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	313	for(unsigned int z = 1; z < DEPTH; ++z)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	314	{
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	315	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE)
				316	in = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE )(input_addr + z input_stride_z)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE));
Georgios Pinitas	8be9148	2019-03-26 17:23:28 +0000	[diff] [blame]	317
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	318	#if defined(MIN)
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame]	319	res = select(res, in, ISLESS(in, res));
				320	#elif defined(MAX)
Michele Di Giorgio	0333704	2020-02-26 17:47:55 +0000	[diff] [blame]	321	res = select(res, in, ISGREATER(in, res));
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	322	#else // !(defined(MAX) \|\| defined(MIN))
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	323	#if defined(SUM_SQUARE)
				324	in *= in;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	325	#endif // defined(SUM_SQUARE)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	326	#if defined(PROD)
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	327
				328	#if defined(OFFSET) && defined(SCALE)
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	329	res_f *= DEQUANTIZE(in, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE);
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	330	#else // !(defined(OFFSET) && defined(SCALE))
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	331	res *= in;
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	332	#endif // defined(OFFSET) && defined(SCALE)
				333
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	334	#else // !defined(PROD)
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	335	res += in;
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	336	#endif // defined(PROD)
				337	#endif // defined(MAX) \|\| defined(MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	338	}
				339
				340	#if defined(MEAN)
				341	res /= DEPTH;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	342	#endif // defined(MEAN)
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	343
				344	// Subtract the offsets in case of quantized SUM
				345	#if defined(SUM) && defined(OFFSET) && defined(SCALE)
				346	res -= (DEPTH - 1) * OFFSET;
				347	#endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE)
				348
				349	// Re-quantize
				350	#if defined(PROD) && defined(OFFSET) && defined(SCALE)
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	351	res = QUANTIZE(res_f, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE);
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	352	#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
				353
				354	// Store result
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	355	VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
				356	res0 = CONVERT_SAT(res, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE));
				357
				358	STORE_VECTOR_SELECT(res, DATA_TYPE, output_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0);
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	359	}
				360	#endif /* defined(DEPTH) */
				361
				362	#if defined(BATCH) && defined(DEPTH)
				363	/** This kernel performs reduction on w-axis.
				364	*
				365	* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
				366	* @note The batch size must be passed at compile time using -DBATCH e.g. -DBATCH=128
Manuel Bottini	34f88dd	2019-10-18 10:37:46 +0000	[diff] [blame]	367	* @note The depth size must be passed at compile time using -DBATCH e.g. -DDEPTH=128
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	368	*
Michele Di Giorgio	f6f7876	2020-07-06 11:27:21 +0100	[diff] [blame]	369	* @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	370	* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
				371	* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
				372	* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
				373	* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
				374	* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
				375	* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
				376	* @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes)
				377	* @param[in] input_step_w input_stride_w * number of elements along W processed per workitem(in bytes)
				378	* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
Michele Di Giorgio	f6f7876	2020-07-06 11:27:21 +0100	[diff] [blame]	379	* @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptr
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	380	* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
				381	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				382	* @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
				383	* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
				384	* @param[in] output_stride_z Stride of the output tensor in Z dimension (in bytes)
				385	* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
				386	* @param[in] output_stride_w Stride of the output tensor in W dimension (in bytes)
				387	* @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
				388	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
				389	*/
				390	__kernel void reduction_operation_w(
				391	TENSOR4D_DECLARATION(input),
				392	TENSOR4D_DECLARATION(output))
				393	{
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	394	int x = max((int)(get_global_id(0) * VEC_SIZE - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE), 0);
				395	int y = get_global_id(1);
				396	int z = get_global_id(2);
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	397
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	398	__global uchar input_addr = input_ptr + input_offset_first_element_in_bytes + x sizeof(DATA_TYPE) + y * input_stride_y + (z % DEPTH) * input_stride_z + (z / DEPTH) * input_stride_w;
				399	__global uchar output_addr = output_ptr + output_offset_first_element_in_bytes + x sizeof(DATA_TYPE) + y * output_stride_y + (z % DEPTH) * output_stride_z + (z / DEPTH) * output_stride_z;
				400
				401	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE)
				402	res = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input_addr), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE));
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	403
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	404	// Convert input into F32 in order to perform quantized multiplication
				405	#if defined(PROD) && defined(OFFSET) && defined(SCALE)
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	406	VEC_DATA_TYPE(float, VEC_SIZE)
				407	res_f = DEQUANTIZE(res, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE);
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	408	#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
				409
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	410	#if defined(SUM_SQUARE)
				411	res *= res;
				412	#endif // defined(SUM_SQUARE)
				413
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	414	for(unsigned int w = 1; w < BATCH; ++w)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	415	{
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	416	VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE)
				417	in = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE )(input_addr + w input_stride_w)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE));
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	418
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	419	#if defined(MIN)
Usama Arif	048b0f3	2019-05-22 16:32:27 +0100	[diff] [blame]	420	res = select(res, in, ISLESS(in, res));
				421	#elif defined(MAX)
Michele Di Giorgio	0333704	2020-02-26 17:47:55 +0000	[diff] [blame]	422	res = select(res, in, ISGREATER(in, res));
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	423	#else // !(defined(MAX) \|\| defined(MIN))
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	424	#if defined(SUM_SQUARE)
				425	in *= in;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	426	#endif // defined(SUM_SQUARE)
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	427	#if defined(PROD)
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	428
				429	#if defined(OFFSET) && defined(SCALE)
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	430	res_f *= DEQUANTIZE(in, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE);
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	431	#else // !(defined(OFFSET) && defined(SCALE))
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	432	res *= in;
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	433	#endif // defined(OFFSET) && defined(SCALE)
				434
				435	#else // !defined(PROD)
Michalis Spyrou	8aaf93e	2018-10-11 17:33:32 +0100	[diff] [blame]	436	res += in;
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	437	#endif //defined(PROD)
Manuel Bottini	7b9998d	2019-10-21 17:59:07 +0100	[diff] [blame]	438	#endif // defined(MAX) \|\| defined(MIN)
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	439	}
				440
				441	#if defined(MEAN)
				442	res /= BATCH;
Michalis Spyrou	7930db4	2018-11-22 17:36:28 +0000	[diff] [blame]	443	#endif // defined(MEAN)
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	444
				445	// Subtract the offsets in case of quantized SUM
				446	#if defined(SUM) && defined(OFFSET) && defined(SCALE)
				447	res -= (BATCH - 1) * OFFSET;
				448	#endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE)
				449
				450	// Re-quantize
				451	#if defined(PROD) && defined(OFFSET) && defined(SCALE)
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	452	res = QUANTIZE(res_f, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE);
Michalis Spyrou	0b18d97	2020-01-30 18:11:13 +0000	[diff] [blame]	453	#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
				454
				455	// Store result
Giorgio Arena	3ecf9fe	2021-04-28 16:11:51 +0100	[diff] [blame]	456	VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
				457	res0 = CONVERT_SAT(res, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE));
				458	STORE_VECTOR_SELECT(res, DATA_TYPE, output_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0);
Michalis Spyrou	7e9391b	2018-10-05 14:49:28 +0100	[diff] [blame]	459	}
Manuel Bottini	b412fab	2018-12-10 17:40:23 +0000	[diff] [blame]	460	#endif /* defined(BATCH) && defined(DEPTH) */