Blame - src/core/CL/cl_kernels/common/l2_normalize.cl - ml/ComputeLibrary

blob: fbe3406239eebebbfccfafc36e5130c1a5b7de99 [file] [log] [blame]

Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	1	/*
Manuel Bottini	7a452fe	2021-03-31 18:22:59 +0100	[diff] [blame]	2	* Copyright (c) 2016-2021 Arm Limited.
Michalis Spyrou	04f089c	2017-08-08 17:42:38 +0100	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "helpers.h"
				25
Manuel Bottini	7a452fe	2021-03-31 18:22:59 +0100	[diff] [blame]	26	#if defined(VEC_SIZE_X) && defined(VEC_SIZE_LEFTOVER_X)
/** This kernel performs l2 normalization on x-axis
 *
 * Every element of a row is multiplied by rsqrt(max(sum, epsilon)), where
 * `sum` is the single value read from the sum tensor for that row.
 *
 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
 * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE_X=size. e.g. -DVEC_SIZE_X=16
 * @note The leftover size in the X dimension should be given as a preprocessor argument using -DVEC_SIZE_LEFTOVER_X=size, where size is x_dimension % VEC_SIZE_X. e.g. -DVEC_SIZE_LEFTOVER_X=1
 *
 * @param[in]  input_ptr                            Pointer to the source tensor. Supported data types: F16/F32
 * @param[in]  input_stride_x                       Stride of the source tensor in X dimension (in bytes)
 * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  input_stride_y                       Stride of the source tensor in Y dimension (in bytes)
 * @param[in]  input_step_y                         input_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  input_offset_first_element_in_bytes  The offset of the first element in the source tensor
 * @param[in]  sum_ptr                              Pointer to the sum tensor (presumably the per-row sum of squares produced by a preceding reduction). Supported data types: F16/F32
 * @param[in]  sum_stride_x                         Stride of the sum tensor in X dimension (in bytes)
 * @param[in]  sum_step_x                           sum_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  sum_stride_y                         Stride of the sum tensor in Y dimension (in bytes)
 * @param[in]  sum_step_y                           sum_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  sum_offset_first_element_in_bytes    The offset of the first element in the sum tensor
 * @param[out] output_ptr                           Pointer to the destination tensor. Supported data types: same as @p input_ptr
 * @param[in]  output_stride_x                      Stride of the destination tensor in X dimension (in bytes)
 * @param[in]  output_step_x                        output_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  output_stride_y                      Stride of the destination tensor in Y dimension (in bytes)
 * @param[in]  output_step_y                        output_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  output_offset_first_element_in_bytes The offset of the first element in the destination tensor
 * @param[in]  epsilon                              Epsilon value (lower bound applied to the sum before the reciprocal square root)
 */
__kernel void l2_normalize_x(
    IMAGE_DECLARATION(input),
    IMAGE_DECLARATION(sum),
    IMAGE_DECLARATION(output),
    DATA_TYPE epsilon)
{
    // Offset computation: every work-item is shifted back by
    // (VEC_SIZE_X - VEC_SIZE_LEFTOVER_X) % VEC_SIZE_X elements, clamped to 0 for
    // the first work-item along X.
    const uint x_offs = max((int)(get_global_id(0) * VEC_SIZE_X - (VEC_SIZE_X - VEC_SIZE_LEFTOVER_X) % VEC_SIZE_X), 0);

    // Address computation
    __global uchar *input_addr  = input_ptr + input_offset_first_element_in_bytes + x_offs * sizeof(DATA_TYPE) + get_global_id(1) * input_stride_y;
    // The sum tensor is addressed by row only: one scalar per value of get_global_id(1).
    __global uchar *sum_addr    = sum_ptr + sum_offset_first_element_in_bytes + get_global_id(1) * sum_stride_y;
    __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + x_offs * sizeof(DATA_TYPE) + get_global_id(1) * output_stride_y;

    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_X)
    in = VLOAD(VEC_SIZE_X)(0, (__global DATA_TYPE *)input_addr);

    // Scalar 1 / sqrt(max(sum, epsilon)), broadcast to every lane of the vector.
    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_X)
    normalize_value = (VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_X))rsqrt(fmax(*((__global DATA_TYPE *)sum_addr), epsilon));

    // Named data0 because STORE_VECTOR_SELECT is given the base name "data".
    const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_X)
    data0 = in * normalize_value;

    // NOTE(review): STORE_VECTOR_SELECT is defined in helpers.h; presumably it stores only
    // VEC_SIZE_LEFTOVER_X elements when the last argument is true - confirm against the macro.
    STORE_VECTOR_SELECT(data, DATA_TYPE, output_addr, VEC_SIZE_X, VEC_SIZE_LEFTOVER_X, VEC_SIZE_LEFTOVER_X != 0 && get_global_id(0) == 0);
}
				78
/** This kernel performs l2 normalization on y-axis.
 *
 * Every element is multiplied by rsqrt(max(sum, epsilon)), where `sum` is the
 * value read from the sum tensor at the same X position.
 *
 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
 * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE_X=size. e.g. -DVEC_SIZE_X=16
 * @note The leftover size in the X dimension should be given as a preprocessor argument using -DVEC_SIZE_LEFTOVER_X=size, where size is x_dimension % VEC_SIZE_X. e.g. -DVEC_SIZE_LEFTOVER_X=1
 *
 * @param[in]  input_ptr                            Pointer to the source tensor. Supported data types: F16/F32
 * @param[in]  input_stride_x                       Stride of the source tensor in X dimension (in bytes)
 * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  input_stride_y                       Stride of the source tensor in Y dimension (in bytes)
 * @param[in]  input_step_y                         input_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  input_offset_first_element_in_bytes  The offset of the first element in the source tensor
 * @param[in]  sum_ptr                              Pointer to the sum tensor (presumably the per-column sum of squares produced by a preceding reduction). Supported data types: F16/F32
 * @param[in]  sum_stride_x                         Stride of the sum tensor in X dimension (in bytes)
 * @param[in]  sum_step_x                           sum_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  sum_stride_y                         Stride of the sum tensor in Y dimension (in bytes)
 * @param[in]  sum_step_y                           sum_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  sum_offset_first_element_in_bytes    The offset of the first element in the sum tensor
 * @param[out] output_ptr                           Pointer to the destination tensor. Supported data types: same as @p input_ptr
 * @param[in]  output_stride_x                      Stride of the destination tensor in X dimension (in bytes)
 * @param[in]  output_step_x                        output_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  output_stride_y                      Stride of the destination tensor in Y dimension (in bytes)
 * @param[in]  output_step_y                        output_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  output_offset_first_element_in_bytes The offset of the first element in the destination tensor
 * @param[in]  epsilon                              Epsilon value (lower bound applied to the sums before the reciprocal square root)
 */
__kernel void l2_normalize_y(
    IMAGE_DECLARATION(input),
    IMAGE_DECLARATION(sum),
    IMAGE_DECLARATION(output),
    DATA_TYPE epsilon)
{
    // Offset computation: every work-item is shifted back by
    // (VEC_SIZE_X - VEC_SIZE_LEFTOVER_X) % VEC_SIZE_X elements, clamped to 0 for
    // the first work-item along X.
    const uint x_offs = max((int)(get_global_id(0) * VEC_SIZE_X - (VEC_SIZE_X - VEC_SIZE_LEFTOVER_X) % VEC_SIZE_X), 0);

    // Address computation
    __global uchar *input_addr  = input_ptr + input_offset_first_element_in_bytes + x_offs * sizeof(DATA_TYPE) + get_global_id(1) * input_stride_y;
    // The sum tensor is addressed by X only: the same sums are used for every row.
    __global uchar *sum_addr    = sum_ptr + sum_offset_first_element_in_bytes + x_offs * sizeof(DATA_TYPE);
    __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + x_offs * sizeof(DATA_TYPE) + get_global_id(1) * output_stride_y;

    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_X)
    in = VLOAD(VEC_SIZE_X)(0, (__global DATA_TYPE *)input_addr);
    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_X)
    sums = VLOAD(VEC_SIZE_X)(0, (__global DATA_TYPE *)sum_addr);

    // Element-wise 1 / sqrt(max(sum, epsilon)).
    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_X)
    normalize_value = (VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_X))rsqrt(fmax(sums, epsilon));

    // Named data0 because STORE_VECTOR_SELECT is given the base name "data".
    const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_X)
    data0 = in * normalize_value;

    // NOTE(review): STORE_VECTOR_SELECT is defined in helpers.h; presumably it stores only
    // VEC_SIZE_LEFTOVER_X elements when the last argument is true - confirm against the macro.
    STORE_VECTOR_SELECT(data, DATA_TYPE, output_addr, VEC_SIZE_X, VEC_SIZE_LEFTOVER_X, VEC_SIZE_LEFTOVER_X != 0 && get_global_id(0) == 0);
}
Manuel Bottini	7a452fe	2021-03-31 18:22:59 +0100	[diff] [blame]	132
/** This kernel performs l2 normalization on z-axis.
 *
 * Every element is multiplied by rsqrt(max(sum, epsilon)), where `sum` is the
 * value read from the sum tensor at the same (X, Y) position.
 *
 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
 * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE_X=size. e.g. -DVEC_SIZE_X=16
 * @note The leftover size in the X dimension should be given as a preprocessor argument using -DVEC_SIZE_LEFTOVER_X=size, where size is x_dimension % VEC_SIZE_X. e.g. -DVEC_SIZE_LEFTOVER_X=1
 *
 * @param[in]  input_ptr                            Pointer to the source tensor. Supported data types: F16/F32
 * @param[in]  input_stride_x                       Stride of the source tensor in X dimension (in bytes)
 * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  input_stride_y                       Stride of the source tensor in Y dimension (in bytes)
 * @param[in]  input_step_y                         input_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  input_stride_z                       Stride of the source tensor in Z dimension (in bytes)
 * @param[in]  input_step_z                         input_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  input_offset_first_element_in_bytes  The offset of the first element in the source tensor
 * @param[in]  sum_ptr                              Pointer to the sum tensor (presumably the sum of squares along Z produced by a preceding reduction). Supported data types: F16/F32
 * @param[in]  sum_stride_x                         Stride of the sum tensor in X dimension (in bytes)
 * @param[in]  sum_step_x                           sum_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  sum_stride_y                         Stride of the sum tensor in Y dimension (in bytes)
 * @param[in]  sum_step_y                           sum_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  sum_stride_z                         Stride of the sum tensor in Z dimension (in bytes)
 * @param[in]  sum_step_z                           sum_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  sum_offset_first_element_in_bytes    The offset of the first element in the sum tensor
 * @param[out] output_ptr                           Pointer to the destination tensor. Supported data types: same as @p input_ptr
 * @param[in]  output_stride_x                      Stride of the destination tensor in X dimension (in bytes)
 * @param[in]  output_step_x                        output_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  output_stride_y                      Stride of the destination tensor in Y dimension (in bytes)
 * @param[in]  output_step_y                        output_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  output_stride_z                      Stride of the destination tensor in Z dimension (in bytes)
 * @param[in]  output_step_z                        output_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  output_offset_first_element_in_bytes The offset of the first element in the destination tensor
 * @param[in]  epsilon                              Epsilon value (lower bound applied to the sums before the reciprocal square root)
 */
__kernel void l2_normalize_z(
    TENSOR3D_DECLARATION(input),
    TENSOR3D_DECLARATION(sum),
    TENSOR3D_DECLARATION(output),
    DATA_TYPE epsilon)
{
    // Offset computation: every work-item is shifted back by
    // (VEC_SIZE_X - VEC_SIZE_LEFTOVER_X) % VEC_SIZE_X elements, clamped to 0 for
    // the first work-item along X.
    const uint x_offs = max((int)(get_global_id(0) * VEC_SIZE_X - (VEC_SIZE_X - VEC_SIZE_LEFTOVER_X) % VEC_SIZE_X), 0);

    // Address computation
    __global uchar *input_addr  = input_ptr + input_offset_first_element_in_bytes + x_offs * sizeof(DATA_TYPE) + get_global_id(1) * input_stride_y + get_global_id(2) * input_stride_z;
    // The sum tensor is addressed by X and Y only: the same sums are used for every Z slice.
    __global uchar *sum_addr    = sum_ptr + sum_offset_first_element_in_bytes + x_offs * sizeof(DATA_TYPE) + get_global_id(1) * sum_stride_y;
    __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + x_offs * sizeof(DATA_TYPE) + get_global_id(1) * output_stride_y + get_global_id(2) * output_stride_z;

    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_X)
    in = VLOAD(VEC_SIZE_X)(0, (__global DATA_TYPE *)input_addr);
    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_X)
    sums = VLOAD(VEC_SIZE_X)(0, (__global DATA_TYPE *)sum_addr);

    // Element-wise in / sqrt(max(sum, epsilon)).
    // Named data0 because STORE_VECTOR_SELECT is given the base name "data".
    const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_X)
    data0 = in * ((VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_X))(rsqrt(fmax(sums, epsilon))));

    // NOTE(review): STORE_VECTOR_SELECT is defined in helpers.h; presumably it stores only
    // VEC_SIZE_LEFTOVER_X elements when the last argument is true - confirm against the macro.
    STORE_VECTOR_SELECT(data, DATA_TYPE, output_addr, VEC_SIZE_X, VEC_SIZE_LEFTOVER_X, VEC_SIZE_LEFTOVER_X != 0 && get_global_id(0) == 0);
}
				189	#endif // defined(VEC_SIZE_X) && defined(VEC_SIZE_LEFTOVER_X)