Blame - src/core/CL/cl_kernels/activation_layer_quant.cl - ml/ComputeLibrary

blob: c031c86a5e92408025e28cd51a1bcaaaaff8051f [file] [log] [blame]

Michel Iwaniec	0063380	2017-10-12 14:14:15 +0100	[diff] [blame]	1	/*
Michele Di Giorgio	f6f7876	2020-07-06 11:27:21 +0100	[diff] [blame]	2	* Copyright (c) 2016-2020 Arm Limited.
Michel Iwaniec	0063380	2017-10-12 14:14:15 +0100	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
Manuel Bottini	30dbeef	2019-06-26 16:23:03 +0100	[diff] [blame]	24	#include "activation_quant_helpers.h"
Michel Iwaniec	0063380	2017-10-12 14:14:15 +0100	[diff] [blame]	25
Michele Di Giorgio	d304e80	2018-07-06 10:17:33 +0100	[diff] [blame]	26	#define VEC_FLOAT VEC_DATA_TYPE(float, VEC_SIZE)
Michel Iwaniec	0063380	2017-10-12 14:14:15 +0100	[diff] [blame]	27
Georgios Pinitas	4b3fba1	2019-06-04 17:31:46 +0100	[diff] [blame]	28	#if defined(FLOAT_DOMAIN)
				29	// Activations performed in the float domain
				30
				31	#include "activation_float_helpers.h"
				32
Manuel Bottini	30dbeef	2019-06-26 16:23:03 +0100	[diff] [blame]	33	/** This performs an activation function on quantized inputs with float transformations.
Georgios Pinitas	4b3fba1	2019-06-04 17:31:46 +0100	[diff] [blame]	34	*
				35	* @note In order to perform the activation function "in-place", the pre-processor -DIN_PLACE must be passed at compile time
				36	*
				37	* @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
				38	* @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
Giorgio Arena	5304884	2020-10-07 16:03:43 +0100	[diff] [blame]	39	* @note Leftover vector size has to be passed at compile time using -DVEC_SIZE_LEFTOVER. e.g. -DVEC_SIZE=3. It is defined as the remainder between the input's first dimension and VEC_SIZE
Georgios Pinitas	4b3fba1	2019-06-04 17:31:46 +0100	[diff] [blame]	40	* @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively.
				41	* @note Quantization scales of the input/output tensors are passed in with -DS1_VAL= and -DS2_VAL= respectively.
Manuel Bottini	30dbeef	2019-06-26 16:23:03 +0100	[diff] [blame]	42	* @note Quantization offsets of the input/output tensors are passed in only if asymmetric with -DO1_VAL= and -DO2_VAL= respectively.
Georgios Pinitas	4b3fba1	2019-06-04 17:31:46 +0100	[diff] [blame]	43	* @note Quantized value of constant zero should be given as a preprocessor argument using -DCONST_0=value. e.g. -DCONST_0=128.
				44	*
Michele Di Giorgio	f6f7876	2020-07-06 11:27:21 +0100	[diff] [blame]	45	* @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8/QASYMM8_SIGNED/QSYMM16
Georgios Pinitas	4b3fba1	2019-06-04 17:31:46 +0100	[diff] [blame]	46	* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
				47	* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
				48	* @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
				49	* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
				50	* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
				51	* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
				52	* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
				53	* @param[out] output_ptr (Optional) Pointer to the destination image. Supported data types: same as @p input_ptr
				54	* @param[in] output_stride_x (Optional) Stride of the destination image in X dimension (in bytes)
				55	* @param[in] output_step_x (Optional) output_stride_x * number of elements along X processed per workitem(in bytes)
				56	* @param[in] output_stride_y (Optional) Stride of the destination image in Y dimension (in bytes)
				57	* @param[in] output_step_y (Optional) output_stride_y * number of elements along Y processed per workitem(in bytes)
				58	* @param[in] output_stride_z (Optional) Stride of the source tensor in Z dimension (in bytes)
				59	* @param[in] output_step_z (Optional) output_stride_z * number of elements along Z processed per workitem(in bytes)
				60	* @param[in] output_offset_first_element_in_bytes (Optional) The offset of the first element in the destination image
				61	*/
Manuel Bottini	30dbeef	2019-06-26 16:23:03 +0100	[diff] [blame]	62	__kernel void activation_layer_quant_f32(
Georgios Pinitas	4b3fba1	2019-06-04 17:31:46 +0100	[diff] [blame]	63	TENSOR3D_DECLARATION(input)
				64	#ifndef IN_PLACE
				65	,
				66	TENSOR3D_DECLARATION(output)
				67	#endif /* not IN_PLACE */
				68	)
				69	{
Giorgio Arena	d304adb	2020-10-02 10:20:11 +0100	[diff] [blame]	70	uint x_offs = max((int)(get_global_id(0) * VEC_SIZE * sizeof(DATA_TYPE) - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE * sizeof(DATA_TYPE)), 0);
				71
Georgios Pinitas	4b3fba1	2019-06-04 17:31:46 +0100	[diff] [blame]	72	// Get pixels pointer
Giorgio Arena	d304adb	2020-10-02 10:20:11 +0100	[diff] [blame]	73	__global uchar input_addr = input_ptr + input_offset_first_element_in_bytes + x_offs + get_global_id(1) input_stride_y + get_global_id(2) * input_stride_z;
Georgios Pinitas	4b3fba1	2019-06-04 17:31:46 +0100	[diff] [blame]	74	#ifdef IN_PLACE
Giorgio Arena	d304adb	2020-10-02 10:20:11 +0100	[diff] [blame]	75	__global uchar *output_addr = input_addr;
Georgios Pinitas	4b3fba1	2019-06-04 17:31:46 +0100	[diff] [blame]	76	#else /* IN_PLACE */
Giorgio Arena	d304adb	2020-10-02 10:20:11 +0100	[diff] [blame]	77	__global uchar output_addr = output_ptr + output_offset_first_element_in_bytes + x_offs + get_global_id(1) output_stride_y + get_global_id(2) * output_stride_z;
Georgios Pinitas	4b3fba1	2019-06-04 17:31:46 +0100	[diff] [blame]	78	#endif /* IN_PLACE */
				79
				80	// Load data
Giorgio Arena	d304adb	2020-10-02 10:20:11 +0100	[diff] [blame]	81	TYPE data0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input_addr);
Georgios Pinitas	4b3fba1	2019-06-04 17:31:46 +0100	[diff] [blame]	82
Giorgio Arena	d304adb	2020-10-02 10:20:11 +0100	[diff] [blame]	83	VEC_FLOAT data_flt = CONVERT(data0, VEC_FLOAT);
Manuel Bottini	30dbeef	2019-06-26 16:23:03 +0100	[diff] [blame]	84	#if defined(O1_VAL)
				85	data_flt = round(data_flt - (float)O1_VAL) * ((float)S1_VAL);
				86	#else // defined(O1_VAL)
Giorgio Arena	d304adb	2020-10-02 10:20:11 +0100	[diff] [blame]	87	data_flt = round(data_flt) * ((float)S1_VAL);
Manuel Bottini	30dbeef	2019-06-26 16:23:03 +0100	[diff] [blame]	88	#endif // defined(O1_VAL)
Giorgio Arena	d056e57	2020-10-12 11:53:51 +0100	[diff] [blame]	89	data_flt = ACTIVATION(ACT, float, VEC_SIZE, data_flt, A_VAL, B_VAL);
Georgios Pinitas	4b3fba1	2019-06-04 17:31:46 +0100	[diff] [blame]	90
Manuel Bottini	30dbeef	2019-06-26 16:23:03 +0100	[diff] [blame]	91	#if defined(O2_VAL)
Giorgio Arena	d304adb	2020-10-02 10:20:11 +0100	[diff] [blame]	92	data0 = CONVERT_SAT(round(data_flt / ((float)S2_VAL)) + (float)O2_VAL, TYPE);
Manuel Bottini	30dbeef	2019-06-26 16:23:03 +0100	[diff] [blame]	93	#else // defined(O2_VAL)
Giorgio Arena	d304adb	2020-10-02 10:20:11 +0100	[diff] [blame]	94	data0 = CONVERT_SAT(round(data_flt / ((float)S2_VAL)), TYPE);
Manuel Bottini	30dbeef	2019-06-26 16:23:03 +0100	[diff] [blame]	95	#endif // defined(O2_VAL)
Georgios Pinitas	4b3fba1	2019-06-04 17:31:46 +0100	[diff] [blame]	96
				97	// Store result
Giorgio Arena	d304adb	2020-10-02 10:20:11 +0100	[diff] [blame]	98	STORE_VECTOR_SELECT(data, DATA_TYPE, output_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0)
Georgios Pinitas	4b3fba1	2019-06-04 17:31:46 +0100	[diff] [blame]	99	}
				100
				101	#else // defined(FLOAT_DOMAIN)
				102	// Activations performed in the quantized domain
				103
Giorgio Arena	99ac60b	2018-02-16 15:17:23 +0000	[diff] [blame]	104	#if defined(ACT)
Manuel Bottini	30dbeef	2019-06-26 16:23:03 +0100	[diff] [blame]	105	/** This performs an activation function on quantized inputs.
Michel Iwaniec	0063380	2017-10-12 14:14:15 +0100	[diff] [blame]	106	*
				107	* @note In order to perform the activation function "in-place", the pre-processor -DIN_PLACE must be passed at compile time
				108	*
				109	* @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
				110	* @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
Giorgio Arena	5304884	2020-10-07 16:03:43 +0100	[diff] [blame]	111	* @note Leftover vector size has to be passed at compile time using -DVEC_SIZE_LEFTOVER. e.g. -DVEC_SIZE=3. It is defined as the remainder between the input's first dimension and VEC_SIZE
Michel Iwaniec	0063380	2017-10-12 14:14:15 +0100	[diff] [blame]	112	* @note Activation function should be given as a preprocessor argument using -DACT=name. e.g. -DACT=TANH
				113	* @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively.
				114	* @note Quantization scales of the input/output tensors are passed in with -DS1_VAL= and -DS2_VAL= respectively.
				115	* @note Quantization offsets of the input/output tensors are passed in with -DO1_VAL= and -DO2_VAL= respectively.
Michele Di Giorgio	a1f7e33	2018-01-22 17:26:36 +0000	[diff] [blame]	116	* @note Quantized value of constant zero should be given as a preprocessor argument using -DCONST_0=value. e.g. -DCONST_0=128.
Michel Iwaniec	0063380	2017-10-12 14:14:15 +0100	[diff] [blame]	117	*
Michele Di Giorgio	f6f7876	2020-07-06 11:27:21 +0100	[diff] [blame]	118	* @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8/QASYMM8_SIGNED/QSYMM16
Michel Iwaniec	0063380	2017-10-12 14:14:15 +0100	[diff] [blame]	119	* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
				120	* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
				121	* @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
				122	* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
				123	* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
				124	* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
				125	* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
Michele Di Giorgio	e5bf4c5	2019-02-14 17:47:33 +0000	[diff] [blame]	126	* @param[out] output_ptr (Optional) Pointer to the destination image. Supported data types: same as @p input_ptr
				127	* @param[in] output_stride_x (Optional) Stride of the destination image in X dimension (in bytes)
				128	* @param[in] output_step_x (Optional) output_stride_x * number of elements along X processed per workitem(in bytes)
				129	* @param[in] output_stride_y (Optional) Stride of the destination image in Y dimension (in bytes)
				130	* @param[in] output_step_y (Optional) output_stride_y * number of elements along Y processed per workitem(in bytes)
				131	* @param[in] output_stride_z (Optional) Stride of the source tensor in Z dimension (in bytes)
				132	* @param[in] output_step_z (Optional) output_stride_z * number of elements along Z processed per workitem(in bytes)
				133	* @param[in] output_offset_first_element_in_bytes (Optional) The offset of the first element in the destination image
Michel Iwaniec	0063380	2017-10-12 14:14:15 +0100	[diff] [blame]	134	*/
Manuel Bottini	30dbeef	2019-06-26 16:23:03 +0100	[diff] [blame]	135	__kernel void activation_layer_quant(
Michel Iwaniec	0063380	2017-10-12 14:14:15 +0100	[diff] [blame]	136	TENSOR3D_DECLARATION(input)
				137	#ifndef IN_PLACE
				138	,
				139	TENSOR3D_DECLARATION(output)
				140	#endif /* not IN_PLACE */
				141	)
				142	{
Giorgio Arena	d304adb	2020-10-02 10:20:11 +0100	[diff] [blame]	143	uint x_offs = max((int)(get_global_id(0) * VEC_SIZE * sizeof(DATA_TYPE) - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE * sizeof(DATA_TYPE)), 0);
				144
Michel Iwaniec	0063380	2017-10-12 14:14:15 +0100	[diff] [blame]	145	// Get pixels pointer
Giorgio Arena	d304adb	2020-10-02 10:20:11 +0100	[diff] [blame]	146	__global uchar input_addr = input_ptr + input_offset_first_element_in_bytes + x_offs + get_global_id(1) input_stride_y + get_global_id(2) * input_stride_z;
Michel Iwaniec	0063380	2017-10-12 14:14:15 +0100	[diff] [blame]	147	#ifdef IN_PLACE
Giorgio Arena	d304adb	2020-10-02 10:20:11 +0100	[diff] [blame]	148	__global uchar *output_addr = input_addr;
Michel Iwaniec	0063380	2017-10-12 14:14:15 +0100	[diff] [blame]	149	#else /* IN_PLACE */
Giorgio Arena	d304adb	2020-10-02 10:20:11 +0100	[diff] [blame]	150	__global uchar output_addr = output_ptr + output_offset_first_element_in_bytes + x_offs + get_global_id(1) output_stride_y + get_global_id(2) * output_stride_z;
Michel Iwaniec	0063380	2017-10-12 14:14:15 +0100	[diff] [blame]	151	#endif /* IN_PLACE */
				152
				153	// Load data
Giorgio Arena	d304adb	2020-10-02 10:20:11 +0100	[diff] [blame]	154	TYPE data0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input_addr);
Michel Iwaniec	0063380	2017-10-12 14:14:15 +0100	[diff] [blame]	155
Giorgio Arena	d304adb	2020-10-02 10:20:11 +0100	[diff] [blame]	156	data0 = PERFORM_ACTIVATION_QUANT(ACT, data0);
Michel Iwaniec	0063380	2017-10-12 14:14:15 +0100	[diff] [blame]	157
				158	// Store result
Giorgio Arena	d304adb	2020-10-02 10:20:11 +0100	[diff] [blame]	159	STORE_VECTOR_SELECT(data, DATA_TYPE, output_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0)
Michel Iwaniec	0063380	2017-10-12 14:14:15 +0100	[diff] [blame]	160	}
Georgios Pinitas	4b3fba1	2019-06-04 17:31:46 +0100	[diff] [blame]	161	#endif // defined(ACT)
				162	#endif // defined(FLOAT_DOMAIN)