Blame - src/core/CL/cl_kernels/nhwc/pooling_layer_quantized.cl - ml/ComputeLibrary

blob: 46268a4a88d978b4b12759f2c1ba6107df0e1093 [file] [log] [blame]

Anton Lokhmotov	af6204c	2017-11-08 09:34:19 +0000	[diff] [blame]	1	/*
Adnan AlSinan	7075fe2	2021-07-05 13:12:52 +0100	[diff] [blame]	2	* Copyright (c) 2017-2021 Arm Limited.
Anton Lokhmotov	af6204c	2017-11-08 09:34:19 +0000	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "helpers.h"
				25
Michele Di Giorgio	cbbed28	2019-12-20 13:26:08 +0000	[diff] [blame]	26	#if defined(DATA_TYPE) && defined(INITIAL_VALUE)
				27	#define VEC_TYPE(VEC_SIZE) VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
				28
Pablo Tello	a52e4cf	2019-04-01 14:55:18 +0100	[diff] [blame]	29	#if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT)
Michele Di Giorgio	cbbed28	2019-12-20 13:26:08 +0000	[diff] [blame]	30	#define VEC_FLOAT(VEC_SIZE) VEC_DATA_TYPE(float, VEC_SIZE)
Michalis Spyrou	4335a8c	2019-04-05 16:41:30 +0100	[diff] [blame]	31	#define VEC_INT(VEC_SIZE) VEC_DATA_TYPE(int, VEC_SIZE)
Michalis Spyrou	4335a8c	2019-04-05 16:41:30 +0100	[diff] [blame]	32	#define CONVERT_RTE(x, type) (convert_##type##_rte((x)))
Pablo Tello	a52e4cf	2019-04-01 14:55:18 +0100	[diff] [blame]	33	#define CONVERT_DOWN(x, type) CONVERT_RTE(x, type)
				34	#define REQUANTIZE(VEC_SIZE, input, in_offset, out_offset, in_scale, out_scale, res) \
				35	{ \
				36	const VEC_FLOAT(VEC_SIZE) in_f32 = (CONVERT(input, VEC_FLOAT(VEC_SIZE)) - (VEC_FLOAT(VEC_SIZE))((float)in_offset)) * (VEC_FLOAT(VEC_SIZE))((float)in_scale); \
				37	const VEC_FLOAT(VEC_SIZE) out_f32 = in_f32 / ((VEC_FLOAT(VEC_SIZE))(float)out_scale) + ((VEC_FLOAT(VEC_SIZE))((float)out_offset)); \
Michele Di Giorgio	cbbed28	2019-12-20 13:26:08 +0000	[diff] [blame]	38	res = CONVERT_SAT(CONVERT_DOWN(out_f32, VEC_INT(VEC_SIZE)), VEC_TYPE(VEC_SIZE)); \
Pablo Tello	a52e4cf	2019-04-01 14:55:18 +0100	[diff] [blame]	39	}
				40	#endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */
				41
/* Reduction applied to every element of the pooling window:
 * a running sum for average pooling, a running maximum otherwise.
 */
#if defined(POOL_AVG)
#define POOL_OP(x, y) ((x) + (y))
#else /* defined(POOL_AVG) */
#define POOL_OP(x, y) (max((x), (y)))
#endif /* defined(POOL_AVG) */

/* Division expressed as a multiplication by the reciprocal.
 * Both arguments are parenthesised so that compound expressions such as DIV_OP(a, b + c) divide by the whole of (b + c).
 */
#define DIV_OP(x, y) ((x) * (1.f / (y)))

#if defined(POOL_L2)
#error "L2 pooling is not supported"
#endif /* defined(POOL_L2) */
				53
#if defined(VEC_SIZE) && defined(VEC_SIZE_LEFTOVER) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) && defined(DST_CHANNELS) && defined(DST_HEIGHT) && defined(DST_BATCH_SIZE) && defined(ACC_DATA_TYPE)
/** Performs pooling layer of size equal to MxN. This OpenCL kernel can perform the following pooling types:
 * -# max, selected whenever -DPOOL_AVG is not defined (-DPOOL_MAX itself is not tested in this file)
 * -# average, -DPOOL_AVG must be passed at compile time. If padding has to be excluded, -DEXCLUDE_PADDING should be passed at compile time
 *
 * @note Datatype must be passed at compile time using -DDATA_TYPE e.g. -DDATA_TYPE=uchar. Supported data types are QASYMM8/QASYMM8_SIGNED
 * @note Accumulation data type must be passed at compile time using -DACC_DATA_TYPE e.g. -DACC_DATA_TYPE=int
 * @note Pool size must be passed at compile time using -DPOOL_SIZE_X and -DPOOL_SIZE_Y. e.g. -DPOOL_SIZE_X=4, -DPOOL_SIZE_Y=4
 * @note Input tensor width and height must be passed at compile time using -DSRC_WIDTH and -DSRC_HEIGHT
 * @note Output tensor height, channels and batch size must be passed at compile time using -DDST_HEIGHT, -DDST_CHANNELS and -DDST_BATCH_SIZE
 * @note Pool strides must be passed at compile time using -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions
 * @note Pool pads must be passed at compile time using -DPAD_X and -DPAD_Y
 * @note Vector size must be passed at compile time using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
 * @note Leftover vector size must be passed at compile time using -DVEC_SIZE_LEFTOVER. e.g. -DVEC_SIZE_LEFTOVER=3. It is defined as the remainder between the input's first dimension and VEC_SIZE
 * @note The initial value for the pooling operation must be passed at compile time using -DINITIAL_VALUE e.g. -DINITIAL_VALUE=0
 * @note If the output has to be requantized, -DOFFSET_IN1, -DOFFSET_OUT, -DSCALE_IN1 and -DSCALE_OUT must be passed at compile time
 * @note With -DPOOL_AVG and without -DEXCLUDE_PADDING the sum is divided by POOL_SIZE_X * POOL_SIZE_Y; padded positions are never read and add nothing to the sum
 *
 * @param[in]  input_ptr                            Pointer to the source image. Supported data types: QASYMM8/QASYMM8_SIGNED
 * @param[in]  input_stride_x                       Stride of the source image in X dimension (in bytes)
 * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  input_stride_y                       Stride of the source image in Y dimension (in bytes)
 * @param[in]  input_step_y                         input_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  input_stride_z                       Stride of the source tensor in Z dimension (in bytes)
 * @param[in]  input_step_z                         input_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  input_stride_w                       Stride of the source tensor in W dimension (in bytes)
 * @param[in]  input_step_w                         input_stride_w * number of elements along W processed per workitem(in bytes)
 * @param[in]  input_offset_first_element_in_bytes  The offset of the first element in the source image
 * @param[out] output_ptr                           Pointer to the destination image. Supported data types: same as @p input_ptr
 * @param[in]  output_stride_x                      Stride of the destination tensor in X dimension (in bytes)
 * @param[in]  output_step_x                        output_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  output_stride_y                      Stride of the destination tensor in Y dimension (in bytes)
 * @param[in]  output_step_y                        output_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  output_stride_z                      Stride of the destination tensor in Z dimension (in bytes)
 * @param[in]  output_step_z                        output_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  output_stride_w                      Stride of the destination tensor in W dimension (in bytes)
 * @param[in]  output_step_w                        output_stride_w * number of elements along W processed per workitem(in bytes)
 * @param[in]  output_offset_first_element_in_bytes The offset of the first element in the destination image
 */
__kernel void pooling_layer_MxN_quantized_nhwc(
    TENSOR4D_DECLARATION(input),
    TENSOR4D_DECLARATION(output))
{
    // Note: If C is not a multiple of VEC_SIZE, every work-item except the first is shifted back by (VEC_SIZE - VEC_SIZE_LEFTOVER) elements,
    //       so that the work-item with get_global_id(0) == 0 is the one handling the VEC_SIZE_LEFTOVER leftover elements
    // Note: If C is less than VEC_SIZE, VEC_SIZE should be shrunk to the closest smaller VEC_SIZE. This operation is performed on the host side
    int offset_c  = max((int)(get_global_id(0) * VEC_SIZE - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE), 0) * sizeof(DATA_TYPE); // Byte offset along the channel dimension
    int idx_out_w = get_global_id(1);
#if DST_BATCH_SIZE != 1
    // If batch size != 1, the batch size dimension is collapsed over the height dimension
    int idx_out_h = get_global_id(2) % DST_HEIGHT;
    int idx_out_n = get_global_id(2) / DST_HEIGHT;
#else  // DST_BATCH_SIZE != 1
    int idx_out_h = get_global_id(2);
    int idx_out_n = 0;
#endif // DST_BATCH_SIZE != 1

    // Top-left corner of the pooling window in input coordinates. Negative values fall inside the padding
    int idx_in_w = idx_out_w * STRIDE_X - PAD_X;
    int idx_in_h = idx_out_h * STRIDE_Y - PAD_Y;

    // NHWC addressing: stride_y steps along the width, stride_z along the height and stride_w along the batch
    __global unsigned char *in_base_ptr = input_ptr + input_offset_first_element_in_bytes + offset_c + idx_out_n * input_stride_w;

    __global unsigned char *out_base_ptr = output_ptr + output_offset_first_element_in_bytes + offset_c + idx_out_w * output_stride_y + idx_out_h * output_stride_z + idx_out_n * output_stride_w;

    // Clamp the window to the part overlapping the input, so that padded positions are never read
    int pool_x_s = max((int)0, -idx_in_w);
    int pool_x_e = min((int)POOL_SIZE_X, (int)SRC_WIDTH - idx_in_w);
    int pool_y_s = max((int)0, -idx_in_h);
    int pool_y_e = min((int)POOL_SIZE_Y, (int)SRC_HEIGHT - idx_in_h);

#if defined(POOL_AVG) && defined(EXCLUDE_PADDING)
    // Counted in the loop below: only the elements actually read contribute to the divisor
    // NOTE(review): stays 0 if the window lies entirely in the padding - presumably ruled out by host-side validation; confirm
    int filter_size = 0;
#elif defined(POOL_AVG) && !defined(EXCLUDE_PADDING) // defined(POOL_AVG) && defined(EXCLUDE_PADDING)
    int filter_size = POOL_SIZE_X * POOL_SIZE_Y;
#endif // defined(POOL_AVG) && !defined(EXCLUDE_PADDING)

    VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)
    res0 = INITIAL_VALUE;

    for(int y = pool_y_s; y < pool_y_e; ++y)
    {
        for(int x = pool_x_s; x < pool_x_e; ++x)
        {
            VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
            data;
            VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)
            data0;

            // Load VEC_SIZE channels at input position (x + idx_in_w, y + idx_in_h) and widen them to the accumulation type
            data  = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr + (x + idx_in_w) * input_stride_y + (y + idx_in_h) * input_stride_z));
            data0 = CONVERT(data, VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE));

            res0 = POOL_OP(res0, data0);

#if defined(POOL_AVG) && defined(EXCLUDE_PADDING)
            filter_size++;
#endif // defined(POOL_AVG) && defined(EXCLUDE_PADDING)
        }
    }

#if defined(POOL_AVG)
    // Half of the divisor is added before dividing so that the quotient is rounded rather than truncated
    res0 = (res0 + (VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE))(filter_size >> 1)) / filter_size;
#endif // defined(POOL_AVG)

    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
    out_q0 = CONVERT(res0, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE));
#if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT)
    // Input and output use different quantization parameters: map the result into the output quantization (in place)
    REQUANTIZE(VEC_SIZE, out_q0, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT, out_q0);
#endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */

    // Store result. "out_q" is the base name of the vector: the helper macro is expected to append the index and store out_q0,
    // writing only VEC_SIZE_LEFTOVER elements when the condition in the last argument holds (see helpers.h)
    STORE_VECTOR_SELECT(out_q, DATA_TYPE, out_base_ptr, VEC_SIZE, VEC_SIZE_LEFTOVER, ((VEC_SIZE_LEFTOVER != 0) && get_global_id(0) == 0));
}
#endif // defined(VEC_SIZE) && defined(VEC_SIZE_LEFTOVER) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) && defined(DST_CHANNELS) && defined(DST_HEIGHT) && defined(DST_BATCH_SIZE) && defined(ACC_DATA_TYPE)
Gian Marco Iodice	7333e1f	2020-10-08 10:25:49 +0100	[diff] [blame]	164	#endif // defined(DATA_TYPE) && defined(INITIAL_VALUE)