/*
 * Copyright (c) 2016, 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "helpers.h"

#if defined(FIXED_POINT_POSITION)
#include "fixed_point.h"

#define ADD_OP(a, b) ADD_SAT_OP_EXPAND((a), (b), DATA_TYPE_PROMOTED, 8)
#define MUL_OP(a, b) MUL_SAT_OP_EXPAND(CONVERT((a), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 8)), CONVERT((b), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 8)), DATA_TYPE_PROMOTED, 8, FIXED_POINT_POSITION)

// There is no need to have a larger intermediate type for qs32 because all the arguments are already promoted
MULQ_SAT_IMPL(qs32x8, qs32x8)

#else /* FIXED_POINT_POSITION */
#undef CONVERT_SAT

#define ADD_OP(a, b) ((a) + (b))
#define MUL_OP(a, b) ((a) * (b))
#define CONVERT_SAT(a, b) ((a))

#endif /* FIXED_POINT_POSITION */

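/* Note on types (based on how the macros above are used): the kernel accumulates
 * into VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 8). In the fixed-point build,
 * DATA_TYPE_PROMOTED is assumed to be a wider type than DATA_TYPE (e.g. qs16 for
 * qs8 data) so that the saturating ADD_OP/MUL_OP cannot overflow, and the
 * CONVERT_SAT from helpers.h saturates the result back to DATA_TYPE on the final
 * store. In the float build, DATA_TYPE_PROMOTED is assumed to match DATA_TYPE and
 * CONVERT_SAT is redefined above as an identity, since saturating conversions do
 * not exist for floating-point destinations. Both defines are assumed to be
 * passed as -D options by the host code that builds this kernel.
 */
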
#if STRIDE_X == 3
#define INPUT_PIXEL_STR(data_size) extract_input_stride3_##data_size
#define INPUT_PIXEL(data_size) INPUT_PIXEL_STR(data_size)
#elif STRIDE_X == 2
#define INPUT_PIXEL(data_size) extract_input_stride2
#elif STRIDE_X == 1
#define INPUT_PIXEL(data_size) extract_input_stride1
#else /* STRIDE_X not equal to 1, 2 or 3 */
#error "Only strides 1, 2 and 3 are supported"
#endif /* STRIDE_X == 3 */
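
/* Example of the dispatch above (illustrative values): building with
 * -DSTRIDE_X=3 -DDATA_SIZE=16 makes INPUT_PIXEL(DATA_SIZE) expand, via the
 * token-pasting helper INPUT_PIXEL_STR, to extract_input_stride3_16. For
 * -DSTRIDE_X=1 or -DSTRIDE_X=2 the data size is ignored and the single
 * extract_input_stride1/extract_input_stride2 helper below is selected.
 */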

/** Extracts a 1D horizontal vector from the input tensor with a stride of 1.
 *
 * @param[in] input_pixel Pointer to the first pixel.
 *
 * @return extracted input pixels.
 */
inline VEC_DATA_TYPE(DATA_TYPE, 8) extract_input_stride1(__global const DATA_TYPE *input_pixel)
{
    return vload8(0, input_pixel);
}

/** Extracts a 1D horizontal vector from the input tensor with a stride of 2.
 *
 * @param[in] input_pixel Pointer to the first pixel.
 *
 * @return extracted input pixels.
 */
inline VEC_DATA_TYPE(DATA_TYPE, 8) extract_input_stride2(__global const DATA_TYPE *input_pixel)
{
    VEC_DATA_TYPE(DATA_TYPE, 16)
    temp = vload16(0, input_pixel);
    return temp.s02468ace;
}
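// Note: temp.s02468ace selects the even-indexed elements 0, 2, 4, ..., 14 of the
// 16-element load above, i.e. 8 input pixels spaced two apart along x.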

/** Extracts a 1D horizontal vector from the input tensor with a stride of 3 and 32-bit data size.
 *
 * @param[in] input_pixel Pointer to the first pixel.
 *
 * @return extracted input pixels.
 */
inline VEC_DATA_TYPE(DATA_TYPE, 8) extract_input_stride3_32(__global const DATA_TYPE *input_pixel)
{
    VEC_DATA_TYPE(DATA_TYPE, 4)
    temp1 = vload4(0, input_pixel);
    VEC_DATA_TYPE(DATA_TYPE, 4)
    temp2 = vload4(0, input_pixel + 6);
    VEC_DATA_TYPE(DATA_TYPE, 4)
    temp3 = vload4(0, input_pixel + 12);
    VEC_DATA_TYPE(DATA_TYPE, 4)
    temp4 = vload4(0, input_pixel + 18);
    return (VEC_DATA_TYPE(DATA_TYPE, 8))(temp1.s03, temp2.s03, temp3.s03, temp4.s03);
}

/** Extracts a 1D horizontal vector from the input tensor with a stride of 3 and 16-bit data size.
 *
 * @param[in] input_pixel Pointer to the first pixel.
 *
 * @return extracted input pixels.
 */
inline VEC_DATA_TYPE(DATA_TYPE, 8) extract_input_stride3_16(__global const DATA_TYPE *input_pixel)
{
    VEC_DATA_TYPE(DATA_TYPE, 8)
    temp1 = vload8(0, input_pixel);
    VEC_DATA_TYPE(DATA_TYPE, 8)
    temp2 = vload8(0, input_pixel + 8);
    VEC_DATA_TYPE(DATA_TYPE, 8)
    temp3 = vload8(0, input_pixel + 16);
    return (VEC_DATA_TYPE(DATA_TYPE, 8))(temp1.s036, temp2.s147, temp3.s25);
}

/** Extracts a 1D horizontal vector from the input tensor with a stride of 3 and 8-bit data size.
 *
 * @param[in] input_pixel Pointer to the first pixel.
 *
 * @return extracted input pixels.
 */
inline VEC_DATA_TYPE(DATA_TYPE, 8) extract_input_stride3_8(__global const DATA_TYPE *input_pixel)
{
    VEC_DATA_TYPE(DATA_TYPE, 16)
    temp1 = vload16(0, input_pixel);
    VEC_DATA_TYPE(DATA_TYPE, 16)
    temp2 = vload16(0, input_pixel + 12);
    return (VEC_DATA_TYPE(DATA_TYPE, 8))(temp1.s0369, temp2.s0369);
}
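// Note: all three stride-3 extractors above return input elements {0, 3, 6, ..., 21};
// they differ only in how the vloads are shaped for 32-, 16- and 8-bit element sizes
// (e.g. extract_input_stride3_8 takes lanes 0, 3, 6 and 9 of two overlapping
// 16-element loads at offsets 0 and 12).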

/** This kernel performs a direct convolution to convolve the low three dimensions.
 *
 * @note The data type must be passed at compile time using -DDATA_TYPE e.g. -DDATA_TYPE=float
 * @note The data size must be passed at compile time using -DDATA_SIZE e.g. -DDATA_SIZE=32
 * @note The convolution stride x must be passed at compile time using -DSTRIDE_X e.g. -DSTRIDE_X=1
 * @note The third dimension of the weights tensor must be passed at compile time using -DWEIGHTS_DEPTH
 * @note If biases are used then -DHAS_BIAS has to be passed at compile time.
 *
 * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8/QS16/F16/F32
 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
 * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
 * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
 * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
 * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr
 * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
 * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
 * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
 * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
 * @param[in] weights_ptr Pointer to the weights tensor. Supported data types: same as @p src_ptr
 * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes)
 * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes)
 * @param[in] weights_step_y weights_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes)
 * @param[in] weights_step_z weights_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor
 * @param[in] biases_ptr Pointer to the biases tensor. Supported data types: same as @p src_ptr
 * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes)
 * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor
 * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension
 */
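// Example build options for this kernel (illustrative values):
//   -DDATA_TYPE=float -DDATA_TYPE_PROMOTED=float -DDATA_SIZE=32 -DSTRIDE_X=1 -DWEIGHTS_DEPTH=64 -DHAS_BIAS
// WEIGHTS_DEPTH=64 is an arbitrary example; DATA_TYPE_PROMOTED is assumed to be
// provided by the host alongside DATA_TYPE, since the accumulator below is
// declared with it (see the note after the ADD_OP/MUL_OP definitions above).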
#if defined(DATA_TYPE) && defined(DATA_SIZE) && defined(STRIDE_X) && defined(WEIGHTS_DEPTH)
__kernel void direct_convolution1x1(
    TENSOR3D_DECLARATION(src),
    TENSOR3D_DECLARATION(dst),
    TENSOR3D_DECLARATION(weights),
#ifdef HAS_BIAS
    VECTOR_DECLARATION(biases),
#endif /* defined(HAS_BIAS) */
    unsigned int weights_stride_w)
{
    Image    src     = CONVERT_TO_IMAGE_STRUCT(src);
    Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(weights);
    Tensor3D dst     = CONVERT_TO_TENSOR3D_STRUCT(dst);

#ifdef HAS_BIAS
    Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases);
#endif /* defined(HAS_BIAS) */
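
    // The accumulator uses the promoted data type so that, in the fixed-point
    // build, the saturating multiply-accumulate over the input depth cannot
    // overflow before CONVERT_SAT brings the result back to DATA_TYPE at the store.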
    VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 8)
    pixels = 0;

    const uint z_index = get_global_id(2);

    weights.ptr += z_index * weights_stride_w;

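    // Each work item computes 8 horizontally adjacent pixels of the output feature
    // map selected by get_global_id(2). The loop below accumulates over the input
    // depth (WEIGHTS_DEPTH): every iteration extracts 8 input pixels of one input
    // channel and multiplies them by the corresponding 1x1 weight.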
    for(int d = 0; d < WEIGHTS_DEPTH; ++d)
    {
        DATA_TYPE weight = *(__global DATA_TYPE *)weights.ptr;
        VEC_DATA_TYPE(DATA_TYPE, 8)
        input_pixel = INPUT_PIXEL(DATA_SIZE)((__global DATA_TYPE *)src.ptr);
        pixels      = ADD_OP(pixels, MUL_OP((VEC_DATA_TYPE(DATA_TYPE, 8))weight, input_pixel));
        src.ptr += src_stride_z;
        weights.ptr += weights_stride_z;
    }

#ifdef HAS_BIAS
    pixels = ADD_OP(pixels, (VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 8)) * ((__global DATA_TYPE *)(vector_offset(&biases, z_index))));
#endif /* defined(HAS_BIAS) */

    vstore8(CONVERT_SAT(pixels, VEC_DATA_TYPE(DATA_TYPE, 8)), 0, (__global DATA_TYPE *)dst.ptr);
}
#endif // defined(DATA_TYPE) && defined(DATA_SIZE) && defined(STRIDE_X) && defined(WEIGHTS_DEPTH)
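
/* Host-side usage sketch (illustrative; the actual configuration lives in the
 * library's host code, not in this file): the kernel is built with the -D options
 * documented above and enqueued with one work item per 8 output pixels along x,
 * i.e. a 3D global work size of roughly
 * (ceil(dst_width / 8), dst_height, number_of_output_feature_maps),
 * with src_step_x/dst_step_x chosen so that consecutive work items advance by
 * 8 output elements (8 * STRIDE_X input elements on the source side).
 */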