Blame - src/core/CL/cl_kernels/pixelwise_mul_float.cl - ml/ComputeLibrary

blob: 89367dc0ce2716dd6f86e1dc3bd8cf278ff83ee3 [file] [log] [blame]

Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1	/*
				2	* Copyright (c) 2016, 2017 ARM Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "helpers.h"
				25
				26	#ifdef SATURATE
				27	#define CONVERT_OP_FLOAT_STR(x, type, round) (convert_##type##_sat##round(x))
Anthony Barbier	ac69aa1	2017-07-03 17:39:37 +0100	[diff] [blame^]	28	#else /* SATURATE */
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	29	#define CONVERT_OP_FLOAT_STR(x, type, round) (convert_##type##round(x))
Anthony Barbier	ac69aa1	2017-07-03 17:39:37 +0100	[diff] [blame^]	30	#endif /* SATURATE */
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	31	#define CONVERT_OP_FLOAT(x, type, round) CONVERT_OP_FLOAT_STR(x, type, round)
				32
				33	/** Performs a pixelwise multiplication with float scale of either integer or float inputs.
				34	*
				35	* @attention The inputs and output data types need to be passed at compile time using -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT:
				36	* e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=ushort -DDATA_TYPE_OUT=short
				37	* @attention The data type of the intermediate result of the multiplication should passed as well using -DDATA_TYPE_RES.
				38	* e.g. If one of inputs is S16 -DDATA_TYPE_RES=int should be passed else -DDATA_TYPE_RES=short.
				39	* @attention -DDATA_TYPE_FLOAT must be passed if floating point inputs are provided.
				40	*
				41	* @param[in] in1_ptr Pointer to the source image. Supported data types: U8, S16, F16, F32
				42	* @param[in] in1_stride_x Stride of the source image in X dimension (in bytes)
				43	* @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes)
				44	* @param[in] in1_stride_y Stride of the source image in Y dimension (in bytes)
				45	* @param[in] in1_step_y in1_stride_y * number of elements along Y processed per workitem(in bytes)
				46	* @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source image
				47	* @param[in] in2_ptr Pointer to the source image. Supported data types: U8, S16, F16, F32
				48	* @param[in] in2_stride_x Stride of the source image in X dimension (in bytes)
				49	* @param[in] in2_step_x in2_stride_x * number of elements along X processed per workitem(in bytes)
				50	* @param[in] in2_stride_y Stride of the source image in Y dimension (in bytes)
				51	* @param[in] in2_step_y in2_stride_y * number of elements along Y processed per workitem(in bytes)
				52	* @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source image
				53	* @param[out] out_ptr Pointer to the destination image. Supported data types: U8, S16, F16, F32
				54	* @param[in] out_stride_x Stride of the destination image in X dimension (in bytes)
				55	* @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes)
				56	* @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes)
				57	* @param[in] out_step_y out_stride_y * number of elements along Y processed per workitem(in bytes)
				58	* @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination image
				59	* @param[in] scale Float scaling factor. Supported data types: F32
				60	*/
				61	__kernel void pixelwise_mul_float(
				62	IMAGE_DECLARATION(in1),
				63	IMAGE_DECLARATION(in2),
				64	IMAGE_DECLARATION(out),
				65	const float scale)
				66	{
				67	// Get pixels pointer
				68	Image in1 = CONVERT_TO_IMAGE_STRUCT(in1);
				69	Image in2 = CONVERT_TO_IMAGE_STRUCT(in2);
				70	Image out = CONVERT_TO_IMAGE_STRUCT(out);
				71
				72	// Load data
				73	VEC_DATA_TYPE(DATA_TYPE_RES, 16)
				74	in1_data = CONVERT(vload16(0, (__global DATA_TYPE_IN1 *)in1.ptr), VEC_DATA_TYPE(DATA_TYPE_RES, 16));
				75	VEC_DATA_TYPE(DATA_TYPE_RES, 16)
				76	in2_data = CONVERT(vload16(0, (__global DATA_TYPE_IN2 *)in2.ptr), VEC_DATA_TYPE(DATA_TYPE_RES, 16));
				77
				78	// Perform multiplication
Anthony Barbier	ac69aa1	2017-07-03 17:39:37 +0100	[diff] [blame^]	79	#ifdef DATA_TYPE_FLOAT
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	80	VEC_DATA_TYPE(DATA_TYPE_OUT, 16)
				81	res = CONVERT(in1_data * in2_data * scale, VEC_DATA_TYPE(DATA_TYPE_OUT, 16));
Anthony Barbier	ac69aa1	2017-07-03 17:39:37 +0100	[diff] [blame^]	82	#else /* DATA_TYPE_FLOAT */
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	83	VEC_DATA_TYPE(DATA_TYPE_OUT, 16)
				84	res = CONVERT_OP_FLOAT(CONVERT_OP_FLOAT((convert_float16(in1_data * in2_data) * scale), VEC_DATA_TYPE(DATA_TYPE_RES, 16), ROUND), VEC_DATA_TYPE(DATA_TYPE_OUT, 16), ROUND);
Anthony Barbier	ac69aa1	2017-07-03 17:39:37 +0100	[diff] [blame^]	85	#endif /* DATA_TYPE_FLOAT */
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	86
				87	// Store result
				88	vstore16(res, 0, (__global DATA_TYPE_OUT *)out.ptr);
				89	}