Blame - src/core/CL/cl_kernels/activation_layer_qa8.cl - ml/ComputeLibrary

blob: 4d9bf0efadae0bdbc9d107dde099bd4250600ee9 [file] [log] [blame]

Michel Iwaniec	0063380	2017-10-12 14:14:15 +0100	[diff] [blame]	1	/*
				2	* Copyright (c) 2016, 2017 ARM Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "helpers.h"
				25
				26	#define TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
				27
				28	// Bounded RELU Activation
				29	inline TYPE brelu_op(TYPE x)
				30	{
				31	return min((TYPE)A_VAL, max(0, x));
				32	}
				33	// Lower Upper Bounded RELU Activation
				34	inline TYPE lu_brelu_op(TYPE x)
				35	{
				36	return min(max(x, (TYPE)B_VAL), (TYPE)A_VAL);
				37	}
				38
				39	#define ACTIVATION_OP2(op, x) op##_op(x)
				40	#define ACTIVATION_OP(op, x) ACTIVATION_OP2(op, x)
				41
				42	/** This performs an activation function on QASYMM8 inputs.
				43	*
				44	* @note In order to perform the activation function "in-place", the pre-processor -DIN_PLACE must be passed at compile time
				45	*
				46	* @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
				47	* @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
				48	* @note Activation function should be given as a preprocessor argument using -DACT=name. e.g. -DACT=TANH
				49	* @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively.
				50	* @note Quantization scales of the input/output tensors are passed in with -DS1_VAL= and -DS2_VAL= respectively.
				51	* @note Quantization offsets of the input/output tensors are passed in with -DO1_VAL= and -DO2_VAL= respectively.
				52	*
				53	* @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8
				54	* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
				55	* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
				56	* @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
				57	* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
				58	* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
				59	* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
				60	* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
				61	* @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr
				62	* @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
				63	* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
				64	* @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
				65	* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
				66	* @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
				67	* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
				68	* @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
				69	*/
				70	__kernel void activation_layer_qa8(
				71	TENSOR3D_DECLARATION(input)
				72	#ifndef IN_PLACE
				73	,
				74	TENSOR3D_DECLARATION(output)
				75	#endif /* not IN_PLACE */
				76	)
				77	{
				78	// Get pixels pointer
				79	Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
				80	#ifdef IN_PLACE
				81	Tensor3D output = input;
				82	#else /* IN_PLACE */
				83	Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
				84	#endif /* IN_PLACE */
				85
				86	// Load data
				87	TYPE data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr);
				88
				89	// Perform activation
				90	data = ACTIVATION_OP(ACT, data);
				91
				92	// requantize to output space
				93	float16 fdata = convert_float16(data);
				94	fdata = round((fdata - O1_VAL) * (S1_VAL / S2_VAL) + O2_VAL);
				95	uchar16 qdata = convert_uchar16(fdata);
				96
				97	// Store result
				98	VSTORE(VEC_SIZE)
				99	(qdata, 0, (__global DATA_TYPE *)output.ptr);
				100	}