blob: c031c86a5e92408025e28cd51a1bcaaaaff8051f [file] [log] [blame]
Michel Iwaniec00633802017-10-12 14:14:15 +01001/*
Michele Di Giorgiof6f78762020-07-06 11:27:21 +01002 * Copyright (c) 2016-2020 Arm Limited.
Michel Iwaniec00633802017-10-12 14:14:15 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Manuel Bottini30dbeef2019-06-26 16:23:03 +010024#include "activation_quant_helpers.h"
Michel Iwaniec00633802017-10-12 14:14:15 +010025
Michele Di Giorgiod304e802018-07-06 10:17:33 +010026#define VEC_FLOAT VEC_DATA_TYPE(float, VEC_SIZE)
Michel Iwaniec00633802017-10-12 14:14:15 +010027
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010028#if defined(FLOAT_DOMAIN)
29// Activations performed in the float domain
30
31#include "activation_float_helpers.h"
32
Manuel Bottini30dbeef2019-06-26 16:23:03 +010033/** This performs an activation function on quantized inputs with float transformations.
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010034 *
35 * @note In order to perform the activation function "in-place", the pre-processor -DIN_PLACE must be passed at compile time
36 *
37 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
38 * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
Giorgio Arena53048842020-10-07 16:03:43 +010039 * @note Leftover vector size has to be passed at compile time using -DVEC_SIZE_LEFTOVER. e.g. -DVEC_SIZE=3. It is defined as the remainder between the input's first dimension and VEC_SIZE
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010040 * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively.
41 * @note Quantization scales of the input/output tensors are passed in with -DS1_VAL= and -DS2_VAL= respectively.
Manuel Bottini30dbeef2019-06-26 16:23:03 +010042 * @note Quantization offsets of the input/output tensors are passed in only if asymmetric with -DO1_VAL= and -DO2_VAL= respectively.
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010043 * @note Quantized value of constant zero should be given as a preprocessor argument using -DCONST_0=value. e.g. -DCONST_0=128.
44 *
Michele Di Giorgiof6f78762020-07-06 11:27:21 +010045 * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8/QASYMM8_SIGNED/QSYMM16
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010046 * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
47 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
48 * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
49 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
50 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
51 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
52 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
53 * @param[out] output_ptr (Optional) Pointer to the destination image. Supported data types: same as @p input_ptr
54 * @param[in] output_stride_x (Optional) Stride of the destination image in X dimension (in bytes)
55 * @param[in] output_step_x (Optional) output_stride_x * number of elements along X processed per workitem(in bytes)
56 * @param[in] output_stride_y (Optional) Stride of the destination image in Y dimension (in bytes)
57 * @param[in] output_step_y (Optional) output_stride_y * number of elements along Y processed per workitem(in bytes)
58 * @param[in] output_stride_z (Optional) Stride of the source tensor in Z dimension (in bytes)
59 * @param[in] output_step_z (Optional) output_stride_z * number of elements along Z processed per workitem(in bytes)
60 * @param[in] output_offset_first_element_in_bytes (Optional) The offset of the first element in the destination image
61 */
Manuel Bottini30dbeef2019-06-26 16:23:03 +010062__kernel void activation_layer_quant_f32(
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010063 TENSOR3D_DECLARATION(input)
64#ifndef IN_PLACE
65 ,
66 TENSOR3D_DECLARATION(output)
67#endif /* not IN_PLACE */
68)
69{
Giorgio Arenad304adb2020-10-02 10:20:11 +010070 uint x_offs = max((int)(get_global_id(0) * VEC_SIZE * sizeof(DATA_TYPE) - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE * sizeof(DATA_TYPE)), 0);
71
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010072 // Get pixels pointer
Giorgio Arenad304adb2020-10-02 10:20:11 +010073 __global uchar *input_addr = input_ptr + input_offset_first_element_in_bytes + x_offs + get_global_id(1) * input_stride_y + get_global_id(2) * input_stride_z;
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010074#ifdef IN_PLACE
Giorgio Arenad304adb2020-10-02 10:20:11 +010075 __global uchar *output_addr = input_addr;
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010076#else /* IN_PLACE */
Giorgio Arenad304adb2020-10-02 10:20:11 +010077 __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + x_offs + get_global_id(1) * output_stride_y + get_global_id(2) * output_stride_z;
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010078#endif /* IN_PLACE */
79
80 // Load data
Giorgio Arenad304adb2020-10-02 10:20:11 +010081 TYPE data0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input_addr);
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010082
Giorgio Arenad304adb2020-10-02 10:20:11 +010083 VEC_FLOAT data_flt = CONVERT(data0, VEC_FLOAT);
Manuel Bottini30dbeef2019-06-26 16:23:03 +010084#if defined(O1_VAL)
85 data_flt = round(data_flt - (float)O1_VAL) * ((float)S1_VAL);
86#else // defined(O1_VAL)
Giorgio Arenad304adb2020-10-02 10:20:11 +010087 data_flt = round(data_flt) * ((float)S1_VAL);
Manuel Bottini30dbeef2019-06-26 16:23:03 +010088#endif // defined(O1_VAL)
Giorgio Arenad056e572020-10-12 11:53:51 +010089 data_flt = ACTIVATION(ACT, float, VEC_SIZE, data_flt, A_VAL, B_VAL);
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010090
Manuel Bottini30dbeef2019-06-26 16:23:03 +010091#if defined(O2_VAL)
Giorgio Arenad304adb2020-10-02 10:20:11 +010092 data0 = CONVERT_SAT(round(data_flt / ((float)S2_VAL)) + (float)O2_VAL, TYPE);
Manuel Bottini30dbeef2019-06-26 16:23:03 +010093#else // defined(O2_VAL)
Giorgio Arenad304adb2020-10-02 10:20:11 +010094 data0 = CONVERT_SAT(round(data_flt / ((float)S2_VAL)), TYPE);
Manuel Bottini30dbeef2019-06-26 16:23:03 +010095#endif // defined(O2_VAL)
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010096
97 // Store result
Giorgio Arenad304adb2020-10-02 10:20:11 +010098 STORE_VECTOR_SELECT(data, DATA_TYPE, output_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0)
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010099}
100
101#else // defined(FLOAT_DOMAIN)
102// Activations performed in the quantized domain
103
Giorgio Arena99ac60b2018-02-16 15:17:23 +0000104#if defined(ACT)
Manuel Bottini30dbeef2019-06-26 16:23:03 +0100105/** This performs an activation function on quantized inputs.
Michel Iwaniec00633802017-10-12 14:14:15 +0100106 *
107 * @note In order to perform the activation function "in-place", the pre-processor -DIN_PLACE must be passed at compile time
108 *
109 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
110 * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
Giorgio Arena53048842020-10-07 16:03:43 +0100111 * @note Leftover vector size has to be passed at compile time using -DVEC_SIZE_LEFTOVER. e.g. -DVEC_SIZE=3. It is defined as the remainder between the input's first dimension and VEC_SIZE
Michel Iwaniec00633802017-10-12 14:14:15 +0100112 * @note Activation function should be given as a preprocessor argument using -DACT=name. e.g. -DACT=TANH
113 * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively.
114 * @note Quantization scales of the input/output tensors are passed in with -DS1_VAL= and -DS2_VAL= respectively.
115 * @note Quantization offsets of the input/output tensors are passed in with -DO1_VAL= and -DO2_VAL= respectively.
Michele Di Giorgioa1f7e332018-01-22 17:26:36 +0000116 * @note Quantized value of constant zero should be given as a preprocessor argument using -DCONST_0=value. e.g. -DCONST_0=128.
Michel Iwaniec00633802017-10-12 14:14:15 +0100117 *
Michele Di Giorgiof6f78762020-07-06 11:27:21 +0100118 * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8/QASYMM8_SIGNED/QSYMM16
Michel Iwaniec00633802017-10-12 14:14:15 +0100119 * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
120 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
121 * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
122 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
123 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
124 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
125 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
Michele Di Giorgioe5bf4c52019-02-14 17:47:33 +0000126 * @param[out] output_ptr (Optional) Pointer to the destination image. Supported data types: same as @p input_ptr
127 * @param[in] output_stride_x (Optional) Stride of the destination image in X dimension (in bytes)
128 * @param[in] output_step_x (Optional) output_stride_x * number of elements along X processed per workitem(in bytes)
129 * @param[in] output_stride_y (Optional) Stride of the destination image in Y dimension (in bytes)
130 * @param[in] output_step_y (Optional) output_stride_y * number of elements along Y processed per workitem(in bytes)
131 * @param[in] output_stride_z (Optional) Stride of the source tensor in Z dimension (in bytes)
132 * @param[in] output_step_z (Optional) output_stride_z * number of elements along Z processed per workitem(in bytes)
133 * @param[in] output_offset_first_element_in_bytes (Optional) The offset of the first element in the destination image
Michel Iwaniec00633802017-10-12 14:14:15 +0100134 */
Manuel Bottini30dbeef2019-06-26 16:23:03 +0100135__kernel void activation_layer_quant(
Michel Iwaniec00633802017-10-12 14:14:15 +0100136 TENSOR3D_DECLARATION(input)
137#ifndef IN_PLACE
138 ,
139 TENSOR3D_DECLARATION(output)
140#endif /* not IN_PLACE */
141)
142{
Giorgio Arenad304adb2020-10-02 10:20:11 +0100143 uint x_offs = max((int)(get_global_id(0) * VEC_SIZE * sizeof(DATA_TYPE) - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE * sizeof(DATA_TYPE)), 0);
144
Michel Iwaniec00633802017-10-12 14:14:15 +0100145 // Get pixels pointer
Giorgio Arenad304adb2020-10-02 10:20:11 +0100146 __global uchar *input_addr = input_ptr + input_offset_first_element_in_bytes + x_offs + get_global_id(1) * input_stride_y + get_global_id(2) * input_stride_z;
Michel Iwaniec00633802017-10-12 14:14:15 +0100147#ifdef IN_PLACE
Giorgio Arenad304adb2020-10-02 10:20:11 +0100148 __global uchar *output_addr = input_addr;
Michel Iwaniec00633802017-10-12 14:14:15 +0100149#else /* IN_PLACE */
Giorgio Arenad304adb2020-10-02 10:20:11 +0100150 __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + x_offs + get_global_id(1) * output_stride_y + get_global_id(2) * output_stride_z;
Michel Iwaniec00633802017-10-12 14:14:15 +0100151#endif /* IN_PLACE */
152
153 // Load data
Giorgio Arenad304adb2020-10-02 10:20:11 +0100154 TYPE data0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input_addr);
Michel Iwaniec00633802017-10-12 14:14:15 +0100155
Giorgio Arenad304adb2020-10-02 10:20:11 +0100156 data0 = PERFORM_ACTIVATION_QUANT(ACT, data0);
Michel Iwaniec00633802017-10-12 14:14:15 +0100157
158 // Store result
Giorgio Arenad304adb2020-10-02 10:20:11 +0100159 STORE_VECTOR_SELECT(data, DATA_TYPE, output_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0)
Michel Iwaniec00633802017-10-12 14:14:15 +0100160}
Georgios Pinitas4b3fba12019-06-04 17:31:46 +0100161#endif // defined(ACT)
162#endif // defined(FLOAT_DOMAIN)