blob: d8f56c093a6c0bd83078d781f083008c004cb005 [file] [log] [blame]
Michel Iwaniec00633802017-10-12 14:14:15 +01001/*
Michele Di Giorgiof6f78762020-07-06 11:27:21 +01002 * Copyright (c) 2016-2020 Arm Limited.
Michel Iwaniec00633802017-10-12 14:14:15 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Manuel Bottini30dbeef2019-06-26 16:23:03 +010024#include "activation_quant_helpers.h"
Michel Iwaniec00633802017-10-12 14:14:15 +010025
Michele Di Giorgiod304e802018-07-06 10:17:33 +010026#define VEC_FLOAT VEC_DATA_TYPE(float, VEC_SIZE)
Michel Iwaniec00633802017-10-12 14:14:15 +010027
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010028#if defined(FLOAT_DOMAIN)
29// Activations performed in the float domain
30
31#include "activation_float_helpers.h"
32
Manuel Bottini30dbeef2019-06-26 16:23:03 +010033/** This performs an activation function on quantized inputs with float transformations.
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010034 *
35 * @note In order to perform the activation function "in-place", the pre-processor -DIN_PLACE must be passed at compile time
36 *
37 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
38 * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
39 * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively.
40 * @note Quantization scales of the input/output tensors are passed in with -DS1_VAL= and -DS2_VAL= respectively.
Manuel Bottini30dbeef2019-06-26 16:23:03 +010041 * @note Quantization offsets of the input/output tensors are passed in only if asymmetric with -DO1_VAL= and -DO2_VAL= respectively.
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010042 * @note Quantized value of constant zero should be given as a preprocessor argument using -DCONST_0=value. e.g. -DCONST_0=128.
43 *
Michele Di Giorgiof6f78762020-07-06 11:27:21 +010044 * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8/QASYMM8_SIGNED/QSYMM16
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010045 * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
46 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
47 * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
48 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
49 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
50 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
51 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
52 * @param[out] output_ptr (Optional) Pointer to the destination image. Supported data types: same as @p input_ptr
53 * @param[in] output_stride_x (Optional) Stride of the destination image in X dimension (in bytes)
54 * @param[in] output_step_x (Optional) output_stride_x * number of elements along X processed per workitem(in bytes)
55 * @param[in] output_stride_y (Optional) Stride of the destination image in Y dimension (in bytes)
56 * @param[in] output_step_y (Optional) output_stride_y * number of elements along Y processed per workitem(in bytes)
57 * @param[in] output_stride_z (Optional) Stride of the source tensor in Z dimension (in bytes)
58 * @param[in] output_step_z (Optional) output_stride_z * number of elements along Z processed per workitem(in bytes)
59 * @param[in] output_offset_first_element_in_bytes (Optional) The offset of the first element in the destination image
60 */
Manuel Bottini30dbeef2019-06-26 16:23:03 +010061__kernel void activation_layer_quant_f32(
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010062 TENSOR3D_DECLARATION(input)
63#ifndef IN_PLACE
64 ,
65 TENSOR3D_DECLARATION(output)
66#endif /* not IN_PLACE */
67)
68{
Giorgio Arenad304adb2020-10-02 10:20:11 +010069 uint x_offs = max((int)(get_global_id(0) * VEC_SIZE * sizeof(DATA_TYPE) - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE * sizeof(DATA_TYPE)), 0);
70
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010071 // Get pixels pointer
Giorgio Arenad304adb2020-10-02 10:20:11 +010072 __global uchar *input_addr = input_ptr + input_offset_first_element_in_bytes + x_offs + get_global_id(1) * input_stride_y + get_global_id(2) * input_stride_z;
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010073#ifdef IN_PLACE
Giorgio Arenad304adb2020-10-02 10:20:11 +010074 __global uchar *output_addr = input_addr;
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010075#else /* IN_PLACE */
Giorgio Arenad304adb2020-10-02 10:20:11 +010076 __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + x_offs + get_global_id(1) * output_stride_y + get_global_id(2) * output_stride_z;
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010077#endif /* IN_PLACE */
78
79 // Load data
Giorgio Arenad304adb2020-10-02 10:20:11 +010080 TYPE data0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input_addr);
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010081
Giorgio Arenad304adb2020-10-02 10:20:11 +010082 VEC_FLOAT data_flt = CONVERT(data0, VEC_FLOAT);
Manuel Bottini30dbeef2019-06-26 16:23:03 +010083#if defined(O1_VAL)
84 data_flt = round(data_flt - (float)O1_VAL) * ((float)S1_VAL);
85#else // defined(O1_VAL)
Giorgio Arenad304adb2020-10-02 10:20:11 +010086 data_flt = round(data_flt) * ((float)S1_VAL);
Manuel Bottini30dbeef2019-06-26 16:23:03 +010087#endif // defined(O1_VAL)
88 data_flt = ACTIVATION(ACT, float, data_flt, A_VAL, B_VAL);
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010089
Manuel Bottini30dbeef2019-06-26 16:23:03 +010090#if defined(O2_VAL)
Giorgio Arenad304adb2020-10-02 10:20:11 +010091 data0 = CONVERT_SAT(round(data_flt / ((float)S2_VAL)) + (float)O2_VAL, TYPE);
Manuel Bottini30dbeef2019-06-26 16:23:03 +010092#else // defined(O2_VAL)
Giorgio Arenad304adb2020-10-02 10:20:11 +010093 data0 = CONVERT_SAT(round(data_flt / ((float)S2_VAL)), TYPE);
Manuel Bottini30dbeef2019-06-26 16:23:03 +010094#endif // defined(O2_VAL)
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010095
96 // Store result
Giorgio Arenad304adb2020-10-02 10:20:11 +010097 STORE_VECTOR_SELECT(data, DATA_TYPE, output_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0)
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010098}
99
100#else // defined(FLOAT_DOMAIN)
101// Activations performed in the quantized domain
102
Giorgio Arena99ac60b2018-02-16 15:17:23 +0000103#if defined(ACT)
Manuel Bottini30dbeef2019-06-26 16:23:03 +0100104/** This performs an activation function on quantized inputs.
Michel Iwaniec00633802017-10-12 14:14:15 +0100105 *
106 * @note In order to perform the activation function "in-place", the pre-processor -DIN_PLACE must be passed at compile time
107 *
108 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
109 * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
110 * @note Activation function should be given as a preprocessor argument using -DACT=name. e.g. -DACT=TANH
111 * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively.
112 * @note Quantization scales of the input/output tensors are passed in with -DS1_VAL= and -DS2_VAL= respectively.
113 * @note Quantization offsets of the input/output tensors are passed in with -DO1_VAL= and -DO2_VAL= respectively.
Michele Di Giorgioa1f7e332018-01-22 17:26:36 +0000114 * @note Quantized value of constant zero should be given as a preprocessor argument using -DCONST_0=value. e.g. -DCONST_0=128.
Michel Iwaniec00633802017-10-12 14:14:15 +0100115 *
Michele Di Giorgiof6f78762020-07-06 11:27:21 +0100116 * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8/QASYMM8_SIGNED/QSYMM16
Michel Iwaniec00633802017-10-12 14:14:15 +0100117 * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
118 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
119 * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
120 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
121 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
122 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
123 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
Michele Di Giorgioe5bf4c52019-02-14 17:47:33 +0000124 * @param[out] output_ptr (Optional) Pointer to the destination image. Supported data types: same as @p input_ptr
125 * @param[in] output_stride_x (Optional) Stride of the destination image in X dimension (in bytes)
126 * @param[in] output_step_x (Optional) output_stride_x * number of elements along X processed per workitem(in bytes)
127 * @param[in] output_stride_y (Optional) Stride of the destination image in Y dimension (in bytes)
128 * @param[in] output_step_y (Optional) output_stride_y * number of elements along Y processed per workitem(in bytes)
129 * @param[in] output_stride_z (Optional) Stride of the source tensor in Z dimension (in bytes)
130 * @param[in] output_step_z (Optional) output_stride_z * number of elements along Z processed per workitem(in bytes)
131 * @param[in] output_offset_first_element_in_bytes (Optional) The offset of the first element in the destination image
Michel Iwaniec00633802017-10-12 14:14:15 +0100132 */
Manuel Bottini30dbeef2019-06-26 16:23:03 +0100133__kernel void activation_layer_quant(
Michel Iwaniec00633802017-10-12 14:14:15 +0100134 TENSOR3D_DECLARATION(input)
135#ifndef IN_PLACE
136 ,
137 TENSOR3D_DECLARATION(output)
138#endif /* not IN_PLACE */
139)
140{
Giorgio Arenad304adb2020-10-02 10:20:11 +0100141 uint x_offs = max((int)(get_global_id(0) * VEC_SIZE * sizeof(DATA_TYPE) - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE * sizeof(DATA_TYPE)), 0);
142
Michel Iwaniec00633802017-10-12 14:14:15 +0100143 // Get pixels pointer
Giorgio Arenad304adb2020-10-02 10:20:11 +0100144 __global uchar *input_addr = input_ptr + input_offset_first_element_in_bytes + x_offs + get_global_id(1) * input_stride_y + get_global_id(2) * input_stride_z;
Michel Iwaniec00633802017-10-12 14:14:15 +0100145#ifdef IN_PLACE
Giorgio Arenad304adb2020-10-02 10:20:11 +0100146 __global uchar *output_addr = input_addr;
Michel Iwaniec00633802017-10-12 14:14:15 +0100147#else /* IN_PLACE */
Giorgio Arenad304adb2020-10-02 10:20:11 +0100148 __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + x_offs + get_global_id(1) * output_stride_y + get_global_id(2) * output_stride_z;
Michel Iwaniec00633802017-10-12 14:14:15 +0100149#endif /* IN_PLACE */
150
151 // Load data
Giorgio Arenad304adb2020-10-02 10:20:11 +0100152 TYPE data0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input_addr);
Michel Iwaniec00633802017-10-12 14:14:15 +0100153
Giorgio Arenad304adb2020-10-02 10:20:11 +0100154 data0 = PERFORM_ACTIVATION_QUANT(ACT, data0);
Michel Iwaniec00633802017-10-12 14:14:15 +0100155
156 // Store result
Giorgio Arenad304adb2020-10-02 10:20:11 +0100157 STORE_VECTOR_SELECT(data, DATA_TYPE, output_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0)
Michel Iwaniec00633802017-10-12 14:14:15 +0100158}
Georgios Pinitas4b3fba12019-06-04 17:31:46 +0100159#endif // defined(ACT)
160#endif // defined(FLOAT_DOMAIN)