blob: 9601dddf67fdb71455ac59e28976370949c06aeb [file] [log] [blame]
Giorgio Arena73023022018-09-04 14:55:55 +01001/*
Giorgio Arenad056e572020-10-12 11:53:51 +01002 * Copyright (c) 2018-2020 Arm Limited.
Giorgio Arena73023022018-09-04 14:55:55 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Giorgio Arenad056e572020-10-12 11:53:51 +010024#if defined(DATA_TYPE) && defined(ACTIVATION_TYPE) && defined(NUM_CLASSES) && defined(VEC_SIZE)
Usama Arif6a98a6e2019-05-10 17:07:27 +010025
26#include "activation_float_helpers.h"
Giorgio Arena73023022018-09-04 14:55:55 +010027
Giorgio Arena2d1a8352020-10-26 15:04:08 +000028#define SELECT_TYPE SELECT_VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
Giorgio Arenad056e572020-10-12 11:53:51 +010029
Giorgio Arena73023022018-09-04 14:55:55 +010030#if VEC_SIZE != 1
31#define TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
Giorgio Arena73023022018-09-04 14:55:55 +010032
Giorgio Arena73023022018-09-04 14:55:55 +010033/** This performs a YOLO partial activation function for NCHW data layout
34 *
35 * @note In order to perform the activation function "in-place", the pre-processor -DIN_PLACE must be passed at compile time
36 *
37 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
38 * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
Usama Arif6a98a6e2019-05-10 17:07:27 +010039 * @note Activation function should be given as a preprocessor argument using -DACTIVATION_TYPE=name. e.g. -DACTIVATION_TYPE=TANH
Giorgio Arena73023022018-09-04 14:55:55 +010040 * @note The number of classes should be given as a preprocessor argument using -DNUM_CLASSES=num. e.g. -DNUM_CLASSES=80
41 * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively.
42 *
43 * @param[in] input_ptr Pointer to the source tensor. Supported data types: F16/F32
44 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
45 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
46 * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
47 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
48 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
49 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
50 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
51 * @param[out] output_ptr (Optional) Pointer to the destination tensor. Supported data types: same as @p input_ptr
52 * @param[in] output_stride_x (Optional) Stride of the destination tensor in X dimension (in bytes)
53 * @param[in] output_step_x (Optional) output_stride_x * number of elements along X processed per workitem(in bytes)
54 * @param[in] output_stride_y (Optional) Stride of the destination tensor in Y dimension (in bytes)
55 * @param[in] output_step_y (Optional) output_stride_y * number of elements along Y processed per workitem(in bytes)
56 * @param[in] output_stride_z (Optional) Stride of the source tensor in Z dimension (in bytes)
57 * @param[in] output_step_z (Optional) output_stride_z * number of elements along Z processed per workitem(in bytes)
58 * @param[in] output_offset_first_element_in_bytes (Optional) The offset of the first element in the destination tensor
59 */
60__kernel void yolo_layer_nchw(
61 TENSOR3D_DECLARATION(input)
62#ifndef IN_PLACE
63 ,
64 TENSOR3D_DECLARATION(output)
65#endif /* not IN_PLACE */
66)
67{
68 // Get pixels pointer
69 Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
70#ifdef IN_PLACE
71 Tensor3D output = input;
72#else /* IN_PLACE */
73 Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
74#endif /* IN_PLACE */
75
76 const int box_ch_id = get_global_id(2) % (NUM_CLASSES + 5);
77 const bool activate = box_ch_id != 2 && box_ch_id != 3;
78
79 if(activate)
80 {
81 // Load data
82 TYPE data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr);
Giorgio Arenad056e572020-10-12 11:53:51 +010083 data = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, data, A_VAL, B_VAL); // select(1.0f, ACTIVATION_OP(ACTIVATION_TYPE, data), (SELECT_TYPE)activate);
Giorgio Arena73023022018-09-04 14:55:55 +010084
85 // Store result
86 VSTORE(VEC_SIZE)
87 (data, 0, (__global DATA_TYPE *)output.ptr);
88 }
89#ifndef IN_PLACE
90 else
91 {
92 // Load data
93 TYPE data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr);
94
95 // Store result
96 VSTORE(VEC_SIZE)
97 (data, 0, (__global DATA_TYPE *)output.ptr);
98 }
99#endif // IN_PLACE
100}
101
102#else // VEC_SIZE != 1
103
Giorgio Arena73023022018-09-04 14:55:55 +0100104/** This performs a YOLO partial activation function for NCHW data layout
105 *
106 * @note In order to perform the activation function "in-place", the pre-processor -DIN_PLACE must be passed at compile time
107 *
108 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
109 * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=1
Usama Arif6a98a6e2019-05-10 17:07:27 +0100110 * @note Activation function should be given as a preprocessor argument using -DACTIVATION_TYPE=name. e.g. -DACTIVATION_TYPE=TANH
Giorgio Arena73023022018-09-04 14:55:55 +0100111 * @note The number of classes should be given as a preprocessor argument using -DNUM_CLASSES=num. e.g. -DNUM_CLASSES=80
112 * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively.
113 *
114 * @param[in] input_ptr Pointer to the source tensor. Supported data types: F16/F32
115 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
116 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
117 * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
118 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
119 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
120 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
121 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
122 * @param[out] output_ptr (Optional) Pointer to the destination tensor. Supported data types: same as @p input_ptr
123 * @param[in] output_stride_x (Optional) Stride of the destination tensor in X dimension (in bytes)
124 * @param[in] output_step_x (Optional) output_stride_x * number of elements along X processed per workitem(in bytes)
125 * @param[in] output_stride_y (Optional) Stride of the destination tensor in Y dimension (in bytes)
126 * @param[in] output_step_y (Optional) output_stride_y * number of elements along Y processed per workitem(in bytes)
127 * @param[in] output_stride_z (Optional) Stride of the source tensor in Z dimension (in bytes)
128 * @param[in] output_step_z (Optional) output_stride_z * number of elements along Z processed per workitem(in bytes)
129 * @param[in] output_offset_first_element_in_bytes (Optional) The offset of the first element in the destination tensor
130 */
131__kernel void yolo_layer_nhwc(
132 TENSOR3D_DECLARATION(input)
133#ifndef IN_PLACE
134 ,
135 TENSOR3D_DECLARATION(output)
136#endif /* not IN_PLACE */
137)
138{
139 // Get pixels pointer
140 Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
141#ifdef IN_PLACE
142 Tensor3D output = input;
143#else /* IN_PLACE */
144 Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
145#endif /* IN_PLACE */
146
147 const int box_ch_id = get_global_id(0) % (NUM_CLASSES + 5);
148 const bool activate = box_ch_id != 2 && box_ch_id != 3;
149
150 if(activate)
151 {
152 // Load data
153 DATA_TYPE data = *((__global DATA_TYPE *)input.ptr);
Giorgio Arenad056e572020-10-12 11:53:51 +0100154 data = select(data, ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, data, A_VAL, B_VAL), (SELECT_TYPE)activate);
Giorgio Arena73023022018-09-04 14:55:55 +0100155
156 // Store result
157 *((__global DATA_TYPE *)output.ptr) = data;
158 }
159#ifndef IN_PLACE
160 else
161 {
162 // Load data
163 DATA_TYPE data = *((__global DATA_TYPE *)input.ptr);
164
165 // Store result
166 *((__global DATA_TYPE *)output.ptr) = data;
167 }
168#endif // IN_PLACE
169}
170
171#endif // VEC_SIZE != 1
Giorgio Arenad056e572020-10-12 11:53:51 +0100172#endif // defined(DATA_TYPE) && defined(ACTIVATION_TYPE) && defined(NUM_CLASSES) && defined(VEC_SIZE)