blob: e59396d54ac750cfbd08727a16c81f80e47edc82 [file] [log] [blame]
Giorgio Arena73023022018-09-04 14:55:55 +01001/*
Usama Arif6a98a6e2019-05-10 17:07:27 +01002 * Copyright (c) 2018-2019 ARM Limited.
Giorgio Arena73023022018-09-04 14:55:55 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Usama Arif6a98a6e2019-05-10 17:07:27 +010024#if defined(DATA_TYPE) && defined(SELECT_DATA_TYPE) && defined(ACTIVATION_TYPE) && defined(NUM_CLASSES) && defined(VEC_SIZE)
25
26#include "activation_float_helpers.h"
Giorgio Arena73023022018-09-04 14:55:55 +010027
28#if VEC_SIZE != 1
29#define TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
30#define SELECT_TYPE VEC_DATA_TYPE(SELECT_DATA_TYPE, VEC_SIZE)
31
Giorgio Arena73023022018-09-04 14:55:55 +010032/** This performs a YOLO partial activation function for NCHW data layout
33 *
34 * @note In order to perform the activation function "in-place", the pre-processor -DIN_PLACE must be passed at compile time
35 *
36 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
37 * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
Usama Arif6a98a6e2019-05-10 17:07:27 +010038 * @note Activation function should be given as a preprocessor argument using -DACTIVATION_TYPE=name. e.g. -DACTIVATION_TYPE=TANH
Giorgio Arena73023022018-09-04 14:55:55 +010039 * @note The number of classes should be given as a preprocessor argument using -DNUM_CLASSES=num. e.g. -DNUM_CLASSES=80
40 * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively.
41 *
42 * @param[in] input_ptr Pointer to the source tensor. Supported data types: F16/F32
43 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
44 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
45 * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
46 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
47 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
48 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
49 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
50 * @param[out] output_ptr (Optional) Pointer to the destination tensor. Supported data types: same as @p input_ptr
51 * @param[in] output_stride_x (Optional) Stride of the destination tensor in X dimension (in bytes)
52 * @param[in] output_step_x (Optional) output_stride_x * number of elements along X processed per workitem(in bytes)
53 * @param[in] output_stride_y (Optional) Stride of the destination tensor in Y dimension (in bytes)
54 * @param[in] output_step_y (Optional) output_stride_y * number of elements along Y processed per workitem(in bytes)
55 * @param[in] output_stride_z (Optional) Stride of the source tensor in Z dimension (in bytes)
56 * @param[in] output_step_z (Optional) output_stride_z * number of elements along Z processed per workitem(in bytes)
57 * @param[in] output_offset_first_element_in_bytes (Optional) The offset of the first element in the destination tensor
58 */
59__kernel void yolo_layer_nchw(
60 TENSOR3D_DECLARATION(input)
61#ifndef IN_PLACE
62 ,
63 TENSOR3D_DECLARATION(output)
64#endif /* not IN_PLACE */
65)
66{
67 // Get pixels pointer
68 Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
69#ifdef IN_PLACE
70 Tensor3D output = input;
71#else /* IN_PLACE */
72 Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
73#endif /* IN_PLACE */
74
75 const int box_ch_id = get_global_id(2) % (NUM_CLASSES + 5);
76 const bool activate = box_ch_id != 2 && box_ch_id != 3;
77
78 if(activate)
79 {
80 // Load data
81 TYPE data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr);
Usama Arif6a98a6e2019-05-10 17:07:27 +010082 data = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, data, A_VAL, B_VAL); // select(1.0f, ACTIVATION_OP(ACTIVATION_TYPE, data), (SELECT_TYPE)activate);
Giorgio Arena73023022018-09-04 14:55:55 +010083
84 // Store result
85 VSTORE(VEC_SIZE)
86 (data, 0, (__global DATA_TYPE *)output.ptr);
87 }
88#ifndef IN_PLACE
89 else
90 {
91 // Load data
92 TYPE data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr);
93
94 // Store result
95 VSTORE(VEC_SIZE)
96 (data, 0, (__global DATA_TYPE *)output.ptr);
97 }
98#endif // IN_PLACE
99}
100
101#else // VEC_SIZE != 1
102
Giorgio Arena73023022018-09-04 14:55:55 +0100103#define SELECT_TYPE SELECT_DATA_TYPE
Giorgio Arena73023022018-09-04 14:55:55 +0100104/** This performs a YOLO partial activation function for NCHW data layout
105 *
106 * @note In order to perform the activation function "in-place", the pre-processor -DIN_PLACE must be passed at compile time
107 *
108 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
109 * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=1
Usama Arif6a98a6e2019-05-10 17:07:27 +0100110 * @note Activation function should be given as a preprocessor argument using -DACTIVATION_TYPE=name. e.g. -DACTIVATION_TYPE=TANH
Giorgio Arena73023022018-09-04 14:55:55 +0100111 * @note The number of classes should be given as a preprocessor argument using -DNUM_CLASSES=num. e.g. -DNUM_CLASSES=80
112 * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively.
113 *
114 * @param[in] input_ptr Pointer to the source tensor. Supported data types: F16/F32
115 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
116 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
117 * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
118 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
119 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
120 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
121 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
122 * @param[out] output_ptr (Optional) Pointer to the destination tensor. Supported data types: same as @p input_ptr
123 * @param[in] output_stride_x (Optional) Stride of the destination tensor in X dimension (in bytes)
124 * @param[in] output_step_x (Optional) output_stride_x * number of elements along X processed per workitem(in bytes)
125 * @param[in] output_stride_y (Optional) Stride of the destination tensor in Y dimension (in bytes)
126 * @param[in] output_step_y (Optional) output_stride_y * number of elements along Y processed per workitem(in bytes)
127 * @param[in] output_stride_z (Optional) Stride of the source tensor in Z dimension (in bytes)
128 * @param[in] output_step_z (Optional) output_stride_z * number of elements along Z processed per workitem(in bytes)
129 * @param[in] output_offset_first_element_in_bytes (Optional) The offset of the first element in the destination tensor
130 */
131__kernel void yolo_layer_nhwc(
132 TENSOR3D_DECLARATION(input)
133#ifndef IN_PLACE
134 ,
135 TENSOR3D_DECLARATION(output)
136#endif /* not IN_PLACE */
137)
138{
139 // Get pixels pointer
140 Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
141#ifdef IN_PLACE
142 Tensor3D output = input;
143#else /* IN_PLACE */
144 Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
145#endif /* IN_PLACE */
146
147 const int box_ch_id = get_global_id(0) % (NUM_CLASSES + 5);
148 const bool activate = box_ch_id != 2 && box_ch_id != 3;
149
150 if(activate)
151 {
152 // Load data
153 DATA_TYPE data = *((__global DATA_TYPE *)input.ptr);
Usama Arif6a98a6e2019-05-10 17:07:27 +0100154 data = select(data, ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, data, A_VAL, B_VAL), (SELECT_TYPE)activate);
Giorgio Arena73023022018-09-04 14:55:55 +0100155
156 // Store result
157 *((__global DATA_TYPE *)output.ptr) = data;
158 }
159#ifndef IN_PLACE
160 else
161 {
162 // Load data
163 DATA_TYPE data = *((__global DATA_TYPE *)input.ptr);
164
165 // Store result
166 *((__global DATA_TYPE *)output.ptr) = data;
167 }
168#endif // IN_PLACE
169}
170
171#endif // VEC_SIZE != 1
Usama Arif6a98a6e2019-05-10 17:07:27 +0100172#endif // defined(DATA_TYPE) && defined(SELECT_DATA_TYPE) && defined(ACTIVATION_TYPE) && defined(NUM_CLASSES) && defined(VEC_SIZE)