blob: 2240d7c637513dfdde77e3dc4a79b17b5a01f16b [file] [log] [blame]
Giorgio Arena73023022018-09-04 14:55:55 +01001/*
2 * Copyright (c) 2018 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#if defined(DATA_TYPE) && defined(SELECT_DATA_TYPE) && defined(ACT) && defined(NUM_CLASSES) && defined(VEC_SIZE)
25
26#if VEC_SIZE != 1
27#define TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
28#define SELECT_TYPE VEC_DATA_TYPE(SELECT_DATA_TYPE, VEC_SIZE)
29
30#include "activation_helpers.h"
31
32/** This performs a YOLO partial activation function for NCHW data layout
33 *
34 * @note In order to perform the activation function "in-place", the pre-processor -DIN_PLACE must be passed at compile time
35 *
36 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
37 * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
38 * @note Activation function should be given as a preprocessor argument using -DACT=name. e.g. -DACT=TANH
39 * @note The number of classes should be given as a preprocessor argument using -DNUM_CLASSES=num. e.g. -DNUM_CLASSES=80
40 * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively.
41 *
42 * @param[in] input_ptr Pointer to the source tensor. Supported data types: F16/F32
43 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
44 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
45 * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
46 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
47 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
48 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
49 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
50 * @param[out] output_ptr (Optional) Pointer to the destination tensor. Supported data types: same as @p input_ptr
51 * @param[in] output_stride_x (Optional) Stride of the destination tensor in X dimension (in bytes)
52 * @param[in] output_step_x (Optional) output_stride_x * number of elements along X processed per workitem(in bytes)
53 * @param[in] output_stride_y (Optional) Stride of the destination tensor in Y dimension (in bytes)
54 * @param[in] output_step_y (Optional) output_stride_y * number of elements along Y processed per workitem(in bytes)
55 * @param[in] output_stride_z (Optional) Stride of the source tensor in Z dimension (in bytes)
56 * @param[in] output_step_z (Optional) output_stride_z * number of elements along Z processed per workitem(in bytes)
57 * @param[in] output_offset_first_element_in_bytes (Optional) The offset of the first element in the destination tensor
58 */
59__kernel void yolo_layer_nchw(
60 TENSOR3D_DECLARATION(input)
61#ifndef IN_PLACE
62 ,
63 TENSOR3D_DECLARATION(output)
64#endif /* not IN_PLACE */
65)
66{
67 // Get pixels pointer
68 Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
69#ifdef IN_PLACE
70 Tensor3D output = input;
71#else /* IN_PLACE */
72 Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
73#endif /* IN_PLACE */
74
75 const int box_ch_id = get_global_id(2) % (NUM_CLASSES + 5);
76 const bool activate = box_ch_id != 2 && box_ch_id != 3;
77
78 if(activate)
79 {
80 // Load data
81 TYPE data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr);
82 data = ACTIVATION_OP(ACT, data); // select(1.0f, ACTIVATION_OP(ACT, data), (SELECT_TYPE)activate);
83
84 // Store result
85 VSTORE(VEC_SIZE)
86 (data, 0, (__global DATA_TYPE *)output.ptr);
87 }
88#ifndef IN_PLACE
89 else
90 {
91 // Load data
92 TYPE data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr);
93
94 // Store result
95 VSTORE(VEC_SIZE)
96 (data, 0, (__global DATA_TYPE *)output.ptr);
97 }
98#endif // IN_PLACE
99}
100
101#else // VEC_SIZE != 1
102
103#define TYPE DATA_TYPE
104#define SELECT_TYPE SELECT_DATA_TYPE
105
106#include "activation_helpers.h"
107
108/** This performs a YOLO partial activation function for NCHW data layout
109 *
110 * @note In order to perform the activation function "in-place", the pre-processor -DIN_PLACE must be passed at compile time
111 *
112 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
113 * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=1
114 * @note Activation function should be given as a preprocessor argument using -DACT=name. e.g. -DACT=TANH
115 * @note The number of classes should be given as a preprocessor argument using -DNUM_CLASSES=num. e.g. -DNUM_CLASSES=80
116 * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively.
117 *
118 * @param[in] input_ptr Pointer to the source tensor. Supported data types: F16/F32
119 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
120 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
121 * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
122 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
123 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
124 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
125 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
126 * @param[out] output_ptr (Optional) Pointer to the destination tensor. Supported data types: same as @p input_ptr
127 * @param[in] output_stride_x (Optional) Stride of the destination tensor in X dimension (in bytes)
128 * @param[in] output_step_x (Optional) output_stride_x * number of elements along X processed per workitem(in bytes)
129 * @param[in] output_stride_y (Optional) Stride of the destination tensor in Y dimension (in bytes)
130 * @param[in] output_step_y (Optional) output_stride_y * number of elements along Y processed per workitem(in bytes)
131 * @param[in] output_stride_z (Optional) Stride of the source tensor in Z dimension (in bytes)
132 * @param[in] output_step_z (Optional) output_stride_z * number of elements along Z processed per workitem(in bytes)
133 * @param[in] output_offset_first_element_in_bytes (Optional) The offset of the first element in the destination tensor
134 */
135__kernel void yolo_layer_nhwc(
136 TENSOR3D_DECLARATION(input)
137#ifndef IN_PLACE
138 ,
139 TENSOR3D_DECLARATION(output)
140#endif /* not IN_PLACE */
141)
142{
143 // Get pixels pointer
144 Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
145#ifdef IN_PLACE
146 Tensor3D output = input;
147#else /* IN_PLACE */
148 Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
149#endif /* IN_PLACE */
150
151 const int box_ch_id = get_global_id(0) % (NUM_CLASSES + 5);
152 const bool activate = box_ch_id != 2 && box_ch_id != 3;
153
154 if(activate)
155 {
156 // Load data
157 DATA_TYPE data = *((__global DATA_TYPE *)input.ptr);
158 data = select(data, ACTIVATION_OP(ACT, data), (SELECT_TYPE)activate);
159
160 // Store result
161 *((__global DATA_TYPE *)output.ptr) = data;
162 }
163#ifndef IN_PLACE
164 else
165 {
166 // Load data
167 DATA_TYPE data = *((__global DATA_TYPE *)input.ptr);
168
169 // Store result
170 *((__global DATA_TYPE *)output.ptr) = data;
171 }
172#endif // IN_PLACE
173}
174
175#endif // VEC_SIZE != 1
176#endif // defined(DATA_TYPE) && defined(SELECT_DATA_TYPE) && defined(ACT) && defined(NUM_CLASSES) && defined(VEC_SIZE)