blob: 792544dc61b9f49d57ec15bd3f3a79a69e980b8f [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
Usama Arife2428a02019-05-09 11:03:17 +01002 * Copyright (c) 2016-2019 ARM Limited.
Anthony Barbier6ff3b192017-09-04 18:44:23 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef ARM_COMPUTE_HELPER_H
25#define ARM_COMPUTE_HELPER_H
26
Georgios Pinitasdaa38552018-08-28 17:43:18 +010027#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010028#pragma OPENCL EXTENSION cl_khr_fp16 : enable
Georgios Pinitasdaa38552018-08-28 17:43:18 +010029#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
Matthew Bentham6f31f8c2017-10-27 11:50:06 +010030
Georgios Pinitasdaa38552018-08-28 17:43:18 +010031#if defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
Michalis Spyroue03342e2018-01-15 14:39:13 +000032#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable
Georgios Pinitasdaa38552018-08-28 17:43:18 +010033#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
Michalis Spyroue03342e2018-01-15 14:39:13 +000034
Georgios Pinitasdaa38552018-08-28 17:43:18 +010035#if defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && defined(cl_arm_integer_dot_product_accumulate_int8)
Giorgio Arenaeff8d952018-07-02 15:29:57 +010036#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : enable
Georgios Pinitasdaa38552018-08-28 17:43:18 +010037#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && defined(cl_arm_integer_dot_product_accumulate_int8)
Giorgio Arenaeff8d952018-07-02 15:29:57 +010038
Georgios Pinitasdaa38552018-08-28 17:43:18 +010039#if defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf)
steniu01f01f9de2017-09-27 17:00:11 +010040#pragma OPENCL EXTENSION cl_arm_printf : enable
Georgios Pinitas238c97c2018-08-31 17:28:29 +010041#endif // defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010042
Usama Arife2428a02019-05-09 11:03:17 +010043#define GPU_ARCH_MIDGARD 0x100
44#define GPU_ARCH_BIFROST 0x200
45
Georgios Pinitase5f8fd62017-06-23 18:03:44 +010046#define EXPAND(x) x
47
Anthony Barbier6ff3b192017-09-04 18:44:23 +010048#define CLAMP(x, min_val, max_val) min(max(x, min_val), max_val)
49
Georgios Pinitasac4e8732017-07-05 17:02:25 +010050#define VLOAD_STR(size) vload##size
51#define VLOAD(size) VLOAD_STR(size)
52
53#define VSTORE_STR(size) vstore##size
54#define VSTORE(size) VSTORE_STR(size)
55
Manuel Bottini0d0028c2018-10-02 16:41:52 +010056#define float1 float
57#define half1 half
58
Anthony Barbier6ff3b192017-09-04 18:44:23 +010059#define VEC_DATA_TYPE_STR(type, size) type##size
60#define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size)
61
Chunosovd6afedc2017-11-06 22:09:45 +070062#define CL_VEC_DATA_TYPE_STR(type, size) type##size
63#define CL_VEC_DATA_TYPE(type, size) CL_VEC_DATA_TYPE_STR(type, size)
64
Anthony Barbier6ff3b192017-09-04 18:44:23 +010065#define CONVERT_STR(x, type) (convert_##type((x)))
66#define CONVERT(x, type) CONVERT_STR(x, type)
67
68#define CONVERT_SAT_STR(x, type) (convert_##type##_sat((x)))
69#define CONVERT_SAT(x, type) CONVERT_SAT_STR(x, type)
70
71#define CONVERT_SAT_ROUND_STR(x, type, round) (convert_##type##_sat_##round((x)))
72#define CONVERT_SAT_ROUND(x, type, round) CONVERT_SAT_ROUND_STR(x, type, round)
73
74#define VECTOR_DECLARATION(name) \
75 __global uchar *name##_ptr, \
76 uint name##_stride_x, \
77 uint name##_step_x, \
78 uint name##_offset_first_element_in_bytes
79
80#define IMAGE_DECLARATION(name) \
81 __global uchar *name##_ptr, \
82 uint name##_stride_x, \
83 uint name##_step_x, \
84 uint name##_stride_y, \
85 uint name##_step_y, \
86 uint name##_offset_first_element_in_bytes
87
88#define TENSOR3D_DECLARATION(name) \
89 __global uchar *name##_ptr, \
90 uint name##_stride_x, \
91 uint name##_step_x, \
92 uint name##_stride_y, \
93 uint name##_step_y, \
94 uint name##_stride_z, \
95 uint name##_step_z, \
96 uint name##_offset_first_element_in_bytes
97
steniu01868e5412017-07-17 23:16:00 +010098#define TENSOR4D_DECLARATION(name) \
99 __global uchar *name##_ptr, \
100 uint name##_stride_x, \
101 uint name##_step_x, \
102 uint name##_stride_y, \
103 uint name##_step_y, \
104 uint name##_stride_z, \
105 uint name##_step_z, \
106 uint name##_stride_w, \
107 uint name##_step_w, \
108 uint name##_offset_first_element_in_bytes
109
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100110#define CONVERT_TO_VECTOR_STRUCT(name) \
111 update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x)
112
113#define CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) \
114 update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0)
115
116#define CONVERT_TO_IMAGE_STRUCT(name) \
117 update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y)
118
119#define CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \
120 update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0)
121
steniu01868e5412017-07-17 23:16:00 +0100122#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
123 update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)
124
Anthony Barbier7ff47a32017-07-11 16:54:04 +0100125#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \
126 update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, name##_step_z)
127
steniu010d523cc2017-07-13 14:24:23 +0100128#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
129 update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)
130
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100131#define CONVERT_TO_TENSOR3D_STRUCT(name) \
132 update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
133 name##_stride_z, name##_step_z)
134
135#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) \
136 update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, 0)
137
steniu01868e5412017-07-17 23:16:00 +0100138#define CONVERT_TO_TENSOR4D_STRUCT(name, mod_size) \
139 update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
Michalis Spyrou5237e012018-01-17 09:40:27 +0000140 name##_stride_z, name##_step_z, name##_stride_w, name##_step_w, mod_size)
steniu01868e5412017-07-17 23:16:00 +0100141
142#define CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(name, mod_size) \
143 update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, 0, name##_stride_w, 0, mod_size)
144
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100145/** Structure to hold Vector information */
146typedef struct Vector
147{
148 __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
149 int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
150 int stride_x; /**< Stride of the image in X dimension (in bytes) */
151} Vector;
152
153/** Structure to hold Image information */
154typedef struct Image
155{
156 __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
157 int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
158 int stride_x; /**< Stride of the image in X dimension (in bytes) */
159 int stride_y; /**< Stride of the image in Y dimension (in bytes) */
160} Image;
161
162/** Structure to hold 3D tensor information */
163typedef struct Tensor3D
164{
165 __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
166 int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
167 int stride_x; /**< Stride of the image in X dimension (in bytes) */
168 int stride_y; /**< Stride of the image in Y dimension (in bytes) */
169 int stride_z; /**< Stride of the image in Z dimension (in bytes) */
170} Tensor3D;
171
steniu01868e5412017-07-17 23:16:00 +0100172/** Structure to hold 4D tensor information */
173typedef struct Tensor4D
174{
175 __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
176 int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
177 int stride_x; /**< Stride of the image in X dimension (in bytes) */
178 int stride_y; /**< Stride of the image in Y dimension (in bytes) */
179 int stride_z; /**< Stride of the image in Z dimension (in bytes) */
180 int stride_w; /**< Stride of the image in W dimension (in bytes) */
181} Tensor4D;
182
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100183/** Wrap vector information into an Vector structure, and make the pointer point at this workitem's data.
184 *
185 * @param[in] ptr Pointer to the starting postion of the buffer
186 * @param[in] offset_first_element_in_bytes The offset of the first element in the source vector
187 * @param[in] stride_x Stride of the vector in X dimension (in bytes)
188 * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
189 *
190 * @return An image object
191 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100192inline Vector update_vector_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100193{
194 Vector vector =
195 {
196 .ptr = ptr,
197 .offset_first_element_in_bytes = offset_first_element_in_bytes,
198 .stride_x = stride_x,
199 };
200 vector.ptr += vector.offset_first_element_in_bytes + get_global_id(0) * step_x;
201 return vector;
202}
203
204/** Wrap image information into an Image structure, and make the pointer point at this workitem's data.
205 *
206 * @param[in] ptr Pointer to the starting postion of the buffer
207 * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
208 * @param[in] stride_x Stride of the image in X dimension (in bytes)
209 * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
210 * @param[in] stride_y Stride of the image in Y dimension (in bytes)
211 * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
212 *
213 * @return An image object
214 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100215inline Image update_image_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100216{
217 Image img =
218 {
219 .ptr = ptr,
220 .offset_first_element_in_bytes = offset_first_element_in_bytes,
221 .stride_x = stride_x,
222 .stride_y = stride_y
223 };
224 img.ptr += img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y;
225 return img;
226}
227
Anthony Barbier7ff47a32017-07-11 16:54:04 +0100228/** Wrap 3D tensor information into an image structure, and make the pointer point at this workitem's data.
229 *
230 * @param[in] ptr Pointer to the starting postion of the buffer
231 * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
232 * @param[in] stride_x Stride of the image in X dimension (in bytes)
233 * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
234 * @param[in] stride_y Stride of the image in Y dimension (in bytes)
235 * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
236 * @param[in] stride_z Stride of the image in Z dimension (in bytes)
237 * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes)
238 *
239 * @return A 3D tensor object
240 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100241inline Image update_image_from_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
Anthony Barbier7ff47a32017-07-11 16:54:04 +0100242{
243 Image img =
244 {
245 .ptr = ptr,
246 .offset_first_element_in_bytes = offset_first_element_in_bytes,
247 .stride_x = stride_x,
248 .stride_y = stride_y
249 };
250 img.ptr += img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y + get_global_id(2) * step_z;
251 return img;
252}
253
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100254/** Wrap 3D tensor information into an tensor structure, and make the pointer point at this workitem's data.
255 *
256 * @param[in] ptr Pointer to the starting postion of the buffer
257 * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
258 * @param[in] stride_x Stride of the image in X dimension (in bytes)
259 * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
260 * @param[in] stride_y Stride of the image in Y dimension (in bytes)
261 * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
262 * @param[in] stride_z Stride of the image in Z dimension (in bytes)
263 * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes)
264 *
265 * @return A 3D tensor object
266 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100267inline Tensor3D update_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100268{
269 Tensor3D tensor =
270 {
271 .ptr = ptr,
272 .offset_first_element_in_bytes = offset_first_element_in_bytes,
273 .stride_x = stride_x,
274 .stride_y = stride_y,
275 .stride_z = stride_z
276 };
277 tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y + get_global_id(2) * step_z;
278 return tensor;
279}
280
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100281inline Tensor4D update_tensor4D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z, uint stride_w,
steniu01868e5412017-07-17 23:16:00 +0100282 uint step_w,
283 uint mod_size)
284{
285 Tensor4D tensor =
286 {
287 .ptr = ptr,
288 .offset_first_element_in_bytes = offset_first_element_in_bytes,
289 .stride_x = stride_x,
290 .stride_y = stride_y,
291 .stride_z = stride_z,
292 .stride_w = stride_w
293 };
294
295 tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y + (get_global_id(2) % mod_size) * step_z + (get_global_id(2) / mod_size) * step_w;
296 return tensor;
297}
298
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100299/** Get the pointer position of a Vector
300 *
301 * @param[in] vec Pointer to the starting position of the buffer
302 * @param[in] x Relative X position
303 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100304inline __global const uchar *vector_offset(const Vector *vec, int x)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100305{
306 return vec->ptr + x * vec->stride_x;
307}
308
309/** Get the pointer position of a Image
310 *
311 * @param[in] img Pointer to the starting position of the buffer
312 * @param[in] x Relative X position
313 * @param[in] y Relative Y position
314 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100315inline __global uchar *offset(const Image *img, int x, int y)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100316{
317 return img->ptr + x * img->stride_x + y * img->stride_y;
318}
319
320/** Get the pointer position of a Tensor3D
321 *
Gian Marco Iodice3a623242017-07-25 10:25:53 +0100322 * @param[in] tensor Pointer to the starting position of the buffer
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100323 * @param[in] x Relative X position
324 * @param[in] y Relative Y position
325 * @param[in] z Relative Z position
326 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100327inline __global const uchar *tensor3D_offset(const Tensor3D *tensor, int x, int y, int z)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100328{
329 return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z;
330}
331
steniu01868e5412017-07-17 23:16:00 +0100332/** Get the pointer position of a Tensor4D
333 *
334 * @param[in] tensor Pointer to the starting position of the buffer
335 * @param[in] x Relative X position
336 * @param[in] y Relative Y position
337 * @param[in] z Relative Z position
338 * @param[in] w Relative W position
339 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100340inline __global const uchar *tensor4D_offset(const Tensor4D *tensor, int x, int y, int z, int w)
steniu01868e5412017-07-17 23:16:00 +0100341{
342 return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z + w * tensor->stride_w;
343}
344
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100345#endif // _HELPER_H