blob: 6f51b87bc6fb56134698742a719d426ad72785d9 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
Usama Arife2428a02019-05-09 11:03:17 +01002 * Copyright (c) 2016-2019 ARM Limited.
Anthony Barbier6ff3b192017-09-04 18:44:23 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef ARM_COMPUTE_HELPER_H
25#define ARM_COMPUTE_HELPER_H
26
Georgios Pinitasdaa38552018-08-28 17:43:18 +010027#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010028#pragma OPENCL EXTENSION cl_khr_fp16 : enable
Georgios Pinitasdaa38552018-08-28 17:43:18 +010029#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
Matthew Bentham6f31f8c2017-10-27 11:50:06 +010030
Georgios Pinitasdaa38552018-08-28 17:43:18 +010031#if defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
Michalis Spyroue03342e2018-01-15 14:39:13 +000032#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable
Georgios Pinitasdaa38552018-08-28 17:43:18 +010033#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
Michalis Spyroue03342e2018-01-15 14:39:13 +000034
Georgios Pinitasdaa38552018-08-28 17:43:18 +010035#if defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && defined(cl_arm_integer_dot_product_accumulate_int8)
Giorgio Arenaeff8d952018-07-02 15:29:57 +010036#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : enable
Georgios Pinitasdaa38552018-08-28 17:43:18 +010037#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && defined(cl_arm_integer_dot_product_accumulate_int8)
Giorgio Arenaeff8d952018-07-02 15:29:57 +010038
Georgios Pinitasdaa38552018-08-28 17:43:18 +010039#if defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf)
steniu01f01f9de2017-09-27 17:00:11 +010040#pragma OPENCL EXTENSION cl_arm_printf : enable
Georgios Pinitas238c97c2018-08-31 17:28:29 +010041#endif // defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010042
Usama Arife2428a02019-05-09 11:03:17 +010043#define GPU_ARCH_MIDGARD 0x100
44#define GPU_ARCH_BIFROST 0x200
45
Gian Marco Iodice43a129e2019-05-14 10:14:08 +010046#define CONCAT(a, b) a##b
47
Georgios Pinitase5f8fd62017-06-23 18:03:44 +010048#define EXPAND(x) x
49
Anthony Barbier6ff3b192017-09-04 18:44:23 +010050#define CLAMP(x, min_val, max_val) min(max(x, min_val), max_val)
51
Georgios Pinitasac4e8732017-07-05 17:02:25 +010052#define VLOAD_STR(size) vload##size
53#define VLOAD(size) VLOAD_STR(size)
54
55#define VSTORE_STR(size) vstore##size
56#define VSTORE(size) VSTORE_STR(size)
57
Manuel Bottini0d0028c2018-10-02 16:41:52 +010058#define float1 float
59#define half1 half
Usama Arif0681e3b2019-04-25 14:28:07 +010060#define char1 char
61#define uchar1 uchar
62#define short1 short
63#define ushort1 ushort
64#define int1 int
65#define uint1 uint
66#define long1 long
67#define ulong1 ulong
68#define double1 double
69
70#define vload1(OFFSET, PTR) *(OFFSET + PTR)
Gian Marco Iodice9285adb2019-09-05 16:10:27 +010071#define vstore1(DATA, OFFSET, PTR) *(OFFSET + PTR) = DATA
Manuel Bottini0d0028c2018-10-02 16:41:52 +010072
Gian Marco Iodice0c17aa22019-09-27 09:23:15 +010073// Convert built-in functions with _sat modifier are not supported in floating point so we create defines
74// without _sat to overcome this issue
75#define convert_float_sat convert_float
76#define convert_float1_sat convert_float
77#define convert_float2_sat convert_float2
78#define convert_float3_sat convert_float3
79#define convert_float4_sat convert_float4
80#define convert_float8_sat convert_float8
81#define convert_float16_sat convert_float16
82#define convert_half_sat convert_float
83#define convert_half1_sat convert_half
84#define convert_half2_sat convert_half2
85#define convert_half3_sat convert_half3
86#define convert_half4_sat convert_half4
87#define convert_half8_sat convert_half8
88#define convert_half16_sat convert_half16
89
Anthony Barbier6ff3b192017-09-04 18:44:23 +010090#define VEC_DATA_TYPE_STR(type, size) type##size
91#define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size)
92
Chunosovd6afedc2017-11-06 22:09:45 +070093#define CL_VEC_DATA_TYPE_STR(type, size) type##size
94#define CL_VEC_DATA_TYPE(type, size) CL_VEC_DATA_TYPE_STR(type, size)
95
Anthony Barbier6ff3b192017-09-04 18:44:23 +010096#define CONVERT_STR(x, type) (convert_##type((x)))
97#define CONVERT(x, type) CONVERT_STR(x, type)
98
99#define CONVERT_SAT_STR(x, type) (convert_##type##_sat((x)))
100#define CONVERT_SAT(x, type) CONVERT_SAT_STR(x, type)
101
102#define CONVERT_SAT_ROUND_STR(x, type, round) (convert_##type##_sat_##round((x)))
103#define CONVERT_SAT_ROUND(x, type, round) CONVERT_SAT_ROUND_STR(x, type, round)
104
105#define VECTOR_DECLARATION(name) \
106 __global uchar *name##_ptr, \
107 uint name##_stride_x, \
108 uint name##_step_x, \
109 uint name##_offset_first_element_in_bytes
110
111#define IMAGE_DECLARATION(name) \
112 __global uchar *name##_ptr, \
113 uint name##_stride_x, \
114 uint name##_step_x, \
115 uint name##_stride_y, \
116 uint name##_step_y, \
117 uint name##_offset_first_element_in_bytes
118
119#define TENSOR3D_DECLARATION(name) \
120 __global uchar *name##_ptr, \
121 uint name##_stride_x, \
122 uint name##_step_x, \
123 uint name##_stride_y, \
124 uint name##_step_y, \
125 uint name##_stride_z, \
126 uint name##_step_z, \
127 uint name##_offset_first_element_in_bytes
128
steniu01868e5412017-07-17 23:16:00 +0100129#define TENSOR4D_DECLARATION(name) \
130 __global uchar *name##_ptr, \
131 uint name##_stride_x, \
132 uint name##_step_x, \
133 uint name##_stride_y, \
134 uint name##_step_y, \
135 uint name##_stride_z, \
136 uint name##_step_z, \
137 uint name##_stride_w, \
138 uint name##_step_w, \
139 uint name##_offset_first_element_in_bytes
140
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100141#define CONVERT_TO_VECTOR_STRUCT(name) \
142 update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x)
143
144#define CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) \
145 update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0)
146
147#define CONVERT_TO_IMAGE_STRUCT(name) \
148 update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y)
149
150#define CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \
151 update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0)
152
steniu01868e5412017-07-17 23:16:00 +0100153#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
154 update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)
155
Anthony Barbier7ff47a32017-07-11 16:54:04 +0100156#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \
157 update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, name##_step_z)
158
steniu010d523cc2017-07-13 14:24:23 +0100159#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
160 update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)
161
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100162#define CONVERT_TO_TENSOR3D_STRUCT(name) \
163 update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
164 name##_stride_z, name##_step_z)
165
166#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) \
167 update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, 0)
168
steniu01868e5412017-07-17 23:16:00 +0100169#define CONVERT_TO_TENSOR4D_STRUCT(name, mod_size) \
170 update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
Michalis Spyrou5237e012018-01-17 09:40:27 +0000171 name##_stride_z, name##_step_z, name##_stride_w, name##_step_w, mod_size)
steniu01868e5412017-07-17 23:16:00 +0100172
173#define CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(name, mod_size) \
174 update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, 0, name##_stride_w, 0, mod_size)
175
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100176/** Structure to hold Vector information */
177typedef struct Vector
178{
179 __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
180 int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
181 int stride_x; /**< Stride of the image in X dimension (in bytes) */
182} Vector;
183
184/** Structure to hold Image information */
185typedef struct Image
186{
187 __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
188 int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
189 int stride_x; /**< Stride of the image in X dimension (in bytes) */
190 int stride_y; /**< Stride of the image in Y dimension (in bytes) */
191} Image;
192
193/** Structure to hold 3D tensor information */
194typedef struct Tensor3D
195{
196 __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
197 int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
198 int stride_x; /**< Stride of the image in X dimension (in bytes) */
199 int stride_y; /**< Stride of the image in Y dimension (in bytes) */
200 int stride_z; /**< Stride of the image in Z dimension (in bytes) */
201} Tensor3D;
202
steniu01868e5412017-07-17 23:16:00 +0100203/** Structure to hold 4D tensor information */
204typedef struct Tensor4D
205{
206 __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
207 int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
208 int stride_x; /**< Stride of the image in X dimension (in bytes) */
209 int stride_y; /**< Stride of the image in Y dimension (in bytes) */
210 int stride_z; /**< Stride of the image in Z dimension (in bytes) */
211 int stride_w; /**< Stride of the image in W dimension (in bytes) */
212} Tensor4D;
213
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100214/** Wrap vector information into an Vector structure, and make the pointer point at this workitem's data.
215 *
216 * @param[in] ptr Pointer to the starting postion of the buffer
217 * @param[in] offset_first_element_in_bytes The offset of the first element in the source vector
218 * @param[in] stride_x Stride of the vector in X dimension (in bytes)
219 * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
220 *
221 * @return An image object
222 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100223inline Vector update_vector_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100224{
225 Vector vector =
226 {
227 .ptr = ptr,
228 .offset_first_element_in_bytes = offset_first_element_in_bytes,
229 .stride_x = stride_x,
230 };
231 vector.ptr += vector.offset_first_element_in_bytes + get_global_id(0) * step_x;
232 return vector;
233}
234
235/** Wrap image information into an Image structure, and make the pointer point at this workitem's data.
236 *
237 * @param[in] ptr Pointer to the starting postion of the buffer
238 * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
239 * @param[in] stride_x Stride of the image in X dimension (in bytes)
240 * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
241 * @param[in] stride_y Stride of the image in Y dimension (in bytes)
242 * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
243 *
244 * @return An image object
245 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100246inline Image update_image_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100247{
248 Image img =
249 {
250 .ptr = ptr,
251 .offset_first_element_in_bytes = offset_first_element_in_bytes,
252 .stride_x = stride_x,
253 .stride_y = stride_y
254 };
255 img.ptr += img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y;
256 return img;
257}
258
Anthony Barbier7ff47a32017-07-11 16:54:04 +0100259/** Wrap 3D tensor information into an image structure, and make the pointer point at this workitem's data.
260 *
261 * @param[in] ptr Pointer to the starting postion of the buffer
262 * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
263 * @param[in] stride_x Stride of the image in X dimension (in bytes)
264 * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
265 * @param[in] stride_y Stride of the image in Y dimension (in bytes)
266 * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
267 * @param[in] stride_z Stride of the image in Z dimension (in bytes)
268 * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes)
269 *
270 * @return A 3D tensor object
271 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100272inline Image update_image_from_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
Anthony Barbier7ff47a32017-07-11 16:54:04 +0100273{
274 Image img =
275 {
276 .ptr = ptr,
277 .offset_first_element_in_bytes = offset_first_element_in_bytes,
278 .stride_x = stride_x,
279 .stride_y = stride_y
280 };
281 img.ptr += img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y + get_global_id(2) * step_z;
282 return img;
283}
284
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100285/** Wrap 3D tensor information into an tensor structure, and make the pointer point at this workitem's data.
286 *
287 * @param[in] ptr Pointer to the starting postion of the buffer
288 * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
289 * @param[in] stride_x Stride of the image in X dimension (in bytes)
290 * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
291 * @param[in] stride_y Stride of the image in Y dimension (in bytes)
292 * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
293 * @param[in] stride_z Stride of the image in Z dimension (in bytes)
294 * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes)
295 *
296 * @return A 3D tensor object
297 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100298inline Tensor3D update_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100299{
300 Tensor3D tensor =
301 {
302 .ptr = ptr,
303 .offset_first_element_in_bytes = offset_first_element_in_bytes,
304 .stride_x = stride_x,
305 .stride_y = stride_y,
306 .stride_z = stride_z
307 };
308 tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y + get_global_id(2) * step_z;
309 return tensor;
310}
311
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100312inline Tensor4D update_tensor4D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z, uint stride_w,
steniu01868e5412017-07-17 23:16:00 +0100313 uint step_w,
314 uint mod_size)
315{
316 Tensor4D tensor =
317 {
318 .ptr = ptr,
319 .offset_first_element_in_bytes = offset_first_element_in_bytes,
320 .stride_x = stride_x,
321 .stride_y = stride_y,
322 .stride_z = stride_z,
323 .stride_w = stride_w
324 };
325
326 tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y + (get_global_id(2) % mod_size) * step_z + (get_global_id(2) / mod_size) * step_w;
327 return tensor;
328}
329
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100330/** Get the pointer position of a Vector
331 *
332 * @param[in] vec Pointer to the starting position of the buffer
333 * @param[in] x Relative X position
334 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100335inline __global const uchar *vector_offset(const Vector *vec, int x)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100336{
337 return vec->ptr + x * vec->stride_x;
338}
339
340/** Get the pointer position of a Image
341 *
342 * @param[in] img Pointer to the starting position of the buffer
343 * @param[in] x Relative X position
344 * @param[in] y Relative Y position
345 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100346inline __global uchar *offset(const Image *img, int x, int y)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100347{
348 return img->ptr + x * img->stride_x + y * img->stride_y;
349}
350
351/** Get the pointer position of a Tensor3D
352 *
Gian Marco Iodice3a623242017-07-25 10:25:53 +0100353 * @param[in] tensor Pointer to the starting position of the buffer
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100354 * @param[in] x Relative X position
355 * @param[in] y Relative Y position
356 * @param[in] z Relative Z position
357 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100358inline __global const uchar *tensor3D_offset(const Tensor3D *tensor, int x, int y, int z)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100359{
360 return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z;
361}
362
steniu01868e5412017-07-17 23:16:00 +0100363/** Get the pointer position of a Tensor4D
364 *
365 * @param[in] tensor Pointer to the starting position of the buffer
366 * @param[in] x Relative X position
367 * @param[in] y Relative Y position
368 * @param[in] z Relative Z position
369 * @param[in] w Relative W position
370 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100371inline __global const uchar *tensor4D_offset(const Tensor4D *tensor, int x, int y, int z, int w)
steniu01868e5412017-07-17 23:16:00 +0100372{
373 return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z + w * tensor->stride_w;
374}
375
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100376#endif // _HELPER_H