blob: f501077a4068789f125bf86f9d90ff4e57321d56 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
Usama Arife2428a02019-05-09 11:03:17 +01002 * Copyright (c) 2016-2019 ARM Limited.
Anthony Barbier6ff3b192017-09-04 18:44:23 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef ARM_COMPUTE_HELPER_H
25#define ARM_COMPUTE_HELPER_H
26
Georgios Pinitasdaa38552018-08-28 17:43:18 +010027#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010028#pragma OPENCL EXTENSION cl_khr_fp16 : enable
Georgios Pinitasdaa38552018-08-28 17:43:18 +010029#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
Matthew Bentham6f31f8c2017-10-27 11:50:06 +010030
Georgios Pinitasdaa38552018-08-28 17:43:18 +010031#if defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
Michalis Spyroue03342e2018-01-15 14:39:13 +000032#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable
Georgios Pinitasdaa38552018-08-28 17:43:18 +010033#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
Michalis Spyroue03342e2018-01-15 14:39:13 +000034
Georgios Pinitasdaa38552018-08-28 17:43:18 +010035#if defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && defined(cl_arm_integer_dot_product_accumulate_int8)
Giorgio Arenaeff8d952018-07-02 15:29:57 +010036#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : enable
Georgios Pinitasdaa38552018-08-28 17:43:18 +010037#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && defined(cl_arm_integer_dot_product_accumulate_int8)
Giorgio Arenaeff8d952018-07-02 15:29:57 +010038
Georgios Pinitasdaa38552018-08-28 17:43:18 +010039#if defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf)
steniu01f01f9de2017-09-27 17:00:11 +010040#pragma OPENCL EXTENSION cl_arm_printf : enable
Georgios Pinitas238c97c2018-08-31 17:28:29 +010041#endif // defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010042
Usama Arife2428a02019-05-09 11:03:17 +010043#define GPU_ARCH_MIDGARD 0x100
44#define GPU_ARCH_BIFROST 0x200
45
Gian Marco Iodice43a129e2019-05-14 10:14:08 +010046#define CONCAT(a, b) a##b
47
Georgios Pinitase5f8fd62017-06-23 18:03:44 +010048#define EXPAND(x) x
49
Anthony Barbier6ff3b192017-09-04 18:44:23 +010050#define CLAMP(x, min_val, max_val) min(max(x, min_val), max_val)
51
Georgios Pinitasac4e8732017-07-05 17:02:25 +010052#define VLOAD_STR(size) vload##size
53#define VLOAD(size) VLOAD_STR(size)
54
55#define VSTORE_STR(size) vstore##size
56#define VSTORE(size) VSTORE_STR(size)
57
Manuel Bottini0d0028c2018-10-02 16:41:52 +010058#define float1 float
59#define half1 half
Usama Arif0681e3b2019-04-25 14:28:07 +010060#define char1 char
61#define uchar1 uchar
62#define short1 short
63#define ushort1 ushort
64#define int1 int
65#define uint1 uint
66#define long1 long
67#define ulong1 ulong
68#define double1 double
69
70#define vload1(OFFSET, PTR) *(OFFSET + PTR)
Gian Marco Iodice9285adb2019-09-05 16:10:27 +010071#define vstore1(DATA, OFFSET, PTR) *(OFFSET + PTR) = DATA
Manuel Bottini0d0028c2018-10-02 16:41:52 +010072
Anthony Barbier6ff3b192017-09-04 18:44:23 +010073#define VEC_DATA_TYPE_STR(type, size) type##size
74#define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size)
75
Chunosovd6afedc2017-11-06 22:09:45 +070076#define CL_VEC_DATA_TYPE_STR(type, size) type##size
77#define CL_VEC_DATA_TYPE(type, size) CL_VEC_DATA_TYPE_STR(type, size)
78
Anthony Barbier6ff3b192017-09-04 18:44:23 +010079#define CONVERT_STR(x, type) (convert_##type((x)))
80#define CONVERT(x, type) CONVERT_STR(x, type)
81
82#define CONVERT_SAT_STR(x, type) (convert_##type##_sat((x)))
83#define CONVERT_SAT(x, type) CONVERT_SAT_STR(x, type)
84
85#define CONVERT_SAT_ROUND_STR(x, type, round) (convert_##type##_sat_##round((x)))
86#define CONVERT_SAT_ROUND(x, type, round) CONVERT_SAT_ROUND_STR(x, type, round)
87
88#define VECTOR_DECLARATION(name) \
89 __global uchar *name##_ptr, \
90 uint name##_stride_x, \
91 uint name##_step_x, \
92 uint name##_offset_first_element_in_bytes
93
94#define IMAGE_DECLARATION(name) \
95 __global uchar *name##_ptr, \
96 uint name##_stride_x, \
97 uint name##_step_x, \
98 uint name##_stride_y, \
99 uint name##_step_y, \
100 uint name##_offset_first_element_in_bytes
101
102#define TENSOR3D_DECLARATION(name) \
103 __global uchar *name##_ptr, \
104 uint name##_stride_x, \
105 uint name##_step_x, \
106 uint name##_stride_y, \
107 uint name##_step_y, \
108 uint name##_stride_z, \
109 uint name##_step_z, \
110 uint name##_offset_first_element_in_bytes
111
steniu01868e5412017-07-17 23:16:00 +0100112#define TENSOR4D_DECLARATION(name) \
113 __global uchar *name##_ptr, \
114 uint name##_stride_x, \
115 uint name##_step_x, \
116 uint name##_stride_y, \
117 uint name##_step_y, \
118 uint name##_stride_z, \
119 uint name##_step_z, \
120 uint name##_stride_w, \
121 uint name##_step_w, \
122 uint name##_offset_first_element_in_bytes
123
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100124#define CONVERT_TO_VECTOR_STRUCT(name) \
125 update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x)
126
127#define CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) \
128 update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0)
129
130#define CONVERT_TO_IMAGE_STRUCT(name) \
131 update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y)
132
133#define CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \
134 update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0)
135
steniu01868e5412017-07-17 23:16:00 +0100136#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
137 update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)
138
Anthony Barbier7ff47a32017-07-11 16:54:04 +0100139#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \
140 update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, name##_step_z)
141
steniu010d523cc2017-07-13 14:24:23 +0100142#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
143 update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)
144
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100145#define CONVERT_TO_TENSOR3D_STRUCT(name) \
146 update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
147 name##_stride_z, name##_step_z)
148
149#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) \
150 update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, 0)
151
steniu01868e5412017-07-17 23:16:00 +0100152#define CONVERT_TO_TENSOR4D_STRUCT(name, mod_size) \
153 update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
Michalis Spyrou5237e012018-01-17 09:40:27 +0000154 name##_stride_z, name##_step_z, name##_stride_w, name##_step_w, mod_size)
steniu01868e5412017-07-17 23:16:00 +0100155
156#define CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(name, mod_size) \
157 update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, 0, name##_stride_w, 0, mod_size)
158
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100159/** Structure to hold Vector information */
160typedef struct Vector
161{
162 __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
163 int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
164 int stride_x; /**< Stride of the image in X dimension (in bytes) */
165} Vector;
166
167/** Structure to hold Image information */
168typedef struct Image
169{
170 __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
171 int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
172 int stride_x; /**< Stride of the image in X dimension (in bytes) */
173 int stride_y; /**< Stride of the image in Y dimension (in bytes) */
174} Image;
175
176/** Structure to hold 3D tensor information */
177typedef struct Tensor3D
178{
179 __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
180 int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
181 int stride_x; /**< Stride of the image in X dimension (in bytes) */
182 int stride_y; /**< Stride of the image in Y dimension (in bytes) */
183 int stride_z; /**< Stride of the image in Z dimension (in bytes) */
184} Tensor3D;
185
steniu01868e5412017-07-17 23:16:00 +0100186/** Structure to hold 4D tensor information */
187typedef struct Tensor4D
188{
189 __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
190 int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
191 int stride_x; /**< Stride of the image in X dimension (in bytes) */
192 int stride_y; /**< Stride of the image in Y dimension (in bytes) */
193 int stride_z; /**< Stride of the image in Z dimension (in bytes) */
194 int stride_w; /**< Stride of the image in W dimension (in bytes) */
195} Tensor4D;
196
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100197/** Wrap vector information into an Vector structure, and make the pointer point at this workitem's data.
198 *
199 * @param[in] ptr Pointer to the starting postion of the buffer
200 * @param[in] offset_first_element_in_bytes The offset of the first element in the source vector
201 * @param[in] stride_x Stride of the vector in X dimension (in bytes)
202 * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
203 *
204 * @return An image object
205 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100206inline Vector update_vector_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100207{
208 Vector vector =
209 {
210 .ptr = ptr,
211 .offset_first_element_in_bytes = offset_first_element_in_bytes,
212 .stride_x = stride_x,
213 };
214 vector.ptr += vector.offset_first_element_in_bytes + get_global_id(0) * step_x;
215 return vector;
216}
217
218/** Wrap image information into an Image structure, and make the pointer point at this workitem's data.
219 *
220 * @param[in] ptr Pointer to the starting postion of the buffer
221 * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
222 * @param[in] stride_x Stride of the image in X dimension (in bytes)
223 * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
224 * @param[in] stride_y Stride of the image in Y dimension (in bytes)
225 * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
226 *
227 * @return An image object
228 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100229inline Image update_image_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100230{
231 Image img =
232 {
233 .ptr = ptr,
234 .offset_first_element_in_bytes = offset_first_element_in_bytes,
235 .stride_x = stride_x,
236 .stride_y = stride_y
237 };
238 img.ptr += img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y;
239 return img;
240}
241
Anthony Barbier7ff47a32017-07-11 16:54:04 +0100242/** Wrap 3D tensor information into an image structure, and make the pointer point at this workitem's data.
243 *
244 * @param[in] ptr Pointer to the starting postion of the buffer
245 * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
246 * @param[in] stride_x Stride of the image in X dimension (in bytes)
247 * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
248 * @param[in] stride_y Stride of the image in Y dimension (in bytes)
249 * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
250 * @param[in] stride_z Stride of the image in Z dimension (in bytes)
251 * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes)
252 *
253 * @return A 3D tensor object
254 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100255inline Image update_image_from_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
Anthony Barbier7ff47a32017-07-11 16:54:04 +0100256{
257 Image img =
258 {
259 .ptr = ptr,
260 .offset_first_element_in_bytes = offset_first_element_in_bytes,
261 .stride_x = stride_x,
262 .stride_y = stride_y
263 };
264 img.ptr += img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y + get_global_id(2) * step_z;
265 return img;
266}
267
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100268/** Wrap 3D tensor information into an tensor structure, and make the pointer point at this workitem's data.
269 *
270 * @param[in] ptr Pointer to the starting postion of the buffer
271 * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
272 * @param[in] stride_x Stride of the image in X dimension (in bytes)
273 * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
274 * @param[in] stride_y Stride of the image in Y dimension (in bytes)
275 * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
276 * @param[in] stride_z Stride of the image in Z dimension (in bytes)
277 * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes)
278 *
279 * @return A 3D tensor object
280 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100281inline Tensor3D update_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100282{
283 Tensor3D tensor =
284 {
285 .ptr = ptr,
286 .offset_first_element_in_bytes = offset_first_element_in_bytes,
287 .stride_x = stride_x,
288 .stride_y = stride_y,
289 .stride_z = stride_z
290 };
291 tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y + get_global_id(2) * step_z;
292 return tensor;
293}
294
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100295inline Tensor4D update_tensor4D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z, uint stride_w,
steniu01868e5412017-07-17 23:16:00 +0100296 uint step_w,
297 uint mod_size)
298{
299 Tensor4D tensor =
300 {
301 .ptr = ptr,
302 .offset_first_element_in_bytes = offset_first_element_in_bytes,
303 .stride_x = stride_x,
304 .stride_y = stride_y,
305 .stride_z = stride_z,
306 .stride_w = stride_w
307 };
308
309 tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y + (get_global_id(2) % mod_size) * step_z + (get_global_id(2) / mod_size) * step_w;
310 return tensor;
311}
312
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100313/** Get the pointer position of a Vector
314 *
315 * @param[in] vec Pointer to the starting position of the buffer
316 * @param[in] x Relative X position
317 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100318inline __global const uchar *vector_offset(const Vector *vec, int x)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100319{
320 return vec->ptr + x * vec->stride_x;
321}
322
323/** Get the pointer position of a Image
324 *
325 * @param[in] img Pointer to the starting position of the buffer
326 * @param[in] x Relative X position
327 * @param[in] y Relative Y position
328 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100329inline __global uchar *offset(const Image *img, int x, int y)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100330{
331 return img->ptr + x * img->stride_x + y * img->stride_y;
332}
333
334/** Get the pointer position of a Tensor3D
335 *
Gian Marco Iodice3a623242017-07-25 10:25:53 +0100336 * @param[in] tensor Pointer to the starting position of the buffer
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100337 * @param[in] x Relative X position
338 * @param[in] y Relative Y position
339 * @param[in] z Relative Z position
340 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100341inline __global const uchar *tensor3D_offset(const Tensor3D *tensor, int x, int y, int z)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100342{
343 return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z;
344}
345
steniu01868e5412017-07-17 23:16:00 +0100346/** Get the pointer position of a Tensor4D
347 *
348 * @param[in] tensor Pointer to the starting position of the buffer
349 * @param[in] x Relative X position
350 * @param[in] y Relative Y position
351 * @param[in] z Relative Z position
352 * @param[in] w Relative W position
353 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100354inline __global const uchar *tensor4D_offset(const Tensor4D *tensor, int x, int y, int z, int w)
steniu01868e5412017-07-17 23:16:00 +0100355{
356 return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z + w * tensor->stride_w;
357}
358
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100359#endif // _HELPER_H