blob: eaeaa6034d7f5111416fd3a5d776fec6db16417b [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
Usama Arife2428a02019-05-09 11:03:17 +01002 * Copyright (c) 2016-2019 ARM Limited.
Anthony Barbier6ff3b192017-09-04 18:44:23 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef ARM_COMPUTE_HELPER_H
25#define ARM_COMPUTE_HELPER_H
26
Georgios Pinitasdaa38552018-08-28 17:43:18 +010027#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010028#pragma OPENCL EXTENSION cl_khr_fp16 : enable
Georgios Pinitasdaa38552018-08-28 17:43:18 +010029#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
Matthew Bentham6f31f8c2017-10-27 11:50:06 +010030
Georgios Pinitasdaa38552018-08-28 17:43:18 +010031#if defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
Michalis Spyroue03342e2018-01-15 14:39:13 +000032#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable
Georgios Pinitasdaa38552018-08-28 17:43:18 +010033#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
Michalis Spyroue03342e2018-01-15 14:39:13 +000034
Georgios Pinitasdaa38552018-08-28 17:43:18 +010035#if defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && defined(cl_arm_integer_dot_product_accumulate_int8)
Giorgio Arenaeff8d952018-07-02 15:29:57 +010036#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : enable
Georgios Pinitasdaa38552018-08-28 17:43:18 +010037#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && defined(cl_arm_integer_dot_product_accumulate_int8)
Giorgio Arenaeff8d952018-07-02 15:29:57 +010038
Georgios Pinitasdaa38552018-08-28 17:43:18 +010039#if defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf)
steniu01f01f9de2017-09-27 17:00:11 +010040#pragma OPENCL EXTENSION cl_arm_printf : enable
Georgios Pinitas238c97c2018-08-31 17:28:29 +010041#endif // defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010042
Usama Arife2428a02019-05-09 11:03:17 +010043#define GPU_ARCH_MIDGARD 0x100
44#define GPU_ARCH_BIFROST 0x200
45
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +000046/** Concatenate two inputs.
47 *
48 * @param[in] a The first input to be concatenated
49 * @param[in] b The second input to be concatenated
50 *
51 * @return The concatenated output
52 */
Gian Marco Iodice43a129e2019-05-14 10:14:08 +010053#define CONCAT(a, b) a##b
54
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +000055/** Expand the given vector
56 *
57 * @param[in] x The vector to be expanded
58 *
59 * @return The expanded output
60 */
Georgios Pinitase5f8fd62017-06-23 18:03:44 +010061#define EXPAND(x) x
62
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +000063/** Clamp the given value between an upper and lower bound.
64 *
65 * @param[in] x The value to be clamped
66 * @param[in] min_val The lower bound
67 * @param[in] max_val The upper bound
68 *
69 * @return The clamped value.
70 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +010071#define CLAMP(x, min_val, max_val) min(max(x, min_val), max_val)
72
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +000073/** REVn reverses the given vector whose size is n.
74 * @name REVn
75 *
76 * @param[in] x The vector to be reversed
77 *
78 * @return The reversed vector
79 * @{
80 */
Giorgio Arena5c4a8e92019-08-28 17:55:07 +010081#define REV1(x) ((x))
82#define REV2(x) ((x).s10)
83#define REV3(x) ((x).s210)
84#define REV4(x) ((x).s3210)
85#define REV8(x) ((x).s76543210)
86#define REV16(x) ((x).sFEDCBA9876543210)
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +000087/** @} */ // end of group REVn
Giorgio Arena5c4a8e92019-08-28 17:55:07 +010088
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +000089/** Reverse the given vector.
90 * @name REVERSE
91 *
92 * @param[in] x The vector to be reversed
93 * @param[in] s The size of the vector
94 *
95 * @return The reversed vector
96 * @{
97 */
Giorgio Arena5c4a8e92019-08-28 17:55:07 +010098#define REVERSE_STR(x, s) REV##s((x))
99#define REVERSE(x, s) REVERSE_STR(x, s)
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +0000100/** @} */ // end of group REVERSE
Giorgio Arena5c4a8e92019-08-28 17:55:07 +0100101
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +0000102/** Circular-right-shift (rotate-right) the vector of size s by the amount of n.
103 * @name ROTs_n
104 *
105 * @param[in] x The vector to be shifted
106 *
107 * @return The shifted vector
108 * @{
109 */
Giorgio Arena5c4a8e92019-08-28 17:55:07 +0100110#define ROT1_0(x) ((x))
111
112#define ROT2_0(x) ((x))
113#define ROT2_1(x) ((x).s10)
114
115#define ROT3_0(x) ((x))
116#define ROT3_1(x) ((x).s201)
117#define ROT3_2(x) ((x).s120)
118
119#define ROT4_0(x) ((x))
120#define ROT4_1(x) ((x).s3012)
121#define ROT4_2(x) ((x).s2301)
122#define ROT4_3(x) ((x).s1230)
123
124#define ROT8_0(x) ((x))
125#define ROT8_1(x) ((x).s70123456)
126#define ROT8_2(x) ((x).s67012345)
127#define ROT8_3(x) ((x).s56701234)
128#define ROT8_4(x) ((x).s45670123)
129#define ROT8_5(x) ((x).s34567012)
130#define ROT8_6(x) ((x).s23456701)
131#define ROT8_7(x) ((x).s12345670)
132
133#define ROT16_0(x) ((x))
134#define ROT16_1(x) ((x).sF0123456789ABCDE)
135#define ROT16_2(x) ((x).sEF0123456789ABCD)
136#define ROT16_3(x) ((x).sDEF0123456789ABC)
137#define ROT16_4(x) ((x).sCDEF0123456789AB)
138#define ROT16_5(x) ((x).sBCDEF0123456789A)
139#define ROT16_6(x) ((x).sABCDEF0123456789)
140#define ROT16_7(x) ((x).s9ABCDEF012345678)
141#define ROT16_8(x) ((x).s89ABCDEF01234567)
142#define ROT16_9(x) ((x).s789ABCDEF0123456)
143#define ROT16_10(x) ((x).s6789ABCDEF012345)
144#define ROT16_11(x) ((x).s56789ABCDEF01234)
145#define ROT16_12(x) ((x).s456789ABCDEF0123)
146#define ROT16_13(x) ((x).s3456789ABCDEF012)
147#define ROT16_14(x) ((x).s23456789ABCDEF01)
148#define ROT16_15(x) ((x).s123456789ABCDEF0)
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +0000149/** @} */ // end of group ROTs_n
Giorgio Arena5c4a8e92019-08-28 17:55:07 +0100150
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +0000151/** Circular-right-shift (rotate-right) the given vector by the given amount.
152 * @name ROTATE
153 *
154 * @param[in] x The vector to be shifted
155 * @param[in] s The size of the vector
156 * @param[in] n The amount to be shifted
157 *
158 * @return The shifted vector
159 * @{
160 */
Giorgio Arena5c4a8e92019-08-28 17:55:07 +0100161#define ROTATE_STR(x, s, n) ROT##s##_##n(x)
162#define ROTATE(x, s, n) ROTATE_STR(x, s, n)
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +0000163/** @} */ // end of group ROTATE
Giorgio Arena5c4a8e92019-08-28 17:55:07 +0100164
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +0000165/** Creates a vector of size n filled with offset values corresponding to the location of each element.
166 * @name V_OFFSn
167 *
168 * @param[in] dt The data type of the output vector
169 *
170 * @return The vector filled with offset values
171 * @{
172 */
Giorgio Arena5c4a8e92019-08-28 17:55:07 +0100173#define V_OFFS1(dt) (dt)(0)
174#define V_OFFS2(dt) (dt)(0, 1)
175#define V_OFFS3(dt) (dt)(0, 1, 3)
176#define V_OFFS4(dt) (dt)(0, 1, 2, 3)
177#define V_OFFS8(dt) (dt)(0, 1, 2, 3, 4, 5, 6, 7)
178#define V_OFFS16(dt) (dt)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +0000179/** @} */ // end of group V_OFFSn
Giorgio Arena5c4a8e92019-08-28 17:55:07 +0100180
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +0000181/** Create a vector filled with offset values corresponding to the location of each element.
182 * @name VEC_OFFS
183 *
184 * @param[in] dt The data type of the output vector
185 * @param[in] s The size of the output vector
186 *
187 * @return The vector filled with offset values
188 * @{
189 */
Giorgio Arena5c4a8e92019-08-28 17:55:07 +0100190#define VEC_OFFS_STR(dt, s) V_OFFS##s(dt)
191#define VEC_OFFS(dt, s) VEC_OFFS_STR(dt, s)
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +0000192/** @} */ // end of group VEC_OFFS
Giorgio Arena5c4a8e92019-08-28 17:55:07 +0100193
Georgios Pinitasac4e8732017-07-05 17:02:25 +0100194#define VLOAD_STR(size) vload##size
195#define VLOAD(size) VLOAD_STR(size)
196
197#define VSTORE_STR(size) vstore##size
198#define VSTORE(size) VSTORE_STR(size)
199
Manuel Bottini0d0028c2018-10-02 16:41:52 +0100200#define float1 float
201#define half1 half
Usama Arif0681e3b2019-04-25 14:28:07 +0100202#define char1 char
203#define uchar1 uchar
204#define short1 short
205#define ushort1 ushort
206#define int1 int
207#define uint1 uint
208#define long1 long
209#define ulong1 ulong
210#define double1 double
211
212#define vload1(OFFSET, PTR) *(OFFSET + PTR)
Gian Marco Iodice9285adb2019-09-05 16:10:27 +0100213#define vstore1(DATA, OFFSET, PTR) *(OFFSET + PTR) = DATA
Manuel Bottini0d0028c2018-10-02 16:41:52 +0100214
Gian Marco Iodice0c17aa22019-09-27 09:23:15 +0100215// Convert built-in functions with _sat modifier are not supported in floating point so we create defines
216// without _sat to overcome this issue
217#define convert_float_sat convert_float
218#define convert_float1_sat convert_float
219#define convert_float2_sat convert_float2
220#define convert_float3_sat convert_float3
221#define convert_float4_sat convert_float4
222#define convert_float8_sat convert_float8
223#define convert_float16_sat convert_float16
224#define convert_half_sat convert_float
225#define convert_half1_sat convert_half
226#define convert_half2_sat convert_half2
227#define convert_half3_sat convert_half3
228#define convert_half4_sat convert_half4
229#define convert_half8_sat convert_half8
230#define convert_half16_sat convert_half16
231
Michele Di Giorgioa046e162019-10-08 09:36:26 +0100232#define convert_float1 convert_float
233#define convert_half1 convert_half
234#define convert_char1 convert_char
235#define convert_uchar1 convert_uchar
236#define convert_short1 convert_short
237#define convert_ushort1 convert_ushort
238#define convert_int1 convert_int
239#define convert_uint1 convert_uint
240#define convert_long1 convert_long
241#define convert_ulong1 convert_ulong
242#define convert_double1 convert_double
243
244#define convert_char1_sat convert_char_sat
245#define convert_uchar1_sat convert_uchar_sat
246#define convert_short1_sat convert_short_sat
247#define convert_ushort1_sat convert_ushort_sat
248#define convert_int1_sat convert_int_sat
249#define convert_uint1_sat convert_uint_sat
250#define convert_long1_sat convert_long_sat
251#define convert_ulong1_sat convert_ulong_sat
252#define convert_double1_sat convert_double_sat
253
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100254#define VEC_DATA_TYPE_STR(type, size) type##size
255#define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size)
256
Chunosovd6afedc2017-11-06 22:09:45 +0700257#define CL_VEC_DATA_TYPE_STR(type, size) type##size
258#define CL_VEC_DATA_TYPE(type, size) CL_VEC_DATA_TYPE_STR(type, size)
259
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100260#define CONVERT_STR(x, type) (convert_##type((x)))
261#define CONVERT(x, type) CONVERT_STR(x, type)
262
263#define CONVERT_SAT_STR(x, type) (convert_##type##_sat((x)))
264#define CONVERT_SAT(x, type) CONVERT_SAT_STR(x, type)
265
266#define CONVERT_SAT_ROUND_STR(x, type, round) (convert_##type##_sat_##round((x)))
267#define CONVERT_SAT_ROUND(x, type, round) CONVERT_SAT_ROUND_STR(x, type, round)
268
269#define VECTOR_DECLARATION(name) \
270 __global uchar *name##_ptr, \
271 uint name##_stride_x, \
272 uint name##_step_x, \
273 uint name##_offset_first_element_in_bytes
274
275#define IMAGE_DECLARATION(name) \
276 __global uchar *name##_ptr, \
277 uint name##_stride_x, \
278 uint name##_step_x, \
279 uint name##_stride_y, \
280 uint name##_step_y, \
281 uint name##_offset_first_element_in_bytes
282
283#define TENSOR3D_DECLARATION(name) \
284 __global uchar *name##_ptr, \
285 uint name##_stride_x, \
286 uint name##_step_x, \
287 uint name##_stride_y, \
288 uint name##_step_y, \
289 uint name##_stride_z, \
290 uint name##_step_z, \
291 uint name##_offset_first_element_in_bytes
292
steniu01868e5412017-07-17 23:16:00 +0100293#define TENSOR4D_DECLARATION(name) \
294 __global uchar *name##_ptr, \
295 uint name##_stride_x, \
296 uint name##_step_x, \
297 uint name##_stride_y, \
298 uint name##_step_y, \
299 uint name##_stride_z, \
300 uint name##_step_z, \
301 uint name##_stride_w, \
302 uint name##_step_w, \
303 uint name##_offset_first_element_in_bytes
304
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100305#define CONVERT_TO_VECTOR_STRUCT(name) \
306 update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x)
307
308#define CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) \
309 update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0)
310
311#define CONVERT_TO_IMAGE_STRUCT(name) \
312 update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y)
313
314#define CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \
315 update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0)
316
steniu01868e5412017-07-17 23:16:00 +0100317#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
318 update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)
319
Anthony Barbier7ff47a32017-07-11 16:54:04 +0100320#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \
321 update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, name##_step_z)
322
steniu010d523cc2017-07-13 14:24:23 +0100323#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
324 update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)
325
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100326#define CONVERT_TO_TENSOR3D_STRUCT(name) \
327 update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
328 name##_stride_z, name##_step_z)
329
330#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) \
331 update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, 0)
332
steniu01868e5412017-07-17 23:16:00 +0100333#define CONVERT_TO_TENSOR4D_STRUCT(name, mod_size) \
334 update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
Michalis Spyrou5237e012018-01-17 09:40:27 +0000335 name##_stride_z, name##_step_z, name##_stride_w, name##_step_w, mod_size)
steniu01868e5412017-07-17 23:16:00 +0100336
337#define CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(name, mod_size) \
338 update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, 0, name##_stride_w, 0, mod_size)
339
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100340/** Structure to hold Vector information */
341typedef struct Vector
342{
343 __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
344 int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
345 int stride_x; /**< Stride of the image in X dimension (in bytes) */
346} Vector;
347
348/** Structure to hold Image information */
349typedef struct Image
350{
351 __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
352 int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
353 int stride_x; /**< Stride of the image in X dimension (in bytes) */
354 int stride_y; /**< Stride of the image in Y dimension (in bytes) */
355} Image;
356
357/** Structure to hold 3D tensor information */
358typedef struct Tensor3D
359{
360 __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
361 int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
362 int stride_x; /**< Stride of the image in X dimension (in bytes) */
363 int stride_y; /**< Stride of the image in Y dimension (in bytes) */
364 int stride_z; /**< Stride of the image in Z dimension (in bytes) */
365} Tensor3D;
366
steniu01868e5412017-07-17 23:16:00 +0100367/** Structure to hold 4D tensor information */
368typedef struct Tensor4D
369{
370 __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
371 int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
372 int stride_x; /**< Stride of the image in X dimension (in bytes) */
373 int stride_y; /**< Stride of the image in Y dimension (in bytes) */
374 int stride_z; /**< Stride of the image in Z dimension (in bytes) */
375 int stride_w; /**< Stride of the image in W dimension (in bytes) */
376} Tensor4D;
377
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100378/** Wrap vector information into an Vector structure, and make the pointer point at this workitem's data.
379 *
380 * @param[in] ptr Pointer to the starting postion of the buffer
381 * @param[in] offset_first_element_in_bytes The offset of the first element in the source vector
382 * @param[in] stride_x Stride of the vector in X dimension (in bytes)
383 * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
384 *
385 * @return An image object
386 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100387inline Vector update_vector_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100388{
389 Vector vector =
390 {
391 .ptr = ptr,
392 .offset_first_element_in_bytes = offset_first_element_in_bytes,
393 .stride_x = stride_x,
394 };
395 vector.ptr += vector.offset_first_element_in_bytes + get_global_id(0) * step_x;
396 return vector;
397}
398
399/** Wrap image information into an Image structure, and make the pointer point at this workitem's data.
400 *
401 * @param[in] ptr Pointer to the starting postion of the buffer
402 * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
403 * @param[in] stride_x Stride of the image in X dimension (in bytes)
404 * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
405 * @param[in] stride_y Stride of the image in Y dimension (in bytes)
406 * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
407 *
408 * @return An image object
409 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100410inline Image update_image_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100411{
412 Image img =
413 {
414 .ptr = ptr,
415 .offset_first_element_in_bytes = offset_first_element_in_bytes,
416 .stride_x = stride_x,
417 .stride_y = stride_y
418 };
419 img.ptr += img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y;
420 return img;
421}
422
Anthony Barbier7ff47a32017-07-11 16:54:04 +0100423/** Wrap 3D tensor information into an image structure, and make the pointer point at this workitem's data.
424 *
425 * @param[in] ptr Pointer to the starting postion of the buffer
426 * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
427 * @param[in] stride_x Stride of the image in X dimension (in bytes)
428 * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
429 * @param[in] stride_y Stride of the image in Y dimension (in bytes)
430 * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
431 * @param[in] stride_z Stride of the image in Z dimension (in bytes)
432 * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes)
433 *
434 * @return A 3D tensor object
435 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100436inline Image update_image_from_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
Anthony Barbier7ff47a32017-07-11 16:54:04 +0100437{
438 Image img =
439 {
440 .ptr = ptr,
441 .offset_first_element_in_bytes = offset_first_element_in_bytes,
442 .stride_x = stride_x,
443 .stride_y = stride_y
444 };
445 img.ptr += img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y + get_global_id(2) * step_z;
446 return img;
447}
448
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100449/** Wrap 3D tensor information into an tensor structure, and make the pointer point at this workitem's data.
450 *
451 * @param[in] ptr Pointer to the starting postion of the buffer
452 * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
453 * @param[in] stride_x Stride of the image in X dimension (in bytes)
454 * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
455 * @param[in] stride_y Stride of the image in Y dimension (in bytes)
456 * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
457 * @param[in] stride_z Stride of the image in Z dimension (in bytes)
458 * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes)
459 *
460 * @return A 3D tensor object
461 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100462inline Tensor3D update_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100463{
464 Tensor3D tensor =
465 {
466 .ptr = ptr,
467 .offset_first_element_in_bytes = offset_first_element_in_bytes,
468 .stride_x = stride_x,
469 .stride_y = stride_y,
470 .stride_z = stride_z
471 };
472 tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y + get_global_id(2) * step_z;
473 return tensor;
474}
475
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100476inline Tensor4D update_tensor4D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z, uint stride_w,
steniu01868e5412017-07-17 23:16:00 +0100477 uint step_w,
478 uint mod_size)
479{
480 Tensor4D tensor =
481 {
482 .ptr = ptr,
483 .offset_first_element_in_bytes = offset_first_element_in_bytes,
484 .stride_x = stride_x,
485 .stride_y = stride_y,
486 .stride_z = stride_z,
487 .stride_w = stride_w
488 };
489
490 tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y + (get_global_id(2) % mod_size) * step_z + (get_global_id(2) / mod_size) * step_w;
491 return tensor;
492}
493
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100494/** Get the pointer position of a Vector
495 *
496 * @param[in] vec Pointer to the starting position of the buffer
497 * @param[in] x Relative X position
498 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100499inline __global const uchar *vector_offset(const Vector *vec, int x)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100500{
501 return vec->ptr + x * vec->stride_x;
502}
503
504/** Get the pointer position of a Image
505 *
506 * @param[in] img Pointer to the starting position of the buffer
507 * @param[in] x Relative X position
508 * @param[in] y Relative Y position
509 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100510inline __global uchar *offset(const Image *img, int x, int y)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100511{
512 return img->ptr + x * img->stride_x + y * img->stride_y;
513}
514
515/** Get the pointer position of a Tensor3D
516 *
Gian Marco Iodice3a623242017-07-25 10:25:53 +0100517 * @param[in] tensor Pointer to the starting position of the buffer
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100518 * @param[in] x Relative X position
519 * @param[in] y Relative Y position
520 * @param[in] z Relative Z position
521 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100522inline __global const uchar *tensor3D_offset(const Tensor3D *tensor, int x, int y, int z)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100523{
524 return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z;
525}
526
steniu01868e5412017-07-17 23:16:00 +0100527/** Get the pointer position of a Tensor4D
528 *
529 * @param[in] tensor Pointer to the starting position of the buffer
530 * @param[in] x Relative X position
531 * @param[in] y Relative Y position
532 * @param[in] z Relative Z position
533 * @param[in] w Relative W position
534 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100535inline __global const uchar *tensor4D_offset(const Tensor4D *tensor, int x, int y, int z, int w)
steniu01868e5412017-07-17 23:16:00 +0100536{
537 return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z + w * tensor->stride_w;
538}
539
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100540#endif // _HELPER_H