blob: f6795663dfcc8a00f8a71ee2b06a65947bd952a3 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
Michele Di Giorgiod9eaf612020-07-08 11:12:57 +01002 * Copyright (c) 2016-2020 Arm Limited.
Anthony Barbier6ff3b192017-09-04 18:44:23 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef ARM_COMPUTE_HELPER_H
25#define ARM_COMPUTE_HELPER_H
26
Georgios Pinitasdaa38552018-08-28 17:43:18 +010027#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010028#pragma OPENCL EXTENSION cl_khr_fp16 : enable
Georgios Pinitasdaa38552018-08-28 17:43:18 +010029#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
Matthew Bentham6f31f8c2017-10-27 11:50:06 +010030
Georgios Pinitasdaa38552018-08-28 17:43:18 +010031#if defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
Michalis Spyroue03342e2018-01-15 14:39:13 +000032#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable
Georgios Pinitasdaa38552018-08-28 17:43:18 +010033#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
Michalis Spyroue03342e2018-01-15 14:39:13 +000034
Georgios Pinitasdaa38552018-08-28 17:43:18 +010035#if defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && defined(cl_arm_integer_dot_product_accumulate_int8)
Giorgio Arenaeff8d952018-07-02 15:29:57 +010036#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : enable
Georgios Pinitasdaa38552018-08-28 17:43:18 +010037#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && defined(cl_arm_integer_dot_product_accumulate_int8)
Giorgio Arenaeff8d952018-07-02 15:29:57 +010038
Georgios Pinitasdaa38552018-08-28 17:43:18 +010039#if defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf)
steniu01f01f9de2017-09-27 17:00:11 +010040#pragma OPENCL EXTENSION cl_arm_printf : enable
Georgios Pinitas238c97c2018-08-31 17:28:29 +010041#endif // defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010042
Usama Arife2428a02019-05-09 11:03:17 +010043#define GPU_ARCH_MIDGARD 0x100
44#define GPU_ARCH_BIFROST 0x200
45
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +000046/** Concatenate two inputs.
47 *
48 * @param[in] a The first input to be concatenated
49 * @param[in] b The second input to be concatenated
50 *
51 * @return The concatenated output
52 */
Gian Marco Iodice43a129e2019-05-14 10:14:08 +010053#define CONCAT(a, b) a##b
54
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +000055/** Expand the given vector
56 *
57 * @param[in] x The vector to be expanded
58 *
59 * @return The expanded output
60 */
Georgios Pinitase5f8fd62017-06-23 18:03:44 +010061#define EXPAND(x) x
62
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +000063/** Clamp the given value between an upper and lower bound.
64 *
65 * @param[in] x The value to be clamped
66 * @param[in] min_val The lower bound
67 * @param[in] max_val The upper bound
68 *
69 * @return The clamped value.
70 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +010071#define CLAMP(x, min_val, max_val) min(max(x, min_val), max_val)
72
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +000073/** REVn reverses the given vector whose size is n.
74 * @name REVn
75 *
76 * @param[in] x The vector to be reversed
77 *
78 * @return The reversed vector
79 * @{
80 */
Giorgio Arena5c4a8e92019-08-28 17:55:07 +010081#define REV1(x) ((x))
82#define REV2(x) ((x).s10)
83#define REV3(x) ((x).s210)
84#define REV4(x) ((x).s3210)
85#define REV8(x) ((x).s76543210)
86#define REV16(x) ((x).sFEDCBA9876543210)
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +000087/** @} */ // end of group REVn
Giorgio Arena5c4a8e92019-08-28 17:55:07 +010088
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +000089/** Reverse the given vector.
90 * @name REVERSE
91 *
92 * @param[in] x The vector to be reversed
93 * @param[in] s The size of the vector
94 *
95 * @return The reversed vector
96 * @{
97 */
Giorgio Arena5c4a8e92019-08-28 17:55:07 +010098#define REVERSE_STR(x, s) REV##s((x))
99#define REVERSE(x, s) REVERSE_STR(x, s)
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +0000100/** @} */ // end of group REVERSE
Giorgio Arena5c4a8e92019-08-28 17:55:07 +0100101
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +0000102/** Circular-right-shift (rotate-right) the vector of size s by the amount of n.
103 * @name ROTs_n
104 *
105 * @param[in] x The vector to be shifted
106 *
107 * @return The shifted vector
108 * @{
109 */
Giorgio Arena5c4a8e92019-08-28 17:55:07 +0100110#define ROT1_0(x) ((x))
111
112#define ROT2_0(x) ((x))
113#define ROT2_1(x) ((x).s10)
114
115#define ROT3_0(x) ((x))
116#define ROT3_1(x) ((x).s201)
117#define ROT3_2(x) ((x).s120)
118
119#define ROT4_0(x) ((x))
120#define ROT4_1(x) ((x).s3012)
121#define ROT4_2(x) ((x).s2301)
122#define ROT4_3(x) ((x).s1230)
123
124#define ROT8_0(x) ((x))
125#define ROT8_1(x) ((x).s70123456)
126#define ROT8_2(x) ((x).s67012345)
127#define ROT8_3(x) ((x).s56701234)
128#define ROT8_4(x) ((x).s45670123)
129#define ROT8_5(x) ((x).s34567012)
130#define ROT8_6(x) ((x).s23456701)
131#define ROT8_7(x) ((x).s12345670)
132
133#define ROT16_0(x) ((x))
134#define ROT16_1(x) ((x).sF0123456789ABCDE)
135#define ROT16_2(x) ((x).sEF0123456789ABCD)
136#define ROT16_3(x) ((x).sDEF0123456789ABC)
137#define ROT16_4(x) ((x).sCDEF0123456789AB)
138#define ROT16_5(x) ((x).sBCDEF0123456789A)
139#define ROT16_6(x) ((x).sABCDEF0123456789)
140#define ROT16_7(x) ((x).s9ABCDEF012345678)
141#define ROT16_8(x) ((x).s89ABCDEF01234567)
142#define ROT16_9(x) ((x).s789ABCDEF0123456)
143#define ROT16_10(x) ((x).s6789ABCDEF012345)
144#define ROT16_11(x) ((x).s56789ABCDEF01234)
145#define ROT16_12(x) ((x).s456789ABCDEF0123)
146#define ROT16_13(x) ((x).s3456789ABCDEF012)
147#define ROT16_14(x) ((x).s23456789ABCDEF01)
148#define ROT16_15(x) ((x).s123456789ABCDEF0)
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +0000149/** @} */ // end of group ROTs_n
Giorgio Arena5c4a8e92019-08-28 17:55:07 +0100150
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +0000151/** Circular-right-shift (rotate-right) the given vector by the given amount.
152 * @name ROTATE
153 *
154 * @param[in] x The vector to be shifted
155 * @param[in] s The size of the vector
156 * @param[in] n The amount to be shifted
157 *
158 * @return The shifted vector
159 * @{
160 */
Giorgio Arena5c4a8e92019-08-28 17:55:07 +0100161#define ROTATE_STR(x, s, n) ROT##s##_##n(x)
162#define ROTATE(x, s, n) ROTATE_STR(x, s, n)
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +0000163/** @} */ // end of group ROTATE
Giorgio Arena5c4a8e92019-08-28 17:55:07 +0100164
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +0000165/** Creates a vector of size n filled with offset values corresponding to the location of each element.
166 * @name V_OFFSn
167 *
168 * @param[in] dt The data type of the output vector
169 *
170 * @return The vector filled with offset values
171 * @{
172 */
Giorgio Arena5c4a8e92019-08-28 17:55:07 +0100173#define V_OFFS1(dt) (dt)(0)
174#define V_OFFS2(dt) (dt)(0, 1)
175#define V_OFFS3(dt) (dt)(0, 1, 3)
176#define V_OFFS4(dt) (dt)(0, 1, 2, 3)
177#define V_OFFS8(dt) (dt)(0, 1, 2, 3, 4, 5, 6, 7)
178#define V_OFFS16(dt) (dt)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +0000179/** @} */ // end of group V_OFFSn
Giorgio Arena5c4a8e92019-08-28 17:55:07 +0100180
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +0000181/** Create a vector filled with offset values corresponding to the location of each element.
182 * @name VEC_OFFS
183 *
184 * @param[in] dt The data type of the output vector
185 * @param[in] s The size of the output vector
186 *
187 * @return The vector filled with offset values
188 * @{
189 */
Giorgio Arena5c4a8e92019-08-28 17:55:07 +0100190#define VEC_OFFS_STR(dt, s) V_OFFS##s(dt)
191#define VEC_OFFS(dt, s) VEC_OFFS_STR(dt, s)
Sang-Hoon Parkbfd75d62019-10-30 14:56:17 +0000192/** @} */ // end of group VEC_OFFS
Giorgio Arena5c4a8e92019-08-28 17:55:07 +0100193
Georgios Pinitasac4e8732017-07-05 17:02:25 +0100194#define VLOAD_STR(size) vload##size
195#define VLOAD(size) VLOAD_STR(size)
196
Gian Marco Iodicee3a849a2020-06-10 17:59:30 +0100197#define PIXEL_UNIT4 1
198#define PIXEL_UNIT8 2
199#define PIXEL_UNIT16 4
200
201/** Utility macro to convert a vector size in pixel unit.
202 *
203 * @name CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT
204 *
205 * @param[in] vec_size Vector size. Only 4,8 and 16 is supported
206 *
207 * @return The pixel unit (number of pixels)
208 * @{
209 */
210#define CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT_STR(vec_size) PIXEL_UNIT##vec_size
211#define CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT(vec_size) CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT_STR(vec_size)
212/** @} */ // end of group CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT
213
214#define read_image2d_floatx1(img, x_coord, y_coord) (float4)(read_imagef(img, (int2)(x_coord, y_coord)));
215#define read_image2d_floatx2(img, x_coord, y_coord) (float8)(read_imagef(img, (int2)(x_coord, y_coord)), read_imagef(img, (int2)(x_coord + 1, y_coord)));
216#define read_image2d_floatx4(img, x_coord, y_coord) (float16)(read_imagef(img, (int2)(x_coord, y_coord)), read_imagef(img, (int2)(x_coord + 1, y_coord)), read_imagef(img, (int2)(x_coord + 2, y_coord)), read_imagef(img, (int2)(x_coord + 3, y_coord)));
217
218#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
219#define read_image2d_halfx1(img, x_coord, y_coord) (half4)(read_imageh(img, (int2)(x_coord, y_coord)));
220#define read_image2d_halfx2(img, x_coord, y_coord) (half8)(read_imageh(img, (int2)(x_coord, y_coord)), read_imageh(img, (int2)(x_coord + 1, y_coord)));
221#define read_image2d_halfx4(img, x_coord, y_coord) (half16)(read_imageh(img, (int2)(x_coord, y_coord)), read_imageh(img, (int2)(x_coord + 1, y_coord)), read_imageh(img, (int2)(x_coord + 2, y_coord)), read_imageh(img, (int2)(x_coord + 3, y_coord)));
222#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
223
224/** Utility macro to read a 2D OpenCL image object.
225 *
226 * @note Coordinates are not normalized
227 *
228 * @param[in] data_type Data type
229 * @param[in] n0 Number of pixel to read. Only 1,2 and 4 is supported
230 * @param[in] img OpenCL image object
231 * @param[in] x_coord The x coordinate for the top-left pixel
232 * @param[in] y_coord The y coordinate for the top-left pixel
233 *
234 * @return Pixels from the 2D OpenCL image object
235 * @{
236 */
237#define READ_IMAGE2D_STR(data_type, n0, img, x_coord, y_coord) read_image2d_##data_type##x##n0(img, x_coord, y_coord)
238#define READ_IMAGE2D(data_type, n0, img, x_coord, y_coord) READ_IMAGE2D_STR(data_type, n0, img, x_coord, y_coord)
239
Georgios Pinitasac4e8732017-07-05 17:02:25 +0100240#define VSTORE_STR(size) vstore##size
241#define VSTORE(size) VSTORE_STR(size)
242
Manuel Bottini0d0028c2018-10-02 16:41:52 +0100243#define float1 float
244#define half1 half
Usama Arif0681e3b2019-04-25 14:28:07 +0100245#define char1 char
246#define uchar1 uchar
247#define short1 short
248#define ushort1 ushort
249#define int1 int
250#define uint1 uint
251#define long1 long
252#define ulong1 ulong
253#define double1 double
254
255#define vload1(OFFSET, PTR) *(OFFSET + PTR)
Gian Marco Iodice9285adb2019-09-05 16:10:27 +0100256#define vstore1(DATA, OFFSET, PTR) *(OFFSET + PTR) = DATA
Manuel Bottini0d0028c2018-10-02 16:41:52 +0100257
Gian Marco Iodice0c17aa22019-09-27 09:23:15 +0100258// Convert built-in functions with _sat modifier are not supported in floating point so we create defines
259// without _sat to overcome this issue
260#define convert_float_sat convert_float
261#define convert_float1_sat convert_float
262#define convert_float2_sat convert_float2
263#define convert_float3_sat convert_float3
264#define convert_float4_sat convert_float4
265#define convert_float8_sat convert_float8
266#define convert_float16_sat convert_float16
267#define convert_half_sat convert_float
268#define convert_half1_sat convert_half
269#define convert_half2_sat convert_half2
270#define convert_half3_sat convert_half3
271#define convert_half4_sat convert_half4
272#define convert_half8_sat convert_half8
273#define convert_half16_sat convert_half16
274
Michele Di Giorgioa046e162019-10-08 09:36:26 +0100275#define convert_float1 convert_float
276#define convert_half1 convert_half
277#define convert_char1 convert_char
278#define convert_uchar1 convert_uchar
279#define convert_short1 convert_short
280#define convert_ushort1 convert_ushort
281#define convert_int1 convert_int
282#define convert_uint1 convert_uint
283#define convert_long1 convert_long
284#define convert_ulong1 convert_ulong
285#define convert_double1 convert_double
286
287#define convert_char1_sat convert_char_sat
288#define convert_uchar1_sat convert_uchar_sat
289#define convert_short1_sat convert_short_sat
290#define convert_ushort1_sat convert_ushort_sat
291#define convert_int1_sat convert_int_sat
292#define convert_uint1_sat convert_uint_sat
293#define convert_long1_sat convert_long_sat
294#define convert_ulong1_sat convert_ulong_sat
295#define convert_double1_sat convert_double_sat
296
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100297#define VEC_DATA_TYPE_STR(type, size) type##size
298#define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size)
299
Chunosovd6afedc2017-11-06 22:09:45 +0700300#define CL_VEC_DATA_TYPE_STR(type, size) type##size
301#define CL_VEC_DATA_TYPE(type, size) CL_VEC_DATA_TYPE_STR(type, size)
302
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100303#define CONVERT_STR(x, type) (convert_##type((x)))
304#define CONVERT(x, type) CONVERT_STR(x, type)
305
306#define CONVERT_SAT_STR(x, type) (convert_##type##_sat((x)))
307#define CONVERT_SAT(x, type) CONVERT_SAT_STR(x, type)
308
309#define CONVERT_SAT_ROUND_STR(x, type, round) (convert_##type##_sat_##round((x)))
310#define CONVERT_SAT_ROUND(x, type, round) CONVERT_SAT_ROUND_STR(x, type, round)
311
312#define VECTOR_DECLARATION(name) \
313 __global uchar *name##_ptr, \
314 uint name##_stride_x, \
315 uint name##_step_x, \
316 uint name##_offset_first_element_in_bytes
317
318#define IMAGE_DECLARATION(name) \
319 __global uchar *name##_ptr, \
320 uint name##_stride_x, \
321 uint name##_step_x, \
322 uint name##_stride_y, \
323 uint name##_step_y, \
324 uint name##_offset_first_element_in_bytes
325
326#define TENSOR3D_DECLARATION(name) \
327 __global uchar *name##_ptr, \
328 uint name##_stride_x, \
329 uint name##_step_x, \
330 uint name##_stride_y, \
331 uint name##_step_y, \
332 uint name##_stride_z, \
333 uint name##_step_z, \
334 uint name##_offset_first_element_in_bytes
335
steniu01868e5412017-07-17 23:16:00 +0100336#define TENSOR4D_DECLARATION(name) \
337 __global uchar *name##_ptr, \
338 uint name##_stride_x, \
339 uint name##_step_x, \
340 uint name##_stride_y, \
341 uint name##_step_y, \
342 uint name##_stride_z, \
343 uint name##_step_z, \
344 uint name##_stride_w, \
345 uint name##_step_w, \
346 uint name##_offset_first_element_in_bytes
347
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100348#define CONVERT_TO_VECTOR_STRUCT(name) \
349 update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x)
350
351#define CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) \
352 update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0)
353
354#define CONVERT_TO_IMAGE_STRUCT(name) \
355 update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y)
356
357#define CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \
358 update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0)
359
steniu01868e5412017-07-17 23:16:00 +0100360#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
361 update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)
362
Anthony Barbier7ff47a32017-07-11 16:54:04 +0100363#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \
364 update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, name##_step_z)
365
steniu010d523cc2017-07-13 14:24:23 +0100366#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
367 update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)
368
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100369#define CONVERT_TO_TENSOR3D_STRUCT(name) \
370 update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
371 name##_stride_z, name##_step_z)
372
373#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) \
374 update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, 0)
375
steniu01868e5412017-07-17 23:16:00 +0100376#define CONVERT_TO_TENSOR4D_STRUCT(name, mod_size) \
377 update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
Michalis Spyrou5237e012018-01-17 09:40:27 +0000378 name##_stride_z, name##_step_z, name##_stride_w, name##_step_w, mod_size)
steniu01868e5412017-07-17 23:16:00 +0100379
380#define CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(name, mod_size) \
381 update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, 0, name##_stride_w, 0, mod_size)
382
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100383/** Structure to hold Vector information */
384typedef struct Vector
385{
386 __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
387 int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
388 int stride_x; /**< Stride of the image in X dimension (in bytes) */
389} Vector;
390
391/** Structure to hold Image information */
392typedef struct Image
393{
394 __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
395 int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
396 int stride_x; /**< Stride of the image in X dimension (in bytes) */
397 int stride_y; /**< Stride of the image in Y dimension (in bytes) */
398} Image;
399
400/** Structure to hold 3D tensor information */
401typedef struct Tensor3D
402{
403 __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
404 int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
405 int stride_x; /**< Stride of the image in X dimension (in bytes) */
406 int stride_y; /**< Stride of the image in Y dimension (in bytes) */
407 int stride_z; /**< Stride of the image in Z dimension (in bytes) */
408} Tensor3D;
409
steniu01868e5412017-07-17 23:16:00 +0100410/** Structure to hold 4D tensor information */
411typedef struct Tensor4D
412{
413 __global uchar *ptr; /**< Pointer to the starting postion of the buffer */
414 int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
415 int stride_x; /**< Stride of the image in X dimension (in bytes) */
416 int stride_y; /**< Stride of the image in Y dimension (in bytes) */
417 int stride_z; /**< Stride of the image in Z dimension (in bytes) */
418 int stride_w; /**< Stride of the image in W dimension (in bytes) */
419} Tensor4D;
420
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100421/** Wrap vector information into an Vector structure, and make the pointer point at this workitem's data.
422 *
423 * @param[in] ptr Pointer to the starting postion of the buffer
424 * @param[in] offset_first_element_in_bytes The offset of the first element in the source vector
425 * @param[in] stride_x Stride of the vector in X dimension (in bytes)
426 * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
427 *
428 * @return An image object
429 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100430inline Vector update_vector_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100431{
432 Vector vector =
433 {
434 .ptr = ptr,
435 .offset_first_element_in_bytes = offset_first_element_in_bytes,
436 .stride_x = stride_x,
437 };
438 vector.ptr += vector.offset_first_element_in_bytes + get_global_id(0) * step_x;
439 return vector;
440}
441
442/** Wrap image information into an Image structure, and make the pointer point at this workitem's data.
443 *
444 * @param[in] ptr Pointer to the starting postion of the buffer
445 * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
446 * @param[in] stride_x Stride of the image in X dimension (in bytes)
447 * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
448 * @param[in] stride_y Stride of the image in Y dimension (in bytes)
449 * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
450 *
451 * @return An image object
452 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100453inline Image update_image_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100454{
455 Image img =
456 {
457 .ptr = ptr,
458 .offset_first_element_in_bytes = offset_first_element_in_bytes,
459 .stride_x = stride_x,
460 .stride_y = stride_y
461 };
462 img.ptr += img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y;
463 return img;
464}
465
Anthony Barbier7ff47a32017-07-11 16:54:04 +0100466/** Wrap 3D tensor information into an image structure, and make the pointer point at this workitem's data.
467 *
468 * @param[in] ptr Pointer to the starting postion of the buffer
469 * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
470 * @param[in] stride_x Stride of the image in X dimension (in bytes)
471 * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
472 * @param[in] stride_y Stride of the image in Y dimension (in bytes)
473 * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
474 * @param[in] stride_z Stride of the image in Z dimension (in bytes)
475 * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes)
476 *
477 * @return A 3D tensor object
478 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100479inline Image update_image_from_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
Anthony Barbier7ff47a32017-07-11 16:54:04 +0100480{
481 Image img =
482 {
483 .ptr = ptr,
484 .offset_first_element_in_bytes = offset_first_element_in_bytes,
485 .stride_x = stride_x,
486 .stride_y = stride_y
487 };
488 img.ptr += img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y + get_global_id(2) * step_z;
489 return img;
490}
491
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100492/** Wrap 3D tensor information into an tensor structure, and make the pointer point at this workitem's data.
493 *
494 * @param[in] ptr Pointer to the starting postion of the buffer
495 * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
496 * @param[in] stride_x Stride of the image in X dimension (in bytes)
497 * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
498 * @param[in] stride_y Stride of the image in Y dimension (in bytes)
499 * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
500 * @param[in] stride_z Stride of the image in Z dimension (in bytes)
501 * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes)
502 *
503 * @return A 3D tensor object
504 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100505inline Tensor3D update_tensor3D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100506{
507 Tensor3D tensor =
508 {
509 .ptr = ptr,
510 .offset_first_element_in_bytes = offset_first_element_in_bytes,
511 .stride_x = stride_x,
512 .stride_y = stride_y,
513 .stride_z = stride_z
514 };
515 tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y + get_global_id(2) * step_z;
516 return tensor;
517}
518
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100519inline Tensor4D update_tensor4D_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z, uint stride_w,
steniu01868e5412017-07-17 23:16:00 +0100520 uint step_w,
521 uint mod_size)
522{
523 Tensor4D tensor =
524 {
525 .ptr = ptr,
526 .offset_first_element_in_bytes = offset_first_element_in_bytes,
527 .stride_x = stride_x,
528 .stride_y = stride_y,
529 .stride_z = stride_z,
530 .stride_w = stride_w
531 };
532
533 tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y + (get_global_id(2) % mod_size) * step_z + (get_global_id(2) / mod_size) * step_w;
534 return tensor;
535}
536
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100537/** Get the pointer position of a Vector
538 *
539 * @param[in] vec Pointer to the starting position of the buffer
540 * @param[in] x Relative X position
541 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100542inline __global const uchar *vector_offset(const Vector *vec, int x)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100543{
544 return vec->ptr + x * vec->stride_x;
545}
546
547/** Get the pointer position of a Image
548 *
549 * @param[in] img Pointer to the starting position of the buffer
550 * @param[in] x Relative X position
551 * @param[in] y Relative Y position
552 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100553inline __global uchar *offset(const Image *img, int x, int y)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100554{
555 return img->ptr + x * img->stride_x + y * img->stride_y;
556}
557
558/** Get the pointer position of a Tensor3D
559 *
Gian Marco Iodice3a623242017-07-25 10:25:53 +0100560 * @param[in] tensor Pointer to the starting position of the buffer
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100561 * @param[in] x Relative X position
562 * @param[in] y Relative Y position
563 * @param[in] z Relative Z position
564 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100565inline __global const uchar *tensor3D_offset(const Tensor3D *tensor, int x, int y, int z)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100566{
567 return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z;
568}
569
steniu01868e5412017-07-17 23:16:00 +0100570/** Get the pointer position of a Tensor4D
571 *
572 * @param[in] tensor Pointer to the starting position of the buffer
573 * @param[in] x Relative X position
574 * @param[in] y Relative Y position
575 * @param[in] z Relative Z position
576 * @param[in] w Relative W position
577 */
Georgios Pinitasaf7f7402018-10-25 16:04:40 +0100578inline __global const uchar *tensor4D_offset(const Tensor4D *tensor, int x, int y, int z, int w)
steniu01868e5412017-07-17 23:16:00 +0100579{
580 return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z + w * tensor->stride_w;
581}
582
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100583#endif // _HELPER_H