blob: ba27eec71625e56e1f3366b80a85a49b2add0cfe [file] [log] [blame]
/*
* Copyright (c) 2017 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef ARM_COMPUTE_HELPER_H
#define ARM_COMPUTE_HELPER_H
/** Limit x to the range [min_val, max_val] by nesting the max and min built-ins. */
#define CLAMP(x, min_val, max_val) min(max(x, min_val), max_val)
/** Paste type and size into a single token (type##size). The arguments are pasted as written, without macro expansion. */
#define VEC_DATA_TYPE_STR(type, size) type##size
/** Same result as VEC_DATA_TYPE_STR, but the extra macro level lets type and size be macro-expanded before they are pasted. */
#define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size)
/** Convert x to the given type using constructor syntax, i.e. type(x). */
#define CONVERT(x, type) type(x)
/** Expand to a call of pack_<stype>_<dtype>(value). The called function is not defined in the visible part of this file. */
#define PACK(value, stype, dtype) \
pack_##stype##_##dtype(value)
/** Expand to a call of unpack_<stype>_<dtype>(value). The called function is not defined in the visible part of this file. */
#define UNPACK(value, stype, dtype) \
unpack_##stype##_##dtype(value)
/** Declare a std430 buffer block named <name>Buffer at the given binding point.
 *
 * The block holds a single unsized array called <name>_ptr, which is the array
 * indexed by the LOAD and STORE macros of this file. The expansion has no trailing
 * semicolon, so the caller supplies it.
 *
 * @param name     Prefix used to build the block name and the <name>_ptr array name
 * @param location Value assigned to the binding layout qualifier
 * @param type     Element type of the <name>_ptr array
 * @param access   Text placed in front of the buffer keyword
 */
#define BUFFER_DECLARATION(name, location, type, access) \
layout(std430, binding = location) access buffer name##Buffer \
{ \
type name##_ptr[]; \
}
/** Declare the four uint fields that describe a vector called name.
 *
 * The fields are name_stride_x, name_step_x, name_offset_first_element_in_bytes
 * and name_buffer_data_type_size, in that order. The last declaration has no
 * trailing semicolon, so the caller supplies it.
 * NOTE(review): name_buffer_data_type_size is used as a right-shift amount by the
 * GC_*_OFFSET macros, so it is presumably a log2 value - confirm with the host code.
 */
#define VECTOR_PARAM_DECLARATION(name) \
uint name##_stride_x; \
uint name##_step_x; \
uint name##_offset_first_element_in_bytes; \
uint name##_buffer_data_type_size
/** Declare the uint fields that describe a 2D image called name.
 *
 * The fields are the X and Y stride/step pairs, name_offset_first_element_in_bytes,
 * name_buffer_data_type_size and two extra fields name_padding1 and name_padding2,
 * for a total of eight uints. The last declaration has no trailing semicolon, so
 * the caller supplies it.
 * NOTE(review): the padding fields are not read anywhere in this file; presumably
 * they only keep the parameter layout aligned - confirm with the host code.
 */
#define IMAGE_PARAM_DECLARATION(name) \
uint name##_stride_x; \
uint name##_step_x; \
uint name##_stride_y; \
uint name##_step_y; \
uint name##_offset_first_element_in_bytes; \
uint name##_buffer_data_type_size; \
uint name##_padding1; \
uint name##_padding2
/** Declare the uint fields that describe a 3D tensor called name.
 *
 * The fields are the X, Y and Z stride/step pairs, followed by
 * name_offset_first_element_in_bytes and name_buffer_data_type_size, for a total
 * of eight uints. The last declaration has no trailing semicolon, so the caller
 * supplies it.
 */
#define TENSOR3D_PARAM_DECLARATION(name) \
uint name##_stride_x; \
uint name##_step_x; \
uint name##_stride_y; \
uint name##_step_y; \
uint name##_stride_z; \
uint name##_step_z; \
uint name##_offset_first_element_in_bytes; \
uint name##_buffer_data_type_size
/** Structure to hold Vector information
 *
 * The unit of current_offset depends on the helper that filled the structure:
 * update_vector_workitem_offset() stores the byte offset shifted right by 2, while
 * the _fp16 and gc_ variants store the byte offset unchanged.
 */
struct Vector
{
uint current_offset; /**< Current offset of vector */
uint offset_first_element_in_bytes; /**< The offset of the first element in the source vector */
uint stride_x; /**< Stride of the vector in X dimension (in bytes) */
};
/** Structure to hold Image information
 *
 * The unit of current_offset depends on the helper that filled the structure:
 * the plain update_image_* helpers store the byte offset shifted right by 2, while
 * the _fp16 and gc_ variants store the byte offset unchanged.
 */
struct Image
{
uint current_offset; /**< Current offset of image */
uint offset_first_element_in_bytes; /**< The offset of the first element in the source image */
uint stride_x; /**< Stride of the image in X dimension (in bytes) */
uint stride_y; /**< Stride of the image in Y dimension (in bytes) */
};
/** Structure to hold 3D tensor information
 *
 * The unit of current_offset depends on the helper that filled the structure:
 * update_tensor3D_workitem_offset() stores the byte offset shifted right by 2,
 * while the _fp16 and gc_ variants store the byte offset unchanged.
 */
struct Tensor3D
{
uint current_offset; /**< Current offset of tensor */
uint offset_first_element_in_bytes; /**< The offset of the first element in the source tensor */
uint stride_x; /**< Stride of the tensor in X dimension (in bytes) */
uint stride_y; /**< Stride of the tensor in Y dimension (in bytes) */
uint stride_z; /**< Stride of the tensor in Z dimension (in bytes) */
};
/////////////////////////////////////////////////////////////
// TODO: old to be removed
// Legacy helpers. Each macro takes a parameter prefix `name` and forwards the values
// name##_offset_first_element_in_bytes, name##_stride_x and name##_step_x (the names
// produced by VECTOR_PARAM_DECLARATION) to the matching update_vector_* function.
/** Build a Vector for the current work item; current_offset is the byte offset shifted right by 2. */
#define CONVERT_TO_VECTOR_STRUCT(name) \
update_vector_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x)
/** Build a Vector for the current work item; current_offset is kept as a byte offset. */
#define CONVERT_TO_VECTOR_STRUCT_FP16(name) \
update_vector_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x)
/** Like CONVERT_TO_VECTOR_STRUCT, but passes a step of uint(0), so the work item ID does not move the offset. */
#define CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) \
update_vector_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0))
/** Like CONVERT_TO_VECTOR_STRUCT_FP16, but passes a step of uint(0), so the work item ID does not move the offset. */
#define CONVERT_TO_VECTOR_STRUCT_NO_STEP_FP16(name) \
update_vector_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, uint(0))
/** Build an Image for the current work item from the name##_* values; current_offset is the byte offset shifted right by 2. */
#define CONVERT_TO_IMAGE_STRUCT(name) \
update_image_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y)
/** Build an Image for the current work item from the name##_* values; current_offset is kept as a byte offset. */
#define CONVERT_TO_IMAGE_STRUCT_FP16(name) \
update_image_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y)
/** Like CONVERT_TO_IMAGE_STRUCT, but passes uint(0) for both steps, so the work item ID does not move the offset. */
#define CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \
update_image_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0))
/** Like CONVERT_TO_IMAGE_STRUCT_FP16, but passes uint(0) for both steps, so the work item ID does not move the offset. */
#define CONVERT_TO_IMAGE_STRUCT_NO_STEP_FP16(name) \
update_image_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0))
/** Build an Image from 3D tensor parameters with the X and Y steps set to uint(0).
 * The Z step is still name##_step_z, so only gl_GlobalInvocationID.z moves the offset.
 * current_offset is the byte offset shifted right by 2.
 */
#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \
update_image_from_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, name##_step_z)
/** Same as CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP, but current_offset is kept as a byte offset. */
#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP_FP16(name) \
update_image_from_tensor3D_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, name##_step_z)
/** Build an Image from 3D tensor parameters using all three steps; current_offset is the byte offset shifted right by 2. */
#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
update_image_from_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)
/** Build an Image from 3D tensor parameters using all three steps; current_offset is kept as a byte offset. */
#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_FP16(name) \
update_image_from_tensor3D_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)
/** Build a Tensor3D for the current work item from the name##_* values; current_offset is the byte offset shifted right by 2. */
#define CONVERT_TO_TENSOR3D_STRUCT(name) \
update_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
name##_stride_z, name##_step_z)
/** Build a Tensor3D for the current work item from the name##_* values; current_offset is kept as a byte offset. */
#define CONVERT_TO_TENSOR3D_STRUCT_FP16(name) \
update_tensor3D_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
name##_stride_z, name##_step_z)
/** Like CONVERT_TO_TENSOR3D_STRUCT, but passes uint(0) for all three steps, so the work item ID does not move the offset. */
#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) \
update_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, uint(0))
/** Like CONVERT_TO_TENSOR3D_STRUCT_FP16, but passes uint(0) for all three steps, so the work item ID does not move the offset. */
#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP_FP16(name) \
update_tensor3D_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, uint(0))
// FIXME: Redesign the macros if different data types are supported.
// LOAD4 is an expression: it yields element `offset` of the name##_ptr array.
// NOTE(review): the "4" presumably refers to the element size or count of the
// buffer type in use; nothing in the macro itself enforces it - confirm with callers.
#define LOAD4(name, offset) \
name##_ptr[offset]
// STORE4 assigns value to element `offset` of the name##_ptr array.
#define STORE4(name, offset, value) \
name##_ptr[offset] = value
// Load 1 element, which size is determined by ssbo type.
// Unlike LOAD4, LOAD1 is an assignment: the loaded element is written to r.
#define LOAD1(r, name, offset) \
r = name##_ptr[offset]
// STORE1 has the same expansion as STORE4: it assigns value to name##_ptr[offset].
#define STORE1(name, offset, value) \
name##_ptr[offset] = value
// LOAD2 / STORE2 / LOAD3 handle consecutive elements starting at `offset`
// (offset, offset + 1 and, for LOAD3, offset + 2), using r[i] or value[i] for each.
// Each macro expands to several statements separated by ';', so it must not be used
// as the unbraced body of an if/else/for. The `offset` argument appears once per
// element in the expansion, so it is evaluated more than once.
#define LOAD2(r, name, offset) \
LOAD1(r[0], name, offset); \
LOAD1(r[1], name, (offset) + uint(1))
#define STORE2(name, offset, value) \
name##_ptr[offset] = value[0]; \
name##_ptr[(offset) + uint(1)] = value[1]
#define LOAD3(r, name, offset) \
LOAD1(r[0], name, offset); \
LOAD1(r[1], name, (offset) + uint(1)); \
LOAD1(r[2], name, (offset) + uint(2))
/** Read the current_offset member of a Vector, Image or Tensor3D variable called name. */
#define CURRENT_OFFSET(name) \
name.current_offset
/** Build a Vector descriptor positioned at the current work item.
 *
 * The work item position in bytes is
 * offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x.
 * It is shifted right by 2 (divided by 4) before being stored in current_offset.
 *
 * @param[in] offset_first_element_in_bytes The offset of the first element in the source vector
 * @param[in] stride_x                      Stride of the vector in X dimension (in bytes)
 * @param[in] step_x                        stride_x * number of elements along X processed per workitem (in bytes)
 *
 * @return A Vector whose current_offset is the work item byte offset shifted right by 2
 */
Vector update_vector_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x)
{
    uint byte_pos = offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x;

    Vector result;
    result.current_offset                = byte_pos >> 2;
    result.offset_first_element_in_bytes = offset_first_element_in_bytes;
    result.stride_x                      = stride_x;
    return result;
}
/** Build a Vector descriptor positioned at the current work item, keeping the offset in bytes.
 *
 * current_offset is set to
 * offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x
 * without the right shift applied by update_vector_workitem_offset().
 *
 * @param[in] offset_first_element_in_bytes The offset of the first element in the source vector
 * @param[in] stride_x                      Stride of the vector in X dimension (in bytes)
 * @param[in] step_x                        stride_x * number of elements along X processed per workitem (in bytes)
 *
 * @return A Vector whose current_offset is the work item byte offset
 */
Vector update_vector_workitem_offset_fp16(uint offset_first_element_in_bytes, uint stride_x, uint step_x)
{
    uint byte_pos = offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x;

    Vector result;
    result.current_offset                = byte_pos;
    result.offset_first_element_in_bytes = offset_first_element_in_bytes;
    result.stride_x                      = stride_x;
    return result;
}
/** Build an Image descriptor positioned at the current work item.
 *
 * The work item position in bytes is
 * offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y.
 * It is shifted right by 2 (divided by 4) before being stored in current_offset.
 *
 * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
 * @param[in] stride_x                      Stride of the image in X dimension (in bytes)
 * @param[in] step_x                        stride_x * number of elements along X processed per workitem (in bytes)
 * @param[in] stride_y                      Stride of the image in Y dimension (in bytes)
 * @param[in] step_y                        stride_y * number of elements along Y processed per workitem (in bytes)
 *
 * @return An Image whose current_offset is the work item byte offset shifted right by 2
 */
Image update_image_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y)
{
    uint byte_pos = offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y;

    Image result;
    result.current_offset                = byte_pos >> 2;
    result.offset_first_element_in_bytes = offset_first_element_in_bytes;
    result.stride_x                      = stride_x;
    result.stride_y                      = stride_y;
    return result;
}
/** Build an Image descriptor positioned at the current work item, keeping the offset in bytes.
 *
 * current_offset is set to
 * offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y
 * without the right shift applied by update_image_workitem_offset().
 *
 * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
 * @param[in] stride_x                      Stride of the image in X dimension (in bytes)
 * @param[in] step_x                        stride_x * number of elements along X processed per workitem (in bytes)
 * @param[in] stride_y                      Stride of the image in Y dimension (in bytes)
 * @param[in] step_y                        stride_y * number of elements along Y processed per workitem (in bytes)
 *
 * @return An Image whose current_offset is the work item byte offset
 */
Image update_image_workitem_offset_fp16(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y)
{
    uint byte_pos = offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y;

    Image result;
    result.current_offset                = byte_pos;
    result.offset_first_element_in_bytes = offset_first_element_in_bytes;
    result.stride_x                      = stride_x;
    result.stride_y                      = stride_y;
    return result;
}
/** Build a 2D Image descriptor from 3D tensor parameters, positioned at the current work item.
 *
 * All three components of gl_GlobalInvocationID contribute to the position:
 * offset_first_element_in_bytes + x * step_x + y * step_y + z * step_z.
 * It is shifted right by 2 (divided by 4) before being stored in current_offset.
 * stride_z is accepted but not stored, because Image has no Z stride member.
 *
 * @param[in] offset_first_element_in_bytes The offset of the first element in the source tensor
 * @param[in] stride_x                      Stride of the tensor in X dimension (in bytes)
 * @param[in] step_x                        stride_x * number of elements along X processed per workitem (in bytes)
 * @param[in] stride_y                      Stride of the tensor in Y dimension (in bytes)
 * @param[in] step_y                        stride_y * number of elements along Y processed per workitem (in bytes)
 * @param[in] stride_z                      Stride of the tensor in Z dimension (in bytes); unused by this function
 * @param[in] step_z                        stride_z * number of elements along Z processed per workitem (in bytes)
 *
 * @return A 2D Image whose current_offset is the work item byte offset shifted right by 2
 */
Image update_image_from_tensor3D_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
{
    uint byte_pos = offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z;

    Image result;
    result.current_offset                = byte_pos >> 2;
    result.offset_first_element_in_bytes = offset_first_element_in_bytes;
    result.stride_x                      = stride_x;
    result.stride_y                      = stride_y;
    return result;
}
/** Build a 2D Image descriptor from 3D tensor parameters, keeping the offset in bytes.
 *
 * current_offset is set to
 * offset_first_element_in_bytes + x * step_x + y * step_y + z * step_z
 * (x, y, z being the components of gl_GlobalInvocationID), without the right shift
 * applied by update_image_from_tensor3D_workitem_offset().
 * stride_z is accepted but not stored, because Image has no Z stride member.
 *
 * @param[in] offset_first_element_in_bytes The offset of the first element in the source tensor
 * @param[in] stride_x                      Stride of the tensor in X dimension (in bytes)
 * @param[in] step_x                        stride_x * number of elements along X processed per workitem (in bytes)
 * @param[in] stride_y                      Stride of the tensor in Y dimension (in bytes)
 * @param[in] step_y                        stride_y * number of elements along Y processed per workitem (in bytes)
 * @param[in] stride_z                      Stride of the tensor in Z dimension (in bytes); unused by this function
 * @param[in] step_z                        stride_z * number of elements along Z processed per workitem (in bytes)
 *
 * @return A 2D Image whose current_offset is the work item byte offset
 */
Image update_image_from_tensor3D_workitem_offset_fp16(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
{
    uint byte_pos = offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z;

    Image result;
    result.current_offset                = byte_pos;
    result.offset_first_element_in_bytes = offset_first_element_in_bytes;
    result.stride_x                      = stride_x;
    result.stride_y                      = stride_y;
    return result;
}
/** Build a Tensor3D descriptor positioned at the current work item.
 *
 * The work item position in bytes is
 * offset_first_element_in_bytes + x * step_x + y * step_y + z * step_z
 * (x, y, z being the components of gl_GlobalInvocationID).
 * It is shifted right by 2 (divided by 4) before being stored in current_offset.
 *
 * @param[in] offset_first_element_in_bytes The offset of the first element in the source tensor
 * @param[in] stride_x                      Stride of the tensor in X dimension (in bytes)
 * @param[in] step_x                        stride_x * number of elements along X processed per workitem (in bytes)
 * @param[in] stride_y                      Stride of the tensor in Y dimension (in bytes)
 * @param[in] step_y                        stride_y * number of elements along Y processed per workitem (in bytes)
 * @param[in] stride_z                      Stride of the tensor in Z dimension (in bytes)
 * @param[in] step_z                        stride_z * number of elements along Z processed per workitem (in bytes)
 *
 * @return A Tensor3D whose current_offset is the work item byte offset shifted right by 2
 */
Tensor3D update_tensor3D_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
{
    uint byte_pos = offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z;

    Tensor3D result;
    result.current_offset                = byte_pos >> 2;
    result.offset_first_element_in_bytes = offset_first_element_in_bytes;
    result.stride_x                      = stride_x;
    result.stride_y                      = stride_y;
    result.stride_z                      = stride_z;
    return result;
}
/** Build a Tensor3D descriptor positioned at the current work item, keeping the offset in bytes.
 *
 * current_offset is set to
 * offset_first_element_in_bytes + x * step_x + y * step_y + z * step_z
 * (x, y, z being the components of gl_GlobalInvocationID), without the right shift
 * applied by update_tensor3D_workitem_offset().
 *
 * @param[in] offset_first_element_in_bytes The offset of the first element in the source tensor
 * @param[in] stride_x                      Stride of the tensor in X dimension (in bytes)
 * @param[in] step_x                        stride_x * number of elements along X processed per workitem (in bytes)
 * @param[in] stride_y                      Stride of the tensor in Y dimension (in bytes)
 * @param[in] step_y                        stride_y * number of elements along Y processed per workitem (in bytes)
 * @param[in] stride_z                      Stride of the tensor in Z dimension (in bytes)
 * @param[in] step_z                        stride_z * number of elements along Z processed per workitem (in bytes)
 *
 * @return A Tensor3D whose current_offset is the work item byte offset
 */
Tensor3D update_tensor3D_workitem_offset_fp16(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
{
    uint byte_pos = offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z;

    Tensor3D result;
    result.current_offset                = byte_pos;
    result.offset_first_element_in_bytes = offset_first_element_in_bytes;
    result.stride_x                      = stride_x;
    result.stride_y                      = stride_y;
    result.stride_z                      = stride_z;
    return result;
}
/** Compute the offset of an element relative to a Vector's current position.
 *
 * vec.current_offset is scaled up by 4 (<< 2), the displacement x * stride_x is
 * added using signed arithmetic, and the sum is scaled back down by 4 (>> 2).
 *
 * @param[in] vec Vector descriptor whose current_offset is the starting position
 * @param[in] x   Relative X position (may be negative)
 *
 * @return The resulting offset, shifted right by 2
 */
uint vector_offset(Vector vec, int x)
{
    int base_bytes = CONVERT(vec.current_offset << 2, int);
    int step_bytes = x * CONVERT(vec.stride_x, int);
    return CONVERT(base_bytes + step_bytes, uint) >> 2;
}
/** Compute the offset of an element relative to a Vector's current position, without rescaling.
 *
 * The displacement x * stride_x is added to vec.current_offset using signed
 * arithmetic; no shift is applied to either the input or the result.
 *
 * @param[in] vec Vector descriptor whose current_offset is the starting position
 * @param[in] x   Relative X position (may be negative)
 *
 * @return vec.current_offset + x * vec.stride_x, converted back to uint
 */
uint vector_offset_fp16(Vector vec, int x)
{
    int base_pos  = CONVERT(vec.current_offset, int);
    int step_size = x * CONVERT(vec.stride_x, int);
    return CONVERT(base_pos + step_size, uint);
}
/** Compute the offset of an element relative to an Image's current position.
 *
 * img.current_offset is scaled up by 4 (<< 2), the displacement
 * x * stride_x + y * stride_y is added using signed arithmetic, and the sum is
 * scaled back down by 4 (>> 2).
 *
 * @param[in] img Image descriptor whose current_offset is the starting position
 * @param[in] x   Relative X position (may be negative)
 * @param[in] y   Relative Y position (may be negative)
 *
 * @return The resulting offset, shifted right by 2
 */
uint offset(Image img, int x, int y)
{
    int base_bytes = CONVERT(img.current_offset << 2, int);
    int dx_bytes   = x * CONVERT(img.stride_x, int);
    int dy_bytes   = y * CONVERT(img.stride_y, int);
    return CONVERT(base_bytes + dx_bytes + dy_bytes, uint) >> 2;
}
/** Compute the offset of an element relative to an Image's current position, without rescaling.
 *
 * The displacement x * stride_x + y * stride_y is added to img.current_offset using
 * signed arithmetic; no shift is applied to either the input or the result.
 *
 * @param[in] img Image descriptor whose current_offset is the starting position
 * @param[in] x   Relative X position (may be negative)
 * @param[in] y   Relative Y position (may be negative)
 *
 * @return img.current_offset + x * img.stride_x + y * img.stride_y, converted back to uint
 */
uint offset_fp16(Image img, int x, int y)
{
    int base_pos = CONVERT(img.current_offset, int);
    int dx       = x * CONVERT(img.stride_x, int);
    int dy       = y * CONVERT(img.stride_y, int);
    return CONVERT(base_pos + dx + dy, uint);
}
/** Compute the offset of an element relative to a Tensor3D's current position.
 *
 * tensor.current_offset is scaled up by 4 (<< 2), the displacement
 * x * stride_x + y * stride_y + z * stride_z is added using signed arithmetic, and
 * the sum is scaled back down by 4 (>> 2).
 *
 * @param[in] tensor Tensor3D descriptor whose current_offset is the starting position
 * @param[in] x      Relative X position (may be negative)
 * @param[in] y      Relative Y position (may be negative)
 * @param[in] z      Relative Z position (may be negative)
 *
 * @return The resulting offset, shifted right by 2
 */
uint tensor3D_offset(Tensor3D tensor, int x, int y, int z)
{
    int base_bytes = CONVERT(tensor.current_offset << 2, int);
    int dx_bytes   = x * CONVERT(tensor.stride_x, int);
    int dy_bytes   = y * CONVERT(tensor.stride_y, int);
    int dz_bytes   = z * CONVERT(tensor.stride_z, int);
    return CONVERT(base_bytes + dx_bytes + dy_bytes + dz_bytes, uint) >> 2;
}
/** Compute the offset of an element relative to a Tensor3D's current position, without rescaling.
 *
 * The displacement x * stride_x + y * stride_y + z * stride_z is added to
 * tensor.current_offset using signed arithmetic; no shift is applied to either the
 * input or the result.
 *
 * @param[in] tensor Tensor3D descriptor whose current_offset is the starting position
 * @param[in] x      Relative X position (may be negative)
 * @param[in] y      Relative Y position (may be negative)
 * @param[in] z      Relative Z position (may be negative)
 *
 * @return tensor.current_offset plus the three stride-scaled displacements, converted back to uint
 */
uint tensor3D_offset_fp16(Tensor3D tensor, int x, int y, int z)
{
    int base_pos = CONVERT(tensor.current_offset, int);
    int dx       = x * CONVERT(tensor.stride_x, int);
    int dy       = y * CONVERT(tensor.stride_y, int);
    int dz       = z * CONVERT(tensor.stride_z, int);
    return CONVERT(base_pos + dx + dy + dz, uint);
}
/////////////////////////////////////////////////////////////
// new one
// GC_CONVERT_* macros: each takes a parameter prefix `name` and forwards the
// name##_offset_first_element_in_bytes / name##_stride_* / name##_step_* values to the
// matching gc_update_* function. The gc_update_* functions keep current_offset in bytes.
/** Build a Vector for the current work item. */
#define GC_CONVERT_TO_VECTOR_STRUCT(name) \
gc_update_vector_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x)
/** Build a Vector with a step of uint(0), so the work item ID does not move the offset. */
#define GC_CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) \
gc_update_vector_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0))
/** Build an Image for the current work item. */
#define GC_CONVERT_TO_IMAGE_STRUCT(name) \
gc_update_image_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y)
/** Build an Image with both steps set to uint(0), so the work item ID does not move the offset. */
#define GC_CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \
gc_update_image_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0))
/** Build a Tensor3D for the current work item. */
#define GC_CONVERT_TO_TENSOR3D_STRUCT(name) \
gc_update_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
name##_stride_z, name##_step_z)
/** Build a Tensor3D with all three steps set to uint(0), so the work item ID does not move the offset. */
#define GC_CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) \
gc_update_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, uint(0))
/** Build an Image from 3D tensor parameters using all three steps. */
#define GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
gc_update_image_from_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)
/** Build an Image from 3D tensor parameters with the X and Y steps set to uint(0).
 * The Z step is still name##_step_z, so only gl_GlobalInvocationID.z moves the offset.
 */
#define GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \
gc_update_image_from_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, name##_step_z)
/** Build a Vector descriptor positioned at the current work item, with the offset kept in bytes.
 *
 * @param[in] offset_first_element_in_bytes The offset of the first element in the source vector
 * @param[in] stride_x                      Stride of the vector in X dimension (in bytes)
 * @param[in] step_x                        Bytes advanced along X per work item
 *
 * @return A Vector whose current_offset is offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x
 */
Vector gc_update_vector_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x)
{
    uint byte_pos = offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x;

    Vector result;
    result.current_offset                = byte_pos;
    result.offset_first_element_in_bytes = offset_first_element_in_bytes;
    result.stride_x                      = stride_x;
    return result;
}
/** Build an Image descriptor positioned at the current work item, with the offset kept in bytes.
 *
 * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
 * @param[in] stride_x                      Stride of the image in X dimension (in bytes)
 * @param[in] step_x                        Bytes advanced along X per work item
 * @param[in] stride_y                      Stride of the image in Y dimension (in bytes)
 * @param[in] step_y                        Bytes advanced along Y per work item
 *
 * @return An Image whose current_offset is
 *         offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y
 */
Image gc_update_image_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y)
{
    uint byte_pos = offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y;

    Image result;
    result.current_offset                = byte_pos;
    result.offset_first_element_in_bytes = offset_first_element_in_bytes;
    result.stride_x                      = stride_x;
    result.stride_y                      = stride_y;
    return result;
}
/** Build a Tensor3D descriptor positioned at the current work item, with the offset kept in bytes.
 *
 * @param[in] offset_first_element_in_bytes The offset of the first element in the source tensor
 * @param[in] stride_x                      Stride of the tensor in X dimension (in bytes)
 * @param[in] step_x                        Bytes advanced along X per work item
 * @param[in] stride_y                      Stride of the tensor in Y dimension (in bytes)
 * @param[in] step_y                        Bytes advanced along Y per work item
 * @param[in] stride_z                      Stride of the tensor in Z dimension (in bytes)
 * @param[in] step_z                        Bytes advanced along Z per work item
 *
 * @return A Tensor3D whose current_offset is offset_first_element_in_bytes plus each
 *         gl_GlobalInvocationID component multiplied by its step
 */
Tensor3D gc_update_tensor3D_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
{
    uint byte_pos = offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z;

    Tensor3D result;
    result.current_offset                = byte_pos;
    result.offset_first_element_in_bytes = offset_first_element_in_bytes;
    result.stride_x                      = stride_x;
    result.stride_y                      = stride_y;
    result.stride_z                      = stride_z;
    return result;
}
/** Build a 2D Image descriptor from 3D tensor parameters, with the offset kept in bytes.
 *
 * All three components of gl_GlobalInvocationID contribute to current_offset.
 * stride_z is accepted but not stored, because Image has no Z stride member.
 *
 * @param[in] offset_first_element_in_bytes The offset of the first element in the source tensor
 * @param[in] stride_x                      Stride of the tensor in X dimension (in bytes)
 * @param[in] step_x                        Bytes advanced along X per work item
 * @param[in] stride_y                      Stride of the tensor in Y dimension (in bytes)
 * @param[in] step_y                        Bytes advanced along Y per work item
 * @param[in] stride_z                      Stride of the tensor in Z dimension (in bytes); unused by this function
 * @param[in] step_z                        Bytes advanced along Z per work item
 *
 * @return An Image whose current_offset is offset_first_element_in_bytes plus each
 *         gl_GlobalInvocationID component multiplied by its step
 */
Image gc_update_image_from_tensor3D_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
{
    uint byte_pos = offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z;

    Image result;
    result.current_offset                = byte_pos;
    result.offset_first_element_in_bytes = offset_first_element_in_bytes;
    result.stride_x                      = stride_x;
    result.stride_y                      = stride_y;
    return result;
}
/** Read the current_offset member of a Vector, Image or Tensor3D variable called name. */
#define GC_CURRENT_OFFSET(name) \
name.current_offset
/** Compute the offset of an element relative to a Vector's current position.
 *
 * The displacement x * stride_x is added to vec.current_offset using signed
 * arithmetic; no shift is applied.
 *
 * @param[in] vec Vector descriptor whose current_offset is the starting position
 * @param[in] x   Relative X position (may be negative)
 *
 * @return vec.current_offset + x * vec.stride_x, converted back to uint
 */
uint gc_vector_offset(Vector vec, int x)
{
    int base_pos  = CONVERT(vec.current_offset, int);
    int step_size = x * CONVERT(vec.stride_x, int);
    return CONVERT(base_pos + step_size, uint);
}
/** Compute the offset of an element relative to an Image's current position.
 *
 * The displacement x * stride_x + y * stride_y is added to img.current_offset using
 * signed arithmetic; no shift is applied.
 *
 * @param[in] img Image descriptor whose current_offset is the starting position
 * @param[in] x   Relative X position (may be negative)
 * @param[in] y   Relative Y position (may be negative)
 *
 * @return img.current_offset + x * img.stride_x + y * img.stride_y, converted back to uint
 */
uint gc_image_offset(Image img, int x, int y)
{
    int base_pos = CONVERT(img.current_offset, int);
    int dx       = x * CONVERT(img.stride_x, int);
    int dy       = y * CONVERT(img.stride_y, int);
    return CONVERT(base_pos + dx + dy, uint);
}
/** Compute the offset of an element relative to a Tensor3D's current position.
 *
 * The displacement x * stride_x + y * stride_y + z * stride_z is added to
 * tensor.current_offset using signed arithmetic; no shift is applied.
 *
 * @param[in] tensor Tensor3D descriptor whose current_offset is the starting position
 * @param[in] x      Relative X position (may be negative)
 * @param[in] y      Relative Y position (may be negative)
 * @param[in] z      Relative Z position (may be negative)
 *
 * @return tensor.current_offset plus the three stride-scaled displacements, converted back to uint
 */
uint gc_tensor3D_offset(Tensor3D tensor, int x, int y, int z)
{
    int base_pos = CONVERT(tensor.current_offset, int);
    int dx       = x * CONVERT(tensor.stride_x, int);
    int dy       = y * CONVERT(tensor.stride_y, int);
    int dz       = z * CONVERT(tensor.stride_z, int);
    return CONVERT(base_pos + dx + dy + dz, uint);
}
// load/store number of element depends on buffer type
// GC_LOADn reads n consecutive elements of name##_ptr starting at `offset` into
// r (n == 1) or r[0..n-1]; GC_STOREn writes value (n == 1) or value[0..n-1] the same way.
// Note the argument order: the GC_STORE macros take (value, name, offset), whereas the
// legacy STORE macros take (name, offset, value).
// The 2- and 3-element forms expand to several statements separated by ';', so they
// must not be used as the unbraced body of an if/else/for, and `offset` is evaluated
// once per element.
#define GC_LOAD1(r, name, offset) \
r = name##_ptr[offset]
#define GC_LOAD2(r, name, offset) \
GC_LOAD1(r[0], name, offset); \
GC_LOAD1(r[1], name, (offset) + uint(1))
#define GC_LOAD3(r, name, offset) \
GC_LOAD1(r[0], name, offset); \
GC_LOAD1(r[1], name, (offset) + uint(1)); \
GC_LOAD1(r[2], name, (offset) + uint(2))
#define GC_STORE1(value, name, offset) \
name##_ptr[offset] = value
#define GC_STORE2(value, name, offset) \
GC_STORE1(value[0], name, offset); \
GC_STORE1(value[1], name, (offset) + uint(1))
#define GC_STORE3(value, name, offset) \
GC_STORE1(value[0], name, offset); \
GC_STORE1(value[1], name, (offset) + uint(1)); \
GC_STORE1(value[2], name, (offset) + uint(2))
// has to manually expand them since not supported by compiler
// Each GC_<LOAD|STORE><n>_<d>D_OFFSET macro combines an offset computation with a
// load or store of n elements:
//   1. gc_vector_offset / gc_image_offset / gc_tensor3D_offset is called with `name`
//      and the relative coordinates converted with int(...);
//   2. the result is shifted right by name##_buffer_data_type_size;
//   3. the shifted value is used as the index for GC_LOADn / GC_STOREn on name##_ptr.
// `name` is therefore used three ways: as the Vector/Image/Tensor3D variable passed to
// the offset function, as the prefix of name##_ptr, and as the prefix of
// name##_buffer_data_type_size, so all three must share the same identifier.
// NOTE(review): since name##_buffer_data_type_size is a shift amount, it presumably
// holds log2 of the element size in bytes - confirm with the host code.
// The 2- and 3-element forms paste the whole offset expression once per element, so
// the offset function is called once per element.
#define GC_LOAD1_1D_OFFSET(r, name, x) \
GC_LOAD1(r, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size)
#define GC_LOAD1_2D_OFFSET(r, name, x, y) \
GC_LOAD1(r, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size)
#define GC_LOAD1_3D_OFFSET(r, name, x, y, z) \
GC_LOAD1(r, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size)
#define GC_STORE1_1D_OFFSET(value, name, x) \
GC_STORE1(value, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size)
#define GC_STORE1_2D_OFFSET(value, name, x, y) \
GC_STORE1(value, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size)
#define GC_STORE1_3D_OFFSET(value, name, x, y, z) \
GC_STORE1(value, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size)
#define GC_LOAD2_1D_OFFSET(r, name, x) \
GC_LOAD2(r, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size)
#define GC_LOAD2_2D_OFFSET(r, name, x, y) \
GC_LOAD2(r, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size)
#define GC_LOAD2_3D_OFFSET(r, name, x, y, z) \
GC_LOAD2(r, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size)
#define GC_STORE2_1D_OFFSET(value, name, x) \
GC_STORE2(value, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size)
#define GC_STORE2_2D_OFFSET(value, name, x, y) \
GC_STORE2(value, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size)
#define GC_STORE2_3D_OFFSET(value, name, x, y, z) \
GC_STORE2(value, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size)
#define GC_LOAD3_1D_OFFSET(r, name, x) \
GC_LOAD3(r, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size)
#define GC_LOAD3_2D_OFFSET(r, name, x, y) \
GC_LOAD3(r, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size)
#define GC_LOAD3_3D_OFFSET(r, name, x, y, z) \
GC_LOAD3(r, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size)
/////////////////////////////////////////////////////////////
#endif // ARM_COMPUTE_HELPER_H