APPBROWSER-372: Rewrite the direct_convolution5x5.cs with the new common code
Change-Id: Ie2f398d62dea97e9201f77d22c9f0796db297b63
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/115280
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Zhenglin Li <zhenglin.li@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/src/core/GLES_COMPUTE/cs_shaders/helpers_cs.h b/src/core/GLES_COMPUTE/cs_shaders/helpers_cs.h
index fffc87d..dd9e1a3 100755
--- a/src/core/GLES_COMPUTE/cs_shaders/helpers_cs.h
+++ b/src/core/GLES_COMPUTE/cs_shaders/helpers_cs.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -326,6 +326,23 @@
#define VLOAD4_CURRENT_ITEM(return_type, tensor_ptr, tensor_iter) VLOAD4(return_type, tensor_ptr, CURRENT_ITEM_OFFSET(tensor_iter))
#define VSTORE4_CURRENT_ITEM(tensor_ptr, tensor_iter, data) VSTORE4(tensor_ptr, CURRENT_ITEM_OFFSET(tensor_iter), data)
+/** Load five consecutive elements and build a @p return_type value from them
+ *
+ * Expands to return_type(...) with five LOAD() results taken at offset, offset + 1, ..., offset + 4.
+ *
+ * @note @p return_type must be constructible from five arguments; presumably a five-element array type
+ *       such as float[5] - confirm against the callers.
+ * @note @p offset is expanded five times, so it should not have side effects.
+ */
+#define VLOAD5(return_type, tensor_ptr, offset) \
+ return_type(LOAD(tensor_ptr, offset), \
+ LOAD(tensor_ptr, (offset) + uint(1)), \
+ LOAD(tensor_ptr, (offset) + uint(2)), \
+ LOAD(tensor_ptr, (offset) + uint(3)), \
+ LOAD(tensor_ptr, (offset) + uint(4)))
+
+/** Store data[0] ... data[4] to five consecutive elements starting at @p offset
+ *
+ * @note Expands to five STORE() statements separated by ';' with no trailing ';' (the caller supplies it).
+ *       Because the expansion is more than one statement, do not use it as the unbraced body of an
+ *       if/else/for/while: only the first STORE() would be controlled by the condition.
+ * @note @p offset and @p data are each expanded five times, so they should not have side effects.
+ */
+#define VSTORE5(tensor_ptr, offset, data) \
+ STORE(tensor_ptr, offset, data[0]); \
+ STORE(tensor_ptr, (offset) + uint(1), data[1]); \
+ STORE(tensor_ptr, (offset) + uint(2), data[2]); \
+ STORE(tensor_ptr, (offset) + uint(3), data[3]); \
+ STORE(tensor_ptr, (offset) + uint(4), data[4])
+
+/* Variants of VLOAD5 / VSTORE5 that use CURRENT_ITEM_OFFSET(tensor_iter) as the offset argument. */
+#define VLOAD5_CURRENT_ITEM(return_type, tensor_ptr, tensor_iter) VLOAD5(return_type, tensor_ptr, CURRENT_ITEM_OFFSET(tensor_iter))
+#define VSTORE5_CURRENT_ITEM(tensor_ptr, tensor_iter, data) VSTORE5(tensor_ptr, CURRENT_ITEM_OFFSET(tensor_iter), data)
+
/** Converting the vec4 object to 4 half-precision (16-bits) floating point values and packing into a uvec2 object
*
* @param[in] data The vec4 object to be packed
@@ -348,6 +365,19 @@
return vec4(unpackHalf2x16(packed_data.x), unpackHalf2x16(packed_data.y));
}
+/** Expand a uvec3 holding 6 packed half-precision (16-bits) floating point values into a vec2[3] object
+ *
+ * Every component of @p packed_data is passed to unpackHalf2x16 and yields one vec2 of the result,
+ * in x, y, z order.
+ *
+ * @param[in] packed_data The uvec3 object to be unpacked
+ *
+ * @return The unpacked vec2[3] object
+ */
+mediump vec2[3] unpack6_half(highp uvec3 packed_data)
+{
+    return vec2[3](unpackHalf2x16(packed_data.x),
+                   unpackHalf2x16(packed_data.y),
+                   unpackHalf2x16(packed_data.z));
+}
+
/** Converting the vec4[2] object to 8 half-precision (16-bits) floating point values and packing into a uvec4 object
*
* @param[in] data The vec4[2] object to be packed
@@ -396,6 +426,9 @@
#define VLOAD2_UNPACK4_CURRENT_ITEM_HALF(tensor_ptr, tensor_iter) VLOAD2_UNPACK4_HALF(tensor_ptr, CURRENT_ITEM_OFFSET(tensor_iter))
#define VSTORE2_PACK4_CURRENT_ITEM_HALF(tensor_ptr, tensor_iter, data) VSTORE2_PACK4_HALF(tensor_ptr, CURRENT_ITEM_OFFSET(tensor_iter), data)
+/* Read a uvec3 through VLOAD3 and pass it to unpack6_half(); the CURRENT_ITEM variant uses CURRENT_ITEM_OFFSET(tensor_iter) as the offset. */
+#define VLOAD3_UNPACK6_HALF(tensor_ptr, offset) unpack6_half(VLOAD3(uvec3, tensor_ptr, offset))
+#define VLOAD3_UNPACK6_CURRENT_ITEM_HALF(tensor_ptr, tensor_iter) VLOAD3_UNPACK6_HALF(tensor_ptr, CURRENT_ITEM_OFFSET(tensor_iter))
+
#define VLOAD4_UNPACK8_HALF(tensor_ptr, offset) unpack8_half(VLOAD4(uvec4, tensor_ptr, offset))
#define VSTORE4_PACK8_HALF(tensor_ptr, offset, data) VSTORE4(tensor_ptr, offset, pack8_half(data))
#define VLOAD4_UNPACK8_CURRENT_ITEM_HALF(tensor_ptr, tensor_iter) VLOAD4_UNPACK8_HALF(tensor_ptr, CURRENT_ITEM_OFFSET(tensor_iter))