APPBROWSER-372: Rewrite the direct_convolution5x5.cs with the new common code

Change-Id: Ie2f398d62dea97e9201f77d22c9f0796db297b63
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/115280
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Zhenglin Li <zhenglin.li@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/src/core/GLES_COMPUTE/cs_shaders/helpers_cs.h b/src/core/GLES_COMPUTE/cs_shaders/helpers_cs.h
index fffc87d..dd9e1a3 100755
--- a/src/core/GLES_COMPUTE/cs_shaders/helpers_cs.h
+++ b/src/core/GLES_COMPUTE/cs_shaders/helpers_cs.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -326,6 +326,23 @@
 #define VLOAD4_CURRENT_ITEM(return_type, tensor_ptr, tensor_iter) VLOAD4(return_type, tensor_ptr, CURRENT_ITEM_OFFSET(tensor_iter))
 #define VSTORE4_CURRENT_ITEM(tensor_ptr, tensor_iter, data) VSTORE4(tensor_ptr, CURRENT_ITEM_OFFSET(tensor_iter), data)
 
+#define VLOAD5(return_type, tensor_ptr, offset) /* return_type(...) built from 5 LOADs at offset .. offset + 4 */ \
+    return_type(LOAD(tensor_ptr, offset),             \
+                LOAD(tensor_ptr, (offset) + uint(1)), \
+                LOAD(tensor_ptr, (offset) + uint(2)), \
+                LOAD(tensor_ptr, (offset) + uint(3)), \
+                LOAD(tensor_ptr, (offset) + uint(4)))
+
+#define VSTORE5(tensor_ptr, offset, data) /* 5 STOREs of data[0..4] at offset .. offset + 4; expands to several ';'-separated statements, so brace it under if/else/loops */ \
+    STORE(tensor_ptr, offset, data[0]);             \
+    STORE(tensor_ptr, (offset) + uint(1), data[1]); \
+    STORE(tensor_ptr, (offset) + uint(2), data[2]); \
+    STORE(tensor_ptr, (offset) + uint(3), data[3]); \
+    STORE(tensor_ptr, (offset) + uint(4), data[4])
+
+#define VLOAD5_CURRENT_ITEM(return_type, tensor_ptr, tensor_iter) VLOAD5(return_type, tensor_ptr, CURRENT_ITEM_OFFSET(tensor_iter)) /* VLOAD5 with offset = CURRENT_ITEM_OFFSET(tensor_iter) */
+#define VSTORE5_CURRENT_ITEM(tensor_ptr, tensor_iter, data) VSTORE5(tensor_ptr, CURRENT_ITEM_OFFSET(tensor_iter), data) /* VSTORE5 with offset = CURRENT_ITEM_OFFSET(tensor_iter); multi-statement like VSTORE5 */
+
 /** Converting the vec4 object to 4 half-precision (16-bits) floating point values and packing into a uvec2 object
  *
  * @param[in] data The vec4 object to be packed
@@ -348,6 +365,19 @@
     return vec4(unpackHalf2x16(packed_data.x), unpackHalf2x16(packed_data.y));
 }
 
+/** Unpack a uvec3 holding 6 half-precision (16-bit) floating point values into a vec2[3] object
+ *
+ * @param[in] packed_data The uvec3 to unpack; its x, y and z components are each passed to unpackHalf2x16
+ *
+ * @return A vec2[3] whose elements 0, 1 and 2 are the values unpacked from x, y and z respectively
+ */
+mediump vec2[3] unpack6_half(highp uvec3 packed_data)
+{
+    return vec2[3](unpackHalf2x16(packed_data.x),
+                   unpackHalf2x16(packed_data.y),
+                   unpackHalf2x16(packed_data.z));
+}
+
 /** Converting the vec4[2] object to 8 half-precision (16-bits) floating point values and packing into a uvec4 object
  *
  * @param[in] data The vec4[2] object to be packed
@@ -396,6 +426,9 @@
 #define VLOAD2_UNPACK4_CURRENT_ITEM_HALF(tensor_ptr, tensor_iter) VLOAD2_UNPACK4_HALF(tensor_ptr, CURRENT_ITEM_OFFSET(tensor_iter))
 #define VSTORE2_PACK4_CURRENT_ITEM_HALF(tensor_ptr, tensor_iter, data) VSTORE2_PACK4_HALF(tensor_ptr, CURRENT_ITEM_OFFSET(tensor_iter), data)
 
+#define VLOAD3_UNPACK6_HALF(tensor_ptr, offset) unpack6_half(VLOAD3(uvec3, tensor_ptr, offset)) /* VLOAD3 as a uvec3, then unpack6_half: yields a vec2[3] */
+#define VLOAD3_UNPACK6_CURRENT_ITEM_HALF(tensor_ptr, tensor_iter) VLOAD3_UNPACK6_HALF(tensor_ptr, CURRENT_ITEM_OFFSET(tensor_iter)) /* same, with offset = CURRENT_ITEM_OFFSET(tensor_iter) */
+
 #define VLOAD4_UNPACK8_HALF(tensor_ptr, offset) unpack8_half(VLOAD4(uvec4, tensor_ptr, offset))
 #define VSTORE4_PACK8_HALF(tensor_ptr, offset, data) VSTORE4(tensor_ptr, offset, pack8_half(data))
 #define VLOAD4_UNPACK8_CURRENT_ITEM_HALF(tensor_ptr, tensor_iter) VLOAD4_UNPACK8_HALF(tensor_ptr, CURRENT_ITEM_OFFSET(tensor_iter))