[Nightly Failure] Fix DeconvolutionLayer OpenCL kernel compilation

- Add case for VEC_SIZE_Y == 3 in the TRANSPOSED_U macro
- Load u3 only when VEC_SIZE_Y > 3 (it was previously loaded whenever VEC_SIZE_Y > 2)

Resolves: COMPMID-4094

Change-Id: I31870e589e66d895f9bf65c87aa04f32038365c0
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4864
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/CL/cl_kernels/transpose.cl b/src/core/CL/cl_kernels/transpose.cl
index 832572b..82db290 100644
--- a/src/core/CL/cl_kernels/transpose.cl
+++ b/src/core/CL/cl_kernels/transpose.cl
@@ -40,6 +40,11 @@
     {                     \
         u0, u1            \
     }
+#elif VEC_SIZE_Y == 3
+#define TRANSPOSED_U(val) \
+    {                     \
+        u0, u1, u2        \
+    }
 #elif VEC_SIZE_Y == 4
 #define TRANSPOSED_U(val) \
     {                     \
@@ -68,6 +73,11 @@
     {                     \
         u0.val, u1.val    \
     }
+#elif VEC_SIZE_Y == 3
+#define TRANSPOSED_U(val)      \
+    {                          \
+        u0.val, u1.val, u2.val \
+    }
 #elif VEC_SIZE_Y == 4
 #define TRANSPOSED_U(val)              \
     {                                  \
@@ -142,9 +152,11 @@
 #if VEC_SIZE_Y > 2
     VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_X)
     u2 = VLOAD(VEC_SIZE_X)(0, (__global DATA_TYPE *)(src_addr + 2 * src_stride_y));
+#endif /* VEC_SIZE_Y > 2 */
+#if VEC_SIZE_Y > 3
     VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_X)
     u3 = VLOAD(VEC_SIZE_X)(0, (__global DATA_TYPE *)(src_addr + 3 * src_stride_y));
-#endif /* VEC_SIZE_Y > 2 */
+#endif /* VEC_SIZE_Y > 3 */
 #if VEC_SIZE_Y > 4
     VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_X)
     u4 = VLOAD(VEC_SIZE_X)(0, (__global DATA_TYPE *)(src_addr + 4 * src_stride_y));