COMPMID-809: Add NHWC data format on CLGEMMConvolutionLayer.

Change-Id: I50e4f5e7d47e21c300f754bee2c216863075b5cf
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/136191
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
diff --git a/src/core/CL/cl_kernels/im2col.cl b/src/core/CL/cl_kernels/im2col.cl
index c60c9a9..6f25ad4 100644
--- a/src/core/CL/cl_kernels/im2col.cl
+++ b/src/core/CL/cl_kernels/im2col.cl
@@ -136,6 +136,7 @@
  * @note The pad_left, pad_right, pad_top and pad_bottom must be passed at compile time using -DPAD_LEFT, -DPAD_RIGHT, -DPAD_TOP and -DPAD_BOTTOM: e.g. -DPAD_LEFT=1, -DPAD_RIGHT=2, -DPAD_TOP=3 and -DPAD_BOTTOM=2
  * @note The zero value to store in case we load values out-of-bounds must be passed at compile time using -DPAD_VALUE: e.g. -DPAD_VALUE=0.0
  * @note The stride along the X and Y directions must be passed at compile time using -DSTRIDE_X and -DSTRIDE_Y: e.g. -DSTRIDE_X=1 and -DSTRIDE_Y=1
+ * @note The dilation_x and dilation_y must be passed at compile time using -DDILATION_X and -DDILATION_Y: e.g. -DDILATION_X=1, -DDILATION_Y=1
  * @note In case biases will be added to the convolution -DHAS_BIAS has to be passed to append the final matrix with 1 in each row.
  *
  * @param[in]  src_ptr                           Pointer to the source tensor. Supported data types: QS8/QASYMM8/QS16/F16/F32
@@ -182,16 +183,18 @@
 
     for(int yk = 0; yk < KERNEL_HEIGHT; ++yk)
     {
-        const int y0 = yi + yk;
+        const int dilated_offset_y = yk * DILATION_Y;
+        const int y0               = yi + dilated_offset_y;
         if(y0 >= 0 && y0 < SRC_HEIGHT)
         {
             int xk;
             for(xk = 0; xk < KERNEL_WIDTH; xk++)
             {
-                const int x0 = xi + xk;
+                const int dilated_offset_x = xk * DILATION_X;
+                const int x0               = xi + dilated_offset_x;
                 if(x0 >= 0 && x0 < SRC_WIDTH)
                 {
-                    *((__global DATA_TYPE *)output_ptr) = PTR_TO_VALUE(input_ptr + xk * src_stride_y + yk * src_stride_z, DATA_TYPE);
+                    *((__global DATA_TYPE *)output_ptr) = PTR_TO_VALUE(input_ptr + dilated_offset_x * src_stride_y + dilated_offset_y * src_stride_z, DATA_TYPE);
                 }
                 else
                 {