COMPMID-2580: Fix out of bound read in Depthwise Convolution layer (OpenCL)

Change-Id: I00e39ed21cc30034aa10ac58b64d533e833eafc8
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1727
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/CL/cl_kernels/depthwise_convolution.cl b/src/core/CL/cl_kernels/depthwise_convolution.cl
index c55a3d9..fb4a0fc 100644
--- a/src/core/CL/cl_kernels/depthwise_convolution.cl
+++ b/src/core/CL/cl_kernels/depthwise_convolution.cl
@@ -280,16 +280,17 @@
  * @return a float2 containing 2 convoluted values.
  */
 inline float2 convolution3x3(
-    Image      *src,
+    __global const uchar *src,
+    unsigned int          src_stride_y,
     const float mat0, const float mat1, const float mat2,
     const float mat3, const float mat4, const float mat5,
     const float mat6, const float mat7, const float mat8)
 {
     float2 pixels;
 
-    pixels = convolution1x3(offset(src, 0, 0), mat0, mat1, mat2);
-    pixels += convolution1x3(offset(src, 0, DILATION_Y), mat3, mat4, mat5);
-    pixels += convolution1x3(offset(src, 0, DILATION_Y * 2), mat6, mat7, mat8);
+    pixels = convolution1x3((src + 0 * DILATION_Y * src_stride_y), mat0, mat1, mat2);
+    pixels += convolution1x3((src + 1 * DILATION_Y * src_stride_y), mat3, mat4, mat5);
+    pixels += convolution1x3((src + 2 * DILATION_Y * src_stride_y), mat6, mat7, mat8);
 
     return pixels;
 }
@@ -341,27 +342,33 @@
     Image    src     = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(src);
     Image    dst     = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst);
     Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(weights);
-#if defined(HAS_BIAS)
-    Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases);
-#endif //defined(HAS_BIAS)
+
+    float2 pixels = 0.0f;
 
     // Extract channel and linearized batch indices
     const int channel = get_global_id(2) % DST_CHANNELS;
     const int batch   = get_global_id(2) / DST_CHANNELS;
     // Load relevant input and weights data (Accounts depth multiplier when indexing input, OFM = IFM * DEPTH_MULTIPLIER)
-    src.ptr -= batch * (DST_CHANNELS / DEPTH_MULTIPLIER) * (DEPTH_MULTIPLIER - 1) * src_step_z + (channel - (channel / DEPTH_MULTIPLIER)) * src_step_z;
+
     __global uchar *weights_addr = weights.ptr + get_global_id(0) * weights_step_x + get_global_id(1) * weights_step_y + channel * weights_step_z;
 
-    uchar3 offset          = (uchar3)(0, 1, 2) * (uchar3)weights_stride_y;
-    float3 weights_values0 = vload3(0, (__global float *)(weights_addr + offset.s0));
-    float3 weights_values1 = vload3(0, (__global float *)(weights_addr + offset.s1));
-    float3 weights_values2 = vload3(0, (__global float *)(weights_addr + offset.s2));
+    __global uchar *src_addr = src.ptr - batch * (DST_CHANNELS / DEPTH_MULTIPLIER) * (DEPTH_MULTIPLIER - 1) * src_step_z - (channel - (channel / DEPTH_MULTIPLIER)) * src_step_z;
 
-    float2 pixels = convolution3x3(&src, weights_values0.s0, weights_values0.s1, weights_values0.s2,
-                                   weights_values1.s0, weights_values1.s1, weights_values1.s2,
-                                   weights_values2.s0, weights_values2.s1, weights_values2.s2);
+    // Load the weights
+    float3 weights_values0 = vload3(0, (__global float *)(weights_addr + 0 * weights_stride_y));
+    float3 weights_values1 = vload3(0, (__global float *)(weights_addr + 1 * weights_stride_y));
+    float3 weights_values2 = vload3(0, (__global float *)(weights_addr + 2 * weights_stride_y));
+
+    pixels = convolution3x3(src_addr, src_stride_y,
+                            weights_values0.s0, weights_values0.s1, weights_values0.s2,
+                            weights_values1.s0, weights_values1.s1, weights_values1.s2,
+                            weights_values2.s0, weights_values2.s1, weights_values2.s2);
 #if defined(HAS_BIAS)
-    pixels += (float2)(*((__global float *)(biases.ptr + channel * biases_stride_x)));
+    Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases);
+
+    float bias = *((__global float *)(vector_offset(&biases, channel)));
+
+    pixels += (float2)bias;
 #endif //defined(HAS_BIAS)
 
     vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels, A_VAL, B_VAL), 0, (__global float *)dst.ptr);
@@ -1862,4 +1869,4 @@
 }
 
 #endif // defined(NUM_ROWS_PROCESSED) && defined(NUM_PLANES_PROCESSED)
-#endif // defined(VEC_SIZE) && defined(SRC_DIM_2) && defined(CONV_PAD_TOP) && defined(CONV_PAD_LEFT) && defined(DATA_TYPE)
+#endif // defined(VEC_SIZE) && defined(SRC_DIM_2) && defined(CONV_PAD_TOP) && defined(CONV_PAD_LEFT) && defined(DATA_TYPE)
\ No newline at end of file