COMPMID-3703 Remove OpenCL padding: CLActivationLayerKernel + create utility macro

Change-Id: I73edadc7299247e7bc51ae37c00d3709023da44a
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4073
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/CL/cl_kernels/activation_layer.cl b/src/core/CL/cl_kernels/activation_layer.cl
index f846cb2..499378c 100644
--- a/src/core/CL/cl_kernels/activation_layer.cl
+++ b/src/core/CL/cl_kernels/activation_layer.cl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -61,23 +61,24 @@
 #endif /* not IN_PLACE */
 )
 {
+    uint x_offs = max((int)(get_global_id(0) * VEC_SIZE * sizeof(DATA_TYPE) - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE * sizeof(DATA_TYPE)), 0);
+
     // Get pixels pointer
-    Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
+    __global uchar *input_addr = input_ptr + input_offset_first_element_in_bytes + x_offs + get_global_id(1) * input_stride_y + get_global_id(2) * input_stride_z;
 #ifdef IN_PLACE
-    Tensor3D output = input;
+    __global uchar *output_addr = input_addr;
 #else  /* IN_PLACE */
-    Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
+    __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + x_offs + get_global_id(1) * output_stride_y + get_global_id(2) * output_stride_z;
 #endif /* IN_PLACE */
 
     // Load data
-    TYPE data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr);
+    TYPE data0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input_addr);
 
     // Perform activation
-    data = ACTIVATION(ACT, DATA_TYPE, data, A_VAL, B_VAL);
+    data0 = ACTIVATION(ACT, DATA_TYPE, data0, A_VAL, B_VAL);
 
     // Store result
-    VSTORE(VEC_SIZE)
-    (data, 0, (__global DATA_TYPE *)output.ptr);
+    STORE_VECTOR_SELECT(data, DATA_TYPE, output_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0)
 }
 
 #endif /* defined(ACT) */