COMPMID-3735 Remove OpenCL padding: CLSoftmaxLayerKernel - Renamed SELECT_DATA_TYPE to SELECT_VEC_DATA_TYPE to reflect its usage with vectors. SELECT_DATA_TYPE(dt) will now return the primitive data type - Changed the interface of VEC_OFFS and V_OFFS in order to receive the primitive data type as a parameter rather than its vector form - Performed a general cleanup of the kernels, such as creating macros for sum and max reductions, removing redundant macros, defines, variables, calculations, etc. - Used VEC_SIZE and VEC_SIZE_LEFTOVER in every kernel in order to allow computation for smaller shapes without adding padding - Removed the actual padding from the kernel and adjusted its calculations accordingly. Added asserts for padding removal checks. Removed invalid Validate tests. Change-Id: If5ccbd5d34e255d38c7f6bfe8740e2b80b28e264 Signed-off-by: Giorgio Arena <giorgio.arena@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4277 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: SiCong Li <sicong.li@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>

commit: 2d1a835b68eb27a800838fc2b563b12eddf2c19f [log] [tgz]
author: Giorgio Arena <giorgio.arena@arm.com> Mon Oct 26 15:04:08 2020 +0000
committer: Giorgio Arena <giorgio.arena@arm.com> Thu Nov 12 12:42:51 2020 +0000
tree: 228dee073d37d2ec5b5dfbdb3d0e1e512ecb2d22
parent: 00c7601b1f9c3bec1d3b1db844abb513b9012541 [diff] [blame]
diff --git a/src/core/CL/cl_kernels/depthwise_convolution.cl b/src/core/CL/cl_kernels/depthwise_convolution.cl
index 5aba206..81fa01a 100644
--- a/src/core/CL/cl_kernels/depthwise_convolution.cl
+++ b/src/core/CL/cl_kernels/depthwise_convolution.cl

@@ -1476,17 +1476,17 @@
 
 #define VEC_FLOAT VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
 
-#define FILL_ZERO_OUT_OF_BOUND_3(data_type, vec_size, basename, cond)                                                                 \
-    ({                                                                                                                                \
-        basename##0 = select(basename##0, (VEC_DATA_TYPE(data_type, vec_size))0, (SELECT_DATA_TYPE(data_type, vec_size))((cond).s0)); \
-        basename##1 = select(basename##1, (VEC_DATA_TYPE(data_type, vec_size))0, (SELECT_DATA_TYPE(data_type, vec_size))((cond).s1)); \
-        basename##2 = select(basename##2, (VEC_DATA_TYPE(data_type, vec_size))0, (SELECT_DATA_TYPE(data_type, vec_size))((cond).s2)); \
+#define FILL_ZERO_OUT_OF_BOUND_3(data_type, vec_size, basename, cond)                                                                     \
+    ({                                                                                                                                    \
+        basename##0 = select(basename##0, (VEC_DATA_TYPE(data_type, vec_size))0, (SELECT_VEC_DATA_TYPE(data_type, vec_size))((cond).s0)); \
+        basename##1 = select(basename##1, (VEC_DATA_TYPE(data_type, vec_size))0, (SELECT_VEC_DATA_TYPE(data_type, vec_size))((cond).s1)); \
+        basename##2 = select(basename##2, (VEC_DATA_TYPE(data_type, vec_size))0, (SELECT_VEC_DATA_TYPE(data_type, vec_size))((cond).s2)); \
     })
 
-#define FILL_ZERO_OUT_OF_BOUND_4(data_type, vec_size, basename, cond)                                                                 \
-    ({                                                                                                                                \
-        FILL_ZERO_OUT_OF_BOUND_3(data_type, vec_size, basename, cond);                                                                \
-        basename##3 = select(basename##3, (VEC_DATA_TYPE(data_type, vec_size))0, (SELECT_DATA_TYPE(data_type, vec_size))((cond).s3)); \
+#define FILL_ZERO_OUT_OF_BOUND_4(data_type, vec_size, basename, cond)                                                                     \
+    ({                                                                                                                                    \
+        FILL_ZERO_OUT_OF_BOUND_3(data_type, vec_size, basename, cond);                                                                    \
+        basename##3 = select(basename##3, (VEC_DATA_TYPE(data_type, vec_size))0, (SELECT_VEC_DATA_TYPE(data_type, vec_size))((cond).s3)); \
     })
 
 #if defined(CONV_STRIDE_X) && defined(CONV_STRIDE_Y)
@@ -1728,8 +1728,8 @@
     __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes + x_offset;
 #endif /* defined(DST_DEPTH) */
 
-    int4 src_coord_y = (int4)(y * NUM_ROWS_PROCESSED - CONV_PAD_LEFT) + V_OFFS4(int4);
-    int4 src_coord_z = (int4)(z * NUM_PLANES_PROCESSED - CONV_PAD_TOP) + V_OFFS4(int4);
+    int4 src_coord_y = (int4)(y * NUM_ROWS_PROCESSED - CONV_PAD_LEFT) + V_OFFS4(int);
+    int4 src_coord_z = (int4)(z * NUM_PLANES_PROCESSED - CONV_PAD_TOP) + V_OFFS4(int);
 
     int4 src_offset_y = clamp(src_coord_y, (int4)0, (int4)(SRC_DIM_1 - 1));
     int4 src_offset_z = clamp(src_coord_z, (int4)0, (int4)(SRC_DIM_2 - 1));
@@ -1844,7 +1844,7 @@
     acc3 += bias_values;
 #endif // defined(HAS_BIAS)
 
-    int2 dst_offset_y = min((int2)(y * NUM_ROWS_PROCESSED) + V_OFFS2(int2), (int2)(DST_DIM_1 - 1)) * (int2)dst_stride_y;
+    int2 dst_offset_y = min((int2)(y * NUM_ROWS_PROCESSED) + V_OFFS2(int), (int2)(DST_DIM_1 - 1)) * (int2)dst_stride_y;
     int  dst_coord_z  = z * NUM_PLANES_PROCESSED;
 
 #if defined(DST_DEPTH)
commit	2d1a835b68eb27a800838fc2b563b12eddf2c19f	[log] [tgz]
author	Giorgio Arena <giorgio.arena@arm.com>	Mon Oct 26 15:04:08 2020 +0000
committer	Giorgio Arena <giorgio.arena@arm.com>	Thu Nov 12 12:42:51 2020 +0000
tree	228dee073d37d2ec5b5dfbdb3d0e1e512ecb2d22
parent	00c7601b1f9c3bec1d3b1db844abb513b9012541 [diff] [blame]