COMPMID-3045: CTS failures in ARGMAX/MIN

Change-Id: I35276a3d95dc99a7f4dea00e89c8ed206a5f13f1
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2669
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
diff --git a/src/core/CL/cl_kernels/arg_min_max.cl b/src/core/CL/cl_kernels/arg_min_max.cl
index 06dcc8d..104d30d 100644
--- a/src/core/CL/cl_kernels/arg_min_max.cl
+++ b/src/core/CL/cl_kernels/arg_min_max.cl
@@ -254,10 +254,15 @@
 
         barrier(CLK_LOCAL_MEM_FENCE);
 
+        // Round lsize up to the nearest power of 2 (two shift-or steps are enough because lsize is at most 8)
+        unsigned int middle = lsize - 1;
+        middle |= middle >> 1;
+        middle |= middle >> 2;
+        middle += 1;
         // Perform parallel reduction
-        for(unsigned int i = lsize >> 1; i > 0; i >>= 1)
+        for(unsigned int i = middle; i > 0; i >>= 1)
         {
-            if(lid < i)
+            if( lid < i && lid + i < lsize)
             {
                 DATA_TYPE tmp0 = *(src_in_row + local_results[lid]);
                 DATA_TYPE tmp1 = *(src_in_row + local_results[lid + i]);