COMPMID-541: Fix padding in CLMinMaxLocationKernel

Change-Id: Ie17e3f14c428553d433da2a564e016bfac7749a9
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/88881
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>

commit: 4726fdf9f7bb9638ffbb4e358cc788686b68a414 [log] [tgz]
author: Moritz Pflanzer <moritz.pflanzer@arm.com> Sat Sep 23 10:47:54 2017 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> Fri Nov 02 16:35:24 2018 +0000
tree: 5f578776ae671a54c91d11c8b76de687dc7d02c3
parent: 219c69108f72a0c01f0f14dda579fc0bce808d07 [diff] [blame]
diff --git a/src/core/CL/cl_kernels/minmaxloc.cl b/src/core/CL/cl_kernels/minmaxloc.cl
index 05fc78d..0f557a4 100644
--- a/src/core/CL/cl_kernels/minmaxloc.cl
+++ b/src/core/CL/cl_kernels/minmaxloc.cl

@@ -45,7 +45,7 @@
 
 __constant VEC_DATA_TYPE(DATA_TYPE, 16) type_min = (VEC_DATA_TYPE(DATA_TYPE, 16))(DATA_TYPE_MIN);
 __constant VEC_DATA_TYPE(DATA_TYPE, 16) type_max = (VEC_DATA_TYPE(DATA_TYPE, 16))(DATA_TYPE_MAX);
-__constant uint16 idx16 = (uint16)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+__constant int16 idx16 = (int16)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
 
 /** This function identifies the min and maximum value of an input image.
  *
@@ -65,7 +65,7 @@
 __kernel void minmax(
     IMAGE_DECLARATION(src),
     __global int *min_max,
-    uint          width)
+    int           width)
 {
     Image src = CONVERT_TO_IMAGE_STRUCT(src);
 
@@ -76,11 +76,11 @@
     local_max = type_min;
 
     // Calculate min/max of row
-    uint width4 = width >> 4;
-    for(uint i = 0; i < width4; i++)
+    int i = 0;
+    for(; i + 16 <= width; i += 16)
     {
         VEC_DATA_TYPE(DATA_TYPE, 16)
-        data      = vload16(0, (__global DATA_TYPE *)offset(&src, i << 4, 0));
+        data      = vload16(0, (__global DATA_TYPE *)offset(&src, i, 0));
         local_min = min(data, local_min);
         local_max = max(data, local_max);
     }
@@ -88,15 +88,15 @@
 #ifdef NON_MULTIPLE_OF_16
     // Handle non multiple of 16
     VEC_DATA_TYPE(DATA_TYPE, 16)
-    data = vload16(0, (__global DATA_TYPE *)offset(&src, width4 << 4, 0));
+    data = vload16(0, (__global DATA_TYPE *)offset(&src, i, 0));
 #ifdef IS_DATA_TYPE_FLOAT
-    int16 widx = convert_int16(((uint16)(width4 << 4) + idx16) < width);
+    int16 valid_indices = (i + idx16) < width;
 #else  /* IS_DATA_TYPE_FLOAT */
     VEC_DATA_TYPE(DATA_TYPE, 16)
-    widx = CONVERT(((uint16)(width4 << 4) + idx16) < width, VEC_DATA_TYPE(DATA_TYPE, 16));
+    valid_indices = CONVERT((i + idx16) < width, VEC_DATA_TYPE(DATA_TYPE, 16));
 #endif /* IS_DATA_TYPE_FLOAT */
-    local_max = max(local_max, select(type_min, data, widx));
-    local_min = min(local_min, select(type_max, data, widx));
+    local_max = max(local_max, select(type_min, data, valid_indices));
+    local_min = min(local_min, select(type_max, data, valid_indices));
 #endif /* NON_MULTIPLE_OF_16 */
 
     // Perform min/max reduction
commit	4726fdf9f7bb9638ffbb4e358cc788686b68a414	[log] [tgz]
author	Moritz Pflanzer <moritz.pflanzer@arm.com>	Sat Sep 23 10:47:54 2017 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	Fri Nov 02 16:35:24 2018 +0000
tree	5f578776ae671a54c91d11c8b76de687dc7d02c3
parent	219c69108f72a0c01f0f14dda579fc0bce808d07 [diff] [blame]