COMPMID-2243 ArgMinMaxLayer: support new datatypes

Change-Id: I846e833e0c94090cbbdcd6aee6061cea8295f4f9
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1131
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/CL/cl_kernels/reduction_operation.cl b/src/core/CL/cl_kernels/reduction_operation.cl
index 2651123..749e3cd 100644
--- a/src/core/CL/cl_kernels/reduction_operation.cl
+++ b/src/core/CL/cl_kernels/reduction_operation.cl
@@ -23,6 +23,19 @@
  */
 #include "helpers.h"
 
+#if FLOAT_DATA_TYPE
+#define ISGREATER(x, y) isgreater(x, y)
+#define ISLESS(x, y) isless(x, y)
+#else // !FLOAT_DATA_TYPE
+#if defined(WIDTH)
+#define ISGREATER(x, y) (x > y) ? 1 : 0
+#define ISLESS(x, y) (x < y) ? 1 : 0
+#else // !defined(WIDTH)
+#define ISGREATER(x, y) select((int16)0, (int16)-1, x > y)
+#define ISLESS(x, y) select((int16)0, (int16)-1, x < y)
+#endif // defined(WIDTH)
+#endif // FLOAT_DATA_TYPE
+
 /** Calculate square sum of a vector
  *
  * @param[in] input Pointer to the first pixel.
@@ -124,9 +137,9 @@
             {
 #if defined(PROD)
                 local_results[lid] *= local_results[lid + i];
-#else  //!defined(PROD)
+#else  // !defined(PROD)
                 local_results[lid] += local_results[lid + i];
-#endif //defined(PROD)
+#endif // defined(PROD)
             }
             barrier(CLK_LOCAL_MEM_FENCE);
         }
@@ -138,7 +151,7 @@
             {
                 local_results[0] /= WIDTH;
             }
-#endif /* defined(MEAN) && defined(WIDTH) */
+#endif // defined(MEAN) && defined(WIDTH)
             ((__global DATA_TYPE *)offset(&partial_res, get_group_id(0), y))[0] = local_results[0];
         }
     }
@@ -153,7 +166,7 @@
  * @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used
  * @note In case of ARG_MIN and ARG_MAX the condition data type must be passed at compile time using -DCOND_DATA_TYPE e.g. -DCOND_DATA_TYPE=short
  *
- * @param[in] src_ptr                              Pointer to the source tensor. Supported data types: F16/F32 and QASYMM8 for operation MEAN
+ * @param[in] src_ptr                              Pointer to the source tensor. Supported data types: S32/F16/F32 and QASYMM8 for operation MEAN
  * @param[in] src_stride_x                         Stride of the source tensor in X dimension (in bytes)
  * @param[in] src_step_x                           src_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in] src_offset_first_element_in_bytes    The offset of the first element in the source tensor
@@ -179,11 +192,11 @@
     {
         DATA_TYPE_PROMOTED in = *((__global DATA_TYPE *)vector_offset(&src, x));
 #if defined(ARG_MAX)
-        indx = select(indx, x, isgreater(in, res));
-        res  = select(res, in, CONVERT(isgreater(in, res), COND_DATA_TYPE));
+        indx = select(indx, x, ISGREATER(in, res));
+        res  = select(res, in, CONVERT(ISGREATER(in, res), COND_DATA_TYPE));
 #elif defined(ARG_MIN)
-        indx = select(indx, x, isless(in, res));
-        res  = select(res, in, CONVERT(isless(in, res), COND_DATA_TYPE));
+        indx = select(indx, x, ISLESS(in, res));
+        res  = select(res, in, CONVERT(ISLESS(in, res), COND_DATA_TYPE));
 #else  // !(defined(ARG_MAX) || defined(ARG_MIN))
         res += in;
 #endif // defined(ARG_MAX) || defined(ARG_MIN)
@@ -199,7 +212,7 @@
     *((__global uchar *)output.ptr) = convert_uchar(res);
 #endif // defined(ARG_MAX) || defined(ARG_MIN)
 }
-#endif /* defined(WIDTH) */
+#endif // defined(WIDTH)
 
 #if defined(HEIGHT)
 /** This kernel performs reduction on y-axis.
@@ -207,7 +220,7 @@
  * @note The input data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
  * @note The height size must be passed at compile time using -DHEIGHT e.g. -DHEIGHT=128
  *
- * @param[in] src_ptr                              Pointer to the source tensor. Supported data types: QASYMM8/F16/F32
+ * @param[in] src_ptr                              Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
  * @param[in] src_stride_x                         Stride of the source tensor in X dimension (in bytes)
  * @param[in] src_step_x                           src_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in] src_stride_y                         Stride of the source tensor in Y dimension (in bytes)
@@ -243,22 +256,22 @@
         VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
         in = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, y)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
 #if defined(ARG_MAX)
-        uint16 cond_conv = CONVERT(isgreater(in, res), uint16);
+        uint16 cond_conv = CONVERT(ISGREATER(in, res), uint16);
         indx             = select(indx, y, cond_conv);
-        res              = select(res, in, isgreater(in, res));
+        res              = select(res, in, ISGREATER(in, res));
 #elif defined(ARG_MIN)
-        uint16  cond_conv           = CONVERT(isless(in, res), uint16);
+        uint16  cond_conv           = CONVERT(ISLESS(in, res), uint16);
         indx                        = select(indx, y, cond_conv);
-        res                         = select(res, in, isless(in, res));
+        res                         = select(res, in, ISLESS(in, res));
 #else // !(defined(ARG_MAX) || defined(ARG_MIN))
 #if defined(SUM_SQUARE)
         in *= in;
 #endif // defined(SUM_SQUARE)
 #if defined(PROD)
         res *= in;
-#else  //!defined(PROD)
+#else  // !defined(PROD)
         res += in;
-#endif //defined(PROD)
+#endif // defined(PROD)
 #endif // defined(ARG_MAX) || defined(ARG_MIN)
     }
 
@@ -272,7 +285,7 @@
     vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
 #endif // defined(ARG_MAX) || defined(ARG_MIN)
 }
-#endif /* defined(HEIGHT) */
+#endif // defined(HEIGHT)
 
 #if defined(DEPTH)
 /** This kernel performs reduction on z-axis.
@@ -280,7 +293,7 @@
  * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
  * @note The depth size must be passed at compile time using -DDEPTH e.g. -DDEPTH=128
  *
- * @param[in] input_ptr                            Pointer to the source tensor. Supported data types: QASYMM8/F16/F32
+ * @param[in] input_ptr                            Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
  * @param[in] input_stride_x                       Stride of the source tensor in X dimension (in bytes)
  * @param[in] input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in] input_stride_y                       Stride of the source tensor in Y dimension (in bytes)
@@ -330,13 +343,13 @@
 #endif // defined(COMPLEX)
 
 #if defined(ARG_MAX)
-        uint16 cond_conv = CONVERT(isgreater(in, res), uint16);
+        uint16 cond_conv = CONVERT(ISGREATER(in, res), uint16);
         indx             = select(indx, z, cond_conv);
-        res              = select(res, in, isgreater(in, res));
+        res              = select(res, in, ISGREATER(in, res));
 #elif defined(ARG_MIN)
-        uint16 cond_conv = CONVERT(isless(in, res), uint16);
+        uint16 cond_conv = CONVERT(ISLESS(in, res), uint16);
         indx             = select(indx, z, cond_conv);
-        res              = select(res, in, isless(in, res));
+        res              = select(res, in, ISLESS(in, res));
 #else // !(defined(ARG_MAX) || defined(ARG_MIN))
 #if defined(SUM_SQUARE)
         in *= in;
@@ -374,7 +387,7 @@
  * @note The batch size must be passed at compile time using -DBATCH e.g. -DBATCH=128
  * @note The depth size must be passed at compile time using -DBATCH e.g. -DDEPTH=128
  *
- * @param[in] input_ptr                            Pointer to the source tensor. Supported data types: QASYMM8/F16/F32
+ * @param[in] input_ptr                            Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
  * @param[in] input_stride_x                       Stride of the source tensor in X dimension (in bytes)
  * @param[in] input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in] input_stride_y                       Stride of the source tensor in Y dimension (in bytes)
@@ -419,13 +432,13 @@
         in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, w)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
 
 #if defined(ARG_MAX)
-        uint16 cond_conv = CONVERT(isgreater(in, res), uint16);
+        uint16 cond_conv = CONVERT(ISGREATER(in, res), uint16);
         indx             = select(indx, w, cond_conv);
-        res              = select(res, in, isgreater(in, res));
+        res              = select(res, in, ISGREATER(in, res));
 #elif defined(ARG_MIN)
-        uint16 cond_conv = CONVERT(isless(in, res), uint16);
+        uint16 cond_conv = CONVERT(ISLESS(in, res), uint16);
         indx             = select(indx, w, cond_conv);
-        res              = select(res, in, isless(in, res));
+        res              = select(res, in, ISLESS(in, res));
 #else // !(defined(ARG_MAX) || defined(ARG_MIN))
 #if defined(SUM_SQUARE)
         in *= in;