COMPMID-2671 use Signed32 for default output data type of ArgMinMax

Signed32 is used as the data type before and after reshaping of ArgMinMax.

Change-Id: I230af43a931d4e106de6c72f716ced1dab511084
Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2262
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
diff --git a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
index 18a1bab..172ed89 100644
--- a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
+++ b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
@@ -33,10 +33,10 @@
 
 /** Interface for the reduction operation kernel
  *
- * @note For ARG_MIN/ARG_MAX reduction, the indices are computed in unsigned
- *       32-bit (U32). It is the user's responsibility to check that the
- *       results do not overflow in case the output data type is set to signed
- *       32-bit integer (S32).
+ * @note For ARG_MIN/ARG_MAX reduction, the default data type for an uninitialized
+ *       output tensor is signed 32-bit integer (S32). It is the user's responsibility
+ *       to check that the results do not overflow because the indices are computed
+ *       in unsigned 32-bit (U32).
  */
 class CLReductionOperationKernel : public ICLKernel
 {
diff --git a/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h b/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h
index 4b28b8d..0d402fb 100644
--- a/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h
+++ b/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h
@@ -32,10 +32,10 @@
 
 /** NEON kernel to perform a reduction operation
  *
- * @note For ARG_MIN/ARG_MAX reduction, the indices are computed in unsigned
- *       32-bit (U32). It is the user's responsibility to check that the
- *       results do not overflow in case the output data type is set to signed
- *       32-bit integer (S32).
+ * @note For ARG_MIN/ARG_MAX reduction, the default data type for an uninitialized
+ *       output tensor is signed 32-bit integer (S32). It is the user's responsibility
+ *       to check that the results do not overflow because the indices are computed
+ *       in unsigned 32-bit (U32).
  */
 class NEReductionOperationKernel : public INEKernel
 {
diff --git a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
index 28feee0..1b465a4 100644
--- a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
@@ -38,9 +38,10 @@
 /** Function to calculate the index of the minimum or maximum values in a
  *  tensor based on an axis.
  *
- * @note The indices are computed in unsigned 32-bit (U32). It is the user's
- *       responsibility to check that the results do not overflow in case the
- *       output data type is set to signed 32-bit integer (S32).
+ * @note The default data type for an uninitialized output tensor is
+ *       signed 32-bit integer (S32). It is the user's responsibility to check
+ *       that the results do not overflow because the indices are computed
+ *       in unsigned 32-bit (U32).
  */
 class CLArgMinMaxLayer : public IFunction
 {
diff --git a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
index d2ddff2..ca6794b 100644
--- a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
@@ -42,9 +42,10 @@
  * -# @ref NEReductionOperationKernel
  * -# @ref NEFillBorderKernel
  *
- * @note The indices are computed in unsigned 32-bit (U32). It is the user's
- *       responsibility to check that the results do not overflow in case the
- *       output data type is set to signed 32-bit integer (S32).
+ * @note The default data type for an uninitialized output tensor is
+ *       signed 32-bit integer (S32). It is the user's responsibility to check
+ *       that the results do not overflow because the indices are computed
+ *       in unsigned 32-bit (U32).
  */
 class NEArgMinMaxLayer : public IFunction
 {
diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp
index a085ab1..cbf3923 100644
--- a/src/core/CL/kernels/CLReductionOperationKernel.cpp
+++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp
@@ -83,7 +83,7 @@
     // Output tensor auto initialization if not yet initialized
     const bool        is_arg_min_max   = (op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::ARG_IDX_MAX);
     const TensorShape output_shape     = arm_compute::misc::shape_calculator::compute_reduced_shape(input->tensor_shape(), axis, !is_arg_min_max);
-    const DataType    output_data_type = is_arg_min_max ? DataType::U32 : input->data_type();
+    const DataType    output_data_type = is_arg_min_max ? DataType::S32 : input->data_type();
     auto_init_if_empty(*output, input->clone()->set_tensor_shape(output_shape).set_data_type(output_data_type).reset_padding().set_is_resizable(true));
 
     const unsigned int num_elems_processed_per_iteration = (is_data_type_quantized(input->data_type()) && (axis == 0)) ? 1 : 16;
diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
index 85abda5..a2ce0de 100644
--- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
@@ -1204,7 +1204,7 @@
 
     // Output auto initialization if not yet initialized
     const bool is_arg_min_max   = (op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::ARG_IDX_MAX);
-    DataType   output_data_type = is_arg_min_max ? DataType::U32 : input->data_type();
+    DataType   output_data_type = is_arg_min_max ? DataType::S32 : input->data_type();
     auto_init_if_empty(*output, input->clone()->set_tensor_shape(output_shape).set_data_type(output_data_type).reset_padding().set_is_resizable(true));
 
     unsigned int num_elems_processed_per_iteration = 16 / data_size_from_type(input->data_type());
diff --git a/src/runtime/CL/functions/CLReductionOperation.cpp b/src/runtime/CL/functions/CLReductionOperation.cpp
index 447c15b..3aa5a81 100644
--- a/src/runtime/CL/functions/CLReductionOperation.cpp
+++ b/src/runtime/CL/functions/CLReductionOperation.cpp
@@ -86,7 +86,7 @@
     const auto input_data_type    = input->data_type();
     const auto input_num_channles = input->num_channels();
     const auto input_qinfo        = input->quantization_info();
-    const auto output_data_type   = is_arg_min_max ? DataType::U32 : output->data_type();
+    const auto output_data_type   = is_arg_min_max ? DataType::S32 : output->data_type();
 
     auto initialize_tensorinfo = [](TensorInfo & ti, TensorShape shape, DataType data_type, int num_channels, QuantizationInfo qinfo)
     {
@@ -208,7 +208,7 @@
 
     if(is_arg_min_max)
     {
-        _results_vector.back().info()->set_data_type(DataType::U32).set_is_resizable(true).reset_padding();
+        _results_vector.back().info()->set_data_type(DataType::S32).set_is_resizable(true).reset_padding();
     }
 
     return _is_reshape_required ? &_results_vector.back() : output;
@@ -229,7 +229,7 @@
     if(_is_reshape_required)
     {
         const TensorShape output_shape     = arm_compute::misc::shape_calculator::compute_reduced_shape(input->info()->tensor_shape(), axis, false);
-        const auto        output_data_type = is_arg_min_max ? DataType::U32 : input->info()->data_type();
+        const auto        output_data_type = is_arg_min_max ? DataType::S32 : input->info()->data_type();
         auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape).set_data_type(output_data_type).reset_padding().set_is_resizable(true));
     }
 
diff --git a/src/runtime/NEON/functions/NEReductionOperation.cpp b/src/runtime/NEON/functions/NEReductionOperation.cpp
index 09cd765..397fe21 100644
--- a/src/runtime/NEON/functions/NEReductionOperation.cpp
+++ b/src/runtime/NEON/functions/NEReductionOperation.cpp
@@ -80,7 +80,7 @@
         const auto input_num_channles = input->num_channels();
         const auto input_qinfo        = input->quantization_info();
         const auto is_arg_min_max     = (op == ReductionOperation::ARG_IDX_MAX) || (op == ReductionOperation::ARG_IDX_MIN);
-        const auto output_data_type   = is_arg_min_max ? DataType::U32 : output->data_type();
+        const auto output_data_type   = is_arg_min_max ? DataType::S32 : output->data_type();
 
         info_before_reshape.set_data_type(output_data_type).set_tensor_shape(shape_before_reshape).set_num_channels(input_num_channles).set_quantization_info(input_qinfo);
 
@@ -110,7 +110,7 @@
     {
         const auto output_internal_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(input->info()->tensor_shape(), axis);
         const auto output_external_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(input->info()->tensor_shape(), axis, false);
-        const auto output_data_type      = is_arg_min_max ? DataType::U32 : input->info()->data_type();
+        const auto output_data_type      = is_arg_min_max ? DataType::S32 : input->info()->data_type();
         const auto num_channels          = input->info()->num_channels();
         const auto qinfo                 = input->info()->quantization_info();
 
diff --git a/tests/validation/CL/ArgMinMax.cpp b/tests/validation/CL/ArgMinMax.cpp
index 845fdbf..5b2e6f3 100644
--- a/tests/validation/CL/ArgMinMax.cpp
+++ b/tests/validation/CL/ArgMinMax.cpp
@@ -56,7 +56,7 @@
         }),
         framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(27U, 3U, 2U), 1, DataType::F32),
                                                  TensorInfo(TensorShape(27U, 3U, 2U), 1, DataType::F32),
-                                                 TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::U32),
+                                                 TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::S32),
                                                  TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32),
                                                  TensorInfo(TensorShape(32U, 16U, 1U, 2U), 1, DataType::U32)
         })),
diff --git a/tests/validation/NEON/ArgMinMax.cpp b/tests/validation/NEON/ArgMinMax.cpp
index 642a69b..174cb19 100644
--- a/tests/validation/NEON/ArgMinMax.cpp
+++ b/tests/validation/NEON/ArgMinMax.cpp
@@ -56,7 +56,7 @@
         }),
         framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(27U, 3U, 1U, 2U), 1, DataType::F32),
                                                  TensorInfo(TensorShape(27U, 3U, 1U, 2U), 1, DataType::F32),
-                                                 TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::U32),
+                                                 TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::S32),
                                                  TensorInfo(TensorShape(32U, 16U, 1U, 2U), 1, DataType::F32)
         })),
         framework::dataset::make("Axis", { 4, 0, 2, 0 })),
diff --git a/tests/validation/fixtures/ArgMinMaxFixture.h b/tests/validation/fixtures/ArgMinMaxFixture.h
index f8fe4ff..a4d03fb 100644
--- a/tests/validation/fixtures/ArgMinMaxFixture.h
+++ b/tests/validation/fixtures/ArgMinMaxFixture.h
@@ -114,7 +114,7 @@
         return dst;
     }
 
-    SimpleTensor<uint32_t> compute_reference(TensorShape &src_shape, DataType data_type, int axis, ReductionOperation op, QuantizationInfo q_info)
+    SimpleTensor<int32_t> compute_reference(TensorShape &src_shape, DataType data_type, int axis, ReductionOperation op, QuantizationInfo q_info)
     {
         // Create reference
         SimpleTensor<T> src{ src_shape, data_type, 1, q_info };
@@ -123,11 +123,11 @@
         fill(src);
 
         TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(src_shape, axis, false);
-        return reference::reduction_operation<T, uint32_t>(src, output_shape, axis, op);
+        return reference::reduction_operation<T, int32_t>(src, output_shape, axis, op);
     }
 
-    TensorType             _target{};
-    SimpleTensor<uint32_t> _reference{};
+    TensorType            _target{};
+    SimpleTensor<int32_t> _reference{};
 };
 
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
diff --git a/tests/validation/reference/ReductionOperation.cpp b/tests/validation/reference/ReductionOperation.cpp
index 965365d..330a3b8 100644
--- a/tests/validation/reference/ReductionOperation.cpp
+++ b/tests/validation/reference/ReductionOperation.cpp
@@ -179,7 +179,7 @@
 {
     // Create reference
     const bool         is_arg_min_max   = (op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::ARG_IDX_MAX);
-    DataType           output_data_type = is_arg_min_max ? DataType::U32 : src.data_type();
+    DataType           output_data_type = is_arg_min_max ? DataType::S32 : src.data_type();
     SimpleTensor<OT>   dst{ dst_shape, output_data_type, 1, src.quantization_info() };
     const unsigned int src_width    = src.shape().x();
     const unsigned int src_height   = src.shape().y();
@@ -292,10 +292,10 @@
 template SimpleTensor<float> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
 template SimpleTensor<half> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
 
-template SimpleTensor<uint32_t> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
-template SimpleTensor<uint32_t> reduction_operation(const SimpleTensor<int32_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
-template SimpleTensor<uint32_t> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
-template SimpleTensor<uint32_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
+template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
+template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<int32_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
+template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
+template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
 
 } // namespace reference
 } // namespace validation