COMPMID-2707: add keep_dims parameter to Reduction Operation

The added parameter is used to decide whether or not to keep
the target dimension of the reduction operation. ArgMinMax operations
will always remove the reduced dimension. The following things
are updated to support the parameter.

- [CL/NEON] functions and reference kernel
- [CL/NEON] ArgMinMax function to use ReductionOperation function
- [CL/NEON] validation test suite for Reduction and ArgMinMax operations
  to validate the added parameter
- ReductionOperationFixture is modified NOT to pre-populate the output
  tensor and now relies on the underlying kernel/function.
- Adjust CL validation test suite for Reduction operation to remove
  excessive test cases with axis values beyond input tensor's
  dimension.

Change-Id: I3e24d276ed469a4201f323001708f0c525f11c4f
Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2167
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
diff --git a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
index 85bf7d9..b0e2d78 100644
--- a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
@@ -24,8 +24,6 @@
 #ifndef __ARM_COMPUTE_NEARGMINMAXLAYER_H__
 #define __ARM_COMPUTE_NEARGMINMAXLAYER_H__
 
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/MemoryGroup.h"
 #include "arm_compute/runtime/NEON/INESimpleFunction.h"
@@ -33,6 +31,7 @@
 namespace arm_compute
 {
 class ITensor;
+class NEReductionOperation;
 
 /** Function to calculate the index of the minimum or maximum values in a
  *  tensor based on an axis.
@@ -74,10 +73,7 @@
     void run() override;
 
 private:
-    MemoryGroup                _memory_group;
-    NEReductionOperationKernel _reduction_kernel;
-    NEFillBorderKernel         _fill_border_kernel;
-    bool                       _run_fill_border;
+    std::unique_ptr<NEReductionOperation> _reduction_function;
 };
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_NEARGMINMAXLAYER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEReductionOperation.h b/arm_compute/runtime/NEON/functions/NEReductionOperation.h
index 5bc7059..1e72c4f 100644
--- a/arm_compute/runtime/NEON/functions/NEReductionOperation.h
+++ b/arm_compute/runtime/NEON/functions/NEReductionOperation.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,7 +28,9 @@
 
 #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
 #include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h"
+#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/Tensor.h"
 
 namespace arm_compute
 {
@@ -44,35 +46,41 @@
 {
 public:
     /** Default constructor */
-    NEReductionOperation();
+    NEReductionOperation(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
     /** Set the input and output tensors.
      *
-     * @param[in]  input  Source tensor. Data type supported: QASYMM8/F16/F32. Data layouts supported: NCHW. (Written to only for border_size != 0)
-     * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
-     * @param[in]  axis   Dimension along which to reduce. Supported reduction axis : 0
-     * @param[in]  op     Reduction operation to perform.
+     * @param[in]  input     Source tensor. Data type supported: QASYMM8/F16/F32. Data layouts supported: NCHW. (Written to only for border_size != 0)
+     * @param[out] output    Destination tensor. Data types and data layouts supported: same as @p input.
+     * @param[in]  axis      Dimension along which to reduce. Supported reduction axis : 0
+     * @param[in]  op        Reduction operation to perform.
+     * @param[in]  keep_dims (Optional) Whether to keep the reduced dimension after the operation. Defaults to true.
      */
-    void configure(ITensor *input, ITensor *output, unsigned int axis, ReductionOperation op);
+    void configure(ITensor *input, ITensor *output, unsigned int axis, ReductionOperation op, bool keep_dims = true);
 
     /** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperation.
      *
-     * @param[in] input  Source tensor info. Data type supported: QASYMM8/F16/F32. Data layouts supported: NCHW. (Written to only for border_size != 0)
-     * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input.
-     * @param[in] axis   Dimension along which to reduce. Supported reduction axis : 0
-     * @param[in] op     Reduction operation to perform.
+     * @param[in] input     Source tensor info. Data type supported: QASYMM8/F16/F32. Data layouts supported: NCHW. (Written to only for border_size != 0)
+     * @param[in] output    Destination tensor info. Data types and data layouts supported: same as @p input.
+     * @param[in] axis      Dimension along which to reduce. Supported reduction axis : 0
+     * @param[in] op        Reduction operation to perform.
+     * @param[in] keep_dims (Optional) Whether to keep the reduced dimension after the operation. Defaults to true.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, bool keep_dims = true);
 
     // Inherited methods overridden:
     void run() override;
 
 private:
+    MemoryGroup                _memory_group;
     NEReductionOperationKernel _reduction_kernel;
     NEFillBorderKernel         _fill_border_kernel;
+    NEReshapeLayerKernel       _reshape_kernel;
+    Tensor                     _output_internal;
     size_t                     _window_split;
     int                        _reduction_axis;
+    bool                       _is_reshape_required;
 };
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_NEREDUCTIONOPERATION_H__ */