COMPMID-1580 Implement ReduceMean in NEON

Change-Id: Id974efad304c2513b8824a6561ad45ee60b9e7fb
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/153763
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
Reviewed-by: Isabella Gottardi <isabella.gottardi@arm.com>
Tested-by: bsgcomp <bsgcomp@arm.com>
diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h
index 2bf8bcd..57bd585 100644
--- a/arm_compute/runtime/NEON/NEFunctions.h
+++ b/arm_compute/runtime/NEON/NEFunctions.h
@@ -101,6 +101,7 @@
 #include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
 #include "arm_compute/runtime/NEON/functions/NERNNLayer.h"
 #include "arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEReduceMean.h"
 #include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
 #include "arm_compute/runtime/NEON/functions/NERemap.h"
 #include "arm_compute/runtime/NEON/functions/NEReorgLayer.h"
diff --git a/arm_compute/runtime/NEON/functions/NEReduceMean.h b/arm_compute/runtime/NEON/functions/NEReduceMean.h
new file mode 100644
index 0000000..b20ca9c
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEReduceMean.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEON_REDUCE_MEAN_H__
+#define __ARM_COMPUTE_NEON_REDUCE_MEAN_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
+#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to perform a reduce mean operation. It holds an array of @ref NEReductionOperation functions and a @ref NEReshapeLayer */
+class NEReduceMean : public IFunction
+{
+public:
+    /** Constructor. Takes an optional memory manager, which defaults to nullptr */
+    NEReduceMean(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+    /** Configure the function
+     *
+     * @note Supported tensor rank: up to 4
+     *
+     * @param[in]  input          Source tensor. Data type supported: QASYMM8/F16/F32
+     * @param[in]  reduction_axis Reduction axis vector, passed as @ref Coordinates.
+     * @param[in]  keep_dims      If true, retains reduced dimensions with length 1.
+     * @param[out] output         Destination tensor. Data type supported: Same as @p input
+     */
+    void configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims, ITensor *output);
+
+    /** Static function to check if given info will lead to a valid configuration of @ref NEReduceMean
+     *
+     * @param[in] input          Source tensor info. Data type supported: QASYMM8/F16/F32
+     * @param[in] reduction_axis Reduction axis vector, passed as @ref Coordinates.
+     * @param[in] keep_dims      If true, retains reduced dimensions with length 1.
+     * @param[in] output         Destination tensor info. Data type supported: Same as @p input
+     *
+     * @return A status
+     */
+    static Status validate(const ITensorInfo *input, const Coordinates &reduction_axis, bool keep_dims, const ITensorInfo *output);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    MemoryGroup                             _memory_group;
+    std::unique_ptr<NEReductionOperation[]> _reduction_kernels{ nullptr };
+    std::unique_ptr<Tensor[]>               _reduced_outs{ nullptr };
+    NEReshapeLayer                          _reshape;
+    unsigned int                            _reduction_ops;
+    bool                                    _keep_dims;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEON_REDUCE_MEAN_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEReductionOperation.h b/arm_compute/runtime/NEON/functions/NEReductionOperation.h
index 02b29fb..5bc7059 100644
--- a/arm_compute/runtime/NEON/functions/NEReductionOperation.h
+++ b/arm_compute/runtime/NEON/functions/NEReductionOperation.h
@@ -47,16 +47,16 @@
     NEReductionOperation();
     /** Set the input and output tensors.
      *
-     * @param[in, out] input  Source tensor. Data type supported: F32. Data layouts supported: NCHW. (Written to only for border_size != 0)
-     * @param[out]     output Destination tensor. Data types and data layouts supported: same as @p input.
-     * @param[in]      axis   Dimension along which to reduce. Supported reduction axis : 0
-     * @param[in]      op     Reduction operation to perform.
+     * @param[in, out] input  Source tensor. Data type supported: QASYMM8/F16/F32. Data layouts supported: NCHW. (Written to only for border_size != 0)
+     * @param[out]     output Destination tensor. Data types and data layouts supported: same as @p input.
+     * @param[in]      axis   Dimension along which to reduce. Supported reduction axis : 0
+     * @param[in]      op     Reduction operation to perform.
      */
     void configure(ITensor *input, ITensor *output, unsigned int axis, ReductionOperation op);
 
     /** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperation.
      *
-     * @param[in] input  Source tensor info. Data type supported: F32. Data layouts supported: NCHW. (Written to only for border_size != 0)
+     * @param[in] input  Source tensor info. Data type supported: QASYMM8/F16/F32. Data layouts supported: NCHW. (Written to only for border_size != 0)
      * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input.
      * @param[in] axis   Dimension along which to reduce. Supported reduction axis : 0
      * @param[in] op     Reduction operation to perform.
@@ -72,6 +72,7 @@
     NEReductionOperationKernel _reduction_kernel;
     NEFillBorderKernel         _fill_border_kernel;
     size_t                     _window_split;
+    int                        _reduction_axis;
 };
-}
+} // namespace arm_compute
 #endif /* __ARM_COMPUTE_NEREDUCTIONOPERATION_H__ */