MLCE-347 'REDUCE_MIN, REDUCE_MAX, REDUCE_SUM Support'

* Added TfLiteParser support for REDUCE_MIN and REDUCE_MAX operators
* Added ACL workloads support for REDUCE_MIN, REDUCE_MAX, and REDUCE_SUM operators
* Added TfLite Delegate support for REDUCE_MIN, REDUCE_MAX, and REDUCE_SUM operators

Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
Change-Id: I8085d59946bfd4ab78a59a61f899031ae53371a8
diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp
index 2d22576..66999c1 100644
--- a/src/backends/neon/NeonLayerSupport.cpp
+++ b/src/backends/neon/NeonLayerSupport.cpp
@@ -58,6 +58,7 @@
 #include "workloads/NeonQLstmWorkload.hpp"
 #include "workloads/NeonQuantizeWorkload.hpp"
 #include "workloads/NeonQuantizedLstmWorkload.hpp"
+#include "workloads/NeonReduceWorkload.hpp"
 #include "workloads/NeonReshapeWorkload.hpp"
 #include "workloads/NeonResizeWorkload.hpp"
 #include "workloads/NeonRsqrtWorkload.hpp"
@@ -784,6 +785,18 @@
                                    paramsInfo);
 }
 
+bool NeonLayerSupport::IsReduceSupported(const TensorInfo& input,
+                                         const TensorInfo& output,
+                                         const ReduceDescriptor& descriptor,
+                                         Optional<std::string&> reasonIfUnsupported) const
+{
+    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonReduceWorkloadValidate,
+                                   reasonIfUnsupported,
+                                   input,
+                                   output,
+                                   descriptor);
+}
+
 bool NeonLayerSupport::IsReshapeSupported(const TensorInfo& input,
                                           const TensorInfo& output,
                                           const ReshapeDescriptor& descriptor,
diff --git a/src/backends/neon/NeonLayerSupport.hpp b/src/backends/neon/NeonLayerSupport.hpp
index dc13cc2..2ae1b0d 100644
--- a/src/backends/neon/NeonLayerSupport.hpp
+++ b/src/backends/neon/NeonLayerSupport.hpp
@@ -263,6 +263,11 @@
                                   const QuantizedLstmInputParamsInfo& paramsInfo,
                                   Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
+    bool IsReduceSupported(const TensorInfo& input,
+                           const TensorInfo& output,
+                           const ReduceDescriptor& descriptor,
+                           Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
     bool IsReshapeSupported(const TensorInfo& input,
                             const TensorInfo& output,
                             const ReshapeDescriptor& descriptor,
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index 0d36110..7d09428 100644
--- a/src/backends/neon/NeonWorkloadFactory.cpp
+++ b/src/backends/neon/NeonWorkloadFactory.cpp
@@ -497,6 +497,12 @@
     return std::make_unique<NeonRankWorkload>(descriptor, info);
 }
 
+std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReduce(const ReduceQueueDescriptor& descriptor,
+                                                             const WorkloadInfo& info) const
+{
+    return std::make_unique<NeonReduceWorkload>(descriptor, info);
+}
+
 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
                                                               const WorkloadInfo& info) const
 {
diff --git a/src/backends/neon/NeonWorkloadFactory.hpp b/src/backends/neon/NeonWorkloadFactory.hpp
index 745dece..4817a06 100644
--- a/src/backends/neon/NeonWorkloadFactory.hpp
+++ b/src/backends/neon/NeonWorkloadFactory.hpp
@@ -207,6 +207,9 @@
     std::unique_ptr<IWorkload> CreateRank(const RankQueueDescriptor& descriptor,
                                           const WorkloadInfo& info) const override;
 
+    std::unique_ptr<IWorkload> CreateReduce(const ReduceQueueDescriptor& descriptor,
+                                            const WorkloadInfo& info) const override;
+
     std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor,
                                              const WorkloadInfo& info) const override;
 
diff --git a/src/backends/neon/backend.mk b/src/backends/neon/backend.mk
index 54560cb..6feeeb5 100644
--- a/src/backends/neon/backend.mk
+++ b/src/backends/neon/backend.mk
@@ -66,6 +66,7 @@
         workloads/NeonQLstmWorkload.cpp \
         workloads/NeonQuantizedLstmWorkload.cpp \
         workloads/NeonQuantizeWorkload.cpp \
+        workloads/NeonReduceWorkload.cpp \
         workloads/NeonReshapeWorkload.cpp \
         workloads/NeonResizeWorkload.cpp \
         workloads/NeonRsqrtWorkload.cpp \
diff --git a/src/backends/neon/test/NeonLayerTests.cpp b/src/backends/neon/test/NeonLayerTests.cpp
index d351870..8434a67 100644
--- a/src/backends/neon/test/NeonLayerTests.cpp
+++ b/src/backends/neon/test/NeonLayerTests.cpp
@@ -1372,6 +1372,21 @@
 ARMNN_AUTO_TEST_CASE_WITH_THF(LogicalOrBroadcast2, LogicalOrBroadcast2Test)
 ARMNN_AUTO_TEST_CASE_WITH_THF(LogicalOrBroadcast3, LogicalOrBroadcast3Test)
 
+// ReduceSum
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumFloat32, ReduceSumSimpleTest<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_1, ReduceSumSingleAxisTest1<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_2, ReduceSumSingleAxisTest2<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_3, ReduceSumSingleAxisTest3<DataType::Float32>)
+
+// ReduceMax
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMaxFloat32, ReduceMaxSimpleTest<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMaxNegativeAxisFloat32, ReduceMaxNegativeAxisTest<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMax2Float32, ReduceMaxSimpleTest2<DataType::Float32>)
+
+// ReduceMin
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMinFloat32, ReduceMinSimpleTest<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMinNegativeAxisFloat32, ReduceMinNegativeAxisTest<DataType::Float32>)
+
 #if defined(ARMNNREF_ENABLED)
 
 // The ARMNN_COMPARE_REF_AUTO_TEST_CASE and the ARMNN_COMPARE_REF_FIXTURE_TEST_CASE test units are not available
diff --git a/src/backends/neon/workloads/CMakeLists.txt b/src/backends/neon/workloads/CMakeLists.txt
index f1a723b..7c2b185 100644
--- a/src/backends/neon/workloads/CMakeLists.txt
+++ b/src/backends/neon/workloads/CMakeLists.txt
@@ -93,6 +93,8 @@
     NeonQuantizeWorkload.cpp
     NeonQuantizeWorkload.hpp
     NeonRankWorkload.hpp
+    NeonReduceWorkload.cpp
+    NeonReduceWorkload.hpp
     NeonReshapeWorkload.cpp
     NeonReshapeWorkload.hpp
     NeonResizeWorkload.cpp
diff --git a/src/backends/neon/workloads/NeonReduceWorkload.cpp b/src/backends/neon/workloads/NeonReduceWorkload.cpp
new file mode 100644
index 0000000..0e1b46a
--- /dev/null
+++ b/src/backends/neon/workloads/NeonReduceWorkload.cpp
@@ -0,0 +1,66 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonReduceWorkload.hpp"
+
+#include <aclCommon/ArmComputeUtils.hpp>
+#include <aclCommon/ArmComputeTensorUtils.hpp>
+
+#include <neon/NeonTensorHandle.hpp>
+
+#include "NeonWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+arm_compute::Status NeonReduceWorkloadValidate(const TensorInfo& input,
+                                               const TensorInfo& output,
+                                               const ReduceDescriptor& desc)
+{
+    const arm_compute::TensorInfo aclInputInfo  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+    if (!desc.m_vAxis.empty() && desc.m_vAxis.size() > 1)
+    {
+        return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
+                                   "NeonReduceWorkload: Reduction is supported only on 1 axis.");
+    }
+
+    arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
+                                                                          input.GetNumDimensions(),
+                                                                          desc.m_vAxis);
+
+    return arm_compute::NEReductionOperation::validate(&aclInputInfo,
+                                                       &aclOutputInfo,
+                                                       static_cast<unsigned int>(coords[0]),
+                                                       ConvertReductionOperationToAcl(desc),
+                                                       desc.m_KeepDims);
+}
+
+NeonReduceWorkload::NeonReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info)
+    : BaseWorkload<ReduceQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("NeonReduceWorkload", 1, 1);
+
+    arm_compute::ITensor& input  = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = static_cast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(input.info()->num_dimensions(),
+                                                                          info.m_InputTensorInfos[0].GetNumDimensions(),
+                                                                          m_Data.m_Parameters.m_vAxis);
+    m_Layer.configure(&input,
+                      &output,
+                      static_cast<unsigned int>(coords[0]),
+                      ConvertReductionOperationToAcl(m_Data.m_Parameters),
+                      m_Data.m_Parameters.m_KeepDims);
+}
+
+void NeonReduceWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReduceWorkload_Execute");
+    m_Layer.run();
+}
+
+} //namespace armnn
diff --git a/src/backends/neon/workloads/NeonReduceWorkload.hpp b/src/backends/neon/workloads/NeonReduceWorkload.hpp
new file mode 100644
index 0000000..0472091
--- /dev/null
+++ b/src/backends/neon/workloads/NeonReduceWorkload.hpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+
+#include <arm_compute/runtime/NEON/functions/NEReductionOperation.h>
+
+namespace armnn
+{
+
+arm_compute::Status NeonReduceWorkloadValidate(const TensorInfo& input,
+                                               const TensorInfo& output,
+                                               const ReduceDescriptor& desc);
+
+class NeonReduceWorkload : public BaseWorkload<ReduceQueueDescriptor>
+{
+public:
+    NeonReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+    void Execute() const override;
+
+private:
+    mutable arm_compute::NEReductionOperation m_Layer;
+};
+
+} //namespace armnn
diff --git a/src/backends/neon/workloads/NeonWorkloads.hpp b/src/backends/neon/workloads/NeonWorkloads.hpp
index 949100d..4eb526a 100644
--- a/src/backends/neon/workloads/NeonWorkloads.hpp
+++ b/src/backends/neon/workloads/NeonWorkloads.hpp
@@ -49,6 +49,7 @@
 #include "NeonQuantizedLstmWorkload.hpp"
 #include "NeonQuantizeWorkload.hpp"
 #include "NeonRankWorkload.hpp"
+#include "NeonReduceWorkload.hpp"
 #include "NeonReshapeWorkload.hpp"
 #include "NeonResizeWorkload.hpp"
 #include "NeonRsqrtWorkload.hpp"