backends/reference: Add ReduceSum operation support

This patch adds ReduceSum operation support for the reference backend,
which computes the sum of elements across dimensions of a tensor.

Changelog v1:
- Fix file header descriptions.

Changelog v2:
- Fix line limit issue.
- Fix type conversion issue.

Changelog v3:
- Remove tabs.
- Modify newly added file headers.

Changelog v4:
- Symbol on header isn't allowed so drop it from newly added file headers.

Changelog v5:
- Remove tabs, fix the use of brackets and align lines correctly.

Changelog v6:
- Add serializer and deserializer support.

Changelog v7:
- Fix build error; add missed code.

Changelog v8:
- Rename ReduceSumDescriptor to ReduceDescriptor
    - Update m_KeepDims field data type to bool on ReduceDescriptor
    - Add ReduceOperation field to ReduceDescriptor

- Rename ReduceSumLayer to ReduceLayer
    - Update ReduceLayer to use ReduceDescriptor
    - Update ReduceLayer::ValidateTensorShapesFromInputs() function

- Rename RefReduceSumWorkload to RefReduceWorkload
    - Update workload to use ReduceDescriptor
    - Update workload to use Decoders and Encoders

- Remove ReduceSum.hpp and ReduceSum.cpp
- Added Reduce.hpp and Reduce.cpp
     - Move Mean.cpp (which implements REDUCE_MEAN) functionality to Reduce.cpp
     - Update RefMeanWorkload to call Reduce function with ReduceOperation::Mean argument

- Remove Mean.hpp and Mean.cpp
- Update the Serializer/Deserializer ArmnnSchema.fbs for ReduceLayer, ReduceDescriptor, and ReduceOperation
- Update Serializer and Deserializer for serializing/parsing ReduceLayer
- Added TfLite parser Sum test for REDUCE_SUM operator
- Make corresponding changes on front-end and Ref backend to support REDUCE_SUM operator

Changelog v9:
- Fixed build errors.

Change-Id: I8c8e034f3df73f9565b3c18eff51ecca6c542195
Signed-off-by: Inki Dae <inki.dae@samsung.com>
Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
index bdaaafb..992ae71 100644
--- a/src/backends/reference/RefLayerSupport.cpp
+++ b/src/backends/reference/RefLayerSupport.cpp
@@ -1706,6 +1706,36 @@
            "Reference rank: input type not supported.");
 }
 
+bool RefLayerSupport::IsReduceSupported(const TensorInfo& input,
+                                        const TensorInfo& output,
+                                        const ReduceDescriptor& descriptor,
+                                        Optional<std::string&> reasonIfUnsupported) const
+{
+    IgnoreUnused(descriptor);
+    bool supported = true;
+    std::array<DataType,7> supportedTypes =
+    {
+        DataType::BFloat16,
+        DataType::Float32,
+        DataType::Float16,
+        DataType::QAsymmS8,
+        DataType::QAsymmU8,
+        DataType::QSymmS16,
+        DataType::Signed32
+    };
+
+    supported &= CheckSupportRule(TypeAnyOf(input, supportedTypes), reasonIfUnsupported,
+                                  "Reference Reduce: input type not supported");
+
+    supported &= CheckSupportRule(TypeAnyOf(output, supportedTypes), reasonIfUnsupported,
+                                  "Reference Reduce: output type not supported");
+
+    supported &= CheckSupportRule(TypesAreEqual(input, output), reasonIfUnsupported,
+                                  "Reference Reduce: input and output types not matching");
+
+    return supported;
+}
+
 bool RefLayerSupport::IsReshapeSupported(const TensorInfo& input,
                                          const TensorInfo& output,
                                          const ReshapeDescriptor& descriptor,
diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp
index 6b64408..b75b778 100644
--- a/src/backends/reference/RefLayerSupport.hpp
+++ b/src/backends/reference/RefLayerSupport.hpp
@@ -275,6 +275,11 @@
                          const TensorInfo& output,
                          Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
+    bool IsReduceSupported(const TensorInfo& input,
+                           const TensorInfo& output,
+                           const ReduceDescriptor& descriptor,
+                           Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
     bool IsReshapeSupported(const TensorInfo& input,
                             const TensorInfo& output,
                             const ReshapeDescriptor& descriptor,
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index 468aeb3..fde6c86 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -580,6 +580,12 @@
     return std::make_unique<RefRankWorkload>(descriptor, info);
 }
 
+std::unique_ptr<IWorkload> RefWorkloadFactory::CreateReduce(const ReduceQueueDescriptor& descriptor,
+                                                            const WorkloadInfo& info) const
+{
+    return std::make_unique<RefReduceWorkload>(descriptor, info);
+}
+
 std::unique_ptr<IWorkload> RefWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
 {
diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp
index 41cefd3..c22d87f 100644
--- a/src/backends/reference/RefWorkloadFactory.hpp
+++ b/src/backends/reference/RefWorkloadFactory.hpp
@@ -223,6 +223,9 @@
     std::unique_ptr<IWorkload> CreateRank(const RankQueueDescriptor& descriptor,
                                           const WorkloadInfo& info) const override;
 
+    std::unique_ptr<IWorkload> CreateReduce(const ReduceQueueDescriptor& descriptor,
+                                            const WorkloadInfo& info) const override;
+
     std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor,
                                              const WorkloadInfo& info) const override;
 
diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk
index b4aa3a0..9676509 100644
--- a/src/backends/reference/backend.mk
+++ b/src/backends/reference/backend.mk
@@ -38,11 +38,11 @@
         workloads/InstanceNorm.cpp \
         workloads/LogSoftmax.cpp \
         workloads/LstmUtils.cpp \
-        workloads/Mean.cpp \
         workloads/Concatenate.cpp \
         workloads/Pad.cpp \
         workloads/Pooling2d.cpp \
         workloads/PreluImpl.cpp \
+        workloads/Reduce.cpp \
         workloads/RefActivationWorkload.cpp \
         workloads/RefArgMinMaxWorkload.cpp \
         workloads/RefBatchNormalizationWorkload.cpp \
@@ -81,6 +81,7 @@
         workloads/RefPreluWorkload.cpp \
         workloads/RefQLstmWorkload.cpp \
         workloads/RefQuantizeWorkload.cpp \
+        workloads/RefReduceWorkload.cpp \
         workloads/RefReshapeWorkload.cpp \
         workloads/RefResizeBilinearWorkload.cpp \
         workloads/RefResizeWorkload.cpp \
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index 502e0cb..d5e0f82 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -2234,4 +2234,11 @@
 ARMNN_AUTO_TEST_CASE_WITH_THF(LogicalAndBroadcast3, LogicalAndBroadcast3Test)
 ARMNN_AUTO_TEST_CASE_WITH_THF(LogicalOrBroadcast3, LogicalOrBroadcast3Test)
 
+// ReduceSum
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumFloat32, ReduceSumSimpleTest<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_1, ReduceSumSingleAxisTest1<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_2, ReduceSumSingleAxisTest2<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_3, ReduceSumSingleAxisTest3<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumMultipleAxisFloat32, ReduceSumMultipleAxisTest<DataType::Float32>)
+
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
index 1b20e5b..1f4298b 100644
--- a/src/backends/reference/workloads/CMakeLists.txt
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -44,8 +44,6 @@
     LstmUtils.hpp
     LstmUtils.cpp
     Maximum.hpp
-    Mean.cpp
-    Mean.hpp
     Concatenate.hpp
     Concatenate.cpp
     Minimum.hpp
@@ -55,6 +53,8 @@
     Pooling2d.hpp
     PreluImpl.cpp
     PreluImpl.hpp
+    Reduce.cpp
+    Reduce.hpp
     RefActivationWorkload.cpp
     RefActivationWorkload.hpp
     RefArgMinMaxWorkload.cpp
@@ -132,6 +132,8 @@
     RefQLstmWorkload.cpp
     RefQLstmWorkload.hpp
     RefRankWorkload.hpp
+    RefReduceWorkload.cpp
+    RefReduceWorkload.hpp
     RefReshapeWorkload.cpp
     RefReshapeWorkload.hpp
     RefResizeBilinearWorkload.cpp
diff --git a/src/backends/reference/workloads/Mean.hpp b/src/backends/reference/workloads/Mean.hpp
deleted file mode 100644
index dfb0302..0000000
--- a/src/backends/reference/workloads/Mean.hpp
+++ /dev/null
@@ -1,22 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#pragma once
-
-#include "armnn/DescriptorsFwd.hpp"
-#include "armnn/Tensor.hpp"
-#include "BaseIterator.hpp"
-
-#include <vector>
-
-namespace armnn
-{
-void Mean(const TensorInfo& inputInfo,
-          const TensorInfo& outputInfo,
-          const std::vector<unsigned int>& axis,
-          Decoder<float>& input,
-          Encoder<float>& output);
-} //namespace armnn
-
diff --git a/src/backends/reference/workloads/Mean.cpp b/src/backends/reference/workloads/Reduce.cpp
similarity index 78%
rename from src/backends/reference/workloads/Mean.cpp
rename to src/backends/reference/workloads/Reduce.cpp
index fe34efe..5375c71 100644
--- a/src/backends/reference/workloads/Mean.cpp
+++ b/src/backends/reference/workloads/Reduce.cpp
@@ -1,13 +1,14 @@
 //
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
-#include "Mean.hpp"
-#include <backendsCommon/WorkloadData.hpp>
+#include "Reduce.hpp"
 
 #include <armnn/utility/NumericCast.hpp>
 
+#include <backendsCommon/WorkloadData.hpp>
+
 #include <cmath>
 #include <cstddef>
 #include <functional>
@@ -15,6 +16,7 @@
 
 namespace armnn
 {
+
 bool NextIndex(const unsigned int numDims, const armnn::TensorShape& dims, std::vector<unsigned int>& current)
 {
     unsigned int carry = 1;
@@ -64,18 +66,16 @@
     }
     return offset;
 }
-} // namespace
 
-namespace armnn
-{
-void Mean(const armnn::TensorInfo& inputInfo,
-          const armnn::TensorInfo& outputInfo,
-          const std::vector<unsigned int>& axis,
-          Decoder<float>& input,
-          Encoder<float>& output)
-{
 
-    unsigned int inputNumDims = inputInfo.GetNumDimensions();
+void Reduce(const TensorInfo& inputInfo,
+            const TensorInfo& outputInfo,
+            Decoder<float>& input,
+            Encoder<float>& output,
+            const std::vector<uint32_t> axis,
+            const ReduceOperation reduceOperation)
+{
+    unsigned int inputNumDims  = inputInfo.GetNumDimensions();
     unsigned int outputNumDims = outputInfo.GetNumDimensions();
 
     armnn::TensorShape outputDims = outputInfo.GetShape();
@@ -106,10 +106,10 @@
     std::vector<unsigned int> resolvedAxis = axis;
     if (resolvedAxis.empty())
     {
-      for (unsigned int idx = 0; idx < inputNumDims; ++idx)
-      {
-          resolvedAxis.push_back(idx);
-      }
+        for (unsigned int idx = 0; idx < inputNumDims; ++idx)
+        {
+            resolvedAxis.push_back(idx);
+        }
     }
     auto numResolvedAxis = armnn::numeric_cast<unsigned int>(resolvedAxis.size());
 
@@ -129,15 +129,23 @@
     {
         unsigned int current = inputDims[resolvedAxis[idx]];
         ARMNN_ASSERT(armnn::numeric_cast<float>(current) <
-              (std::numeric_limits<float>::max() / armnn::numeric_cast<float>(numElementsInAxis)));
+                     (std::numeric_limits<float>::max() / armnn::numeric_cast<float>(numElementsInAxis)));
         numElementsInAxis *= current;
     }
     if (numElementsInAxis > 0) {
         for (unsigned int idx = 0; idx < numOutputs; ++idx)
         {
             output[idx];
-            output.Set(tempSum[idx] / armnn::numeric_cast<float>(numElementsInAxis));
+            if (reduceOperation == ReduceOperation::Sum)
+            {
+                output.Set(tempSum[idx]);
+            }
+            else if (reduceOperation == ReduceOperation::Mean)
+            {
+                output.Set(tempSum[idx] / armnn::numeric_cast<float>(numElementsInAxis));
+            }
         }
     }
 }
-} //namespace armnn
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/Reduce.hpp b/src/backends/reference/workloads/Reduce.hpp
new file mode 100644
index 0000000..ad777ad
--- /dev/null
+++ b/src/backends/reference/workloads/Reduce.hpp
@@ -0,0 +1,24 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "BaseIterator.hpp"
+#include "Decoders.hpp"
+#include "Encoders.hpp"
+
+#include <armnn/Tensor.hpp>
+
+namespace armnn
+{
+
+void Reduce(const TensorInfo& inputInfo,
+            const TensorInfo& outputInfo,
+            Decoder<float>& input,
+            Encoder<float>& output,
+            const std::vector<uint32_t> axis,
+            const ReduceOperation reduceOperation);
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefMeanWorkload.cpp b/src/backends/reference/workloads/RefMeanWorkload.cpp
index 375ab39..00e59bc 100644
--- a/src/backends/reference/workloads/RefMeanWorkload.cpp
+++ b/src/backends/reference/workloads/RefMeanWorkload.cpp
@@ -5,7 +5,7 @@
 
 #include "RefMeanWorkload.hpp"
 
-#include "Mean.hpp"
+#include "Reduce.hpp"
 #include "RefWorkloadUtils.hpp"
 
 #include "Profiling.hpp"
@@ -28,7 +28,12 @@
     auto inputDecoder  = MakeDecoder<float>(inputInfo,  m_Data.m_Inputs[0]->Map());
     auto outputEncoder = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
 
-    Mean(inputInfo, outputInfo, m_Data.m_Parameters.m_Axis, *inputDecoder, *outputEncoder);
+    Reduce(inputInfo,
+           outputInfo,
+           *inputDecoder,
+           *outputEncoder,
+           m_Data.m_Parameters.m_Axis,
+           armnn::ReduceOperation::Mean);
 }
 
 } //namespace armnn
diff --git a/src/backends/reference/workloads/RefReduceWorkload.cpp b/src/backends/reference/workloads/RefReduceWorkload.cpp
new file mode 100644
index 0000000..7a46ff9
--- /dev/null
+++ b/src/backends/reference/workloads/RefReduceWorkload.cpp
@@ -0,0 +1,42 @@
+//
+// Copyright © 2020 Samsung Electronics Co Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefReduceWorkload.hpp"
+
+#include "Reduce.hpp"
+#include "RefWorkloadUtils.hpp"
+#include "BaseIterator.hpp"
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+RefReduceWorkload::RefReduceWorkload(
+    const ReduceQueueDescriptor& descriptor,
+    const WorkloadInfo& info)
+    : BaseWorkload<ReduceQueueDescriptor>(descriptor, info) {}
+
+void RefReduceWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReduceWorkload_Execute");
+
+    const TensorInfo& inputInfo  = GetTensorInfo(m_Data.m_Inputs[0]);
+    const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+
+    std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map());
+    Decoder<float>& decoder = *decoderPtr;
+
+    std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
+    Encoder<float>& encoder = *encoderPtr;
+
+    Reduce(inputInfo,
+           outputInfo,
+           decoder,
+           encoder,
+           m_Data.m_Parameters.m_vAxis,
+           m_Data.m_Parameters.m_ReduceOperation);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefReduceWorkload.hpp b/src/backends/reference/workloads/RefReduceWorkload.hpp
new file mode 100644
index 0000000..1d551ac
--- /dev/null
+++ b/src/backends/reference/workloads/RefReduceWorkload.hpp
@@ -0,0 +1,23 @@
+//
+// Copyright © 2020 Samsung Electronics Co Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+#include <backendsCommon/WorkloadData.hpp>
+
+namespace armnn
+{
+
+class RefReduceWorkload : public BaseWorkload<ReduceQueueDescriptor>
+{
+public:
+    explicit RefReduceWorkload(const ReduceQueueDescriptor& descriptor,
+                               const WorkloadInfo& info);
+
+    virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
index 390b2a8..989644f 100644
--- a/src/backends/reference/workloads/RefWorkloads.hpp
+++ b/src/backends/reference/workloads/RefWorkloads.hpp
@@ -54,6 +54,7 @@
 #include "RefQLstmWorkload.hpp"
 #include "RefQuantizeWorkload.hpp"
 #include "RefRankWorkload.hpp"
+#include "RefReduceWorkload.hpp"
 #include "RefReshapeWorkload.hpp"
 #include "RefResizeBilinearWorkload.hpp"
 #include "RefResizeWorkload.hpp"