backends/reference: Add ReduceSum operation support

This patch adds ReduceSum operation support for reference backend,
which computes the sum of elements across dimensions of a tensor.

Changelog v1:
- Fix file header descriptions.

Changelog v2:
- Fix line limit issue.
- Fix type conversion issue.

Changelog v3:
- Remove tabs.
- Modify newly added file headers.

Changelog v4:
- Symbol on header isn't allowed so drop it from newly added file headers.

Changelog v5:
- Remove tabs, fix the use of brackets and align lines correctly.

Changelog v6:
- Add serializer and deserializer support.

Changelog v7:
- Fix build error: add missing code.

Changelog v8:
- Rename ReduceSumDescriptor to ReduceDescriptor
    - Update m_KeepDims field data type to bool on ReduceDescriptor
    - Add ReduceOperation field to ReduceDescriptor

- Rename ReduceSumLayer to ReduceLayer
    - Update ReduceLayer to use ReduceDescriptor
    - Update ReduceLayer::ValidateTensorShapesFromInputs() function

- Rename RefReduceSumWorkload to RefReduceWorkload
    - Update workload to use ReduceDescriptor
    - Update workload to use Decoders and Encoders

- Remove ReduceSum.hpp and ReduceSum.cpp
- Added Reduce.hpp and Reduce.cpp
     - Move Mean.cpp (which implements REDUCE_MEAN) functionality to Reduce.cpp
     - Update RefMeanWorkload to call Reduce function with ReduceOperation::Mean argument

- Remove Mean.hpp and Mean.cpp
- Update the Serializer/Deserializer ArmnnSchema.fbs for ReduceLayer, ReduceDescriptor, and ReduceOperation
- Update Serializer and Deserializer for serializing/parsing ReduceLayer
- Added TfLite parser Sum test for REDUCE_SUM operator
- Make corresponding changes on front-end and Ref backend to support REDUCE_SUM operator

Changelog v9:
- Fixed build errors.

Change-Id: I8c8e034f3df73f9565b3c18eff51ecca6c542195
Signed-off-by: Inki Dae <inki.dae@samsung.com>
Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
index 7894895..b20ef2d 100644
--- a/src/backends/backendsCommon/test/CMakeLists.txt
+++ b/src/backends/backendsCommon/test/CMakeLists.txt
@@ -137,6 +137,8 @@
     layerTests/QuantizeTestImpl.hpp
     layerTests/RankTestImpl.cpp
     layerTests/RankTestImpl.hpp
+    layerTests/ReduceSumTestImpl.cpp
+    layerTests/ReduceSumTestImpl.hpp
     layerTests/ReshapeTestImpl.cpp
     layerTests/ReshapeTestImpl.hpp
     layerTests/ResizeTestImpl.cpp
diff --git a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
index 1492a80..c7d1dd2 100644
--- a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
+++ b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
@@ -677,6 +677,8 @@
 
 DECLARE_LAYER_POLICY_1_PARAM(Subtraction)
 
+DECLARE_LAYER_POLICY_2_PARAM(Reduce)
+
 DECLARE_LAYER_POLICY_1_PARAM(Switch)
 
 DECLARE_LAYER_POLICY_2_PARAM(Transpose)
diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp
index e9eb5b9..d87a3b0 100644
--- a/src/backends/backendsCommon/test/LayerTests.hpp
+++ b/src/backends/backendsCommon/test/LayerTests.hpp
@@ -48,6 +48,7 @@
 #include <backendsCommon/test/layerTests/PreluTestImpl.hpp>
 #include <backendsCommon/test/layerTests/QuantizeTestImpl.hpp>
 #include <backendsCommon/test/layerTests/RankTestImpl.hpp>
+#include <backendsCommon/test/layerTests/ReduceSumTestImpl.hpp>
 #include <backendsCommon/test/layerTests/ReshapeTestImpl.hpp>
 #include <backendsCommon/test/layerTests/ResizeTestImpl.hpp>
 #include <backendsCommon/test/layerTests/RsqrtTestImpl.hpp>
diff --git a/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp
new file mode 100644
index 0000000..4edbd11
--- /dev/null
+++ b/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp
@@ -0,0 +1,344 @@
+//
+// Copyright © 2020 Samsung Electronics Co Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ReduceSumTestImpl.hpp"
+
+#include <backendsCommon/test/DataTypeUtils.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+namespace
+{
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<float, 4> ReduceTestCommon(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory,
+        const armnn::TensorInfo inputTensorInfo,
+        const armnn::TensorInfo outputTensorInfo,
+        const std::vector<float>& inputData,
+        const std::vector<float>& outputData,
+        const std::vector<int32_t> vAxis,
+        const armnn::ReduceOperation reduceOperation)
+{
+    IgnoreUnused(memoryManager);
+    auto inputTensor = MakeTensor<T, 4>(inputTensorInfo, ConvertToDataType<ArmnnType>(inputData, inputTensorInfo));
+
+    LayerTestResult<float, 4> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, outputData);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::ReduceQueueDescriptor descriptor;
+    std::vector<uint32_t> updated_idx;
+    uint32_t resolvedAxis = 0;
+    for (uint32_t i = 0; i < vAxis.size(); ++i)
+    {
+        if (vAxis[i] <  0)
+        {
+            resolvedAxis = inputTensorInfo.GetNumDimensions() + static_cast<uint32_t>(vAxis[i]);
+        } else
+        {
+            resolvedAxis = static_cast<uint32_t>(vAxis[i]);
+        }
+
+        updated_idx.push_back(resolvedAxis);
+    }
+
+    descriptor.m_Parameters.m_vAxis = updated_idx;
+    descriptor.m_Parameters.m_ReduceOperation = reduceOperation;
+    armnn::WorkloadInfo info;
+
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateReduce(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), inputTensor.origin());
+
+    workload->Execute();
+
+    CopyDataFromITensorHandle(result.output.origin(), outputHandle.get());
+
+    return result;
+}
+
+} // namespace
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<float, 4> ReduceSumSimpleTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    const armnn::TensorShape inputShape{ 1, 1, 1, 5 };
+    const armnn::TensorShape outputShape{ 1, 1, 1, 1};
+
+    armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(1.0f);
+        inputTensorInfo.SetQuantizationOffset(0);
+    }
+
+    armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
+
+    std::vector<float> inputValues({ 5.0f, 2.0f, 8.0f, 10.0f, 9.0f });
+    std::vector<float> outputValues({ 34.0f });
+
+    return ReduceTestCommon<ArmnnType>(workloadFactory,
+                                       memoryManager,
+                                       tensorHandleFactory,
+                                       inputTensorInfo,
+                                       outputTensorInfo,
+                                       inputValues,
+                                       outputValues,
+                                       { -1 },
+                                       armnn::ReduceOperation::Sum);
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<float, 4> ReduceSumSingleAxisTest1(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    const armnn::TensorShape inputShape{ 1, 3, 2, 4 };
+    const armnn::TensorShape outputShape{ 1, 1, 2, 4};
+
+    armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(1.0f);
+        inputTensorInfo.SetQuantizationOffset(0);
+    }
+
+    armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
+
+    std::vector<float> inputValues({  1.0f,   2.0f,   3.0f,   4.0f,
+                                      5.0f,   6.0f,   7.0f,   8.0f,
+
+                                     10.0f,  20.0f,  30.0f,  40.0f,
+                                     50.0f,  60.0f,  70.0f,  80.0f,
+
+                                    100.0f, 200.0f, 300.0f, 400.0f,
+                                    500.0f, 600.0f, 700.0f, 800.0f });
+    std::vector<float> outputValues({ 111.0f, 222.0f, 333.0f, 444.0f,
+                                      555.0f, 666.0f, 777.0f, 888.0f });
+
+    return ReduceTestCommon<ArmnnType>(workloadFactory,
+                                       memoryManager,
+                                       tensorHandleFactory,
+                                       inputTensorInfo,
+                                       outputTensorInfo,
+                                       inputValues,
+                                       outputValues,
+                                       { 1 },
+                                       armnn::ReduceOperation::Sum);
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<float, 4> ReduceSumSingleAxisTest2(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    const armnn::TensorShape inputShape{ 1, 6, 3, 4 };
+    const armnn::TensorShape outputShape{ 1, 1, 3, 4};
+
+    armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(1.0f);
+        inputTensorInfo.SetQuantizationOffset(0);
+    }
+
+    armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
+
+    std::vector<float> inputValues( {7, 8, 6, 1,
+                                     1, 1, 8, 7,
+                                     3, 7, 7, 7,
+
+                                     6, 8, 4, 7,
+                                     3, 8, 7, 3,
+                                     5, 8, 8, 8,
+
+
+                                     7, 8, 2, 7,
+                                     3, 8, 5, 6,
+                                     8, 4, 2, 7,
+
+                                     1, 6, 7, 2,
+                                     8, 3, 3, 1,
+                                     7, 6, 2, 6,
+
+
+                                     5, 3, 4, 8,
+                                     7, 8, 2, 4,
+                                     6, 6, 2, 8,
+
+                                     2, 2, 7, 2,
+                                     5, 3, 6, 3,
+                                     6, 1, 8, 8});
+    std::vector<float> outputValues({  28.0f, 35.0f, 30.0f, 27.0f,
+                                       27.0f, 31.0f, 31.0f, 24.0f,
+                                       35.0f, 32.0f, 29.0f, 44.0f});
+
+    return ReduceTestCommon<ArmnnType>(workloadFactory,
+                                       memoryManager,
+                                       tensorHandleFactory,
+                                       inputTensorInfo,
+                                       outputTensorInfo,
+                                       inputValues,
+                                       outputValues,
+                                       { 1 },
+                                       armnn::ReduceOperation::Sum);
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<float, 4> ReduceSumSingleAxisTest3(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    const armnn::TensorShape inputShape{ 1, 6, 3, 4 };
+    const armnn::TensorShape outputShape{ 1, 6, 3, 1};
+
+    armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(1.0f);
+        inputTensorInfo.SetQuantizationOffset(0);
+    }
+
+    armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
+
+    std::vector<float> inputValues( {7, 8, 6, 1,
+                                     1, 1, 8, 7,
+                                     3, 7, 7, 7,
+
+                                     6, 8, 4, 7,
+                                     3, 8, 7, 3,
+                                     5, 8, 8, 8,
+
+
+                                     7, 8, 2, 7,
+                                     3, 8, 5, 6,
+                                     8, 4, 2, 7,
+
+                                     1, 6, 7, 2,
+                                     8, 3, 3, 1,
+                                     7, 6, 2, 6,
+
+
+                                     5, 3, 4, 8,
+                                     7, 8, 2, 4,
+                                     6, 6, 2, 8,
+
+                                     2, 2, 7, 2,
+                                     5, 3, 6, 3,
+                                     6, 1, 8, 8});
+    std::vector<float> outputValues({  22.0f, 17.0f, 24.0f,
+                                       25.0f, 21.0f, 29.0f,
+
+                                       24.0f, 22.0f, 21.0f,
+                                       16.0f, 15.0f, 21.0f,
+
+                                       20.0f, 21.0f, 22.0f,
+                                       13.0f, 17.0f, 23.0f});
+
+    return ReduceTestCommon<ArmnnType>(workloadFactory,
+                                       memoryManager,
+                                       tensorHandleFactory,
+                                       inputTensorInfo,
+                                       outputTensorInfo,
+                                       inputValues,
+                                       outputValues,
+                                       { 3 },
+                                       armnn::ReduceOperation::Sum);
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<float, 4> ReduceSumMultipleAxisTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    const armnn::TensorShape inputShape{ 1, 3, 2, 4 };
+    const armnn::TensorShape outputShape{ 1, 1, 1, 4};
+
+    armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(1.0f);
+        inputTensorInfo.SetQuantizationOffset(0);
+    }
+
+    armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
+
+    std::vector<float> inputValues({  1.0f,   2.0f,   3.0f,   4.0f,
+                                      5.0f,   6.0f,   7.0f,   8.0f,
+
+                                     10.0f,  20.0f,  30.0f,  40.0f,
+                                     50.0f,  60.0f,  70.0f,  80.0f,
+
+                                    100.0f, 200.0f, 300.0f, 400.0f,
+                                    500.0f, 600.0f, 700.0f, 800.0f });
+    std::vector<float> outputValues({ 666.0f, 888.0f, 1110.0f, 1332.0f });
+
+    return ReduceTestCommon<ArmnnType>(workloadFactory,
+                                       memoryManager,
+                                       tensorHandleFactory,
+                                       inputTensorInfo,
+                                       outputTensorInfo,
+                                       inputValues,
+                                       outputValues,
+                                       { 1, 2 },
+                                       armnn::ReduceOperation::Sum);
+}
+
+// Explicit template specializations
+
+template LayerTestResult<float, 4>
+ReduceSumSimpleTest<armnn::DataType::Float32>(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<float, 4>
+ReduceSumSingleAxisTest1<armnn::DataType::Float32>(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<float, 4>
+ReduceSumSingleAxisTest2<armnn::DataType::Float32>(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<float, 4>
+ReduceSumSingleAxisTest3<armnn::DataType::Float32>(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<float, 4>
+ReduceSumMultipleAxisTest<armnn::DataType::Float32>(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
diff --git a/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.hpp
new file mode 100644
index 0000000..db23240
--- /dev/null
+++ b/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.hpp
@@ -0,0 +1,43 @@
+//
+// Copyright © 2020 Samsung Electronics Co Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <ResolveType.hpp>
+
+#include <armnn/backends/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<float, 4> ReduceSumSimpleTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<float, 4> ReduceSumSingleAxisTest1(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<float, 4> ReduceSumSingleAxisTest2(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<float, 4> ReduceSumSingleAxisTest3(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<float, 4> ReduceSumMultipleAxisTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);