backends/reference: Add ReduceSum operation support

This patch adds ReduceSum operation support for the reference backend,
which computes the sum of elements across dimensions of a tensor.

Changelog v1:
- Fix file header descriptions.

Changelog v2:
- Fix line limit issue.
- Fix type conversion issue.

Changelog v3:
- Remove tabs.
- Modify newly added file headers.

Changelog v4:
- The symbol isn't allowed in file headers, so drop it from the newly added file headers.

Changelog v5:
- Remove tabs, fix the use of brackets and align lines correctly.

Changelog v6:
- Add serializer and deserializer support.

Changelog v7:
- Fix build error and add missing code.

Changelog v8:
- Rename ReduceSumDescriptor to ReduceDescriptor
    - Update m_KeepDims field data type to bool on ReduceDescriptor
    - Add ReduceOperation field to ReduceDescriptor

- Rename ReduceSumLayer to ReduceLayer
    - Update ReduceLayer to use ReduceDescriptor
    - Update ReduceLayer::ValidateTensorShapesFromInputs() function

- Rename RefReduceSumWorkload to RefReduceWorkload
    - Update workload to use ReduceDescriptor
    - Update workload to use Decoders and Encoders

- Remove ReduceSum.hpp and ReduceSum.cpp
- Added Reduce.hpp and Reduce.cpp
     - Move Mean.cpp (which is implementing REDUCE_MEAN) functionality to Reduce.cpp
     - Update RefMeanWorkload to call Reduce function with ReduceOperation::Mean argument

- Remove Mean.hpp and Mean.cpp
- Update the Serializer/Deserializer ArmnnSchema.fbs for ReduceLayer, ReduceDescriptor, and ReduceOperation
- Update Serializer and Deserializer for serializing/parsing ReduceLayer
- Added TfLite parser Sum test for REDUCE_SUM operator
- Make corresponding changes on front-end and Ref backend to support REDUCE_SUM operator

Changelog v9:
- Fixed build errors.

Change-Id: I8c8e034f3df73f9565b3c18eff51ecca6c542195
Signed-off-by: Inki Dae <inki.dae@samsung.com>
Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
diff --git a/src/armnn/InternalTypes.hpp b/src/armnn/InternalTypes.hpp
index 6e47399..6e65591 100644
--- a/src/armnn/InternalTypes.hpp
+++ b/src/armnn/InternalTypes.hpp
@@ -63,6 +63,7 @@
     X(QuantizedLstm) \
     X(Reshape) \
     X(Rank) \
+    X(Reduce) \
     X(Resize) \
     X(Slice) \
     X(Softmax) \
diff --git a/src/armnn/LayerSupport.cpp b/src/armnn/LayerSupport.cpp
index 197e1af..8812e0e 100644
--- a/src/armnn/LayerSupport.cpp
+++ b/src/armnn/LayerSupport.cpp
@@ -528,6 +528,7 @@
                                cellStateOut, output, paramsInfo);
 }
 
+
 bool IsPermuteSupported(const BackendId& backend,
                         const TensorInfo& input,
                         const TensorInfo& output,
@@ -558,6 +559,16 @@
     FORWARD_LAYER_SUPPORT_FUNC(backend, IsPreluSupported, input, alpha, output);
 }
 
+bool IsReduceSupported(const BackendId& backend,
+                       const TensorInfo& input,
+                       const TensorInfo& output,
+                       const ReduceDescriptor& descriptor,
+                       char* reasonIfUnsupported,
+                       size_t reasonIfUnsupportedMaxLength)
+{
+    FORWARD_LAYER_SUPPORT_FUNC(backend, IsReduceSupported, input, output, descriptor);
+}
+
 bool IsReshapeSupported(const BackendId& backend,
                         const TensorInfo& input,
                         const TensorInfo& output,
diff --git a/src/armnn/LayersFwd.hpp b/src/armnn/LayersFwd.hpp
index b9ca61a..6782fb5 100644
--- a/src/armnn/LayersFwd.hpp
+++ b/src/armnn/LayersFwd.hpp
@@ -56,6 +56,7 @@
 #include "layers/QLstmLayer.hpp"
 #include "layers/QuantizedLstmLayer.hpp"
 #include "layers/RankLayer.hpp"
+#include "layers/ReduceLayer.hpp"
 #include "layers/ReshapeLayer.hpp"
 #include "layers/ResizeLayer.hpp"
 #include "layers/SliceLayer.hpp"
@@ -149,6 +150,7 @@
 DECLARE_LAYER(QLstm)
 DECLARE_LAYER(QuantizedLstm)
 DECLARE_LAYER(Rank)
+DECLARE_LAYER(Reduce)
 DECLARE_LAYER(Reshape)
 DECLARE_LAYER(Resize)
 DECLARE_LAYER(Slice)
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index d41f2f6..f8b0675 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -1491,6 +1491,12 @@
     return m_Graph->AddLayer<RankLayer>(name);
 }
 
+IConnectableLayer* Network::AddReduceLayer(const ReduceDescriptor& reduceDescriptor,
+                                           const char* name)
+{
+    return m_Graph->AddLayer<ReduceLayer>(reduceDescriptor, name);
+}
+
 IConnectableLayer* Network::AddResizeBilinearLayer(const ResizeBilinearDescriptor& descriptor,
                                                    const char* name)
 {
diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp
index c652edb..1205bd8 100644
--- a/src/armnn/Network.hpp
+++ b/src/armnn/Network.hpp
@@ -169,6 +169,9 @@
     IConnectableLayer* AddResizeLayer(const ResizeDescriptor& resizeDescriptor,
                                       const char* name = nullptr) override;
 
+    IConnectableLayer* AddReduceLayer(const ReduceDescriptor& reduceDescriptor,
+                                      const char* name = nullptr) override;
+
     IConnectableLayer* AddInstanceNormalizationLayer(const InstanceNormalizationDescriptor& desc,
                                                      const char* name = nullptr) override;
 
diff --git a/src/armnn/layers/ReduceLayer.cpp b/src/armnn/layers/ReduceLayer.cpp
new file mode 100644
index 0000000..b68cd2e
--- /dev/null
+++ b/src/armnn/layers/ReduceLayer.cpp
@@ -0,0 +1,100 @@
+//
+// Copyright © 2020 Samsung Electronics Co Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ReduceLayer.hpp"
+#include "LayerCloneBase.hpp"
+
+#include <armnn/TypesUtils.hpp>
+
+#include <backendsCommon/WorkloadData.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+namespace armnn
+{
+
+ReduceLayer::ReduceLayer(const ReduceDescriptor& param, const char* name)
+    : LayerWithParameters(1, 1, LayerType::Reduce, param, name)
+{
+}
+
+std::unique_ptr<IWorkload> ReduceLayer::CreateWorkload(const IWorkloadFactory& factory) const
+{
+    ReduceQueueDescriptor descriptor;
+    return factory.CreateReduce(descriptor, PrepInfoAndDesc(descriptor));
+}
+
+ReduceLayer* ReduceLayer::Clone(Graph& graph) const
+{
+    return CloneBase<ReduceLayer>(graph, m_Param, GetName());
+}
+
+void ReduceLayer::ValidateTensorShapesFromInputs()
+{
+    VerifyLayerConnections(1, CHECK_LOCATION());
+
+    const TensorShape& outputShape = GetOutputSlot(0).GetTensorInfo().GetShape();
+
+    VerifyShapeInferenceType(outputShape, m_ShapeInferenceMethod);
+
+    const TensorInfo& input = GetInputSlot(0).GetConnection()->GetTensorInfo();
+
+    ARMNN_ASSERT_MSG(input.GetNumDimensions() > 0 && input.GetNumDimensions() <= 4,
+                     "ReduceLayer: Reduce supports up to 4D input.");
+
+    unsigned int rank = input.GetNumDimensions();
+    unsigned int outputRank = 0;
+
+    // Calculate output dimension
+    if (m_Param.m_KeepDims)
+    {
+        outputRank = rank;
+    }
+    else if (m_Param.m_vAxis.empty())
+    {
+        outputRank = 1;
+    }
+    else if (m_Param.m_vAxis.size() > input.GetNumDimensions())
+    {
+        throw LayerValidationException("ReduceLayer: Dimensions to reduce can not be bigger than input dimensions");
+    }
+    else
+    {
+        outputRank = input.GetNumDimensions() - armnn::numeric_cast<unsigned int>(m_Param.m_vAxis.size());
+        if (outputRank == 0)
+        {
+            outputRank = 1;
+        }
+    }
+
+    std::vector<unsigned int> dimSizes(outputRank, 1);
+    if (!m_Param.m_vAxis.empty())
+    {
+        // Skip the dimension that has been reduced unless keepDims is true.
+        unsigned int outputIndex = 0;
+        for (unsigned int i = 0; i < input.GetNumDimensions(); ++i)
+        {
+            if (std::find(m_Param.m_vAxis.begin(), m_Param.m_vAxis.end(), i) == m_Param.m_vAxis.end())
+            {
+                dimSizes[outputIndex] = armnn::numeric_cast<unsigned int>(input.GetShape()[i]);
+                ++outputIndex;
+            }
+            else if (m_Param.m_KeepDims)
+            {
+                dimSizes[outputIndex] = 1;
+                ++outputIndex;
+            }
+        }
+    }
+    const TensorShape& inferredShape = TensorShape(outputRank, dimSizes.data());
+
+    ValidateAndCopyShape(outputShape, inferredShape, m_ShapeInferenceMethod, "ReduceLayer");
+}
+
+void ReduceLayer::Accept(ILayerVisitor& visitor) const
+{
+    visitor.VisitReduceLayer(this, GetParameters(), GetName());
+}
+
+} // namespace armnn
diff --git a/src/armnn/layers/ReduceLayer.hpp b/src/armnn/layers/ReduceLayer.hpp
new file mode 100644
index 0000000..fd4f207
--- /dev/null
+++ b/src/armnn/layers/ReduceLayer.hpp
@@ -0,0 +1,42 @@
+//
+// Copyright © 2020 Samsung Electronics Co Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "LayerWithParameters.hpp"
+
+namespace armnn
+{
+
+/// This layer represents a reduction operation.
+class ReduceLayer : public LayerWithParameters<ReduceDescriptor>
+{
+public:
+    /// Makes a workload for the Reduce type.
+    /// @param [in] graph The graph where this layer can be found.
+    /// @param [in] factory The workload factory which will create the workload.
+    /// @return A pointer to the created workload, or nullptr if not created.
+    virtual std::unique_ptr<IWorkload>CreateWorkload(const IWorkloadFactory& factory) const override;
+
+    /// Creates a dynamically-allocated copy of this layer.
+    /// @param [in] graph The graph into which this layer is being cloned.
+    ReduceLayer* Clone(Graph& graph) const override;
+
+    /// Check if the input tensor shape(s)
+    /// will lead to a valid configuration of @ref ReduceLayer.
+    void ValidateTensorShapesFromInputs() override;
+
+    void Accept(ILayerVisitor& visitor) const override;
+
+protected:
+    /// Constructor to create a ReduceLayer.
+    /// @param [in] param ReduceDescriptor to configure the reduction operation.
+    /// @param [in] name Optional name for the layer.
+    ReduceLayer(const ReduceDescriptor& param, const char* name);
+
+    /// Default destructor
+    ~ReduceLayer() = default;
+};
+
+} // namespace armnn
diff --git a/src/armnn/test/TestNameAndDescriptorLayerVisitor.hpp b/src/armnn/test/TestNameAndDescriptorLayerVisitor.hpp
index dc6d114..c911caa 100644
--- a/src/armnn/test/TestNameAndDescriptorLayerVisitor.hpp
+++ b/src/armnn/test/TestNameAndDescriptorLayerVisitor.hpp
@@ -60,6 +60,7 @@
 DECLARE_TEST_NAME_AND_DESCRIPTOR_LAYER_VISITOR_CLASS(Pad)
 DECLARE_TEST_NAME_AND_DESCRIPTOR_LAYER_VISITOR_CLASS(Permute)
 DECLARE_TEST_NAME_AND_DESCRIPTOR_LAYER_VISITOR_CLASS(Pooling2d)
+DECLARE_TEST_NAME_AND_DESCRIPTOR_LAYER_VISITOR_CLASS(Reduce)
 DECLARE_TEST_NAME_AND_DESCRIPTOR_LAYER_VISITOR_CLASS(Reshape)
 DECLARE_TEST_NAME_AND_DESCRIPTOR_LAYER_VISITOR_CLASS(Resize)
 DECLARE_TEST_NAME_AND_DESCRIPTOR_LAYER_VISITOR_CLASS(Slice)
diff --git a/src/armnnDeserializer/Deserializer.cpp b/src/armnnDeserializer/Deserializer.cpp
index 14c2af4..e98ff15 100644
--- a/src/armnnDeserializer/Deserializer.cpp
+++ b/src/armnnDeserializer/Deserializer.cpp
@@ -251,6 +251,7 @@
     m_ParserFunctions[Layer_QuantizeLayer]               = &DeserializerImpl::ParseQuantize;
     m_ParserFunctions[Layer_QuantizedLstmLayer]          = &DeserializerImpl::ParseQuantizedLstm;
     m_ParserFunctions[Layer_RankLayer]                   = &DeserializerImpl::ParseRank;
+    m_ParserFunctions[Layer_ReduceLayer]                 = &DeserializerImpl::ParseReduce;
     m_ParserFunctions[Layer_ReshapeLayer]                = &DeserializerImpl::ParseReshape;
     m_ParserFunctions[Layer_ResizeBilinearLayer]         = &DeserializerImpl::ParseResizeBilinear;
     m_ParserFunctions[Layer_ResizeLayer]                 = &DeserializerImpl::ParseResize;
@@ -363,6 +364,8 @@
             return graphPtr->layers()->Get(layerIndex)->layer_as_QuantizedLstmLayer()->base();
         case Layer::Layer_RankLayer:
             return graphPtr->layers()->Get(layerIndex)->layer_as_RankLayer()->base();
+        case Layer::Layer_ReduceLayer:
+            return graphPtr->layers()->Get(layerIndex)->layer_as_ReduceLayer()->base();
         case Layer::Layer_ReshapeLayer:
             return graphPtr->layers()->Get(layerIndex)->layer_as_ReshapeLayer()->base();
         case Layer::Layer_ResizeBilinearLayer:
@@ -498,6 +501,23 @@
     }
 }
 
+armnn::ReduceOperation ToReduceOperation(armnnSerializer::ReduceOperation operation)
+{
+    switch (operation)
+    {
+        case armnnSerializer::ReduceOperation::ReduceOperation_Sum:
+            return armnn::ReduceOperation::Sum;
+        case armnnSerializer::ReduceOperation::ReduceOperation_Max:
+            return armnn::ReduceOperation::Max;
+        case armnnSerializer::ReduceOperation::ReduceOperation_Mean:
+            return armnn::ReduceOperation::Mean;
+        case armnnSerializer::ReduceOperation::ReduceOperation_Min:
+            return armnn::ReduceOperation::Min;
+        default:
+            return armnn::ReduceOperation::Sum;
+    }
+}
+
 armnn::LogicalBinaryOperation ToLogicalBinaryOperation(armnnSerializer::LogicalBinaryOperation operation)
 {
     switch (operation)
@@ -2082,6 +2102,38 @@
     RegisterOutputSlots(graph, layerIndex, layer);
 }
 
+void IDeserializer::DeserializerImpl::ParseReduce(GraphPtr graph, unsigned int layerIndex)
+{
+    CHECK_LAYERS(graph, 0, layerIndex);
+    CHECK_LOCATION();
+
+    auto inputs = GetInputs(graph, layerIndex);
+    CHECK_VALID_SIZE(inputs.size(), 1);
+
+    auto outputs = GetOutputs(graph, layerIndex);
+    CHECK_VALID_SIZE(outputs.size(), 1);
+
+    auto fbLayer      = graph->layers()->Get(layerIndex)->layer_as_ReduceLayer();
+    auto fbDescriptor = fbLayer->descriptor();
+    auto flatBufferAxis = fbDescriptor->axis();
+
+    armnn::ReduceDescriptor descriptor;
+    descriptor.m_TargetHeight = fbDescriptor->targetHeight();
+    descriptor.m_TargetWidth  = fbDescriptor->targetWidth();
+    descriptor.m_KeepDims     = fbDescriptor->keepDims();
+    descriptor.m_vAxis = std::vector<unsigned int>(flatBufferAxis->begin(), flatBufferAxis->end());
+    descriptor.m_ReduceOperation = ToReduceOperation(fbDescriptor->reduceOperation());
+
+    const std::string& layerName = GetLayerName(graph, layerIndex);
+    IConnectableLayer* layer     = m_Network->AddReduceLayer(descriptor, layerName.c_str());
+
+    armnn::TensorInfo outputTensorInfo = ToTensorInfo(outputs[0]);
+    layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+
+    RegisterInputSlots(graph, layerIndex, layer);
+    RegisterOutputSlots(graph, layerIndex, layer);
+}
+
 void IDeserializer::DeserializerImpl::ParseReshape(GraphPtr graph, unsigned int layerIndex)
 {
     CHECK_LAYERS(graph, 0, layerIndex);
diff --git a/src/armnnDeserializer/Deserializer.hpp b/src/armnnDeserializer/Deserializer.hpp
index e232fee..f4f6424 100644
--- a/src/armnnDeserializer/Deserializer.hpp
+++ b/src/armnnDeserializer/Deserializer.hpp
@@ -119,6 +119,7 @@
     void ParseQLstm(GraphPtr graph, unsigned int layerIndex);
     void ParseQuantize(GraphPtr graph, unsigned int layerIndex);
     void ParseRank(GraphPtr graph, unsigned int layerIndex);
+    void ParseReduce(GraphPtr graph, unsigned int layerIndex);
     void ParseReshape(GraphPtr graph, unsigned int layerIndex);
     void ParseResize(GraphPtr graph, unsigned int layerIndex);
     void ParseResizeBilinear(GraphPtr graph, unsigned int layerIndex);
diff --git a/src/armnnDeserializer/test/DeserializeReduceSum.cpp b/src/armnnDeserializer/test/DeserializeReduceSum.cpp
new file mode 100644
index 0000000..d88613e
--- /dev/null
+++ b/src/armnnDeserializer/test/DeserializeReduceSum.cpp
@@ -0,0 +1,126 @@
+//
+// Copyright © 2020 Samsung Electronics Co Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <boost/test/unit_test.hpp>
+#include "ParserFlatbuffersSerializeFixture.hpp"
+#include "../Deserializer.hpp"
+
+#include <string>
+#include <iostream>
+
+BOOST_AUTO_TEST_SUITE(Deserializer)
+
+struct ReduceSumFixture : public ParserFlatbuffersSerializeFixture
+{
+    explicit ReduceSumFixture(const std::string& inputShape,
+                              const std::string& outputShape,
+                              const std::string& axis,
+                              const std::string& dataType)
+    {
+        m_JsonString = R"(
+            {
+                inputIds: [0],
+                outputIds: [2],
+                layers: [
+                    {
+                        layer_type: "InputLayer",
+                        layer: {
+                            base: {
+                                layerBindingId: 0,
+                                base: {
+                                    index: 0,
+                                    layerName: "InputLayer",
+                                    layerType: "Input",
+                                    inputSlots: [{
+                                        index: 0,
+                                        connection: {sourceLayerIndex:0, outputSlotIndex:0 },
+                                    }],
+                                    outputSlots: [{
+                                        index: 0,
+                                        tensorInfo: {
+                                            dimensions: )" + inputShape + R"(,
+                                            dataType: )" + dataType + R"(
+                                        }
+                                    }]
+                                }
+                            }
+                        }
+                    },
+                    {
+                        layer_type: "ReduceLayer",
+                        layer: {
+                            base: {
+                                index: 1,
+                                layerName: "ReduceSumLayer",
+                                layerType: "Reduce",
+                                inputSlots: [{
+                                    index: 0,
+                                    connection: {sourceLayerIndex:0, outputSlotIndex:0 },
+                                }],
+                                outputSlots: [{
+                                    index: 0,
+                                    tensorInfo: {
+                                        dimensions: )" + outputShape + R"(,
+                                        dataType: )" + dataType + R"(
+                                    }
+                                }]
+                            },
+                            descriptor: {
+                                axis: )" + axis + R"(,
+                                keepDims: true,
+                                reduceOperation: Sum
+                            }
+                        }
+                    },
+                    {
+                        layer_type: "OutputLayer",
+                        layer: {
+                            base:{
+                                layerBindingId: 2,
+                                base: {
+                                    index: 2,
+                                    layerName: "OutputLayer",
+                                    layerType: "Output",
+                                    inputSlots: [{
+                                        index: 0,
+                                        connection: {sourceLayerIndex:1, outputSlotIndex:0 },
+                                    }],
+                                    outputSlots: [{
+                                        index: 0,
+                                        tensorInfo: {
+                                            dimensions: )" + outputShape + R"(,
+                                            dataType: )" + dataType + R"(
+                                        },
+                                    }],
+                                }
+                            }
+                        },
+                    }
+                ]
+            }
+        )";
+        Setup();
+    }
+};
+
+struct SimpleReduceSumFixture : ReduceSumFixture
+{
+    SimpleReduceSumFixture()
+        : ReduceSumFixture("[ 1, 1, 3, 2 ]",     // inputShape
+                           "[ 1, 1, 1, 2 ]",     // outputShape
+                           "[ 2 ]",              // axis
+                           "Float32")            // dataType
+    {}
+};
+
+BOOST_FIXTURE_TEST_CASE(SimpleReduceSum, SimpleReduceSumFixture)
+{
+    RunTest<4, armnn::DataType::Float32>(
+         0,
+         {{"InputLayer",  { 1.0f, 1.0f, 2.0f, 2.0f, 3.0f, 3.0f }}},
+         {{"OutputLayer", { 6.0f, 6.0f }}});
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnnSerializer/ArmnnSchema.fbs b/src/armnnSerializer/ArmnnSchema.fbs
index 1f71ce1..aa539b1 100644
--- a/src/armnnSerializer/ArmnnSchema.fbs
+++ b/src/armnnSerializer/ArmnnSchema.fbs
@@ -47,6 +47,13 @@
     NCHW = 1
 }
 
+enum ReduceOperation: byte {
+    Sum  = 0,
+    Max  = 1,
+    Mean = 2,
+    Min  = 3
+}
+
 enum ResizeMethod: byte {
     NearestNeighbor = 0,
     Bilinear = 1,
@@ -160,7 +167,8 @@
     QLstm = 56,
     Fill = 57,
     Rank = 58,
-    LogicalBinary = 59
+    LogicalBinary = 59,
+    Reduce = 60
 }
 
 // Base layer table to be used as part of other layers
@@ -881,6 +889,19 @@
     base:LayerBase;
 }
 
+table ReduceLayer {
+    base:LayerBase;
+    descriptor:ReduceDescriptor;
+}
+
+table ReduceDescriptor {
+    targetHeight:uint;
+    targetWidth:uint;
+    keepDims:bool = false;
+    axis:[uint];
+    reduceOperation:ReduceOperation = Sum;
+}
+
 union Layer {
     ActivationLayer,
     AdditionLayer,
@@ -941,7 +962,8 @@
     QLstmLayer,
     FillLayer,
     RankLayer,
-    LogicalBinaryLayer
+    LogicalBinaryLayer,
+    ReduceLayer
 }
 
 table AnyLayer {
@@ -960,4 +982,4 @@
     featureVersions:FeatureCompatibilityVersions;
 }
 
-root_type SerializedGraph;
\ No newline at end of file
+root_type SerializedGraph;
diff --git a/src/armnnSerializer/ArmnnSchema_generated.h b/src/armnnSerializer/ArmnnSchema_generated.h
index 031da5d..32548b2 100644
--- a/src/armnnSerializer/ArmnnSchema_generated.h
+++ b/src/armnnSerializer/ArmnnSchema_generated.h
@@ -4,6 +4,7 @@
 //
 // automatically generated by the FlatBuffers compiler, do not modify
 
+
 #ifndef FLATBUFFERS_GENERATED_ARMNNSCHEMA_ARMNNSERIALIZER_H_
 #define FLATBUFFERS_GENERATED_ARMNNSCHEMA_ARMNNSERIALIZER_H_
 
@@ -349,6 +350,12 @@
 struct RankLayer;
 struct RankLayerBuilder;
 
+struct ReduceLayer;
+struct ReduceLayerBuilder;
+
+struct ReduceDescriptor;
+struct ReduceDescriptorBuilder;
+
 struct AnyLayer;
 struct AnyLayerBuilder;
 
@@ -532,6 +539,42 @@
   return EnumNamesDataLayout()[index];
 }
 
+enum ReduceOperation {
+  ReduceOperation_Sum = 0,
+  ReduceOperation_Max = 1,
+  ReduceOperation_Mean = 2,
+  ReduceOperation_Min = 3,
+  ReduceOperation_MIN = ReduceOperation_Sum,
+  ReduceOperation_MAX = ReduceOperation_Min
+};
+
+inline const ReduceOperation (&EnumValuesReduceOperation())[4] {
+  static const ReduceOperation values[] = {
+    ReduceOperation_Sum,
+    ReduceOperation_Max,
+    ReduceOperation_Mean,
+    ReduceOperation_Min
+  };
+  return values;
+}
+
+inline const char * const *EnumNamesReduceOperation() {
+  static const char * const names[5] = {
+    "Sum",
+    "Max",
+    "Mean",
+    "Min",
+    nullptr
+  };
+  return names;
+}
+
+inline const char *EnumNameReduceOperation(ReduceOperation e) {
+  if (flatbuffers::IsOutRange(e, ReduceOperation_Sum, ReduceOperation_Min)) return "";
+  const size_t index = static_cast<size_t>(e);
+  return EnumNamesReduceOperation()[index];
+}
+
 enum ResizeMethod {
   ResizeMethod_NearestNeighbor = 0,
   ResizeMethod_Bilinear = 1,
@@ -685,11 +728,12 @@
   LayerType_Fill = 57,
   LayerType_Rank = 58,
   LayerType_LogicalBinary = 59,
+  LayerType_Reduce = 60,
   LayerType_MIN = LayerType_Addition,
-  LayerType_MAX = LayerType_LogicalBinary
+  LayerType_MAX = LayerType_Reduce
 };
 
-inline const LayerType (&EnumValuesLayerType())[60] {
+inline const LayerType (&EnumValuesLayerType())[61] {
   static const LayerType values[] = {
     LayerType_Addition,
     LayerType_Input,
@@ -750,13 +794,14 @@
     LayerType_QLstm,
     LayerType_Fill,
     LayerType_Rank,
-    LayerType_LogicalBinary
+    LayerType_LogicalBinary,
+    LayerType_Reduce
   };
   return values;
 }
 
 inline const char * const *EnumNamesLayerType() {
-  static const char * const names[61] = {
+  static const char * const names[62] = {
     "Addition",
     "Input",
     "Multiplication",
@@ -817,13 +862,14 @@
     "Fill",
     "Rank",
     "LogicalBinary",
+    "Reduce",
     nullptr
   };
   return names;
 }
 
 inline const char *EnumNameLayerType(LayerType e) {
-  if (flatbuffers::IsOutRange(e, LayerType_Addition, LayerType_LogicalBinary)) return "";
+  if (flatbuffers::IsOutRange(e, LayerType_Addition, LayerType_Reduce)) return "";
   const size_t index = static_cast<size_t>(e);
   return EnumNamesLayerType()[index];
 }
@@ -1157,11 +1203,12 @@
   Layer_FillLayer = 58,
   Layer_RankLayer = 59,
   Layer_LogicalBinaryLayer = 60,
+  Layer_ReduceLayer = 61,
   Layer_MIN = Layer_NONE,
-  Layer_MAX = Layer_LogicalBinaryLayer
+  Layer_MAX = Layer_ReduceLayer
 };
 
-inline const Layer (&EnumValuesLayer())[61] {
+inline const Layer (&EnumValuesLayer())[62] {
   static const Layer values[] = {
     Layer_NONE,
     Layer_ActivationLayer,
@@ -1223,13 +1270,14 @@
     Layer_QLstmLayer,
     Layer_FillLayer,
     Layer_RankLayer,
-    Layer_LogicalBinaryLayer
+    Layer_LogicalBinaryLayer,
+    Layer_ReduceLayer
   };
   return values;
 }
 
 inline const char * const *EnumNamesLayer() {
-  static const char * const names[62] = {
+  static const char * const names[63] = {
     "NONE",
     "ActivationLayer",
     "AdditionLayer",
@@ -1291,13 +1339,14 @@
     "FillLayer",
     "RankLayer",
     "LogicalBinaryLayer",
+    "ReduceLayer",
     nullptr
   };
   return names;
 }
 
 inline const char *EnumNameLayer(Layer e) {
-  if (flatbuffers::IsOutRange(e, Layer_NONE, Layer_LogicalBinaryLayer)) return "";
+  if (flatbuffers::IsOutRange(e, Layer_NONE, Layer_ReduceLayer)) return "";
   const size_t index = static_cast<size_t>(e);
   return EnumNamesLayer()[index];
 }
@@ -1546,6 +1595,10 @@
   static const Layer enum_value = Layer_LogicalBinaryLayer;
 };
 
+template<> struct LayerTraits<armnnSerializer::ReduceLayer> {
+  static const Layer enum_value = Layer_ReduceLayer;
+};
+
 bool VerifyLayer(flatbuffers::Verifier &verifier, const void *obj, Layer type);
 bool VerifyLayerVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types);
 
@@ -9097,6 +9150,160 @@
   return builder_.Finish();
 }
 
+struct ReduceLayer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef ReduceLayerBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_BASE = 4,
+    VT_DESCRIPTOR = 6
+  };
+  const armnnSerializer::LayerBase *base() const {
+    return GetPointer<const armnnSerializer::LayerBase *>(VT_BASE);
+  }
+  const armnnSerializer::ReduceDescriptor *descriptor() const {
+    return GetPointer<const armnnSerializer::ReduceDescriptor *>(VT_DESCRIPTOR);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_BASE) &&
+           verifier.VerifyTable(base()) &&
+           VerifyOffset(verifier, VT_DESCRIPTOR) &&
+           verifier.VerifyTable(descriptor()) &&
+           verifier.EndTable();
+  }
+};
+
+struct ReduceLayerBuilder {
+  typedef ReduceLayer Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_base(flatbuffers::Offset<armnnSerializer::LayerBase> base) {
+    fbb_.AddOffset(ReduceLayer::VT_BASE, base);
+  }
+  void add_descriptor(flatbuffers::Offset<armnnSerializer::ReduceDescriptor> descriptor) {
+    fbb_.AddOffset(ReduceLayer::VT_DESCRIPTOR, descriptor);
+  }
+  explicit ReduceLayerBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  ReduceLayerBuilder &operator=(const ReduceLayerBuilder &);
+  flatbuffers::Offset<ReduceLayer> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ReduceLayer>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ReduceLayer> CreateReduceLayer(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<armnnSerializer::LayerBase> base = 0,
+    flatbuffers::Offset<armnnSerializer::ReduceDescriptor> descriptor = 0) {
+  ReduceLayerBuilder builder_(_fbb);
+  builder_.add_descriptor(descriptor);
+  builder_.add_base(base);
+  return builder_.Finish();
+}
+
+struct ReduceDescriptor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef ReduceDescriptorBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_TARGETHEIGHT = 4,
+    VT_TARGETWIDTH = 6,
+    VT_KEEPDIMS = 8,
+    VT_AXIS = 10,
+    VT_REDUCEOPERATION = 12
+  };
+  uint32_t targetHeight() const {
+    return GetField<uint32_t>(VT_TARGETHEIGHT, 0);
+  }
+  uint32_t targetWidth() const {
+    return GetField<uint32_t>(VT_TARGETWIDTH, 0);
+  }
+  bool keepDims() const {
+    return GetField<uint8_t>(VT_KEEPDIMS, 0) != 0;
+  }
+  const flatbuffers::Vector<uint32_t> *axis() const {
+    return GetPointer<const flatbuffers::Vector<uint32_t> *>(VT_AXIS);
+  }
+  armnnSerializer::ReduceOperation reduceOperation() const {
+    return static_cast<armnnSerializer::ReduceOperation>(GetField<int8_t>(VT_REDUCEOPERATION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<uint32_t>(verifier, VT_TARGETHEIGHT) &&
+           VerifyField<uint32_t>(verifier, VT_TARGETWIDTH) &&
+           VerifyField<uint8_t>(verifier, VT_KEEPDIMS) &&
+           VerifyOffset(verifier, VT_AXIS) &&
+           verifier.VerifyVector(axis()) &&
+           VerifyField<int8_t>(verifier, VT_REDUCEOPERATION) &&
+           verifier.EndTable();
+  }
+};
+
+struct ReduceDescriptorBuilder {
+  typedef ReduceDescriptor Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_targetHeight(uint32_t targetHeight) {
+    fbb_.AddElement<uint32_t>(ReduceDescriptor::VT_TARGETHEIGHT, targetHeight, 0);
+  }
+  void add_targetWidth(uint32_t targetWidth) {
+    fbb_.AddElement<uint32_t>(ReduceDescriptor::VT_TARGETWIDTH, targetWidth, 0);
+  }
+  void add_keepDims(bool keepDims) {
+    fbb_.AddElement<uint8_t>(ReduceDescriptor::VT_KEEPDIMS, static_cast<uint8_t>(keepDims), 0);
+  }
+  void add_axis(flatbuffers::Offset<flatbuffers::Vector<uint32_t>> axis) {
+    fbb_.AddOffset(ReduceDescriptor::VT_AXIS, axis);
+  }
+  void add_reduceOperation(armnnSerializer::ReduceOperation reduceOperation) {
+    fbb_.AddElement<int8_t>(ReduceDescriptor::VT_REDUCEOPERATION, static_cast<int8_t>(reduceOperation), 0);
+  }
+  explicit ReduceDescriptorBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  ReduceDescriptorBuilder &operator=(const ReduceDescriptorBuilder &);
+  flatbuffers::Offset<ReduceDescriptor> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ReduceDescriptor>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ReduceDescriptor> CreateReduceDescriptor(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    uint32_t targetHeight = 0,
+    uint32_t targetWidth = 0,
+    bool keepDims = false,
+    flatbuffers::Offset<flatbuffers::Vector<uint32_t>> axis = 0,
+    armnnSerializer::ReduceOperation reduceOperation = armnnSerializer::ReduceOperation_Sum) {
+  ReduceDescriptorBuilder builder_(_fbb);
+  builder_.add_axis(axis);
+  builder_.add_targetWidth(targetWidth);
+  builder_.add_targetHeight(targetHeight);
+  builder_.add_reduceOperation(reduceOperation);
+  builder_.add_keepDims(keepDims);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<ReduceDescriptor> CreateReduceDescriptorDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    uint32_t targetHeight = 0,
+    uint32_t targetWidth = 0,
+    bool keepDims = false,
+    const std::vector<uint32_t> *axis = nullptr,
+    armnnSerializer::ReduceOperation reduceOperation = armnnSerializer::ReduceOperation_Sum) {
+  auto axis__ = axis ? _fbb.CreateVector<uint32_t>(*axis) : 0;
+  return armnnSerializer::CreateReduceDescriptor(
+      _fbb,
+      targetHeight,
+      targetWidth,
+      keepDims,
+      axis__,
+      reduceOperation);
+}
+
 struct AnyLayer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   typedef AnyLayerBuilder Builder;
   enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
@@ -9290,6 +9497,9 @@
   const armnnSerializer::LogicalBinaryLayer *layer_as_LogicalBinaryLayer() const {
     return layer_type() == armnnSerializer::Layer_LogicalBinaryLayer ? static_cast<const armnnSerializer::LogicalBinaryLayer *>(layer()) : nullptr;
   }
+  const armnnSerializer::ReduceLayer *layer_as_ReduceLayer() const {
+    return layer_type() == armnnSerializer::Layer_ReduceLayer ? static_cast<const armnnSerializer::ReduceLayer *>(layer()) : nullptr;
+  }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyField<uint8_t>(verifier, VT_LAYER_TYPE) &&
@@ -9539,6 +9749,10 @@
   return layer_as_LogicalBinaryLayer();
 }
 
+template<> inline const armnnSerializer::ReduceLayer *AnyLayer::layer_as<armnnSerializer::ReduceLayer>() const {
+  return layer_as_ReduceLayer();
+}
+
 struct AnyLayerBuilder {
   typedef AnyLayer Table;
   flatbuffers::FlatBufferBuilder &fbb_;
@@ -9989,6 +10203,10 @@
       auto ptr = reinterpret_cast<const armnnSerializer::LogicalBinaryLayer *>(obj);
       return verifier.VerifyTable(ptr);
     }
+    case Layer_ReduceLayer: {
+      auto ptr = reinterpret_cast<const armnnSerializer::ReduceLayer *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
     default: return true;
   }
 }
diff --git a/src/armnnSerializer/Serializer.cpp b/src/armnnSerializer/Serializer.cpp
index a0c99b9..a2217a3 100644
--- a/src/armnnSerializer/Serializer.cpp
+++ b/src/armnnSerializer/Serializer.cpp
@@ -904,6 +904,25 @@
 
     CreateAnyLayer(flatBufferRankLayer.o, serializer::Layer::Layer_RankLayer);
 }
+
+// Build FlatBuffer for Reduce Layer
+void SerializerVisitor::VisitReduceLayer(const armnn::IConnectableLayer* layer,
+                                         const armnn::ReduceDescriptor& reduceDescriptor,
+                                         const char*)
+{
+    auto fbReduceBaseLayer = CreateLayerBase(layer, serializer::LayerType::LayerType_Reduce);
+    auto fbDescriptor = CreateReduceDescriptor(m_flatBufferBuilder,
+                                               reduceDescriptor.m_TargetHeight,
+                                               reduceDescriptor.m_TargetWidth,
+                                               reduceDescriptor.m_KeepDims,
+                                               m_flatBufferBuilder.CreateVector(reduceDescriptor.m_vAxis),
+                                               GetFlatBufferReduceOperation(reduceDescriptor.m_ReduceOperation));
+    auto fbReduceLayer = serializer::CreateReduceLayer(m_flatBufferBuilder,
+                                                       fbReduceBaseLayer,
+                                                       fbDescriptor);
+
+    CreateAnyLayer(fbReduceLayer.o, serializer::Layer::Layer_ReduceLayer);
+}
+
 // Build FlatBuffer for Reshape Layer
 void SerializerVisitor::VisitReshapeLayer(const armnn::IConnectableLayer* layer,
                                           const armnn::ReshapeDescriptor& reshapeDescriptor,
diff --git a/src/armnnSerializer/Serializer.hpp b/src/armnnSerializer/Serializer.hpp
index f28be09..10971fd 100644
--- a/src/armnnSerializer/Serializer.hpp
+++ b/src/armnnSerializer/Serializer.hpp
@@ -226,6 +226,10 @@
     void VisitRankLayer(const armnn::IConnectableLayer* layer,
                         const char* name = nullptr) override;
 
+    void VisitReduceLayer(const armnn::IConnectableLayer* layer,
+                          const armnn::ReduceDescriptor& reduceDescriptor,
+                          const char* name = nullptr) override;
+
     void VisitReshapeLayer(const armnn::IConnectableLayer* layer,
                            const armnn::ReshapeDescriptor& reshapeDescriptor,
                            const char* name = nullptr) override;
diff --git a/src/armnnSerializer/SerializerUtils.cpp b/src/armnnSerializer/SerializerUtils.cpp
index 045d6aa..32ac75e 100644
--- a/src/armnnSerializer/SerializerUtils.cpp
+++ b/src/armnnSerializer/SerializerUtils.cpp
@@ -198,4 +198,21 @@
     }
 }
 
+// Maps an armnn::ReduceOperation onto the equivalent serializer enum value.
+// Unrecognised values fall back to Sum; no error is raised.
+armnnSerializer::ReduceOperation GetFlatBufferReduceOperation(armnn::ReduceOperation reduceOperation)
+{
+    switch (reduceOperation)
+    {
+        case armnn::ReduceOperation::Sum:
+            return armnnSerializer::ReduceOperation::ReduceOperation_Sum;
+        case armnn::ReduceOperation::Max:
+            return armnnSerializer::ReduceOperation::ReduceOperation_Max;
+        case armnn::ReduceOperation::Mean:
+            return armnnSerializer::ReduceOperation::ReduceOperation_Mean;
+        case armnn::ReduceOperation::Min:
+            return armnnSerializer::ReduceOperation::ReduceOperation_Min;
+        default:
+            return armnnSerializer::ReduceOperation::ReduceOperation_Sum;
+    }
+}
+
 } // namespace armnnSerializer
diff --git a/src/armnnSerializer/SerializerUtils.hpp b/src/armnnSerializer/SerializerUtils.hpp
index a3cf5ba..5517986 100644
--- a/src/armnnSerializer/SerializerUtils.hpp
+++ b/src/armnnSerializer/SerializerUtils.hpp
@@ -38,4 +38,6 @@
 armnnSerializer::LogicalBinaryOperation GetFlatBufferLogicalBinaryOperation(
     armnn::LogicalBinaryOperation logicalBinaryOperation);
 
+armnnSerializer::ReduceOperation GetFlatBufferReduceOperation(armnn::ReduceOperation reduceOperation);
+
 } // namespace armnnSerializer
diff --git a/src/armnnSerializer/test/SerializerTests.cpp b/src/armnnSerializer/test/SerializerTests.cpp
index 11177f5..44e8a38 100644
--- a/src/armnnSerializer/test/SerializerTests.cpp
+++ b/src/armnnSerializer/test/SerializerTests.cpp
@@ -2297,6 +2297,36 @@
     deserializedNetwork->Accept(verifier);
 }
 
+BOOST_AUTO_TEST_CASE(SerializeReduceSum)
+{
+    DECLARE_LAYER_VERIFIER_CLASS_WITH_DESCRIPTOR(Reduce)
+
+    const std::string layerName("Reduce_Sum");
+    const armnn::TensorInfo inputInfo({1, 1, 3, 2}, armnn::DataType::Float32);
+    const armnn::TensorInfo outputInfo({1, 1, 1, 2}, armnn::DataType::Float32);
+
+    armnn::ReduceDescriptor descriptor;
+    descriptor.m_vAxis = { 2 };
+    // The output shape above keeps the reduced dimension (rank stays 4), so the
+    // descriptor must request keep-dims for the network to be self-consistent.
+    descriptor.m_KeepDims = true;
+    descriptor.m_ReduceOperation = armnn::ReduceOperation::Sum;
+
+    armnn::INetworkPtr network = armnn::INetwork::Create();
+    armnn::IConnectableLayer* const inputLayer   = network->AddInputLayer(0);
+    armnn::IConnectableLayer* const reduceSumLayer = network->AddReduceLayer(descriptor, layerName.c_str());
+    armnn::IConnectableLayer* const outputLayer  = network->AddOutputLayer(0);
+
+    inputLayer->GetOutputSlot(0).Connect(reduceSumLayer->GetInputSlot(0));
+    reduceSumLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
+    reduceSumLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
+
+    // Round-trip: serialize, deserialize, then verify layer name, tensor infos
+    // and the full descriptor survived intact.
+    armnn::INetworkPtr deserializedNetwork = DeserializeNetwork(SerializeNetwork(*network));
+    BOOST_CHECK(deserializedNetwork);
+
+    ReduceLayerVerifier verifier(layerName, {inputInfo}, {outputInfo}, descriptor);
+    deserializedNetwork->Accept(verifier);
+}
+
 BOOST_AUTO_TEST_CASE(SerializeReshape)
 {
     DECLARE_LAYER_VERIFIER_CLASS_WITH_DESCRIPTOR(Reshape)
diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp
index 1a1e854..db60224 100644
--- a/src/armnnTfLiteParser/TfLiteParser.cpp
+++ b/src/armnnTfLiteParser/TfLiteParser.cpp
@@ -10,6 +10,7 @@
 #include <armnn/Exceptions.hpp>
 #include <armnn/Logging.hpp>
 #include <armnn/Tensor.hpp>
+#include <armnnUtils/TensorUtils.hpp>
 #include <armnn/TypesUtils.hpp>
 #include <armnn/utility/Assert.hpp>
 #include <armnn/utility/IgnoreUnused.hpp>
@@ -580,6 +581,7 @@
     m_ParserFunctions[tflite::BuiltinOperator_SQUEEZE]                 = &TfLiteParser::ParseSqueeze;
     m_ParserFunctions[tflite::BuiltinOperator_STRIDED_SLICE]           = &TfLiteParser::ParseStridedSlice;
     m_ParserFunctions[tflite::BuiltinOperator_SUB]                     = &TfLiteParser::ParseSub;
+    m_ParserFunctions[tflite::BuiltinOperator_SUM]                     = &TfLiteParser::ParseSum;
     m_ParserFunctions[tflite::BuiltinOperator_TANH]                    = &TfLiteParser::ParseTanH;
     m_ParserFunctions[tflite::BuiltinOperator_TRANSPOSE]               = &TfLiteParser::ParseTranspose;
     m_ParserFunctions[tflite::BuiltinOperator_TRANSPOSE_CONV]          = &TfLiteParser::ParseTransposeConv;
@@ -2994,6 +2996,58 @@
     RegisterOutputSlots(subgraphIndex, operatorIndex, layer, {outputTensorIndexes[0]});
 }
 
+void TfLiteParser::ParseSum(size_t subgraphIndex, size_t operatorIndex)
+{
+    CHECK_MODEL(m_Model, subgraphIndex, operatorIndex);
+
+    const auto& operatorPtr = m_Model->subgraphs[subgraphIndex]->operators[operatorIndex];
+    const auto* options = operatorPtr->builtin_options.AsReducerOptions();
+
+    auto inputs = GetInputs(m_Model, subgraphIndex, operatorIndex);
+    CHECK_VALID_SIZE(inputs.size(), 2);
+
+    auto outputs = GetOutputs(m_Model, subgraphIndex, operatorIndex);
+    CHECK_VALID_SIZE(outputs.size(), 1);
+
+    auto layerName = fmt::format("Sum:{}:{}", subgraphIndex, operatorIndex);
+
+    armnn::TensorInfo inputTensorInfo0 = ToTensorInfo(inputs[0]);
+    armnn::TensorInfo inputTensorInfo1 = ToTensorInfo(inputs[1]);
+    TensorShape input0Shape = inputTensorInfo0.GetShape();
+
+    ReduceDescriptor desc;
+
+    // Get const axis value from model and set it to descriptor.
+    BufferRawPtr axisBufferPtr = GetBuffer(m_Model, inputs[1]->buffer);
+    if (axisBufferPtr != nullptr)
+    {
+        // The TfLite buffer holds the INT32 axis tensor as raw little-endian
+        // bytes; copy it out element-wise rather than indexing the byte array
+        // directly (which would read only the first byte of each int32).
+        std::vector<int32_t> axisData(inputTensorInfo1.GetNumElements());
+        ::memcpy(axisData.data(), axisBufferPtr->data.data(), inputTensorInfo1.GetNumBytes());
+
+        // Normalise each (possibly negative) axis to its positive index.
+        for (int32_t axis : axisData)
+        {
+            desc.m_vAxis.push_back(armnnUtils::GetUnsignedAxis(inputTensorInfo0.GetNumDimensions(), axis));
+        }
+    }
+
+    desc.m_TargetHeight    = input0Shape[1];
+    desc.m_TargetWidth     = input0Shape[2];
+    desc.m_KeepDims        = options->keep_dims;
+    desc.m_ReduceOperation = armnn::ReduceOperation::Sum;
+
+    // Register a new layer object, Sum.
+    IConnectableLayer* layer = m_Network->AddReduceLayer(desc, layerName.c_str());
+
+    armnn::TensorInfo outputTensorInfo = ToTensorInfo(outputs[0]);
+    layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+
+    // Register input tensor to the layer.
+    auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex));
+    RegisterInputSlots(subgraphIndex, operatorIndex, layer, {inputTensorIndexes[0]});
+
+    // Register output tensor to the layer.
+    auto outputTensorIndexes = AsUnsignedVector(GetOutputTensorIds(m_Model, subgraphIndex, operatorIndex));
+    RegisterOutputSlots(subgraphIndex, operatorIndex, layer, outputTensorIndexes);
+}
+
 armnn::IConnectableLayer* TfLiteParser::AddFusedActivationLayer(armnn::IConnectableLayer* prevLayer,
                                                                 unsigned int outputSlot,
                                                                 tflite::ActivationFunctionType activationType)
diff --git a/src/armnnTfLiteParser/TfLiteParser.hpp b/src/armnnTfLiteParser/TfLiteParser.hpp
index 418180f..5f18060 100644
--- a/src/armnnTfLiteParser/TfLiteParser.hpp
+++ b/src/armnnTfLiteParser/TfLiteParser.hpp
@@ -135,6 +135,7 @@
     void ParseSqueeze(size_t subgraphIndex, size_t operatorIndex);
     void ParseStridedSlice(size_t subgraphIndex, size_t operatorIndex);
     void ParseSub(size_t subgraphIndex, size_t operatorIndex);
+    void ParseSum(size_t subgraphIndex, size_t operatorIndex);
     void ParseDiv(size_t subgraphIndex, size_t operatorIndex);
     void ParseTanH(size_t subgraphIndex, size_t operatorIndex);
     void ParseTranspose(size_t subgraphIndex, size_t operatorIndex);
diff --git a/src/armnnTfLiteParser/test/Sum.cpp b/src/armnnTfLiteParser/test/Sum.cpp
new file mode 100644
index 0000000..22b19ae
--- /dev/null
+++ b/src/armnnTfLiteParser/test/Sum.cpp
@@ -0,0 +1,110 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <boost/test/unit_test.hpp>
+#include "ParserFlatbuffersFixture.hpp"
+#include "../TfLiteParser.hpp"
+
+#include <string>
+#include <iostream>
+
+BOOST_AUTO_TEST_SUITE(TensorflowLiteParser)
+
+// Builds a minimal TfLite model JSON containing a single SUM operator whose
+// axis input (tensor 2) is supplied as a constant buffer, then wires the
+// fixture's single input/output pair. Shapes and axis data are injected by
+// the caller as JSON fragments.
+struct SumFixture : public ParserFlatbuffersFixture
+{
+    explicit SumFixture(const std::string& inputShape,
+                        const std::string& outputShape,
+                        const std::string& axisShape,
+                        const std::string& axisData)
+    {
+        m_JsonString = R"(
+            {
+                "version": 3,
+                "operator_codes": [ { "builtin_code": "SUM" } ],
+                "subgraphs": [ {
+                    "tensors": [
+                        {
+                            "shape": )" + inputShape + R"(,
+                            "type": "FLOAT32",
+                            "buffer": 0,
+                            "name": "inputTensor",
+                            "quantization": {
+                                "min": [ 0.0 ],
+                                "max": [ 255.0 ],
+                                "scale": [ 1.0 ],
+                                "zero_point": [ 0 ],
+                            }
+                        },
+                        {
+                            "shape": )" + outputShape + R"( ,
+                            "type": "FLOAT32",
+                            "buffer": 1,
+                            "name": "outputTensor",
+                            "quantization": {
+                                "min": [ 0.0 ],
+                                "max": [ 255.0 ],
+                                "scale": [ 1.0 ],
+                                "zero_point": [ 0 ],
+                            }
+                        },
+                        {
+                            "shape": )" + axisShape + R"( ,
+                            "type": "INT32",
+                            "buffer": 2,
+                            "name": "axis",
+                            "quantization": {
+                                "min": [ 0.0 ],
+                                "max": [ 255.0 ],
+                                "scale": [ 1.0 ],
+                                "zero_point": [ 0 ],
+                            }
+                        }
+                    ],
+                    "inputs": [ 0 ],
+                    "outputs": [ 1 ],
+                    "operators": [
+                        {
+                            "opcode_index": 0,
+                            "inputs": [ 0 , 2 ],
+                            "outputs": [ 1 ],
+                            "builtin_options_type": "ReducerOptions",
+                            "builtin_options": {
+                              "keep_dims": true,
+                            },
+                            "custom_options_format": "FLEXBUFFERS"
+                        }
+                    ],
+                } ],
+                "buffers" : [
+                    { },
+                    { },
+                    { "data": )" + axisData + R"(, },
+                ]
+            }
+        )";
+        SetupSingleInputSingleOutput("inputTensor", "outputTensor");
+    }
+};
+
+struct SimpleSumFixture : public SumFixture
+{
+    // The axis tensor has shape [2] and holds the int32 values {1, 2}; TfLite
+    // buffers store raw little-endian bytes, so two int32 elements are 8 bytes.
+    SimpleSumFixture() : SumFixture("[ 1, 3, 2, 4 ]", "[ 1, 1, 1, 4 ]", "[ 2 ]",
+                                    "[ 1, 0, 0, 0, 2, 0, 0, 0 ]") {}
+};
+
+BOOST_FIXTURE_TEST_CASE(ParseSum, SimpleSumFixture)
+{
+    // Summing the [1,3,2,4] input over axes 1 and 2 (keep_dims=true) yields
+    // [1,1,1,4]; e.g. first output element: 1+5+10+50+100+500 = 666.
+    RunTest<4, armnn::DataType::Float32, armnn::DataType::Float32>
+        (0, {{ "inputTensor", { 1.0f,   2.0f,   3.0f,   4.0f,
+                                5.0f,   6.0f,   7.0f,   8.0f,
+
+                                10.0f,  20.0f,  30.0f,  40.0f,
+                                50.0f,  60.0f,  70.0f,  80.0f,
+
+                                100.0f, 200.0f, 300.0f, 400.0f,
+                                500.0f, 600.0f, 700.0f, 800.0f } } },
+            {{ "outputTensor", { 666.0f, 888.0f, 1110.0f, 1332.0f } } });
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/backendsCommon/LayerSupportBase.cpp b/src/backends/backendsCommon/LayerSupportBase.cpp
index 5435910..77067d9 100644
--- a/src/backends/backendsCommon/LayerSupportBase.cpp
+++ b/src/backends/backendsCommon/LayerSupportBase.cpp
@@ -512,6 +512,14 @@
     return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
 }
 
+bool LayerSupportBase::IsReduceSupported(const TensorInfo& /*input*/,
+                                         const TensorInfo& /*output*/,
+                                         const ReduceDescriptor& /*descriptor*/,
+                                         Optional<std::string&> reasonIfUnsupported) const
+{
+    // Reduce is unsupported in the common base; backends opt in by overriding.
+    return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
+}
+
 bool LayerSupportBase::IsReshapeSupported(const TensorInfo&, // input
                                           const TensorInfo&, // output
                                           const ReshapeDescriptor&, // descriptor
diff --git a/src/backends/backendsCommon/LayerSupportBase.hpp b/src/backends/backendsCommon/LayerSupportBase.hpp
index 7b873e3..e04d657 100644
--- a/src/backends/backendsCommon/LayerSupportBase.hpp
+++ b/src/backends/backendsCommon/LayerSupportBase.hpp
@@ -315,6 +315,11 @@
                          const TensorInfo& output,
                          Optional<std::string&> reasonIfUnsupported) const override;
 
+    bool IsReduceSupported(const TensorInfo& input,
+                           const TensorInfo& output,
+                           const ReduceDescriptor& descriptor,
+                           Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
     bool IsReshapeSupported(const TensorInfo& input,
                             const TensorInfo& output,
                             const ReshapeDescriptor& descriptor,
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index d795e32..b51099f 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -3633,4 +3633,31 @@
     }
 }
 
+// Validates a Reduce workload: exactly one input and one output, a 4-D input
+// tensor, a supported data type, and matching input/output data types.
+void ReduceQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
+{
+    const std::string descriptorName{"ReduceQueueDescriptor"};
+
+    ValidateNumInputs(workloadInfo,  descriptorName, 1);
+    ValidateNumOutputs(workloadInfo, descriptorName, 1);
+
+    const TensorInfo& inputTensorInfo  = workloadInfo.m_InputTensorInfos[0];
+    const TensorInfo& outputTensorInfo = workloadInfo.m_OutputTensorInfos[0];
+
+    // Only 4-dimensional inputs are accepted by this validation.
+    ValidateTensorNumDimensions(inputTensorInfo,  descriptorName, 4, "input");
+
+    std::vector<DataType> supportedTypes =
+    {
+        DataType::BFloat16,
+        DataType::Float16,
+        DataType::Float32,
+        DataType::QAsymmS8,
+        DataType::QAsymmU8,
+        DataType::QSymmS16,
+        DataType::Signed32
+    };
+
+    ValidateDataTypes(inputTensorInfo, supportedTypes, descriptorName);
+    // Reduce does not change the data type: input and output must match.
+    ValidateTensorDataTypesMatch(inputTensorInfo, outputTensorInfo, descriptorName, "input", "output");
+}
+
 } // namespace armnn
diff --git a/src/backends/backendsCommon/WorkloadData.hpp b/src/backends/backendsCommon/WorkloadData.hpp
index 0a232dc..8a2dd1f 100644
--- a/src/backends/backendsCommon/WorkloadData.hpp
+++ b/src/backends/backendsCommon/WorkloadData.hpp
@@ -668,4 +668,9 @@
     void Validate(const WorkloadInfo& workloadInfo) const;
 };
 
+struct ReduceQueueDescriptor : QueueDescriptorWithParameters<ReduceDescriptor>
+{
+    void Validate(const WorkloadInfo& workloadInfo) const;
+};
+
 } // namespace armnn
diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
index 3a8a2ae..19281a8 100644
--- a/src/backends/backendsCommon/WorkloadFactory.cpp
+++ b/src/backends/backendsCommon/WorkloadFactory.cpp
@@ -1220,6 +1220,18 @@
 
             break;
         }
+        case LayerType::Reduce:
+        {
+            auto cLayer = PolymorphicDowncast<const ReduceLayer*>(&layer);
+            const TensorInfo& input  = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
+            const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
+
+            result = layerSupportObject->IsReduceSupported(OverrideDataType(input, dataType),
+                                                           OverrideDataType(output, dataType),
+                                                           cLayer->GetParameters(),
+                                                           reason);
+            break;
+        }
         default:
         {
             ARMNN_ASSERT_MSG(false, "WorkloadFactory did not recognise type of layer.");
@@ -1593,6 +1605,12 @@
     return std::unique_ptr<IWorkload>();
 }
 
+std::unique_ptr<IWorkload> IWorkloadFactory::CreateReduce(const ReduceQueueDescriptor& /*descriptor*/,
+                                                          const WorkloadInfo& /*info*/) const
+{
+    // Default: no Reduce workload available; backend factories override this.
+    return std::unique_ptr<IWorkload>();
+}
+
 std::unique_ptr<IWorkload> IWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& /*descriptor*/,
                                                            const WorkloadInfo& /*info*/) const
 {
diff --git a/src/backends/backendsCommon/WorkloadFactory.hpp b/src/backends/backendsCommon/WorkloadFactory.hpp
index 2e813e9..6ab6d2c 100644
--- a/src/backends/backendsCommon/WorkloadFactory.hpp
+++ b/src/backends/backendsCommon/WorkloadFactory.hpp
@@ -231,6 +231,9 @@
     virtual std::unique_ptr<IWorkload> CreateRank(const RankQueueDescriptor& descriptor,
                                                   const WorkloadInfo& info) const;
 
+    virtual std::unique_ptr<IWorkload> CreateReduce(const ReduceQueueDescriptor& descriptor,
+                                                    const WorkloadInfo& info) const;
+
     virtual std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor,
                                                      const WorkloadInfo& info) const;
 
diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk
index 7254d21..3b6299d 100644
--- a/src/backends/backendsCommon/common.mk
+++ b/src/backends/backendsCommon/common.mk
@@ -75,6 +75,7 @@
     test/layerTests/PadTestImpl.cpp \
     test/layerTests/Pooling2dTestImpl.cpp \
     test/layerTests/RankTestImpl.cpp \
+    test/layerTests/ReduceSumTestImpl.cpp \
     test/layerTests/ReshapeTestImpl.cpp \
     test/layerTests/ResizeTestImpl.cpp \
     test/layerTests/RsqrtTestImpl.cpp \
diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
index 7894895..b20ef2d 100644
--- a/src/backends/backendsCommon/test/CMakeLists.txt
+++ b/src/backends/backendsCommon/test/CMakeLists.txt
@@ -137,6 +137,8 @@
     layerTests/QuantizeTestImpl.hpp
     layerTests/RankTestImpl.cpp
     layerTests/RankTestImpl.hpp
+    layerTests/ReduceSumTestImpl.cpp
+    layerTests/ReduceSumTestImpl.hpp
     layerTests/ReshapeTestImpl.cpp
     layerTests/ReshapeTestImpl.hpp
     layerTests/ResizeTestImpl.cpp
diff --git a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
index 1492a80..c7d1dd2 100644
--- a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
+++ b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
@@ -677,6 +677,8 @@
 
 DECLARE_LAYER_POLICY_1_PARAM(Subtraction)
 
+DECLARE_LAYER_POLICY_2_PARAM(Reduce)
+
 DECLARE_LAYER_POLICY_1_PARAM(Switch)
 
 DECLARE_LAYER_POLICY_2_PARAM(Transpose)
diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp
index e9eb5b9..d87a3b0 100644
--- a/src/backends/backendsCommon/test/LayerTests.hpp
+++ b/src/backends/backendsCommon/test/LayerTests.hpp
@@ -48,6 +48,7 @@
 #include <backendsCommon/test/layerTests/PreluTestImpl.hpp>
 #include <backendsCommon/test/layerTests/QuantizeTestImpl.hpp>
 #include <backendsCommon/test/layerTests/RankTestImpl.hpp>
+#include <backendsCommon/test/layerTests/ReduceSumTestImpl.hpp>
 #include <backendsCommon/test/layerTests/ReshapeTestImpl.hpp>
 #include <backendsCommon/test/layerTests/ResizeTestImpl.hpp>
 #include <backendsCommon/test/layerTests/RsqrtTestImpl.hpp>
diff --git a/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp
new file mode 100644
index 0000000..4edbd11
--- /dev/null
+++ b/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp
@@ -0,0 +1,344 @@
+//
+// Copyright © 2020 Samsung Electronics Co Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ReduceSumTestImpl.hpp"
+
+#include <backendsCommon/test/DataTypeUtils.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+namespace
+{
+
+// Shared driver for the ReduceSum layer tests: builds a Reduce workload from
+// the given tensor infos, axes and reduce operation, runs it, and returns the
+// actual-vs-expected result pair. Negative axes are resolved against the input
+// rank (e.g. -1 == last dimension).
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<float, 4> ReduceTestCommon(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory,
+        const armnn::TensorInfo& inputTensorInfo,
+        const armnn::TensorInfo& outputTensorInfo,
+        const std::vector<float>& inputData,
+        const std::vector<float>& outputData,
+        const std::vector<int32_t>& vAxis,
+        const armnn::ReduceOperation reduceOperation)
+{
+    IgnoreUnused(memoryManager);
+    auto inputTensor = MakeTensor<T, 4>(inputTensorInfo, ConvertToDataType<ArmnnType>(inputData, inputTensorInfo));
+
+    LayerTestResult<float, 4> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, outputData);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::ReduceQueueDescriptor descriptor;
+
+    // Resolve negative axes to their positive indices before filling the
+    // descriptor (unsigned wrap-around gives rank + axis for axis < 0).
+    std::vector<uint32_t> resolvedAxes;
+    for (int32_t axis : vAxis)
+    {
+        if (axis < 0)
+        {
+            resolvedAxes.push_back(inputTensorInfo.GetNumDimensions() + static_cast<uint32_t>(axis));
+        }
+        else
+        {
+            resolvedAxes.push_back(static_cast<uint32_t>(axis));
+        }
+    }
+
+    descriptor.m_Parameters.m_vAxis = resolvedAxes;
+    descriptor.m_Parameters.m_ReduceOperation = reduceOperation;
+    armnn::WorkloadInfo info;
+
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateReduce(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), inputTensor.origin());
+
+    workload->Execute();
+
+    CopyDataFromITensorHandle(result.output.origin(), outputHandle.get());
+
+    return result;
+}
+
+} // namespace
+
+// Sums all five elements of a [1,1,1,5] tensor; the axis is given as -1,
+// which ReduceTestCommon resolves to the last dimension (3).
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<float, 4> ReduceSumSimpleTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    const armnn::TensorShape inputShape{ 1, 1, 1, 5 };
+    const armnn::TensorShape outputShape{ 1, 1, 1, 1};
+
+    armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(1.0f);
+        inputTensorInfo.SetQuantizationOffset(0);
+    }
+
+    armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
+
+    std::vector<float> inputValues({ 5.0f, 2.0f, 8.0f, 10.0f, 9.0f });
+    std::vector<float> outputValues({ 34.0f });
+
+    return ReduceTestCommon<ArmnnType>(workloadFactory,
+                                       memoryManager,
+                                       tensorHandleFactory,
+                                       inputTensorInfo,
+                                       outputTensorInfo,
+                                       inputValues,
+                                       outputValues,
+                                       { -1 },
+                                       armnn::ReduceOperation::Sum);
+}
+
+// Sums a [1,3,2,4] tensor over axis 1, producing [1,1,2,4]
+// (e.g. first element: 1 + 10 + 100 = 111).
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<float, 4> ReduceSumSingleAxisTest1(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    const armnn::TensorShape inputShape{ 1, 3, 2, 4 };
+    const armnn::TensorShape outputShape{ 1, 1, 2, 4};
+
+    armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(1.0f);
+        inputTensorInfo.SetQuantizationOffset(0);
+    }
+
+    armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
+
+    std::vector<float> inputValues({  1.0f,   2.0f,   3.0f,   4.0f,
+                                      5.0f,   6.0f,   7.0f,   8.0f,
+
+                                     10.0f,  20.0f,  30.0f,  40.0f,
+                                     50.0f,  60.0f,  70.0f,  80.0f,
+
+                                    100.0f, 200.0f, 300.0f, 400.0f,
+                                    500.0f, 600.0f, 700.0f, 800.0f });
+    std::vector<float> outputValues({ 111.0f, 222.0f, 333.0f, 444.0f,
+                                      555.0f, 666.0f, 777.0f, 888.0f });
+
+    return ReduceTestCommon<ArmnnType>(workloadFactory,
+                                       memoryManager,
+                                       tensorHandleFactory,
+                                       inputTensorInfo,
+                                       outputTensorInfo,
+                                       inputValues,
+                                       outputValues,
+                                       { 1 },
+                                       armnn::ReduceOperation::Sum);
+}
+
+// Sums a [1,6,3,4] tensor over axis 1, producing [1,1,3,4].
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<float, 4> ReduceSumSingleAxisTest2(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    const armnn::TensorShape inputShape{ 1, 6, 3, 4 };
+    const armnn::TensorShape outputShape{ 1, 1, 3, 4};
+
+    armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(1.0f);
+        inputTensorInfo.SetQuantizationOffset(0);
+    }
+
+    armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
+
+    std::vector<float> inputValues( {7, 8, 6, 1,
+                                     1, 1, 8, 7,
+                                     3, 7, 7, 7,
+
+                                     6, 8, 4, 7,
+                                     3, 8, 7, 3,
+                                     5, 8, 8, 8,
+
+
+                                     7, 8, 2, 7,
+                                     3, 8, 5, 6,
+                                     8, 4, 2, 7,
+
+                                     1, 6, 7, 2,
+                                     8, 3, 3, 1,
+                                     7, 6, 2, 6,
+
+
+                                     5, 3, 4, 8,
+                                     7, 8, 2, 4,
+                                     6, 6, 2, 8,
+
+                                     2, 2, 7, 2,
+                                     5, 3, 6, 3,
+                                     6, 1, 8, 8});
+    std::vector<float> outputValues({  28.0f, 35.0f, 30.0f, 27.0f,
+                                       27.0f, 31.0f, 31.0f, 24.0f,
+                                       35.0f, 32.0f, 29.0f, 44.0f});
+
+    return ReduceTestCommon<ArmnnType>(workloadFactory,
+                                       memoryManager,
+                                       tensorHandleFactory,
+                                       inputTensorInfo,
+                                       outputTensorInfo,
+                                       inputValues,
+                                       outputValues,
+                                       { 1 },
+                                       armnn::ReduceOperation::Sum);
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<float, 4> ReduceSumSingleAxisTest3(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    const armnn::TensorShape inputShape{ 1, 6, 3, 4 };
+    const armnn::TensorShape outputShape{ 1, 6, 3, 1};
+
+    armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(1.0f);
+        inputTensorInfo.SetQuantizationOffset(0);
+    }
+
+    armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
+
+    std::vector<float> inputValues( {7, 8, 6, 1,
+                                     1, 1, 8, 7,
+                                     3, 7, 7, 7,
+
+                                     6, 8, 4, 7,
+                                     3, 8, 7, 3,
+                                     5, 8, 8, 8,
+
+
+                                     7, 8, 2, 7,
+                                     3, 8, 5, 6,
+                                     8, 4, 2, 7,
+
+                                     1, 6, 7, 2,
+                                     8, 3, 3, 1,
+                                     7, 6, 2, 6,
+
+
+                                     5, 3, 4, 8,
+                                     7, 8, 2, 4,
+                                     6, 6, 2, 8,
+
+                                     2, 2, 7, 2,
+                                     5, 3, 6, 3,
+                                     6, 1, 8, 8});
+    std::vector<float> outputValues({  22.0f, 17.0f, 24.0f,
+                                       25.0f, 21.0f, 29.0f,
+
+                                       24.0f, 22.0f, 21.0f,
+                                       16.0f, 15.0f, 21.0f,
+
+                                       20.0f, 21.0f, 22.0f,
+                                       13.0f, 17.0f, 23.0f});
+
+    return ReduceTestCommon<ArmnnType>(workloadFactory,
+                                       memoryManager,
+                                       tensorHandleFactory,
+                                       inputTensorInfo,
+                                       outputTensorInfo,
+                                       inputValues,
+                                       outputValues,
+                                       { 3 },
+                                       armnn::ReduceOperation::Sum);
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<float, 4> ReduceSumMultipleAxisTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    const armnn::TensorShape inputShape{ 1, 3, 2, 4 };
+    const armnn::TensorShape outputShape{ 1, 1, 1, 4};
+
+    armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(1.0f);
+        inputTensorInfo.SetQuantizationOffset(0);
+    }
+
+    armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
+
+    std::vector<float> inputValues({  1.0f,   2.0f,   3.0f,   4.0f,
+                                      5.0f,   6.0f,   7.0f,   8.0f,
+
+                                     10.0f,  20.0f,  30.0f,  40.0f,
+                                     50.0f,  60.0f,  70.0f,  80.0f,
+
+                                    100.0f, 200.0f, 300.0f, 400.0f,
+                                    500.0f, 600.0f, 700.0f, 800.0f });
+    std::vector<float> outputValues({ 666.0f, 888.0f, 1110.0f, 1332.0f });
+
+    return ReduceTestCommon<ArmnnType>(workloadFactory,
+                                       memoryManager,
+                                       tensorHandleFactory,
+                                       inputTensorInfo,
+                                       outputTensorInfo,
+                                       inputValues,
+                                       outputValues,
+                                       { 1, 2 },
+                                       armnn::ReduceOperation::Sum);
+}
+
+// Explicit template specializations
+
+template LayerTestResult<float, 4>
+ReduceSumSimpleTest<armnn::DataType::Float32>(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<float, 4>
+ReduceSumSingleAxisTest1<armnn::DataType::Float32>(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<float, 4>
+ReduceSumSingleAxisTest2<armnn::DataType::Float32>(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<float, 4>
+ReduceSumSingleAxisTest3<armnn::DataType::Float32>(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<float, 4>
+ReduceSumMultipleAxisTest<armnn::DataType::Float32>(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
diff --git a/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.hpp
new file mode 100644
index 0000000..db23240
--- /dev/null
+++ b/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.hpp
@@ -0,0 +1,43 @@
+//
+// Copyright © 2020 Samsung Electronics Co Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <ResolveType.hpp>
+
+#include <armnn/backends/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<float, 4> ReduceSumSimpleTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<float, 4> ReduceSumSingleAxisTest1(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<float, 4> ReduceSumSingleAxisTest2(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<float, 4> ReduceSumSingleAxisTest3(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<float, 4> ReduceSumMultipleAxisTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
index bdaaafb..992ae71 100644
--- a/src/backends/reference/RefLayerSupport.cpp
+++ b/src/backends/reference/RefLayerSupport.cpp
@@ -1706,6 +1706,36 @@
            "Reference rank: input type not supported.");
 }
 
+bool RefLayerSupport::IsReduceSupported(const TensorInfo& input,
+                                        const TensorInfo& output,
+                                        const ReduceDescriptor& descriptor,
+                                        Optional<std::string&> reasonIfUnsupported) const
+{
+    IgnoreUnused(descriptor);
+    bool supported = true;
+    std::array<DataType,7> supportedTypes =
+    {
+        DataType::BFloat16,
+        DataType::Float32,
+        DataType::Float16,
+        DataType::QAsymmS8,
+        DataType::QAsymmU8,
+        DataType::QSymmS16,
+        DataType::Signed32
+    };
+
+    supported &= CheckSupportRule(TypeAnyOf(input, supportedTypes), reasonIfUnsupported,
+                                  "Reference Reduce: input type not supported");
+
+    supported &= CheckSupportRule(TypeAnyOf(output, supportedTypes), reasonIfUnsupported,
+                                  "Reference Reduce: output type not supported");
+
+    supported &= CheckSupportRule(TypesAreEqual(input, output), reasonIfUnsupported,
+                                  "Reference Reduce: input and output types not matching");
+
+    return supported;
+}
+
 bool RefLayerSupport::IsReshapeSupported(const TensorInfo& input,
                                          const TensorInfo& output,
                                          const ReshapeDescriptor& descriptor,
diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp
index 6b64408..b75b778 100644
--- a/src/backends/reference/RefLayerSupport.hpp
+++ b/src/backends/reference/RefLayerSupport.hpp
@@ -275,6 +275,11 @@
                          const TensorInfo& output,
                          Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
+    bool IsReduceSupported(const TensorInfo& input,
+                           const TensorInfo& output,
+                           const ReduceDescriptor& descriptor,
+                           Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
     bool IsReshapeSupported(const TensorInfo& input,
                             const TensorInfo& output,
                             const ReshapeDescriptor& descriptor,
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index 468aeb3..fde6c86 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -580,6 +580,12 @@
     return std::make_unique<RefRankWorkload>(descriptor, info);
 }
 
+std::unique_ptr<IWorkload> RefWorkloadFactory::CreateReduce(const ReduceQueueDescriptor& descriptor,
+                                                            const WorkloadInfo& info) const
+{
+    return std::make_unique<RefReduceWorkload>(descriptor, info);
+}
+
 std::unique_ptr<IWorkload> RefWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
 {
diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp
index 41cefd3..c22d87f 100644
--- a/src/backends/reference/RefWorkloadFactory.hpp
+++ b/src/backends/reference/RefWorkloadFactory.hpp
@@ -223,6 +223,9 @@
     std::unique_ptr<IWorkload> CreateRank(const RankQueueDescriptor& descriptor,
                                           const WorkloadInfo& info) const override;
 
+    std::unique_ptr<IWorkload> CreateReduce(const ReduceQueueDescriptor& descriptor,
+                                            const WorkloadInfo& info) const override;
+
     std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor,
                                              const WorkloadInfo& info) const override;
 
diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk
index b4aa3a0..9676509 100644
--- a/src/backends/reference/backend.mk
+++ b/src/backends/reference/backend.mk
@@ -38,11 +38,11 @@
         workloads/InstanceNorm.cpp \
         workloads/LogSoftmax.cpp \
         workloads/LstmUtils.cpp \
-        workloads/Mean.cpp \
         workloads/Concatenate.cpp \
         workloads/Pad.cpp \
         workloads/Pooling2d.cpp \
         workloads/PreluImpl.cpp \
+        workloads/Reduce.cpp \
         workloads/RefActivationWorkload.cpp \
         workloads/RefArgMinMaxWorkload.cpp \
         workloads/RefBatchNormalizationWorkload.cpp \
@@ -81,6 +81,7 @@
         workloads/RefPreluWorkload.cpp \
         workloads/RefQLstmWorkload.cpp \
         workloads/RefQuantizeWorkload.cpp \
+        workloads/RefReduceWorkload.cpp \
         workloads/RefReshapeWorkload.cpp \
         workloads/RefResizeBilinearWorkload.cpp \
         workloads/RefResizeWorkload.cpp \
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index 502e0cb..d5e0f82 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -2234,4 +2234,11 @@
 ARMNN_AUTO_TEST_CASE_WITH_THF(LogicalAndBroadcast3, LogicalAndBroadcast3Test)
 ARMNN_AUTO_TEST_CASE_WITH_THF(LogicalOrBroadcast3, LogicalOrBroadcast3Test)
 
+// ReduceSum
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumFloat32, ReduceSumSimpleTest<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_1, ReduceSumSingleAxisTest1<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_2, ReduceSumSingleAxisTest2<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_3, ReduceSumSingleAxisTest3<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumMultipleAxisFloat32, ReduceSumMultipleAxisTest<DataType::Float32>)
+
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
index 1b20e5b..1f4298b 100644
--- a/src/backends/reference/workloads/CMakeLists.txt
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -44,8 +44,6 @@
     LstmUtils.hpp
     LstmUtils.cpp
     Maximum.hpp
-    Mean.cpp
-    Mean.hpp
     Concatenate.hpp
     Concatenate.cpp
     Minimum.hpp
@@ -55,6 +53,8 @@
     Pooling2d.hpp
     PreluImpl.cpp
     PreluImpl.hpp
+    Reduce.cpp
+    Reduce.hpp
     RefActivationWorkload.cpp
     RefActivationWorkload.hpp
     RefArgMinMaxWorkload.cpp
@@ -132,6 +132,8 @@
     RefQLstmWorkload.cpp
     RefQLstmWorkload.hpp
     RefRankWorkload.hpp
+    RefReduceWorkload.cpp
+    RefReduceWorkload.hpp
     RefReshapeWorkload.cpp
     RefReshapeWorkload.hpp
     RefResizeBilinearWorkload.cpp
diff --git a/src/backends/reference/workloads/Mean.hpp b/src/backends/reference/workloads/Mean.hpp
deleted file mode 100644
index dfb0302..0000000
--- a/src/backends/reference/workloads/Mean.hpp
+++ /dev/null
@@ -1,22 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#pragma once
-
-#include "armnn/DescriptorsFwd.hpp"
-#include "armnn/Tensor.hpp"
-#include "BaseIterator.hpp"
-
-#include <vector>
-
-namespace armnn
-{
-void Mean(const TensorInfo& inputInfo,
-          const TensorInfo& outputInfo,
-          const std::vector<unsigned int>& axis,
-          Decoder<float>& input,
-          Encoder<float>& output);
-} //namespace armnn
-
diff --git a/src/backends/reference/workloads/Mean.cpp b/src/backends/reference/workloads/Reduce.cpp
similarity index 78%
rename from src/backends/reference/workloads/Mean.cpp
rename to src/backends/reference/workloads/Reduce.cpp
index fe34efe..5375c71 100644
--- a/src/backends/reference/workloads/Mean.cpp
+++ b/src/backends/reference/workloads/Reduce.cpp
@@ -1,13 +1,14 @@
 //
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
-#include "Mean.hpp"
-#include <backendsCommon/WorkloadData.hpp>
+#include "Reduce.hpp"
 
 #include <armnn/utility/NumericCast.hpp>
 
+#include <backendsCommon/WorkloadData.hpp>
+
 #include <cmath>
 #include <cstddef>
 #include <functional>
@@ -15,6 +16,7 @@
 
 namespace armnn
 {
+
 bool NextIndex(const unsigned int numDims, const armnn::TensorShape& dims, std::vector<unsigned int>& current)
 {
     unsigned int carry = 1;
@@ -64,18 +66,16 @@
     }
     return offset;
 }
-} // namespace
 
-namespace armnn
-{
-void Mean(const armnn::TensorInfo& inputInfo,
-          const armnn::TensorInfo& outputInfo,
-          const std::vector<unsigned int>& axis,
-          Decoder<float>& input,
-          Encoder<float>& output)
-{
 
-    unsigned int inputNumDims = inputInfo.GetNumDimensions();
+void Reduce(const TensorInfo& inputInfo,
+            const TensorInfo& outputInfo,
+            Decoder<float>& input,
+            Encoder<float>& output,
+            const std::vector<uint32_t> axis,
+            const ReduceOperation reduceOperation)
+{
+    unsigned int inputNumDims  = inputInfo.GetNumDimensions();
     unsigned int outputNumDims = outputInfo.GetNumDimensions();
 
     armnn::TensorShape outputDims = outputInfo.GetShape();
@@ -106,10 +106,10 @@
     std::vector<unsigned int> resolvedAxis = axis;
     if (resolvedAxis.empty())
     {
-      for (unsigned int idx = 0; idx < inputNumDims; ++idx)
-      {
-          resolvedAxis.push_back(idx);
-      }
+        for (unsigned int idx = 0; idx < inputNumDims; ++idx)
+        {
+            resolvedAxis.push_back(idx);
+        }
     }
     auto numResolvedAxis = armnn::numeric_cast<unsigned int>(resolvedAxis.size());
 
@@ -129,15 +129,23 @@
     {
         unsigned int current = inputDims[resolvedAxis[idx]];
         ARMNN_ASSERT(armnn::numeric_cast<float>(current) <
-              (std::numeric_limits<float>::max() / armnn::numeric_cast<float>(numElementsInAxis)));
+                     (std::numeric_limits<float>::max() / armnn::numeric_cast<float>(numElementsInAxis)));
         numElementsInAxis *= current;
     }
     if (numElementsInAxis > 0) {
         for (unsigned int idx = 0; idx < numOutputs; ++idx)
         {
             output[idx];
-            output.Set(tempSum[idx] / armnn::numeric_cast<float>(numElementsInAxis));
+            if (reduceOperation == ReduceOperation::Sum)
+            {
+                output.Set(tempSum[idx]);
+            }
+            else if (reduceOperation == ReduceOperation::Mean)
+            {
+                output.Set(tempSum[idx] / armnn::numeric_cast<float>(numElementsInAxis));
+            }
         }
     }
 }
-} //namespace armnn
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/Reduce.hpp b/src/backends/reference/workloads/Reduce.hpp
new file mode 100644
index 0000000..ad777ad
--- /dev/null
+++ b/src/backends/reference/workloads/Reduce.hpp
@@ -0,0 +1,24 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "BaseIterator.hpp"
+#include "Decoders.hpp"
+#include "Encoders.hpp"
+
+#include <armnn/Tensor.hpp>
+
+namespace armnn
+{
+
+void Reduce(const TensorInfo& inputInfo,
+            const TensorInfo& outputInfo,
+            Decoder<float>& input,
+            Encoder<float>& output,
+            const std::vector<uint32_t> axis,
+            const ReduceOperation reduceOperation);
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefMeanWorkload.cpp b/src/backends/reference/workloads/RefMeanWorkload.cpp
index 375ab39..00e59bc 100644
--- a/src/backends/reference/workloads/RefMeanWorkload.cpp
+++ b/src/backends/reference/workloads/RefMeanWorkload.cpp
@@ -5,7 +5,7 @@
 
 #include "RefMeanWorkload.hpp"
 
-#include "Mean.hpp"
+#include "Reduce.hpp"
 #include "RefWorkloadUtils.hpp"
 
 #include "Profiling.hpp"
@@ -28,7 +28,12 @@
     auto inputDecoder  = MakeDecoder<float>(inputInfo,  m_Data.m_Inputs[0]->Map());
     auto outputEncoder = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
 
-    Mean(inputInfo, outputInfo, m_Data.m_Parameters.m_Axis, *inputDecoder, *outputEncoder);
+    Reduce(inputInfo,
+           outputInfo,
+           *inputDecoder,
+           *outputEncoder,
+           m_Data.m_Parameters.m_Axis,
+           armnn::ReduceOperation::Mean);
 }
 
 } //namespace armnn
diff --git a/src/backends/reference/workloads/RefReduceWorkload.cpp b/src/backends/reference/workloads/RefReduceWorkload.cpp
new file mode 100644
index 0000000..7a46ff9
--- /dev/null
+++ b/src/backends/reference/workloads/RefReduceWorkload.cpp
@@ -0,0 +1,42 @@
+//
+// Copyright © 2020 Samsung Electronics Co Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefReduceWorkload.hpp"
+
+#include "Reduce.hpp"
+#include "RefWorkloadUtils.hpp"
+#include "BaseIterator.hpp"
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+RefReduceWorkload::RefReduceWorkload(
+    const ReduceQueueDescriptor& descriptor,
+    const WorkloadInfo& info)
+    : BaseWorkload<ReduceQueueDescriptor>(descriptor, info) {}
+
+void RefReduceWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReduceWorkload_Execute");
+
+    const TensorInfo& inputInfo  = GetTensorInfo(m_Data.m_Inputs[0]);
+    const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+
+    std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map());
+    Decoder<float>& decoder = *decoderPtr;
+
+    std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
+    Encoder<float>& encoder = *encoderPtr;
+
+    Reduce(inputInfo,
+           outputInfo,
+           decoder,
+           encoder,
+           m_Data.m_Parameters.m_vAxis,
+           m_Data.m_Parameters.m_ReduceOperation);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefReduceWorkload.hpp b/src/backends/reference/workloads/RefReduceWorkload.hpp
new file mode 100644
index 0000000..1d551ac
--- /dev/null
+++ b/src/backends/reference/workloads/RefReduceWorkload.hpp
@@ -0,0 +1,23 @@
+//
+// Copyright © 2020 Samsung Electronics Co Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+#include <backendsCommon/WorkloadData.hpp>
+
+namespace armnn
+{
+
+class RefReduceWorkload : public BaseWorkload<ReduceQueueDescriptor>
+{
+public:
+    explicit RefReduceWorkload(const ReduceQueueDescriptor& descriptor,
+                               const WorkloadInfo& info);
+
+    virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
index 390b2a8..989644f 100644
--- a/src/backends/reference/workloads/RefWorkloads.hpp
+++ b/src/backends/reference/workloads/RefWorkloads.hpp
@@ -54,6 +54,7 @@
 #include "RefQLstmWorkload.hpp"
 #include "RefQuantizeWorkload.hpp"
 #include "RefRankWorkload.hpp"
+#include "RefReduceWorkload.hpp"
 #include "RefReshapeWorkload.hpp"
 #include "RefResizeBilinearWorkload.hpp"
 #include "RefResizeWorkload.hpp"