IVGCVSW-1812 Adding Ref implementation and tests of MeanWorkloads

Change-Id: I6fb15c407024e3b91d5abf4513f8090be5821760
diff --git a/src/armnn/layers/MeanLayer.cpp b/src/armnn/layers/MeanLayer.cpp
index 6bbb094..01f1133 100644
--- a/src/armnn/layers/MeanLayer.cpp
+++ b/src/armnn/layers/MeanLayer.cpp
@@ -60,7 +60,7 @@
     {
         outputRank = 1;
     }
-    else if (m_Param.m_Axis.size() <= input.GetNumDimensions())
+    else if (m_Param.m_Axis.size() >= input.GetNumDimensions())
     {
         throw LayerValidationException("MeanLayer: Dimensions to reduce can not be bigger than input dimensions");
     }
diff --git a/src/armnn/test/NetworkTests.cpp b/src/armnn/test/NetworkTests.cpp
index 11c26da..2f36f4d 100644
--- a/src/armnn/test/NetworkTests.cpp
+++ b/src/armnn/test/NetworkTests.cpp
@@ -845,6 +845,39 @@
     }
 }
 
+BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsCpuRefMeanLayer)
+{
+    // Create runtime in which test will run
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef};
+
+    // build up the structure of the network
+    armnn::INetworkPtr net(armnn::INetwork::Create());
+
+    armnn::IConnectableLayer* input = net->AddInputLayer(0);
+
+    armnn::MeanDescriptor descriptor({ 0, 1 }, false);
+    armnn::IConnectableLayer* meanLayer = net->AddMeanLayer(descriptor);
+
+    armnn::IConnectableLayer* output = net->AddOutputLayer(0);
+
+    input->GetOutputSlot(0).Connect(meanLayer->GetInputSlot(0));
+    meanLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 4, 3, 2 }, armnn::DataType::Float32));
+    meanLayer->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 2 }, armnn::DataType::Float32));
+
+    // optimize the network
+    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
+
+    for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph())
+    {
+        BOOST_CHECK_EQUAL(armnn::Compute::CpuRef, layer->GetComputeDevice());
+    }
+}
+
 BOOST_AUTO_TEST_CASE(FP16TurboModeTestOnCpuRef)
 {
     // Test to check when FP16 Turbo mode set
diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
index d56cdeb..12a2817 100644
--- a/src/backends/reference/RefLayerSupport.cpp
+++ b/src/backends/reference/RefLayerSupport.cpp
@@ -392,7 +392,12 @@
                         const MeanDescriptor& descriptor,
                         std::string* reasonIfUnsupported)
 {
-    return false;
+    ignore_unused(output);
+    ignore_unused(descriptor);
+    return IsSupportedForDataTypeRef(reasonIfUnsupported,
+                                     input.GetDataType(),
+                                     &TrueFunc<>,
+                                     &TrueFunc<>);
 }
 
 }
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index 5cefd1b..582c691 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -242,7 +242,7 @@
 std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateMean(
     const MeanQueueDescriptor& descriptor, const WorkloadInfo& info) const
 {
-    return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
+    return MakeWorkload<RefMeanFloat32Workload, RefMeanUint8Workload>(descriptor, info);
 }
 
 std::unique_ptr<IWorkload> RefWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor,
diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk
index 23dab11..e5345c0 100644
--- a/src/backends/reference/backend.mk
+++ b/src/backends/reference/backend.mk
@@ -15,6 +15,7 @@
         workloads/Broadcast.cpp \
         workloads/ConvImpl.cpp \
         workloads/FullyConnected.cpp \
+        workloads/Mean.cpp \
         workloads/Pooling2d.cpp \
         workloads/RefActivationFloat32Workload.cpp \
         workloads/RefActivationUint8Workload.cpp \
@@ -36,6 +37,8 @@
         workloads/RefFullyConnectedUint8Workload.cpp \
         workloads/RefL2NormalizationFloat32Workload.cpp \
         workloads/RefLstmFloat32Workload.cpp \
+        workloads/RefMeanFloat32Workload.cpp \
+        workloads/RefMeanUint8Workload.cpp \
         workloads/RefMergerFloat32Workload.cpp \
         workloads/RefMergerUint8Workload.cpp \
         workloads/RefNormalizationFloat32Workload.cpp \
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
index 7343b70..5a756e4 100644
--- a/src/backends/reference/workloads/CMakeLists.txt
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -94,6 +94,12 @@
     Softmax.hpp
     Splitter.hpp
     TensorBufferArrayView.hpp
+    Mean.cpp
+    Mean.hpp
+    RefMeanFloat32Workload.cpp
+    RefMeanFloat32Workload.hpp
+    RefMeanUint8Workload.cpp
+    RefMeanUint8Workload.hpp
 )
 
 add_library(armnnRefBackendWorkloads STATIC ${armnnRefBackendWorkloads_sources})
diff --git a/src/backends/reference/workloads/Mean.cpp b/src/backends/reference/workloads/Mean.cpp
new file mode 100644
index 0000000..0db67a0
--- /dev/null
+++ b/src/backends/reference/workloads/Mean.cpp
@@ -0,0 +1,149 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "Mean.hpp"
+#include "backends/WorkloadData.hpp"
+
+#include <boost/numeric/conversion/cast.hpp>
+
+#include <cmath>
+#include <cstddef>
+#include <functional>
+#include <limits>
+
+namespace armnn
+{
+
+// Advances the multi-dimensional 'current' index over a tensor of shape 'dims'
+// in row-major order. Returns false once the index has wrapped past the last
+// element (i.e. every position has been visited), true otherwise.
+bool NextIndex(const unsigned int numDims, const armnn::TensorShape& dims, std::vector<unsigned int>& current)
+{
+    unsigned int carry = 1;
+
+    for (unsigned int idx = numDims; idx-- > 0; )
+    {
+        unsigned int current_val = current[idx] + carry;
+        if (dims[idx] == current_val)
+        {
+            current[idx] = 0;
+        }
+        else
+        {
+            current[idx] = current_val;
+            carry = 0;
+            break;
+        }
+    }
+    return (carry == 0);
+}
+
+// Computes the flattened offset in the reduced output that the input element at
+// 'index' contributes to. Dimensions listed in 'axis' are being reduced away and
+// therefore do not contribute to the offset. Pass an empty 'axis' to get the
+// plain row-major offset into the full (unreduced) tensor.
+std::size_t ReducedOutputOffset(const unsigned int numDims, const armnn::TensorShape& dims,
+                                const std::vector<unsigned int>& index, const unsigned int numAxis,
+                                const std::vector<unsigned int>& axis)
+{
+    std::size_t offset = 0;
+    for (unsigned int idx = 0; idx < numDims; ++idx)
+    {
+        bool isAxis = false;
+        if (!axis.empty())
+        {
+            for (unsigned int axisIdx = 0; axisIdx < numAxis; ++axisIdx)
+            {
+                if (idx == axis[axisIdx])
+                {
+                    isAxis = true;
+                    break;
+                }
+            }
+        }
+        if (!isAxis)
+        {
+            offset = offset * boost::numeric_cast<size_t>(dims[idx]) + boost::numeric_cast<size_t>(index[idx]);
+        }
+    }
+    return offset;
+}
+
+// Computes the mean of 'inputData' over the dimensions listed in 'axis' (over
+// all dimensions when 'axis' is empty) and writes the result to 'outputData'.
+// Tensor shapes are taken from the supplied TensorInfos.
+void Mean(const armnn::TensorInfo& inputInfo,
+          const armnn::TensorInfo& outputInfo,
+          const std::vector<unsigned int>& axis,
+          const float* inputData,
+          float* outputData)
+{
+    unsigned int inputNumDims = inputInfo.GetNumDimensions();
+    unsigned int outputNumDims = outputInfo.GetNumDimensions();
+
+    armnn::TensorShape outputDims = outputInfo.GetShape();
+    armnn::TensorShape inputDims = inputInfo.GetShape();
+
+    // Initialise output data.
+    size_t numOutputs = 1;
+    for (unsigned int idx = 0; idx < outputNumDims; ++idx)
+    {
+        numOutputs *= boost::numeric_cast<size_t>(outputDims[idx]);
+    }
+
+    std::vector<float> tempSum(numOutputs);
+    for (size_t idx = 0; idx < numOutputs; ++idx)
+    {
+        outputData[idx] = 0.0f;
+        tempSum[idx] = 0.0f;
+    }
+
+    // Initialise temp index.
+    std::vector<unsigned int> tempIndex(inputNumDims);
+    for (unsigned int idx = 0; idx < inputNumDims; ++idx)
+    {
+        tempIndex[idx] = 0;
+    }
+
+    // An empty axis list means "reduce over every dimension".
+    std::vector<unsigned int> resolvedAxis = axis;
+    if (resolvedAxis.empty())
+    {
+        for (unsigned int idx = 0; idx < inputNumDims; ++idx)
+        {
+            resolvedAxis.push_back(idx);
+        }
+    }
+    unsigned int numResolvedAxis = boost::numeric_cast<unsigned int>(resolvedAxis.size());
+
+    // Iterates through input_data and sum up the reduced axis.
+    for (bool hasNext = true; hasNext; hasNext = NextIndex(inputNumDims, inputDims, tempIndex))
+    {
+        size_t inputOffset = ReducedOutputOffset(inputNumDims, inputDims, tempIndex, 0, {});
+        size_t outputOffset = ReducedOutputOffset(inputNumDims, inputDims, tempIndex,
+                                                  numResolvedAxis, resolvedAxis);
+        tempSum[outputOffset] += inputData[inputOffset];
+    }
+
+    // Takes average by num of elements added to get mean.
+    size_t numElementsInAxis = 1;
+    for (unsigned int idx = 0; idx < numResolvedAxis; ++idx)
+    {
+        size_t current = boost::numeric_cast<size_t>(inputDims[resolvedAxis[idx]]);
+        // Guard against float overflow when dividing by the element count below.
+        BOOST_ASSERT(boost::numeric_cast<float>(current) <
+              (std::numeric_limits<float>::max() / boost::numeric_cast<float>(numElementsInAxis)));
+        numElementsInAxis *= current;
+    }
+    if (numElementsInAxis > 0)
+    {
+        for (size_t idx = 0; idx < numOutputs; ++idx)
+        {
+            outputData[idx] = tempSum[idx] / boost::numeric_cast<float>(numElementsInAxis);
+        }
+    }
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/Mean.hpp b/src/backends/reference/workloads/Mean.hpp
new file mode 100644
index 0000000..38c2e39
--- /dev/null
+++ b/src/backends/reference/workloads/Mean.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "armnn/DescriptorsFwd.hpp"
+#include "armnn/Tensor.hpp"
+
+#include <vector>
+
+namespace armnn
+{
+void Mean(const TensorInfo& inputInfo,
+          const TensorInfo& outputInfo,
+          const std::vector<unsigned int>& axis,
+          const float* inputData,
+          float* outputData);
+} //namespace armnn
+
diff --git a/src/backends/reference/workloads/RefMeanFloat32Workload.cpp b/src/backends/reference/workloads/RefMeanFloat32Workload.cpp
new file mode 100644
index 0000000..a23906b
--- /dev/null
+++ b/src/backends/reference/workloads/RefMeanFloat32Workload.cpp
@@ -0,0 +1,34 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefMeanFloat32Workload.hpp"
+
+#include "Mean.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+#include <vector>
+
+namespace armnn
+{
+
+RefMeanFloat32Workload::RefMeanFloat32Workload(const MeanQueueDescriptor& descriptor, const WorkloadInfo& info)
+    : Float32Workload<MeanQueueDescriptor>(descriptor, info) {}
+
+// Runs the reference Mean implementation directly on the Float32 tensor data.
+void RefMeanFloat32Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefMeanFloat32Workload_Execute");
+
+    const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+    const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+    const float* inputData = GetInputTensorDataFloat(0, m_Data);
+    float* outputData = GetOutputTensorDataFloat(0, m_Data);
+
+    Mean(inputInfo, outputInfo, m_Data.m_Parameters.m_Axis, inputData, outputData);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefMeanFloat32Workload.hpp b/src/backends/reference/workloads/RefMeanFloat32Workload.hpp
new file mode 100644
index 0000000..a4c559f
--- /dev/null
+++ b/src/backends/reference/workloads/RefMeanFloat32Workload.hpp
@@ -0,0 +1,22 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+
+class RefMeanFloat32Workload : public Float32Workload<MeanQueueDescriptor>
+{
+public:
+    explicit RefMeanFloat32Workload (const MeanQueueDescriptor& descriptor, const WorkloadInfo& info);
+    virtual void Execute() const override;
+};
+
+}//namespace armnn
diff --git a/src/backends/reference/workloads/RefMeanUint8Workload.cpp b/src/backends/reference/workloads/RefMeanUint8Workload.cpp
new file mode 100644
index 0000000..4ebffcf
--- /dev/null
+++ b/src/backends/reference/workloads/RefMeanUint8Workload.cpp
@@ -0,0 +1,39 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefMeanUint8Workload.hpp"
+
+#include "Mean.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+#include <vector>
+
+namespace armnn
+{
+
+RefMeanUint8Workload::RefMeanUint8Workload(const MeanQueueDescriptor& descriptor, const WorkloadInfo& info)
+    : Uint8Workload<MeanQueueDescriptor>(descriptor, info) {}
+
+// Computes the mean on dequantized Float32 data, then requantizes the result
+// back to uint8 via the output TensorInfo.
+void RefMeanUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefMeanUint8Workload_Execute");
+
+    const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+    const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+
+    auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo);
+
+    std::vector<float> results(outputInfo.GetNumElements());
+
+    Mean(inputInfo, outputInfo, m_Data.m_Parameters.m_Axis, dequant.data(), results.data());
+
+    Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefMeanUint8Workload.hpp b/src/backends/reference/workloads/RefMeanUint8Workload.hpp
new file mode 100644
index 0000000..21cf72b
--- /dev/null
+++ b/src/backends/reference/workloads/RefMeanUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefMeanUint8Workload : public Uint8Workload<MeanQueueDescriptor>
+{
+public:
+    explicit RefMeanUint8Workload (const MeanQueueDescriptor& descriptor, const WorkloadInfo& info);
+    virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
index e5c6e1e..7e89cab 100644
--- a/src/backends/reference/workloads/RefWorkloads.hpp
+++ b/src/backends/reference/workloads/RefWorkloads.hpp
@@ -51,3 +51,5 @@
 #include "RefLstmFloat32Workload.hpp"
 #include "RefConvertFp16ToFp32Workload.hpp"
 #include "RefConvertFp32ToFp16Workload.hpp"
+#include "RefMeanUint8Workload.hpp"
+#include "RefMeanFloat32Workload.hpp"
diff --git a/src/backends/test/LayerTests.cpp b/src/backends/test/LayerTests.cpp
index 8f06690..4f6cb93 100644
--- a/src/backends/test/LayerTests.cpp
+++ b/src/backends/test/LayerTests.cpp
@@ -4747,4 +4747,185 @@
 LayerTestResult<float, 4> PermuteFloat32ValueSet3Test(armnn::IWorkloadFactory& workloadFactory)
 {
     return PermuteFloat32ValueSet3TestCommon(workloadFactory);
-};
\ No newline at end of file
+};
+
+namespace
+{
+template <typename T, std::size_t InputDim, std::size_t OutputDim>
+LayerTestResult<T, OutputDim> MeanTestHelper(armnn::IWorkloadFactory& workloadFactory,
+                                     const unsigned int* inputShape,
+                                     const std::vector<T>& inputData,
+                                     const std::vector<unsigned int>& axis,
+                                     bool keepDims,
+                                     const unsigned int* outputShape,
+                                     const std::vector<T>& outputData,
+                                     float scale = 1.0f,
+                                     int32_t offset = 0)
+{
+    auto dataType = (std::is_same<T, uint8_t>::value ?
+                     armnn::DataType::QuantisedAsymm8 :
+                     armnn::DataType::Float32);
+
+    armnn::TensorInfo inputTensorInfo(InputDim, inputShape, dataType);
+    armnn::TensorInfo outputTensorInfo(OutputDim, outputShape, dataType);
+
+    inputTensorInfo.SetQuantizationScale(scale);
+    inputTensorInfo.SetQuantizationOffset(offset);
+
+    outputTensorInfo.SetQuantizationScale(scale);
+    outputTensorInfo.SetQuantizationOffset(offset);
+
+    auto input = MakeTensor<T, InputDim>(inputTensorInfo, inputData);
+
+    LayerTestResult<T, OutputDim> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<T, OutputDim>(outputTensorInfo, outputData);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::MeanQueueDescriptor data;
+    data.m_Parameters.m_Axis = axis;
+    data.m_Parameters.m_KeepDims = keepDims;
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(data,  info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMean(data, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), input.origin());
+
+    workloadFactory.Finalize();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(result.output.origin(), outputHandle.get());
+
+    return result;
+}
+} // anonymous namespace
+
+LayerTestResult<uint8_t, 1> MeanUint8SimpleTest(armnn::IWorkloadFactory& workloadFactory)
+{
+    const unsigned int inputShape[] = { 3, 2 };
+    const unsigned int outputShape[] = { 1 };
+
+    std::vector<uint8_t> input({ 1, 1, 2, 2, 3, 3 });
+    std::vector<uint8_t> output({ 2 });
+
+    return MeanTestHelper<uint8_t, 2, 1>(workloadFactory, inputShape, input, {}, false, outputShape, output);
+}
+
+LayerTestResult<uint8_t, 3> MeanUint8SimpleAxisTest(armnn::IWorkloadFactory& workloadFactory)
+{
+    const unsigned int inputShape[] = { 1, 1, 3, 2 };
+    const unsigned int outputShape[] = { 1, 1, 2 };
+
+    std::vector<uint8_t> input({ 1, 1, 2, 2, 3, 3 });
+    std::vector<uint8_t> output({ 2, 2 });
+
+    return MeanTestHelper<uint8_t, 4, 3>(workloadFactory, inputShape, input, {2}, false, outputShape, output);
+}
+
+LayerTestResult<uint8_t, 4> MeanUint8KeepDimsTest(armnn::IWorkloadFactory& workloadFactory)
+{
+    const unsigned int inputShape[] = { 1, 1, 3, 2 };
+    const unsigned int outputShape[] = { 1, 1, 1, 2 };
+
+    std::vector<uint8_t> input({ 1, 1, 2, 2, 3, 3 });
+    std::vector<uint8_t> output({ 2, 2 });
+
+    return MeanTestHelper<uint8_t, 4, 4>(workloadFactory, inputShape, input, {2}, true, outputShape, output);
+}
+
+LayerTestResult<uint8_t, 4> MeanUint8MultipleDimsTest(armnn::IWorkloadFactory& workloadFactory)
+{
+    const unsigned int inputShape[] = { 2, 3, 1, 2 };
+    const unsigned int outputShape[] = { 1, 3, 1, 1 };
+
+    std::vector<uint8_t> input({ 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6});
+    std::vector<uint8_t> output({ 1, 3, 5 });
+
+    return MeanTestHelper<uint8_t, 4, 4>(workloadFactory, inputShape, input, {0, 3}, true, outputShape, output);
+}
+
+LayerTestResult<uint8_t, 1> MeanVtsUint8Test(armnn::IWorkloadFactory& workloadFactory)
+{
+    const unsigned int inputShape[] = {4, 3, 2};
+    const unsigned int outputShape[] = { 2 };
+
+    std::vector<uint8_t> input({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24});
+    std::vector<uint8_t> output({12, 13});
+
+    return MeanTestHelper<uint8_t, 3, 1>(workloadFactory, inputShape, input, {0, 1}, false, outputShape,
+        output, 0.8f, 5);
+}
+
+LayerTestResult<float, 1> MeanFloatSimpleTest(armnn::IWorkloadFactory& workloadFactory)
+{
+    const unsigned int inputShape[] = { 3, 2 };
+    const unsigned int outputShape[] = { 1 };
+
+    std::vector<float> input({ 1., 1., 2., 2., 3., 3. });
+    std::vector<float> output({ 2. });
+
+    return MeanTestHelper<float, 2, 1>(workloadFactory, inputShape, input, {}, false, outputShape, output);
+}
+
+LayerTestResult<float, 3> MeanFloatSimpleAxisTest(armnn::IWorkloadFactory& workloadFactory)
+{
+    const unsigned int inputShape[] = { 2, 3, 1, 2 };
+    const unsigned int outputShape[] = { 3, 1, 2 };
+
+    std::vector<float> input({ 1., 2., 3., 4., 5., 6., 1., 2., 3., 4., 5., 6.});
+    std::vector<float> output({ 1., 2., 3., 4., 5., 6. });
+
+    return MeanTestHelper<float, 4, 3>(workloadFactory, inputShape, input, {0}, false, outputShape, output);
+}
+
+LayerTestResult<float, 4> MeanFloatKeepDimsTest(armnn::IWorkloadFactory& workloadFactory)
+{
+    const unsigned int inputShape[] = { 1, 1, 3, 2 };
+    const unsigned int outputShape[] = { 1, 1, 1, 2 };
+
+    std::vector<float> input({ 1., 1., 2., 2., 3., 3. });
+    std::vector<float> output({ 2., 2. });
+
+    return MeanTestHelper<float, 4, 4>(workloadFactory, inputShape, input, {2}, true, outputShape, output);
+}
+
+LayerTestResult<float, 4> MeanFloatMultipleDimsTest(armnn::IWorkloadFactory& workloadFactory)
+{
+    const unsigned int inputShape[] = { 2, 3, 1, 2 };
+    const unsigned int outputShape[] = { 1, 3, 1, 1 };
+
+    std::vector<float> input({ 1., 2., 3., 4., 5., 6., 1., 2., 3., 4., 5., 6.});
+    std::vector<float> output({ 1.5, 3.5, 5.5 });
+
+    return MeanTestHelper<float, 4, 4>(workloadFactory, inputShape, input, {0, 3}, true, outputShape, output);
+}
+
+LayerTestResult<float, 1> MeanVtsFloat1Test(armnn::IWorkloadFactory& workloadFactory)
+{
+    const unsigned int inputShape[] = {4, 3, 2};
+    const unsigned int outputShape[] = { 2 };
+
+    std::vector<float> input({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f,
+                              15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f});
+    std::vector<float> output({12.0f, 13.0f});
+
+    return MeanTestHelper<float, 3, 1>(workloadFactory, inputShape, input, {0, 1}, false, outputShape, output);
+}
+
+LayerTestResult<float, 3> MeanVtsFloat2Test(armnn::IWorkloadFactory& workloadFactory)
+{
+    const unsigned int inputShape[] = {4, 3, 2};
+    const unsigned int outputShape[] = {1, 3, 1 };
+
+    std::vector<float> input({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f,
+                              15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f});
+    std::vector<float> output({10.5f, 12.5f, 14.5f});
+
+    return MeanTestHelper<float, 3, 3>(workloadFactory, inputShape, input, {0, 2}, true, outputShape, output);
+}
diff --git a/src/backends/test/LayerTests.hpp b/src/backends/test/LayerTests.hpp
index 365a1f5..9dc3afa 100644
--- a/src/backends/test/LayerTests.hpp
+++ b/src/backends/test/LayerTests.hpp
@@ -343,3 +343,16 @@
 
 LayerTestResult<float, 4> SimpleConvertFp16ToFp32Test(armnn::IWorkloadFactory& workloadFactory);
 LayerTestResult<armnn::Half, 4> SimpleConvertFp32ToFp16Test(armnn::IWorkloadFactory& workloadFactory);
+
+
+LayerTestResult<uint8_t, 1> MeanUint8SimpleTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<uint8_t, 3> MeanUint8SimpleAxisTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<uint8_t, 4> MeanUint8KeepDimsTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<uint8_t, 4> MeanUint8MultipleDimsTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<uint8_t, 1> MeanVtsUint8Test(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 1> MeanFloatSimpleTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 3> MeanFloatSimpleAxisTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> MeanFloatKeepDimsTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> MeanFloatMultipleDimsTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 1> MeanVtsFloat1Test(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 3> MeanVtsFloat2Test(armnn::IWorkloadFactory& workloadFactory);
diff --git a/src/backends/test/Reference.cpp b/src/backends/test/Reference.cpp
index 97a209d..30a8f8e 100644
--- a/src/backends/test/Reference.cpp
+++ b/src/backends/test/Reference.cpp
@@ -250,4 +250,18 @@
 // Convert from Float32 to Float16
 ARMNN_AUTO_TEST_CASE(SimpleConvertFp32ToFp16, SimpleConvertFp32ToFp16Test)
 
+// Mean
+ARMNN_AUTO_TEST_CASE(MeanUint8Simple, MeanUint8SimpleTest)
+ARMNN_AUTO_TEST_CASE(MeanUint8SimpleAxis, MeanUint8SimpleAxisTest)
+ARMNN_AUTO_TEST_CASE(MeanUint8KeepDims, MeanUint8KeepDimsTest)
+ARMNN_AUTO_TEST_CASE(MeanUint8MultipleDims, MeanUint8MultipleDimsTest)
+ARMNN_AUTO_TEST_CASE(MeanVtsUint8, MeanVtsUint8Test)
+
+ARMNN_AUTO_TEST_CASE(MeanFloatSimple, MeanFloatSimpleTest)
+ARMNN_AUTO_TEST_CASE(MeanFloatSimpleAxis, MeanFloatSimpleAxisTest)
+ARMNN_AUTO_TEST_CASE(MeanFloatKeepDims, MeanFloatKeepDimsTest)
+ARMNN_AUTO_TEST_CASE(MeanFloatMultipleDims, MeanFloatMultipleDimsTest)
+ARMNN_AUTO_TEST_CASE(MeanVtsFloat1, MeanVtsFloat1Test)
+ARMNN_AUTO_TEST_CASE(MeanVtsFloat2, MeanVtsFloat2Test)
+
 BOOST_AUTO_TEST_SUITE_END()