IVGCVSW-2049 + IVGCVSW-2051 Create the CL Mean Float workload and add
the unit tests

 * Created the ClMeanWorkload class
 * Added ClMeanValidate validation function
 * Added helper function to convert the reduction axes from the ArmNN
   format to ACL's
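 * Added workload creation tests (Float32, Float16 and Uint8)
 * Added CL layer tests for Mean and a new MeanVtsFloat3 layer test
   (also registered for the reference backend)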
 * These changes require the CL pin to point to at least revision
   88d871028eeae57f9e4536d0329110eccb5e2890 (COMPMID-1574 Implement
   ReduceMean in OpenCL)

!android-nn-driver:155033

Change-Id: I694fd36be0458c90e158172afde045fcc88c32ae
diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp
index 3ca8bb5..6c5704d 100644
--- a/src/backends/cl/ClLayerSupport.cpp
+++ b/src/backends/cl/ClLayerSupport.cpp
@@ -26,6 +26,7 @@
 #include "workloads/ClFullyConnectedWorkload.hpp"
 #include "workloads/ClL2NormalizationFloatWorkload.hpp"
 #include "workloads/ClLstmFloatWorkload.hpp"
+#include "workloads/ClMeanWorkload.hpp"
 #include "workloads/ClMultiplicationWorkload.hpp"
 #include "workloads/ClNormalizationFloatWorkload.hpp"
 #include "workloads/ClPadWorkload.hpp"
@@ -372,11 +373,11 @@
                                      const MeanDescriptor& descriptor,
                                      Optional<std::string&> reasonIfUnsupported) const
 {
-    ignore_unused(input);
-    ignore_unused(output);
-    ignore_unused(descriptor);
-    ignore_unused(reasonIfUnsupported);
-    return false;
+    FORWARD_WORKLOAD_VALIDATE_FUNC(ClMeanValidate,
+                                   reasonIfUnsupported,
+                                   input,
+                                   output,
+                                   descriptor);
 }
 
 bool ClLayerSupport::IsMergerSupported(const std::vector<const TensorInfo*> inputs,
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index fd92db3..08ee9e9 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -303,7 +303,7 @@
 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor,
                                                          const WorkloadInfo& info) const
 {
-    return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
+    return std::make_unique<ClMeanWorkload>(descriptor, info);
 }
 
 std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor,
diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk
index 996db3f..97df8e4 100644
--- a/src/backends/cl/backend.mk
+++ b/src/backends/cl/backend.mk
@@ -26,6 +26,7 @@
         workloads/ClFullyConnectedWorkload.cpp \
         workloads/ClL2NormalizationFloatWorkload.cpp \
         workloads/ClLstmFloatWorkload.cpp \
+        workloads/ClMeanWorkload.cpp \
         workloads/ClMultiplicationWorkload.cpp \
         workloads/ClNormalizationFloatWorkload.cpp \
         workloads/ClPadWorkload.cpp \
diff --git a/src/backends/cl/test/ClCreateWorkloadTests.cpp b/src/backends/cl/test/ClCreateWorkloadTests.cpp
index 4f99894..2a705de 100644
--- a/src/backends/cl/test/ClCreateWorkloadTests.cpp
+++ b/src/backends/cl/test/ClCreateWorkloadTests.cpp
@@ -14,8 +14,6 @@
 #include <backends/cl/workloads/ClWorkloads.hpp>
 #include <backends/cl/workloads/ClWorkloadUtils.hpp>
 
-#include <backends/reference/RefWorkloadFactory.hpp>
-
 boost::test_tools::predicate_result CompareIClTensorHandleShape(IClTensorHandle*                    tensorHandle,
                                                                 std::initializer_list<unsigned int> expectedDimensions)
 {
@@ -739,4 +737,36 @@
     ClResizeBilinearWorkloadTest<ClResizeBilinearFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
 }
 
+template <typename MeanWorkloadType, typename armnn::DataType DataType>
+static void ClMeanWorkloadTest()
+{
+    Graph graph;
+    ClWorkloadFactory factory;
+    auto workload = CreateMeanWorkloadTest<MeanWorkloadType, DataType>(factory, graph);
+
+    // Checks that the workload's input/output handles have the expected shapes (see CreateMeanWorkloadTest).
+    MeanQueueDescriptor queueDescriptor = workload->GetData();
+    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+
+    // The first dimension (batch size) of both input and output has size 1, so ACL has dropped it from the shape.
+    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 3, 7, 4 }));
+    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 4 }));
+}
+
+BOOST_AUTO_TEST_CASE(CreateMeanFloat32Workload)
+{
+    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float32>();
+}
+
+BOOST_AUTO_TEST_CASE(CreateMeanFloat16Workload)
+{
+    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float16>();
+}
+
+BOOST_AUTO_TEST_CASE(CreateMeanUint8Workload)
+{
+    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::QuantisedAsymm8>();
+}
+
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp
index d5e9419..937c58c 100755
--- a/src/backends/cl/test/ClLayerTests.cpp
+++ b/src/backends/cl/test/ClLayerTests.cpp
@@ -274,6 +274,21 @@
 
 ARMNN_AUTO_TEST_CASE(AdditionAfterMaxPool, AdditionAfterMaxPoolTest)
 
+// Mean
+ARMNN_AUTO_TEST_CASE(MeanUint8Simple, MeanUint8SimpleTest)
+ARMNN_AUTO_TEST_CASE(MeanUint8SimpleAxis, MeanUint8SimpleAxisTest)
+ARMNN_AUTO_TEST_CASE(MeanUint8KeepDims, MeanUint8KeepDimsTest)
+ARMNN_AUTO_TEST_CASE(MeanUint8MultipleDims, MeanUint8MultipleDimsTest)
+ARMNN_AUTO_TEST_CASE(MeanVtsUint8, MeanVtsUint8Test)
+
+ARMNN_AUTO_TEST_CASE(MeanFloatSimple, MeanFloatSimpleTest)
+ARMNN_AUTO_TEST_CASE(MeanFloatSimpleAxis, MeanFloatSimpleAxisTest)
+ARMNN_AUTO_TEST_CASE(MeanFloatKeepDims, MeanFloatKeepDimsTest)
+ARMNN_AUTO_TEST_CASE(MeanFloatMultipleDims, MeanFloatMultipleDimsTest)
+ARMNN_AUTO_TEST_CASE(MeanVtsFloat1, MeanVtsFloat1Test)
+ARMNN_AUTO_TEST_CASE(MeanVtsFloat2, MeanVtsFloat2Test)
+ARMNN_AUTO_TEST_CASE(MeanVtsFloat3, MeanVtsFloat3Test)
+
 // ============================================================================
 // COMPARE tests
 
diff --git a/src/backends/cl/workloads/CMakeLists.txt b/src/backends/cl/workloads/CMakeLists.txt
index 59a45fa..86c3804 100644
--- a/src/backends/cl/workloads/CMakeLists.txt
+++ b/src/backends/cl/workloads/CMakeLists.txt
@@ -30,6 +30,8 @@
     ClL2NormalizationFloatWorkload.hpp
     ClLstmFloatWorkload.cpp
     ClLstmFloatWorkload.hpp
+    ClMeanWorkload.cpp
+    ClMeanWorkload.hpp
     ClMergerWorkload.hpp
     ClMultiplicationWorkload.cpp
     ClMultiplicationWorkload.hpp
diff --git a/src/backends/cl/workloads/ClMeanWorkload.cpp b/src/backends/cl/workloads/ClMeanWorkload.cpp
new file mode 100644
index 0000000..7e9649b
--- /dev/null
+++ b/src/backends/cl/workloads/ClMeanWorkload.cpp
@@ -0,0 +1,100 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClMeanWorkload.hpp"
+
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace
+{
+
+void ConvertArmnnAxesToAclCoordinates(size_t inputDimensions,
+                                      unsigned int originalInputRank,
+                                      const std::vector<unsigned int>& armnnAxes,
+                                      arm_compute::Coordinates& outAclCoords)
+{
+    if (armnnAxes.empty())
+    {
+        // If no reduction axes were provided, then the input must be reduced along all dimensions.
+        // Since arm_compute::CLReduceMean does not accept an empty vector as the reduction dimensions, we then
+        // manually create a vector including all the input dimensions (in reversed order) as:
+        //
+        // { inputDimensions - 1, inputDimensions - 2, ..., 1, 0 }
+        //
+        outAclCoords.set_num_dimensions(inputDimensions);
+        std::generate(outAclCoords.begin(), outAclCoords.end(), [d = inputDimensions - 1] () mutable { return d--; });
+    }
+    else
+    {
+        // Create a vector of reduction dimensions (in reversed order) with the given reduction axes.
+        //
+        // Adjust the given reduction axes according to the original rank of the input tensor (before ACL applied any
+        // dimension correction).
+        // For example, if the input tensor originally had 4 dimensions, and one of the reduction axes was 2, then the
+        // new value for that reduction axis should be 1.
+        //
+        // Example:
+        // ArmNN input shape = { 1, 1, 3, 2 } -> ACL input shape = { 2, 3 }
+        // ArmNN reduction axis = { 2 }       -> ACL reduction axis = { 1 }
+        // ArmNN reduction axis = { 3 }       -> ACL reduction axis = { 0 }
+        //
+        // The transformation: ACL reduction axis index = original rank - ArmNN reduction axis index - 1
+        // NOTE: the subtraction is unsigned and unchecked, so an axis >= originalInputRank would wrap around.
+        outAclCoords.set_num_dimensions(armnnAxes.size());
+        std::transform(armnnAxes.begin(), armnnAxes.end(),
+                       outAclCoords.begin(),
+                       [originalInputRank](unsigned int i){ return originalInputRank - i - 1; });
+    }
+}
+
+} // anonymous namespace
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+arm_compute::Status ClMeanValidate(const TensorInfo& input,
+                                   const TensorInfo& output,
+                                   const MeanDescriptor& desc)
+{
+    const arm_compute::TensorInfo aclInputInfo  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+    arm_compute::Coordinates coords;
+    ConvertArmnnAxesToAclCoordinates(aclInputInfo.num_dimensions(),
+                                     input.GetNumDimensions(),
+                                     desc.m_Axis,
+                                     coords);
+
+    return arm_compute::CLReduceMean::validate(&aclInputInfo, coords, desc.m_KeepDims, &aclOutputInfo);
+}
+
+ClMeanWorkload::ClMeanWorkload(const MeanQueueDescriptor& descriptor, const WorkloadInfo& info)
+    : BaseWorkload<MeanQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("ClMeanWorkload", 1, 1);
+
+    arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    arm_compute::Coordinates coords;
+    ConvertArmnnAxesToAclCoordinates(input.info()->num_dimensions(),
+                                     info.m_InputTensorInfos[0].GetNumDimensions(),
+                                     m_Data.m_Parameters.m_Axis,
+                                     coords);
+
+    m_Layer.configure(&input, coords, m_Data.m_Parameters.m_KeepDims, &output);
+}
+
+void ClMeanWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL("ClMeanWorkload_Execute");
+    m_Layer.run();
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClMeanWorkload.hpp b/src/backends/cl/workloads/ClMeanWorkload.hpp
new file mode 100644
index 0000000..c9f0356
--- /dev/null
+++ b/src/backends/cl/workloads/ClMeanWorkload.hpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+arm_compute::Status ClMeanValidate(const TensorInfo& input,
+                                   const TensorInfo& output,
+                                   const MeanDescriptor& desc);
+
+class ClMeanWorkload : public BaseWorkload<MeanQueueDescriptor>
+{
+public:
+    ClMeanWorkload(const MeanQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+    void Execute() const override;
+
+private:
+    // Not using CLMeanStdDev, as 4D input tensor support for Mean has been added to a new function called CLReduceMean.
+    mutable arm_compute::CLReduceMean m_Layer;
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClWorkloads.hpp b/src/backends/cl/workloads/ClWorkloads.hpp
index 63de744..eeca403 100644
--- a/src/backends/cl/workloads/ClWorkloads.hpp
+++ b/src/backends/cl/workloads/ClWorkloads.hpp
@@ -16,6 +16,7 @@
 #include "ClL2NormalizationFloatWorkload.hpp"
 #include "ClLstmFloatWorkload.hpp"
 #include "ClMergerWorkload.hpp"
+#include "ClMeanWorkload.hpp"
 #include "ClMultiplicationWorkload.hpp"
 #include "ClNormalizationFloatWorkload.hpp"
 #include "ClPermuteWorkload.hpp"
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index 30f5b10..38ce94d 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -292,6 +292,7 @@
 ARMNN_AUTO_TEST_CASE(MeanFloatMultipleDims, MeanFloatMultipleDimsTest)
 ARMNN_AUTO_TEST_CASE(MeanVtsFloat1, MeanVtsFloat1Test)
 ARMNN_AUTO_TEST_CASE(MeanVtsFloat2, MeanVtsFloat2Test)
+ARMNN_AUTO_TEST_CASE(MeanVtsFloat3, MeanVtsFloat3Test)
 
 ARMNN_AUTO_TEST_CASE(AdditionAfterMaxPool, AdditionAfterMaxPoolTest)
 
diff --git a/src/backends/test/LayerTests.cpp b/src/backends/test/LayerTests.cpp
index 5c7a887..e5a4258 100755
--- a/src/backends/test/LayerTests.cpp
+++ b/src/backends/test/LayerTests.cpp
@@ -5807,20 +5807,19 @@
 
 namespace
 {
+
 template <typename T, std::size_t InputDim, std::size_t OutputDim>
 LayerTestResult<T, OutputDim> MeanTestHelper(armnn::IWorkloadFactory& workloadFactory,
-                                     const unsigned int* inputShape,
-                                     const std::vector<T>& inputData,
-                                     const std::vector<unsigned int>& axis,
-                                     bool keepDims,
-                                     const unsigned int* outputShape,
-                                     const std::vector<T>& outputData,
-                                     float scale = 1.0f,
-                                     int32_t offset = 0)
+                                             const unsigned int* inputShape,
+                                             const std::vector<T>& inputData,
+                                             const std::vector<unsigned int>& axis,
+                                             bool keepDims,
+                                             const unsigned int* outputShape,
+                                             const std::vector<T>& outputData,
+                                             float scale = 1.0f,
+                                             int32_t offset = 0)
 {
-    auto dataType = (std::is_same<T, uint8_t>::value ?
-                     armnn::DataType::QuantisedAsymm8 :
-                     armnn::DataType::Float32);
+    auto dataType = (std::is_same<T, uint8_t>::value ? armnn::DataType::QuantisedAsymm8 : armnn::DataType::Float32);
 
     armnn::TensorInfo inputTensorInfo(InputDim, inputShape, dataType);
     armnn::TensorInfo outputTensorInfo(OutputDim, outputShape, dataType);
@@ -5860,6 +5859,7 @@
 
     return result;
 }
+
 } // anonymous namespace
 
 LayerTestResult<uint8_t, 1> MeanUint8SimpleTest(armnn::IWorkloadFactory& workloadFactory)
@@ -5881,7 +5881,7 @@
     std::vector<uint8_t> input({ 1, 1, 2, 2, 3, 3 });
     std::vector<uint8_t> output({ 2, 2 });
 
-    return MeanTestHelper<uint8_t, 4, 3>(workloadFactory, inputShape, input, {2}, false, outputShape, output);
+    return MeanTestHelper<uint8_t, 4, 3>(workloadFactory, inputShape, input, { 2 }, false, outputShape, output);
 }
 
 LayerTestResult<uint8_t, 4> MeanUint8KeepDimsTest(armnn::IWorkloadFactory& workloadFactory)
@@ -5892,7 +5892,7 @@
     std::vector<uint8_t> input({ 1, 1, 2, 2, 3, 3 });
     std::vector<uint8_t> output({ 2, 2 });
 
-    return MeanTestHelper<uint8_t, 4, 4>(workloadFactory, inputShape, input, {2}, true, outputShape, output);
+    return MeanTestHelper<uint8_t, 4, 4>(workloadFactory, inputShape, input, { 2 }, true, outputShape, output);
 }
 
 LayerTestResult<uint8_t, 4> MeanUint8MultipleDimsTest(armnn::IWorkloadFactory& workloadFactory)
@@ -5900,22 +5900,23 @@
     const unsigned int inputShape[] = { 2, 3, 1, 2 };
     const unsigned int outputShape[] = { 1, 3, 1, 1 };
 
-    std::vector<uint8_t> input({ 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6});
+    std::vector<uint8_t> input({ 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6 });
     std::vector<uint8_t> output({ 1, 3, 5 });
 
-    return MeanTestHelper<uint8_t, 4, 4>(workloadFactory, inputShape, input, {0, 3}, true, outputShape, output);
+    return MeanTestHelper<uint8_t, 4, 4>(workloadFactory, inputShape, input, { 0, 3 }, true, outputShape, output);
 }
 
 LayerTestResult<uint8_t, 1> MeanVtsUint8Test(armnn::IWorkloadFactory& workloadFactory)
 {
-    const unsigned int inputShape[] = {4, 3, 2};
+    const unsigned int inputShape[] = { 4, 3, 2 };
     const unsigned int outputShape[] = { 2 };
 
-    std::vector<uint8_t> input({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24});
-    std::vector<uint8_t> output({12, 13});
+    std::vector<uint8_t> input({ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
+                                 24 });
+    std::vector<uint8_t> output({ 12, 13 });
 
-    return MeanTestHelper<uint8_t, 3, 1>(workloadFactory, inputShape, input, {0, 1}, false, outputShape,
-        output, 0.8f, 5);
+    return MeanTestHelper<uint8_t, 3, 1>(workloadFactory, inputShape, input, { 0, 1 }, false, outputShape,
+                                         output, 0.8f, 5);
 }
 
 LayerTestResult<float, 1> MeanFloatSimpleTest(armnn::IWorkloadFactory& workloadFactory)
@@ -5923,8 +5924,8 @@
     const unsigned int inputShape[] = { 3, 2 };
     const unsigned int outputShape[] = { 1 };
 
-    std::vector<float> input({ 1., 1., 2., 2., 3., 3. });
-    std::vector<float> output({ 2. });
+    std::vector<float> input({ 1.0f, 1.0f, 2.0f, 2.0f, 3.0f, 3.0f });
+    std::vector<float> output({ 2.0f });
 
     return MeanTestHelper<float, 2, 1>(workloadFactory, inputShape, input, {}, false, outputShape, output);
 }
@@ -5934,10 +5935,10 @@
     const unsigned int inputShape[] = { 2, 3, 1, 2 };
     const unsigned int outputShape[] = { 3, 1, 2 };
 
-    std::vector<float> input({ 1., 2., 3., 4., 5., 6., 1., 2., 3., 4., 5., 6.});
-    std::vector<float> output({ 1., 2., 3., 4., 5., 6. });
+    std::vector<float> input({ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f });
+    std::vector<float> output({ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f });
 
-    return MeanTestHelper<float, 4, 3>(workloadFactory, inputShape, input, {0}, false, outputShape, output);
+    return MeanTestHelper<float, 4, 3>(workloadFactory, inputShape, input, { 0 }, false, outputShape, output);
 }
 
 LayerTestResult<float, 4> MeanFloatKeepDimsTest(armnn::IWorkloadFactory& workloadFactory)
@@ -5945,10 +5946,10 @@
     const unsigned int inputShape[] = { 1, 1, 3, 2 };
     const unsigned int outputShape[] = { 1, 1, 1, 2 };
 
-    std::vector<float> input({ 1., 1., 2., 2., 3., 3. });
-    std::vector<float> output({ 2., 2. });
+    std::vector<float> input({ 1.0f, 1.0f, 2.0f, 2.0f, 3.0f, 3.0f });
+    std::vector<float> output({ 2.0f, 2.0f });
 
-    return MeanTestHelper<float, 4, 4>(workloadFactory, inputShape, input, {2}, true, outputShape, output);
+    return MeanTestHelper<float, 4, 4>(workloadFactory, inputShape, input, { 2 }, true, outputShape, output);
 }
 
 LayerTestResult<float, 4> MeanFloatMultipleDimsTest(armnn::IWorkloadFactory& workloadFactory)
@@ -5956,34 +5957,45 @@
     const unsigned int inputShape[] = { 2, 3, 1, 2 };
     const unsigned int outputShape[] = { 1, 3, 1, 1 };
 
-    std::vector<float> input({ 1., 2., 3., 4., 5., 6., 1., 2., 3., 4., 5., 6.});
-    std::vector<float> output({ 1.5, 3.5, 5.5 });
+    std::vector<float> input({ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f });
+    std::vector<float> output({ 1.5f, 3.5f, 5.5f });
 
-    return MeanTestHelper<float, 4, 4>(workloadFactory, inputShape, input, {0, 3}, true, outputShape, output);
+    return MeanTestHelper<float, 4, 4>(workloadFactory, inputShape, input, { 0, 3 }, true, outputShape, output);
 }
 
 LayerTestResult<float, 1> MeanVtsFloat1Test(armnn::IWorkloadFactory& workloadFactory)
 {
-    const unsigned int inputShape[] = {4, 3, 2};
+    const unsigned int inputShape[] = { 4, 3, 2 };
     const unsigned int outputShape[] = { 2 };
 
-    std::vector<float> input({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f,
-                              15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f});
-    std::vector<float> output({12.0f, 13.0f});
+    std::vector<float> input({ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f,
+                               15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f });
+    std::vector<float> output({ 12.0f, 13.0f });
 
-    return MeanTestHelper<float, 3, 1>(workloadFactory, inputShape, input, {0, 1}, false, outputShape, output);
+    return MeanTestHelper<float, 3, 1>(workloadFactory, inputShape, input, { 0, 1 }, false, outputShape, output);
 }
 
 LayerTestResult<float, 3> MeanVtsFloat2Test(armnn::IWorkloadFactory& workloadFactory)
 {
-    const unsigned int inputShape[] = {4, 3, 2};
-    const unsigned int outputShape[] = {1, 3, 1 };
+    const unsigned int inputShape[] = { 4, 3, 2 };
+    const unsigned int outputShape[] = { 1, 3, 1 };
 
-    std::vector<float> input({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f,
-                              15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f});
-    std::vector<float> output({10.5f, 12.5f, 14.5f});
+    std::vector<float> input({ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f,
+                               15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f });
+    std::vector<float> output({ 10.5f, 12.5f, 14.5f });
 
-    return MeanTestHelper<float, 3, 3>(workloadFactory, inputShape, input, {0, 2}, true, outputShape, output);
+    return MeanTestHelper<float, 3, 3>(workloadFactory, inputShape, input, { 0, 2 }, true, outputShape, output);
+}
+
+LayerTestResult<float, 3> MeanVtsFloat3Test(armnn::IWorkloadFactory& workloadFactory)
+{
+    const unsigned int inputShape[] = { 1, 2, 2, 1 };
+    const unsigned int outputShape[] = { 1, 2, 1 };
+
+    std::vector<float> input({ 1.0f, 2.0f, 3.0f, 4.0f });
+    std::vector<float> output({ 1.5f, 3.5f });
+
+    return MeanTestHelper<float, 4, 3>(workloadFactory, inputShape, input, { 2 }, false, outputShape, output);
 }
 
 LayerTestResult<float, 4> AdditionAfterMaxPoolTest(armnn::IWorkloadFactory& workloadFactory)
diff --git a/src/backends/test/LayerTests.hpp b/src/backends/test/LayerTests.hpp
index 9aae5da..ebd3841 100644
--- a/src/backends/test/LayerTests.hpp
+++ b/src/backends/test/LayerTests.hpp
@@ -388,4 +388,6 @@
 LayerTestResult<float, 4> MeanFloatMultipleDimsTest(armnn::IWorkloadFactory& workloadFactory);
 LayerTestResult<float, 1> MeanVtsFloat1Test(armnn::IWorkloadFactory& workloadFactory);
 LayerTestResult<float, 3> MeanVtsFloat2Test(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 3> MeanVtsFloat3Test(armnn::IWorkloadFactory& workloadFactory);
+
 LayerTestResult<float, 4> AdditionAfterMaxPoolTest(armnn::IWorkloadFactory& workloadFactory);