IVGCVSW-6174 Add Cl Pooling3d Workload

 * Add IsSupported for Pooling3d
 * Add CreateWorkload case for Pooling3d
 * Create new ClPooling3dWorkload header and source files
 * Add Pooling3d workload to ClWorkloads.hpp
 * Add tests for Pooling3d workload
 * Add Pooling3d build function to ArmComputeTensorUtils

Change-Id: Ia270b0fe809a171ed73af14376de8708b346d500
Signed-off-by: Ryan OShea <ryan.oshea3@arm.com>
diff --git a/src/backends/aclCommon/ArmComputeTensorUtils.cpp b/src/backends/aclCommon/ArmComputeTensorUtils.cpp
index 2dc6d2a..e476eb3 100644
--- a/src/backends/aclCommon/ArmComputeTensorUtils.cpp
+++ b/src/backends/aclCommon/ArmComputeTensorUtils.cpp
@@ -187,17 +187,10 @@
 arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDescriptor& descriptor,
                                                               bool fpMixedPrecision)
 {
-    using arm_compute::PoolingType;
-    using arm_compute::DimensionRoundingType;
-    using arm_compute::PadStrideInfo;
-    using arm_compute::PoolingLayerInfo;
-    using arm_compute::Size2D;
-    using arm_compute::DataLayout;
-
     // Resolve ARM Compute layer parameters.
-    const PoolingType poolingType = ConvertPoolingAlgorithmToAclPoolingType(descriptor.m_PoolType);
+    const arm_compute::PoolingType poolingType = ConvertPoolingAlgorithmToAclPoolingType(descriptor.m_PoolType);
 
-    const DataLayout dataLayout = ConvertDataLayout(descriptor.m_DataLayout);
+    const arm_compute::DataLayout dataLayout = ConvertDataLayout(descriptor.m_DataLayout);
 
     bool isGlobalPooling = (descriptor.m_StrideX==0 && descriptor.m_StrideY==0);
     //use specific constructor if global pooling
@@ -206,9 +199,9 @@
         return arm_compute::PoolingLayerInfo(poolingType, dataLayout);
     }
 
-    const DimensionRoundingType rounding = ConvertOutputShapeRoundingToAclDimensionRoundingType(
+    const arm_compute::DimensionRoundingType rounding = ConvertOutputShapeRoundingToAclDimensionRoundingType(
                                                                                     descriptor.m_OutputShapeRounding);
-    const PadStrideInfo padStrideInfo(descriptor.m_StrideX,
+    const arm_compute::PadStrideInfo padStrideInfo(descriptor.m_StrideX,
                                       descriptor.m_StrideY,
                                       descriptor.m_PadLeft,
                                       descriptor.m_PadRight,
@@ -218,12 +211,51 @@
 
     const bool excludePadding = (descriptor.m_PaddingMethod == PaddingMethod::Exclude);
 
-    const Size2D poolSize(descriptor.m_PoolWidth, descriptor.m_PoolHeight);
+    const arm_compute::Size2D poolSize(descriptor.m_PoolWidth, descriptor.m_PoolHeight);
 
     return arm_compute::PoolingLayerInfo(poolingType, poolSize, dataLayout, padStrideInfo, excludePadding,
                                          fpMixedPrecision);
 }
 
+arm_compute::Pooling3dLayerInfo BuildArmComputePooling3dLayerInfo(const Pooling3dDescriptor& descriptor,
+                                                                  bool fpMixedPrecision)
+{
+    const arm_compute::PoolingType poolingType = ConvertPoolingAlgorithmToAclPoolingType(descriptor.m_PoolType);
+
+    bool isGlobalPooling = (descriptor.m_StrideX==0 && descriptor.m_StrideY==0 && descriptor.m_StrideZ==0);
+    //use specific constructor if global pooling
+    if(isGlobalPooling)
+    {
+        return arm_compute::Pooling3dLayerInfo(poolingType);
+    }
+
+    const arm_compute::Size3D poolSize(descriptor.m_PoolWidth, descriptor.m_PoolHeight, descriptor.m_PoolDepth);
+
+    const arm_compute::Size3D stride(descriptor.m_StrideX,
+                        descriptor.m_StrideY,
+                        descriptor.m_StrideZ);
+
+    const arm_compute::Padding3D padding(descriptor.m_PadLeft,
+                            descriptor.m_PadRight,
+                            descriptor.m_PadTop,
+                            descriptor.m_PadBottom,
+                            descriptor.m_PadFront,
+                            descriptor.m_PadBack);
+
+    const bool excludePadding = (descriptor.m_PaddingMethod == PaddingMethod::Exclude);
+
+    const arm_compute::DimensionRoundingType rounding = ConvertOutputShapeRoundingToAclDimensionRoundingType(
+            descriptor.m_OutputShapeRounding);
+
+    return arm_compute::Pooling3dLayerInfo(poolingType,
+                                           poolSize,
+                                           stride,
+                                           padding,
+                                           excludePadding,
+                                           fpMixedPrecision,
+                                           rounding);
+}
+
 arm_compute::NormalizationLayerInfo BuildArmComputeNormalizationLayerInfo(const NormalizationDescriptor& descriptor)
 {
     const arm_compute::NormType normType =
diff --git a/src/backends/aclCommon/ArmComputeTensorUtils.hpp b/src/backends/aclCommon/ArmComputeTensorUtils.hpp
index ba6ef6a..31992b9 100644
--- a/src/backends/aclCommon/ArmComputeTensorUtils.hpp
+++ b/src/backends/aclCommon/ArmComputeTensorUtils.hpp
@@ -56,6 +56,12 @@
 arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDescriptor& descriptor,
                                                               bool fpMixedPrecision = false);
 
+/// Utility function used to setup an arm_compute::Pooling3dLayerInfo object from a given
+/// armnn::Pooling3dDescriptor
+/// and an optional fpMixedPrecision flag.
+arm_compute::Pooling3dLayerInfo BuildArmComputePooling3dLayerInfo(const Pooling3dDescriptor& descriptor,
+                                                                  bool fpMixedPrecision = false);
+
 /// Utility function to setup an arm_compute::NormalizationLayerInfo object from an armnn::NormalizationDescriptor.
 arm_compute::NormalizationLayerInfo BuildArmComputeNormalizationLayerInfo(const NormalizationDescriptor& desc);
 
diff --git a/src/backends/backendsCommon/test/layerTests/Pooling3dTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/Pooling3dTestImpl.cpp
index a708afa..a241aea 100644
--- a/src/backends/backendsCommon/test/layerTests/Pooling3dTestImpl.cpp
+++ b/src/backends/backendsCommon/test/layerTests/Pooling3dTestImpl.cpp
@@ -29,6 +29,15 @@
 
 using namespace armnnUtils;
 
+template<typename T>
+void PermuteNCDHWToNDHWC(std::vector<T> &src, armnn::TensorInfo &srcInfo)
+{
+    const armnn::PermutationVector NCDHWToNDHWC = { 0, 4, 1, 2, 3 };
+    std::vector<T> tmp(src.size());
+    armnnUtils::Permute(srcInfo.GetShape(), NCDHWToNDHWC, src.data(), tmp.data(), sizeof(T));
+    src = tmp;
+}
+
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
 LayerTestResult<T, 5> SimplePooling3dTestImpl(
     armnn::IWorkloadFactory& workloadFactory,
@@ -137,6 +146,7 @@
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
     const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout,
     float qScale = 1.0f,
     int32_t qOffset = 0)
 {
@@ -153,6 +163,7 @@
     descriptor.m_PadFront = descriptor.m_PadBack = 0;
     descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor;
     descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude;
+    descriptor.m_DataLayout = dataLayout;
 
     unsigned int inputWidth = 3;
     unsigned int inputHeight = 3;
@@ -169,8 +180,10 @@
     unsigned int channels = 2;
     unsigned int batchSize = 2;
 
-    armnn::TensorInfo inputTensorInfo({ batchSize, channels, inputDepth, inputHeight, inputWidth }, ArmnnType);
-    armnn::TensorInfo outputTensorInfo({ batchSize, channels, outputDepth, outputHeight, outputWidth }, ArmnnType);
+    armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo( batchSize, channels, inputDepth, inputHeight,
+                                                                   inputWidth, dataLayout, ArmnnType);
+    armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo( batchSize, channels, outputDepth, outputHeight,
+                                                                    outputWidth, dataLayout, ArmnnType);
 
     // Set quantization parameters if the requested type is a quantized type.
     if(armnn::IsQuantizedType<T>())
@@ -239,6 +252,12 @@
             },
             qScale, qOffset);
 
+    if (dataLayout == armnn::DataLayout::NDHWC)
+    {
+        PermuteNCDHWToNDHWC(input, inputTensorInfo);
+        PermuteNCDHWToNDHWC(outputExpected, outputTensorInfo);
+    }
+
     return SimplePooling3dTestImpl<ArmnnType>(
         workloadFactory, memoryManager, tensorHandleFactory, descriptor, qScale, qOffset,
         input, outputExpected, inputTensorInfo.GetShape(), outputTensorInfo.GetShape());
@@ -249,7 +268,7 @@
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
     const armnn::ITensorHandleFactory& tensorHandleFactory,
-    const armnn::DataLayout dataLayout = armnn::DataLayout::NCDHW,
+    const armnn::DataLayout dataLayout,
     float qScale = 1.0f,
     int32_t qOffset = 0)
 {
@@ -306,16 +325,10 @@
         },
         qScale, qOffset));
 
-    const armnn::PermutationVector NCDHWToNDHWC = { 0, 4, 1, 2, 3 };
     if (dataLayout == armnn::DataLayout::NDHWC)
     {
-        std::vector<T> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCDHWToNDHWC, inputData.data(), tmp.data(), sizeof(T));
-        inputData = tmp;
-
-        std::vector<T> tmp1(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCDHWToNDHWC, outputData.data(), tmp1.data(), sizeof(T));
-        outputData = tmp1;
+        PermuteNCDHWToNDHWC(inputData, inputTensorInfo);
+        PermuteNCDHWToNDHWC(outputData, outputTensorInfo);
     }
 
     return SimplePooling3dTestImpl<ArmnnType>(
@@ -328,6 +341,7 @@
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
     const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout,
     float qScale = 1.0f,
     int32_t qOffset = 0)
 {
@@ -342,9 +356,10 @@
     descriptor.m_PadFront = 1;
     descriptor.m_PadBack = 1;
     descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue;
+    descriptor.m_DataLayout = dataLayout;
 
-    armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4, 4 }, ArmnnType);
-    armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3, 3 }, ArmnnType);
+    armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo( 1, 1, 4, 4, 4 , dataLayout, ArmnnType);
+    armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo( 1, 1, 3, 3, 3 , dataLayout, ArmnnType);
 
     // Set quantization parameters if the requested type is a quantized type.
     if(armnn::IsQuantizedType<T>())
@@ -395,6 +410,12 @@
         },
         qScale, qOffset);
 
+    if (dataLayout == armnn::DataLayout::NDHWC)
+    {
+        PermuteNCDHWToNDHWC(input, inputTensorInfo);
+        PermuteNCDHWToNDHWC(outputExpected, outputTensorInfo);
+    }
+
     return SimplePooling3dTestImpl<ArmnnType>(
         workloadFactory, memoryManager, tensorHandleFactory, descriptor, qScale, qOffset,
         input, outputExpected, inputTensorInfo.GetShape(), outputTensorInfo.GetShape());
@@ -462,16 +483,10 @@
         },
         qScale, qOffset));
 
-    const armnn::PermutationVector NCDHWToNDHWC = { 0, 4, 1, 2, 3 };
     if (dataLayout == armnn::DataLayout::NDHWC)
     {
-        std::vector<T> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCDHWToNDHWC, inputData.data(), tmp.data(), sizeof(T));
-        inputData = tmp;
-
-        std::vector<T> tmp1(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCDHWToNDHWC, outputData.data(), tmp1.data(), sizeof(T));
-        outputData = tmp1;
+        PermuteNCDHWToNDHWC(inputData, inputTensorInfo);
+        PermuteNCDHWToNDHWC(outputData, outputTensorInfo);
     }
 
     return SimplePooling3dTestImpl<ArmnnType>(
@@ -484,6 +499,7 @@
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
     const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout,
     float qScale = 1.0f,
     int32_t qOffset = 0)
 {
@@ -498,11 +514,12 @@
     descriptor.m_PadFront = 50;
     descriptor.m_PadBack = 50;
     descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude;
+    descriptor.m_DataLayout = dataLayout;
 
-    armnn::TensorInfo inputTensorInfo({ 5, 3, 52, 60, 68 }, ArmnnType);
-    armnn::TensorInfo outputTensorInfo({ 5, 3, 11, 13, 15 }, ArmnnType);
+    armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo( 5, 3, 52, 60, 68, dataLayout, ArmnnType);
+    armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo( 5, 3, 11, 13, 14, dataLayout, ArmnnType);
 
-    // Set quantization parameters if the requested type is a quantized type.
+    // Set quantization parameters if the requested type is a quantized type.
     if(armnn::IsQuantizedType<T>())
     {
         inputTensorInfo.SetQuantizationScale(qScale);
@@ -535,6 +552,7 @@
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
     const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout,
     float qScale = 1.0f,
     int32_t qOffset = 0)
 {
@@ -549,9 +567,11 @@
     descriptor.m_PadFront = 1;
     descriptor.m_PadBack = 1;
     descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue;
+    descriptor.m_DataLayout = dataLayout;
 
-    armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4, 4 }, ArmnnType);
-    armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3, 3 }, ArmnnType);
+
+    armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo ( 1, 1, 4, 4, 4, dataLayout, ArmnnType);
+    armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo( 1, 1, 3, 3, 3, dataLayout, ArmnnType);
 
     // Set quantization parameters if the requested type is a quantized type.
     if(armnn::IsQuantizedType<T>())
@@ -602,6 +622,12 @@
         },
         qScale, qOffset);
 
+    if (dataLayout == armnn::DataLayout::NDHWC)
+    {
+        PermuteNCDHWToNDHWC(input, inputTensorInfo);
+        PermuteNCDHWToNDHWC(outputExpected, outputTensorInfo);
+    }
+
     return SimplePooling3dTestImpl<ArmnnType>(
         workloadFactory, memoryManager, tensorHandleFactory, descriptor, qScale, qOffset,
         input, outputExpected, inputTensorInfo.GetShape(), outputTensorInfo.GetShape());
@@ -612,7 +638,7 @@
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
     const armnn::ITensorHandleFactory& tensorHandleFactory,
-    armnn::DataLayout dataLayout = armnn::DataLayout::NCDHW,
+    const armnn::DataLayout dataLayout,
     float qScale = 1.0f,
     int32_t qOffset = 0)
 {
@@ -669,16 +695,10 @@
         },
         qScale, qOffset));
 
-    const armnn::PermutationVector NCDHWToNDHWC = { 0, 4, 1, 2, 3 };
     if (dataLayout == armnn::DataLayout::NDHWC)
     {
-        std::vector<T> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCDHWToNDHWC, inputData.data(), tmp.data(), sizeof(T));
-        inputData = tmp;
-
-        std::vector<T> tmp1(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCDHWToNDHWC, outputData.data(), tmp1.data(), sizeof(T));
-        outputData = tmp1;
+        PermuteNCDHWToNDHWC(inputData, inputTensorInfo);
+        PermuteNCDHWToNDHWC(outputData, outputTensorInfo);
     }
 
     return SimplePooling3dTestImpl<ArmnnType>(
@@ -691,6 +711,7 @@
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
     const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout,
     float qScale = 1.0f,
     int32_t qOffset = 0)
 {
@@ -705,9 +726,10 @@
     descriptor.m_PadFront = 1;
     descriptor.m_PadBack = 1;
     descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue;
+    descriptor.m_DataLayout = dataLayout;
 
-    armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4, 4 }, ArmnnType);
-    armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3, 3 }, ArmnnType);
+    armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo( 1, 1, 4, 4, 4, dataLayout, ArmnnType);
+    armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo( 1, 1, 3, 3, 3, dataLayout,ArmnnType);
 
     // Set quantization parameters if the requested type is a quantized type.
     if(armnn::IsQuantizedType<T>())
@@ -794,28 +816,129 @@
         },
         qScale, qOffset);
 
+    if (dataLayout == armnn::DataLayout::NDHWC)
+    {
+        PermuteNCDHWToNDHWC(input, inputTensorInfo);
+        PermuteNCDHWToNDHWC(outputExpected, outputTensorInfo);
+    }
+
     return SimplePooling3dTestImpl<ArmnnType>(
         workloadFactory, memoryManager, tensorHandleFactory, descriptor, qScale, qOffset,
         input, outputExpected, inputTensorInfo.GetShape(), outputTensorInfo.GetShape());
 }
 
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 5> AsymmetricNonSquareMaxPooling3dWithPaddingOnlyPoolTestCommon(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory,
+        const armnn::DataLayout dataLayout,
+        float qScale = 1.0f,
+        int32_t qOffset = 0)
+{
+    armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo( 1, 1, 1, 3, 1, dataLayout, ArmnnType);
+    armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo( 1, 1, 2, 2, 1, dataLayout, ArmnnType);
+
+    armnn::Pooling3dDescriptor descriptor;
+    descriptor.m_PoolType = armnn::PoolingAlgorithm::Max;
+    descriptor.m_PoolWidth = 1;
+    descriptor.m_PoolHeight = 2;
+    descriptor.m_PoolDepth = 3;
+    descriptor.m_StrideX = 1;
+    descriptor.m_StrideY = 2;
+    descriptor.m_StrideZ = 1;
+    descriptor.m_PadLeft = 0;
+    descriptor.m_PadRight = 0;
+    descriptor.m_PadTop = 2;
+    descriptor.m_PadBottom = 0;
+    descriptor.m_PadFront = 1;
+    descriptor.m_PadBack = 2;
+    descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor;
+    descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude;
+    descriptor.m_DataLayout = dataLayout;
+
+    // Construct input data.
+    auto input = QuantizedVector<T>( { 1.0f, 3.0f, 4.0f, }, qScale, qOffset);
+
+    // These were calculated manually.
+    auto outputExpected = QuantizedVector<T>( { 0.0f, 3.0f, 0.0f, 3.0f, }, qScale, qOffset);
+
+    if (dataLayout == armnn::DataLayout::NDHWC)
+    {
+        PermuteNCDHWToNDHWC<T>(input, inputTensorInfo);
+        PermuteNCDHWToNDHWC<T>(outputExpected, outputTensorInfo);
+    }
+
+    return SimplePooling3dTestImpl<ArmnnType>(
+            workloadFactory, memoryManager, tensorHandleFactory, descriptor, qScale, qOffset,
+            input, outputExpected, inputTensorInfo.GetShape(), outputTensorInfo.GetShape());
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
 LayerTestResult<T, 5> AsymmetricNonSquareMaxPooling3dTestCommon(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
     const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout,
     float qScale = 1.0f,
     int32_t qOffset = 0)
 {
-    armnn::TensorInfo inputTensorInfo({ 1, 1, 1, 3, 1 }, ArmnnType);
-    armnn::TensorInfo outputTensorInfo({ 1, 1, 2, 2, 1 }, ArmnnType);
+    armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo( 1, 1, 1, 3, 1, dataLayout, ArmnnType);
+    armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo( 1, 1, 2, 2, 1, dataLayout, ArmnnType);
 
     armnn::Pooling3dDescriptor descriptor;
     descriptor.m_PoolType = armnn::PoolingAlgorithm::Max;
     descriptor.m_PoolWidth = 1;
     descriptor.m_PoolHeight = 2;
     descriptor.m_PoolDepth = 3;
-    descriptor.m_StrideX = 0;
+    descriptor.m_StrideX = 1;
+    descriptor.m_StrideY = 2;
+    descriptor.m_StrideZ = 1;
+    descriptor.m_PadLeft = 0;
+    descriptor.m_PadRight = 0;
+    descriptor.m_PadTop = 1;
+    descriptor.m_PadBottom = 0;
+    descriptor.m_PadFront = 1;
+    descriptor.m_PadBack = 2;
+    descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor;
+    descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude;
+    descriptor.m_DataLayout = dataLayout;
+
+    // Construct input data.
+    auto input = QuantizedVector<T>( { 1.0f, 3.0f, 4.0f, }, qScale, qOffset);
+
+    // These were calculated manually.
+    auto outputExpected = QuantizedVector<T>( { 1.0f, 4.0f, 1.0f, 4.0f, }, qScale, qOffset);
+
+    if (dataLayout == armnn::DataLayout::NDHWC)
+    {
+        PermuteNCDHWToNDHWC<T>(input, inputTensorInfo);
+        PermuteNCDHWToNDHWC<T>(outputExpected, outputTensorInfo);
+    }
+
+    return SimplePooling3dTestImpl<ArmnnType>(
+        workloadFactory, memoryManager, tensorHandleFactory, descriptor, qScale, qOffset,
+        input, outputExpected, inputTensorInfo.GetShape(), outputTensorInfo.GetShape());
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 5> AsymmetricNonSquareAveragePooling3dWithPaddingOnlyPoolTestCommon(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout,
+    float qScale = 1.0f,
+    int32_t qOffset = 0)
+{
+    armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo( 1, 1, 1, 3, 1, dataLayout, ArmnnType);
+    armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo( 1, 1, 2, 2, 1, dataLayout, ArmnnType);
+
+    armnn::Pooling3dDescriptor descriptor;
+    descriptor.m_PoolType = armnn::PoolingAlgorithm::Average;
+    descriptor.m_PoolWidth = 1;
+    descriptor.m_PoolHeight = 2;
+    descriptor.m_PoolDepth = 3;
+    descriptor.m_StrideX = 1;
     descriptor.m_StrideY = 2;
     descriptor.m_StrideZ = 1;
     descriptor.m_PadLeft = 0;
@@ -826,21 +949,19 @@
     descriptor.m_PadBack = 2;
     descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor;
     descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude;
+    descriptor.m_DataLayout = dataLayout;
 
     // Construct input data.
-    auto input = QuantizedVector<T>(
-        {
-            1.0f, 3.0f, 4.0f,
-        },
-        qScale, qOffset);
+    auto input = QuantizedVector<T>({ 1.0f, 3.0f, 4.0f, }, qScale, qOffset);
 
     // These were calculated manually.
-    auto outputExpected = QuantizedVector<T>(
-        {
-            0.0f, 3.0f, 0.0f, 3.0f,
-        },
-        qScale, qOffset);
+    auto outputExpected = QuantizedVector<T>( { 0.0f, 2.0f, 0.0f, 2.0f, }, qScale, qOffset);
 
+    if (dataLayout == armnn::DataLayout::NDHWC)
+    {
+        PermuteNCDHWToNDHWC<T>(input, inputTensorInfo);
+        PermuteNCDHWToNDHWC<T>(outputExpected, outputTensorInfo);
+    }
     return SimplePooling3dTestImpl<ArmnnType>(
         workloadFactory, memoryManager, tensorHandleFactory, descriptor, qScale, qOffset,
         input, outputExpected, inputTensorInfo.GetShape(), outputTensorInfo.GetShape());
@@ -848,61 +969,61 @@
 
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
 LayerTestResult<T, 5> AsymmetricNonSquareAveragePooling3dTestCommon(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory,
-    float qScale = 1.0f,
-    int32_t qOffset = 0)
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory,
+        const armnn::DataLayout dataLayout,
+        float qScale = 1.0f,
+        int32_t qOffset = 0)
 {
-    armnn::TensorInfo inputTensorInfo({ 1, 1, 1, 3, 1 }, ArmnnType);
-    armnn::TensorInfo outputTensorInfo({ 1, 1, 2, 2, 1 }, ArmnnType);
+    armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo( 1, 1, 1, 3, 1, dataLayout, ArmnnType);
+    armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo( 1, 1, 2, 2, 1, dataLayout, ArmnnType);
 
     armnn::Pooling3dDescriptor descriptor;
     descriptor.m_PoolType = armnn::PoolingAlgorithm::Average;
     descriptor.m_PoolWidth = 1;
     descriptor.m_PoolHeight = 2;
     descriptor.m_PoolDepth = 3;
-    descriptor.m_StrideX = 0;
+    descriptor.m_StrideX = 1;
     descriptor.m_StrideY = 2;
     descriptor.m_StrideZ = 1;
     descriptor.m_PadLeft = 0;
     descriptor.m_PadRight = 0;
-    descriptor.m_PadTop = 2;
+    descriptor.m_PadTop = 1;
     descriptor.m_PadBottom = 0;
     descriptor.m_PadFront = 1;
     descriptor.m_PadBack = 2;
     descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor;
     descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude;
+    descriptor.m_DataLayout = dataLayout;
 
     // Construct input data.
-    auto input = QuantizedVector<T>(
-        {
-            1.0f, 3.0f, 4.0f,
-        },
-        qScale, qOffset);
+    auto input = QuantizedVector<T>( { 1.0f, 3.0f, 4.0f, }, qScale, qOffset);
 
     // These were calculated manually.
-    auto outputExpected = QuantizedVector<T>(
-        {
-            0.0f, 2.0f, 0.0f, 2.0f,
-        },
-        qScale, qOffset);
+    auto outputExpected = QuantizedVector<T>( { 1.0f, 3.5f, 1.0f, 3.5f, }, qScale, qOffset);
 
+    if (dataLayout == armnn::DataLayout::NDHWC)
+    {
+        PermuteNCDHWToNDHWC<T>(input, inputTensorInfo);
+        PermuteNCDHWToNDHWC<T>(outputExpected, outputTensorInfo);
+    }
     return SimplePooling3dTestImpl<ArmnnType>(
-        workloadFactory, memoryManager, tensorHandleFactory, descriptor, qScale, qOffset,
-        input, outputExpected, inputTensorInfo.GetShape(), outputTensorInfo.GetShape());
+            workloadFactory, memoryManager, tensorHandleFactory, descriptor, qScale, qOffset,
+            input, outputExpected, inputTensorInfo.GetShape(), outputTensorInfo.GetShape());
 }
 
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-LayerTestResult<T, 5> AsymmetricNonSquareL2Pooling3dTestCommon(
+LayerTestResult<T, 5> AsymmetricNonSquareL2Pooling3dWithPaddingOnlyPoolTestCommon(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
     const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout,
     float qScale = 1.0f,
     int32_t qOffset = 0)
 {
-    armnn::TensorInfo inputTensorInfo({ 1, 1, 1, 3, 1 }, ArmnnType);
-    armnn::TensorInfo outputTensorInfo({ 1, 1, 2, 2, 1 }, ArmnnType);
+    armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo( 1, 1, 1, 3, 1, dataLayout, ArmnnType);
+    armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo( 1, 1, 2, 2, 1, dataLayout, ArmnnType);
 
     armnn::Pooling3dDescriptor descriptor;
     descriptor.m_PoolType = armnn::PoolingAlgorithm::L2;
@@ -920,20 +1041,19 @@
     descriptor.m_PadBack = 2;
     descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor;
     descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude;
+    descriptor.m_DataLayout = dataLayout;
 
     // Construct input data.
-    auto input = QuantizedVector<T>(
-        {
-            1.0f, 3.0f, 4.0f,
-        },
-        qScale, qOffset);
+    auto input = QuantizedVector<T>( { 1.0f, 3.0f, 4.0f, }, qScale, qOffset);
 
     // These were calculated manually.
-    auto outputExpected = QuantizedVector<T>(
-        {
-            0.0f, 2.2360679775f, 0.0f, 2.2360679775f,
-        },
-        qScale, qOffset);
+    auto outputExpected = QuantizedVector<T>( { 0.0f, 2.2360679775f, 0.0f, 2.2360679775f, }, qScale, qOffset);
+
+    if (dataLayout == armnn::DataLayout::NDHWC)
+    {
+        PermuteNCDHWToNDHWC<T>(input, inputTensorInfo);
+        PermuteNCDHWToNDHWC<T>(outputExpected, outputTensorInfo);
+    }
 
     return SimplePooling3dTestImpl<ArmnnType>(
         workloadFactory, memoryManager, tensorHandleFactory, descriptor, qScale, qOffset,
@@ -941,6 +1061,53 @@
 }
 
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 5> AsymmetricNonSquareL2Pooling3dTestCommon(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory,
+        const armnn::DataLayout dataLayout,
+        float qScale = 1.0f,
+        int32_t qOffset = 0)
+{
+    armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo( 1, 1, 1, 3, 1, dataLayout, ArmnnType);
+    armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo( 1, 1, 2, 2, 1, dataLayout, ArmnnType);
+
+    armnn::Pooling3dDescriptor descriptor;
+    descriptor.m_PoolType = armnn::PoolingAlgorithm::L2;
+    descriptor.m_PoolWidth = 1;
+    descriptor.m_PoolHeight = 2;
+    descriptor.m_PoolDepth = 3;
+    descriptor.m_StrideX = 1;
+    descriptor.m_StrideY = 2;
+    descriptor.m_StrideZ = 1;
+    descriptor.m_PadLeft = 0;
+    descriptor.m_PadRight = 0;
+    descriptor.m_PadTop = 1;
+    descriptor.m_PadBottom = 0;
+    descriptor.m_PadFront = 1;
+    descriptor.m_PadBack = 2;
+    descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor;
+    descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude;
+    descriptor.m_DataLayout = dataLayout;
+
+    // Construct input data.
+    auto input = QuantizedVector<T>( { 1.0f, 3.0f, 4.0f, }, qScale, qOffset);
+
+    // These were calculated manually.
+    auto outputExpected = QuantizedVector<T>( { 1.0f, 3.53553390593f, 1.0f, 3.53553390593f, }, qScale, qOffset);
+
+    if (dataLayout == armnn::DataLayout::NDHWC)
+    {
+        PermuteNCDHWToNDHWC<T>(input, inputTensorInfo);
+        PermuteNCDHWToNDHWC<T>(outputExpected, outputTensorInfo);
+    }
+
+    return SimplePooling3dTestImpl<ArmnnType>(
+            workloadFactory, memoryManager, tensorHandleFactory, descriptor, qScale, qOffset,
+            input, outputExpected, inputTensorInfo.GetShape(), outputTensorInfo.GetShape());
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
 LayerTestResult<T, 5> ComparePooling3dTestCommon(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
@@ -948,6 +1115,7 @@
     const armnn::ITensorHandleFactory& tensorHandleFactory,
     const armnn::ITensorHandleFactory& refTensorHandleFactory,
     armnn::PoolingAlgorithm poolingType,
+    const armnn::DataLayout dataLayout,
     float qScale = 1.0f,
     int32_t qOffset = 0)
 {
@@ -970,14 +1138,10 @@
     const unsigned int outputHeight = (inputHeight + 2 * padY + strideY - poolSize) / strideY;
     const unsigned int outputDepth = (inputDepth + 2 * padZ + strideZ - poolSize) / strideZ;
 
-    armnn::TensorInfo inputTensorInfo;
-    armnn::TensorInfo outputTensorInfo;
-
-    unsigned int inputShape[] = { batchSize, channelCount, inputHeight, inputWidth, inputDepth };
-    unsigned int outputShape[] = { batchSize, channelCount, outputHeight, outputWidth, outputDepth };
-
-    inputTensorInfo = armnn::TensorInfo(5, inputShape, ArmnnType);
-    outputTensorInfo = armnn::TensorInfo(5, outputShape, ArmnnType);
+    armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo(batchSize, channelCount, inputDepth, inputHeight,
+                                                                  inputWidth, dataLayout, ArmnnType);
+    armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo(batchSize, channelCount, outputDepth, outputHeight,
+                                                                   outputWidth, dataLayout, ArmnnType);
 
     // Set quantization parameters if the requested type is a quantized type.
     if(armnn::IsQuantizedType<T>())
@@ -991,7 +1155,6 @@
     std::vector<T> input = MakeRandomTensor<T>(inputTensorInfo, 81715);
     std::vector<T> actualOutput(outputTensorInfo.GetNumElements());
     std::vector<T> expectedOutput(outputTensorInfo.GetNumElements());
-
     LayerTestResult<T, 5> comparisonResult(outputTensorInfo);
 
     std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
@@ -1015,8 +1178,10 @@
     data.m_Parameters.m_PadFront = padZ;
     data.m_Parameters.m_PadBack = padZ;
     data.m_Parameters.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor;
+    data.m_Parameters.m_DataLayout = dataLayout;
 
-    std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refTensorHandleFactory.CreateTensorHandle(outputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandleRef =
+                                          refTensorHandleFactory.CreateTensorHandle(outputTensorInfo);
     std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refTensorHandleFactory.CreateTensorHandle(inputTensorInfo);
 
     // Don't execute if Pooling is not supported, as an exception will be raised.
@@ -1062,34 +1227,36 @@
     return comparisonResult;
 }
 
-
 } // anonymous namespace
 
 LayerTestResult<float, 5> SimpleMaxPooling3dSize2x2x2Stride1x1x1Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory)
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout)
 {
     return SimpleMaxPooling3dSize2x2x2Stride1x1x1TestCommon<armnn::DataType::Float32>(
-        workloadFactory, memoryManager, tensorHandleFactory);
+        workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
 }
 
 LayerTestResult<uint8_t, 5> SimpleMaxPooling3dSize2x2x2Stride1x1x1Uint8Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory)
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout)
 {
     return SimpleMaxPooling3dSize2x2x2Stride1x1x1TestCommon<armnn::DataType::QAsymmU8>(
-        workloadFactory, memoryManager, tensorHandleFactory, 0.1f, 128);
+        workloadFactory, memoryManager, tensorHandleFactory, dataLayout, 0.1f, 128);
 }
 
 LayerTestResult<int16_t, 5> SimpleMaxPooling3dSize2x2x2Stride1x1x1Int16Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory)
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout)
 {
     return SimpleMaxPooling3dSize2x2x2Stride1x1x1TestCommon<armnn::DataType::QSymmS16>(
-            workloadFactory, memoryManager, tensorHandleFactory);
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
 }
 
 LayerTestResult<float, 5> SimpleMaxPooling3dTest(
@@ -1125,28 +1292,31 @@
 LayerTestResult<float, 5> IgnorePaddingSimpleMaxPooling3dTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory)
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout)
 {
     return IgnorePaddingSimpleMaxPooling3dTestCommon<armnn::DataType::Float32>(
-            workloadFactory, memoryManager, tensorHandleFactory);
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
 }
 
 LayerTestResult<uint8_t, 5> IgnorePaddingSimpleMaxPooling3dUint8Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory)
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout)
 {
     return IgnorePaddingSimpleMaxPooling3dTestCommon<armnn::DataType::QAsymmU8>(
-            workloadFactory, memoryManager, tensorHandleFactory, 1.0f, -5);
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout, 1.0f, -5);
 }
 
 LayerTestResult<int16_t, 5> IgnorePaddingSimpleMaxPooling3dInt16Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory)
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout)
 {
     return IgnorePaddingSimpleMaxPooling3dTestCommon<armnn::DataType::QSymmS16>(
-            workloadFactory, memoryManager, tensorHandleFactory);
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
 }
 
 LayerTestResult<float, 5> SimpleAveragePooling3dTest(
@@ -1212,163 +1382,271 @@
 LayerTestResult<float, 5> LargeTensorsAveragePooling3dTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory)
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout)
 {
     return LargeTensorsAveragePooling3dTestCommon<armnn::DataType::Float32>(
-            workloadFactory, memoryManager, tensorHandleFactory);
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
 }
 
 LayerTestResult<uint8_t, 5> LargeTensorsAveragePooling3dUint8Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory)
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout)
 {
     return LargeTensorsAveragePooling3dTestCommon<armnn::DataType::QAsymmU8>(
-        workloadFactory, memoryManager, tensorHandleFactory, 0.5, -1);
+        workloadFactory, memoryManager, tensorHandleFactory, dataLayout, 0.5, -1);
 }
 
 LayerTestResult<int16_t, 5> LargeTensorsAveragePooling3dInt16Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory)
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout)
 {
     return LargeTensorsAveragePooling3dTestCommon<armnn::DataType::QSymmS16>(
-            workloadFactory, memoryManager, tensorHandleFactory);
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
 }
 
 LayerTestResult<float, 5> IgnorePaddingSimpleAveragePooling3dTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory)
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout)
 {
     return IgnorePaddingSimpleAveragePooling3dTestCommon<armnn::DataType::Float32>(
-            workloadFactory, memoryManager, tensorHandleFactory);
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
 }
 
 LayerTestResult<uint8_t, 5> IgnorePaddingSimpleAveragePooling3dUint8Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory)
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout)
 {
     return IgnorePaddingSimpleAveragePooling3dTestCommon<armnn::DataType::QAsymmU8>(
-            workloadFactory, memoryManager, tensorHandleFactory, 1.0f, -5);
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout, 1.0f, -5);
 }
 
 LayerTestResult<int16_t, 5> IgnorePaddingSimpleAveragePooling3dInt16Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory)
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout)
 {
     return IgnorePaddingSimpleAveragePooling3dTestCommon<armnn::DataType::QSymmS16>(
-            workloadFactory, memoryManager, tensorHandleFactory);
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
 }
 
 LayerTestResult<float, 5> IgnorePaddingSimpleL2Pooling3dTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory)
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout)
 {
     return IgnorePaddingSimpleL2Pooling3dTestCommon<armnn::DataType::Float32>(
-            workloadFactory, memoryManager, tensorHandleFactory);
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
 }
 
 LayerTestResult<uint8_t, 5> IgnorePaddingSimpleL2Pooling3dUint8Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory)
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout)
 {
     return IgnorePaddingSimpleL2Pooling3dTestCommon<armnn::DataType::QAsymmU8>(
-            workloadFactory, memoryManager, tensorHandleFactory, 1.0f, -5);
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout, 1.0f, -5);
 }
 
 LayerTestResult<int16_t, 5> IgnorePaddingSimpleL2Pooling3dInt16Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory)
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout)
 {
     return IgnorePaddingSimpleL2Pooling3dTestCommon<armnn::DataType::QSymmS16>(
-            workloadFactory, memoryManager, tensorHandleFactory);
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
 }
 
 LayerTestResult<float, 5> AsymmetricNonSquareMaxPooling3dTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory)
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout)
 {
     return AsymmetricNonSquareMaxPooling3dTestCommon<armnn::DataType::Float32>(
-            workloadFactory, memoryManager, tensorHandleFactory);
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
 }
 
 LayerTestResult<uint8_t, 5> AsymmetricNonSquareMaxPooling3dUint8Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory)
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout)
 {
     return AsymmetricNonSquareMaxPooling3dTestCommon<armnn::DataType::QAsymmU8>(
-            workloadFactory, memoryManager, tensorHandleFactory);
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
 }
 
 LayerTestResult<int16_t, 5> AsymmetricNonSquareMaxPooling3dInt16Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory)
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout)
 {
     return AsymmetricNonSquareMaxPooling3dTestCommon<armnn::DataType::QSymmS16>(
-            workloadFactory, memoryManager, tensorHandleFactory);
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
+}
+
+LayerTestResult<float, 5> AsymmetricNonSquareMaxPooling3dWithPaddingOnlyPoolTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory,
+        const armnn::DataLayout dataLayout)
+{
+    return AsymmetricNonSquareMaxPooling3dWithPaddingOnlyPoolTestCommon<armnn::DataType::Float32>(
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
+}
+
+LayerTestResult<uint8_t, 5> AsymmetricNonSquareMaxPooling3dWithPaddingOnlyPoolUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory,
+        const armnn::DataLayout dataLayout)
+{
+    return AsymmetricNonSquareMaxPooling3dWithPaddingOnlyPoolTestCommon<armnn::DataType::QAsymmU8>(
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
+}
+
+LayerTestResult<int16_t, 5> AsymmetricNonSquareMaxPooling3dWithPaddingOnlyPoolInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory,
+        const armnn::DataLayout dataLayout)
+{
+    return AsymmetricNonSquareMaxPooling3dWithPaddingOnlyPoolTestCommon<armnn::DataType::QSymmS16>(
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
 }
 
 LayerTestResult<float, 5> AsymmetricNonSquareAveragePooling3dTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory)
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout)
 {
     return AsymmetricNonSquareAveragePooling3dTestCommon<armnn::DataType::Float32>(
-            workloadFactory, memoryManager, tensorHandleFactory);
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
 }
 
 LayerTestResult<uint8_t, 5> AsymmetricNonSquareAveragePooling3dUint8Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory)
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout)
 {
     return AsymmetricNonSquareAveragePooling3dTestCommon<armnn::DataType::QAsymmU8>(
-            workloadFactory, memoryManager, tensorHandleFactory);
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
 }
 
 LayerTestResult<int16_t, 5> AsymmetricNonSquareAveragePooling3dInt16Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory)
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout)
 {
     return AsymmetricNonSquareAveragePooling3dTestCommon<armnn::DataType::QSymmS16>(
-            workloadFactory, memoryManager, tensorHandleFactory);
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
+}
+
+LayerTestResult<float, 5> AsymmetricNonSquareAveragePooling3dWithPaddingOnlyPoolTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory,
+        const armnn::DataLayout dataLayout)
+{
+    return AsymmetricNonSquareAveragePooling3dWithPaddingOnlyPoolTestCommon<armnn::DataType::Float32>(
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
+}
+
+LayerTestResult<uint8_t, 5> AsymmetricNonSquareAveragePooling3dWithPaddingOnlyPoolUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory,
+        const armnn::DataLayout dataLayout)
+{
+    return AsymmetricNonSquareAveragePooling3dWithPaddingOnlyPoolTestCommon<armnn::DataType::QAsymmU8>(
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
+}
+
+LayerTestResult<int16_t, 5> AsymmetricNonSquareAveragePooling3dWithPaddingOnlyPoolInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory,
+        const armnn::DataLayout dataLayout)
+{
+    return AsymmetricNonSquareAveragePooling3dWithPaddingOnlyPoolTestCommon<armnn::DataType::QSymmS16>(
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
 }
 
 LayerTestResult<float, 5> AsymmetricNonSquareL2Pooling3dTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory)
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout)
 {
     return AsymmetricNonSquareL2Pooling3dTestCommon<armnn::DataType::Float32>(
-            workloadFactory, memoryManager, tensorHandleFactory);
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
 }
 
 LayerTestResult<uint8_t, 5> AsymmetricNonSquareL2Pooling3dUint8Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory)
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout)
 {
     return AsymmetricNonSquareL2Pooling3dTestCommon<armnn::DataType::QAsymmU8>(
-            workloadFactory, memoryManager, tensorHandleFactory);
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
 }
 
 LayerTestResult<int16_t, 5> AsymmetricNonSquareL2Pooling3dInt16Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory)
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout)
 {
     return AsymmetricNonSquareL2Pooling3dTestCommon<armnn::DataType::QSymmS16>(
-            workloadFactory, memoryManager, tensorHandleFactory);
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
+}
+
+LayerTestResult<float, 5> AsymmetricNonSquareL2Pooling3dWithPaddingOnlyPoolTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory,
+        const armnn::DataLayout dataLayout)
+{
+    return AsymmetricNonSquareL2Pooling3dWithPaddingOnlyPoolTestCommon<armnn::DataType::Float32>(
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
+}
+
+LayerTestResult<uint8_t, 5> AsymmetricNonSquareL2Pooling3dWithPaddingOnlyPoolUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory,
+        const armnn::DataLayout dataLayout)
+{
+    return AsymmetricNonSquareL2Pooling3dWithPaddingOnlyPoolTestCommon<armnn::DataType::QAsymmU8>(
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
+}
+
+LayerTestResult<int16_t, 5> AsymmetricNonSquareL2Pooling3dWithPaddingOnlyPoolInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory,
+        const armnn::DataLayout dataLayout)
+{
+    return AsymmetricNonSquareL2Pooling3dWithPaddingOnlyPoolTestCommon<armnn::DataType::QSymmS16>(
+            workloadFactory, memoryManager, tensorHandleFactory, dataLayout);
 }
 
 LayerTestResult<float, 5> ComparePooling3dTest(
@@ -1377,10 +1655,12 @@
     armnn::IWorkloadFactory& refWorkloadFactory,
     const armnn::ITensorHandleFactory& tensorHandleFactory,
     const armnn::ITensorHandleFactory& refTensorHandleFactory,
-    armnn::PoolingAlgorithm  poolingType)
+    armnn::PoolingAlgorithm  poolingType,
+    const armnn::DataLayout dataLayout)
 {
     return ComparePooling3dTestCommon<armnn::DataType::Float32>(
-        workloadFactory, memoryManager,  refWorkloadFactory, tensorHandleFactory, refTensorHandleFactory, poolingType);
+        workloadFactory, memoryManager,  refWorkloadFactory, tensorHandleFactory, refTensorHandleFactory,
+        poolingType, dataLayout);
 }
 
 LayerTestResult<uint8_t, 5> ComparePooling3dUint8Test(
@@ -1389,11 +1669,12 @@
     armnn::IWorkloadFactory& refWorkloadFactory,
     const armnn::ITensorHandleFactory& tensorHandleFactory,
     const armnn::ITensorHandleFactory& refTensorHandleFactory,
-    armnn::PoolingAlgorithm  poolingType)
+    armnn::PoolingAlgorithm  poolingType,
+    const armnn::DataLayout dataLayout)
 {
     return ComparePooling3dTestCommon<armnn::DataType::QAsymmU8>(
         workloadFactory, memoryManager,  refWorkloadFactory, tensorHandleFactory, refTensorHandleFactory,
-        poolingType, 0.1f, 128);
+        poolingType, dataLayout, 0.1f, 128);
 }
 
 LayerTestResult<int16_t, 5> ComparePooling3dInt16Test(
@@ -1402,8 +1683,10 @@
     armnn::IWorkloadFactory& refWorkloadFactory,
     const armnn::ITensorHandleFactory& tensorHandleFactory,
     const armnn::ITensorHandleFactory& refTensorHandleFactory,
-    armnn::PoolingAlgorithm  poolingType)
+    armnn::PoolingAlgorithm  poolingType,
+    const armnn::DataLayout dataLayout)
 {
     return ComparePooling3dTestCommon<armnn::DataType::QSymmS16>(
-        workloadFactory, memoryManager,  refWorkloadFactory, tensorHandleFactory, refTensorHandleFactory, poolingType);
+        workloadFactory, memoryManager,  refWorkloadFactory, tensorHandleFactory, refTensorHandleFactory,
+        poolingType, dataLayout);
 }
\ No newline at end of file
diff --git a/src/backends/backendsCommon/test/layerTests/Pooling3dTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/Pooling3dTestImpl.hpp
index 8ad8111..f17b6df 100644
--- a/src/backends/backendsCommon/test/layerTests/Pooling3dTestImpl.hpp
+++ b/src/backends/backendsCommon/test/layerTests/Pooling3dTestImpl.hpp
@@ -15,17 +15,20 @@
 LayerTestResult<float,   5> SimpleMaxPooling3dSize2x2x2Stride1x1x1Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory);
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout);
 
 LayerTestResult<uint8_t, 5>SimpleMaxPooling3dSize2x2x2Stride1x1x1Uint8Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory);
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout);
 
 LayerTestResult<int16_t, 5> SimpleMaxPooling3dSize2x2x2Stride1x1x1Int16Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory);
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout);
 
 LayerTestResult<float,   5> SimpleMaxPooling3dTest(
     armnn::IWorkloadFactory& workloadFactory,
@@ -48,17 +51,20 @@
 LayerTestResult<float,   5> IgnorePaddingSimpleMaxPooling3dTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory);
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout);
 
 LayerTestResult<uint8_t, 5> IgnorePaddingSimpleMaxPooling3dUint8Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory);
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout);
 
 LayerTestResult<int16_t, 5> IgnorePaddingSimpleMaxPooling3dInt16Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory);
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout);
 
 LayerTestResult<float,   5> SimpleAveragePooling3dTest(
     armnn::IWorkloadFactory& workloadFactory,
@@ -81,32 +87,38 @@
 LayerTestResult<float,   5> LargeTensorsAveragePooling3dTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory);
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout);
 
 LayerTestResult<uint8_t, 5> LargeTensorsAveragePooling3dUint8Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory);
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout);
 
 LayerTestResult<int16_t, 5> LargeTensorsAveragePooling3dInt16Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory);
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout);
 
 LayerTestResult<float,   5> IgnorePaddingSimpleAveragePooling3dTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory);
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout);
 
 LayerTestResult<uint8_t, 5> IgnorePaddingSimpleAveragePooling3dUint8Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory);
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout);
 
 LayerTestResult<int16_t, 5> IgnorePaddingSimpleAveragePooling3dInt16Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory);
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout);
 
 LayerTestResult<float,   5> SimpleL2Pooling3dTest(
     armnn::IWorkloadFactory& workloadFactory,
@@ -129,62 +141,128 @@
 LayerTestResult<float,   5> IgnorePaddingSimpleL2Pooling3dTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory);
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout);
 
 LayerTestResult<uint8_t, 5> IgnorePaddingSimpleL2Pooling3dUint8Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory);
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout);
 
 LayerTestResult<int16_t, 5> IgnorePaddingSimpleL2Pooling3dInt16Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory);
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout);
 
 LayerTestResult<float,   5> AsymmetricNonSquareMaxPooling3dTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory);
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout);
 
 LayerTestResult<uint8_t, 5> AsymmetricNonSquareMaxPooling3dUint8Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory);
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout);
 
 LayerTestResult<int16_t, 5> AsymmetricNonSquareMaxPooling3dInt16Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory);
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout);
+
+LayerTestResult<float,   5> AsymmetricNonSquareMaxPooling3dWithPaddingOnlyPoolTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory,
+        const armnn::DataLayout dataLayout);
+
+LayerTestResult<uint8_t, 5> AsymmetricNonSquareMaxPooling3dWithPaddingOnlyPoolUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory,
+        const armnn::DataLayout dataLayout);
+
+LayerTestResult<int16_t, 5> AsymmetricNonSquareMaxPooling3dWithPaddingOnlyPoolInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory,
+        const armnn::DataLayout dataLayout);
 
 LayerTestResult<float,   5> AsymmetricNonSquareAveragePooling3dTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory);
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout);
 
 LayerTestResult<uint8_t, 5> AsymmetricNonSquareAveragePooling3dUint8Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory);
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout);
 
 LayerTestResult<int16_t, 5> AsymmetricNonSquareAveragePooling3dInt16Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory);
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout);
+
+LayerTestResult<float,   5> AsymmetricNonSquareAveragePooling3dWithPaddingOnlyPoolTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory,
+        const armnn::DataLayout dataLayout);
+
+LayerTestResult<uint8_t, 5> AsymmetricNonSquareAveragePooling3dWithPaddingOnlyPoolUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory,
+        const armnn::DataLayout dataLayout);
+
+LayerTestResult<int16_t, 5> AsymmetricNonSquareAveragePooling3dWithPaddingOnlyPoolInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory,
+        const armnn::DataLayout dataLayout);
 
 LayerTestResult<float,   5> AsymmetricNonSquareL2Pooling3dTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory);
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout);
 
 LayerTestResult<uint8_t, 5> AsymmetricNonSquareL2Pooling3dUint8Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory);
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout);
 
 LayerTestResult<int16_t, 5> AsymmetricNonSquareL2Pooling3dInt16Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const armnn::ITensorHandleFactory& tensorHandleFactory);
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::DataLayout dataLayout);
+
+LayerTestResult<float,   5> AsymmetricNonSquareL2Pooling3dWithPaddingOnlyPoolTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory,
+        const armnn::DataLayout dataLayout);
+
+LayerTestResult<uint8_t, 5> AsymmetricNonSquareL2Pooling3dWithPaddingOnlyPoolUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory,
+        const armnn::DataLayout dataLayout);
+
+LayerTestResult<int16_t, 5> AsymmetricNonSquareL2Pooling3dWithPaddingOnlyPoolInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory,
+        const armnn::DataLayout dataLayout);
 
 LayerTestResult<float,   5> ComparePooling3dTest(
     armnn::IWorkloadFactory& workloadFactory,
@@ -192,7 +270,8 @@
     armnn::IWorkloadFactory& refWorkloadFactory,
     const armnn::ITensorHandleFactory& tensorHandleFactory,
     const armnn::ITensorHandleFactory& refTensorHandleFactory,
-    armnn::PoolingAlgorithm  poolingType);
+    armnn::PoolingAlgorithm  poolingType,
+    const armnn::DataLayout dataLayout);
 
 LayerTestResult<uint8_t, 5> ComparePooling3dUint8Test(
     armnn::IWorkloadFactory& workloadFactory,
@@ -200,7 +279,8 @@
     armnn::IWorkloadFactory& refWorkloadFactory,
     const armnn::ITensorHandleFactory& tensorHandleFactory,
     const armnn::ITensorHandleFactory& refTensorHandleFactory,
-    armnn::PoolingAlgorithm  poolingType);
+    armnn::PoolingAlgorithm  poolingType,
+    const armnn::DataLayout dataLayout);
 
 LayerTestResult<int16_t, 5> ComparePooling3dInt16Test(
     armnn::IWorkloadFactory& workloadFactory,
@@ -208,6 +288,5 @@
     armnn::IWorkloadFactory& refWorkloadFactory,
     const armnn::ITensorHandleFactory& tensorHandleFactory,
     const armnn::ITensorHandleFactory& refTensorHandleFactory,
-    armnn::PoolingAlgorithm  poolingType);
-
-
+    armnn::PoolingAlgorithm  poolingType,
+    const armnn::DataLayout dataLayout);
diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp
index e52f578..4dcaca9 100644
--- a/src/backends/cl/ClLayerSupport.cpp
+++ b/src/backends/cl/ClLayerSupport.cpp
@@ -59,6 +59,7 @@
 #include "workloads/ClPadWorkload.hpp"
 #include "workloads/ClPermuteWorkload.hpp"
 #include "workloads/ClPooling2dWorkload.hpp"
+#include "workloads/ClPooling3dWorkload.hpp"
 #include "workloads/ClPreluWorkload.hpp"
 #include "workloads/ClQLstmWorkload.hpp"
 #include "workloads/ClQuantizedLstmWorkload.hpp"
@@ -449,6 +450,11 @@
                                         infos[1],
                                         *(PolymorphicDowncast<const Pooling2dDescriptor*>(&descriptor)),
                                         reasonIfUnsupported);
+        case LayerType::Pooling3d:
+            return IsPooling3dSupported(infos[0],
+                                        infos[1],
+                                        *(PolymorphicDowncast<const Pooling3dDescriptor*>(&descriptor)),
+                                        reasonIfUnsupported);
         case LayerType::Prelu:
             return IsPreluSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
         case LayerType::QLstm:
@@ -1194,6 +1200,14 @@
     FORWARD_WORKLOAD_VALIDATE_FUNC(ClPooling2dWorkloadValidate, reasonIfUnsupported, input, output, descriptor);
 }
 
+bool ClLayerSupport::IsPooling3dSupported(const TensorInfo& input,
+                                          const TensorInfo& output,
+                                          const Pooling3dDescriptor& descriptor,
+                                          Optional<std::string&> reasonIfUnsupported) const
+{
+    FORWARD_WORKLOAD_VALIDATE_FUNC(ClPooling3dWorkloadValidate, reasonIfUnsupported, input, output, descriptor);
+}
+
 bool ClLayerSupport::IsPreluSupported(const armnn::TensorInfo &input,
                                       const armnn::TensorInfo &alpha,
                                       const armnn::TensorInfo &output,
diff --git a/src/backends/cl/ClLayerSupport.hpp b/src/backends/cl/ClLayerSupport.hpp
index 103944e..b4d0e82 100644
--- a/src/backends/cl/ClLayerSupport.hpp
+++ b/src/backends/cl/ClLayerSupport.hpp
@@ -232,6 +232,11 @@
                               const Pooling2dDescriptor& descriptor,
                               Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
+    bool IsPooling3dSupported(const TensorInfo& input,
+                              const TensorInfo& output,
+                              const Pooling3dDescriptor& descriptor,
+                              Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
     bool IsPreluSupported(const TensorInfo& input,
                           const TensorInfo& alpha,
                           const TensorInfo& output,
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index c561bf2..213f474 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -581,6 +581,11 @@
             auto pooling2dQueueDescriptor = PolymorphicDowncast<const Pooling2dQueueDescriptor*>(&descriptor);
             return MakeWorkload<ClPooling2dWorkload>(*pooling2dQueueDescriptor, info, m_CLCompileContext);
         }
+        case LayerType::Pooling3d :
+        {
+            auto pooling3dQueueDescriptor = PolymorphicDowncast<const Pooling3dQueueDescriptor*>(&descriptor);
+            return MakeWorkload<ClPooling3dWorkload>(*pooling3dQueueDescriptor, info, m_CLCompileContext);
+        }
         case LayerType::PreCompiled :
         {
             auto preCompiledQueueDescriptor = PolymorphicDowncast<const PreCompiledQueueDescriptor*>(&descriptor);
diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk
index fde6b7e..ed0d925 100644
--- a/src/backends/cl/backend.mk
+++ b/src/backends/cl/backend.mk
@@ -67,6 +67,7 @@
         workloads/ClPadWorkload.cpp \
         workloads/ClPermuteWorkload.cpp \
         workloads/ClPooling2dWorkload.cpp \
+        workloads/ClPooling3dWorkload.cpp \
         workloads/ClPreluWorkload.cpp \
         workloads/ClQLstmWorkload.cpp \
         workloads/ClQuantizedLstmWorkload.cpp \
diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp
index 1c34214..fd24043 100644
--- a/src/backends/cl/test/ClLayerTests.cpp
+++ b/src/backends/cl/test/ClLayerTests.cpp
@@ -469,7 +469,7 @@
                                  ClContextControlFixture,
                                  AcrossChannelNormalizationTest)
 
-// Pooling
+// Pooling2d
 ARMNN_AUTO_TEST_FIXTURE_WITH_THF(SimpleMaxPooling2dSize3x3Stride2x4,
                                  ClContextControlFixture,
                                  SimpleMaxPooling2dSize3x3Stride2x4Test,
@@ -611,6 +611,85 @@
 ARMNN_AUTO_TEST_FIXTURE_WITH_THF(L2Pooling2dSize9, ClContextControlFixture, L2Pooling2dSize9Test)
 ARMNN_AUTO_TEST_FIXTURE_WITH_THF(UNSUPPORTED_L2Pooling2dSize9Uint8, ClContextControlFixture, L2Pooling2dSize9Uint8Test)
 
+// Pooling3d
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(SimpleMaxPooling3dSize2x2x2Stride1x1x1,
+                                 ClContextControlFixture,
+                                 SimpleMaxPooling3dSize2x2x2Stride1x1x1Test,
+                                 DataLayout::NDHWC)
+
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(SimpleMaxPooling3d,
+                                 ClContextControlFixture,
+                                 SimpleMaxPooling3dTest,
+                                 DataLayout::NDHWC)
+
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(IgnorePaddingSimpleMaxPooling3d,
+                                 ClContextControlFixture,
+                                 IgnorePaddingSimpleMaxPooling3dTest,
+                                 DataLayout::NDHWC)
+
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(SimpleAveragePooling3d,
+                                 ClContextControlFixture,
+                                 SimpleAveragePooling3dTest,
+                                 DataLayout::NDHWC)
+
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(LargeTensorsAveragePooling3d,
+                                 ClContextControlFixture,
+                                 LargeTensorsAveragePooling3dTest,
+                                 DataLayout::NDHWC)
+
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(IgnorePaddingSimpleAveragePooling3d,
+                                 ClContextControlFixture,
+                                 IgnorePaddingSimpleAveragePooling3dTest,
+                                 DataLayout::NDHWC)
+
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(SimpleL2Pooling3d,
+                                 ClContextControlFixture,
+                                 SimpleL2Pooling3dTest,
+                                 DataLayout::NDHWC)
+
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(IgnorePaddingSimpleL2Pooling3d,
+                                 ClContextControlFixture,
+                                 IgnorePaddingSimpleL2Pooling3dTest,
+                                 DataLayout::NDHWC)
+
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(AsymmetricNonSquareMaxPooling3d,
+                                 ClContextControlFixture,
+                                 AsymmetricNonSquareMaxPooling3dTest,
+                                 DataLayout::NDHWC)
+
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(AsymmetricNonSquareAveragePooling3d,
+                                 ClContextControlFixture,
+                                 AsymmetricNonSquareAveragePooling3dTest,
+                                 DataLayout::NDHWC)
+
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(AsymmetricNonSquareL2Pooling3d,
+                                 ClContextControlFixture,
+                                 AsymmetricNonSquareL2Pooling3dTest,
+                                 DataLayout::NDHWC)
+
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(UNSUPPORTED_AsymmetricNonSquareMaxPooling3dWithPaddingOnlyPool,
+                                 ClContextControlFixture,
+                                 AsymmetricNonSquareMaxPooling3dWithPaddingOnlyPoolTest,
+                                 DataLayout::NDHWC)
+
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(UNSUPPORTED_AsymmetricNonSquareAveragePooling3dWithPaddingOnlyPool,
+                                 ClContextControlFixture,
+                                 AsymmetricNonSquareAveragePooling3dWithPaddingOnlyPoolTest,
+                                 DataLayout::NDHWC)
+
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(UNSUPPORTED_AsymmetricNonSquareL2Pooling3dWithPaddingOnlyPool,
+                                 ClContextControlFixture,
+                                 AsymmetricNonSquareL2Pooling3dWithPaddingOnlyPoolTest,
+                                 DataLayout::NDHWC)
+
+// (duplicate SimpleAveragePooling3d registration removed - already registered above)
+
+
+
+
+
+
+
 // Add
 ARMNN_AUTO_TEST_FIXTURE_WITH_THF(SimpleAdd, ClContextControlFixture, AdditionTest)
 ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Add5d, ClContextControlFixture, Addition5dTest)
@@ -2010,7 +2089,6 @@
 // The ARMNN_COMPARE_REF_AUTO_TEST_CASE and the ARMNN_COMPARE_REF_FIXTURE_TEST_CASE test units are not available
 // if the reference backend is not built
 
-// ============================================================================
 // COMPARE tests
 
 ARMNN_COMPARE_REF_AUTO_TEST_CASE_WITH_THF(CompareSoftmaxBeta1WithReference, CompareSoftmaxTest, 1.0f)
@@ -2049,6 +2127,15 @@
 
 ARMNN_COMPARE_REF_AUTO_TEST_CASE_WITH_THF(CompareL2Pooling2dWithRef, ComparePooling2dTest, PoolingAlgorithm::L2)
 
+ARMNN_COMPARE_REF_AUTO_TEST_CASE_WITH_THF(CompareMaxPooling3dWithRef, ComparePooling3dTest, PoolingAlgorithm::Max,
+                                          DataLayout::NDHWC)
+
+ARMNN_COMPARE_REF_AUTO_TEST_CASE_WITH_THF(CompareAveragePooling3dWithRef, ComparePooling3dTest,
+                                          PoolingAlgorithm::Average, DataLayout::NDHWC)
+
+ARMNN_COMPARE_REF_AUTO_TEST_CASE_WITH_THF(CompareL2Pooling3dWithRef, ComparePooling3dTest, PoolingAlgorithm::L2,
+                                          DataLayout::NDHWC)
+
 ARMNN_COMPARE_REF_AUTO_TEST_CASE_WITH_THF(CompareAddition, CompareAdditionTest)
 
 ARMNN_COMPARE_REF_AUTO_TEST_CASE_WITH_THF(CompareMultiplicationWithRef, CompareMultiplicationTest)
diff --git a/src/backends/cl/workloads/CMakeLists.txt b/src/backends/cl/workloads/CMakeLists.txt
index 6bb8d68..52326ae 100644
--- a/src/backends/cl/workloads/CMakeLists.txt
+++ b/src/backends/cl/workloads/CMakeLists.txt
@@ -86,6 +86,8 @@
     ClPermuteWorkload.hpp
     ClPooling2dWorkload.cpp
     ClPooling2dWorkload.hpp
+    ClPooling3dWorkload.cpp
+    ClPooling3dWorkload.hpp
     ClPreluWorkload.cpp
     ClPreluWorkload.hpp
     ClQLstmWorkload.cpp
diff --git a/src/backends/cl/workloads/ClPooling3dWorkload.cpp b/src/backends/cl/workloads/ClPooling3dWorkload.cpp
new file mode 100644
index 0000000..a896110
--- /dev/null
+++ b/src/backends/cl/workloads/ClPooling3dWorkload.cpp
@@ -0,0 +1,73 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClPooling3dWorkload.hpp"
+#include <cl/ClLayerSupport.hpp>
+#include <cl/ClTensorHandle.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
+#include <aclCommon/ArmComputeTensorUtils.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+    using namespace armcomputetensorutils;
+
+    arm_compute::Status ClPooling3dWorkloadValidate(const TensorInfo& input,
+                                                    const TensorInfo& output,
+                                                    const Pooling3dDescriptor& descriptor)
+    {
+        const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
+        const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
+
+        arm_compute::Pooling3dLayerInfo layerInfo = BuildArmComputePooling3dLayerInfo(descriptor);
+
+        return arm_compute::CLPooling3dLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
+    }
+
+    ClPooling3dWorkload::ClPooling3dWorkload( const Pooling3dQueueDescriptor& descriptor,
+                                              const WorkloadInfo& info,
+                                              const arm_compute::CLCompileContext& clCompileContext)
+                                              : ClBaseWorkload<Pooling3dQueueDescriptor>(descriptor, info)
+    {
+        // Report Profiling Details
+        ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClPooling3dWorkload_Construct",
+                                             descriptor.m_Parameters,
+                                             info,
+                                             this->GetGuid());
+
+        m_Data.ValidateInputsOutputs("ClPooling3dWorkload", 1, 1);
+
+        arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+        arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+        arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
+        input.info()->set_data_layout(aclDataLayout);
+        output.info()->set_data_layout(aclDataLayout);
+
+        // Flag to use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
+        // Enable fp_mixed_precision for FP16 cases where accumulation would otherwise
+        // saturate, i.e. reach a limit beyond which the value can no longer increase.
+        bool fpMixedPrecision = false;
+
+        arm_compute::Pooling3dLayerInfo layerInfo = BuildArmComputePooling3dLayerInfo(m_Data.m_Parameters,
+                                                                                      fpMixedPrecision);
+
+        {
+            ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClPooling3dWorkload_configure");
+            // Configure the ACL pooling layer (execution happens later in Execute()).
+            m_PoolingLayer.configure(clCompileContext, &input, &output, layerInfo);
+        }
+    }
+
+    void ClPooling3dWorkload::Execute() const
+    {
+        ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClPooling3dWorkload_Execute", this->GetGuid());
+        RunClFunction(m_PoolingLayer, CHECK_LOCATION());
+    }
+
+}
+
+
diff --git a/src/backends/cl/workloads/ClPooling3dWorkload.hpp b/src/backends/cl/workloads/ClPooling3dWorkload.hpp
new file mode 100644
index 0000000..75777db
--- /dev/null
+++ b/src/backends/cl/workloads/ClPooling3dWorkload.hpp
@@ -0,0 +1,34 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ClBaseWorkload.hpp"
+
+#include <arm_compute/runtime/CL/functions/CLPooling3dLayer.h>
+
+namespace armnn
+{
+
+    arm_compute::Status ClPooling3dWorkloadValidate(const TensorInfo& input,
+                                                    const TensorInfo& output,
+                                                    const Pooling3dDescriptor& descriptor);
+
+    class ClPooling3dWorkload : public ClBaseWorkload<Pooling3dQueueDescriptor>
+    {
+    public:
+        using BaseWorkload<Pooling3dQueueDescriptor>::m_Data;
+
+        ClPooling3dWorkload(const Pooling3dQueueDescriptor& descriptor,
+                            const WorkloadInfo& info,
+                            const arm_compute::CLCompileContext& clCompileContext);
+
+        void Execute() const override;
+
+    private:
+        mutable arm_compute::CLPooling3dLayer m_PoolingLayer;
+    };
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClWorkloads.hpp b/src/backends/cl/workloads/ClWorkloads.hpp
index 3558da3..27119bb 100644
--- a/src/backends/cl/workloads/ClWorkloads.hpp
+++ b/src/backends/cl/workloads/ClWorkloads.hpp
@@ -43,6 +43,7 @@
 #include "ClPermuteWorkload.hpp"
 #include "ClPadWorkload.hpp"
 #include "ClPooling2dWorkload.hpp"
+#include "ClPooling3dWorkload.hpp"
 #include "ClPreluWorkload.hpp"
 #include "ClQLstmWorkload.hpp"
 #include "ClQuantizeWorkload.hpp"
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index aa8076e..9dca621 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -568,11 +568,12 @@
 
 // [ Pooling 3D
 //MaxPooling
-ARMNN_AUTO_TEST_CASE_WITH_THF(SimpleMaxPooling3dSize2x2x2Stride1x1x1, SimpleMaxPooling3dSize2x2x2Stride1x1x1Test)
+ARMNN_AUTO_TEST_CASE_WITH_THF(SimpleMaxPooling3dSize2x2x2Stride1x1x1, SimpleMaxPooling3dSize2x2x2Stride1x1x1Test,
+                              DataLayout::NCDHW)
 ARMNN_AUTO_TEST_CASE_WITH_THF(SimpleMaxPooling3dSize2x2x2Stride1x1x1Uint8,
-                              SimpleMaxPooling3dSize2x2x2Stride1x1x1Uint8Test)
+                              SimpleMaxPooling3dSize2x2x2Stride1x1x1Uint8Test, DataLayout::NCDHW)
 ARMNN_AUTO_TEST_CASE_WITH_THF(SimpleMaxPooling3dSize2x2x2Stride1x1x1Int16,
-                              SimpleMaxPooling3dSize2x2x2Stride1x1x1Int16Test)
+                              SimpleMaxPooling3dSize2x2x2Stride1x1x1Int16Test, DataLayout::NCDHW)
 
 ARMNN_AUTO_TEST_CASE_WITH_THF(SimpleMaxPooling3d, SimpleMaxPooling3dTest, DataLayout::NDHWC)
 ARMNN_AUTO_TEST_CASE_WITH_THF(SimpleMaxPooling3dNCDHW, SimpleMaxPooling3dTest, DataLayout::NCDHW)
@@ -581,9 +582,12 @@
 ARMNN_AUTO_TEST_CASE_WITH_THF(SimpleMaxPooling3dUint8NCDHW, SimpleMaxPooling3dUint8Test, DataLayout::NCDHW)
 ARMNN_AUTO_TEST_CASE_WITH_THF(SimpleMaxPooling3dInt16NCDHW, SimpleMaxPooling3dInt16Test, DataLayout::NCDHW)
 
-ARMNN_AUTO_TEST_CASE_WITH_THF(IgnorePaddingSimpleMaxPooling3d, IgnorePaddingSimpleMaxPooling3dTest)
-ARMNN_AUTO_TEST_CASE_WITH_THF(IgnorePaddingSimpleMaxPooling3dUint8, IgnorePaddingSimpleMaxPooling3dUint8Test)
-ARMNN_AUTO_TEST_CASE_WITH_THF(IgnorePaddingSimpleMaxPooling3dInt16, IgnorePaddingSimpleMaxPooling3dInt16Test)
+ARMNN_AUTO_TEST_CASE_WITH_THF(IgnorePaddingSimpleMaxPooling3d, IgnorePaddingSimpleMaxPooling3dTest,
+                              DataLayout::NCDHW)
+ARMNN_AUTO_TEST_CASE_WITH_THF(IgnorePaddingSimpleMaxPooling3dUint8, IgnorePaddingSimpleMaxPooling3dUint8Test,
+                              DataLayout::NCDHW)
+ARMNN_AUTO_TEST_CASE_WITH_THF(IgnorePaddingSimpleMaxPooling3dInt16, IgnorePaddingSimpleMaxPooling3dInt16Test,
+                              DataLayout::NCDHW)
 
 //AveragePooling
 ARMNN_AUTO_TEST_CASE_WITH_THF(SimpleAveragePooling3d, SimpleAveragePooling3dTest, DataLayout::NDHWC)
@@ -596,13 +600,18 @@
 // Due to the size of the input each of these test cases takes about 3 minutes
 // to execute in an x86 environment.  We'll disable but not remove them for the
 // moment.
-//ARMNN_AUTO_TEST_CASE_WITH_THF(LargeTensorsAveragePooling3d, LargeTensorsAveragePooling3dTest)
-//ARMNN_AUTO_TEST_CASE_WITH_THF(LargeTensorsAveragePooling3dUint8, LargeTensorsAveragePooling3dUint8Test)
-//ARMNN_AUTO_TEST_CASE_WITH_THF(LargeTensorsAveragePooling3dInt16, LargeTensorsAveragePooling3dInt16Test)
+//ARMNN_AUTO_TEST_CASE_WITH_THF(LargeTensorsAveragePooling3d, LargeTensorsAveragePooling3dTest, DataLayout::NCDHW)
+//ARMNN_AUTO_TEST_CASE_WITH_THF(LargeTensorsAveragePooling3dUint8, LargeTensorsAveragePooling3dUint8Test,
+//                              DataLayout::NCDHW)
+//ARMNN_AUTO_TEST_CASE_WITH_THF(LargeTensorsAveragePooling3dInt16, LargeTensorsAveragePooling3dInt16Test,
+//                              DataLayout::NCDHW)
 
-ARMNN_AUTO_TEST_CASE_WITH_THF(IgnorePaddingSimpleAveragePooling3d, IgnorePaddingSimpleAveragePooling3dTest)
-ARMNN_AUTO_TEST_CASE_WITH_THF(IgnorePaddingSimpleAveragePooling3dUint8, IgnorePaddingSimpleAveragePooling3dUint8Test)
-ARMNN_AUTO_TEST_CASE_WITH_THF(IgnorePaddingSimpleAveragePooling3dInt16, IgnorePaddingSimpleAveragePooling3dInt16Test)
+ARMNN_AUTO_TEST_CASE_WITH_THF(IgnorePaddingSimpleAveragePooling3d, IgnorePaddingSimpleAveragePooling3dTest,
+                              DataLayout::NCDHW)
+ARMNN_AUTO_TEST_CASE_WITH_THF(IgnorePaddingSimpleAveragePooling3dUint8, IgnorePaddingSimpleAveragePooling3dUint8Test,
+                              DataLayout::NCDHW)
+ARMNN_AUTO_TEST_CASE_WITH_THF(IgnorePaddingSimpleAveragePooling3dInt16, IgnorePaddingSimpleAveragePooling3dInt16Test,
+                              DataLayout::NCDHW)
 
 //L2Pooling
 ARMNN_AUTO_TEST_CASE_WITH_THF(SimpleL2Pooling3d, SimpleL2Pooling3dTest, DataLayout::NDHWC)
@@ -612,22 +621,61 @@
 ARMNN_AUTO_TEST_CASE_WITH_THF(SimpleL2Pooling3dUint8NCDHW, SimpleL2Pooling3dUint8Test, DataLayout::NCDHW)
 ARMNN_AUTO_TEST_CASE_WITH_THF(SimpleL2Pooling3dInt16NCDHW, SimpleL2Pooling3dInt16Test, DataLayout::NCDHW)
 
-ARMNN_AUTO_TEST_CASE_WITH_THF(IgnorePaddingSimpleL2Pooling3d, IgnorePaddingSimpleL2Pooling3dTest)
-ARMNN_AUTO_TEST_CASE_WITH_THF(IgnorePaddingSimpleL2Pooling3dUint8, IgnorePaddingSimpleL2Pooling3dUint8Test)
-ARMNN_AUTO_TEST_CASE_WITH_THF(IgnorePaddingSimpleL2Pooling3dInt16, IgnorePaddingSimpleL2Pooling3dInt16Test)
+ARMNN_AUTO_TEST_CASE_WITH_THF(IgnorePaddingSimpleL2Pooling3d, IgnorePaddingSimpleL2Pooling3dTest, DataLayout::NCDHW)
+ARMNN_AUTO_TEST_CASE_WITH_THF(IgnorePaddingSimpleL2Pooling3dUint8, IgnorePaddingSimpleL2Pooling3dUint8Test,
+                              DataLayout::NCDHW)
+ARMNN_AUTO_TEST_CASE_WITH_THF(IgnorePaddingSimpleL2Pooling3dInt16, IgnorePaddingSimpleL2Pooling3dInt16Test,
+                              DataLayout::NCDHW)
 
 //NonSquarePooling
-ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmNonSquareMaxPooling3d, AsymmetricNonSquareMaxPooling3dTest)
-ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmNonSquareMaxPooling3dUint8, AsymmetricNonSquareMaxPooling3dUint8Test)
-ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmNonSquareMaxPooling3dInt16, AsymmetricNonSquareMaxPooling3dInt16Test)
+ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmNonSquareMaxPooling3d, AsymmetricNonSquareMaxPooling3dTest, DataLayout::NCDHW)
+ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmNonSquareMaxPooling3dUint8, AsymmetricNonSquareMaxPooling3dUint8Test,
+                              DataLayout::NCDHW)
+ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmNonSquareMaxPooling3dInt16, AsymmetricNonSquareMaxPooling3dInt16Test,
+                              DataLayout::NCDHW)
 
-ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmNonSquareAveragePooling3d, AsymmetricNonSquareAveragePooling3dTest)
-ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmNonSquareAveragePooling3dUint8, AsymmetricNonSquareAveragePooling3dUint8Test)
-ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmNonSquareAveragePooling3dInt16, AsymmetricNonSquareAveragePooling3dInt16Test)
+ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmNonSquareAveragePooling3d, AsymmetricNonSquareAveragePooling3dTest,
+                              DataLayout::NCDHW)
+ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmNonSquareAveragePooling3dUint8, AsymmetricNonSquareAveragePooling3dUint8Test,
+                              DataLayout::NCDHW)
+ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmNonSquareAveragePooling3dInt16, AsymmetricNonSquareAveragePooling3dInt16Test,
+                              DataLayout::NCDHW)
 
-ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmNonSquareL2Pooling3d, AsymmetricNonSquareL2Pooling3dTest)
-ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmNonSquareL2Pooling3dUint8, AsymmetricNonSquareL2Pooling3dUint8Test)
-ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmNonSquareL2Pooling3dInt16, AsymmetricNonSquareL2Pooling3dInt16Test)
+ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmNonSquareL2Pooling3d, AsymmetricNonSquareL2Pooling3dTest, DataLayout::NCDHW)
+ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmNonSquareL2Pooling3dUint8, AsymmetricNonSquareL2Pooling3dUint8Test,
+                              DataLayout::NCDHW)
+ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmNonSquareL2Pooling3dInt16, AsymmetricNonSquareL2Pooling3dInt16Test,
+                              DataLayout::NCDHW)
+
+ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmetricNonSquareMaxPooling3dWithPaddingOnlyPool,
+                              AsymmetricNonSquareMaxPooling3dWithPaddingOnlyPoolTest,
+                              DataLayout::NDHWC)
+ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmetricNonSquareMaxPooling3dWithPaddingOnlyPoolUint8,
+                              AsymmetricNonSquareMaxPooling3dWithPaddingOnlyPoolUint8Test,
+                              DataLayout::NDHWC)
+ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmetricNonSquareMaxPooling3dWithPaddingOnlyPoolInt16,
+                              AsymmetricNonSquareMaxPooling3dWithPaddingOnlyPoolInt16Test,
+                              DataLayout::NDHWC)
+
+ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmetricNonSquareAveragePooling3dWithPaddingOnlyPool,
+                              AsymmetricNonSquareAveragePooling3dWithPaddingOnlyPoolTest,
+                              DataLayout::NDHWC)
+ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmetricNonSquareAveragePooling3dWithPaddingOnlyPoolUint8,
+                              AsymmetricNonSquareAveragePooling3dWithPaddingOnlyPoolUint8Test,
+                              DataLayout::NDHWC)
+ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmetricNonSquareAveragePooling3dWithPaddingOnlyPoolInt16,
+                              AsymmetricNonSquareAveragePooling3dWithPaddingOnlyPoolInt16Test,
+                              DataLayout::NDHWC)
+
+ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmetricNonSquareL2Pooling3dWithPaddingOnlyPool,
+                              AsymmetricNonSquareL2Pooling3dWithPaddingOnlyPoolTest,
+                              DataLayout::NDHWC)
+ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmetricNonSquareL2Pooling3dWithPaddingOnlyPoolUint8,
+                              AsymmetricNonSquareL2Pooling3dWithPaddingOnlyPoolUint8Test,
+                              DataLayout::NDHWC)
+ARMNN_AUTO_TEST_CASE_WITH_THF(AsymmetricNonSquareL2Pooling3dWithPaddingOnlyPoolInt16,
+                              AsymmetricNonSquareL2Pooling3dWithPaddingOnlyPoolInt16Test,
+                              DataLayout::NDHWC)
 // Pooling 3D ]