IVGCVSW-1885 add RefPadWorkload implementation and associated unit tests

 * Added RefPadWorkload implementation
 * Added unit tests and applied them to CL and Ref backends
 * Fixed a bug in ClPadWorkload

Change-Id: I8cb76bc9d60ae8a39b08d40f05d628e3b72f6410
diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp
index 58aad99..0f8b75f 100755
--- a/src/backends/cl/test/ClLayerTests.cpp
+++ b/src/backends/cl/test/ClLayerTests.cpp
@@ -246,6 +246,11 @@
 ARMNN_AUTO_TEST_CASE(SimpleReshapeFloat32, SimpleReshapeFloat32Test)
 ARMNN_AUTO_TEST_CASE(SimpleReshapeUint8, SimpleReshapeUint8Test)
 
+// Pad
+ARMNN_AUTO_TEST_CASE(Pad2d, Pad2dTest)
+ARMNN_AUTO_TEST_CASE(Pad3d, Pad3dTest)
+ARMNN_AUTO_TEST_CASE(Pad4d, Pad4dTest)
+
 // Permute
 ARMNN_AUTO_TEST_CASE(SimplePermuteFloat32, SimplePermuteFloat32Test)
 ARMNN_AUTO_TEST_CASE(SimplePermuteUint8, SimplePermuteUint8Test)
diff --git a/src/backends/cl/workloads/ClPadWorkload.cpp b/src/backends/cl/workloads/ClPadWorkload.cpp
index e75af83..3e63d5c 100644
--- a/src/backends/cl/workloads/ClPadWorkload.cpp
+++ b/src/backends/cl/workloads/ClPadWorkload.cpp
@@ -22,7 +22,14 @@
 
     arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
-    arm_compute::PaddingList padList = static_cast<arm_compute::PaddingList>(descriptor.m_Parameters.m_PadList);
+
+    std::vector<std::pair<unsigned int, unsigned int>> reversed_PadList(descriptor.m_Parameters.m_PadList.size());
+
+    std::reverse_copy(std::begin(descriptor.m_Parameters.m_PadList),
+            std::end(descriptor.m_Parameters.m_PadList),
+            std::begin(reversed_PadList));
+
+    arm_compute::PaddingList padList = static_cast<arm_compute::PaddingList>(reversed_PadList);
 
     m_Layer.configure(&input, &output, padList);
 }
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index d7d2e27..4d157d4 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -249,7 +249,7 @@
 std::unique_ptr<IWorkload> RefWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor,
                                                  const WorkloadInfo& info) const
 {
-    return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
+    return MakeWorkload<RefPadWorkload, NullWorkload>(descriptor, info);
 }
 
 
diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk
index 365faa6..4403ea2 100644
--- a/src/backends/reference/backend.mk
+++ b/src/backends/reference/backend.mk
@@ -17,6 +17,7 @@
         workloads/ConvImpl.cpp \
         workloads/FullyConnected.cpp \
         workloads/Mean.cpp \
+        workloads/Pad.cpp \
         workloads/Pooling2d.cpp \
         workloads/RefActivationFloat32Workload.cpp \
         workloads/RefActivationUint8Workload.cpp \
@@ -43,6 +44,7 @@
         workloads/RefMergerFloat32Workload.cpp \
         workloads/RefMergerUint8Workload.cpp \
         workloads/RefNormalizationFloat32Workload.cpp \
+        workloads/RefPadWorkload.cpp \
         workloads/RefPermuteWorkload.cpp \
         workloads/RefPooling2dFloat32Workload.cpp \
         workloads/RefPooling2dUint8Workload.cpp \
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index 236dedd..797051e 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -206,6 +206,11 @@
 ARMNN_AUTO_TEST_CASE(L2Normalization3d, L2Normalization3dTest)
 ARMNN_AUTO_TEST_CASE(L2Normalization4d, L2Normalization4dTest)
 
+// Pad
+ARMNN_AUTO_TEST_CASE(Pad2d, Pad2dTest)
+ARMNN_AUTO_TEST_CASE(Pad3d, Pad3dTest)
+ARMNN_AUTO_TEST_CASE(Pad4d, Pad4dTest)
+
 // NOTE: These tests are disabled until NHWC is supported by the reference L2Normalization implementation.
 //ARMNN_AUTO_TEST_CASE(L2Normalization1dNhwc, L2Normalization1dNhwcTest);
 //ARMNN_AUTO_TEST_CASE(L2Normalization2dNhwc, L2Normalization2dNhwcTest);
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
index be71a85..bf65639 100644
--- a/src/backends/reference/workloads/CMakeLists.txt
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -16,6 +16,8 @@
     FullyConnected.cpp
     FullyConnected.hpp
     Merger.hpp
+    Pad.cpp
+    Pad.hpp
     Pooling2d.cpp
     Pooling2d.hpp
     RefActivationFloat32Workload.cpp
@@ -64,6 +66,8 @@
     RefMergerUint8Workload.hpp
     RefNormalizationFloat32Workload.cpp
     RefNormalizationFloat32Workload.hpp
+    RefPadWorkload.cpp
+    RefPadWorkload.hpp
     RefPermuteWorkload.cpp
     RefPermuteWorkload.hpp
     RefPooling2dFloat32Workload.cpp
diff --git a/src/backends/reference/workloads/Pad.cpp b/src/backends/reference/workloads/Pad.cpp
new file mode 100644
index 0000000..5c85931
--- /dev/null
+++ b/src/backends/reference/workloads/Pad.cpp
@@ -0,0 +1,158 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "Pad.hpp"
+#include "backends/WorkloadData.hpp"
+
+#include <boost/numeric/conversion/cast.hpp>
+#include "TensorBufferArrayView.hpp"
+
+#include <cmath>
+#include <cstddef>
+#include <functional>
+#include <limits>
+#include <cassert>
+
+
+namespace armnn
+{
+void Pad(const TensorInfo& inputInfo,
+         const TensorInfo& outputInfo,
+         std::vector<std::pair<unsigned int, unsigned int>> m_PadList,
+         const float* inputData,
+         float* outData)
+{
+    // Zero-pads a row-major float tensor. outData is first cleared, then the input is copied into it at
+    // the offset given by the first (leading) element of each m_PadList pair, one pair per dimension.
+    // The second element of each pair is not read here; the output extents come from outputInfo.
+    // Ranks 1 to 4 are handled.
+    TensorShape outputShape = outputInfo.GetShape();
+    TensorShape inputShape = inputInfo.GetShape();
+
+    const unsigned int numOutputElements = outputInfo.GetNumElements();
+    const unsigned int numInputDimensions = inputShape.GetNumDimensions();
+
+    assert(numInputDimensions == outputShape.GetNumDimensions());
+    // Each dimension reads its own pair, so the list must cover every dimension.
+    assert(m_PadList.size() >= numInputDimensions);
+
+    unsigned int inputBatches = 0;
+    unsigned int inputChannels = 0;
+    unsigned int inputHeight = 0;
+    unsigned int inputWidth = 0;
+    unsigned int outputChannels = 0;
+    unsigned int outputHeight = 0;
+    unsigned int outputWidth = 0;
+
+    // Start from an all-zero output; the loops below then overwrite only the interior region.
+    for (unsigned int i = 0; i < numOutputElements; ++i)
+    {
+        outData[i] = 0.0f;
+    }
+
+    switch (numInputDimensions)
+    {
+        case 1:
+            // 1D: the only offset is the leading pad of dimension 0.
+            inputWidth = inputShape[0];
+
+            for (unsigned int w = 0; w < inputWidth; ++w)
+            {
+                outData[w + m_PadList[0].first] = inputData[w];
+            }
+
+            break;
+
+        case 2:
+            // 2D: shape is [height, width]; only the output width (the row stride) is needed.
+            inputHeight = inputShape[0];
+            inputWidth = inputShape[1];
+
+            outputWidth = outputShape[1];
+
+            for (unsigned int h = 0; h < inputHeight; ++h)
+            {
+                const unsigned int outRow = (h + m_PadList[0].first) * outputWidth;
+
+                for (unsigned int w = 0; w < inputWidth; ++w)
+                {
+                    outData[outRow + (w + m_PadList[1].first)] = inputData[h * inputWidth + w];
+                }
+            }
+
+            break;
+
+        case 3:
+            // 3D: shape is [channels, height, width]; the outermost output extent is not needed.
+            inputChannels = inputShape[0];
+            inputHeight = inputShape[1];
+            inputWidth = inputShape[2];
+
+            outputHeight = outputShape[1];
+            outputWidth = outputShape[2];
+
+            for (unsigned int c = 0; c < inputChannels; ++c)
+            {
+                const unsigned int outPlane = (c + m_PadList[0].first) * outputHeight * outputWidth;
+
+                for (unsigned int h = 0; h < inputHeight; ++h)
+                {
+                    const unsigned int outRow = outPlane + (h + m_PadList[1].first) * outputWidth;
+                    const unsigned int inRow = (c * inputHeight + h) * inputWidth;
+
+                    for (unsigned int w = 0; w < inputWidth; ++w)
+                    {
+                        outData[outRow + (w + m_PadList[2].first)] = inputData[inRow + w];
+                    }
+                }
+            }
+
+            break;
+
+        case 4:
+            // 4D: shape is [batches, channels, height, width]. Offsets are hoisted per loop level so the
+            // innermost loop only adds the width coordinate.
+            inputBatches = inputShape[0];
+            inputChannels = inputShape[1];
+            inputHeight = inputShape[2];
+            inputWidth = inputShape[3];
+
+            outputChannels = outputShape[1];
+            outputHeight = outputShape[2];
+            outputWidth = outputShape[3];
+
+            for (unsigned int b = 0; b < inputBatches; ++b)
+            {
+                const unsigned int outBatch = (b + m_PadList[0].first) * outputChannels * outputHeight * outputWidth;
+
+                for (unsigned int c = 0; c < inputChannels; ++c)
+                {
+                    const unsigned int outPlane = outBatch + (c + m_PadList[1].first) * outputHeight * outputWidth;
+                    const unsigned int inPlane = (b * inputChannels + c) * inputHeight;
+
+                    for (unsigned int h = 0; h < inputHeight; ++h)
+                    {
+                        const unsigned int outRow = outPlane + (h + m_PadList[2].first) * outputWidth;
+                        const unsigned int inRow = (inPlane + h) * inputWidth;
+
+                        for (unsigned int w = 0; w < inputWidth; ++w)
+                        {
+                            outData[outRow + (w + m_PadList[3].first)] = inputData[inRow + w];
+                        }
+                    }
+                }
+            }
+
+            break;
+
+        default:
+            // Unsupported rank: the output is left all-zero. Assert so debug builds do not miss it.
+            assert(false && "Pad: unsupported number of dimensions");
+            break;
+    }
+
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/Pad.hpp b/src/backends/reference/workloads/Pad.hpp
new file mode 100644
index 0000000..ed80ef8
--- /dev/null
+++ b/src/backends/reference/workloads/Pad.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "armnn/DescriptorsFwd.hpp"
+#include "armnn/Tensor.hpp"
+
+#include <vector>
+
+namespace armnn
+{ // Pad: zero-fills outData, then copies inputData into it offset by the first element of each pad pair.
+void Pad(const TensorInfo& inputInfo,   // supplies the input shape
+        const TensorInfo& outputInfo,   // supplies the output shape and element count
+        std::vector<std::pair<unsigned int, unsigned int>> m_PadList,   // indexed by dimension
+        const float* inputData,   // source buffer, read only
+        float* outData);   // destination buffer, every element is written
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefPadWorkload.cpp b/src/backends/reference/workloads/RefPadWorkload.cpp
new file mode 100644
index 0000000..233fbe4
--- /dev/null
+++ b/src/backends/reference/workloads/RefPadWorkload.cpp
@@ -0,0 +1,37 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefPadWorkload.hpp"
+
+#include "Pad.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+#include <vector>
+
+namespace armnn
+{
+
+// Forwards the queue descriptor and workload info to the BaseWorkload base class.
+RefPadWorkload::RefPadWorkload(const PadQueueDescriptor& descriptor, const WorkloadInfo& info)
+    : BaseWorkload<PadQueueDescriptor>(descriptor, info) {}
+
+// Runs the reference Pad on input 0 and output 0, both accessed as float buffers, using the pad
+// list held in the descriptor parameters.
+void RefPadWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefPadWorkload_Execute");
+
+    const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+    const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+
+    const float* inputData = GetInputTensorDataFloat(0, m_Data);
+    float* outputData = GetOutputTensorDataFloat(0, m_Data);
+
+    Pad(inputInfo, outputInfo, m_Data.m_Parameters.m_PadList, inputData, outputData);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefPadWorkload.hpp b/src/backends/reference/workloads/RefPadWorkload.hpp
new file mode 100644
index 0000000..7ff117d
--- /dev/null
+++ b/src/backends/reference/workloads/RefPadWorkload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+// Reference-backend workload for the Pad layer; Execute() is defined in RefPadWorkload.cpp.
+class RefPadWorkload : public BaseWorkload<PadQueueDescriptor>
+{
+public:
+    explicit RefPadWorkload(const PadQueueDescriptor& descriptor, const WorkloadInfo& info);
+    void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
index 7e89cab..14e6699 100644
--- a/src/backends/reference/workloads/RefWorkloads.hpp
+++ b/src/backends/reference/workloads/RefWorkloads.hpp
@@ -52,4 +52,5 @@
 #include "RefConvertFp16ToFp32Workload.hpp"
 #include "RefConvertFp32ToFp16Workload.hpp"
 #include "RefMeanUint8Workload.hpp"
-#include "RefMeanFloat32Workload.hpp"
\ No newline at end of file
+#include "RefMeanFloat32Workload.hpp"
+#include "RefPadWorkload.hpp"
\ No newline at end of file
diff --git a/src/backends/test/LayerTests.cpp b/src/backends/test/LayerTests.cpp
index d955e42..c28a1d4 100755
--- a/src/backends/test/LayerTests.cpp
+++ b/src/backends/test/LayerTests.cpp
@@ -3443,6 +3443,408 @@
 
 } // anonymous namespace
 
+LayerTestResult<float, 2> Pad2dTest(armnn::IWorkloadFactory& workloadFactory)
+{
+    const armnn::TensorShape inputShape{ 3, 3 };
+    const armnn::TensorShape outputShape{ 7, 7 };
+
+    const armnn::TensorInfo inputTensorInfo(inputShape, armnn::DataType::Float32);
+    const armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
+
+    // Pads a 3x3 tensor by 2 on every side and expects a 7x7 tensor with a zero border.
+    std::vector<float> inputValues
+    {
+
+        // Height (3) x Width (3)
+        4.0f, 8.0f, 6.0f,
+        7.0f, 4.0f, 4.0f,
+        3.0f, 2.0f, 4.0f
+
+    };
+
+    std::vector<float> expectedOutputValues
+    {
+        // Height (7) x Width (7): the input sits at rows 2-4, columns 2-4.
+        0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 4.0f, 8.0f, 6.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 7.0f, 4.0f, 4.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 3.0f, 2.0f, 4.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
+
+    };
+
+    auto inputTensor = MakeTensor<float, 2>(inputTensorInfo, std::vector<float>(inputValues));
+
+    LayerTestResult<float, 2> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<float, 2>(outputTensorInfo, std::vector<float>(expectedOutputValues));
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::PadQueueDescriptor descriptor;
+
+    // (before, after) padding for height and width: 3 + 2 + 2 = 7 in each dimension.
+    std::vector<std::pair<unsigned int, unsigned int>> padList;
+    padList.emplace_back(2u, 2u);
+    padList.emplace_back(2u, 2u);
+    descriptor.m_Parameters.m_PadList = padList;
+    armnn::WorkloadInfo info;
+
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePad(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]);
+
+    workloadFactory.Finalize();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&result.output[0][0], outputHandle.get());
+
+    return result;
+}
+
+LayerTestResult<float, 3> Pad3dTest(armnn::IWorkloadFactory& workloadFactory)
+{
+    const armnn::TensorShape inputShape{ 2, 2, 2 };
+    const armnn::TensorShape outputShape{ 3, 5, 6 };
+
+    const armnn::TensorInfo inputTensorInfo(inputShape, armnn::DataType::Float32);
+    const armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
+
+    // Pads a 2x2x2 tensor asymmetrically and expects a 3x5x6 tensor, zero outside the copied region.
+    std::vector<float> inputValues
+    {
+
+        // Channel 0, Height (2) x Width (2)
+        0.0f, 4.0f,
+        2.0f, 5.0f,
+
+        // Channel 1, Height (2) x Width (2)
+        6.0f, 1.0f,
+        5.0f, 2.0f
+    };
+
+    std::vector<float> expectedOutputValues
+    {
+        // Channel 0, Height (5) x Width (6): input channel 0 at rows 2-3, columns 2-3
+        0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 4.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 2.0f, 5.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+
+        // Channel 1, Height (5) x Width (6): input channel 1 at rows 2-3, columns 2-3
+        0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 6.0f, 1.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 5.0f, 2.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+
+        // Channel 2, Height (5) x Width (6): trailing channel padding, all zeros
+        0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
+
+    };
+
+    auto inputTensor = MakeTensor<float, 3>(inputTensorInfo, std::vector<float>(inputValues));
+
+    LayerTestResult<float, 3> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<float, 3>(outputTensorInfo, std::vector<float>(expectedOutputValues));
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::PadQueueDescriptor descriptor;
+
+    // (before, after) padding for channels, height and width respectively.
+    std::vector<std::pair<unsigned int, unsigned int>> padList;
+    padList.emplace_back(0u, 1u);
+    padList.emplace_back(2u, 1u);
+    padList.emplace_back(2u, 2u);
+    descriptor.m_Parameters.m_PadList = padList;
+    armnn::WorkloadInfo info;
+
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePad(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0][0]);
+
+    workloadFactory.Finalize();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&result.output[0][0][0], outputHandle.get());
+
+    return result;
+}
+
+LayerTestResult<float, 4> Pad4dTest(armnn::IWorkloadFactory& workloadFactory)
+{
+    const armnn::TensorShape inputShape{ 2, 2, 3, 2 };
+    const armnn::TensorShape outputShape{ 4, 5, 7, 4 };
+
+    const armnn::TensorInfo inputTensorInfo(inputShape, armnn::DataType::Float32);
+    const armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
+
+    std::vector<float> inputValues
+    {
+        // Batch 0, Channel 0, Height (3) x Width (2)
+        0.0f, 1.0f,
+        2.0f, 3.0f,
+        4.0f, 5.0f,
+
+        // Batch 0, Channel 1, Height (3) x Width (2)
+        6.0f, 7.0f,
+        8.0f, 9.0f,
+        10.0f, 11.0f,
+
+        // Batch 1, Channel 0, Height (3) x Width (2)
+        12.0f, 13.0f,
+        14.0f, 15.0f,
+        16.0f, 17.0f,
+
+        // Batch 1, Channel 1, Height (3) x Width (2)
+        18.0f, 19.0f,
+        20.0f, 21.0f,
+        22.0f, 23.0f
+
+    };
+    // 4 batches x 5 channels of Height (7) x Width (4) planes; unlabelled planes are padding only.
+    std::vector<float> expectedOutputValues
+    {
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+
+
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+
+
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+
+
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+
+
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+
+
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+
+
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+
+        // Output batch 1, channel 2: input batch 0, channel 0 at rows 3-5, columns 1-2
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 1.0f, 0.0f,
+        0.0f, 2.0f, 3.0f, 0.0f,
+        0.0f, 4.0f, 5.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+
+        // Output batch 1, channel 3: input batch 0, channel 1 at rows 3-5, columns 1-2
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 6.0f, 7.0f, 0.0f,
+        0.0f, 8.0f, 9.0f, 0.0f,
+        0.0f, 10.0f, 11.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+
+
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+
+
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+
+
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+
+        // Output batch 2, channel 2: input batch 1, channel 0 at rows 3-5, columns 1-2
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 12.0f, 13.0f, 0.0f,
+        0.0f, 14.0f, 15.0f, 0.0f,
+        0.0f, 16.0f, 17.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+
+        // Output batch 2, channel 3: input batch 1, channel 1 at rows 3-5, columns 1-2
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 18.0f, 19.0f, 0.0f,
+        0.0f, 20.0f, 21.0f, 0.0f,
+        0.0f, 22.0f, 23.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+
+
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+
+
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+
+
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+
+
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+
+
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+
+
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f
+
+    };
+
+    auto inputTensor = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>(inputValues));
+
+    LayerTestResult<float, 4> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>(expectedOutputValues));
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::PadQueueDescriptor descriptor;
+
+    // (before, after) padding for batches, channels, height and width respectively.
+    std::vector<std::pair<unsigned int, unsigned int>> padList;
+    padList.emplace_back(1u, 1u);
+    padList.emplace_back(2u, 1u);
+    padList.emplace_back(3u, 1u);
+    padList.emplace_back(1u, 1u);
+    descriptor.m_Parameters.m_PadList = padList;
+    armnn::WorkloadInfo info;
+
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePad(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0][0][0]);
+
+    workloadFactory.Finalize();
+
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
+
+    return result;
+}
+
 LayerTestResult<float, 4> L2Normalization1dTest(armnn::IWorkloadFactory& workloadFactory)
 {
     // Width: 1
diff --git a/src/backends/test/LayerTests.hpp b/src/backends/test/LayerTests.hpp
index 6687439..d9d4fb9 100644
--- a/src/backends/test/LayerTests.hpp
+++ b/src/backends/test/LayerTests.hpp
@@ -350,6 +350,11 @@
 LayerTestResult<float, 4> SimplePermuteFloat32Test(armnn::IWorkloadFactory& workloadFactory);
 LayerTestResult<uint8_t, 4> SimplePermuteUint8Test(armnn::IWorkloadFactory& workloadFactory);
 
+LayerTestResult<float, 2> Pad2dTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 3> Pad3dTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> Pad4dTest(armnn::IWorkloadFactory& workloadFactory);
+
+
 LayerTestResult<float, 4> PermuteFloat32ValueSet1Test(armnn::IWorkloadFactory& workloadFactory);
 LayerTestResult<float, 4> PermuteFloat32ValueSet2Test(armnn::IWorkloadFactory& workloadFactory);
 LayerTestResult<float, 4> PermuteFloat32ValueSet3Test(armnn::IWorkloadFactory& workloadFactory);