diff --git a/src/backends/reference/workloads/Pad.cpp b/src/backends/reference/workloads/Pad.cpp
index 5c85931..a50fa23 100644
--- a/src/backends/reference/workloads/Pad.cpp
+++ b/src/backends/reference/workloads/Pad.cpp
@@ -5,24 +5,22 @@
 
 #include "Pad.hpp"
 #include "backends/WorkloadData.hpp"
-
 #include <boost/numeric/conversion/cast.hpp>
 #include "TensorBufferArrayView.hpp"
-
 #include <cmath>
 #include <cstddef>
 #include <functional>
 #include <limits>
 #include <cassert>
 
-
 namespace armnn
 {
+template <typename T> // Copies inputData into outData at the offsets held in m_PadList; T is the element type
 void Pad(const TensorInfo& inputInfo,
          const TensorInfo& outputInfo,
          std::vector<std::pair<unsigned int, unsigned int>> m_PadList,
-         const float* inputData,
-         float* outData)
+         const T* inputData, // source elements, read with indices built from the input shape
+         T* outData)         // destination elements, written with indices built from the output shape
 {
     unsigned int numOutputElements = outputInfo.GetNumElements();
 
@@ -30,10 +28,12 @@
     TensorShape inputShape = inputInfo.GetShape();
 
     unsigned int numInputDimensions = inputShape.GetNumDimensions();
-    #ifndef NDEBUG
-    unsigned int numOutputDimensions = outputShape.GetNumDimensions();
 
+    #ifndef NDEBUG
+    // Debug builds only: the output rank is read solely to feed the assert below.
+    unsigned int numOutputDimensions = outputShape.GetNumDimensions();
     assert(numInputDimensions == numOutputDimensions);
+
     #endif
 
     unsigned int inputBatches = 0;
@@ -51,29 +51,27 @@
     }
 
     switch(numInputDimensions) {
+        // Rank 1: each input element is written at w plus the first value of pad pair 0.
         case 1:
 
             inputWidth = inputShape[0];
 
             for (unsigned int w = 0; w < inputWidth ; w++)
             {
-
                 outData[w+std::get<0>(m_PadList[0])] = inputData[w];
-
             }
 
             break;
+        // Rank 2: input row h is written at output row h plus the first value of pad pair 0.
         case 2  :
 
             inputHeight = inputShape[0];
             inputWidth = inputShape[1];
-
             outputHeight = outputShape[0];
             outputWidth = outputShape[1];
 
             for (unsigned int h = 0; h < inputHeight; h++)
             {
-
                 for (unsigned int w = 0; w < inputWidth ; w++)
                 {
                     outData[(h+std::get<0>(m_PadList[0]))*outputWidth
@@ -82,25 +80,22 @@
             }
 
             break;
+        // Rank 3: c, h and w are each offset by the first value of pad pairs 0, 1 and 2 respectively.
         case 3  :
 
             inputChannels = inputShape[0];
             inputHeight = inputShape[1];
             inputWidth = inputShape[2];
-
             outputChannels = outputShape[0];
             outputHeight = outputShape[1];
             outputWidth = outputShape[2];
 
             for (unsigned int c = 0; c < inputChannels; c++)
             {
-
                 for (unsigned int h = 0; h < inputHeight; h++)
                 {
-
                     for (unsigned int w = 0; w < inputWidth ; w++)
                     {
-
                         outData[(c+std::get<0>(m_PadList[0]))*outputHeight*outputWidth
                         + (h+std::get<0>(m_PadList[1]))*outputWidth
                         + (w+std::get<0>(m_PadList[2]))] = inputData[c * inputHeight * inputWidth
@@ -111,13 +106,13 @@
             }
 
             break;
+        // Rank 4: b, c and h are each offset by the first value of pad pairs 0, 1 and 2 respectively.
         case 4  :
 
             inputBatches = inputShape[0];
             inputChannels = inputShape[1];
             inputHeight = inputShape[2];
             inputWidth = inputShape[3];
-
             outputChannels = outputShape[1];
             outputHeight = outputShape[2];
             outputWidth = outputShape[3];
@@ -126,13 +121,10 @@
             {
                 for (unsigned int c = 0; c < inputChannels; c++)
                 {
-
                     for (unsigned int h = 0; h < inputHeight; h++)
                     {
-
                         for (unsigned int w = 0; w < inputWidth ; w++)
                         {
-
                             outData[(b+std::get<0>(m_PadList[0])) * outputChannels * outputHeight * outputWidth
                                    + (c+std::get<0>(m_PadList[1])) * outputHeight * outputWidth
                                    + (h+std::get<0>(m_PadList[2])) * outputWidth
@@ -141,7 +133,6 @@
                                                                              + c * inputHeight * inputWidth
                                                                              + h * inputWidth
                                                                              + w];
-
                         }
                     }
                 }
@@ -150,9 +141,20 @@
             break;
 
         default :
+            // Any other rank: this switch copies no elements.
             break;
     }
-
 }
 
-} //namespace armnn
+template void Pad<float>(const TensorInfo& inputInfo, // Pad<T> is defined in this .cpp; instantiate it for float
+                         const TensorInfo& outputInfo,
+                         std::vector<std::pair<unsigned int, unsigned int>> m_PadList,
+                         const float* inputData,
+                         float* outData);
+template void Pad<uint8_t>(const TensorInfo& inputInfo, // same explicit instantiation for uint8_t elements
+                           const TensorInfo& outputInfo,
+                           std::vector<std::pair<unsigned int, unsigned int>> m_PadList,
+                           const uint8_t* inputData,
+                           uint8_t* outData); // NOTE(review): no <cstdint> among the includes shown; presumably transitive - confirm
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/backends/reference/workloads/Pad.hpp b/src/backends/reference/workloads/Pad.hpp
index ed80ef8..42318d6 100644
--- a/src/backends/reference/workloads/Pad.hpp
+++ b/src/backends/reference/workloads/Pad.hpp
@@ -12,9 +12,10 @@
 
 namespace armnn
 {
+template <typename T> // T is the element type pointed to by inputData and outData
 void Pad(const TensorInfo& inputInfo,
-        const TensorInfo& outputInfo,
-        std::vector<std::pair<unsigned int, unsigned int>> m_PadList,
-        const float* inputData,
-        float* outData);
+         const TensorInfo& outputInfo,
+         std::vector<std::pair<unsigned int, unsigned int>> m_PadList, // indexed by dimension in Pad.cpp
+         const T* inputData, // elements to copy from
+         T* outData);        // elements to copy into
 } //namespace armnn
diff --git a/src/backends/reference/workloads/RefPadWorkload.cpp b/src/backends/reference/workloads/RefPadWorkload.cpp
index 233fbe4..b41c2de 100644
--- a/src/backends/reference/workloads/RefPadWorkload.cpp
+++ b/src/backends/reference/workloads/RefPadWorkload.cpp
@@ -10,28 +10,31 @@
 
 #include "Profiling.hpp"
 
+#include "TypeUtils.hpp"
+
 #include <vector>
 
 namespace armnn
 {
 
-RefPadWorkload::RefPadWorkload(const PadQueueDescriptor& descriptor, const WorkloadInfo& info)
-  :BaseWorkload<PadQueueDescriptor>(descriptor, info) {}
-
-
-void RefPadWorkload::Execute() const
+template <armnn::DataType DataType> // Runs Pad on input 0 and output 0 of this workload
+void RefPadWorkload<DataType>::Execute() const
 {
+    using T = ResolveType<DataType>; // presumably the C++ element type for DataType (from TypeUtils.hpp) - confirm
 
     ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefPadWorkload_Execute");
 
     const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
     const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
 
-    const float* inputData = GetInputTensorDataFloat(0, m_Data);
-    float* outputData = GetOutputTensorDataFloat(0, m_Data);
+    const T* inputData = GetInputTensorData<T>(0, m_Data); // input 0 as a const T pointer
+    T* outputData = GetOutputTensorData<T>(0, m_Data);     // output 0 as a T pointer
 
 
     Pad(inputInfo, outputInfo, m_Data.m_Parameters.m_PadList, inputData, outputData);
 }
 
+template class RefPadWorkload<DataType::Float32>;         // Execute() is defined in this .cpp, so instantiate here
+template class RefPadWorkload<DataType::QuantisedAsymm8>; // same explicit instantiation for QuantisedAsymm8
+
 } //namespace armnn
\ No newline at end of file
diff --git a/src/backends/reference/workloads/RefPadWorkload.hpp b/src/backends/reference/workloads/RefPadWorkload.hpp
index 7ff117d..938fcf2 100644
--- a/src/backends/reference/workloads/RefPadWorkload.hpp
+++ b/src/backends/reference/workloads/RefPadWorkload.hpp
@@ -5,17 +5,32 @@
 
 #pragma once
 
-#include "backends/Workload.hpp"
-#include "backends/WorkloadData.hpp"
+#include <backends/Workload.hpp>
+#include <backends/WorkloadData.hpp>
+
+#include <armnn/TypesUtils.hpp>
 
 namespace armnn
 {
 
-class RefPadWorkload : public BaseWorkload<PadQueueDescriptor>
+template <armnn::DataType DataType> // data type this Pad workload is specialised for
+class RefPadWorkload : public TypedWorkload<PadQueueDescriptor, DataType>
 {
 public:
-    explicit RefPadWorkload (const PadQueueDescriptor& descriptor, const WorkloadInfo& info);
-    virtual void Execute() const override;
+    // Returns "RefPad" + GetDataTypeName(DataType) + "Workload"; the string is built once on first call.
+    static const std::string& GetName() // NOTE(review): <string> is not included directly here - confirm transitive
+    {
+        static const std::string name = std::string("RefPad") + GetDataTypeName(DataType) + "Workload";
+        return name;
+    }
+    // The base depends on DataType, so m_Data is named explicitly and the base constructors are inherited.
+    using TypedWorkload<PadQueueDescriptor, DataType>::m_Data;
+    using TypedWorkload<PadQueueDescriptor, DataType>::TypedWorkload;
+    // Only declared here; the definition lives in RefPadWorkload.cpp.
+    void Execute() const override;
 };
 
+using RefPadFloat32Workload = RefPadWorkload<DataType::Float32>;         // alias for the Float32 instantiation
+using RefPadUint8Workload   = RefPadWorkload<DataType::QuantisedAsymm8>; // alias for the QuantisedAsymm8 instantiation
+
 } //namespace armnn
diff --git a/src/backends/reference/workloads/RefPermuteWorkload.hpp b/src/backends/reference/workloads/RefPermuteWorkload.hpp
index 841a080..50caa3e 100644
--- a/src/backends/reference/workloads/RefPermuteWorkload.hpp
+++ b/src/backends/reference/workloads/RefPermuteWorkload.hpp
@@ -31,4 +31,4 @@
 using RefPermuteFloat32Workload = RefPermuteWorkload<DataType::Float32>;
 using RefPermuteUint8Workload   = RefPermuteWorkload<DataType::QuantisedAsymm8>;
 
-} //namespace armnn
+} //namespace armnn
\ No newline at end of file
