Release 18.02

Change-Id: Id3c11dc5ee94ef664374a988fcc6901e9a232fa6
diff --git a/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.cpp
new file mode 100644
index 0000000..fb5d784
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.cpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClActivationFloat32Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/ArmComputeUtils.hpp"
+
+namespace armnn
+{
+
+ClActivationFloat32Workload::ClActivationFloat32Workload(const ActivationQueueDescriptor& descriptor,
+                                                         const WorkloadInfo& info)
+    : Float32Workload<ActivationQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("ClActivationFloat32Workload", 1, 1);
+
+    const arm_compute::ActivationLayerInfo activationLayerInfo =
+        ConvertActivationDescriptorToAclActivationLayerInfo(m_Data.m_Parameters);
+
+    arm_compute::ICLTensor& input  = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    m_ActivationLayer.configure(&input, &output, activationLayerInfo);
+}
+
+void ClActivationFloat32Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClActivationFloat32Workload_Execute");
+    m_ActivationLayer.run();
+}
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.hpp
new file mode 100644
index 0000000..9bab420
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.hpp
@@ -0,0 +1,24 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+// Activation layer execution
+class ClActivationFloat32Workload : public Float32Workload<ActivationQueueDescriptor>
+{
+public:
+    ClActivationFloat32Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info);
+    void Execute() const override;
+
+private:
+    mutable arm_compute::CLActivationLayer m_ActivationLayer;
+};
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.cpp
new file mode 100644
index 0000000..3671dd7
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.cpp
@@ -0,0 +1,47 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClActivationUint8Workload.hpp"
+#include "backends/ClLayerSupport.hpp"
+
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+namespace armnn
+{
+
+ClActivationUint8Workload::ClActivationUint8Workload(const ActivationQueueDescriptor& descriptor,
+                                                     const WorkloadInfo& info)
+    : Uint8Workload<ActivationQueueDescriptor>(descriptor, info)
+{
+
+    std::string reasonIfUnsupported;
+    if (!IsClActivationUint8Supported(&reasonIfUnsupported, m_Data.m_Parameters))
+    {
+        throw InvalidArgumentException(reasonIfUnsupported);
+    }
+
+    // Only BoundedReLu is supported (see IsClActivationUint8Supported)
+    arm_compute::ActivationLayerInfo layerInfo(arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
+                                               m_Data.m_Parameters.m_A,
+                                               m_Data.m_Parameters.m_B);
+
+    m_Data.ValidateInputsOutputs("ClActivationUint8Workload", 1, 1);
+
+    arm_compute::ICLTensor& input  = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    m_ActivationLayer.configure(&input, &output, layerInfo);
+}
+
+void ClActivationUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClActivationUint8Workload_Execute");
+
+    m_ActivationLayer.run();
+}
+
+} //namespace armnn
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.hpp
new file mode 100644
index 0000000..3a9cceb
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+// Activation layer execution
+class ClActivationUint8Workload : public Uint8Workload<ActivationQueueDescriptor>
+{
+public:
+    ClActivationUint8Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info);
+    void Execute() const override;
+
+private:
+    mutable arm_compute::CLActivationLayer m_ActivationLayer;
+};
+
+} //namespace armnn
+
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.cpp
new file mode 100644
index 0000000..153167f
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.cpp
@@ -0,0 +1,57 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClAdditionFloat32Workload.hpp"
+
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+ClAdditionFloat32Workload::ClAdditionFloat32Workload(const AdditionQueueDescriptor& descriptor,
+                                                     const WorkloadInfo& info)
+    : Float32Workload<AdditionQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("ClAdditionFloat32Workload", 2, 1);
+
+    arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    m_Layer.configure(&input0, &input1, &output, ms_AclConvertPolicy);
+}
+
+void ClAdditionFloat32Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClAdditionFloat32Workload_Execute");
+    m_Layer.run();
+}
+
+bool ClAdditionFloat32Workload::IsSupported(const TensorInfo& input0,
+                                            const TensorInfo& input1,
+                                            const TensorInfo& output,
+                                            std::string* reasonIfUnsupported)
+{
+    const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
+    const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
+    const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+
+    const arm_compute::Status aclStatus = decltype(m_Layer)::validate(&aclInput0Info,
+                                                                      &aclInput1Info,
+                                                                      &aclOutputInfo,
+                                                                      ms_AclConvertPolicy);
+
+    const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
+    if (!supported && reasonIfUnsupported)
+    {
+        *reasonIfUnsupported = aclStatus.error_description();
+    }
+
+    return supported;
+}
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.hpp
new file mode 100644
index 0000000..37e50c2
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.hpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+class ClAdditionFloat32Workload : public Float32Workload<AdditionQueueDescriptor>
+{
+public:
+    ClAdditionFloat32Workload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+    void Execute() const override;
+
+    static bool IsSupported(const TensorInfo& input0,
+                            const TensorInfo& input1,
+                            const TensorInfo& output,
+                            std::string* reasonIfUnsupported);
+
+private:
+    mutable arm_compute::CLArithmeticAddition m_Layer;
+    static constexpr arm_compute::ConvertPolicy ms_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
+};
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.cpp b/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.cpp
new file mode 100644
index 0000000..4b72d92
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.cpp
@@ -0,0 +1,54 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClBaseConstantWorkload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+
+namespace armnn
+{
+
+template class ClBaseConstantWorkload<DataType::Float32>;
+template class ClBaseConstantWorkload<DataType::QuantisedAsymm8>;
+
+template<armnn::DataType dataType>
+void ClBaseConstantWorkload<dataType>::Execute() const
+{
+    // The intermediate tensor held by the corresponding layer output handler can be initialised with the given data
+    // on the first inference, then reused for subsequent inferences.
+    // The initialisation cannot happen at workload construction time since the ACL kernel for the next layer may not
+    // have been configured at the time.
+    if (!m_RanOnce)
+    {
+        const ConstantQueueDescriptor& data = this->m_Data;
+
+        BOOST_ASSERT(data.m_LayerOutput != nullptr);
+        arm_compute::CLTensor& output = static_cast<ClTensorHandle*>(data.m_Outputs[0])->GetTensor();
+
+        switch (dataType)
+        {
+            case DataType::Float32:
+            {
+                CopyArmComputeClTensorData(data.m_LayerOutput->GetConstTensor<float>(), output);
+                break;
+            }
+            case DataType::QuantisedAsymm8:
+            {
+                CopyArmComputeClTensorData(data.m_LayerOutput->GetConstTensor<uint8_t>(), output);
+                break;
+            }
+            default:
+            {
+                BOOST_ASSERT_MSG(false, "Unknown data type");
+                break;
+            }
+        }
+
+        m_RanOnce = true;
+    }
+}
+
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.hpp b/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.hpp
new file mode 100644
index 0000000..660842f
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.hpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+template <armnn::DataType DataType>
+class ClBaseConstantWorkload : public TypedWorkload<ConstantQueueDescriptor, DataType>
+{
+public:
+    ClBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info)
+        : TypedWorkload<ConstantQueueDescriptor, DataType>(descriptor, info)
+        , m_RanOnce(false)
+    {
+    }
+
+    void Execute() const override;
+
+private:
+    mutable bool m_RanOnce;
+};
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClBaseMergerWorkload.hpp b/src/armnn/backends/ClWorkloads/ClBaseMergerWorkload.hpp
new file mode 100644
index 0000000..7542c62
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClBaseMergerWorkload.hpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+// Base class template providing an implementation of the Merger layer common to all data types
+template <armnn::DataType DataType>
+class ClBaseMergerWorkload : public TypedWorkload<MergerQueueDescriptor, DataType>
+{
+public:
+    using TypedWorkload<MergerQueueDescriptor, DataType>::TypedWorkload;
+
+    void Execute() const override
+    {
+        // With subtensors, merger is a no-op
+    }
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClBaseSplitterWorkload.hpp b/src/armnn/backends/ClWorkloads/ClBaseSplitterWorkload.hpp
new file mode 100644
index 0000000..fef841c
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClBaseSplitterWorkload.hpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+// Base class template providing an implementation of the Splitter layer common to all data types
+template <armnn::DataType DataType>
+class ClBaseSplitterWorkload : public TypedWorkload<SplitterQueueDescriptor, DataType>
+{
+public:
+    using TypedWorkload<SplitterQueueDescriptor, DataType>::TypedWorkload;
+
+    void Execute() const override
+    {
+        // With subtensors, splitter is a no-op
+    }
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.cpp
new file mode 100644
index 0000000..dabd495
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.cpp
@@ -0,0 +1,42 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClBatchNormalizationFloat32Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+ClBatchNormalizationFloat32Workload::ClBatchNormalizationFloat32Workload(
+    const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info)
+    : Float32Workload<BatchNormalizationQueueDescriptor>(descriptor, info)
+{
+    BuildArmComputeTensor(m_Mean, m_Data.m_Mean->GetTensorInfo());
+    BuildArmComputeTensor(m_Variance, m_Data.m_Variance->GetTensorInfo());
+    BuildArmComputeTensor(m_Gamma, m_Data.m_Gamma->GetTensorInfo());
+    BuildArmComputeTensor(m_Beta, m_Data.m_Beta->GetTensorInfo());
+
+    m_Data.ValidateInputsOutputs("ClBatchNormalizationFloat32Workload", 1, 1);
+
+    arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    m_Layer.configure(&input, &output, &m_Mean, &m_Variance, &m_Beta, &m_Gamma, m_Data.m_Parameters.m_Eps);
+
+    InitialiseArmComputeClTensorData(m_Mean, m_Data.m_Mean->GetConstTensor<float>());
+    InitialiseArmComputeClTensorData(m_Variance, m_Data.m_Variance->GetConstTensor<float>());
+    InitialiseArmComputeClTensorData(m_Beta, m_Data.m_Beta->GetConstTensor<float>());
+    InitialiseArmComputeClTensorData(m_Gamma, m_Data.m_Gamma->GetConstTensor<float>());
+}
+
+void ClBatchNormalizationFloat32Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClBatchNormalizationFloat32Workload_Execute");
+    m_Layer.run();
+}
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.hpp
new file mode 100644
index 0000000..ddbd0f0
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.hpp
@@ -0,0 +1,34 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+class ClBatchNormalizationFloat32Workload : public Float32Workload<BatchNormalizationQueueDescriptor>
+{
+public:
+    ClBatchNormalizationFloat32Workload(const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+    using Float32Workload<BatchNormalizationQueueDescriptor>::Float32Workload;
+    void Execute() const override;
+
+private:
+    mutable arm_compute::CLBatchNormalizationLayer m_Layer;
+
+    arm_compute::CLTensor m_Mean;
+    arm_compute::CLTensor m_Variance;
+    arm_compute::CLTensor m_Gamma;
+    arm_compute::CLTensor m_Beta;
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.cpp
new file mode 100644
index 0000000..99880d6
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.cpp
@@ -0,0 +1,16 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClConstantFloat32Workload.hpp"
+namespace armnn
+{
+
+void ClConstantFloat32Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClConstantFloat32Workload_Execute");
+    ClBaseConstantWorkload::Execute();
+}
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.hpp
new file mode 100644
index 0000000..5f86d3b
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "ClBaseConstantWorkload.hpp"
+
+namespace armnn
+{
+class ClConstantFloat32Workload : public ClBaseConstantWorkload<DataType::Float32>
+{
+public:
+    using ClBaseConstantWorkload<DataType::Float32>::ClBaseConstantWorkload;
+    void Execute() const override;
+};
+
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.cpp
new file mode 100644
index 0000000..078d426
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.cpp
@@ -0,0 +1,16 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClConstantUint8Workload.hpp"
+namespace armnn
+{
+
+void ClConstantUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClConstantUint8Workload_Execute");
+    ClBaseConstantWorkload::Execute();
+}
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.hpp
new file mode 100644
index 0000000..3a53f10
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "ClBaseConstantWorkload.hpp"
+
+namespace armnn
+{
+
+class ClConstantUint8Workload : public ClBaseConstantWorkload<DataType::QuantisedAsymm8>
+{
+public:
+    using ClBaseConstantWorkload<DataType::QuantisedAsymm8>::ClBaseConstantWorkload;
+    void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.cpp
new file mode 100644
index 0000000..6f4069b
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.cpp
@@ -0,0 +1,70 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClConvolution2dFloat32Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+#include "backends/ClLayerSupport.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+ClConvolution2dFloat32Workload::ClConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor,
+                                                               const WorkloadInfo& info)
+    : Float32Workload<Convolution2dQueueDescriptor>(descriptor, info)
+{
+
+    // todo: check tensor shapes match
+    const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo();
+    BuildArmComputeTensor(m_KernelTensor, weightInfo);
+
+    arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX,
+                                             m_Data.m_Parameters.m_StrideY,
+                                             m_Data.m_Parameters.m_PadLeft,
+                                             m_Data.m_Parameters.m_PadRight,
+                                             m_Data.m_Parameters.m_PadTop,
+                                             m_Data.m_Parameters.m_PadBottom,
+                                             arm_compute::DimensionRoundingType::FLOOR);
+
+    arm_compute::CLTensor* optionalBias = nullptr;
+    if (m_Data.m_Parameters.m_BiasEnabled)
+    {
+        BuildArmComputeTensor(m_BiasTensor, m_Data.m_Bias->GetTensorInfo());
+        optionalBias = &m_BiasTensor;
+    }
+
+    m_Data.ValidateInputsOutputs("ClConvolution2dFloat32Workload", 1, 1);
+
+    arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    m_pConvolutionLayer = std::make_unique<arm_compute::CLConvolutionLayer>();
+    static_cast<arm_compute::CLConvolutionLayer*>(m_pConvolutionLayer.get())->configure(&input,
+                                                                                        &m_KernelTensor,
+                                                                                        optionalBias,
+                                                                                        &output,
+                                                                                        padStrideInfo);
+
+    BOOST_ASSERT(m_pConvolutionLayer);
+
+    InitialiseArmComputeClTensorData(m_KernelTensor, m_Data.m_Weight->GetConstTensor<float>());
+
+    if (optionalBias)
+    {
+        InitialiseArmComputeClTensorData(*optionalBias, m_Data.m_Bias->GetConstTensor<float>());
+    }
+}
+
+void ClConvolution2dFloat32Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClConvolution2dFloat32Workload_Execute");
+    BOOST_ASSERT(m_pConvolutionLayer);
+
+    m_pConvolutionLayer->run();
+}
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.hpp
new file mode 100644
index 0000000..2993105
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.hpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+class ClConvolution2dFloat32Workload : public Float32Workload<Convolution2dQueueDescriptor>
+{
+public:
+    ClConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info);
+    void Execute() const override;
+
+private:
+    mutable std::unique_ptr<arm_compute::IFunction>         m_pConvolutionLayer;
+
+    arm_compute::CLTensor m_KernelTensor;
+    arm_compute::CLTensor m_BiasTensor;
+};
+
+} //namespace armnn
+
diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp
new file mode 100644
index 0000000..a3c6ac9
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp
@@ -0,0 +1,72 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClConvolution2dUint8Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+#include "backends/ClLayerSupport.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+ClConvolution2dUint8Workload::ClConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor,
+                                                           const WorkloadInfo& info)
+    : Uint8Workload<Convolution2dQueueDescriptor>(descriptor, info)
+{
+
+    // todo: check tensor shapes match
+    const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo();
+    BuildArmComputeTensor(m_KernelTensor, weightInfo);
+
+    arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX,
+                                             m_Data.m_Parameters.m_StrideY,
+                                             m_Data.m_Parameters.m_PadLeft,
+                                             m_Data.m_Parameters.m_PadRight,
+                                             m_Data.m_Parameters.m_PadTop,
+                                             m_Data.m_Parameters.m_PadBottom,
+                                             arm_compute::DimensionRoundingType::FLOOR);
+
+    arm_compute::CLTensor* optionalBias = nullptr;
+    if (m_Data.m_Parameters.m_BiasEnabled)
+    {
+        BuildArmComputeTensor(m_BiasTensor, m_Data.m_Bias->GetTensorInfo());
+        optionalBias = &m_BiasTensor;
+    }
+
+    m_Data.ValidateInputsOutputs("ClConvolution2dUint8Workload", 1, 1);
+
+    arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    BOOST_ASSERT_MSG(IsClDirectConvolution2dSupported(weightInfo, m_Data.m_Parameters),
+                     "Unsupported parameters for u8 convolution");
+
+    m_pConvolutionLayer = std::make_unique<arm_compute::CLDirectConvolutionLayer>();
+    static_cast<arm_compute::CLDirectConvolutionLayer*>(m_pConvolutionLayer.get())->configure(&input,
+                                                                                              &m_KernelTensor,
+                                                                                              optionalBias,
+                                                                                              &output,
+                                                                                              padStrideInfo);
+    BOOST_ASSERT(m_pConvolutionLayer);
+
+    InitialiseArmComputeClTensorData(m_KernelTensor, m_Data.m_Weight->GetConstTensor<uint8_t>());
+
+    if (optionalBias)
+    {
+        InitialiseArmComputeClTensorData(*optionalBias, m_Data.m_Bias->GetConstTensor<int32_t>());
+    }
+}
+
+void ClConvolution2dUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClConvolution2dUint8Workload_Execute");
+    BOOST_ASSERT(m_pConvolutionLayer);
+
+    m_pConvolutionLayer->run();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp
new file mode 100644
index 0000000..b2849d7
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+
+namespace armnn
+{
+
+class ClConvolution2dUint8Workload : public Uint8Workload<Convolution2dQueueDescriptor>
+{
+public:
+    ClConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info);
+    void Execute() const override;
+
+private:
+    mutable std::unique_ptr<arm_compute::IFunction>         m_pConvolutionLayer;
+
+    arm_compute::CLTensor m_KernelTensor;
+    arm_compute::CLTensor m_BiasTensor;
+};
+
+} //namespace armnn
+
diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.cpp
new file mode 100644
index 0000000..f31c73b
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.cpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClDepthwiseConvolutionFloat32Workload.hpp"
+#include "ClDepthwiseConvolutionHelper.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+
+namespace armnn
+{
+
+ClDepthwiseConvolutionFloat32Workload::ClDepthwiseConvolutionFloat32Workload(
+    const DepthwiseConvolution2dQueueDescriptor& descriptor,
+    const WorkloadInfo& info)
+    : Float32Workload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
+{
+    InitClDepthwiseConvolutionWorkload(*this);
+}
+
+void ClDepthwiseConvolutionFloat32Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClDepthwiseConvolutionFloat32Workload_Execute");
+    BOOST_ASSERT(m_pDepthwiseConvolutionLayer);
+
+    m_pDepthwiseConvolutionLayer->run();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.hpp
new file mode 100644
index 0000000..8711f0c
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.hpp
@@ -0,0 +1,37 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+class ClDepthwiseConvolutionFloat32Workload : public Float32Workload<DepthwiseConvolution2dQueueDescriptor>
+{
+public:
+    ClDepthwiseConvolutionFloat32Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor,
+                                          const WorkloadInfo& info);
+    void Execute() const override;
+
+private:
+    typedef float KernelDataType;
+    typedef float BiasDataType;
+
+    mutable std::unique_ptr<arm_compute::IFunction> m_pDepthwiseConvolutionLayer;
+
+    arm_compute::CLTensor m_KernelTensor;
+    arm_compute::CLTensor m_BiasTensor;
+
+    template <typename WorkloadType>
+    friend void InitClDepthwiseConvolutionWorkload(WorkloadType& workload);
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionHelper.hpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionHelper.hpp
new file mode 100644
index 0000000..cd71157
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionHelper.hpp
@@ -0,0 +1,91 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <armnn/TypesUtils.hpp>
+#include "backends/ClLayerSupport.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+#include "backends/ClTensorHandle.hpp"
+
+namespace armnn
+{
+
+// Shared setup for the Float32 and Uint8 CL depthwise-convolution workloads:
+// validates the descriptor, builds the constant kernel/bias CL tensors, picks
+// the appropriate ACL layer, configures it and uploads the constant data.
+template <typename WorkloadType>
+void InitClDepthwiseConvolutionWorkload(WorkloadType& workload)
+{
+    using KernelType = typename WorkloadType::KernelDataType;
+    using BiasType   = typename WorkloadType::BiasDataType;
+
+    // Local aliases for the workload members this helper (a friend) manipulates.
+    auto& data         = workload.GetData();
+    auto& kernelTensor = workload.m_KernelTensor;
+    auto& biasTensor   = workload.m_BiasTensor;
+    auto& layer        = workload.m_pDepthwiseConvolutionLayer;
+
+    auto& weightInfo = data.m_Weight->GetTensorInfo();
+
+    // Reject parameter combinations the CL backend cannot handle before doing any work.
+    std::string reasonIfUnsupported;
+    if (!IsClDepthwiseConvolution2dDescParamsSupported(&reasonIfUnsupported, data.m_Parameters, weightInfo))
+    {
+        throw UnimplementedException(reasonIfUnsupported);
+    }
+
+    armcomputetensorutils::BuildArmComputeTensor(kernelTensor, weightInfo);
+
+    arm_compute::CLTensor* optionalBias = nullptr;
+    if (data.m_Parameters.m_BiasEnabled)
+    {
+        armcomputetensorutils::BuildArmComputeTensor(biasTensor, data.m_Bias->GetTensorInfo());
+        optionalBias = &biasTensor;
+    }
+
+    const arm_compute::PadStrideInfo padStrideInfo(data.m_Parameters.m_StrideX,
+                                                   data.m_Parameters.m_StrideY,
+                                                   data.m_Parameters.m_PadLeft,
+                                                   data.m_Parameters.m_PadRight,
+                                                   data.m_Parameters.m_PadTop,
+                                                   data.m_Parameters.m_PadBottom,
+                                                   arm_compute::DimensionRoundingType::FLOOR);
+
+    const std::string name = std::string("ClDepthwiseConvolution") + GetDataTypeName(GetDataType<KernelType>()) + "Workload";
+    data.ValidateInputsOutputs(name, 1, 1);
+
+    arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(data.m_Outputs[0])->GetTensor();
+
+    // A 3x3 kernel can use the specialised, faster ACL function.
+    if ((weightInfo.GetShape()[3] == 3) && (weightInfo.GetShape()[2] == 3))
+    {
+        auto layer3x3 = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer3x3>();
+        layer3x3->configure(&input, &kernelTensor, optionalBias, &output, padStrideInfo);
+        layer = std::move(layer3x3);
+    }
+    else
+    {
+        auto genericLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer>();
+        genericLayer->configure(&input, &kernelTensor, optionalBias, &output, padStrideInfo);
+        layer = std::move(genericLayer);
+    }
+
+    BOOST_ASSERT(layer);
+
+    // Upload the constant weight (and optional bias) data to the GPU tensors.
+    InitialiseArmComputeClTensorData(kernelTensor, data.m_Weight->template GetConstTensor<KernelType>());
+
+    if (optionalBias)
+    {
+        InitialiseArmComputeClTensorData(*optionalBias, data.m_Bias->template GetConstTensor<BiasType>());
+    }
+}
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp
new file mode 100644
index 0000000..7e7c488
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp
@@ -0,0 +1,32 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClDepthwiseConvolutionUint8Workload.hpp"
+#include "ClDepthwiseConvolutionHelper.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+
+namespace armnn
+{
+
+
+// Constructor: delegates all ACL tensor and layer setup to the shared
+// InitClDepthwiseConvolutionWorkload() helper (ClDepthwiseConvolutionHelper.hpp).
+ClDepthwiseConvolutionUint8Workload::ClDepthwiseConvolutionUint8Workload(
+    const DepthwiseConvolution2dQueueDescriptor& descriptor,
+    const WorkloadInfo& info)
+    : Uint8Workload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
+{
+    InitClDepthwiseConvolutionWorkload(*this);
+}
+
+// Runs the depthwise convolution layer configured during construction.
+void ClDepthwiseConvolutionUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClDepthwiseConvolutionUint8Workload_Execute");
+    // A null layer here means the workload was never initialised.
+    BOOST_ASSERT(m_pDepthwiseConvolutionLayer);
+
+    m_pDepthwiseConvolutionLayer->run();
+}
+
+} //namespace armnn
+
diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp
new file mode 100644
index 0000000..ee09ff3
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp
@@ -0,0 +1,35 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+// CL backend workload executing a quantised-asymm8 depthwise 2D convolution.
+// All ACL tensor/layer setup is performed by the shared
+// InitClDepthwiseConvolutionWorkload() helper, hence the friend declaration.
+class ClDepthwiseConvolutionUint8Workload : public Uint8Workload<DepthwiseConvolution2dQueueDescriptor>
+{
+public:
+    ClDepthwiseConvolutionUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor,
+                                        const WorkloadInfo& info);
+    void Execute() const override;
+
+private:
+    // Element types the init helper uses when uploading constant kernel/bias
+    // data: uint8 kernel with int32 bias, the usual asymm8 quantisation scheme.
+    using KernelDataType = uint8_t;
+    using BiasDataType   = int32_t;
+
+    // Holds either a CLDepthwiseConvolutionLayer3x3 or a generic
+    // CLDepthwiseConvolutionLayer, chosen at construction from the kernel shape.
+    // mutable because Execute() is const but IFunction::run() is not.
+    mutable std::unique_ptr<arm_compute::IFunction> m_pDepthwiseConvolutionLayer;
+
+    arm_compute::CLTensor m_KernelTensor;
+    arm_compute::CLTensor m_BiasTensor;
+
+    template <typename WorkloadType>
+    friend void InitClDepthwiseConvolutionWorkload(WorkloadType& workload);
+};
+
+} //namespace armnn
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.cpp
new file mode 100644
index 0000000..882da50
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.cpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClFloorFloat32Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+
+namespace armnn
+{
+
+// Constructor: wires the single input/output tensor pair into the ACL
+// CLFloor function; Execute() later just runs the configured layer.
+ClFloorFloat32Workload::ClFloorFloat32Workload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info)
+    : Float32Workload<FloorQueueDescriptor>(descriptor, info)
+{
+    // Floor takes exactly one input tensor and produces one output tensor.
+    m_Data.ValidateInputsOutputs("ClFloorFloat32Workload", 1, 1);
+
+    arm_compute::ICLTensor& inputTensor  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& outputTensor = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    m_Layer.configure(&inputTensor, &outputTensor);
+}
+
+// Runs the CLFloor layer configured in the constructor.
+void ClFloorFloat32Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClFloorFloat32Workload_Execute");
+    m_Layer.run();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.hpp
new file mode 100644
index 0000000..532dd29
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.hpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+// Float32 element-wise floor workload backed by arm_compute::CLFloor.
+class ClFloorFloat32Workload : public Float32Workload<FloorQueueDescriptor>
+{
+public:
+    ClFloorFloat32Workload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+    void Execute() const override;
+
+private:
+    // mutable because Execute() is const but CLFloor::run() is not.
+    mutable arm_compute::CLFloor m_Layer;
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.cpp
new file mode 100644
index 0000000..96596b9
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.cpp
@@ -0,0 +1,52 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClFullyConnectedFloat32Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+// Constructor: builds the constant weight/bias CL tensors, configures the ACL
+// fully-connected layer and uploads the constant data to the GPU.
+ClFullyConnectedFloat32Workload::ClFullyConnectedFloat32Workload(const FullyConnectedQueueDescriptor& descriptor,
+                                                                 const WorkloadInfo& info)
+    : Float32Workload<FullyConnectedQueueDescriptor>(descriptor, info)
+{
+    // Validate before building any ACL tensors, consistent with the other Cl
+    // workloads (the original validated only after constructing the weights tensor).
+    m_Data.ValidateInputsOutputs("ClFullyConnectedFloat32Workload", 1, 1);
+
+    BuildArmComputeTensor(m_WeightsTensor, m_Data.m_Weight->GetTensorInfo());
+
+    arm_compute::CLTensor* optionalBiasTensor = nullptr;
+    if (m_Data.m_Parameters.m_BiasEnabled)
+    {
+        BuildArmComputeTensor(m_BiasesTensor, m_Data.m_Bias->GetTensorInfo());
+        optionalBiasTensor = &m_BiasesTensor;
+    }
+
+    arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    // Configure the ACL layer.
+    m_FullyConnected.configure(
+        &input, &m_WeightsTensor, optionalBiasTensor, &output, m_Data.m_Parameters.m_TransposeWeightMatrix);
+
+    // Upload the constant weight (and optional bias) data to the GPU tensors.
+    InitialiseArmComputeClTensorData(m_WeightsTensor, m_Data.m_Weight->GetConstTensor<float>());
+
+    if (optionalBiasTensor)
+    {
+        InitialiseArmComputeClTensorData(*optionalBiasTensor, m_Data.m_Bias->GetConstTensor<float>());
+    }
+}
+
+// Runs the fully-connected layer configured in the constructor.
+void ClFullyConnectedFloat32Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClFullyConnectedFloat32Workload_Execute");
+    m_FullyConnected.run();
+}
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.hpp
new file mode 100644
index 0000000..def20e0
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.hpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+
+namespace armnn
+{
+
+// Float32 fully-connected workload backed by arm_compute::CLFullyConnectedLayer.
+// Note: the redundant armnn:: qualifiers were dropped — this declaration is
+// already inside namespace armnn, matching the other headers in this change.
+class ClFullyConnectedFloat32Workload : public Float32Workload<FullyConnectedQueueDescriptor>
+{
+public:
+    ClFullyConnectedFloat32Workload(const FullyConnectedQueueDescriptor& descriptor,
+        const WorkloadInfo& info);
+
+    // Re-exposes the protected base member publicly — presumably some callers
+    // access m_Data directly; TODO(review): confirm this is still required.
+    using Float32Workload<FullyConnectedQueueDescriptor>::m_Data;
+    void Execute() const override;
+
+private:
+    // mutable because Execute() is const but CLFullyConnectedLayer::run() is not.
+    mutable arm_compute::CLFullyConnectedLayer m_FullyConnected;
+    arm_compute::CLTensor                      m_WeightsTensor;
+    arm_compute::CLTensor                      m_BiasesTensor;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.cpp
new file mode 100644
index 0000000..e15db74
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.cpp
@@ -0,0 +1,35 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClL2NormalizationFloat32Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ArmComputeUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+// Constructor: configures a CLNormalizationLayer with parameters produced by
+// CreateAclNormalizationLayerInfoForL2Normalization() (see the header comment:
+// deliberately not a dedicated CL L2-normalise function).
+ClL2NormalizationFloat32Workload::ClL2NormalizationFloat32Workload(const L2NormalizationQueueDescriptor& descriptor,
+                                                                   const WorkloadInfo& info)
+    : Float32Workload<L2NormalizationQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("ClL2NormalizationFloat32Workload", 1, 1);
+
+    arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    // The normalization parameters are derived from the input tensor's shape.
+    m_Layer.configure(&input, &output, CreateAclNormalizationLayerInfoForL2Normalization(info.m_InputTensorInfos[0]));
+}
+
+// Runs the normalization layer configured in the constructor.
+void ClL2NormalizationFloat32Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClL2NormalizationFloat32Workload_Execute");
+    m_Layer.run();
+}
+
+} //namespace armnn
+
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.hpp
new file mode 100644
index 0000000..848803e
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.hpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+// Float32 L2 normalization workload, implemented on top of the generic CL
+// normalization layer rather than a dedicated L2 function.
+class ClL2NormalizationFloat32Workload : public Float32Workload<L2NormalizationQueueDescriptor>
+{
+public:
+    ClL2NormalizationFloat32Workload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+    void Execute() const override;
+
+private:
+    // Purposely not a CLL2Normalize function. See constructor.
+    // mutable because Execute() is const but CLNormalizationLayer::run() is not.
+    mutable arm_compute::CLNormalizationLayer m_Layer;
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.cpp
new file mode 100644
index 0000000..4d2d708
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.cpp
@@ -0,0 +1,19 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClMergerFloat32Workload.hpp"
+
+
+namespace armnn
+{
+
+// Adds a profiling event, then delegates to the shared base-class merger logic.
+void ClMergerFloat32Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClMergerFloat32Workload_Execute");
+    ClBaseMergerWorkload::Execute();
+}
+
+} //namespace armnn
+
diff --git a/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.hpp
new file mode 100644
index 0000000..9808d30
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.hpp
@@ -0,0 +1,22 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "ClBaseMergerWorkload.hpp"
+
+namespace armnn
+{
+
+// Float32 merger (concatenation) workload; all behaviour comes from
+// ClBaseMergerWorkload, this class only adds profiling in Execute().
+class ClMergerFloat32Workload : public ClBaseMergerWorkload<armnn::DataType::Float32>
+{
+public:
+    // Reuse the base-class constructors unchanged.
+    using ClBaseMergerWorkload<armnn::DataType::Float32>::ClBaseMergerWorkload;
+    virtual void Execute() const override;
+};
+
+} //namespace armnn
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.cpp
new file mode 100644
index 0000000..94a1d3c
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.cpp
@@ -0,0 +1,18 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClMergerUint8Workload.hpp"
+
+
+namespace armnn
+{
+
+// Adds a profiling event, then delegates to the shared base-class merger logic.
+void ClMergerUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClMergerUint8Workload_Execute");
+    ClBaseMergerWorkload<DataType::QuantisedAsymm8>::Execute();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.hpp
new file mode 100644
index 0000000..1ddbb2a
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "ClBaseMergerWorkload.hpp"
+
+namespace armnn
+{
+
+// Quantised-asymm8 merger (concatenation) workload; all behaviour comes from
+// ClBaseMergerWorkload, this class only adds profiling in Execute().
+class ClMergerUint8Workload : public ClBaseMergerWorkload<armnn::DataType::QuantisedAsymm8>
+{
+public:
+    // Reuse the base-class constructors unchanged.
+    using ClBaseMergerWorkload<armnn::DataType::QuantisedAsymm8>::ClBaseMergerWorkload;
+    virtual void Execute() const override;
+};
+
+} //namespace armnn
+
diff --git a/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.cpp
new file mode 100644
index 0000000..405d109
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.cpp
@@ -0,0 +1,39 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClMultiplicationFloat32Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+
+namespace armnn
+{
+
+// Constructor: wires the two inputs and one output into the ACL pixel-wise
+// multiplication function; Execute() later just runs the configured layer.
+ClMultiplicationFloat32Workload::ClMultiplicationFloat32Workload(const MultiplicationQueueDescriptor& descriptor,
+                                                                 const WorkloadInfo& info)
+    : Float32Workload<MultiplicationQueueDescriptor>(descriptor, info)
+{
+    // Element-wise multiplication takes two inputs and produces one output.
+    m_Data.ValidateInputsOutputs("ClMultiplicationFloat32Workload", 2, 1);
+
+    arm_compute::ICLTensor& lhs    = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& rhs    = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ICLTensor& result = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    // Configure the ACL layer: unit scale, saturating conversion,
+    // round-to-nearest-even.
+    m_PixelWiseMultiplication.configure(&lhs,
+                                        &rhs,
+                                        &result,
+                                        1.0f,
+                                        arm_compute::ConvertPolicy::SATURATE,
+                                        arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
+}
+
+// Runs the pixel-wise multiplication layer configured in the constructor.
+void ClMultiplicationFloat32Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClMultiplicationFloat32Workload_Execute");
+
+    // Execute the layer
+    m_PixelWiseMultiplication.run();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.hpp
new file mode 100644
index 0000000..8e38711
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+// Float32 element-wise multiplication workload backed by
+// arm_compute::CLPixelWiseMultiplication.
+// Note: the inherited-constructor declaration
+// (using Float32Workload<...>::Float32Workload;) was removed — it had the same
+// signature as the declared constructor, so it never participated in overload
+// resolution, and had it been used it would have skipped configure().
+class ClMultiplicationFloat32Workload : public Float32Workload<MultiplicationQueueDescriptor>
+{
+public:
+    ClMultiplicationFloat32Workload(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+    void Execute() const override;
+
+private:
+    // mutable because Execute() is const but run() is not.
+    mutable arm_compute::CLPixelWiseMultiplication   m_PixelWiseMultiplication;
+};
+
+} //namespace armnn
+
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.cpp
new file mode 100644
index 0000000..a163ec2
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.cpp
@@ -0,0 +1,49 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClNormalizationFloat32Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ClLayerSupport.hpp"
+#include "backends/ArmComputeUtils.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+
+namespace armnn
+{
+
+// Translates the ArmNN tensor infos and descriptor into their ACL equivalents
+// and delegates support checking to the ACL layer's static validator.
+arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input, const TensorInfo& output,
+    const NormalizationDescriptor& descriptor)
+{
+    using armcomputetensorutils::BuildArmComputeTensorInfo;
+    using armcomputetensorutils::BuildArmComputeNormalizationLayerInfo;
+
+    const arm_compute::TensorInfo aclInput  = BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
+    const arm_compute::NormalizationLayerInfo aclLayerInfo = BuildArmComputeNormalizationLayerInfo(descriptor);
+
+    return arm_compute::CLNormalizationLayer::validate(&aclInput, &aclOutput, aclLayerInfo);
+}
+
+// Constructor: configures a CLNormalizationLayer from the ArmNN descriptor.
+// Fix: removed the stray ';' that followed the closing brace.
+ClNormalizationFloat32Workload::ClNormalizationFloat32Workload(const NormalizationQueueDescriptor& descriptor,
+                                                               const WorkloadInfo& info)
+    : Float32Workload<NormalizationQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("ClNormalizationFloat32Workload", 1, 1);
+
+    arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    // Translate the ArmNN normalization parameters into an ACL layer info.
+    arm_compute::NormalizationLayerInfo normalizationInfo =
+        armcomputetensorutils::BuildArmComputeNormalizationLayerInfo(m_Data.m_Parameters);
+
+    m_NormalizationLayer.configure(&input, &output, normalizationInfo);
+}
+
+// Runs the normalization layer configured in the constructor.
+void ClNormalizationFloat32Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClNormalizationFloat32Workload_Execute");
+    m_NormalizationLayer.run();
+}
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.hpp
new file mode 100644
index 0000000..cbd5fa9
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.hpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input,
+    const TensorInfo& output,
+    const NormalizationDescriptor& descriptor);
+
+// Float32 normalization workload backed by arm_compute::CLNormalizationLayer.
+class ClNormalizationFloat32Workload : public Float32Workload<NormalizationQueueDescriptor>
+{
+public:
+    ClNormalizationFloat32Workload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
+    void Execute() const override;
+
+private:
+    // mutable because Execute() is const but run() is not.
+    mutable arm_compute::CLNormalizationLayer    m_NormalizationLayer;
+};
+
+} //namespace armnn
+
diff --git a/src/armnn/backends/ClWorkloads/ClPermuteWorkload.cpp b/src/armnn/backends/ClWorkloads/ClPermuteWorkload.cpp
new file mode 100644
index 0000000..3147e95
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClPermuteWorkload.cpp
@@ -0,0 +1,54 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClPermuteWorkload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+
+#include <arm_compute/core/Error.h>
+
+namespace armnn
+{
+
+// Checks whether the requested permutation is one of the mappings the CL
+// permute workload supports.
+arm_compute::Status ClPermuteWorkloadValidate(const PermuteDescriptor& descriptor)
+{
+    const armnn::PermutationVector& perm = descriptor.m_DimMappings;
+
+    const bool isSupported = perm.IsEqual({ 0U, 3U, 1U, 2U })
+                          || perm.IsEqual({ 0U, 2U, 3U, 1U })
+                          || perm.IsEqual({ 3U, 2U, 0U, 1U });
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(!isSupported,
+    "Only [0, 3, 1, 2], [0, 2, 3, 1] and [3, 2, 0, 1] permutations are supported");
+
+    return arm_compute::Status{};
+}
+
+// Constructor: wires the input/output tensors into the ACL CLPermute function
+// using the permutation vector from the descriptor.
+template <armnn::DataType DataType>
+ClPermuteWorkload<DataType>::ClPermuteWorkload(const PermuteQueueDescriptor& descriptor,
+                                               const WorkloadInfo& info)
+    : TypedWorkload<PermuteQueueDescriptor, DataType>(descriptor, info)
+{
+    using armcomputetensorutils::BuildArmComputePermutationVector;
+
+    m_Data.ValidateInputsOutputs(GetName(), 1, 1);
+
+    const arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings;
+
+    // Configure the permute function (it is run later by Execute()).
+    m_PermuteFunction.configure(&input, &output, BuildArmComputePermutationVector(mappings));
+}
+
+// Runs the permute function configured in the constructor.
+template <armnn::DataType DataType>
+void ClPermuteWorkload<DataType>::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, GetName() + "_Execute");
+    m_PermuteFunction.run();
+}
+
+template class ClPermuteWorkload<DataType::Float32>;
+template class ClPermuteWorkload<DataType::QuantisedAsymm8>;
+
+} // namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClPermuteWorkload.hpp b/src/armnn/backends/ClWorkloads/ClPermuteWorkload.hpp
new file mode 100644
index 0000000..430c595
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClPermuteWorkload.hpp
@@ -0,0 +1,42 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+#include <armnn/TypesUtils.hpp>
+#include <arm_compute/runtime/CL/functions/CLPermute.h>
+
+#include <string>
+
+namespace armnn
+{
+
+arm_compute::Status ClPermuteWorkloadValidate(const PermuteDescriptor& descriptor);
+
+// Permute workload templated on the ArmNN data type, backed by
+// arm_compute::CLPermute. Instantiated below for Float32 and QuantisedAsymm8.
+template <armnn::DataType DataType>
+class ClPermuteWorkload : public TypedWorkload<PermuteQueueDescriptor, DataType>
+{
+public:
+    // Workload name including the data type, e.g. "ClPermuteFloat32Workload";
+    // used for validation messages and profiling events.
+    static const std::string& GetName()
+    {
+        static const std::string name = std::string("ClPermute") + GetDataTypeName(DataType) + "Workload";
+        return name;
+    }
+
+    ClPermuteWorkload(const PermuteQueueDescriptor& descriptor, const WorkloadInfo& info);
+    void Execute() const override;
+
+private:
+    using TypedWorkload<PermuteQueueDescriptor, DataType>::m_Data;
+    // mutable because Execute() is const but CLPermute::run() is not.
+    mutable arm_compute::CLPermute m_PermuteFunction;
+};
+
+using ClPermuteFloat32Workload = ClPermuteWorkload<DataType::Float32>;
+using ClPermuteUint8Workload = ClPermuteWorkload<DataType::QuantisedAsymm8>;
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp b/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp
new file mode 100644
index 0000000..dbdc06f
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp
@@ -0,0 +1,47 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClPooling2dBaseWorkload.hpp"
+#include "backends/ClLayerSupport.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/ArmComputeUtils.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+// Translates the ArmNN tensor infos and descriptor into their ACL equivalents
+// and delegates support checking to the ACL layer's static validator.
+arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input,
+    const TensorInfo& output,
+    const Pooling2dDescriptor& descriptor)
+{
+    const arm_compute::TensorInfo aclInput  = BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
+    const arm_compute::PoolingLayerInfo poolInfo = BuildArmComputePoolingLayerInfo(descriptor);
+
+    return arm_compute::CLPoolingLayer::validate(&aclInput, &aclOutput, poolInfo);
+}
+
+// Constructor shared by the Float32 and Uint8 pooling workloads: validates the
+// descriptor and configures the ACL pooling layer. 'name' is the derived
+// workload's name, used in validation error messages.
+template <armnn::DataType dataType>
+ClPooling2dBaseWorkload<dataType>::ClPooling2dBaseWorkload(
+    const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, const std::string& name)
+    : TypedWorkload<Pooling2dQueueDescriptor, dataType>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs(name, 1, 1);
+
+    arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters);
+
+    // Configure the pooling layer (it is run later by the derived Execute()).
+    m_PoolingLayer.configure(&input, &output, layerInfo);
+}
+
+template class ClPooling2dBaseWorkload<DataType::Float32>;
+template class ClPooling2dBaseWorkload<DataType::QuantisedAsymm8>;
+
+}
diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp b/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp
new file mode 100644
index 0000000..828f000
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input,
+    const TensorInfo& output,
+    const Pooling2dDescriptor& descriptor);
+
+// Base class template providing an implementation of the Pooling2d layer common to all data types
+template <armnn::DataType dataType>
+class ClPooling2dBaseWorkload : public TypedWorkload<Pooling2dQueueDescriptor, dataType>
+{
+public:
+    using TypedWorkload<Pooling2dQueueDescriptor, dataType>::m_Data;
+
+    ClPooling2dBaseWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, 
+                            const std::string& name);
+
+protected:
+    // Configured in the constructor; run by the derived classes' Execute().
+    // mutable because Execute() is const but run() is not.
+    mutable arm_compute::CLPoolingLayer m_PoolingLayer;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.cpp
new file mode 100644
index 0000000..a7f5855
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.cpp
@@ -0,0 +1,24 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClPooling2dFloat32Workload.hpp"
+
+namespace armnn
+{
+
+// Constructor: all setup happens in ClPooling2dBaseWorkload; only the
+// workload name (for validation messages) is specific to this class.
+ClPooling2dFloat32Workload::ClPooling2dFloat32Workload(const Pooling2dQueueDescriptor& descriptor,
+                                                       const WorkloadInfo& info)
+    : ClPooling2dBaseWorkload<DataType::Float32>(descriptor, info, "ClPooling2dFloat32Workload")
+{
+}
+
+// Runs the pooling layer configured by the base-class constructor.
+void ClPooling2dFloat32Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClPooling2dFloat32Workload_Execute");
+    m_PoolingLayer.run();
+}
+
+} //namespace armnn
+
diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.hpp
new file mode 100644
index 0000000..3456a2c
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+#include "backends/ClWorkloads//ClPooling2dBaseWorkload.hpp"
+
+namespace armnn
+{
+// Float32 2D pooling workload; configuration lives in ClPooling2dBaseWorkload.
+class ClPooling2dFloat32Workload : public ClPooling2dBaseWorkload<DataType::Float32>
+{
+public:
+    ClPooling2dFloat32Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info);
+    void Execute() const override;
+
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.cpp
new file mode 100644
index 0000000..2d2109e
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.cpp
@@ -0,0 +1,25 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClPooling2dUint8Workload.hpp"
+
+namespace armnn
+{
+
+ClPooling2dUint8Workload::ClPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor,
+                                                   const WorkloadInfo& info)
+    : ClPooling2dBaseWorkload<DataType::QuantisedAsymm8>(descriptor, info, "ClPooling2dUint8Workload")
+{
+}
+
+void ClPooling2dUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClPooling2dUint8Workload_Execute");
+    m_PoolingLayer.run();
+}
+
+} //namespace armnn
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.hpp
new file mode 100644
index 0000000..0875c74
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.hpp
@@ -0,0 +1,24 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+#include "backends/ClWorkloads/ClPooling2dBaseWorkload.hpp"
+
+namespace armnn
+{
+
+class ClPooling2dUint8Workload : public ClPooling2dBaseWorkload<DataType::QuantisedAsymm8>
+{
+public:
+    ClPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info);
+    void Execute() const override;
+
+};
+
+} //namespace armnn
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.cpp
new file mode 100644
index 0000000..7b4ad44
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.cpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClReshapeFloat32Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+
+namespace armnn
+{
+
+ClReshapeFloat32Workload::ClReshapeFloat32Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info)
+    : Float32Workload<ReshapeQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("ClReshapeFloat32Workload", 1, 1);
+
+    arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    m_Layer.configure(&input, &output);
+}
+
+void ClReshapeFloat32Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClReshapeFloat32Workload_Execute");
+    m_Layer.run();
+}
+
+} //namespace armnn
+
diff --git a/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.hpp
new file mode 100644
index 0000000..e344ee0
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.hpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+class ClReshapeFloat32Workload : public Float32Workload<ReshapeQueueDescriptor>
+{
+public:
+    ClReshapeFloat32Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+    void Execute() const override;
+
+private:
+    mutable arm_compute::CLReshapeLayer m_Layer;
+};
+
+} //namespace armnn
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.cpp
new file mode 100644
index 0000000..36cc1de
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.cpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClReshapeUint8Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+
+namespace armnn
+{
+ClReshapeUint8Workload::ClReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info)
+    : Uint8Workload<ReshapeQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("ClReshapeUint8Workload", 1, 1);
+
+    arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    m_Layer.configure(&input, &output);
+}
+
+void ClReshapeUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClReshapeUint8Workload_Execute");
+
+    m_Layer.run();
+}
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.hpp
new file mode 100644
index 0000000..9e41990
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+// Reshape
+class ClReshapeUint8Workload : public Uint8Workload<ReshapeQueueDescriptor>
+{
+public:
+    ClReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+    void Execute() const override;
+
+private:
+    mutable arm_compute::CLReshapeLayer m_Layer;
+};
+
+} //namespace armnn
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.cpp
new file mode 100644
index 0000000..d71011a
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.cpp
@@ -0,0 +1,36 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClResizeBilinearFloat32Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ClLayerSupport.hpp"
+#include "backends/ArmComputeUtils.hpp"
+
+namespace armnn
+{
+
+ClResizeBilinearFloat32Workload::ClResizeBilinearFloat32Workload(const ResizeBilinearQueueDescriptor& descriptor,
+                                                               const WorkloadInfo& info)
+    : Float32Workload<ResizeBilinearQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("ClResizeBilinearFloat32Workload", 1, 1);
+
+    arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    m_ResizeBilinearLayer.configure(&input, &output, arm_compute::InterpolationPolicy::BILINEAR,
+                                    arm_compute::BorderMode::REPLICATE, arm_compute::PixelValue(0.f),
+                                    arm_compute::SamplingPolicy::TOP_LEFT);
+}
+
+void ClResizeBilinearFloat32Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClResizeBilinearFloat32Workload_Execute");
+    m_ResizeBilinearLayer.run();
+}
+
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.hpp
new file mode 100644
index 0000000..5f70e71
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.hpp
@@ -0,0 +1,23 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+class ClResizeBilinearFloat32Workload : public Float32Workload<ResizeBilinearQueueDescriptor>
+{
+public:
+    ClResizeBilinearFloat32Workload(const ResizeBilinearQueueDescriptor& descriptor, const WorkloadInfo& info);
+    void Execute() const override;
+
+private:
+    mutable arm_compute::CLScale m_ResizeBilinearLayer;
+};
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.cpp
new file mode 100644
index 0000000..257e76a
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.cpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClSoftmaxFloat32Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+
+namespace armnn
+{
+
+ClSoftmaxFloat32Workload::ClSoftmaxFloat32Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info)
+    : Float32Workload<SoftmaxQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("ClSoftmaxFloat32Workload", 1, 1);
+
+    arm_compute::ICLTensor& input  = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    m_SoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta);
+}
+
+void ClSoftmaxFloat32Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClSoftmaxFloat32Workload_Execute");
+    m_SoftmaxLayer.run();
+}
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.hpp
new file mode 100644
index 0000000..a26bbe8
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.hpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+class ClSoftmaxFloat32Workload : public Float32Workload<SoftmaxQueueDescriptor>
+{
+public:
+    ClSoftmaxFloat32Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info);
+    void Execute() const override;
+
+private:
+    mutable arm_compute::CLSoftmaxLayer m_SoftmaxLayer;
+};
+
+} //namespace armnn
+
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp
new file mode 100644
index 0000000..9e856fe
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp
@@ -0,0 +1,39 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClSoftmaxUint8Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+
+namespace armnn
+{
+
+ClSoftmaxUint8Workload::ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info)
+    : Uint8Workload<SoftmaxQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("ClSoftmaxUint8Workload", 1, 1);
+
+    arm_compute::ICLTensor& input  = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    const auto outputQuantization = output.info()->quantization_info();
+
+    if ((outputQuantization.scale != (1.0f / 256.0f)) || (outputQuantization.offset != 0))
+    {
+        throw InvalidArgumentException(
+            "Invalid quantization for output. Only scale = 1.0f / 256.0f and offset = 0 supported");
+    }
+
+    m_SoftmaxLayer.configure(&input, &output, descriptor.m_Parameters.m_Beta);
+}
+
+void ClSoftmaxUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClSoftmaxUint8Workload_Execute");
+
+    m_SoftmaxLayer.run();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp
new file mode 100644
index 0000000..07ee625
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+// Softmax
+class ClSoftmaxUint8Workload : public Uint8Workload<SoftmaxQueueDescriptor>
+{
+public:
+    ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+    void Execute() const override;
+
+private:
+    mutable arm_compute::CLSoftmaxLayer m_SoftmaxLayer;
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.cpp
new file mode 100644
index 0000000..6221d56
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.cpp
@@ -0,0 +1,17 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClSplitterFloat32Workload.hpp"
+
+namespace armnn
+{
+
+void ClSplitterFloat32Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClSplitterFloat32Workload_Execute");
+    ClBaseSplitterWorkload::Execute();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.hpp
new file mode 100644
index 0000000..cfc7eaa
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "ClBaseSplitterWorkload.hpp"
+
+namespace armnn
+{
+
+class ClSplitterFloat32Workload : public ClBaseSplitterWorkload<DataType::Float32>
+{
+public:
+    using ClBaseSplitterWorkload<DataType::Float32>::ClBaseSplitterWorkload;
+    virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.cpp
new file mode 100644
index 0000000..3aa4708
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.cpp
@@ -0,0 +1,17 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClSplitterUint8Workload.hpp"
+
+namespace armnn
+{
+
+void ClSplitterUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClSplitterUint8Workload_Execute");
+    ClBaseSplitterWorkload::Execute();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.hpp
new file mode 100644
index 0000000..ed8b3cc
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "ClBaseSplitterWorkload.hpp"
+
+namespace armnn
+{
+class ClSplitterUint8Workload : public ClBaseSplitterWorkload<DataType::QuantisedAsymm8>
+{
+public:
+    using ClBaseSplitterWorkload<DataType::QuantisedAsymm8>::ClBaseSplitterWorkload;
+    virtual void Execute() const override;
+};
+} //namespace armnn
+
+
+