IVGCVSW-6249 Add ProfilingDetails Macros to all workloads in CL

Signed-off-by: Keith Davis <keith.davis@arm.com>
Change-Id: I92dd410da7ad633a46d025fdc2b26093041c439b
diff --git a/src/backends/cl/workloads/ClAbsWorkload.cpp b/src/backends/cl/workloads/ClAbsWorkload.cpp
index 4682c64..fa8e4f7 100644
--- a/src/backends/cl/workloads/ClAbsWorkload.cpp
+++ b/src/backends/cl/workloads/ClAbsWorkload.cpp
@@ -39,7 +39,7 @@
 
 void ClAbsWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClAbsWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClAbsWorkload_Execute", this->GetGuid());
     RunClFunction(m_AbsLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClActivationWorkload.cpp b/src/backends/cl/workloads/ClActivationWorkload.cpp
index e2f64a9..20a65b6 100644
--- a/src/backends/cl/workloads/ClActivationWorkload.cpp
+++ b/src/backends/cl/workloads/ClActivationWorkload.cpp
@@ -34,19 +34,25 @@
                                            const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<ActivationQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClActivationWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClActivationWorkload", 1, 1);
 
     const arm_compute::ActivationLayerInfo activationLayerInfo =
         ConvertActivationDescriptorToAclActivationLayerInfo(m_Data.m_Parameters);
 
-    arm_compute::ICLTensor& input  = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
     m_ActivationLayer.configure(clCompileContext, &input, &output, activationLayerInfo);
 }
 
 void ClActivationWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClActivationWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClActivationWorkload_Execute", this->GetGuid());
     RunClFunction(m_ActivationLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClAdditionWorkload.cpp b/src/backends/cl/workloads/ClAdditionWorkload.cpp
index 4793cc6..9bef060 100644
--- a/src/backends/cl/workloads/ClAdditionWorkload.cpp
+++ b/src/backends/cl/workloads/ClAdditionWorkload.cpp
@@ -36,7 +36,7 @@
 
 void ClAdditionWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClAdditionWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClAdditionWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp b/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp
index 7475cfa..78646a7 100644
--- a/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp
+++ b/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp
@@ -57,6 +57,12 @@
                                          const arm_compute::CLCompileContext& clCompileContext)
         : BaseWorkload<ArgMinMaxQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClArgMinMaxWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
 
@@ -80,7 +86,7 @@
 
 void ClArgMinMaxWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClArgMinMaxWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClArgMinMaxWorkload_Execute", this->GetGuid());
     RunClFunction(m_ArgMinMaxLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
index 361d6f8..8367d7e 100644
--- a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
@@ -22,21 +22,21 @@
                                                  const TensorInfo& var,
                                                  const TensorInfo& beta,
                                                  const TensorInfo& gamma,
-                                                 const BatchNormalizationDescriptor& desc,
+                                                 const BatchNormalizationDescriptor& descriptor,
                                                  const ActivationDescriptor* activationDescriptor)
 {
     const arm_compute::TensorInfo aclInputInfo =
-          armcomputetensorutils::BuildArmComputeTensorInfo(input, desc.m_DataLayout);
+        armcomputetensorutils::BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo =
-          armcomputetensorutils::BuildArmComputeTensorInfo(output, desc.m_DataLayout);
+        armcomputetensorutils::BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclMeanInfo =
-          armcomputetensorutils::BuildArmComputeTensorInfo(mean, desc.m_DataLayout);
+        armcomputetensorutils::BuildArmComputeTensorInfo(mean, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclVarInfo =
-          armcomputetensorutils::BuildArmComputeTensorInfo(var, desc.m_DataLayout);
+        armcomputetensorutils::BuildArmComputeTensorInfo(var, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclBetaInfo =
-          armcomputetensorutils::BuildArmComputeTensorInfo(beta, desc.m_DataLayout);
+        armcomputetensorutils::BuildArmComputeTensorInfo(beta, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclGammaInfo =
-          armcomputetensorutils::BuildArmComputeTensorInfo(gamma, desc.m_DataLayout);
+        armcomputetensorutils::BuildArmComputeTensorInfo(gamma, descriptor.m_DataLayout);
 
     const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
             activationDescriptor);
@@ -47,7 +47,7 @@
                                                             &aclVarInfo,
                                                             &aclBetaInfo,
                                                             &aclGammaInfo,
-                                                            desc.m_Eps,
+                                                            descriptor.m_Eps,
                                                             activationInfo);
 }
 
@@ -57,6 +57,12 @@
     const arm_compute::CLCompileContext& clCompileContext)
     : FloatWorkload<BatchNormalizationQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClBatchNormalizationWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Mean = std::make_unique<arm_compute::CLTensor>();
     BuildArmComputeTensor(*m_Mean, m_Data.m_Mean->GetTensorInfo());
 
@@ -103,7 +109,7 @@
 
 void ClBatchNormalizationFloatWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClBatchNormalizationFloatWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClBatchNormalizationFloatWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp
index c9f1f7f..0ba2d97 100644
--- a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp
+++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp
@@ -19,7 +19,7 @@
                                                  const TensorInfo& var,
                                                  const TensorInfo& beta,
                                                  const TensorInfo& gamma,
-                                                 const BatchNormalizationDescriptor& desc,
+                                                 const BatchNormalizationDescriptor& descriptor,
                                                  const ActivationDescriptor* activationDescriptor = nullptr);
 
 class ClBatchNormalizationFloatWorkload : public FloatWorkload<BatchNormalizationQueueDescriptor>
diff --git a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp
index b9736db..8eef587 100644
--- a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp
+++ b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp
@@ -17,11 +17,17 @@
 {
 using namespace armcomputetensorutils;
 
-ClBatchToSpaceNdWorkload::ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& desc,
+ClBatchToSpaceNdWorkload::ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& descriptor,
                                                    const WorkloadInfo& info,
                                                    const arm_compute::CLCompileContext& clCompileContext)
-   : BaseWorkload<BatchToSpaceNdQueueDescriptor>(desc, info)
+    : BaseWorkload<BatchToSpaceNdQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClBatchToSpaceWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClBatchToSpaceNdWorkload", 1, 1);
 
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
@@ -30,8 +36,8 @@
     input.info()->set_data_layout(aclDataLayout);
 
     // ArmNN blockShape is [H, W] Cl asks for W, H
-    int32_t blockHeight = armnn::numeric_cast<int32_t>(desc.m_Parameters.m_BlockShape[0]);
-    int32_t blockWidth = armnn::numeric_cast<int32_t>(desc.m_Parameters.m_BlockShape[1]);
+    int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_Parameters.m_BlockShape[0]);
+    int32_t blockWidth = armnn::numeric_cast<int32_t>(descriptor.m_Parameters.m_BlockShape[1]);
 
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
     output.info()->set_data_layout(aclDataLayout);
@@ -41,19 +47,20 @@
 
 void ClBatchToSpaceNdWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClBatchToSpaceNdWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClBatchToSpaceNdWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
 arm_compute::Status ClBatchToSpaceNdWorkloadValidate(const TensorInfo& input,
                                                      const TensorInfo& output,
-                                                     const BatchToSpaceNdDescriptor& desc) {
-    DataLayout dataLayout = desc.m_DataLayout;
+                                                     const BatchToSpaceNdDescriptor& descriptor)
+{
+    DataLayout dataLayout = descriptor.m_DataLayout;
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, dataLayout);
 
     // ArmNN blockShape is [H, W] Cl asks for W, H
-    int32_t blockHeight = armnn::numeric_cast<int32_t>(desc.m_BlockShape[0]);
-    int32_t blockWidth = armnn::numeric_cast<int32_t>(desc.m_BlockShape[1]);
+    int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
+    int32_t blockWidth = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
 
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, dataLayout);
 
diff --git a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp
index 2262f33..7ef8300 100644
--- a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp
+++ b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp
@@ -13,7 +13,7 @@
 
 arm_compute::Status ClBatchToSpaceNdWorkloadValidate(const TensorInfo& input,
                                                      const TensorInfo& output,
-                                                     const BatchToSpaceNdDescriptor& desc);
+                                                     const BatchToSpaceNdDescriptor& descriptor);
 
 class ClBatchToSpaceNdWorkload : public BaseWorkload<BatchToSpaceNdQueueDescriptor>
 {
diff --git a/src/backends/cl/workloads/ClCastWorkload.cpp b/src/backends/cl/workloads/ClCastWorkload.cpp
index e995e42..07b76dc 100644
--- a/src/backends/cl/workloads/ClCastWorkload.cpp
+++ b/src/backends/cl/workloads/ClCastWorkload.cpp
@@ -40,7 +40,7 @@
 
 void ClCastWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClCastWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClCastWorkload_Execute", this->GetGuid());
     RunClFunction(m_CastLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClComparisonWorkload.cpp b/src/backends/cl/workloads/ClComparisonWorkload.cpp
index 35e6d68..d83682d 100644
--- a/src/backends/cl/workloads/ClComparisonWorkload.cpp
+++ b/src/backends/cl/workloads/ClComparisonWorkload.cpp
@@ -44,6 +44,12 @@
                                            const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<ComparisonQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonComparisonWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClComparisonWorkload", 2, 1);
 
     arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -57,7 +63,7 @@
 
 void ClComparisonWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClComparisonWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClComparisonWorkload_Execute", this->GetGuid());
     RunClFunction(m_ComparisonLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClConcatWorkload.cpp b/src/backends/cl/workloads/ClConcatWorkload.cpp
index 1c2d476..233fd19 100644
--- a/src/backends/cl/workloads/ClConcatWorkload.cpp
+++ b/src/backends/cl/workloads/ClConcatWorkload.cpp
@@ -18,9 +18,9 @@
 
 namespace
 {
-size_t CalcAxis(const OriginsDescriptor& desc)
+size_t CalcAxis(const OriginsDescriptor& descriptor)
 {
-    return (desc.GetNumDimensions() - desc.GetConcatAxis()) - 1;
+    return (descriptor.GetNumDimensions() - descriptor.GetConcatAxis()) - 1;
 }
 } //namespace
 
@@ -50,6 +50,12 @@
                                    const arm_compute::CLCompileContext& clCompileContext)
 : BaseWorkload<ConcatQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClConcatWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     bool allInputsAreSubtensors = true;
 
     // Check that all inputs are sub-tensors
@@ -95,7 +101,7 @@
 {
     if (m_Layer)
     {
-        ARMNN_SCOPED_PROFILING_EVENT_CL("ClConcatWorkload_Execute");
+        ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClConcatWorkload_Execute", this->GetGuid());
         m_Layer->run();
     }
 }
diff --git a/src/backends/cl/workloads/ClConstantWorkload.cpp b/src/backends/cl/workloads/ClConstantWorkload.cpp
index 60dcd59..1ff7504 100644
--- a/src/backends/cl/workloads/ClConstantWorkload.cpp
+++ b/src/backends/cl/workloads/ClConstantWorkload.cpp
@@ -51,7 +51,7 @@
 
 void ClConstantWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClConstantWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClConstantWorkload_Execute", this->GetGuid());
 
     // The intermediate tensor held by the corresponding layer output handler can be initialised with the given data
     // on the first inference, then reused for subsequent inferences.
diff --git a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
index aaffd83..455ec1a 100644
--- a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
+++ b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
@@ -30,7 +30,7 @@
 
 void ClConvertFp16ToFp32Workload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvertFp16ToFp32Workload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClConvertFp16ToFp32Workload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
index a9f1d91..8e6b0ce 100644
--- a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
+++ b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
@@ -30,7 +30,7 @@
 
 void ClConvertFp32ToFp16Workload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvertFp32ToFp16Workload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClConvertFp32ToFp16Workload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
index ab9d5bc..12a47dc 100644
--- a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
+++ b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
@@ -132,7 +132,7 @@
     }
 
     // Report Profiling Details
-    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClConvolution2dWorkload_Execute_Guid",
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClConvolution2dWorkload_Construct",
                                          descriptor.m_Parameters,
                                          detailsInfo,
                                          this->GetGuid());
diff --git a/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp b/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp
index d42b261..aeab029 100644
--- a/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp
+++ b/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp
@@ -21,12 +21,12 @@
 
 arm_compute::Status ClDepthToSpaceWorkloadValidate(const TensorInfo& input,
                                                    const TensorInfo& output,
-                                                   const DepthToSpaceDescriptor& desc)
+                                                   const DepthToSpaceDescriptor& descriptor)
 {
-    DataLayout dataLayout = desc.m_DataLayout;
+    DataLayout dataLayout = descriptor.m_DataLayout;
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, dataLayout);
 
-    int32_t blockSize = armnn::numeric_cast<int32_t>(desc.m_BlockSize);
+    int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
 
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, dataLayout);
 
@@ -36,11 +36,17 @@
     return aclStatus;
 }
 
-ClDepthToSpaceWorkload::ClDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor& desc,
+ClDepthToSpaceWorkload::ClDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor& descriptor,
                                                const WorkloadInfo& info,
                                                const arm_compute::CLCompileContext& clCompileContext)
-    : BaseWorkload<DepthToSpaceQueueDescriptor>(desc, info)
+    : BaseWorkload<DepthToSpaceQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClDepthToSpaceWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClDepthToSpaceWorkload", 1, 1);
 
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
@@ -49,7 +55,7 @@
         PolymorphicPointerDowncast<IClTensorHandle>(m_Data.m_Inputs[0])->GetTensor();
     input.info()->set_data_layout(aclDataLayout);
 
-    int32_t blockSize = armnn::numeric_cast<int32_t>(desc.m_Parameters.m_BlockSize);
+    int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_Parameters.m_BlockSize);
 
     arm_compute::ICLTensor& output =
         PolymorphicPointerDowncast<IClTensorHandle>(m_Data.m_Outputs[0])->GetTensor();
@@ -60,7 +66,7 @@
 
 void ClDepthToSpaceWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClDepthToSpaceWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClDepthToSpaceWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp b/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp
index 6cb8bb5..01f8333 100644
--- a/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp
+++ b/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp
@@ -16,7 +16,7 @@
 
 arm_compute::Status ClDepthToSpaceWorkloadValidate(const TensorInfo& input,
                                                    const TensorInfo& output,
-                                                   const DepthToSpaceDescriptor& desc);
+                                                   const DepthToSpaceDescriptor& descriptor);
 
 class ClDepthToSpaceWorkload : public BaseWorkload<DepthToSpaceQueueDescriptor>
 {
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
index 9a9977b..9592b37 100644
--- a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
@@ -78,15 +78,32 @@
     const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
 {
+    // Add details for profiling output
+    WorkloadInfo detailsInfo;
+
+    detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
+    detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
+    detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Weight->GetTensorInfo());
+    if (descriptor.m_Parameters.m_BiasEnabled)
+    {
+        detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Bias->GetTensorInfo());
+    }
+
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClDepthwiseConvolutionWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         detailsInfo,
+                                         this->GetGuid());
+
     // ArmNN's weight format is usually [ M, I, H, W ] but for depthwise its [ 1, H, W, I*M]
     // Permute to [ 1, I * M, H, W ] (if NCHW), as required by the compute library
     ConstTensor weightPermuted;
     unsigned int depthMultiplier;
     std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[m_Data.m_Weight->GetTensorInfo().GetNumBytes()]);
     std::tie(weightPermuted, depthMultiplier) = Convert1HWOTensorToAcl(m_Data.m_Weight,
-                                                                        info.m_InputTensorInfos[0],
-                                                                        m_Data.m_Parameters.m_DataLayout,
-                                                                        permuteBuffer.get());
+                                                                       info.m_InputTensorInfos[0],
+                                                                       m_Data.m_Parameters.m_DataLayout,
+                                                                       permuteBuffer.get());
 
     // Convert the weights into the compute library format
     m_KernelTensor = std::make_unique<arm_compute::CLTensor>();
@@ -151,7 +168,7 @@
 
 void ClDepthwiseConvolutionWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClDepthwiseConvolutionWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClDepthwiseConvolutionWorkload_Execute", this->GetGuid());
     ARMNN_ASSERT(m_DepthwiseConvolutionLayer);
 
     RunClFunction(*m_DepthwiseConvolutionLayer, CHECK_LOCATION());
diff --git a/src/backends/cl/workloads/ClDequantizeWorkload.cpp b/src/backends/cl/workloads/ClDequantizeWorkload.cpp
index 42cc579..6bdeaa8 100644
--- a/src/backends/cl/workloads/ClDequantizeWorkload.cpp
+++ b/src/backends/cl/workloads/ClDequantizeWorkload.cpp
@@ -49,7 +49,7 @@
 {
     if (m_Layer)
     {
-        ARMNN_SCOPED_PROFILING_EVENT_CL("ClDequantizeWorkload_Execute");
+        ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClDequantizeWorkload_Execute", this->GetGuid());
         m_Layer->run();
     }
 }
diff --git a/src/backends/cl/workloads/ClDivisionWorkload.cpp b/src/backends/cl/workloads/ClDivisionWorkload.cpp
index 76220a1..d444a19 100644
--- a/src/backends/cl/workloads/ClDivisionWorkload.cpp
+++ b/src/backends/cl/workloads/ClDivisionWorkload.cpp
@@ -49,7 +49,7 @@
 
 void ClDivisionWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClDivisionWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClDivisionWorkload_Execute", this->GetGuid());
     RunClFunction(m_ArithmeticDivision, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClExpWorkload.cpp b/src/backends/cl/workloads/ClExpWorkload.cpp
index 60c383f..9c1f036 100644
--- a/src/backends/cl/workloads/ClExpWorkload.cpp
+++ b/src/backends/cl/workloads/ClExpWorkload.cpp
@@ -28,6 +28,12 @@
                              const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<ElementwiseUnaryQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClExpWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClExpWorkload", 1, 1);
 
     arm_compute::ICLTensor& input  = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -38,7 +44,7 @@
 
 void ClExpWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClExpWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClExpWorkload_Execute", this->GetGuid());
     RunClFunction(m_ExpLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClFillWorkload.cpp b/src/backends/cl/workloads/ClFillWorkload.cpp
index a2204fa..8cb2db4 100644
--- a/src/backends/cl/workloads/ClFillWorkload.cpp
+++ b/src/backends/cl/workloads/ClFillWorkload.cpp
@@ -20,6 +20,12 @@
                                const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<FillQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClFillWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClFillWorkload", 1, 1);
 
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
@@ -30,7 +36,7 @@
 
 void ClFillWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClFillWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClFillWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClFloorFloatWorkload.cpp b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp
index 3915270..d2b4871 100644
--- a/src/backends/cl/workloads/ClFloorFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp
@@ -35,7 +35,7 @@
 
 void ClFloorFloatWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClFloorFloatWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClFloorFloatWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
index d1d911a..a0889e1 100644
--- a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
+++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
@@ -28,10 +28,10 @@
     const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
 
     arm_compute::TensorInfo aclBiases;
-    arm_compute::TensorInfo *optionalAclBiases = nullptr;
+    arm_compute::TensorInfo* optionalAclBiases = nullptr;
     if (descriptor.m_BiasEnabled)
     {
-        aclBiases  = BuildArmComputeTensorInfo(biases);
+        aclBiases = BuildArmComputeTensorInfo(biases);
         optionalAclBiases = &aclBiases;
     }
 
@@ -50,9 +50,25 @@
     const WorkloadInfo& info,
     std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
     const arm_compute::CLCompileContext& clCompileContext)
-    : BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info)
-    , m_FullyConnectedLayer(memoryManager)
+    : BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info), m_FullyConnectedLayer(memoryManager)
 {
+    // Add details for profiling output
+    WorkloadInfo detailsInfo;
+
+    detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
+    detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
+    detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Weight->GetTensorInfo());
+    if (descriptor.m_Parameters.m_BiasEnabled)
+    {
+        detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Bias->GetTensorInfo());
+    }
+
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClFullyConnectedWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         detailsInfo,
+                                         this->GetGuid());
+
     m_WeightsTensor = std::make_unique<arm_compute::CLTensor>();
     BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo());
 
@@ -64,13 +80,13 @@
 
     m_Data.ValidateInputsOutputs("ClFullyConnectedWorkload", 1, 1);
 
-    arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
 
     arm_compute::FullyConnectedLayerInfo fc_info =
-            ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
+        ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
 
     m_FullyConnectedLayer.configure(clCompileContext,
                                     &input,
@@ -94,7 +110,7 @@
 
 void ClFullyConnectedWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClFullyConnectedWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClFullyConnectedWorkload_Execute", this->GetGuid());
     RunClFunction(m_FullyConnectedLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClGatherWorkload.cpp b/src/backends/cl/workloads/ClGatherWorkload.cpp
index 98dfe7b..7c8d1ab 100644
--- a/src/backends/cl/workloads/ClGatherWorkload.cpp
+++ b/src/backends/cl/workloads/ClGatherWorkload.cpp
@@ -31,6 +31,12 @@
                                    const arm_compute::CLCompileContext& clCompileContext)
         : BaseWorkload<GatherQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClGatherWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClGatherWorkload", 1, 1);
 
     arm_compute::ICLTensor& input    = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -44,7 +50,7 @@
 
 void ClGatherWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClGatherWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClGatherWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 } // namespace armnn
diff --git a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp b/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp
index a0e8e7b..a4f20c5 100644
--- a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp
+++ b/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp
@@ -35,6 +35,12 @@
     const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<InstanceNormalizationQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClInstanceNormalizationWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClInstanceNormalizationWorkload", 1, 1);
 
     arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -54,7 +60,7 @@
 
 void ClInstanceNormalizationWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClInstanceNormalizationWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClInstanceNormalizationWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp
index 984f21a..953ff4a 100644
--- a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp
@@ -31,6 +31,12 @@
                                                                const arm_compute::CLCompileContext& clCompileContext)
     : FloatWorkload<L2NormalizationQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClL2NormalizationFloatWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClL2NormalizationFloatWorkload", 1, 1);
 
     arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -47,7 +53,7 @@
 
 void ClL2NormalizationFloatWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClL2NormalizationFloatWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClL2NormalizationFloatWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp b/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp
index 1a255f1..6c03211 100644
--- a/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp
@@ -32,6 +32,12 @@
         : BaseWorkload<LogSoftmaxQueueDescriptor>(descriptor, info)
         , m_LogSoftmaxLayer(memoryManager)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClLogSoftmaxWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClLogSoftmaxWorkload", 1, 1);
 
     arm_compute::ICLTensor& input  = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -43,7 +49,7 @@
 
 void ClLogSoftmaxWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClLogSoftmaxWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClLogSoftmaxWorkload_Execute", this->GetGuid());
     RunClFunction(m_LogSoftmaxLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClLogWorkload.cpp b/src/backends/cl/workloads/ClLogWorkload.cpp
index b35345f..180c0af 100644
--- a/src/backends/cl/workloads/ClLogWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogWorkload.cpp
@@ -38,7 +38,7 @@
 
 void ClLogWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClLogWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClLogWorkload_Execute", this->GetGuid());
     RunClFunction(m_LogLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClLogicalAndWorkload.cpp b/src/backends/cl/workloads/ClLogicalAndWorkload.cpp
index f04cede..30a187b 100644
--- a/src/backends/cl/workloads/ClLogicalAndWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogicalAndWorkload.cpp
@@ -36,6 +36,12 @@
                                            const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<LogicalBinaryQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClLogicalAndWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClLogicalAndWorkload", 2, 1);
 
     arm_compute::ICLTensor& input0 = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -47,7 +53,7 @@
 
 void ClLogicalAndWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClLogicalAndWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClLogicalAndWorkload_Execute", this->GetGuid());
     m_LogicalAndLayer.run();
 }
 
diff --git a/src/backends/cl/workloads/ClLogicalNotWorkload.cpp b/src/backends/cl/workloads/ClLogicalNotWorkload.cpp
index 475e57f..4e95fcd 100644
--- a/src/backends/cl/workloads/ClLogicalNotWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogicalNotWorkload.cpp
@@ -33,6 +33,12 @@
                                            const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<ElementwiseUnaryQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClLogicalNotWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClLogicalNotWorkload", 1, 1);
 
     arm_compute::ICLTensor& input  = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -43,7 +49,7 @@
 
 void ClLogicalNotWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClLogicalNotWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClLogicalNotWorkload_Execute", this->GetGuid());
     m_LogicalNotLayer.run();
 }
 
diff --git a/src/backends/cl/workloads/ClLogicalOrWorkload.cpp b/src/backends/cl/workloads/ClLogicalOrWorkload.cpp
index 355310e..b4eb11c 100644
--- a/src/backends/cl/workloads/ClLogicalOrWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogicalOrWorkload.cpp
@@ -36,6 +36,12 @@
                                          const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<LogicalBinaryQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClLogicalOrWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClLogicalOrWorkload", 2, 1);
 
     arm_compute::ICLTensor& input0 = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -47,7 +53,7 @@
 
 void ClLogicalOrWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClLogicalOrWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClLogicalOrWorkload_Execute", this->GetGuid());
     m_LogicalOrLayer.run();
 }
 
diff --git a/src/backends/cl/workloads/ClLstmFloatWorkload.cpp b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp
index 908f20b..709b145 100644
--- a/src/backends/cl/workloads/ClLstmFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp
@@ -24,6 +24,12 @@
                                          const arm_compute::CLCompileContext& clCompileContext)
         : FloatWorkload<LstmQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClLstmFloatWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     arm_compute::LSTMParams<arm_compute::ICLTensor> lstm_param;
 
     // Basic parameters
@@ -254,7 +260,7 @@
 
 void ClLstmFloatWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClLstmFloatWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClLstmFloatWorkload_Execute", this->GetGuid());
     RunClFunction(m_LstmLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClMaximumWorkload.cpp b/src/backends/cl/workloads/ClMaximumWorkload.cpp
index 0aa15e5..5a19c69 100644
--- a/src/backends/cl/workloads/ClMaximumWorkload.cpp
+++ b/src/backends/cl/workloads/ClMaximumWorkload.cpp
@@ -52,7 +52,7 @@
 
 void ClMaximumWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClMaximumWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClMaximumWorkload_Execute", this->GetGuid());
     RunClFunction(m_MaximumLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClMeanWorkload.cpp b/src/backends/cl/workloads/ClMeanWorkload.cpp
index 4cc0f7c..cd79d04 100644
--- a/src/backends/cl/workloads/ClMeanWorkload.cpp
+++ b/src/backends/cl/workloads/ClMeanWorkload.cpp
@@ -16,16 +16,16 @@
 
 arm_compute::Status ClMeanValidate(const TensorInfo& input,
                                    const TensorInfo& output,
-                                   const MeanDescriptor& desc)
+                                   const MeanDescriptor& descriptor)
 {
     const arm_compute::TensorInfo aclInputInfo  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
 
     arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
                                                                           input.GetNumDimensions(),
-                                                                          desc.m_Axis);
+                                                                          descriptor.m_Axis);
 
-    return arm_compute::CLReduceMean::validate(&aclInputInfo, coords, desc.m_KeepDims, &aclOutputInfo);
+    return arm_compute::CLReduceMean::validate(&aclInputInfo, coords, descriptor.m_KeepDims, &aclOutputInfo);
 }
 
 ClMeanWorkload::ClMeanWorkload(const MeanQueueDescriptor& descriptor,
@@ -33,6 +33,11 @@
                                const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<MeanQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClMeanWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
     m_Data.ValidateInputsOutputs("ClMeanWorkload", 1, 1);
 
     arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -47,7 +52,7 @@
 
 void ClMeanWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClMeanWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClMeanWorkload_Execute", this->GetGuid());
     m_Layer.run();
 }
 
diff --git a/src/backends/cl/workloads/ClMeanWorkload.hpp b/src/backends/cl/workloads/ClMeanWorkload.hpp
index 04e9fe2..c9229ac 100644
--- a/src/backends/cl/workloads/ClMeanWorkload.hpp
+++ b/src/backends/cl/workloads/ClMeanWorkload.hpp
@@ -14,7 +14,7 @@
 
 arm_compute::Status ClMeanValidate(const TensorInfo& input,
                                    const TensorInfo& output,
-                                   const MeanDescriptor& desc);
+                                   const MeanDescriptor& descriptor);
 
 class ClMeanWorkload : public BaseWorkload<MeanQueueDescriptor>
 {
diff --git a/src/backends/cl/workloads/ClMinimumWorkload.cpp b/src/backends/cl/workloads/ClMinimumWorkload.cpp
index 4924002..22e9287 100644
--- a/src/backends/cl/workloads/ClMinimumWorkload.cpp
+++ b/src/backends/cl/workloads/ClMinimumWorkload.cpp
@@ -52,7 +52,7 @@
 
 void ClMinimumWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClMinimumWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClMinimumWorkload_Execute", this->GetGuid());
     RunClFunction(m_MinimumLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClMultiplicationWorkload.cpp b/src/backends/cl/workloads/ClMultiplicationWorkload.cpp
index 2bd1e16..b0b71ce 100644
--- a/src/backends/cl/workloads/ClMultiplicationWorkload.cpp
+++ b/src/backends/cl/workloads/ClMultiplicationWorkload.cpp
@@ -75,7 +75,7 @@
 
 void ClMultiplicationWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClMultiplicationWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClMultiplicationWorkload_Execute", this->GetGuid());
     RunClFunction(m_PixelWiseMultiplication, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClNegWorkload.cpp b/src/backends/cl/workloads/ClNegWorkload.cpp
index 7505ab6..fb5b040 100644
--- a/src/backends/cl/workloads/ClNegWorkload.cpp
+++ b/src/backends/cl/workloads/ClNegWorkload.cpp
@@ -38,7 +38,7 @@
 
 void ClNegWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClNegWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClNegWorkload_Execute", this->GetGuid());
     RunClFunction(m_NegLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp
index e9b2caf..9c6e0a1 100644
--- a/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp
@@ -33,6 +33,12 @@
                                                            const arm_compute::CLCompileContext& clCompileContext)
     : FloatWorkload<NormalizationQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClNormalizationWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClNormalizationFloatWorkload", 1, 1);
 
     arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -49,7 +55,7 @@
 
 void ClNormalizationFloatWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClNormalizationFloatWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClNormalizationFloatWorkload_Execute", this->GetGuid());
     RunClFunction(m_NormalizationLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClPadWorkload.cpp b/src/backends/cl/workloads/ClPadWorkload.cpp
index 533855c..10c8907 100644
--- a/src/backends/cl/workloads/ClPadWorkload.cpp
+++ b/src/backends/cl/workloads/ClPadWorkload.cpp
@@ -20,6 +20,12 @@
                              const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<PadQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClPadWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     this->m_Data.ValidateInputsOutputs("ClPadWorkload", 1, 1);
 
     arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
@@ -40,7 +46,7 @@
 
 void ClPadWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClPadWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClPadWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClPermuteWorkload.cpp b/src/backends/cl/workloads/ClPermuteWorkload.cpp
index 5aadc76..c7efe7a 100644
--- a/src/backends/cl/workloads/ClPermuteWorkload.cpp
+++ b/src/backends/cl/workloads/ClPermuteWorkload.cpp
@@ -31,6 +31,12 @@
                                      const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<PermuteQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClPermuteWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     using armcomputetensorutils::BuildArmComputePermutationVector;
 
     m_Data.ValidateInputsOutputs(GetName(), 1, 1);
@@ -45,7 +51,7 @@
 
 void ClPermuteWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL( GetName() + "_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID(GetName() + "_Execute", this->GetGuid());
     RunClFunction(m_PermuteFunction, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClPooling2dWorkload.cpp b/src/backends/cl/workloads/ClPooling2dWorkload.cpp
index c7cc102..ff441ef 100644
--- a/src/backends/cl/workloads/ClPooling2dWorkload.cpp
+++ b/src/backends/cl/workloads/ClPooling2dWorkload.cpp
@@ -33,6 +33,12 @@
     const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<Pooling2dQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClPooling2dWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClPooling2dWorkload", 1, 1);
 
     arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -55,7 +61,7 @@
 
 void ClPooling2dWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClPooling2dWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClPooling2dWorkload_Execute", this->GetGuid());
     RunClFunction(m_PoolingLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClPreluWorkload.cpp b/src/backends/cl/workloads/ClPreluWorkload.cpp
index 9b45441..beb9e43 100644
--- a/src/backends/cl/workloads/ClPreluWorkload.cpp
+++ b/src/backends/cl/workloads/ClPreluWorkload.cpp
@@ -42,7 +42,7 @@
 
 void ClPreluWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClPreluWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClPreluWorkload_Execute", this->GetGuid());
     RunClFunction(m_PreluLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClQLstmWorkload.cpp b/src/backends/cl/workloads/ClQLstmWorkload.cpp
index 0ae3715..d7c7af7 100644
--- a/src/backends/cl/workloads/ClQLstmWorkload.cpp
+++ b/src/backends/cl/workloads/ClQLstmWorkload.cpp
@@ -19,6 +19,12 @@
                                  const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<QLstmQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClQLstmWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     arm_compute::LSTMParams<arm_compute::ICLTensor> qLstmParams;
 
     // Mandatory params
@@ -231,6 +237,7 @@
 
 void ClQLstmWorkload::Execute() const
 {
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClQuantizedLstmWorkload_Execute", this->GetGuid());
     m_QLstmLayer.run();
 }
 
diff --git a/src/backends/cl/workloads/ClQuantizeWorkload.cpp b/src/backends/cl/workloads/ClQuantizeWorkload.cpp
index 527c640..dc668fd 100644
--- a/src/backends/cl/workloads/ClQuantizeWorkload.cpp
+++ b/src/backends/cl/workloads/ClQuantizeWorkload.cpp
@@ -44,7 +44,7 @@
 
 void ClQuantizeWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClQuantizeWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClQuantizeWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp b/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp
index d50414b..7bacf70 100644
--- a/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp
+++ b/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp
@@ -137,7 +137,7 @@
 
 void ClQuantizedLstmWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClQuantizedLstmWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClQuantizedLstmWorkload_Execute", this->GetGuid());
     RunClFunction(m_QuantizedLstmLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClReduceWorkload.cpp b/src/backends/cl/workloads/ClReduceWorkload.cpp
index c9c2e0a..1a7bc64 100644
--- a/src/backends/cl/workloads/ClReduceWorkload.cpp
+++ b/src/backends/cl/workloads/ClReduceWorkload.cpp
@@ -17,28 +17,28 @@
 
 arm_compute::Status ClReduceWorkloadValidate(const TensorInfo& input,
                                              const TensorInfo& output,
-                                             const ReduceDescriptor& desc)
+                                             const ReduceDescriptor& descriptor)
 {
-    if ( desc.m_vAxis.size()==1 || desc.m_vAxis.empty())
+    if (descriptor.m_vAxis.size() == 1 || descriptor.m_vAxis.empty())
     {
-        const arm_compute::TensorInfo aclInputInfo  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+        const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
         const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
 
         arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
                                                                               input.GetNumDimensions(),
-                                                                              desc.m_vAxis);
+                                                                              descriptor.m_vAxis);
 
         return arm_compute::CLReductionOperation::validate(&aclInputInfo,
                                                            &aclOutputInfo,
                                                            static_cast<unsigned int>(coords[0]),
-                                                           ConvertReductionOperationToAcl(desc),
-                                                           desc.m_KeepDims);
+                                                           ConvertReductionOperationToAcl(descriptor),
+                                                           descriptor.m_KeepDims);
     }
     else
     {
         // Validate layer if there are multiple axes.
         arm_compute::Status status;
-        IS_MULTI_AXES_REDUCE_SUPPORTED(ClReduceWorkloadValidate, input, desc, status);
+        IS_MULTI_AXES_REDUCE_SUPPORTED(ClReduceWorkloadValidate, input, descriptor, status);
         return status;
     }
 }
@@ -46,6 +46,12 @@
 ClReduceWorkload::ClReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info)
     : BaseWorkload<ReduceQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClReduceWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClReduceWorkload", 1, 1);
 
     arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -63,7 +69,7 @@
 
 void ClReduceWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClReduceWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClReduceWorkload_Execute", this->GetGuid());
     m_Layer.run();
 }
 
diff --git a/src/backends/cl/workloads/ClReduceWorkload.hpp b/src/backends/cl/workloads/ClReduceWorkload.hpp
index 8481eee..8b0aadb 100644
--- a/src/backends/cl/workloads/ClReduceWorkload.hpp
+++ b/src/backends/cl/workloads/ClReduceWorkload.hpp
@@ -14,7 +14,7 @@
 
 arm_compute::Status ClReduceWorkloadValidate(const TensorInfo& input,
                                              const TensorInfo& output,
-                                             const ReduceDescriptor& desc);
+                                             const ReduceDescriptor& descriptor);
 
 class ClReduceWorkload : public BaseWorkload<ReduceQueueDescriptor>
 {
diff --git a/src/backends/cl/workloads/ClReshapeWorkload.cpp b/src/backends/cl/workloads/ClReshapeWorkload.cpp
index 1f82cfb..b9b92a8 100644
--- a/src/backends/cl/workloads/ClReshapeWorkload.cpp
+++ b/src/backends/cl/workloads/ClReshapeWorkload.cpp
@@ -36,7 +36,7 @@
 
 void ClReshapeWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClReshapeWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClReshapeWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClResizeWorkload.cpp b/src/backends/cl/workloads/ClResizeWorkload.cpp
index 3406011..0c2b930 100644
--- a/src/backends/cl/workloads/ClResizeWorkload.cpp
+++ b/src/backends/cl/workloads/ClResizeWorkload.cpp
@@ -51,6 +51,12 @@
                                    const arm_compute::CLCompileContext& clCompileContext)
   : BaseWorkload<ResizeQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClResizeWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClResizeWorkload", 1, 1);
 
     arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -81,7 +87,7 @@
 
 void ClResizeWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClResizeWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClResizeWorkload_Execute", this->GetGuid());
     RunClFunction(m_ResizeLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClRsqrtWorkload.cpp b/src/backends/cl/workloads/ClRsqrtWorkload.cpp
index a3a04c1..8d48bfa 100644
--- a/src/backends/cl/workloads/ClRsqrtWorkload.cpp
+++ b/src/backends/cl/workloads/ClRsqrtWorkload.cpp
@@ -38,7 +38,7 @@
 
 void ClRsqrtWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClRsqrtWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClRsqrtWorkload_Execute", this->GetGuid());
     RunClFunction(m_RsqrtLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClSinWorkload.cpp b/src/backends/cl/workloads/ClSinWorkload.cpp
index 17572c6..dcde349 100644
--- a/src/backends/cl/workloads/ClSinWorkload.cpp
+++ b/src/backends/cl/workloads/ClSinWorkload.cpp
@@ -38,7 +38,7 @@
 
 void ClSinWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClSinWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClSinWorkload_Execute", this->GetGuid());
     RunClFunction(m_SinLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClSliceWorkload.cpp b/src/backends/cl/workloads/ClSliceWorkload.cpp
index 1627196..6f3c1a9 100644
--- a/src/backends/cl/workloads/ClSliceWorkload.cpp
+++ b/src/backends/cl/workloads/ClSliceWorkload.cpp
@@ -35,6 +35,12 @@
                                  const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<SliceQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClSliceWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClSliceWorkload", 1, 1);
 
     arm_compute::ICLTensor& input  = PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -50,7 +56,7 @@
 
 void ClSliceWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClSliceWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClSliceWorkload_Execute", this->GetGuid());
     RunClFunction(m_SliceFunction, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClSoftmaxWorkload.cpp b/src/backends/cl/workloads/ClSoftmaxWorkload.cpp
index 4547c68..0b7b10d 100644
--- a/src/backends/cl/workloads/ClSoftmaxWorkload.cpp
+++ b/src/backends/cl/workloads/ClSoftmaxWorkload.cpp
@@ -32,6 +32,12 @@
         : BaseWorkload<SoftmaxQueueDescriptor>(descriptor, info)
         , m_SoftmaxLayer(memoryManager)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClSoftmaxWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClSoftmaxWorkload", 1, 1);
 
     arm_compute::ICLTensor& input  = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -43,7 +49,7 @@
 
 void ClSoftmaxWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClSoftmaxWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClSoftmaxWorkload_Execute", this->GetGuid());
     RunClFunction(m_SoftmaxLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp
index 3aa8ebd..7016619 100644
--- a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp
+++ b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp
@@ -50,6 +50,12 @@
     const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<SpaceToBatchNdQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClSpaceToBatchNdWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClSpaceToBatchNdWorkload", 1, 1);
 
     arm_compute::ICLTensor& input  =
@@ -81,7 +87,7 @@
 
 void ClSpaceToBatchNdWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClSpaceToBatchNdWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClSpaceToBatchNdWorkload_Execute", this->GetGuid());
     RunClFunction(m_SpaceToBatchLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp b/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
index 67487c4..119605a 100644
--- a/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
+++ b/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
@@ -17,11 +17,16 @@
 {
 using namespace armcomputetensorutils;
 
-ClSpaceToDepthWorkload::ClSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor& desc,
+ClSpaceToDepthWorkload::ClSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor& descriptor,
                                                const WorkloadInfo& info,
                                                const arm_compute::CLCompileContext& clCompileContext)
-    : BaseWorkload<SpaceToDepthQueueDescriptor>(desc, info)
+    : BaseWorkload<SpaceToDepthQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClSpaceToDepthWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
     m_Data.ValidateInputsOutputs("ClSpaceToDepthWorkload", 1, 1);
 
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
@@ -29,7 +34,7 @@
     arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     input.info()->set_data_layout(aclDataLayout);
 
-    int32_t blockSize = armnn::numeric_cast<int32_t>(desc.m_Parameters.m_BlockSize);
+    int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_Parameters.m_BlockSize);
 
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
     output.info()->set_data_layout(aclDataLayout);
@@ -39,18 +44,18 @@
 
 void ClSpaceToDepthWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClSpaceToDepthWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClSpaceToDepthWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
 arm_compute::Status ClSpaceToDepthWorkloadValidate(const TensorInfo& input,
                                                    const TensorInfo& output,
-                                                   const SpaceToDepthDescriptor& desc)
+                                                   const SpaceToDepthDescriptor& descriptor)
 {
-    DataLayout dataLayout = desc.m_DataLayout;
+    DataLayout dataLayout = descriptor.m_DataLayout;
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, dataLayout);
 
-    int32_t blockSize = armnn::numeric_cast<int32_t>(desc.m_BlockSize);
+    int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
 
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, dataLayout);
 
diff --git a/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp b/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp
index b782bbe..3674bda 100644
--- a/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp
+++ b/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp
@@ -14,7 +14,7 @@
 {
 arm_compute::Status ClSpaceToDepthWorkloadValidate(const TensorInfo& input,
                                                    const TensorInfo& output,
-                                                   const SpaceToDepthDescriptor& desc);
+                                                   const SpaceToDepthDescriptor& descriptor);
 
 class ClSpaceToDepthWorkload : public BaseWorkload<SpaceToDepthQueueDescriptor>
 {
diff --git a/src/backends/cl/workloads/ClSplitterWorkload.cpp b/src/backends/cl/workloads/ClSplitterWorkload.cpp
index 8eb58c9..b1ab17d 100644
--- a/src/backends/cl/workloads/ClSplitterWorkload.cpp
+++ b/src/backends/cl/workloads/ClSplitterWorkload.cpp
@@ -57,6 +57,11 @@
                                        const arm_compute::CLCompileContext&)
         : BaseWorkload<SplitterQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClSplitterWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
     bool allOutputsAreSubtensors = true;
 
     // Check that all outputs are sub-tensors
@@ -109,7 +114,7 @@
 {
     if (m_Layer)
     {
-        ARMNN_SCOPED_PROFILING_EVENT_CL("ClSplitterWorkload_Execute");
+        ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClSplitterWorkload_Execute", this->GetGuid());
         m_Layer->run();
     }
 }
diff --git a/src/backends/cl/workloads/ClStackWorkload.cpp b/src/backends/cl/workloads/ClStackWorkload.cpp
index 38c76eb..5070356 100644
--- a/src/backends/cl/workloads/ClStackWorkload.cpp
+++ b/src/backends/cl/workloads/ClStackWorkload.cpp
@@ -49,6 +49,12 @@
                                  const arm_compute::CLCompileContext& clCompileContext)
 : BaseWorkload<StackQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClStackWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     std::vector<arm_compute::ICLTensor*> aclInputs;
     for (auto input : m_Data.m_Inputs)
     {
@@ -67,7 +73,7 @@
 {
     if (m_Layer)
     {
-        ARMNN_SCOPED_PROFILING_EVENT_CL("ClStackWorkload_Execute");
+        ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClStackWorkload_Execute", this->GetGuid());
         m_Layer->run();
     }
 }
diff --git a/src/backends/cl/workloads/ClStridedSliceWorkload.cpp b/src/backends/cl/workloads/ClStridedSliceWorkload.cpp
index adf32ce..51a77c5 100644
--- a/src/backends/cl/workloads/ClStridedSliceWorkload.cpp
+++ b/src/backends/cl/workloads/ClStridedSliceWorkload.cpp
@@ -57,6 +57,12 @@
                                                const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<StridedSliceQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClStridedSliceWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClStridedSliceWorkload", 1, 1);
 
     arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -92,7 +98,7 @@
 
 void ClStridedSliceWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClStridedSliceWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClStridedSliceWorkload_Execute", this->GetGuid());
     RunClFunction(m_StridedSliceLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClSubtractionWorkload.cpp b/src/backends/cl/workloads/ClSubtractionWorkload.cpp
index e320fec..6465e3e 100644
--- a/src/backends/cl/workloads/ClSubtractionWorkload.cpp
+++ b/src/backends/cl/workloads/ClSubtractionWorkload.cpp
@@ -36,7 +36,7 @@
 
 void ClSubtractionWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClSubtractionWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClSubtractionWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp
index b40b4b1..0b5c7c6 100644
--- a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp
+++ b/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp
@@ -61,6 +61,23 @@
     BaseWorkload<TransposeConvolution2dQueueDescriptor>(descriptor, info),
     m_Layer(memoryManager)
 {
+    // Add details for profiling output
+    WorkloadInfo detailsInfo;
+
+    detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
+    detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
+    detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Weight->GetTensorInfo());
+    if (descriptor.m_Parameters.m_BiasEnabled)
+    {
+        detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Bias->GetTensorInfo());
+    }
+
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClTransposeConvolutionWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         detailsInfo,
+                                         this->GetGuid());
+
     const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo();
 
     m_WeightsTensor = std::make_unique<arm_compute::CLTensor>();
@@ -98,7 +115,7 @@
 
 void ClTransposeConvolution2dWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClTransposeConvolution2dWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClTransposeConvolution2dWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClTransposeWorkload.cpp b/src/backends/cl/workloads/ClTransposeWorkload.cpp
index 7ef502e..d80eae8 100644
--- a/src/backends/cl/workloads/ClTransposeWorkload.cpp
+++ b/src/backends/cl/workloads/ClTransposeWorkload.cpp
@@ -31,6 +31,12 @@
                                          const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<TransposeQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClTransposeWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs(GetName(), 1, 1);
 
     const arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -45,7 +51,7 @@
 
 void ClTransposeWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL(GetName() + "_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID(GetName() + "_Execute", this->GetGuid());
     RunClFunction(m_PermuteFunction, CHECK_LOCATION());
 }