IVGCVSW-5980 JSON profiling output

 * Add new ProfilingDetails class to construct operator details string
 * Add new macro which helps append layer details to ostream
 * Add ProfilingEnabled to NetworkProperties so that profiling can be
   realised when loading the network
 * Add further optional info to WorkloadInfo specific to convolutions
 * Generalise some JsonPrinter functions into JsonUtils for reusability
 * Remove explicit enabling of profiling within InferenceModel as it is
   done when loading network
 * Add ProfilingDetails macros to ConvolutionWorkloads for validation

Signed-off-by: Keith Davis <keith.davis@arm.com>
Change-Id: Ie84bc7dc667e72e6bcb635544f9ead7af1765690
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0156a19..2b0c952 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -377,6 +377,7 @@
     src/armnn/OutputHandler.hpp
     src/armnn/Profiling.cpp
     src/armnn/ProfilingEvent.cpp
+    src/armnn/ProfilingDetails.hpp
     src/armnn/ProfilingEvent.hpp
     src/armnn/Profiling.hpp
     src/armnn/Runtime.cpp
diff --git a/include/armnn/IProfiler.hpp b/include/armnn/IProfiler.hpp
index ac422b7..1d35085 100644
--- a/include/armnn/IProfiler.hpp
+++ b/include/armnn/IProfiler.hpp
@@ -16,6 +16,8 @@
 class BackendId;
 class Instrument;
 class Event;
+struct WorkloadInfo;
+
 class IProfiler
 {
 public:
@@ -41,12 +43,22 @@
     IProfiler();
 
 private:
+
     using InstrumentPtr = std::unique_ptr<Instrument>;
+
+    template<typename DescriptorType>
+    void AddLayerDetails(const std::string& name,
+                         const DescriptorType& desc,
+                         const WorkloadInfo& infos);
+
     Event* BeginEvent(const BackendId& backendId,
                       const std::string& label,
                       std::vector<InstrumentPtr>&& instruments);
+
     std::unique_ptr<ProfilerImpl> pProfilerImpl;
+
     friend class ScopedProfilingEvent;
+    friend class ScopedProfilingUpdateDescriptions;
 
     // Friend functions for unit testing, see ProfilerTests.cpp.
     friend size_t GetProfilerEventSequenceSize(armnn::IProfiler* profiler);
diff --git a/include/armnn/IRuntime.hpp b/include/armnn/IRuntime.hpp
index fcb8c05..1bae943 100644
--- a/include/armnn/IRuntime.hpp
+++ b/include/armnn/IRuntime.hpp
@@ -33,44 +33,52 @@
     ARMNN_DEPRECATED_MSG("Please use INetworkProperties constructor with MemorySource argument")
     INetworkProperties(bool importEnabled = false,
                        bool exportEnabled = false,
-                       bool asyncEnabled = false)
-        : m_ImportEnabled(importEnabled)
-        , m_ExportEnabled(exportEnabled)
-        , m_AsyncEnabled(asyncEnabled)
-        , m_InputSource(m_ImportEnabled ? MemorySource::Malloc : MemorySource::Undefined)
-        , m_OutputSource(m_ExportEnabled ? MemorySource::Malloc : MemorySource::Undefined)
+                       bool asyncEnabled = false,
+                       bool profilingEnabled = false)
+        : m_ImportEnabled(importEnabled),
+          m_ExportEnabled(exportEnabled),
+          m_AsyncEnabled(asyncEnabled),
+          m_ProfilingEnabled(profilingEnabled),
+          m_InputSource(m_ImportEnabled ? MemorySource::Malloc : MemorySource::Undefined),
+          m_OutputSource(m_ExportEnabled ? MemorySource::Malloc : MemorySource::Undefined)
     {}
 
     ARMNN_DEPRECATED_MSG("Please use INetworkProperties constructor without numThreads argument")
     INetworkProperties(bool asyncEnabled,
                        MemorySource m_InputSource,
                        MemorySource m_OutputSource,
-                       size_t numThreads)
-            : m_ImportEnabled(m_InputSource != MemorySource::Undefined)
-            , m_ExportEnabled(m_OutputSource != MemorySource::Undefined)
-            , m_AsyncEnabled(asyncEnabled)
-            , m_InputSource(m_InputSource)
-            , m_OutputSource(m_OutputSource)
+                       size_t numThreads,
+                       bool profilingEnabled = false)
+        : m_ImportEnabled(m_InputSource != MemorySource::Undefined),
+          m_ExportEnabled(m_OutputSource != MemorySource::Undefined),
+          m_AsyncEnabled(asyncEnabled),
+          m_ProfilingEnabled(profilingEnabled),
+          m_InputSource(m_InputSource),
+          m_OutputSource(m_OutputSource)
     {
         armnn::IgnoreUnused(numThreads);
     }
 
     INetworkProperties(bool asyncEnabled,
                        MemorySource m_InputSource,
-                       MemorySource m_OutputSource)
-        : m_ImportEnabled(m_InputSource != MemorySource::Undefined)
-        , m_ExportEnabled(m_OutputSource != MemorySource::Undefined)
-        , m_AsyncEnabled(asyncEnabled)
-        , m_InputSource(m_InputSource)
-        , m_OutputSource(m_OutputSource)
-        {}
+                       MemorySource m_OutputSource,
+                       bool profilingEnabled = false)
+        : m_ImportEnabled(m_InputSource != MemorySource::Undefined),
+          m_ExportEnabled(m_OutputSource != MemorySource::Undefined),
+          m_AsyncEnabled(asyncEnabled),
+          m_ProfilingEnabled(profilingEnabled),
+          m_InputSource(m_InputSource),
+          m_OutputSource(m_OutputSource)
+    {}
 
     /// Deprecated and will be removed in future release.
     const bool m_ImportEnabled;
     /// Deprecated and will be removed in future release.
     const bool m_ExportEnabled;
 
-    const bool   m_AsyncEnabled;
+    const bool m_AsyncEnabled;
+
+    const bool m_ProfilingEnabled;
 
     const MemorySource m_InputSource;
     const MemorySource m_OutputSource;
diff --git a/include/armnn/backends/WorkloadInfo.hpp b/include/armnn/backends/WorkloadInfo.hpp
index edf3581..1d6967e 100644
--- a/include/armnn/backends/WorkloadInfo.hpp
+++ b/include/armnn/backends/WorkloadInfo.hpp
@@ -11,12 +11,15 @@
 namespace armnn
 {
 
-/// Contains information about inputs and outputs to a layer.
+/// Contains information about TensorInfos of a layer.
 /// This is needed at construction of workloads, but are not stored.
 struct WorkloadInfo
 {
     std::vector<TensorInfo> m_InputTensorInfos;
     std::vector<TensorInfo> m_OutputTensorInfos;
+    Optional<TensorInfo> m_WeightsTensorInfo = EmptyOptional();
+    Optional<TensorInfo> m_BiasTensorInfo = EmptyOptional();
+    Optional<std::string> m_ConvolutionMethod = EmptyOptional();
 };
 
 } //namespace armnn
diff --git a/src/armnn/JsonPrinter.cpp b/src/armnn/JsonPrinter.cpp
index 9dc648c..986edb9 100644
--- a/src/armnn/JsonPrinter.cpp
+++ b/src/armnn/JsonPrinter.cpp
@@ -21,15 +21,17 @@
         id++;
     }
 
-    PrintLabel(object.m_Label, id);
-    PrintType(object.m_Type);
+    if (object.GetType() != JsonObjectType::ExecObjectDesc)
+    {
+        PrintLabel(object.m_Label, id);
+        PrintType(object.m_Type);
+    }
 
     if (!object.m_Measurements.empty() || !object.m_Children.empty())
     {
         PrintSeparator();
         PrintNewLine();
     }
-
     if (object.GetType() == JsonObjectType::Measurement)
     {
         PrintMeasurementsList(object.m_Measurements);
@@ -37,6 +39,15 @@
         PrintNewLine();
         PrintUnit(object.m_Unit);
     }
+    else if (object.GetType() == JsonObjectType::ExecObjectDesc)
+    {
+        for (std::string stringLine : object.m_LayerDetailsList)
+        {
+           PrintTabs();
+           m_OutputStream << stringLine;
+           PrintNewLine();
+        }
+    }
     if (!object.m_Children.empty())
     {
         for (unsigned int childIndex = 0; childIndex < object.m_Children.size(); ++childIndex)
@@ -50,21 +61,11 @@
             }
         }
     }
-    PrintNewLine();
-    PrintFooter();
-}
-
-void JsonPrinter::PrintHeader()
-{
-    m_OutputStream << "{" << std::endl;
-    IncrementNumberOfTabs();
-}
-
-void JsonPrinter::PrintArmNNHeader()
-{
-    PrintTabs();
-    m_OutputStream << R"("ArmNN": {)" << std::endl;
-    IncrementNumberOfTabs();
+    if (object.GetType() != JsonObjectType::ExecObjectDesc)
+    {
+        PrintNewLine();
+        PrintFooter();
+    }
 }
 
 std::string JsonPrinter::MakeKey(const std::string& label, size_t id)
@@ -103,6 +104,10 @@
                 {
                     return "Event";
                 }
+                case JsonObjectType::ExecObjectDesc:
+                {
+                    return "Operator Description";
+                }
                 default:
                 {
                     return "Unknown";
@@ -141,44 +146,4 @@
     m_OutputStream << "]";
 }
 
-void JsonPrinter::PrintTabs()
-{
-    unsigned int numTabs = m_NumTabs;
-    while (numTabs-- > 0)
-    {
-        m_OutputStream << "\t";
-    }
-}
-
-void JsonPrinter::PrintSeparator()
-{
-    m_OutputStream << ",";
-}
-
-void JsonPrinter::PrintNewLine()
-{
-    m_OutputStream << std::endl;
-}
-
-void JsonPrinter::PrintFooter()
-{
-    DecrementNumberOfTabs();
-    PrintTabs();
-    m_OutputStream << "}";
-}
-
-void JsonPrinter::DecrementNumberOfTabs()
-{
-    if (m_NumTabs == 0)
-    {
-        return;
-    }
-    --m_NumTabs;
-}
-
-void JsonPrinter::IncrementNumberOfTabs()
-{
-    ++m_NumTabs;
-}
-
 } // namespace armnn
\ No newline at end of file
diff --git a/src/armnn/JsonPrinter.hpp b/src/armnn/JsonPrinter.hpp
index 04f56b0..4af1609 100644
--- a/src/armnn/JsonPrinter.hpp
+++ b/src/armnn/JsonPrinter.hpp
@@ -5,12 +5,13 @@
 
 #pragma once
 
-#include <ostream>
-#include <string.h>
+#include <string>
 #include <map>
 #include <set>
+#include <sstream>
 
 #include "Instrument.hpp"
+#include "JsonUtils.hpp"
 
 namespace armnn
 {
@@ -18,13 +19,15 @@
 enum class JsonObjectType
 {
     Measurement,
-    Event
+    Event,
+    ExecObjectDesc
 };
 
 struct JsonChildObject
 {
+    // Object type changes according to the JsonObjectType specified in enum
     JsonChildObject(const std::string& label)
-            : m_Label(label), m_Unit(Measurement::Unit::TIME_MS), m_Type(JsonObjectType::Event)
+        : m_Label(label), m_Unit(Measurement::Unit::TIME_MS), m_Type(JsonObjectType::Event)
     {}
     JsonChildObject(const JsonChildObject&) = default;
 
@@ -33,6 +36,16 @@
         m_Measurements.push_back(measurement);
     }
 
+    void SetAndParseDetails(std::string layerDetailsStr)
+    {
+        std::stringstream layerDetails(layerDetailsStr);
+        std::string stringLine;
+        while (std::getline(layerDetails, stringLine, '\n'))
+        {
+            m_LayerDetailsList.push_back(stringLine);
+        }
+    }
+
     void AddChild(const JsonChildObject& childObject)
     {
         m_Children.push_back(childObject);
@@ -69,39 +82,31 @@
     Measurement::Unit m_Unit;
     JsonObjectType m_Type;
     std::vector<double> m_Measurements;
+    std::vector<std::string> m_LayerDetailsList;
     std::vector<JsonChildObject> m_Children;
 
 private:
     JsonChildObject() = delete;
 };
 
-class JsonPrinter
+class JsonPrinter : public JsonUtils
 {
 public:
     void PrintJsonChildObject(const JsonChildObject& object, size_t& id);
-    void PrintHeader();
-    void PrintArmNNHeader();
-    void PrintFooter();
-    void PrintSeparator();
-    void PrintNewLine();
     void PrintLabel(const std::string& label, size_t id);
     void PrintUnit(armnn::Measurement::Unit unit);
     void PrintType(armnn::JsonObjectType type);
     void PrintMeasurementsList(const std::vector<double>& measurementsVector);
 
 public:
-    JsonPrinter(std::ostream &outputStream)
-        : m_OutputStream(outputStream), m_NumTabs(0)
+    JsonPrinter(std::ostream& outputStream)
+        : JsonUtils(outputStream), m_OutputStream(outputStream)
     {}
 
 private:
     std::string MakeKey(const std::string& label, size_t id);
-    void PrintTabs();
-    void DecrementNumberOfTabs();
-    void IncrementNumberOfTabs();
 
-    std::ostream &m_OutputStream;
-    unsigned int m_NumTabs;
+    std::ostream& m_OutputStream;
 };
 
 } // namespace armnn
\ No newline at end of file
diff --git a/src/armnn/JsonUtils.hpp b/src/armnn/JsonUtils.hpp
new file mode 100644
index 0000000..44fa7ed
--- /dev/null
+++ b/src/armnn/JsonUtils.hpp
@@ -0,0 +1,80 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <iomanip>
+
+#include "armnn/Types.hpp"
+#include "armnn/backends/WorkloadInfo.hpp"
+
+namespace armnn
+{
+
+class JsonUtils
+{
+public:
+    JsonUtils(std::ostream& outputStream)
+        : m_NumTabs(0), m_OutputStream(outputStream)
+    {}
+
+    void PrintTabs()
+    {
+        unsigned int numTabs = m_NumTabs;
+        while ( numTabs-- > 0 )
+        {
+            m_OutputStream << "\t";
+        }
+    }
+
+    void DecrementNumberOfTabs()
+    {
+        if ( m_NumTabs == 0 )
+        {
+            return;
+        }
+        --m_NumTabs;
+    }
+
+    void IncrementNumberOfTabs()
+    {
+        ++m_NumTabs;
+    }
+
+    void PrintNewLine()
+    {
+        m_OutputStream << std::endl;
+    }
+
+    void PrintFooter()
+    {
+        DecrementNumberOfTabs();
+        PrintTabs();
+        m_OutputStream << "}";
+    }
+
+    void PrintHeader()
+    {
+        m_OutputStream << "{" << std::endl;
+        IncrementNumberOfTabs();
+    }
+
+    void PrintArmNNHeader()
+    {
+        PrintTabs();
+        m_OutputStream << R"("ArmNN": {)" << std::endl;
+        IncrementNumberOfTabs();
+    }
+    void PrintSeparator()
+    {
+        m_OutputStream << ",";
+    }
+
+private:
+    unsigned int m_NumTabs;
+    std::ostream& m_OutputStream;
+};
+
+} // namespace armnn
\ No newline at end of file
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index 13beb13..c8dbcaa 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -125,6 +125,8 @@
     m_Profiler = std::make_shared<IProfiler>();
     ProfilerManager::GetInstance().RegisterProfiler(m_Profiler.get());
 
+    m_Profiler->EnableProfiling(networkProperties.m_ProfilingEnabled);
+
     Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
     //First create tensor handlers, backends and workload factories.
     //Handlers are created before workloads are.
diff --git a/src/armnn/Profiling.cpp b/src/armnn/Profiling.cpp
index d62c18a..171d22b 100644
--- a/src/armnn/Profiling.cpp
+++ b/src/armnn/Profiling.cpp
@@ -281,6 +281,13 @@
     }
 }
 
+void ConfigureDetailsObject(JsonChildObject& detailsObject,
+                            std::string layerDetailsStr)
+{
+    detailsObject.SetType(JsonObjectType::ExecObjectDesc);
+    detailsObject.SetAndParseDetails(layerDetailsStr);
+
+}
 
 void ExtractJsonObjects(unsigned int inferenceIndex,
                         const Event* parentEvent,
@@ -347,7 +354,6 @@
     PopulateDescendants(descendantsMap);
 
     JsonChildObject inferenceObject{"inference_measurements"};
-    JsonChildObject layerObject{"layer_measurements"};
     std::vector<JsonChildObject> workloadObjects;
     std::map<unsigned int, std::vector<JsonChildObject>> workloadToKernelObjects;
 
@@ -360,6 +366,15 @@
     printer.PrintHeader();
     printer.PrintArmNNHeader();
 
+    if (m_ProfilingDetails.get()->DetailsExist())
+    {
+        JsonChildObject detailsObject{"layer_details"};
+        ConfigureDetailsObject(detailsObject, m_ProfilingDetails.get()->GetProfilingDetails());
+
+        size_t id=0;
+        printer.PrintJsonChildObject(detailsObject, id);
+    }
+
     // print inference object, also prints child layer and kernel measurements
     size_t id=0;
     printer.PrintJsonChildObject(inferenceObject, id);
@@ -525,10 +540,10 @@
 }
 
 Event* IProfiler::BeginEvent(const BackendId& backendId,
-                  const std::string& label,
-                  std::vector<InstrumentPtr>&& instruments)
+                             const std::string& label,
+                             std::vector<InstrumentPtr>&& instruments)
 {
-    return pProfilerImpl->BeginEvent(this, backendId, label,  std::move(instruments));
+    return pProfilerImpl->BeginEvent(this, backendId, label, std::move(instruments));
 }
 
 IProfiler::~IProfiler() = default;
diff --git a/src/armnn/Profiling.hpp b/src/armnn/Profiling.hpp
index d134425..785f505 100644
--- a/src/armnn/Profiling.hpp
+++ b/src/armnn/Profiling.hpp
@@ -5,6 +5,7 @@
 #pragma once
 
 #include "ProfilingEvent.hpp"
+#include "ProfilingDetails.hpp"
 
 #include <armnn/utility/IgnoreUnused.hpp>
 #include "armnn/IProfiler.hpp"
@@ -38,6 +39,14 @@
                       const std::string& name,
                       std::vector<InstrumentPtr>&& instruments);
 
+    template<typename DescriptorType>
+    void AddLayerDetails(const std::string& label,
+                         const DescriptorType& desc,
+                         const WorkloadInfo& infos)
+    {
+        m_ProfilingDetails->AddDetailsToString(label, desc, infos);
+    }
+
     // Marks the end of a user-defined event.
     void EndEvent(Event* event);
 
@@ -61,6 +70,8 @@
     uint32_t GetEventColor(const BackendId& backendId) const;
 
     using EventPtr = std::unique_ptr<Event>;
+    using DescPtr = std::unique_ptr<ProfilingDetails>;
+
     struct Marker
     {
         std::size_t m_Id;
@@ -83,6 +94,7 @@
 
     std::stack<Event*> m_Parents;
     std::vector<EventPtr> m_EventSequence;
+    DescPtr m_ProfilingDetails = std::make_unique<ProfilingDetails>();
     bool m_ProfilingEnabled;
 };
 
@@ -152,8 +164,39 @@
     IProfiler* m_Profiler; ///< Profiler used
 };
 
+// Helper to easily add operator details during profiling.
+class ScopedProfilingUpdateDescriptions
+{
+public:
+    template<typename DescriptorType>
+    ScopedProfilingUpdateDescriptions(const std::string& name, const DescriptorType& desc, const WorkloadInfo& infos)
+        : m_Profiler(ProfilerManager::GetInstance().GetProfiler())
+    {
+        if (m_Profiler && m_Profiler->IsProfilingEnabled())
+        {
+            m_Profiler->AddLayerDetails(name, desc, infos);
+        }
+    }
+
+    ~ScopedProfilingUpdateDescriptions()
+    {}
+
+private:
+
+    IProfiler* m_Profiler; ///< Profiler used
+};
+
+template<typename DescriptorType>
+void IProfiler::AddLayerDetails(const std::string& name,
+                                const DescriptorType& desc,
+                                const WorkloadInfo& infos)
+{
+    return pProfilerImpl->AddLayerDetails(name, desc, infos);
+}
+
 } // namespace armnn
 
+// Event Definitions for profiling
 #define ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS_UNIQUE_LOC_INNER(lineNumber, backendId, /*name,*/ ...) \
     armnn::ScopedProfilingEvent e_ ## lineNumber(backendId, /*name,*/ __VA_ARGS__);
 
@@ -172,3 +215,13 @@
 
 #define ARMNN_SCOPED_PROFILING_EVENT(backendId, name) \
     ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(backendId, name, armnn::WallClockTimer())
+
+// Workload Description definitions for profiling
+#define ARMNN_REPORT_PROFILING_WORKLOAD_DESC_UNIQUE_LOC_INNER(lineNumber, name, desc, infos) \
+    armnn::ScopedProfilingUpdateDescriptions e_ ## lineNumber(name, desc, infos);
+
+#define ARMNN_REPORT_PROFILING_WORKLOAD_DESC_UNIQUE_LOC(lineNumber, name, desc, infos) \
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC_UNIQUE_LOC_INNER(lineNumber, name, desc, infos)
+
+#define ARMNN_REPORT_PROFILING_WORKLOAD_DESC(name, desc, infos) \
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC_UNIQUE_LOC(__LINE__, name, desc, infos)
diff --git a/src/armnn/ProfilingDetails.hpp b/src/armnn/ProfilingDetails.hpp
new file mode 100644
index 0000000..7224aad
--- /dev/null
+++ b/src/armnn/ProfilingDetails.hpp
@@ -0,0 +1,153 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <iomanip>
+
+#include "armnn/Types.hpp"
+#include "armnn/TypesUtils.hpp"
+#include "armnn/backends/WorkloadInfo.hpp"
+
+#include "SerializeLayerParameters.hpp"
+#include "JsonUtils.hpp"
+
+namespace armnn
+{
+
+/// ProfilingDetails class records any details associated with the operator and passes on for outputting to the user
+class ProfilingDetails : public JsonUtils
+{
+public:
+    /// Constructor
+    ProfilingDetails() : JsonUtils(m_ProfilingDetails), m_DetailsExist(false)
+    {}
+
+    /// Destructor
+    ~ProfilingDetails() noexcept
+    {}
+
+    /// Add to the ProfilingDetails
+    template<typename DescriptorType>
+    void AddDetailsToString(const std::string& workloadName,
+                            const DescriptorType& desc,
+                            const WorkloadInfo& infos)
+    {
+        m_ProfilingDetails << std::quoted("Name") << ": " << std::quoted(workloadName) << " ";
+        PrintHeader();
+
+        // Print tensor infos and related data types
+        PrintInfos(infos.m_InputTensorInfos, "Input");
+
+        PrintInfos(infos.m_OutputTensorInfos, "Output");
+
+        if ( infos.m_BiasTensorInfo.has_value())
+        {
+            PrintInfo(infos.m_BiasTensorInfo.value(), "Bias");
+        }
+        if ( infos.m_WeightsTensorInfo.has_value())
+        {
+            PrintInfo(infos.m_WeightsTensorInfo.value(), "Weights");
+        }
+        if ( infos.m_ConvolutionMethod.has_value())
+        {
+            PrintTabs();
+
+            m_ProfilingDetails << std::quoted("Convolution Method") << ": "
+                               << std::quoted(infos.m_ConvolutionMethod.value());
+
+            PrintSeparator();
+            PrintNewLine();
+        }
+
+        ParameterStringifyFunction extractParams = [this](const std::string& name, const std::string& value) {
+            PrintTabs();
+            m_ProfilingDetails << std::quoted(name) << " : " << std::quoted(value);
+            if (name != "DataLayout") PrintSeparator();
+            PrintNewLine();
+        };
+
+        StringifyLayerParameters<DescriptorType>::Serialize(extractParams, desc);
+
+        PrintFooter();
+        PrintSeparator();
+        PrintNewLine();
+
+        m_DetailsExist = true;
+    }
+
+    /// Get the ProfilingDetails
+    /// \return the ProfilingDetails
+    std::string GetProfilingDetails() const
+    {
+        return m_ProfilingDetails.str();
+    }
+
+    bool DetailsExist()
+    {
+        return m_DetailsExist;
+    }
+
+private:
+    // Print tensor infos and related data types
+    void PrintInfo(const TensorInfo& info, const std::string& ioString)
+    {
+        const std::vector<TensorInfo> infoVect{ info };
+        PrintInfos(infoVect, ioString);
+    }
+
+    void PrintInfos(const std::vector<TensorInfo>& infos, const std::string& ioString)
+    {
+        for ( size_t i = 0; i < infos.size(); i++ )
+        {
+            auto shape = infos[i].GetShape();
+            PrintTabs();
+
+            m_ProfilingDetails << std::quoted(ioString + " " + std::to_string(i)) << ": ";
+
+            PrintHeader();
+            PrintTabs();
+
+            // Shape
+            m_ProfilingDetails << std::quoted("Shape") << ": \"[";
+            for ( unsigned int dim = 0; dim < shape.GetNumDimensions(); dim++ )
+            {
+                shape.GetNumDimensions() == dim + 1 ?
+                m_ProfilingDetails << shape[dim] << "]\"" : // true
+                m_ProfilingDetails << shape[dim] << ",";    // false
+            }
+
+            PrintSeparator();
+            PrintNewLine();
+
+            // Data Type
+            PrintTabs();
+            m_ProfilingDetails << std::quoted("DataType") << ": "
+                               << std::quoted(GetDataTypeName(infos[i].GetDataType()));
+
+            PrintSeparator();
+            PrintNewLine();
+
+            // Number of Dimensions
+            PrintTabs();
+            m_ProfilingDetails << std::quoted("Num Dims") << ": "
+                               << std::quoted(std::to_string(shape.GetNumDimensions()));
+
+
+            // Close out the scope
+            PrintNewLine();
+            PrintFooter();
+            PrintSeparator();
+            PrintNewLine();
+        }
+    }
+
+    /// Stores ProfilingDetails
+    std::ostringstream m_ProfilingDetails;
+    bool m_DetailsExist;
+
+};
+
+} // namespace armnn
diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
index 5c731aa..b3df7ce 100644
--- a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
+++ b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
@@ -120,6 +120,23 @@
                                                   aclDilationInfo,
                                                   isFastMathEnabled);
 
+     // Add details for profiling output
+    std::string workloadName = "ClConvolution2dWorkload_Execute_Guid" + std::to_string(this->GetGuid());
+
+    WorkloadInfo detailsInfo;
+
+    detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
+    detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
+    detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Weight->GetTensorInfo());
+    detailsInfo.m_ConvolutionMethod = armnn::Optional<std::string>(GetConvolutionMethodString());
+    if (descriptor.m_Parameters.m_BiasEnabled)
+    {
+        detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Bias->GetTensorInfo());
+    }
+
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC(workloadName, descriptor.m_Parameters, detailsInfo);
+
     InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight);
 
     if (m_BiasTensor)
@@ -144,6 +161,23 @@
     return m_ConvolutionMethod;
 }
 
+std::string ClConvolution2dWorkload::GetConvolutionMethodString()
+{
+    switch ( m_ConvolutionMethod )
+    {
+        case arm_compute::ConvolutionMethod::FFT:
+            return "FFT";
+        case arm_compute::ConvolutionMethod::DIRECT:
+            return "Direct";
+        case arm_compute::ConvolutionMethod::GEMM:
+            return "GEMM";
+        case arm_compute::ConvolutionMethod::WINOGRAD:
+            return "Winograd";
+        default:
+            return "Unknown";
+    }
+}
+
 void ClConvolution2dWorkload::FreeUnusedTensors()
 {
     FreeTensorIfUnused(m_KernelTensor);
diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
index d0f7a5b..49d7f77 100644
--- a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
+++ b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
@@ -37,6 +37,7 @@
     void Execute() const override;
 
     arm_compute::ConvolutionMethod GetConvolutionMethod() const;
+    std::string GetConvolutionMethodString();
 
 private:
     mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer;
diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
index 32af3f8..1e12e13 100644
--- a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
+++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
@@ -74,8 +74,6 @@
 
     m_Data.ValidateInputsOutputs("NeonConvolution2dWorkload", 1, 1);
 
-    // todo: check tensor shapes match.
-
     arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
@@ -120,6 +118,23 @@
                                                  activationInfo,
                                                  isFastMathEnabled);
 
+    // Add details for profiling output
+    std::string workloadName = "NeonConvolution2dWorkload_Execute_Guid" + std::to_string(this->GetGuid());
+
+    WorkloadInfo detailsInfo;
+
+    detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
+    detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
+    detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Weight->GetTensorInfo());
+    detailsInfo.m_ConvolutionMethod = armnn::Optional<std::string>(GetConvolutionMethodString());
+    if (descriptor.m_Parameters.m_BiasEnabled)
+    {
+        detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Bias->GetTensorInfo());
+    }
+
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC(workloadName, descriptor.m_Parameters, detailsInfo);
+
     m_ConvolutionLayer.reset(convolutionLayer.release());
 
     ARMNN_ASSERT(m_ConvolutionLayer);
@@ -146,6 +161,23 @@
     return m_ConvolutionMethod;
 }
 
+std::string NeonConvolution2dWorkload::GetConvolutionMethodString()
+{
+    switch ( m_ConvolutionMethod )
+    {
+        case arm_compute::ConvolutionMethod::FFT:
+            return "FFT";
+        case arm_compute::ConvolutionMethod::DIRECT:
+            return "Direct";
+        case arm_compute::ConvolutionMethod::GEMM:
+            return "GEMM";
+        case arm_compute::ConvolutionMethod::WINOGRAD:
+            return "Winograd";
+        default:
+            return "Unknown";
+    }
+}
+
 void NeonConvolution2dWorkload::FreeUnusedTensors()
 {
     FreeTensorIfUnused(m_KernelTensor);
diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
index 4b6e58c..4b4c07a 100644
--- a/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
+++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
@@ -37,6 +37,7 @@
     void Execute() const override;
 
     arm_compute::ConvolutionMethod GetConvolutionMethod() const;
+    std::string GetConvolutionMethodString();
 
 private:
     std::unique_ptr<arm_compute::IFunction> m_ConvolutionLayer;
diff --git a/src/backends/reference/workloads/RefConvolution2dWorkload.cpp b/src/backends/reference/workloads/RefConvolution2dWorkload.cpp
index 5ae1af8..7c33171 100644
--- a/src/backends/reference/workloads/RefConvolution2dWorkload.cpp
+++ b/src/backends/reference/workloads/RefConvolution2dWorkload.cpp
@@ -13,18 +13,33 @@
 namespace armnn
 {
 RefConvolution2dWorkload::RefConvolution2dWorkload(
-        const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info)
-        : BaseWorkload<Convolution2dQueueDescriptor>(descriptor, info)
+    const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info)
+    : BaseWorkload<Convolution2dQueueDescriptor>(descriptor, info)
 {
-    m_Weight = std::make_unique<ScopedTensorHandle>(*(descriptor.m_Weight));
+    // Construct params for reporting operator details
+    std::string workloadName = "RefConvolution2dWorkload_Execute_Guid" + std::to_string(this->GetGuid());
+
+    WorkloadInfo detailsInfo;
+    detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
+    detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
+    detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Weight->GetTensorInfo());
+    if (descriptor.m_Parameters.m_BiasEnabled)
+    {
+        detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Bias->GetTensorInfo());
+    }
+
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC(workloadName, descriptor.m_Parameters, detailsInfo);
+
+    m_Weight = std::make_unique<ScopedTensorHandle>(*( descriptor.m_Weight ));
     const TensorInfo& rFilterInfo = m_Weight->GetTensorInfo();
 
     m_FilterShape = rFilterInfo.GetShape();
     m_FilterDecoder = MakeDecoder<float>(rFilterInfo, m_Weight.get()->Map(true));
 
-    if (descriptor.m_Parameters.m_BiasEnabled)
+    if ( descriptor.m_Parameters.m_BiasEnabled )
     {
-        m_Bias = std::make_unique<ScopedTensorHandle>(*(descriptor.m_Bias));
+        m_Bias = std::make_unique<ScopedTensorHandle>(*( descriptor.m_Bias ));
         const TensorInfo& biasInfo = m_Bias->GetTensorInfo();
         m_BiasDecoder = MakeDecoder<float>(biasInfo, m_Bias->Map(true));
     }
@@ -35,13 +50,15 @@
     Execute(m_Data.m_Inputs, m_Data.m_Outputs);
 }
 
-void RefConvolution2dWorkload::ExecuteAsync(WorkingMemDescriptor &workingMemDescriptor)
+void RefConvolution2dWorkload::ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor)
 {
     Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
 }
 
-void RefConvolution2dWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const {
-    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvolution2dWorkload_Execute");
+void RefConvolution2dWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
+    std::string workloadName = "RefConvolutionWorkload_Execute_Guid" + std::to_string(this->GetGuid());
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, workloadName);
 
     std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(GetTensorInfo(inputs[0]), inputs[0]->Map());
     std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(outputs[0]), outputs[0]->Map());
diff --git a/tests/InferenceModel.hpp b/tests/InferenceModel.hpp
index 9eb3eab..3107593 100644
--- a/tests/InferenceModel.hpp
+++ b/tests/InferenceModel.hpp
@@ -485,7 +485,8 @@
             const auto loading_start_time = armnn::GetTimeNow();
             armnn::INetworkProperties networkProperties(params.m_AsyncEnabled,
                                                         armnn::MemorySource::Undefined,
-                                                        armnn::MemorySource::Undefined);
+                                                        armnn::MemorySource::Undefined,
+                                                        enableProfiling);
             std::string errorMessage;
             ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet), errorMessage, networkProperties);
 
@@ -563,10 +564,6 @@
         }
 
         std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
-        if (profiler)
-        {
-            profiler->EnableProfiling(m_EnableProfiling);
-        }
 
         // Start timer to record inference time in EnqueueWorkload (in milliseconds)
         const auto start_time = armnn::GetTimeNow();
@@ -617,10 +614,6 @@
         }
 
         std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
-        if (profiler)
-        {
-            profiler->EnableProfiling(m_EnableProfiling);
-        }
 
         // Start timer to record inference time in EnqueueWorkload (in milliseconds)
         const auto start_time = armnn::GetTimeNow();
@@ -672,10 +665,6 @@
         }
 
         std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
-        if (profiler)
-        {
-            profiler->EnableProfiling(m_EnableProfiling);
-        }
 
         m_Threadpool->Schedule(m_NetworkIdentifier,
                                MakeInputTensors(inputContainers),