IVGCVSW-5879 Fix problems with using internal profiling from delegate.

* Pass the value of m_EnableProfiling through from ExecuteNetwork to
  DelegateOptions (see the DelegateOptions sketch below).
* If internal profiling is enabled, print the results from inside the
  delegate (see the runtime sketch below).
* Remove an unnecessary ProfilerImpl instance from WorkingMemHandle.hpp.
* Remove an unnecessary parameter from TfLiteDelegateMainImpl in
  ExecuteNetwork.
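
A minimal sketch of the DelegateOptions side for a standalone delegate
user (not part of this change; the DelegateOptions constructor,
TfLiteArmnnDelegateCreate and the interpreter wiring are assumed from
the public delegate API — only SetInternalProfilingParams is what this
patch exercises):

    // Assumes #include <armnn_delegate.hpp>, <DelegateOptions.hpp> and a
    // constructed tflite::Interpreter named 'interpreter'.
    armnnDelegate::DelegateOptions delegateOptions(armnn::Compute::CpuAcc);
    // Same call ExecuteNetwork now makes when profiling is requested.
    delegateOptions.SetInternalProfilingParams(
        true, armnn::ProfilingDetailsMethod::DetailsWithEvents);
    TfLiteDelegate* armnnTfLiteDelegate =
        armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions);
    interpreter->ModifyGraphWithDelegate(armnnTfLiteDelegate);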

Signed-off-by: Colm Donelan <colm.donelan@arm.com>
Change-Id: Ia1d4b1eb3a05ca5b4d80cc39e138c7fac182d948
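
The delegate-side print (armnn_delegate.cpp hunk below) follows a
pattern any application owning its own runtime can reuse; a minimal
sketch, where 'runtime' and 'networkId' are hypothetical stand-ins for
an armnn::IRuntimePtr and a loaded network:

    // After EnqueueWorkload, fetch the network's profiler and, if
    // internal profiling was enabled, print the collected timings.
    std::shared_ptr<armnn::IProfiler> profiler = runtime->GetProfiler(networkId);
    if (profiler && profiler->IsProfilingEnabled())
    {
        profiler->Print(std::cout);
    }
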
diff --git a/delegate/src/armnn_delegate.cpp b/delegate/src/armnn_delegate.cpp
index 5fbc920..e029e2c 100644
--- a/delegate/src/armnn_delegate.cpp
+++ b/delegate/src/armnn_delegate.cpp
@@ -397,7 +397,7 @@
                                                          networkProperties);
         if (loadingStatus != armnn::Status::Success)
         {
-            // Optimize failed
+            // Network load failed.
             throw armnn::Exception("TfLiteArmnnDelegate: Network could not be loaded:" + errorMessage);
         }
     }
@@ -457,6 +457,12 @@
 
     // Run graph
     auto status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
+    // The delegate holds its own Arm NN runtime so this is our last chance to print internal profiling data.
+    std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);
+    if (profiler && profiler->IsProfilingEnabled())
+    {
+        profiler->Print(std::cout);
+    }
     return (status == armnn::Status::Success) ? kTfLiteOk : kTfLiteError;
 }
 
diff --git a/src/armnn/WorkingMemHandle.hpp b/src/armnn/WorkingMemHandle.hpp
index aaa9d59..9078a8d 100644
--- a/src/armnn/WorkingMemHandle.hpp
+++ b/src/armnn/WorkingMemHandle.hpp
@@ -119,7 +119,6 @@
 private:
     using DifferenceType = std::vector<ITensorHandle*>::difference_type;
     NetworkId m_NetworkId;
-    std::shared_ptr<ProfilerImpl> m_Profiler;
 
     std::unordered_map<LayerBindingId, ITensorHandle*> m_InputHandleMap;
     std::unordered_map<LayerBindingId, ITensorHandle*> m_OutputHandleMap;
diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp
index 66be8fd..db15872 100644
--- a/tests/ExecuteNetwork/ExecuteNetwork.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp
@@ -68,8 +68,7 @@
 }
 
 #if defined(ARMNN_TFLITE_DELEGATE)
-int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, const armnn::IRuntime::CreationOptions runtimeOptions,
-                           const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
+int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, const armnn::IRuntime::CreationOptions runtimeOptions)
 {
     using namespace tflite;
 
@@ -867,7 +866,7 @@
                     ExecuteNetworkParams::TfLiteExecutor::TfliteInterpreter)
         {
         #if defined(ARMNN_TF_LITE_DELEGATE)
-            return TfLiteDelegateMainImpl(ProgramOptions.m_ExNetParams, ProgramOptions.m_RuntimeOptions, runtime);
+            return TfLiteDelegateMainImpl(ProgramOptions.m_ExNetParams, ProgramOptions.m_RuntimeOptions);
         #else
             ARMNN_LOG(fatal) << "Not built with Arm NN Tensorflow-Lite delegate support.";
             return EXIT_FAILURE;
diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp
index 541430c..b3d18cd 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp
@@ -250,7 +250,8 @@
     options.m_ReduceFp32ToFp16 = m_EnableFp16TurboMode;
     options.m_ReduceFp32ToBf16 = m_EnableBf16TurboMode;
     options.m_Debug = m_PrintIntermediate;
-
+    options.m_ProfilingEnabled = m_EnableProfiling;
+    delegateOptions.SetInternalProfilingParams(m_EnableProfiling, armnn::ProfilingDetailsMethod::DetailsWithEvents);
     options.m_shapeInferenceMethod = armnn::ShapeInferenceMethod::ValidateOnly;
     if (m_InferOutputShape)
     {