IVGCVSW-6801 Add some timing information to ExecuteGraph.

* Modify all versions of ExecuteGraph to include timing information. This
  can give a rough guide as to how much overhead the driver is adding.

Signed-off-by: Colm Donelan <colm.donelan@arm.com>
Change-Id: Ie168e060c22b8417efa2ede0c9bdc7ef2631b913
diff --git a/ArmnnPreparedModel.cpp b/ArmnnPreparedModel.cpp
index 326351c..4174043 100644
--- a/ArmnnPreparedModel.cpp
+++ b/ArmnnPreparedModel.cpp
@@ -14,6 +14,7 @@
 #include <OperationsUtils.h>
 #include <ValidateHal.h>
 
+#include <chrono>
 #include <cinttypes>
 
 #ifdef ARMNN_ANDROID_S
@@ -299,6 +300,8 @@
         CallbackContext_1_0 cb)
 {
     ALOGV("ArmnnPreparedModel::ExecuteGraph(...)");
+    // Capture the graph execution start time.
+    std::chrono::time_point<std::chrono::system_clock> graphExecutionStart = std::chrono::system_clock::now();
 
     DumpTensorsIfRequired("Input", inputTensors);
 
@@ -328,7 +331,6 @@
             status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors,
                                                 importedInputIds, importedOutputIds);
         }
-
         if (status != armnn::Status::Success)
         {
             ALOGW("EnqueueWorkload failed");
@@ -365,6 +367,12 @@
         #endif
     }
 
+    // Log the total time in this call. This is a good number to compare to that printed out by
+    // RuntimeImpl::EnqueueWorkload. The difference should be the execution overhead of the driver.
+    ALOGI("ArmnnPreparedModel::ExecuteGraph Execution time = %lld µs",
+           std::chrono::duration_cast<std::chrono::microseconds>
+          (std::chrono::system_clock::now() - graphExecutionStart).count());
+
     cb.callback(V1_0::ErrorStatus::NONE, "ExecuteGraph");
 }
 
diff --git a/ArmnnPreparedModel_1_2.cpp b/ArmnnPreparedModel_1_2.cpp
index 37bc3a4..c54ee35 100644
--- a/ArmnnPreparedModel_1_2.cpp
+++ b/ArmnnPreparedModel_1_2.cpp
@@ -16,6 +16,7 @@
 #include <ExecutionBurstServer.h>
 #include <ValidateHal.h>
 
+#include <chrono>
 #include <cinttypes>
 
 #ifdef ARMNN_ANDROID_S
@@ -511,6 +512,8 @@
     ALOGV("ArmnnPreparedModel_1_2::ExecuteGraph(...)");
 
     TimePoint driverEnd, deviceStart, deviceEnd;
+    // Capture the graph execution start time.
+    std::chrono::time_point<std::chrono::system_clock> graphExecutionStart = std::chrono::system_clock::now();
 
     DumpTensorsIfRequired("Input", inputTensors);
 
@@ -599,6 +602,11 @@
         cb.callback(V1_0::ErrorStatus::NONE, outputShapes, g_NoTiming, "ArmnnPreparedModel_1_2::ExecuteGraph");
     }
 
+    // Log the total time in this call. This is a good number to compare to that printed out by
+    // RuntimeImpl::EnqueueWorkload. The difference should be the execution overhead of the driver.
+    ALOGI("ArmnnPreparedModel_1_2::ExecuteGraph Execution time = %lld µs",
+          std::chrono::duration_cast<std::chrono::microseconds>
+          (std::chrono::system_clock::now() - graphExecutionStart).count());
     return true;
 }
 
diff --git a/ArmnnPreparedModel_1_3.cpp b/ArmnnPreparedModel_1_3.cpp
index 34c42ec..20b49f5 100644
--- a/ArmnnPreparedModel_1_3.cpp
+++ b/ArmnnPreparedModel_1_3.cpp
@@ -21,6 +21,7 @@
 #include <ExecutionBurstServer.h>
 #include <ValidateHal.h>
 
+#include <chrono>
 #include <cinttypes>
 
 #ifdef ARMNN_ANDROID_S
@@ -805,6 +806,8 @@
     CallbackContext cb)
 {
     ALOGV("ArmnnPreparedModel_1_3::ExecuteGraph(...)");
+    // Capture the graph execution start time.
+    std::chrono::time_point<std::chrono::system_clock> graphExecutionStart = std::chrono::system_clock::now();
 
     DumpTensorsIfRequired("Input", inputTensors);
 
@@ -890,6 +893,11 @@
     {
         cb.callback(V1_3::ErrorStatus::NONE, outputShapes, g_NoTiming, "ArmnnPreparedModel_1_3::ExecuteGraph");
     }
+    // Log the total time in this call. This is a good number to compare to that printed out by
+    // RuntimeImpl::EnqueueWorkload. The difference should be the execution overhead of the driver.
+    ALOGI("ArmnnPreparedModel_1_3::ExecuteGraph Execution time = %lld µs",
+          std::chrono::duration_cast<std::chrono::microseconds>
+          (std::chrono::system_clock::now() - graphExecutionStart).count());
     return V1_3::ErrorStatus::NONE;
 }