IVGCVSW-6747 Call Cl sync after EnqueueWorkload

 * Add AfterEnqueueWorkload to IBackendContext
 * Implement AfterEnqueueWorkload in ClBackendContext to call Cl sync
 * Set allocated data on OutputHandler only once
 * Handle PreImportedHandles and CurImportedId the same way as Async

Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
Change-Id: I9f59d57e298d4a494569faec3078d66af799f77b
diff --git a/include/armnn/backends/IBackendContext.hpp b/include/armnn/backends/IBackendContext.hpp
index ae85b63..6fca42d 100644
--- a/include/armnn/backends/IBackendContext.hpp
+++ b/include/armnn/backends/IBackendContext.hpp
@@ -25,6 +25,9 @@
     virtual bool BeforeUnloadNetwork(NetworkId networkId) = 0;
     virtual bool AfterUnloadNetwork(NetworkId networkId) = 0;
 
+    // After Enqueue workload events
+    virtual bool AfterEnqueueWorkload(NetworkId networkId) = 0;
+
     virtual ~IBackendContext() {}
 };
 
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index 45891f7..48a3040 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -699,7 +699,7 @@
 
         for (const BindableLayer* inputLayer : graph.GetInputLayers())
         {
-            if (preImportedInputIds.size() != m_PreImportedInputHandles.size())
+            if (preImportedInputIds.size() > graph.GetNumInputs())
             {
                 throw InvalidArgumentException("Invalid number of preImportedInputIds");
             }
@@ -727,7 +727,7 @@
 
         for (const BindableLayer* outputLayer : graph.GetOutputLayers())
         {
-            if (preImportedOutputIds.size() != m_PreImportedOutputHandles.size())
+            if (preImportedOutputIds.size() > graph.GetNumOutputs())
             {
                 throw InvalidArgumentException("Invalid number of preImportedOutputIds");
             }
@@ -770,11 +770,6 @@
             }
         }
     }
-    // Clear m_PreImportedInputHandles and m_PreImportedOutputHandles
-    m_PreImportedInputHandles.clear();
-    m_PreImportedOutputHandles.clear();
-    m_CurImportedInputId = 0;
-    m_CurImportedOutputId = 0;
 
     std::unique_ptr<TimelineUtilityMethods> timelineUtils =
                         TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
@@ -1271,6 +1266,16 @@
             {
                 // Cannot import, use allocated data
                 handler.UseAllocatedData();
+                // Ensure that the workload gets the correct tensor
+                try
+                {
+                    m_WorkloadQueue[m_InputWorkloadSlotPairs[layerBindingId].first].get()->ReplaceInputTensorHandle(
+                        handler.GetData(), m_InputWorkloadSlotPairs[layerBindingId].second);
+                }
+                catch(armnn::UnimplementedException& e)
+                {
+                    IgnoreUnused(e);
+                }
             }
 
         }
@@ -1437,6 +1442,17 @@
             {
                 // Cannot import, use allocated memory
                 outputHandler.UseAllocatedData();
+                // Ensure that the workload gets the correct tensor
+                try
+                {
+                    m_WorkloadQueue[m_OutputWorkloadSlotPairs[layerBindingId].first].get()->
+                            ReplaceOutputTensorHandle(outputHandler.GetData(),
+                                                      m_OutputWorkloadSlotPairs[layerBindingId].second);
+                }
+                catch(armnn::UnimplementedException& e)
+                {
+                    IgnoreUnused(e);
+                }
             }
         }
         return importedOutputs;
diff --git a/src/armnn/OutputHandler.cpp b/src/armnn/OutputHandler.cpp
index 807262e..8827d8a 100644
--- a/src/armnn/OutputHandler.cpp
+++ b/src/armnn/OutputHandler.cpp
@@ -35,4 +35,13 @@
     dataCollector.Push(m_TensorHandle.get(), m_TensorInfo);
 }
 
+void OutputHandler::SetAllocatedData()
+{
+    // The first call captures the current handle; later calls are no-ops.
+    if (m_AllocatedTensorHandle == nullptr)
+    {
+        m_AllocatedTensorHandle = std::move(m_TensorHandle);
+    }
+}
+
 } // namespace armnn
diff --git a/src/armnn/OutputHandler.hpp b/src/armnn/OutputHandler.hpp
index 3fd2519..d1cb2de 100644
--- a/src/armnn/OutputHandler.hpp
+++ b/src/armnn/OutputHandler.hpp
@@ -50,15 +50,15 @@
 
     void SetData(std::unique_ptr<ITensorHandle> data) { m_TensorHandle = std::move(data); }
 
-    void SetAllocatedData() { m_AllocatedTensorHandle = std::move(m_TensorHandle); }
+    void SetAllocatedData();
 
-    void UseAllocatedData() { m_TensorHandle = std::move(m_AllocatedTensorHandle); }
+    void UseAllocatedData() { m_TensorHandle = m_AllocatedTensorHandle; } // copy: allocated handle is kept
 
     /// @brief Returns true if SetTensorInfo() has been called at least once on this.
     bool IsTensorInfoSet() const { return m_bTensorInfoSet; }
 private:
-    std::unique_ptr<ITensorHandle> m_TensorHandle;
-    std::unique_ptr<ITensorHandle> m_AllocatedTensorHandle;
+    std::shared_ptr<ITensorHandle> m_TensorHandle;
+    std::shared_ptr<ITensorHandle> m_AllocatedTensorHandle;
     TensorInfo m_TensorInfo;
     bool m_bTensorInfoSet = false;
 };
diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp
index 95fb8a3..1abe0f3 100644
--- a/src/armnn/Runtime.cpp
+++ b/src/armnn/Runtime.cpp
@@ -242,6 +242,7 @@
                                            profiling::LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
             }
         }
+
         if (m_LoadedNetworks.erase(networkId) == 0)
         {
             ARMNN_LOG(warning) << "WARNING: RuntimeImpl::UnloadNetwork(): " << networkId << " not found!";
@@ -632,6 +633,12 @@
     ARMNN_LOG(info) << "Execution time: " << std::setprecision(2)
                     << std::fixed << armnn::GetTimeDuration(startTime).count() << " ms.";
 
+    // Call After EnqueueWorkload events
+    for (auto&& context : m_BackendContexts)
+    {
+        context.second->AfterEnqueueWorkload(networkId);
+    }
+
     return status;
 }
 
diff --git a/src/backends/cl/ClBackendContext.cpp b/src/backends/cl/ClBackendContext.cpp
index 9c5cca9..5358fe9 100644
--- a/src/backends/cl/ClBackendContext.cpp
+++ b/src/backends/cl/ClBackendContext.cpp
@@ -285,6 +285,11 @@
     return true;
 }
 
+bool ClBackendContext::AfterEnqueueWorkload(NetworkId) // network id is not used here
+{
+    return m_ClContextControlWrapper->Sync(); // hand the Sync() result back to the caller
+}
+
 ClBackendContext::~ClBackendContext()
 {
     if (m_Tuner && !m_TuningFile.empty())
diff --git a/src/backends/cl/ClBackendContext.hpp b/src/backends/cl/ClBackendContext.hpp
index af988a9..659d47b 100644
--- a/src/backends/cl/ClBackendContext.hpp
+++ b/src/backends/cl/ClBackendContext.hpp
@@ -25,6 +25,8 @@
     bool BeforeUnloadNetwork(NetworkId networkId) override;
     bool AfterUnloadNetwork(NetworkId networkId) override;
 
+    bool AfterEnqueueWorkload(NetworkId networkId) override;
+
     ~ClBackendContext() override;
 
 private: