IVGCVSW-6747 Call Cl sync after EnqueueWorkload
* Add AfterEnqueueWorkload to IBackendContext
* Implement AfterEnqueueWorkload in ClBackendContext to call Cl sync
* Set allocated data on OutputHandler only once
* Handle PreImportedHandles and CurImportedId the same way as Async
Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
Change-Id: I9f59d57e298d4a494569faec3078d66af799f77b
diff --git a/include/armnn/backends/IBackendContext.hpp b/include/armnn/backends/IBackendContext.hpp
index ae85b63..6fca42d 100644
--- a/include/armnn/backends/IBackendContext.hpp
+++ b/include/armnn/backends/IBackendContext.hpp
@@ -25,6 +25,9 @@
virtual bool BeforeUnloadNetwork(NetworkId networkId) = 0;
virtual bool AfterUnloadNetwork(NetworkId networkId) = 0;
+ // After Enqueue workload events
+ virtual bool AfterEnqueueWorkload(NetworkId networkId) = 0;
+
virtual ~IBackendContext() {}
};
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index 45891f7..48a3040 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -699,7 +699,7 @@
for (const BindableLayer* inputLayer : graph.GetInputLayers())
{
- if (preImportedInputIds.size() != m_PreImportedInputHandles.size())
+ if (preImportedInputIds.size() > graph.GetNumInputs())
{
throw InvalidArgumentException("Invalid number of preImportedInputIds");
}
@@ -727,7 +727,7 @@
for (const BindableLayer* outputLayer : graph.GetOutputLayers())
{
- if (preImportedOutputIds.size() != m_PreImportedOutputHandles.size())
+ if (preImportedOutputIds.size() > graph.GetNumOutputs())
{
throw InvalidArgumentException("Invalid number of preImportedOutputIds");
}
@@ -770,11 +770,6 @@
}
}
}
- // Clear m_PreImportedInputHandles and m_PreImportedOutputHandles
- m_PreImportedInputHandles.clear();
- m_PreImportedOutputHandles.clear();
- m_CurImportedInputId = 0;
- m_CurImportedOutputId = 0;
std::unique_ptr<TimelineUtilityMethods> timelineUtils =
TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
@@ -1271,6 +1266,16 @@
{
// Cannot import, use allocated data
handler.UseAllocatedData();
+ // Ensure that the workload gets the correct tensor
+ try
+ {
+ m_WorkloadQueue[m_InputWorkloadSlotPairs[layerBindingId].first].get()->ReplaceInputTensorHandle(
+ handler.GetData(), m_InputWorkloadSlotPairs[layerBindingId].second);
+ }
+ catch(armnn::UnimplementedException& e)
+ {
+ IgnoreUnused(e);
+ }
}
}
@@ -1437,6 +1442,17 @@
{
// Cannot import, use allocated memory
outputHandler.UseAllocatedData();
+ // Ensure that the workload gets the correct tensor
+ try
+ {
+ m_WorkloadQueue[m_OutputWorkloadSlotPairs[layerBindingId].first].get()->
+ ReplaceOutputTensorHandle(outputHandler.GetData(),
+ m_OutputWorkloadSlotPairs[layerBindingId].second);
+ }
+ catch(armnn::UnimplementedException& e)
+ {
+ IgnoreUnused(e);
+ }
}
}
return importedOutputs;
diff --git a/src/armnn/OutputHandler.cpp b/src/armnn/OutputHandler.cpp
index 807262e..8827d8a 100644
--- a/src/armnn/OutputHandler.cpp
+++ b/src/armnn/OutputHandler.cpp
@@ -35,4 +35,13 @@
dataCollector.Push(m_TensorHandle.get(), m_TensorInfo);
}
+void OutputHandler::SetAllocatedData()
+{
+ // Set allocated data only once
+ if (!m_AllocatedTensorHandle)
+ {
+ m_AllocatedTensorHandle = std::move(m_TensorHandle);
+ }
+}
+
} // namespace armnn
diff --git a/src/armnn/OutputHandler.hpp b/src/armnn/OutputHandler.hpp
index 3fd2519..d1cb2de 100644
--- a/src/armnn/OutputHandler.hpp
+++ b/src/armnn/OutputHandler.hpp
@@ -50,15 +50,15 @@
void SetData(std::unique_ptr<ITensorHandle> data) { m_TensorHandle = std::move(data); }
- void SetAllocatedData() { m_AllocatedTensorHandle = std::move(m_TensorHandle); }
+ void SetAllocatedData();
- void UseAllocatedData() { m_TensorHandle = std::move(m_AllocatedTensorHandle); }
+ void UseAllocatedData() { m_TensorHandle = m_AllocatedTensorHandle; }
/// @brief Returns true if SetTensorInfo() has been called at least once on this.
bool IsTensorInfoSet() const { return m_bTensorInfoSet; }
private:
- std::unique_ptr<ITensorHandle> m_TensorHandle;
- std::unique_ptr<ITensorHandle> m_AllocatedTensorHandle;
+ std::shared_ptr<ITensorHandle> m_TensorHandle;
+ std::shared_ptr<ITensorHandle> m_AllocatedTensorHandle;
TensorInfo m_TensorInfo;
bool m_bTensorInfoSet = false;
};
diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp
index 95fb8a3..1abe0f3 100644
--- a/src/armnn/Runtime.cpp
+++ b/src/armnn/Runtime.cpp
@@ -242,6 +242,7 @@
profiling::LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
}
}
+
if (m_LoadedNetworks.erase(networkId) == 0)
{
ARMNN_LOG(warning) << "WARNING: RuntimeImpl::UnloadNetwork(): " << networkId << " not found!";
@@ -632,6 +633,12 @@
ARMNN_LOG(info) << "Execution time: " << std::setprecision(2)
<< std::fixed << armnn::GetTimeDuration(startTime).count() << " ms.";
+ // Call After EnqueueWorkload events
+ for (auto&& context : m_BackendContexts)
+ {
+ context.second->AfterEnqueueWorkload(networkId);
+ }
+
return status;
}
diff --git a/src/backends/cl/ClBackendContext.cpp b/src/backends/cl/ClBackendContext.cpp
index 9c5cca9..5358fe9 100644
--- a/src/backends/cl/ClBackendContext.cpp
+++ b/src/backends/cl/ClBackendContext.cpp
@@ -285,6 +285,11 @@
return true;
}
+bool ClBackendContext::AfterEnqueueWorkload(NetworkId)
+{
+ return m_ClContextControlWrapper->Sync();
+}
+
ClBackendContext::~ClBackendContext()
{
if (m_Tuner && !m_TuningFile.empty())
diff --git a/src/backends/cl/ClBackendContext.hpp b/src/backends/cl/ClBackendContext.hpp
index af988a9..659d47b 100644
--- a/src/backends/cl/ClBackendContext.hpp
+++ b/src/backends/cl/ClBackendContext.hpp
@@ -25,6 +25,8 @@
bool BeforeUnloadNetwork(NetworkId networkId) override;
bool AfterUnloadNetwork(NetworkId networkId) override;
+ bool AfterEnqueueWorkload(NetworkId networkId) override;
+
~ClBackendContext() override;
private: