IVGCVSW-6062 update ArmnnPreparedModels to have a single static instance of the threadpool

Each ArmnnPreparedModel previously owned its own armnn::Threadpool, so a new
pool was created for every prepared model when asynchronous execution was
enabled. The threadpool is now a static member shared by all instances of
each prepared-model class: the first model to be prepared constructs the
pool, and later models register their working memory handles with it via
LoadMemHandles. The destructor likewise unloads the network's handles from
the shared pool via UnloadMemHandles. The constructor's async check now
tests the m_AsyncModelExecutionEnabled member rather than the constructor
parameter.

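The sketch below (not part of the patch) illustrates the shared-threadpool
lifecycle this change introduces, using a hypothetical PreparedModel class.
The armnn calls (CreateWorkingMemHandle, LoadMemHandles, UnloadMemHandles)
mirror the diff; the class name, header paths, and everything else are
simplified assumptions for illustration.

    #include <armnn/IRuntime.hpp>
    #include <armnn/Threadpool.hpp>

    #include <memory>
    #include <vector>

    class PreparedModel
    {
    public:
        PreparedModel(armnn::NetworkId networkId,
                      armnn::IRuntime* runtime,
                      unsigned int numberOfThreads)
            : m_NetworkId(networkId)
            , m_Runtime(runtime)
        {
            // One working memory handle per thread, as in the patch.
            std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
            for (unsigned int i = 0; i < numberOfThreads; ++i)
            {
                memHandles.emplace_back(runtime->CreateWorkingMemHandle(networkId));
            }

            if (!m_Threadpool)
            {
                // First prepared model constructs the shared pool...
                m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads,
                                                                   runtime,
                                                                   memHandles);
            }
            else
            {
                // ...later models register their handles with the existing pool.
                m_Threadpool->LoadMemHandles(memHandles);
            }
            m_WorkingMemHandle = memHandles.back();
        }

        ~PreparedModel()
        {
            // Unload the network, then remove its handles from the shared pool
            // (async execution is assumed always enabled in this sketch, so
            // m_Threadpool is non-null by this point).
            m_Runtime->UnloadNetwork(m_NetworkId);
            m_Threadpool->UnloadMemHandles(m_NetworkId);
        }

    private:
        armnn::NetworkId                          m_NetworkId;
        armnn::IRuntime*                          m_Runtime;
        std::shared_ptr<armnn::IWorkingMemHandle> m_WorkingMemHandle;
        // Static so the pool is shared between PreparedModel instances.
        static std::unique_ptr<armnn::Threadpool> m_Threadpool;
    };

    // Out-of-line definition of the shared static member.
    std::unique_ptr<armnn::Threadpool> PreparedModel::m_Threadpool(nullptr);

As in the patch, the pool is static per class, so each HAL-version class
template gets its own pool, created on first use and reused by every
subsequent prepared model of that class.
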
Signed-off-by: Finn Williams <Finn.Williams@arm.com>
Change-Id: Ie350a11eab5d677dd6a5571ea4094aa51b23c501
diff --git a/ArmnnPreparedModel.cpp b/ArmnnPreparedModel.cpp
index 388a111..f14560a 100644
--- a/ArmnnPreparedModel.cpp
+++ b/ArmnnPreparedModel.cpp
@@ -90,6 +90,9 @@
     ArmnnPreparedModel<HalVersion>::m_RequestThread;
 
 template<typename HalVersion>
+std::unique_ptr<armnn::Threadpool> ArmnnPreparedModel<HalVersion>::m_Threadpool(nullptr);
+
+template<typename HalVersion>
 template <typename TensorBindingCollection>
 void ArmnnPreparedModel<HalVersion>::DumpTensorsIfRequired(char const* tensorNamePrefix,
                                                            const TensorBindingCollection& tensorBindings)
@@ -126,7 +129,7 @@
     // Enable profiling if required.
     m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
 
-    if (asyncModelExecutionEnabled)
+    if (m_AsyncModelExecutionEnabled)
     {
         std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
         for (unsigned int i=0; i < numberOfThreads; ++i)
@@ -134,8 +137,16 @@
             memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
         }
 
+        if (!m_Threadpool)
+        {
+            m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
+        }
+        else
+        {
+            m_Threadpool->LoadMemHandles(memHandles);
+        }
+
         m_WorkingMemHandle = memHandles.back();
-        m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
     }
 }
 
@@ -148,6 +159,12 @@
     // Unload the network associated with this model.
     m_Runtime->UnloadNetwork(m_NetworkId);
 
+    // Unload the network memhandles from the threadpool
+    if (m_AsyncModelExecutionEnabled)
+    {
+        m_Threadpool->UnloadMemHandles(m_NetworkId);
+    }
+
     // Dump the profiling info to a file if required.
     DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId, profiler.get());
 }
diff --git a/ArmnnPreparedModel.hpp b/ArmnnPreparedModel.hpp
index 58177d9..685d950 100644
--- a/ArmnnPreparedModel.hpp
+++ b/ArmnnPreparedModel.hpp
@@ -96,17 +96,19 @@
             std::shared_ptr<armnn::OutputTensors>& outputTensors,
             CallbackContext m_CallbackContext);
 
-    armnn::NetworkId                   m_NetworkId;
-    armnn::IRuntime*                   m_Runtime;
-    std::unique_ptr<armnn::Threadpool> m_Threadpool;
-    HalModel                           m_Model;
+    armnn::NetworkId                          m_NetworkId;
+    armnn::IRuntime*                          m_Runtime;
+    HalModel                                  m_Model;
     // There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads
     // It is specific to this class, so it is declared as static here
-    static RequestThread<ArmnnPreparedModel, HalVersion, CallbackContext_1_0> m_RequestThread;
-    uint32_t                                                                m_RequestCount;
-    const std::string&                                                      m_RequestInputsAndOutputsDumpDir;
-    const bool                                                              m_GpuProfilingEnabled;
-
+    static RequestThread<ArmnnPreparedModel,
+                         HalVersion,
+                         CallbackContext_1_0> m_RequestThread;
+    uint32_t                                  m_RequestCount;
+    const std::string&                        m_RequestInputsAndOutputsDumpDir;
+    const bool                                m_GpuProfilingEnabled;
+    // Static to allow sharing of threadpool between ArmnnPreparedModel instances
+    static std::unique_ptr<armnn::Threadpool> m_Threadpool;
     std::shared_ptr<armnn::IWorkingMemHandle> m_WorkingMemHandle;
     const bool m_AsyncModelExecutionEnabled;
 };
diff --git a/ArmnnPreparedModel_1_2.cpp b/ArmnnPreparedModel_1_2.cpp
index e46b5be..5a10d54 100644
--- a/ArmnnPreparedModel_1_2.cpp
+++ b/ArmnnPreparedModel_1_2.cpp
@@ -125,6 +125,9 @@
         ArmnnPreparedModel_1_2<HalVersion>::m_RequestThread;
 
 template<typename HalVersion>
+std::unique_ptr<armnn::Threadpool> ArmnnPreparedModel_1_2<HalVersion>::m_Threadpool(nullptr);
+
+template<typename HalVersion>
 template<typename TensorBindingCollection>
 void ArmnnPreparedModel_1_2<HalVersion>::DumpTensorsIfRequired(char const* tensorNamePrefix,
                                                                const TensorBindingCollection& tensorBindings)
@@ -161,7 +164,7 @@
     // Enable profiling if required.
     m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
 
-    if (asyncModelExecutionEnabled)
+    if (m_AsyncModelExecutionEnabled)
     {
         std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
         for (unsigned int i=0; i < numberOfThreads; ++i)
@@ -169,8 +172,16 @@
             memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
         }
 
+        if (!m_Threadpool)
+        {
+            m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
+        }
+        else
+        {
+            m_Threadpool->LoadMemHandles(memHandles);
+        }
+
         m_WorkingMemHandle = memHandles.back();
-        m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
     }
 }
 
@@ -183,6 +194,12 @@
     // Unload the network associated with this model.
     m_Runtime->UnloadNetwork(m_NetworkId);
 
+    // Unload the network memhandles from the threadpool
+    if (m_AsyncModelExecutionEnabled)
+    {
+        m_Threadpool->UnloadMemHandles(m_NetworkId);
+    }
+
     // Dump the profiling info to a file if required.
     DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId, profiler.get());
 }
diff --git a/ArmnnPreparedModel_1_2.hpp b/ArmnnPreparedModel_1_2.hpp
index 4ee2b81..c64c891 100644
--- a/ArmnnPreparedModel_1_2.hpp
+++ b/ArmnnPreparedModel_1_2.hpp
@@ -143,7 +143,6 @@
 
     armnn::NetworkId                          m_NetworkId;
     armnn::IRuntime*                          m_Runtime;
-    std::unique_ptr<armnn::Threadpool>        m_Threadpool;
     V1_2::Model                               m_Model;
     // There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads
     // It is specific to this class, so it is declared as static here
@@ -153,6 +152,8 @@
     uint32_t                                  m_RequestCount;
     const std::string&                        m_RequestInputsAndOutputsDumpDir;
     const bool                                m_GpuProfilingEnabled;
+    // Static to allow sharing of threadpool between ArmnnPreparedModel instances
+    static std::unique_ptr<armnn::Threadpool> m_Threadpool;
     std::shared_ptr<IWorkingMemHandle>        m_WorkingMemHandle;
     const bool                                m_AsyncModelExecutionEnabled;
 };
diff --git a/ArmnnPreparedModel_1_3.cpp b/ArmnnPreparedModel_1_3.cpp
index dcac281..16ea113 100644
--- a/ArmnnPreparedModel_1_3.cpp
+++ b/ArmnnPreparedModel_1_3.cpp
@@ -145,6 +145,9 @@
         ArmnnPreparedModel_1_3<HalVersion>::m_RequestThread;
 
 template<typename HalVersion>
+std::unique_ptr<armnn::Threadpool> ArmnnPreparedModel_1_3<HalVersion>::m_Threadpool(nullptr);
+
+template<typename HalVersion>
 template<typename TensorBindingCollection>
 void ArmnnPreparedModel_1_3<HalVersion>::DumpTensorsIfRequired(char const* tensorNamePrefix,
                                                                const TensorBindingCollection& tensorBindings)
@@ -183,7 +186,7 @@
     // Enable profiling if required.
     m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
 
-    if (asyncModelExecutionEnabled)
+    if (m_AsyncModelExecutionEnabled)
     {
         std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
         for (unsigned int i=0; i < numberOfThreads; ++i)
@@ -191,8 +194,16 @@
             memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
         }
 
+        if (!m_Threadpool)
+        {
+            m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
+        }
+        else
+        {
+            m_Threadpool->LoadMemHandles(memHandles);
+        }
+
         m_WorkingMemHandle = memHandles.back();
-        m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
     }
 }
 
@@ -205,6 +216,12 @@
     // Unload the network associated with this model.
     m_Runtime->UnloadNetwork(m_NetworkId);
 
+    // Unload the network memhandles from the threadpool
+    if (m_AsyncModelExecutionEnabled)
+    {
+        m_Threadpool->UnloadMemHandles(m_NetworkId);
+    }
+
     // Dump the profiling info to a file if required.
     DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId, profiler.get());
 }
diff --git a/ArmnnPreparedModel_1_3.hpp b/ArmnnPreparedModel_1_3.hpp
index 46798cd..a245cc4 100644
--- a/ArmnnPreparedModel_1_3.hpp
+++ b/ArmnnPreparedModel_1_3.hpp
@@ -175,20 +175,23 @@
             CallbackContext m_CallbackContext,
             armnn::QosExecPriority priority);
 
-    armnn::NetworkId                                                            m_NetworkId;
-    armnn::IRuntime*                                                            m_Runtime;
-    std::unique_ptr<armnn::Threadpool>                                          m_Threadpool;
-    V1_3::Model                                                                 m_Model;
+    armnn::NetworkId                               m_NetworkId;
+    armnn::IRuntime*                               m_Runtime;
+    V1_3::Model                                    m_Model;
     // There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads
     // It is specific to this class, so it is declared as static here
-    static RequestThread_1_3<ArmnnPreparedModel_1_3, HalVersion, CallbackContext_1_3> m_RequestThread;
-    uint32_t                                                                    m_RequestCount;
-    const std::string&                                                          m_RequestInputsAndOutputsDumpDir;
-    const bool                                                                  m_GpuProfilingEnabled;
-    V1_3::Priority                                                              m_ModelPriority;
+    static RequestThread_1_3<ArmnnPreparedModel_1_3,
+                             HalVersion,
+                             CallbackContext_1_3>  m_RequestThread;
+    uint32_t                                       m_RequestCount;
+    const std::string&                             m_RequestInputsAndOutputsDumpDir;
+    const bool                                     m_GpuProfilingEnabled;
+    V1_3::Priority                                 m_ModelPriority;
 
-    std::shared_ptr<IWorkingMemHandle> m_WorkingMemHandle;
-    const bool m_AsyncModelExecutionEnabled;
+    // Static to allow sharing of threadpool between ArmnnPreparedModel instances
+    static std::unique_ptr<armnn::Threadpool>      m_Threadpool;
+    std::shared_ptr<IWorkingMemHandle>             m_WorkingMemHandle;
+    const bool                                     m_AsyncModelExecutionEnabled;
 };
 
 }