IVGCVSW-2066: Add IMemoryManager and integrate it into the backends framework

Change-Id: I93223c8678165cbc3d39f461c36bb8610dc81c05
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index 92433d1..24d119c 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -13,6 +13,7 @@
 
 #include <backendsCommon/CpuTensorHandle.hpp>
 #include <backendsCommon/BackendRegistry.hpp>
+#include <backendsCommon/IMemoryManager.hpp>
 
 #include <boost/polymorphic_cast.hpp>
 #include <boost/assert.hpp>
@@ -90,8 +91,11 @@
         {
             auto createBackend = BackendRegistryInstance().GetFactory(backend);
             auto it = m_Backends.emplace(std::make_pair(backend, createBackend()));
-            m_WorkloadFactories.emplace(std::make_pair(backend,
-                                                       it.first->second->CreateWorkloadFactory()));
+
+            auto memoryManager   = it.first->second->CreateMemoryManager();
+            auto workloadFactory = it.first->second->CreateWorkloadFactory(std::move(memoryManager));
+
+            m_WorkloadFactories.emplace(std::make_pair(backend, std::move(workloadFactory)));
         }
         layer->CreateTensorHandles(m_OptimizedNetwork->GetGraph(), GetWorkloadFactory(*layer));
     }
diff --git a/src/armnn/test/UnitTests.hpp b/src/armnn/test/UnitTests.hpp
index 40765e8..44b737c 100644
--- a/src/armnn/test/UnitTests.hpp
+++ b/src/armnn/test/UnitTests.hpp
@@ -8,6 +8,7 @@
 #include <armnn/Utils.hpp>
 #include <reference/RefWorkloadFactory.hpp>
 #include <backendsCommon/test/LayerTests.hpp>
+#include <backendsCommon/test/WorkloadFactoryHelper.hpp>
 #include "TensorHelpers.hpp"
 #include <boost/test/unit_test.hpp>
 
@@ -65,7 +66,7 @@
     std::unique_ptr<armnn::Profiler> profiler = std::make_unique<armnn::Profiler>();
     armnn::ProfilerManager::GetInstance().RegisterProfiler(profiler.get());
 
-    FactoryType workloadFactory;
+    FactoryType workloadFactory = WorkloadFactoryHelper<FactoryType>::GetFactory();
     auto testResult = (*testFunction)(workloadFactory, args...);
     CompareTestResultIfSupported(testName, testResult);
 }
@@ -79,7 +80,7 @@
 template<typename FactoryType, typename TFuncPtr, typename... Args>
 void CompareRefTestFunction(const char* testName, TFuncPtr testFunction, Args... args)
 {
-    FactoryType workloadFactory;
+    FactoryType workloadFactory = WorkloadFactoryHelper<FactoryType>::GetFactory();
     armnn::RefWorkloadFactory refWorkloadFactory;
     auto testResult = (*testFunction)(workloadFactory, refWorkloadFactory, args...);
     CompareTestResultIfSupported(testName, testResult);
diff --git a/src/backends/aclCommon/BaseMemoryManager.cpp b/src/backends/aclCommon/BaseMemoryManager.cpp
index 4e0d14c..f564dc6 100644
--- a/src/backends/aclCommon/BaseMemoryManager.cpp
+++ b/src/backends/aclCommon/BaseMemoryManager.cpp
@@ -77,6 +77,16 @@
     BOOST_ASSERT(m_InterLayerMemoryMgr);
     m_InterLayerMemoryMgr->clear();
 }
+#else
+void BaseMemoryManager::Acquire()
+{
+    // No-op when neither NEON nor CL is enabled; still defined because the header declares this override unconditionally.
+}
+
+void BaseMemoryManager::Release()
+{
+    // No-op when neither NEON nor CL is enabled; still defined because the header declares this override unconditionally.
+}
 #endif
 
 #ifdef ARMCOMPUTENEON_ENABLED
diff --git a/src/backends/aclCommon/BaseMemoryManager.hpp b/src/backends/aclCommon/BaseMemoryManager.hpp
index ffa440b..a880b9a 100644
--- a/src/backends/aclCommon/BaseMemoryManager.hpp
+++ b/src/backends/aclCommon/BaseMemoryManager.hpp
@@ -4,6 +4,7 @@
 //
 #pragma once
 
+#include <backendsCommon/IMemoryManager.hpp>
 #include <backendsCommon/WorkloadFactory.hpp>
 
 #ifdef ARMCOMPUTENEON_ENABLED
@@ -23,7 +24,7 @@
 namespace armnn
 {
 
-class BaseMemoryManager
+class BaseMemoryManager : public IMemoryManager
 {
 public:
     enum class MemoryAffinity
@@ -35,6 +36,9 @@
     BaseMemoryManager() { }
     virtual ~BaseMemoryManager() { }
 
+    void Acquire() override;
+    void Release() override;
+
 #if defined(ARMCOMPUTENEON_ENABLED) || defined(ARMCOMPUTECL_ENABLED)
 
     BaseMemoryManager(std::unique_ptr<arm_compute::IAllocator> alloc, MemoryAffinity memoryAffinity);
@@ -43,9 +47,6 @@
     std::shared_ptr<arm_compute::MemoryManagerOnDemand>& GetInterLayerManager() { return m_InterLayerMemoryMgr; }
     std::shared_ptr<arm_compute::IMemoryGroup>& GetInterLayerMemoryGroup()      { return m_InterLayerMemoryGroup; }
 
-    void Acquire();
-    void Release();
-
 protected:
     std::unique_ptr<arm_compute::IAllocator>            m_Allocator;
     std::shared_ptr<arm_compute::MemoryManagerOnDemand> m_IntraLayerMemoryMgr;
diff --git a/src/backends/aclCommon/test/MemCopyTestImpl.hpp b/src/backends/aclCommon/test/MemCopyTestImpl.hpp
index 6cafaad..763a3f7 100644
--- a/src/backends/aclCommon/test/MemCopyTestImpl.hpp
+++ b/src/backends/aclCommon/test/MemCopyTestImpl.hpp
@@ -6,6 +6,7 @@
 
 #include <backendsCommon/test/LayerTests.hpp>
 #include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadFactoryHelper.hpp>
 #include <backendsCommon/test/WorkloadTestUtils.hpp>
 
 #include <test/TensorHelpers.hpp>
@@ -75,8 +76,8 @@
 template<typename SrcWorkloadFactory, typename DstWorkloadFactory>
 LayerTestResult<float, 4> MemCopyTest(bool withSubtensors)
 {
-    SrcWorkloadFactory srcWorkloadFactory;
-    DstWorkloadFactory dstWorkloadFactory;
+    SrcWorkloadFactory srcWorkloadFactory = WorkloadFactoryHelper<SrcWorkloadFactory>::GetFactory();
+    DstWorkloadFactory dstWorkloadFactory = WorkloadFactoryHelper<DstWorkloadFactory>::GetFactory();
 
     return MemCopyTest(srcWorkloadFactory, dstWorkloadFactory, withSubtensors);
 }
diff --git a/src/backends/aclCommon/test/MemCopyTests.cpp b/src/backends/aclCommon/test/MemCopyTests.cpp
index 3d4236d..7099a70 100644
--- a/src/backends/aclCommon/test/MemCopyTests.cpp
+++ b/src/backends/aclCommon/test/MemCopyTests.cpp
@@ -9,7 +9,11 @@
 
 #if ARMCOMPUTECL_ENABLED && ARMCOMPUTENEON_ENABLED
 #include <aclCommon/test/MemCopyTestImpl.hpp>
+
 #include <cl/test/ClContextControlFixture.hpp>
+#include <cl/test/ClWorkloadFactoryHelper.hpp>
+
+#include <neon/test/NeonWorkloadFactoryHelper.hpp>
 #endif
 
 #include <boost/test/unit_test.hpp>
diff --git a/src/backends/backendsCommon/CMakeLists.txt b/src/backends/backendsCommon/CMakeLists.txt
index e6ac01c..1fe9888 100644
--- a/src/backends/backendsCommon/CMakeLists.txt
+++ b/src/backends/backendsCommon/CMakeLists.txt
@@ -12,6 +12,7 @@
     IBackendInternal.hpp
     IBackendContext.hpp
     ILayerSupport.cpp
+    IMemoryManager.hpp
     ITensorHandle.hpp
     MakeWorkloadHelper.hpp
     MemCopyWorkload.cpp
diff --git a/src/backends/backendsCommon/IBackendInternal.hpp b/src/backends/backendsCommon/IBackendInternal.hpp
index 9d649fc..b102d1a 100644
--- a/src/backends/backendsCommon/IBackendInternal.hpp
+++ b/src/backends/backendsCommon/IBackendInternal.hpp
@@ -12,6 +12,7 @@
 {
 class IWorkloadFactory;
 class IBackendContext;
+class IMemoryManager;
 class Optimization;
 class ILayerSupport;
 
@@ -33,8 +34,16 @@
     using Optimizations = std::vector<OptimizationPtr>;
     using ILayerSupportSharedPtr = std::shared_ptr<ILayerSupport>;
 
-    virtual IWorkloadFactoryPtr CreateWorkloadFactory() const = 0;
+    using IMemoryManagerUniquePtr = std::unique_ptr<IMemoryManager>;
+    using IMemoryManagerSharedPtr = std::shared_ptr<IMemoryManager>;
+
+    virtual IMemoryManagerUniquePtr CreateMemoryManager() const = 0;
+
+    virtual IWorkloadFactoryPtr CreateWorkloadFactory(
+        const IMemoryManagerSharedPtr& memoryManager = nullptr) const = 0;
+
     virtual IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const = 0;
+
     virtual Optimizations GetOptimizations() const = 0;
     virtual ILayerSupportSharedPtr GetLayerSupport() const = 0;
 };
diff --git a/src/backends/backendsCommon/IMemoryManager.hpp b/src/backends/backendsCommon/IMemoryManager.hpp
new file mode 100644
index 0000000..28b81e7
--- /dev/null
+++ b/src/backends/backendsCommon/IMemoryManager.hpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include <memory>
+
+namespace armnn
+{
+
+class IMemoryManager // Abstract memory-manager interface; returned (as a unique_ptr) by IBackendInternal::CreateMemoryManager().
+{
+protected:
+    IMemoryManager() {} // Protected so that only derived classes can construct the interface.
+
+public:
+    virtual void Acquire() = 0; // Implemented by concrete managers; presumably makes the managed memory available - TODO confirm.
+    virtual void Release() = 0; // Counterpart of Acquire(); exact semantics are defined by the concrete manager.
+
+    virtual ~IMemoryManager() {} // Virtual so instances can be destroyed through IMemoryManager smart pointers.
+};
+
+using IMemoryManagerUniquePtr = std::unique_ptr<IMemoryManager>;
+
+} // namespace armnn
\ No newline at end of file
diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
index ae94ad5..7419c14 100644
--- a/src/backends/backendsCommon/test/CMakeLists.txt
+++ b/src/backends/backendsCommon/test/CMakeLists.txt
@@ -32,6 +32,7 @@
     TensorCopyUtils.cpp
     TensorCopyUtils.hpp
     WorkloadDataValidation.cpp
+    WorkloadFactoryHelper.hpp
     WorkloadTestUtils.hpp
 )
 
diff --git a/src/backends/backendsCommon/test/LayerReleaseConstantDataTest.cpp b/src/backends/backendsCommon/test/LayerReleaseConstantDataTest.cpp
index fc32fdc..25873d8 100644
--- a/src/backends/backendsCommon/test/LayerReleaseConstantDataTest.cpp
+++ b/src/backends/backendsCommon/test/LayerReleaseConstantDataTest.cpp
@@ -35,8 +35,7 @@
 
 BOOST_AUTO_TEST_CASE(ReleaseBatchNormalizationLayerConstantDataTest)
 {
-    Graph             graph;
-    ClWorkloadFactory factory;
+    Graph graph;
 
     // create the layer we're testing
     BatchNormalizationDescriptor layerDesc;
@@ -82,8 +81,7 @@
 
  BOOST_AUTO_TEST_CASE(ReleaseConvolution2dLayerConstantDataTest)
  {
-     Graph             graph;
-     ClWorkloadFactory factory;
+     Graph graph;
 
      // create the layer we're testing
      Convolution2dDescriptor layerDesc;
@@ -127,8 +125,7 @@
 
 BOOST_AUTO_TEST_CASE(ReleaseDepthwiseConvolution2dLayerConstantDataTest)
 {
-    Graph             graph;
-    ClWorkloadFactory factory;
+    Graph graph;
 
     // create the layer we're testing
     DepthwiseConvolution2dDescriptor layerDesc;
@@ -169,8 +166,7 @@
 
 BOOST_AUTO_TEST_CASE(ReleaseFullyConnectedLayerConstantDataTest)
 {
-    Graph             graph;
-    ClWorkloadFactory factory;
+    Graph graph;
 
     // create the layer we're testing
     FullyConnectedDescriptor layerDesc;
diff --git a/src/backends/backendsCommon/test/WorkloadFactoryHelper.hpp b/src/backends/backendsCommon/test/WorkloadFactoryHelper.hpp
new file mode 100644
index 0000000..41bf54a
--- /dev/null
+++ b/src/backends/backendsCommon/test/WorkloadFactoryHelper.hpp
@@ -0,0 +1,12 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+namespace
+{
+
+template<typename WorkloadFactoryType> struct WorkloadFactoryHelper {}; // Intentionally empty primary template; backend test headers specialize it to provide a static GetFactory().
+
+} // anonymous namespace
\ No newline at end of file
diff --git a/src/backends/cl/CMakeLists.txt b/src/backends/cl/CMakeLists.txt
index dd2a4a1..7593f09 100644
--- a/src/backends/cl/CMakeLists.txt
+++ b/src/backends/cl/CMakeLists.txt
@@ -34,8 +34,6 @@
         ClContextControl.hpp
         ClLayerSupport.cpp
         ClLayerSupport.hpp
-        ClWorkloadFactory.cpp
-        ClWorkloadFactory.hpp
     )
 endif()
 
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp
index 4ef8d90..2b82c18 100644
--- a/src/backends/cl/ClBackend.cpp
+++ b/src/backends/cl/ClBackend.cpp
@@ -9,10 +9,18 @@
 #include "ClBackendContext.hpp"
 #include "ClLayerSupport.hpp"
 
-#include <backendsCommon/IBackendContext.hpp>
+#include <aclCommon/BaseMemoryManager.hpp>
+
 #include <backendsCommon/BackendRegistry.hpp>
+#include <backendsCommon/IBackendContext.hpp>
+#include <backendsCommon/IMemoryManager.hpp>
+
 #include <Optimizer.hpp>
 
+#include <arm_compute/runtime/CL/CLBufferAllocator.h>
+
+#include <boost/polymorphic_pointer_cast.hpp>
+
 namespace armnn
 {
 
@@ -37,9 +45,16 @@
     return s_Id;
 }
 
-IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory() const
+IBackendInternal::IMemoryManagerUniquePtr ClBackend::CreateMemoryManager() const // Builds the memory manager used by the CL backend.
 {
-    return std::make_unique<ClWorkloadFactory>();
+    return std::make_unique<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>()); // The new allocator is handed to the manager as a unique_ptr.
+}
+
+IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
+    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const
+{ // Creates a ClWorkloadFactory that shares the given memory manager, downcast from IMemoryManager to ClMemoryManager.
+    return std::make_unique<ClWorkloadFactory>(
+        boost::polymorphic_pointer_downcast<ClMemoryManager>(memoryManager)); // NOTE(review): the declaration defaults memoryManager to nullptr, but ClWorkloadFactory dereferences its manager without a null check - confirm callers always pass one.
 }
 
 IBackendInternal::IBackendContextPtr
diff --git a/src/backends/cl/ClBackend.hpp b/src/backends/cl/ClBackend.hpp
index 7ee8598..ef98da0 100644
--- a/src/backends/cl/ClBackend.hpp
+++ b/src/backends/cl/ClBackend.hpp
@@ -18,8 +18,13 @@
     static const BackendId& GetIdStatic();
     const BackendId& GetId() const override { return GetIdStatic(); }
 
-    IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory() const override;
+    IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override;
+
+    IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(
+        const IBackendInternal::IMemoryManagerSharedPtr& memoryManager = nullptr) const override;
+
     IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override;
+
     IBackendInternal::Optimizations GetOptimizations() const override;
     IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override;
 };
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index 0862ea1..5679549 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -82,15 +82,15 @@
     }
 }
 
-ClWorkloadFactory::ClWorkloadFactory()
-: m_MemoryManager(std::make_unique<arm_compute::CLBufferAllocator>())
+ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager)
+    : m_MemoryManager(memoryManager) // Copies the shared_ptr: the factory shares the caller's manager instead of constructing its own.
 {
 }
 
 std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
 {
     std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo);
-    tensorHandle->SetMemoryGroup(m_MemoryManager.GetInterLayerMemoryGroup());
+    tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
 
     return tensorHandle;
 }
@@ -99,7 +99,7 @@
                                                                      DataLayout dataLayout) const
 {
     std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo, dataLayout);
-    tensorHandle->SetMemoryGroup(m_MemoryManager.GetInterLayerMemoryGroup());
+    tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
 
     return tensorHandle;
 }
@@ -145,7 +145,7 @@
                                                             const WorkloadInfo&           info) const
 {
     return MakeWorkload<ClSoftmaxFloatWorkload, ClSoftmaxUint8Workload>(descriptor, info,
-                                                                        m_MemoryManager.GetIntraLayerManager());
+                                                                        m_MemoryManager->GetIntraLayerManager());
 }
 
 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
@@ -164,7 +164,7 @@
     const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const
 {
     return MakeWorkload<ClFullyConnectedWorkload, ClFullyConnectedWorkload>(descriptor, info,
-                                                                            m_MemoryManager.GetIntraLayerManager());
+                                                                            m_MemoryManager->GetIntraLayerManager());
 }
 
 std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
@@ -182,7 +182,7 @@
 std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
                                                                          const WorkloadInfo&               info) const
 {
-    return MakeWorkload<ClConvolution2dWorkload>(descriptor, info, m_MemoryManager.GetIntraLayerManager());
+    return MakeWorkload<ClConvolution2dWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
 }
 
 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDepthwiseConvolution2d(
@@ -322,20 +322,16 @@
 
 void ClWorkloadFactory::Release()
 {
-    m_MemoryManager.Release();
+    m_MemoryManager->Release(); // Forwards to the shared ClMemoryManager; the pointer is not null-checked here.
 }
 
 void ClWorkloadFactory::Acquire()
 {
-    m_MemoryManager.Acquire();
+    m_MemoryManager->Acquire(); // Forwards to the shared ClMemoryManager; the pointer is not null-checked here.
 }
 
 #else // #if ARMCOMPUTECL_ENABLED
 
-ClWorkloadFactory::ClWorkloadFactory()
-{
-}
-
 std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
 {
     return nullptr;
diff --git a/src/backends/cl/ClWorkloadFactory.hpp b/src/backends/cl/ClWorkloadFactory.hpp
index 6a928db..cb715e1 100644
--- a/src/backends/cl/ClWorkloadFactory.hpp
+++ b/src/backends/cl/ClWorkloadFactory.hpp
@@ -17,7 +17,7 @@
 class ClWorkloadFactory : public IWorkloadFactory
 {
 public:
-    ClWorkloadFactory();
+    ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager);
 
     const BackendId& GetBackendId() const override;
 
@@ -134,8 +134,6 @@
     virtual void Acquire() override;
 
 private:
-
-#ifdef ARMCOMPUTECL_ENABLED
     template<typename FloatWorkload, typename Uint8Workload, typename QueueDescriptorType, typename... Args>
     static std::unique_ptr<IWorkload> MakeWorkload(const QueueDescriptorType& descriptor,
                                                    const WorkloadInfo& info,
@@ -146,8 +144,7 @@
                                                    const WorkloadInfo& info,
                                                    Args&&... args);
 
-    mutable ClMemoryManager m_MemoryManager;
-#endif
+    mutable std::shared_ptr<ClMemoryManager> m_MemoryManager;
 };
 
 } // namespace armnn
diff --git a/src/backends/cl/test/CMakeLists.txt b/src/backends/cl/test/CMakeLists.txt
index 574edf4..206cf5a 100644
--- a/src/backends/cl/test/CMakeLists.txt
+++ b/src/backends/cl/test/CMakeLists.txt
@@ -13,6 +13,8 @@
     ClMemCopyTests.cpp
     ClOptimizedNetworkTests.cpp
     ClRuntimeTests.cpp
+    ClWorkloadFactoryHelper.hpp
+    Fp16SupportTest.cpp
     OpenClTimerTest.cpp
 )
 
diff --git a/src/backends/cl/test/ClCreateWorkloadTests.cpp b/src/backends/cl/test/ClCreateWorkloadTests.cpp
index 72a2eb2..978b3bc 100644
--- a/src/backends/cl/test/ClCreateWorkloadTests.cpp
+++ b/src/backends/cl/test/ClCreateWorkloadTests.cpp
@@ -4,6 +4,7 @@
 //
 
 #include "ClContextControlFixture.hpp"
+#include "ClWorkloadFactoryHelper.hpp"
 
 #include <backendsCommon/MemCopyWorkload.hpp>
 
@@ -26,7 +27,7 @@
 static void ClCreateActivationWorkloadTest()
 {
     Graph graph;
-    ClWorkloadFactory factory;
+    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory();
 
     auto workload = CreateActivationWorkloadTest<ClActivationWorkload, DataType>(factory, graph);
 
@@ -56,7 +57,7 @@
 static void ClCreateArithmethicWorkloadTest()
 {
     Graph graph;
-    ClWorkloadFactory factory;
+    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory();
     auto workload = CreateArithmeticWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateArithmeticWorkloadTest).
@@ -145,7 +146,7 @@
 static void ClCreateBatchNormalizationWorkloadTest(DataLayout dataLayout)
 {
     Graph graph;
-    ClWorkloadFactory factory;
+    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory();
 
     auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>
                     (factory, graph, dataLayout);
@@ -194,7 +195,7 @@
 BOOST_AUTO_TEST_CASE(CreateConvertFp16ToFp32Workload)
 {
     Graph graph;
-    ClWorkloadFactory factory;
+    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory();
     auto workload = CreateConvertFp16ToFp32WorkloadTest<ClConvertFp16ToFp32Workload>(factory, graph);
 
     ConvertFp16ToFp32QueueDescriptor queueDescriptor = workload->GetData();
@@ -210,7 +211,7 @@
 BOOST_AUTO_TEST_CASE(CreateConvertFp32ToFp16Workload)
 {
     Graph graph;
-    ClWorkloadFactory factory;
+    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory();
     auto workload = CreateConvertFp32ToFp16WorkloadTest<ClConvertFp32ToFp16Workload>(factory, graph);
 
     ConvertFp32ToFp16QueueDescriptor queueDescriptor = workload->GetData();
@@ -227,7 +228,7 @@
 static void ClConvolution2dWorkloadTest(DataLayout dataLayout)
 {
     Graph graph;
-    ClWorkloadFactory factory;
+    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory();
     auto workload = CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory,
                                                                                        graph,
                                                                                        dataLayout);
@@ -269,7 +270,7 @@
 static void ClDepthwiseConvolutionWorkloadTest(DataLayout dataLayout)
 {
     Graph graph;
-    ClWorkloadFactory factory;
+    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory();
 
     auto workload = CreateDepthwiseConvolution2dWorkloadTest<DepthwiseConvolutionWorkloadType, DataType>
                     (factory, graph, dataLayout);
@@ -299,7 +300,7 @@
 static void ClDirectConvolution2dWorkloadTest()
 {
     Graph graph;
-    ClWorkloadFactory factory;
+    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory();
     auto workload = CreateDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory, graph);
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateDirectConvolution2dWorkloadTest).
@@ -329,7 +330,7 @@
 static void ClCreateFullyConnectedWorkloadTest()
 {
     Graph graph;
-    ClWorkloadFactory factory;
+    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory();
     auto workload =
         CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, DataType>(factory, graph);
 
@@ -356,7 +357,7 @@
 static void ClNormalizationWorkloadTest(DataLayout dataLayout)
 {
     Graph graph;
-    ClWorkloadFactory factory;
+    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory();
     auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
@@ -397,7 +398,7 @@
 static void ClPooling2dWorkloadTest(DataLayout dataLayout)
 {
     Graph graph;
-    ClWorkloadFactory factory;
+    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory();
 
     auto workload = CreatePooling2dWorkloadTest<ClPooling2dWorkload, DataType>(factory, graph, dataLayout);
 
@@ -439,7 +440,7 @@
 static void ClCreateReshapeWorkloadTest()
 {
     Graph graph;
-    ClWorkloadFactory factory;
+    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory();
 
     auto workload = CreateReshapeWorkloadTest<ClReshapeWorkload, DataType>(factory, graph);
 
@@ -471,7 +472,7 @@
 static void ClSoftmaxWorkloadTest()
 {
     Graph graph;
-    ClWorkloadFactory factory;
+    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory();
 
     auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph);
 
@@ -499,7 +500,7 @@
 static void ClSplitterWorkloadTest()
 {
     Graph graph;
-    ClWorkloadFactory factory;
+    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory();
 
     auto workload = CreateSplitterWorkloadTest<ClSplitterWorkload, DataType>(factory, graph);
 
@@ -540,7 +541,7 @@
     // of the merger.
 
     Graph graph;
-    ClWorkloadFactory factory;
+    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory();
 
     auto workloads =
         CreateSplitterMergerWorkloadTest<ClSplitterWorkload, ClMergerWorkload, DataType>
@@ -589,7 +590,7 @@
     // We create a splitter with two outputs. That each of those outputs is used by two different activation layers.
 
     Graph graph;
-    ClWorkloadFactory factory;
+    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory();
     std::unique_ptr<ClSplitterWorkload> wlSplitter;
     std::unique_ptr<ClActivationWorkload> wlActiv0_0;
     std::unique_ptr<ClActivationWorkload> wlActiv0_1;
@@ -624,7 +625,7 @@
 
 BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsCl)
 {
-    ClWorkloadFactory    factory;
+    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory();
     CreateMemCopyWorkloads<IClTensorHandle>(factory);
 }
 
@@ -632,7 +633,7 @@
 static void ClL2NormalizationWorkloadTest(DataLayout dataLayout)
 {
     Graph graph;
-    ClWorkloadFactory factory;
+    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory();
     auto workload =
             CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
 
@@ -676,7 +677,7 @@
 static void ClCreateLstmWorkloadTest()
 {
     Graph graph;
-    ClWorkloadFactory factory;
+    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory();
     auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph);
 
     LstmQueueDescriptor queueDescriptor = workload->GetData();
@@ -695,7 +696,7 @@
 static void ClResizeBilinearWorkloadTest(DataLayout dataLayout)
 {
     Graph graph;
-    ClWorkloadFactory factory;
+    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory();
 
     auto workload = CreateResizeBilinearWorkloadTest<ResizeBilinearWorkloadType, DataType>(factory, graph, dataLayout);
 
@@ -741,7 +742,7 @@
 static void ClMeanWorkloadTest()
 {
     Graph graph;
-    ClWorkloadFactory factory;
+    ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory();
     auto workload = CreateMeanWorkloadTest<MeanWorkloadType, DataType>(factory, graph);
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateMeanWorkloadTest).
diff --git a/src/backends/cl/test/ClLayerSupportTests.cpp b/src/backends/cl/test/ClLayerSupportTests.cpp
index 0019afe..2218d82 100644
--- a/src/backends/cl/test/ClLayerSupportTests.cpp
+++ b/src/backends/cl/test/ClLayerSupportTests.cpp
@@ -3,6 +3,8 @@
 // SPDX-License-Identifier: MIT
 //
 
+#include "ClWorkloadFactoryHelper.hpp"
+
 #include <layers/ConvertFp16ToFp32Layer.hpp>
 #include <layers/ConvertFp32ToFp16Layer.hpp>
 #include <test/TensorHelpers.hpp>
@@ -21,19 +23,19 @@
 
 BOOST_FIXTURE_TEST_CASE(IsLayerSupportedFloat16Cl, ClContextControlFixture)
 {
-    armnn::ClWorkloadFactory factory;
+    armnn::ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory();
     IsLayerSupportedTests<armnn::ClWorkloadFactory, armnn::DataType::Float16>(&factory);
 }
 
 BOOST_FIXTURE_TEST_CASE(IsLayerSupportedFloat32Cl, ClContextControlFixture)
 {
-    armnn::ClWorkloadFactory factory;
+    armnn::ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory();
     IsLayerSupportedTests<armnn::ClWorkloadFactory, armnn::DataType::Float32>(&factory);
 }
 
 BOOST_FIXTURE_TEST_CASE(IsLayerSupportedUint8Cl, ClContextControlFixture)
 {
-    armnn::ClWorkloadFactory factory;
+    armnn::ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory();
     IsLayerSupportedTests<armnn::ClWorkloadFactory, armnn::DataType::QuantisedAsymm8>(&factory);
 }
 
diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp
index ade0790..c7d64ef 100755
--- a/src/backends/cl/test/ClLayerTests.cpp
+++ b/src/backends/cl/test/ClLayerTests.cpp
@@ -4,6 +4,7 @@
 //
 
 #include "ClContextControlFixture.hpp"
+#include "ClWorkloadFactoryHelper.hpp"
 
 #include "test/TensorHelpers.hpp"
 #include "test/UnitTests.hpp"
diff --git a/src/backends/cl/test/ClMemCopyTests.cpp b/src/backends/cl/test/ClMemCopyTests.cpp
index 93b8df1..93d8dd5 100644
--- a/src/backends/cl/test/ClMemCopyTests.cpp
+++ b/src/backends/cl/test/ClMemCopyTests.cpp
@@ -3,10 +3,14 @@
 // SPDX-License-Identifier: MIT
 //
 
+#include "ClWorkloadFactoryHelper.hpp"
+
 #include <cl/ClWorkloadFactory.hpp>
-#include <reference/RefWorkloadFactory.hpp>
 #include <aclCommon/test/MemCopyTestImpl.hpp>
 
+#include <reference/RefWorkloadFactory.hpp>
+#include <reference/test/RefWorkloadFactoryHelper.hpp>
+
 #include <boost/test/unit_test.hpp>
 
 BOOST_AUTO_TEST_SUITE(ClMemCopy)
diff --git a/src/backends/cl/test/ClOptimizedNetworkTests.cpp b/src/backends/cl/test/ClOptimizedNetworkTests.cpp
index cd8a770..7e32147 100644
--- a/src/backends/cl/test/ClOptimizedNetworkTests.cpp
+++ b/src/backends/cl/test/ClOptimizedNetworkTests.cpp
@@ -3,6 +3,8 @@
 // SPDX-License-Identifier: MIT
 //
 
+#include "ClWorkloadFactoryHelper.hpp"
+
 #include <armnn/ArmNN.hpp>
 #include <Network.hpp>
 
@@ -32,7 +34,7 @@
     armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
     BOOST_CHECK(optNet);
     // validate workloads
-    armnn::ClWorkloadFactory fact;
+    armnn::ClWorkloadFactory fact = ClWorkloadFactoryHelper::GetFactory();
     for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph())
     {
         BOOST_CHECK(layer->GetBackendId() == armnn::Compute::GpuAcc);
diff --git a/src/backends/cl/test/ClWorkloadFactoryHelper.hpp b/src/backends/cl/test/ClWorkloadFactoryHelper.hpp
new file mode 100644
index 0000000..7b60b8a
--- /dev/null
+++ b/src/backends/cl/test/ClWorkloadFactoryHelper.hpp
@@ -0,0 +1,35 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/IMemoryManager.hpp>
+#include <backendsCommon/test/WorkloadFactoryHelper.hpp>
+
+#include <cl/ClWorkloadFactory.hpp>
+
+#include <arm_compute/runtime/CL/CLBufferAllocator.h>
+
+#include <boost/polymorphic_pointer_cast.hpp>
+
+namespace
+{
+
+template<> // Test helper: specialization that builds a ClWorkloadFactory together with its own ClMemoryManager.
+struct WorkloadFactoryHelper<armnn::ClWorkloadFactory>
+{
+    static armnn::ClWorkloadFactory GetFactory() // Returns a factory by value; each call creates a new memory manager.
+    { // The manager is created as a ClMemoryManager, held via the IMemoryManager base pointer, then downcast for the factory constructor.
+        armnn::IBackendInternal::IMemoryManagerSharedPtr memoryManager =
+            std::make_shared<armnn::ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+
+        return armnn::ClWorkloadFactory(boost::polymorphic_pointer_downcast<armnn::ClMemoryManager>(memoryManager));
+    }
+};
+
+using ClWorkloadFactoryHelper = WorkloadFactoryHelper<armnn::ClWorkloadFactory>;
+
+} // anonymous namespace
diff --git a/src/backends/cl/test/OpenClTimerTest.cpp b/src/backends/cl/test/OpenClTimerTest.cpp
index 0c40a86..6e55be6 100644
--- a/src/backends/cl/test/OpenClTimerTest.cpp
+++ b/src/backends/cl/test/OpenClTimerTest.cpp
@@ -5,6 +5,8 @@
 
 #if (defined(__aarch64__)) || (defined(__x86_64__)) // disable test failing on FireFly/Armv7
 
+#include "ClWorkloadFactoryHelper.hpp"
+
 #include <test/TensorHelpers.hpp>
 
 #include <backendsCommon/CpuTensorHandle.hpp>
@@ -42,7 +44,7 @@
 
 BOOST_AUTO_TEST_CASE(OpenClTimerBatchNorm)
 {
-    ClWorkloadFactory  workloadFactory;
+    ClWorkloadFactory  workloadFactory = ClWorkloadFactoryHelper::GetFactory();
 
     const unsigned int width    = 2;
     const unsigned int height   = 3;
diff --git a/src/backends/neon/CMakeLists.txt b/src/backends/neon/CMakeLists.txt
index badad2f..be318e9 100644
--- a/src/backends/neon/CMakeLists.txt
+++ b/src/backends/neon/CMakeLists.txt
@@ -30,8 +30,6 @@
         NeonBackendId.hpp
         NeonLayerSupport.cpp
         NeonLayerSupport.hpp
-        NeonWorkloadFactory.cpp
-        NeonWorkloadFactory.hpp
     )
 endif()
 
diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp
index 4d57eda..cde2998 100644
--- a/src/backends/neon/NeonBackend.cpp
+++ b/src/backends/neon/NeonBackend.cpp
@@ -8,11 +8,18 @@
 #include "NeonWorkloadFactory.hpp"
 #include "NeonLayerSupport.hpp"
 
-#include <backendsCommon/IBackendContext.hpp>
+#include <aclCommon/BaseMemoryManager.hpp>
+
 #include <backendsCommon/BackendRegistry.hpp>
+#include <backendsCommon/IBackendContext.hpp>
+#include <backendsCommon/IMemoryManager.hpp>
+
 #include <Optimizer.hpp>
 
+#include <arm_compute/runtime/Allocator.h>
+
 #include <boost/cast.hpp>
+#include <boost/polymorphic_pointer_cast.hpp>
 
 namespace armnn
 {
@@ -38,9 +45,21 @@
     return s_Id;
 }
 
-IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory() const
+// Builds the memory manager for this backend: a NeonMemoryManager constructed from an
+// arm_compute::Allocator and the Offset memory affinity.
+IBackendInternal::IMemoryManagerUniquePtr NeonBackend::CreateMemoryManager() const
 {
-    return std::make_unique<NeonWorkloadFactory>();
+    const auto affinity = BaseMemoryManager::MemoryAffinity::Offset;
+    return std::make_unique<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(), affinity);
+}
+
+// Builds the Neon workload factory on top of the supplied memory manager.
+// The manager is down-cast to NeonMemoryManager before being handed to the factory.
+IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
+    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const
+{
+    auto neonMemoryManager = boost::polymorphic_pointer_downcast<NeonMemoryManager>(memoryManager);
+    return std::make_unique<NeonWorkloadFactory>(neonMemoryManager);
 }
 
 IBackendInternal::IBackendContextPtr NeonBackend::CreateBackendContext(const IRuntime::CreationOptions&) const
diff --git a/src/backends/neon/NeonBackend.hpp b/src/backends/neon/NeonBackend.hpp
index d83710d..127a5a4 100644
--- a/src/backends/neon/NeonBackend.hpp
+++ b/src/backends/neon/NeonBackend.hpp
@@ -18,8 +18,13 @@
     static const BackendId& GetIdStatic();
     const BackendId& GetId() const override { return GetIdStatic(); }
 
-    IWorkloadFactoryPtr CreateWorkloadFactory() const override;
+    IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override;
+
+    IWorkloadFactoryPtr CreateWorkloadFactory(
+        const IBackendInternal::IMemoryManagerSharedPtr& memoryManager = nullptr) const override;
+
     IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override;
+
     IBackendInternal::Optimizations GetOptimizations() const override;
     IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override;
 };
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index f0d916b..6584447 100644
--- a/src/backends/neon/NeonWorkloadFactory.cpp
+++ b/src/backends/neon/NeonWorkloadFactory.cpp
@@ -2,24 +2,23 @@
 // Copyright © 2017 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
-#include "NeonWorkloadFactory.hpp"
+
 #include "NeonBackendId.hpp"
-#include <armnn/Utils.hpp>
-#include <backendsCommon/CpuTensorHandle.hpp>
+#include "NeonTensorHandle.hpp"
+#include "NeonWorkloadFactory.hpp"
+
 #include <Layer.hpp>
 
-#ifdef ARMCOMPUTENEON_ENABLED
-#include <arm_compute/runtime/Allocator.h>
+#include <armnn/Utils.hpp>
 
-#include <backendsCommon/MemCopyWorkload.hpp>
-#include "NeonTensorHandle.hpp"
-#include "workloads/NeonWorkloadUtils.hpp"
-#include "workloads/NeonWorkloads.hpp"
-
-#endif
-
+#include <backendsCommon/CpuTensorHandle.hpp>
 #include <backendsCommon/MakeWorkloadHelper.hpp>
+#include <backendsCommon/MemCopyWorkload.hpp>
 
+#include <neon/workloads/NeonWorkloadUtils.hpp>
+#include <neon/workloads/NeonWorkloads.hpp>
+
+#include <boost/core/ignore_unused.hpp>
 #include <boost/polymorphic_cast.hpp>
 
 namespace armnn
@@ -42,10 +41,8 @@
     return s_Id;
 }
 
-#ifdef ARMCOMPUTENEON_ENABLED
-
-NeonWorkloadFactory::NeonWorkloadFactory()
-    : m_MemoryManager(std::make_unique<arm_compute::Allocator>(), BaseMemoryManager::MemoryAffinity::Offset)
+NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager)
+    : m_MemoryManager(memoryManager)
 {
 }
 
@@ -71,7 +68,7 @@
 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
 {
     auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo);
-    tensorHandle->SetMemoryGroup(m_MemoryManager.GetInterLayerMemoryGroup());
+    tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
 
     return tensorHandle;
 }
@@ -80,7 +77,7 @@
                                                                        DataLayout dataLayout) const
 {
     auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo, dataLayout);
-    tensorHandle->SetMemoryGroup(m_MemoryManager.GetInterLayerMemoryGroup());
+    tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
 
     return tensorHandle;
 }
@@ -107,7 +104,7 @@
                                                               const WorkloadInfo&           info) const
 {
     return MakeWorkloadHelper<NeonSoftmaxFloatWorkload, NeonSoftmaxUint8Workload>(descriptor, info,
-        m_MemoryManager.GetIntraLayerManager());
+        m_MemoryManager->GetIntraLayerManager());
 }
 
 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
@@ -126,7 +123,7 @@
     const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const
 {
     return MakeWorkloadHelper<NeonFullyConnectedWorkload, NeonFullyConnectedWorkload>(descriptor, info,
-        m_MemoryManager.GetIntraLayerManager());
+        m_MemoryManager->GetIntraLayerManager());
 }
 
 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
@@ -145,7 +142,7 @@
     const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
 {
     return std::make_unique<NeonConvolution2dWorkload>(descriptor, info,
-                                                       m_MemoryManager.GetIntraLayerManager());
+                                                       m_MemoryManager->GetIntraLayerManager());
 }
 
 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d(
@@ -158,7 +155,7 @@
     const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
 {
     return MakeWorkloadHelper<NeonNormalizationFloatWorkload, NullWorkload>(descriptor, info,
-        m_MemoryManager.GetIntraLayerManager());
+        m_MemoryManager->GetIntraLayerManager());
 }
 
 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
@@ -220,7 +217,7 @@
     const WorkloadInfo& info) const
 {
     return MakeWorkloadHelper<NeonL2NormalizationFloatWorkload, NullWorkload>(descriptor, info,
-        m_MemoryManager.GetIntraLayerManager());
+        m_MemoryManager->GetIntraLayerManager());
 }
 
 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
@@ -287,232 +284,12 @@
 
 void NeonWorkloadFactory::Release()
 {
-    m_MemoryManager.Release();
+    m_MemoryManager->Release();
 }
 
 void NeonWorkloadFactory::Acquire()
 {
-    m_MemoryManager.Acquire();
+    m_MemoryManager->Acquire();
 }
 
-#else // Compiled without ArmCompute libs
-
-NeonWorkloadFactory::NeonWorkloadFactory()
-{
-}
-
-std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
-    TensorShape const& subTensorShape,
-    unsigned int const* subTensorOrigin) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
-                                                                       DataLayout dataLayout) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
-                                                            const WorkloadInfo&        info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
-                                                             const WorkloadInfo&        info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
-                                                                 const WorkloadInfo&              info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
-                                                              const WorkloadInfo&           info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
-                                                               const WorkloadInfo&            info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor,
-                                                             const WorkloadInfo&          info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor,
-                                                                     const WorkloadInfo&                  info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
-                                                                     const WorkloadInfo&           info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
-                                                                const WorkloadInfo&           info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
-                                                                    const WorkloadInfo&               info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d(
-    const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor,
-                                                                    const WorkloadInfo&                 info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
-                                                               const WorkloadInfo&            info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateBatchNormalization(const BatchNormalizationQueueDescriptor& data,
-                                                                         const WorkloadInfo& info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& data,
-                                                                     const WorkloadInfo&                  info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
-                                                              const WorkloadInfo&        info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor,
-                                                                     const WorkloadInfo& info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFakeQuantization(
-        const FakeQuantizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
-    const WorkloadInfo& info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
-    const WorkloadInfo& info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
-    const WorkloadInfo&           info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor,
-    const WorkloadInfo& info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
-    const WorkloadInfo& info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor,
-    const WorkloadInfo& info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp16ToFp32(
-    const ConvertFp16ToFp32QueueDescriptor& descriptor,
-    const WorkloadInfo& info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp32ToFp16(
-    const ConvertFp32ToFp16QueueDescriptor& descriptor,
-    const WorkloadInfo& info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& data,
-                                                               const WorkloadInfo& info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& data,
-                                                                  const WorkloadInfo& info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor,
-                                                           const WorkloadInfo& info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor,
-                                                          const WorkloadInfo& info) const
-{
-    return nullptr;
-}
-
-std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
-                                                                     const WorkloadInfo& info) const
-{
-    return nullptr;
-}
-
-void NeonWorkloadFactory::Release()
-{}
-
-void NeonWorkloadFactory::Acquire()
-{}
-
-#endif
-
-} //namespace armnn
+} // namespace armnn
diff --git a/src/backends/neon/NeonWorkloadFactory.hpp b/src/backends/neon/NeonWorkloadFactory.hpp
index 98f323a..d5444f5 100644
--- a/src/backends/neon/NeonWorkloadFactory.hpp
+++ b/src/backends/neon/NeonWorkloadFactory.hpp
@@ -11,7 +11,6 @@
 
 #include <boost/core/ignore_unused.hpp>
 
-
 namespace armnn
 {
 
@@ -19,7 +18,7 @@
 class NeonWorkloadFactory : public IWorkloadFactory
 {
 public:
-    NeonWorkloadFactory();
+    NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager);
 
     const BackendId& GetBackendId() const override;
 
@@ -136,9 +135,7 @@
     virtual void Acquire() override;
 
 private:
-#ifdef ARMCOMPUTENEON_ENABLED
-    mutable NeonMemoryManager m_MemoryManager;
-#endif
+    mutable std::shared_ptr<NeonMemoryManager> m_MemoryManager;
 };
 
-} //namespace armnn
+} // namespace armnn
diff --git a/src/backends/neon/test/CMakeLists.txt b/src/backends/neon/test/CMakeLists.txt
index a1ac1e0..19512f9 100644
--- a/src/backends/neon/test/CMakeLists.txt
+++ b/src/backends/neon/test/CMakeLists.txt
@@ -13,6 +13,7 @@
     NeonOptimizedNetworkTests.cpp
     NeonRuntimeTests.cpp
     NeonTimerTest.cpp
+    NeonWorkloadFactoryHelper.hpp
 )
 
 add_library(armnnNeonBackendUnitTests OBJECT ${armnnNeonBackendUnitTests_sources})
diff --git a/src/backends/neon/test/NeonCreateWorkloadTests.cpp b/src/backends/neon/test/NeonCreateWorkloadTests.cpp
index 5cd305c..07953bf 100644
--- a/src/backends/neon/test/NeonCreateWorkloadTests.cpp
+++ b/src/backends/neon/test/NeonCreateWorkloadTests.cpp
@@ -3,6 +3,8 @@
 // SPDX-License-Identifier: MIT
 //
 
+#include "NeonWorkloadFactoryHelper.hpp"
+
 #include <backendsCommon/MemCopyWorkload.hpp>
 
 #include <aclCommon/test/CreateWorkloadClNeon.hpp>
@@ -56,7 +58,7 @@
 static void NeonCreateActivationWorkloadTest()
 {
     Graph graph;
-    NeonWorkloadFactory factory;
+    NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory();
     auto workload = CreateActivationWorkloadTest<NeonActivationWorkload, DataType>(factory, graph);
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
@@ -86,7 +88,7 @@
 static void NeonCreateArithmethicWorkloadTest()
 {
     Graph graph;
-    NeonWorkloadFactory factory;
+    NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory();
     auto workload = CreateArithmeticWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);
 
     DescriptorType queueDescriptor = workload->GetData();
@@ -156,7 +158,7 @@
 static void NeonCreateBatchNormalizationWorkloadTest(DataLayout dataLayout)
 {
     Graph                graph;
-    NeonWorkloadFactory  factory;
+    NeonWorkloadFactory  factory = NeonWorkloadFactoryHelper::GetFactory();
     auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>
                     (factory, graph, dataLayout);
 
@@ -198,7 +200,7 @@
 static void NeonCreateConvolution2dWorkloadTest(DataLayout dataLayout = DataLayout::NCHW)
 {
     Graph                graph;
-    NeonWorkloadFactory  factory;
+    NeonWorkloadFactory  factory = NeonWorkloadFactoryHelper::GetFactory();
     auto                 workload = CreateConvolution2dWorkloadTest<NeonConvolution2dWorkload,
                                     DataType>(factory, graph, dataLayout);
 
@@ -239,7 +241,7 @@
 static void NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout dataLayout)
 {
     Graph graph;
-    NeonWorkloadFactory factory;
+    NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory();
 
     auto workload = CreateDepthwiseConvolution2dWorkloadTest<NeonDepthwiseConvolutionWorkload,
                                                              DataType>(factory, graph, dataLayout);
@@ -276,7 +278,7 @@
 static void NeonCreateFullyConnectedWorkloadTest()
 {
     Graph               graph;
-    NeonWorkloadFactory factory;
+    NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory();
     auto                workload = CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType,
                                    DataType>(factory, graph);
 
@@ -304,7 +306,7 @@
 static void NeonCreateNormalizationWorkloadTest(DataLayout dataLayout)
 {
     Graph graph;
-    NeonWorkloadFactory factory;
+    NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory();
     auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
@@ -346,7 +348,7 @@
 static void NeonCreatePooling2dWorkloadTest(DataLayout dataLayout = DataLayout::NCHW)
 {
     Graph               graph;
-    NeonWorkloadFactory factory;
+    NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory();
     auto                workload = CreatePooling2dWorkloadTest<NeonPooling2dWorkload, DataType>
                                    (factory, graph, dataLayout);
 
@@ -392,7 +394,7 @@
 static void NeonCreateReshapeWorkloadTest()
 {
     Graph               graph;
-    NeonWorkloadFactory factory;
+    NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory();
     auto                workload = CreateReshapeWorkloadTest<NeonReshapeWorkload, DataType>(factory, graph);
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
@@ -424,7 +426,7 @@
 static void NeonCreateSoftmaxWorkloadTest()
 {
     Graph               graph;
-    NeonWorkloadFactory factory;
+    NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory();
     auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph);
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest).
@@ -450,7 +452,7 @@
 BOOST_AUTO_TEST_CASE(CreateSplitterWorkload)
 {
     Graph graph;
-    NeonWorkloadFactory factory;
+    NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory();
     auto workload = CreateSplitterWorkloadTest<NeonSplitterWorkload, DataType::Float32>(factory, graph);
 
     // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
@@ -477,7 +479,7 @@
     // of the merger.
 
     Graph graph;
-    NeonWorkloadFactory factory;
+    NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory();
 
     auto workloads =
         CreateSplitterMergerWorkloadTest<NeonSplitterWorkload, NeonMergerWorkload,
@@ -508,7 +510,7 @@
     // We created a splitter with two outputs. That each of those outputs is used by two different activation layers
 
     Graph graph;
-    NeonWorkloadFactory factory;
+    NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory();
     std::unique_ptr<NeonSplitterWorkload> wlSplitter;
     std::unique_ptr<NeonActivationWorkload> wlActiv0_0;
     std::unique_ptr<NeonActivationWorkload> wlActiv0_1;
@@ -542,7 +544,7 @@
 
 BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsNeon)
 {
-    NeonWorkloadFactory    factory;
+    NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory();
     CreateMemCopyWorkloads<INeonTensorHandle>(factory);
 }
 
@@ -550,7 +552,7 @@
 static void NeonCreateL2NormalizationWorkloadTest(DataLayout dataLayout)
 {
     Graph graph;
-    NeonWorkloadFactory factory;
+    NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory();
     auto workload =
             CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
 
diff --git a/src/backends/neon/test/NeonLayerSupportTests.cpp b/src/backends/neon/test/NeonLayerSupportTests.cpp
index 893f11e..5695543 100644
--- a/src/backends/neon/test/NeonLayerSupportTests.cpp
+++ b/src/backends/neon/test/NeonLayerSupportTests.cpp
@@ -3,6 +3,8 @@
 // SPDX-License-Identifier: MIT
 //
 
+#include "NeonWorkloadFactoryHelper.hpp"
+
 #include <layers/ConvertFp16ToFp32Layer.hpp>
 #include <layers/ConvertFp32ToFp16Layer.hpp>
 #include <test/TensorHelpers.hpp>
@@ -20,19 +22,19 @@
 
 BOOST_AUTO_TEST_CASE(IsLayerSupportedFloat16Neon)
 {
-    armnn::NeonWorkloadFactory factory;
+    armnn::NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory();
     IsLayerSupportedTests<armnn::NeonWorkloadFactory, armnn::DataType::Float16>(&factory);
 }
 
 BOOST_AUTO_TEST_CASE(IsLayerSupportedFloat32Neon)
 {
-    armnn::NeonWorkloadFactory factory;
+    armnn::NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory();
     IsLayerSupportedTests<armnn::NeonWorkloadFactory, armnn::DataType::Float32>(&factory);
 }
 
 BOOST_AUTO_TEST_CASE(IsLayerSupportedUint8Neon)
 {
-    armnn::NeonWorkloadFactory factory;
+    armnn::NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory();
     IsLayerSupportedTests<armnn::NeonWorkloadFactory, armnn::DataType::QuantisedAsymm8>(&factory);
 }
 
diff --git a/src/backends/neon/test/NeonLayerTests.cpp b/src/backends/neon/test/NeonLayerTests.cpp
index 4a1c5f9..48ef667 100644
--- a/src/backends/neon/test/NeonLayerTests.cpp
+++ b/src/backends/neon/test/NeonLayerTests.cpp
@@ -3,6 +3,8 @@
 // SPDX-License-Identifier: MIT
 //
 
+#include "NeonWorkloadFactoryHelper.hpp"
+
 #include <test/TensorHelpers.hpp>
 #include <test/UnitTests.hpp>
 
diff --git a/src/backends/neon/test/NeonMemCopyTests.cpp b/src/backends/neon/test/NeonMemCopyTests.cpp
index a37a07e..f6699a6 100644
--- a/src/backends/neon/test/NeonMemCopyTests.cpp
+++ b/src/backends/neon/test/NeonMemCopyTests.cpp
@@ -3,10 +3,15 @@
 // SPDX-License-Identifier: MIT
 //
 
-#include <neon/NeonWorkloadFactory.hpp>
-#include <reference/RefWorkloadFactory.hpp>
+#include "NeonWorkloadFactoryHelper.hpp"
+
 #include <aclCommon/test/MemCopyTestImpl.hpp>
 
+#include <neon/NeonWorkloadFactory.hpp>
+
+#include <reference/RefWorkloadFactory.hpp>
+#include <reference/test/RefWorkloadFactoryHelper.hpp>
+
 #include <boost/test/unit_test.hpp>
 
 BOOST_AUTO_TEST_SUITE(NeonMemCopy)
diff --git a/src/backends/neon/test/NeonOptimizedNetworkTests.cpp b/src/backends/neon/test/NeonOptimizedNetworkTests.cpp
index 3bf1eb8..b6f4798 100644
--- a/src/backends/neon/test/NeonOptimizedNetworkTests.cpp
+++ b/src/backends/neon/test/NeonOptimizedNetworkTests.cpp
@@ -3,6 +3,8 @@
 // SPDX-License-Identifier: MIT
 //
 
+#include "NeonWorkloadFactoryHelper.hpp"
+
 #include <armnn/ArmNN.hpp>
 #include <Graph.hpp>
 #include <Network.hpp>
@@ -31,7 +33,7 @@
     armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
     BOOST_CHECK(optNet);
     // validate workloads
-    armnn::NeonWorkloadFactory fact;
+    armnn::NeonWorkloadFactory fact = NeonWorkloadFactoryHelper::GetFactory();
     for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph())
     {
         BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuAcc);
diff --git a/src/backends/neon/test/NeonTimerTest.cpp b/src/backends/neon/test/NeonTimerTest.cpp
index 11b319a..a2bf4a9 100644
--- a/src/backends/neon/test/NeonTimerTest.cpp
+++ b/src/backends/neon/test/NeonTimerTest.cpp
@@ -3,6 +3,8 @@
 // SPDX-License-Identifier: MIT
 //
 
+#include "NeonWorkloadFactoryHelper.hpp"
+
 #include <armnn/ArmNN.hpp>
 
 #include <test/TensorHelpers.hpp>
@@ -35,7 +37,7 @@
 
 BOOST_AUTO_TEST_CASE(NeonTimerMeasure)
 {
-    NeonWorkloadFactory workloadFactory;
+    NeonWorkloadFactory workloadFactory = NeonWorkloadFactoryHelper::GetFactory();
 
     unsigned int inputWidth = 4000u;
     unsigned int inputHeight = 5000u;
diff --git a/src/backends/neon/test/NeonWorkloadFactoryHelper.hpp b/src/backends/neon/test/NeonWorkloadFactoryHelper.hpp
new file mode 100644
index 0000000..bcf9c57
--- /dev/null
+++ b/src/backends/neon/test/NeonWorkloadFactoryHelper.hpp
@@ -0,0 +1,38 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/IMemoryManager.hpp>
+#include <backendsCommon/test/WorkloadFactoryHelper.hpp>
+
+#include <neon/NeonWorkloadFactory.hpp>
+
+#include <arm_compute/runtime/Allocator.h>
+
+#include <boost/polymorphic_pointer_cast.hpp>
+
+namespace
+{
+
+// Test helper specialisation that knows how to build a NeonWorkloadFactory.
+template<>
+struct WorkloadFactoryHelper<armnn::NeonWorkloadFactory>
+{
+    // Returns a NeonWorkloadFactory built around a freshly created NeonMemoryManager
+    // (arm_compute::Allocator, Offset memory affinity).
+    static armnn::NeonWorkloadFactory GetFactory()
+    {
+        const auto affinity = armnn::BaseMemoryManager::MemoryAffinity::Offset;
+
+        return armnn::NeonWorkloadFactory(
+            std::make_shared<armnn::NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(), affinity));
+    }
+};
+
+using NeonWorkloadFactoryHelper = WorkloadFactoryHelper<armnn::NeonWorkloadFactory>;
+
+} // anonymous namespace
diff --git a/src/backends/reference/CMakeLists.txt b/src/backends/reference/CMakeLists.txt
index ff16f18..82880cf 100644
--- a/src/backends/reference/CMakeLists.txt
+++ b/src/backends/reference/CMakeLists.txt
@@ -11,6 +11,7 @@
     RefLayerSupport.hpp
     RefWorkloadFactory.cpp
     RefWorkloadFactory.hpp
+
 )
 
 add_library(armnnRefBackend OBJECT ${armnnRefBackend_sources})
diff --git a/src/backends/reference/RefBackend.cpp b/src/backends/reference/RefBackend.cpp
index 7c92404..8f5e9c4 100644
--- a/src/backends/reference/RefBackend.cpp
+++ b/src/backends/reference/RefBackend.cpp
@@ -9,6 +9,7 @@
 #include "RefLayerSupport.hpp"
 
 #include <backendsCommon/IBackendContext.hpp>
+#include <backendsCommon/IMemoryManager.hpp>
 #include <backendsCommon/BackendRegistry.hpp>
 
 #include <Optimizer.hpp>
@@ -39,7 +40,10 @@
     return s_Id;
 }
 
-IBackendInternal::IWorkloadFactoryPtr RefBackend::CreateWorkloadFactory() const
+// Creates the reference workload factory. The memory manager argument is deliberately
+// ignored (and therefore left unnamed): RefWorkloadFactory is default-constructed here.
+IBackendInternal::IWorkloadFactoryPtr RefBackend::CreateWorkloadFactory(
+    const IBackendInternal::IMemoryManagerSharedPtr& /*memoryManager*/) const
 {
     return std::make_unique<RefWorkloadFactory>();
 }
@@ -49,6 +53,12 @@
     return IBackendContextPtr{};
 }
 
+// The reference backend supplies no memory manager: a value-initialised (empty) pointer is returned.
+IBackendInternal::IMemoryManagerUniquePtr RefBackend::CreateMemoryManager() const
+{
+    return IMemoryManagerUniquePtr{};
+}
+
 IBackendInternal::Optimizations RefBackend::GetOptimizations() const
 {
     return Optimizations{};
diff --git a/src/backends/reference/RefBackend.hpp b/src/backends/reference/RefBackend.hpp
index 12d56ff..1a0aef5 100644
--- a/src/backends/reference/RefBackend.hpp
+++ b/src/backends/reference/RefBackend.hpp
@@ -18,8 +18,13 @@
     static const BackendId& GetIdStatic();
     const BackendId& GetId() const override { return GetIdStatic(); }
 
-    IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory() const override;
+    IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override;
+
+    IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(
+        const IBackendInternal::IMemoryManagerSharedPtr& memoryManager = nullptr) const override;
+
     IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override;
+
     IBackendInternal::Optimizations GetOptimizations() const override;
     IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override;
 };
diff --git a/src/backends/reference/test/CMakeLists.txt b/src/backends/reference/test/CMakeLists.txt
index aee6216..8fa9b5c 100644
--- a/src/backends/reference/test/CMakeLists.txt
+++ b/src/backends/reference/test/CMakeLists.txt
@@ -11,6 +11,7 @@
     RefLayerTests.cpp
     RefOptimizedNetworkTests.cpp
     RefRuntimeTests.cpp
+    RefWorkloadFactoryHelper.hpp
 )
 
 add_library(armnnRefBackendUnitTests OBJECT ${armnnRefBackendUnitTests_sources})
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index 703ec58..95ddbad 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -3,6 +3,8 @@
 // SPDX-License-Identifier: MIT
 //
 
+#include "RefWorkloadFactoryHelper.hpp"
+
 #include <test/TensorHelpers.hpp>
 #include <test/UnitTests.hpp>
 
diff --git a/src/backends/reference/test/RefWorkloadFactoryHelper.hpp b/src/backends/reference/test/RefWorkloadFactoryHelper.hpp
new file mode 100644
index 0000000..5005111
--- /dev/null
+++ b/src/backends/reference/test/RefWorkloadFactoryHelper.hpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/test/WorkloadFactoryHelper.hpp>
+
+#include <reference/RefBackend.hpp>
+#include <reference/RefWorkloadFactory.hpp>
+
+namespace
+{
+
+// Test helper specialisation for the reference backend's workload factory.
+template<>
+struct WorkloadFactoryHelper<armnn::RefWorkloadFactory>
+{
+    // Returns a default-constructed RefWorkloadFactory; no memory manager is involved.
+    static armnn::RefWorkloadFactory GetFactory()
+    {
+        return armnn::RefWorkloadFactory{};
+    }
+};
+
+using RefWorkloadFactoryHelper = WorkloadFactoryHelper<armnn::RefWorkloadFactory>;
+
+} // anonymous namespace