IVGCVSW-3277 Refactor TensorHandle factory API

 * Added backend support for multiple types of TensorHandle factories
 * Refactored the backend API to enable new tensor strategies
 * Added mechanism to determine memory strategies during optimization
 * Perform a mem-copy only when a direct-access path cannot be found
 * Explicitly deleted the copy constructor of OutputSlot to prevent
   accidental local copies, which would cause DisconnectAll to be
   called by the destructor
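
For illustration, a backend opts in to the new API roughly as follows.
This is a minimal sketch mirroring the TestBackend classes added in
TensorHandleStrategyTest.cpp below; MyBackend, MyMemoryManager and
MyFactory are placeholder names, and SupportsTensorAllocatorAPI() is
assumed to report true once a non-empty preference list is returned:

    class MyBackend : public armnn::IBackendInternal
    {
    public:
        // Most preferred factory first; an empty list keeps the
        // backend on the legacy workload-factory path.
        std::vector<armnn::ITensorHandleFactory::FactoryId>
        GetHandleFactoryPreferences() const override
        {
            return { "MyFactory" };
        }

        void RegisterTensorHandleFactories(
            armnn::TensorHandleFactoryRegistry& registry) override
        {
            auto mgr = std::make_shared<MyMemoryManager>();
            registry.RegisterMemoryManager(mgr);
            registry.RegisterFactory(
                std::make_unique<MyFactory>(mgr, "MyFactory"));
        }

        // ... remaining IBackendInternal overrides elided ...
    };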

Change-Id: I7e812c8e5e6c1c20db1c5932749ac70fd93db7f8
Signed-off-by: Derek Lamberti <derek.lamberti@arm.com>
Signed-off-by: Matteo Martincigh <matteo.martincigh@arm.com>
diff --git a/Android.mk b/Android.mk
index 0eb2e01..b516e69 100644
--- a/Android.mk
+++ b/Android.mk
@@ -263,7 +263,8 @@
         src/armnn/test/ProfilingEventTest.cpp \
         src/armnn/test/ObservableTest.cpp \
         src/armnn/test/OptionalTest.cpp \
-        src/armnn/test/TestUtils.cpp
+        src/armnn/test/TestUtils.cpp \
+        src/armnn/test/TensorHandleStrategyTest.cpp
 
 LOCAL_STATIC_LIBRARIES := \
         libneuralnetworks_common \
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8fa2bf9..836465a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -484,6 +484,7 @@
         src/armnn/test/RuntimeTests.cpp
         src/armnn/test/RuntimeTests.hpp
         src/armnn/test/SubgraphViewTests.cpp
+        src/armnn/test/TensorHandleStrategyTest.cpp
         src/armnn/test/TensorHelpers.hpp
         src/armnn/test/TensorTest.cpp
         src/armnn/test/TestInputOutputLayerVisitor.cpp
diff --git a/src/armnn/Graph.cpp b/src/armnn/Graph.cpp
index a5c8509..e521623 100644
--- a/src/armnn/Graph.cpp
+++ b/src/armnn/Graph.cpp
@@ -7,6 +7,9 @@
 #include "SubgraphView.hpp"
 #include "LayersFwd.hpp"
 
+#include <backendsCommon/IBackendInternal.hpp>
+
+#include <armnn/BackendId.hpp>
 #include <armnn/Utils.hpp>
 #include <armnn/TypesUtils.hpp>
 
@@ -252,53 +255,96 @@
     return *this;
 }
 
-void Graph::AddCopyLayers()
+void Graph::AddCopyLayers(std::map<BackendId, std::unique_ptr<IBackendInternal>>& backends,
+                          TensorHandleFactoryRegistry& registry)
 {
     // Returns true if the given layer could potentially need an intermediate copy layer (depending on its
     // connections to other layers). At the time of writing, copy layers will be inserted in the following situations:
     // CPU -> CL (and vice versa)
     // CPU -> Neon (and vice versa)
     auto MayNeedCopyLayer = [](const Layer& layer)
-        {
-            // All layers should have been associated with a valid compute device at this point.
-            BOOST_ASSERT(layer.GetBackendId() != Compute::Undefined);
-            // Does not need another copy layer if a copy layer is already present.
-            return layer.GetType() != LayerType::MemCopy &&
-                   // Input and Output layers can perform their own copies internally.
-                   layer.GetType() != LayerType::Input &&
-                   layer.GetType() != LayerType::Output;
-        };
-
-    for (auto&& srcLayer : m_Layers)
     {
-        if (MayNeedCopyLayer(*srcLayer))
-        {
-            unsigned int srcOutputIndex = 0;
-            for (auto&& srcOutput : srcLayer->GetOutputSlots())
-            {
-                std::vector<InputSlot*> connectionCopy = srcOutput.GetConnections();
-                for (auto&& dstInput : connectionCopy)
-                {
-                    Layer& dstLayer = dstInput->GetOwningLayer();
-                    if (MayNeedCopyLayer(dstLayer) && (dstLayer.GetBackendId() != srcLayer->GetBackendId()))
-                    {
-                        // A copy layer is needed in between the source and destination layers.
-                        // Record the operation rather than attempting to modify the graph as we go.
-                        // (invalidating iterators)
-                        const std::string copyLayerName = boost::str(boost::format("[ %1% (%2%) -> %3% (%4%) ]")
-                                                                     % srcLayer->GetName()
-                                                                     % srcOutputIndex
-                                                                     % dstLayer.GetName()
-                                                                     % dstInput->GetSlotIndex());
+        // All layers should have been associated with a valid compute device at this point.
+        BOOST_ASSERT(layer.GetBackendId() != Compute::Undefined);
+        // Does not need another copy layer if a copy layer is already present.
+        return layer.GetType() != LayerType::MemCopy;
+    };
 
-                        MemCopyLayer* const copyLayer = InsertNewLayer<MemCopyLayer>(*dstInput, copyLayerName.c_str());
-                        copyLayer->SetBackendId(dstLayer.GetBackendId());
+    ForEachLayer([this, &backends, &registry, MayNeedCopyLayer](Layer* srcLayer)
+    {
+        BOOST_ASSERT(srcLayer);
+
+        if (!MayNeedCopyLayer(*srcLayer))
+        {
+            // The current layer does not need copy layers, move to the next one
+            return;
+        }
+
+        const std::vector<OutputSlot>& srcOutputSlots = srcLayer->GetOutputSlots();
+        for (unsigned int srcOutputIndex = 0; srcOutputIndex < srcOutputSlots.size(); srcOutputIndex++)
+        {
+            OutputSlot& srcOutputSlot = srcLayer->GetOutputSlot(srcOutputIndex);
+            const std::vector<InputSlot*> srcConnections = srcOutputSlot.GetConnections();
+            for (unsigned int srcConnectionIndex = 0; srcConnectionIndex < srcConnections.size(); srcConnectionIndex++)
+            {
+                InputSlot* dstInputSlot = srcConnections[srcConnectionIndex];
+                BOOST_ASSERT(dstInputSlot);
+
+                auto strategy = srcOutputSlot.GetMemoryStrategyForConnection(srcConnectionIndex);
+                BOOST_ASSERT_MSG(strategy != MemoryStrategy::Undefined,
+                                 "Undefined memory strategy found while adding copy layers for compatibility");
+
+                const Layer& dstLayer = dstInputSlot->GetOwningLayer();
+                if (MayNeedCopyLayer(dstLayer) &&
+                    strategy == MemoryStrategy::CopyToTarget)
+                {
+                    // A copy layer is needed in between the source and destination layers.
+                    // Record the operation rather than attempting to modify the graph as we go.
+                    // (invalidating iterators)
+                    const std::string copyLayerName = boost::str(boost::format("[ %1% (%2%) -> %3% (%4%) ]")
+                                                                 % srcLayer->GetName()
+                                                                 % srcOutputIndex
+                                                                 % dstLayer.GetName()
+                                                                 % dstInputSlot->GetSlotIndex());
+
+                    MemCopyLayer* const copyLayer = InsertNewLayer<MemCopyLayer>(*dstInputSlot, copyLayerName.c_str());
+                    copyLayer->SetBackendId(dstLayer.GetBackendId());
+
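+                    // The copy layer's output lives on the destination backend, so pick
+                    // that backend's first mappable handle factory; the MemCopy workload
+                    // needs map/unmap access to write into the new handle.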
+                    OutputSlot& copyOutputSlot = copyLayer->GetOutputSlot(0);
+                    auto backendIt = backends.find(dstLayer.GetBackendId());
+                    if (backendIt != backends.end() &&
+                        backendIt->second &&
+                        backendIt->second->SupportsTensorAllocatorAPI())
+                    {
+                        auto backend = backendIt->second.get();
+                        auto tensorHandleFactoryIds = backend->GetHandleFactoryPreferences();
+                        bool found = false;
+                        boost::ignore_unused(found);
+
+                        for (auto preference : tensorHandleFactoryIds)
+                        {
+                            auto factory = registry.GetFactory(preference);
+                            if (factory && factory->SupportsMapUnmap())
+                            {
+                                copyOutputSlot.SetTensorHandleFactory(preference);
+                                found = true;
+                                break;
+                            }
+                        }
+
+                        BOOST_ASSERT_MSG(found, "Could not find a mappable TensorHandle for copy layer");
                     }
+                    else
+                    {
+                        copyOutputSlot.SetTensorHandleFactory(ITensorHandleFactory::LegacyFactoryId);
+                    }
+
+                    copyOutputSlot.SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
+                    srcOutputSlot.SetMemoryStrategy(srcConnectionIndex, MemoryStrategy::DirectCompatibility);
                 }
-                ++srcOutputIndex;
             }
         }
-    }
+    });
 }
 
 void Graph::SubstituteSubgraph(SubgraphView& subgraph, IConnectableLayer* substituteLayer)
diff --git a/src/armnn/Graph.hpp b/src/armnn/Graph.hpp
index 47e0e3b..f811337 100644
--- a/src/armnn/Graph.hpp
+++ b/src/armnn/Graph.hpp
@@ -191,7 +191,8 @@
 
     /// Modifies the graph in-place, removing edges connecting layers using different compute devices,
     /// and relinking them via intermediary copy layers.
-    void AddCopyLayers();
+    void AddCopyLayers(std::map<BackendId, std::unique_ptr<class IBackendInternal>>& backends,
+                       TensorHandleFactoryRegistry& registry);
 
     /// Substitutes the given sub-graph with either a new layer or a new sub-graph.
     /// In either case, the given layer or all the layers in the given sub-graph must belong to this graph.
diff --git a/src/armnn/Layer.cpp b/src/armnn/Layer.cpp
index ced87b0..a287220 100644
--- a/src/armnn/Layer.cpp
+++ b/src/armnn/Layer.cpp
@@ -30,7 +30,8 @@
 
         // Connects inserted layer to parent.
         BOOST_ASSERT(layer.GetNumInputSlots() == 1);
-        prevSlot->Connect(layer.GetInputSlot(0));
+        int idx = prevSlot->Connect(layer.GetInputSlot(0));
+        prevSlot->SetMemoryStrategy(boost::numeric_cast<unsigned int>(idx), MemoryStrategy::Undefined);
 
         // Sets tensor info for inserted layer.
         const TensorInfo& tensorInfo = prevSlot->GetTensorInfo();
@@ -39,6 +40,7 @@
 
     // Connects inserted layer to this.
     layer.GetOutputSlot(0).Connect(*this);
+    layer.GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::Undefined);
 }
 
 const InputSlot* OutputSlot::GetConnection(unsigned int index) const
@@ -78,13 +80,24 @@
 {
     destination.SetConnection(this);
     m_Connections.push_back(&destination);
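+    // Keep the strategies vector parallel to m_Connections; the real strategy
+    // for this connection is established later, during optimization.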
+    m_MemoryStrategies.push_back(MemoryStrategy::Undefined);
     return boost::numeric_cast<int>(m_Connections.size() - 1);
 }
 
 void OutputSlot::Disconnect(InputSlot& slot)
 {
     slot.SetConnection(nullptr);
+    auto it = std::find(m_Connections.begin(), m_Connections.end(), &slot);
+
+    if (it == m_Connections.end())
+    {
+        return;
+    }
+
+    auto idx = std::distance(m_Connections.begin(), it);
     m_Connections.erase(std::remove(m_Connections.begin(), m_Connections.end(), &slot), m_Connections.end());
+
+    m_MemoryStrategies.erase(m_MemoryStrategies.begin() + idx);
 }
 
 void OutputSlot::DisconnectAll()
@@ -100,6 +113,9 @@
 {
     while (GetNumConnections() > 0)
     {
+        BOOST_ASSERT_MSG(m_MemoryStrategies[0] == MemoryStrategy::Undefined,
+            "Cannot move connections once memory strategies have been established.");
+
         InputSlot& connection = *GetConnection(0);
         Disconnect(connection);
         destination.Connect(connection);
@@ -148,6 +164,26 @@
     return GetOwningLayer().GetGuid();
 }
 
+void OutputSlot::SetTensorHandleFactory(const ITensorHandleFactory::FactoryId& id)
+{
+    m_TensorHandleFactoryId = id;
+}
+
+ITensorHandleFactory::FactoryId OutputSlot::GetTensorHandleFactoryId() const
+{
+    return m_TensorHandleFactoryId;
+}
+
+void OutputSlot::SetMemoryStrategy(unsigned int connectionIndex, MemoryStrategy strategy)
+{
+    m_MemoryStrategies[connectionIndex] = strategy;
+}
+
+MemoryStrategy OutputSlot::GetMemoryStrategyForConnection(unsigned int connectionIdx) const
+{
+    return m_MemoryStrategies[connectionIdx];
+}
+
 namespace {
 LayerGuid GenerateLayerGuid()
 {
@@ -208,11 +244,26 @@
     }
 }
 
-void Layer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory)
+void Layer::CreateTensorHandles(const TensorHandleFactoryRegistry& registry, const IWorkloadFactory& workloadFactory)
 {
-    for (auto&& outputHandler : m_OutputHandlers)
+    for (unsigned int idx = 0; idx < GetNumOutputSlots(); idx++)
     {
-        outputHandler.CreateTensorHandles(factory);
+        OutputSlot& slot = GetOutputSlot(idx);
+        ITensorHandleFactory::FactoryId factoryId = slot.GetTensorHandleFactoryId();
+
+        OutputHandler& handler = GetOutputHandler(idx);
+        if (factoryId == ITensorHandleFactory::LegacyFactoryId)
+        {
+            handler.CreateTensorHandles(workloadFactory);
+        }
+        else
+        {
+            ITensorHandleFactory* handleFactory = registry.GetFactory(factoryId);
+            BOOST_ASSERT(handleFactory);
+
+            handler.CreateTensorHandles(*handleFactory);
+        }
     }
 }
 
diff --git a/src/armnn/Layer.hpp b/src/armnn/Layer.hpp
index cbb1771..1ddbc00 100644
--- a/src/armnn/Layer.hpp
+++ b/src/armnn/Layer.hpp
@@ -6,7 +6,9 @@
 
 #include "LayerFwd.hpp"
 
+#include <backendsCommon/ITensorHandleFactory.hpp>
 #include <backendsCommon/OutputHandler.hpp>
+#include <backendsCommon/TensorHandleFactoryRegistry.hpp>
 #include <backendsCommon/WorkloadDataCollector.hpp>
 #include <backendsCommon/WorkloadInfo.hpp>
 #include "InternalTypes.hpp"
@@ -84,8 +86,15 @@
     explicit OutputSlot(Layer& owner, OutputHandler& outputHandler)
     : m_OwningLayer(owner)
     , m_OutputHandler(outputHandler)
+    , m_TensorHandleFactoryId(ITensorHandleFactory::LegacyFactoryId)
     {}
 
+    OutputSlot(const OutputSlot&) = delete;
+    OutputSlot& operator=(const OutputSlot&) = delete;
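+    // Copies are deleted because a temporary OutputSlot would invoke
+    // DisconnectAll() from its destructor and silently break the graph.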
+
+    OutputSlot(OutputSlot&&) = default;
+    OutputSlot& operator=(OutputSlot&&) = default;
+
     ~OutputSlot()
     {
         try
@@ -147,12 +156,21 @@
 
     bool operator==(const OutputSlot& other) const;
 
+    void SetTensorHandleFactory(const ITensorHandleFactory::FactoryId& id);
+    ITensorHandleFactory::FactoryId GetTensorHandleFactoryId() const;
+
+    void SetMemoryStrategy(unsigned int connectionIndex, MemoryStrategy strategy);
+    MemoryStrategy GetMemoryStrategyForConnection(unsigned int connectionIdx) const;
+
 private:
     void ValidateConnectionIndex(unsigned int index) const;
 
     Layer& m_OwningLayer;
     OutputHandler& m_OutputHandler;
     std::vector<InputSlot*> m_Connections;
+
+    ITensorHandleFactory::FactoryId m_TensorHandleFactoryId;
+    std::vector<MemoryStrategy> m_MemoryStrategies;
 };
 
 // InputSlot inlines that need OutputSlot declaration.
@@ -248,7 +266,7 @@
 
     virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const = 0;
 
-    virtual void CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory);
+    virtual void CreateTensorHandles(const TensorHandleFactoryRegistry& registry, const IWorkloadFactory& factory);
 
     /// Creates a dynamically-allocated copy of this layer.
     /// @param graph - The Graph into which this Layer is being cloned.
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index 3c7dfb0..7873e48 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -85,19 +85,37 @@
     //(for example the splitter and concat layers).
     for (auto&& layer : order)
     {
-        auto const& backend = layer->GetBackendId();
-        if (m_Backends.count(backend) == 0)
+        auto const& backendId = layer->GetBackendId();
+        if (m_Backends.count(backendId) == 0)
         {
-            auto createBackend = BackendRegistryInstance().GetFactory(backend);
-            auto it = m_Backends.emplace(std::make_pair(backend, createBackend()));
+            auto createBackend = BackendRegistryInstance().GetFactory(backendId);
+            auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));
 
-            IBackendInternal::IMemoryManagerSharedPtr memoryManager = it.first->second->CreateMemoryManager();
-            auto workloadFactory = it.first->second->CreateWorkloadFactory(memoryManager);
+            IBackendInternal* backend = it.first->second.get();
 
-            m_WorkloadFactories.emplace(std::make_pair(backend,
-                std::make_pair(std::move(workloadFactory), memoryManager)));
+            if (backend->SupportsTensorAllocatorAPI())
+            {
+                backend->RegisterTensorHandleFactories(m_TensorHandleFactoryRegistry);
+
+                auto workloadFactory = backend->CreateWorkloadFactory();
+                m_WorkloadFactories.emplace(
+                    std::make_pair(backendId, std::make_pair(std::move(workloadFactory), nullptr)));
+            }
+            else
+            {
+                IBackendInternal::IMemoryManagerSharedPtr memoryManager = backend->CreateMemoryManager();
+                auto workloadFactory = backend->CreateWorkloadFactory(memoryManager);
+
+                m_WorkloadFactories.emplace(
+                    std::make_pair(backendId, std::make_pair(std::move(workloadFactory), memoryManager)));
+            }
         }
-        layer->CreateTensorHandles(m_OptimizedNetwork->GetGraph(), GetWorkloadFactory(*layer));
+    }
+
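+    // Create the tensor handles in a second pass, once every backend has been
+    // created and has registered its factories with the registry.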
+    for (auto&& layer : order)
+    {
+        auto& workloadFactory = GetWorkloadFactory(*layer);
+        layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
     }
 
     //Then create workloads.
diff --git a/src/armnn/LoadedNetwork.hpp b/src/armnn/LoadedNetwork.hpp
index 75af4a4..808a932 100644
--- a/src/armnn/LoadedNetwork.hpp
+++ b/src/armnn/LoadedNetwork.hpp
@@ -12,6 +12,7 @@
 #include "Profiling.hpp"
 
 #include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/TensorHandleFactoryRegistry.hpp>
 #include <backendsCommon/Workload.hpp>
 #include <backendsCommon/WorkloadFactory.hpp>
 
@@ -83,6 +84,8 @@
     mutable std::mutex m_WorkingMemMutex;
 
     bool m_IsWorkingMemAllocated=false;
+
+    TensorHandleFactoryRegistry m_TensorHandleFactoryRegistry;
 };
 
 }
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 9436fc6..58ccfb7 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -16,6 +16,7 @@
 #include <backendsCommon/WorkloadFactory.hpp>
 #include <backendsCommon/BackendRegistry.hpp>
 #include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/TensorHandleFactoryRegistry.hpp>
 
 #include <armnn/Exceptions.hpp>
 #include <armnn/Utils.hpp>
@@ -74,16 +75,7 @@
     return m_Graph->SerializeToDot(stream);
 }
 
-struct OptimizationResult
-{
-    bool m_Warning;
-    bool m_Error;
 
-    OptimizationResult()
-        : m_Warning(false)
-        , m_Error(false)
-    {}
-};
 
 void ReportError(const std::string& errorMessage,
                  Optional<std::vector<std::string>&> errorMessages)
@@ -323,8 +315,28 @@
                           errMessages);
 }
 
+BackendsMap CreateSupportedBackends(TensorHandleFactoryRegistry& handleFactoryRegistry,
+                                    BackendSettings& backendSettings)
+{
+    BackendsMap backends;
+    auto const& backendRegistry = BackendRegistryInstance();
+    for (auto&& selectedBackend : backendSettings.m_SupportedBackends)
+    {
+        auto backendFactory = backendRegistry.GetFactory(selectedBackend);
+        auto backendObjPtr = backendFactory();
+        BOOST_ASSERT(backendObjPtr);
+
+        backendObjPtr->RegisterTensorHandleFactories(handleFactoryRegistry);
+
+        backends[backendObjPtr->GetId()] = std::move(backendObjPtr);
+    }
+
+    return backends;
+}
+
 OptimizationResult ApplyBackendOptimizations(OptimizedNetwork* optNetObjPtr,
                                              BackendSettings& backendSettings,
+                                             BackendsMap& backends,
                                              Optional<std::vector<std::string>&> errMessages)
 {
     BOOST_ASSERT(optNetObjPtr);
@@ -338,11 +350,9 @@
     SubgraphView mainSubgraph(optGraph);
 
     // Run backend specific optimizations
-    auto const& backendRegistry = BackendRegistryInstance();
     for (auto&& selectedBackend : backendSettings.m_SelectedBackends)
     {
-        auto backendFactory = backendRegistry.GetFactory(selectedBackend);
-        auto backendObjPtr  = backendFactory();
+        auto backendObjPtr = backends.find(selectedBackend)->second.get();
         BOOST_ASSERT(backendObjPtr);
 
         // Select sub-graphs based on backend
@@ -425,6 +435,359 @@
     return result;
 }
 
+bool RequiresCopy(ITensorHandleFactory::FactoryId src,
+                  ITensorHandleFactory::FactoryId dst,
+                  TensorHandleFactoryRegistry& registry)
+{
+    if (src != dst)
+    {
+        ITensorHandleFactory* srcFactory = registry.GetFactory(src);
+        ITensorHandleFactory* dstFactory = registry.GetFactory(dst);
+
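+        // If the source can export its memory and the destination can import
+        // it, the handle can be shared directly and no copy is required.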
+        if (srcFactory->SupportsExport() && dstFactory->SupportsImport())
+        {
+            return false;
+        }
+        return true;
+    }
+    return false;
+}
+
+// Find the handle factory for the input layer which results in the fewest required copies.
+ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap& backends,
+                                                            OutputSlot& slot,
+                                                            TensorHandleFactoryRegistry& registry)
+{
+    Layer& layer = slot.GetOwningLayer();
+    BOOST_ASSERT(layer.GetType() == LayerType::Input);
+
+    // Explicitly select the TensorHandle factory for InputLayer because the rules for it are slightly different. It
+    // doesn't matter which backend it is assigned to because they all use the same implementation, which
+    // requires Map/Unmap support. This means that, so long as the handle type supports map/unmap semantics, we can
+    // select a factory with maximum compatibility with the layers connected to the InputLayer.
+
+    // First ensure the source backend can support the TensorHandle API
+    auto frmBackend = backends.find(layer.GetBackendId());
+    if (frmBackend == backends.end() ||
+        !frmBackend->second->SupportsTensorAllocatorAPI())
+    {
+        return ITensorHandleFactory::LegacyFactoryId;
+    }
+
+    // Go through all connections to the output slot and determine the TensorHandleFactory which results in the
+    // fewest copies.
+    std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
+    int topScore = 0;
+    ITensorHandleFactory::FactoryId topChoice = ITensorHandleFactory::LegacyFactoryId;
+
+    for (auto&& connection : slot.GetConnections())
+    {
+        const Layer& connectedLayer = connection->GetOwningLayer();
+
+        auto toBackend = backends.find(connectedLayer.GetBackendId());
+        BOOST_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
+
+        if (!toBackend->second.get()->SupportsTensorAllocatorAPI())
+        {
+            // The destination backend does not support the tensor allocator API, move to the next one
+            continue;
+        }
+
+        auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
+        for (auto&& dst : dstPrefs)
+        {
+            // Input layers use the mem copy workload, so the selected factory must support map/unmap API
+            ITensorHandleFactory* factory = registry.GetFactory(dst);
+            if (!factory->SupportsMapUnmap())
+            {
+                // The current tensor handle factory does not support the map/unmap strategy, move to the next one
+                continue;
+            }
+
+            auto it = factoryScores.find(dst);
+            if (it == factoryScores.end())
+            {
+                // Add new score to the table
+                factoryScores[dst] = 0;
+                if (topChoice == ITensorHandleFactory::LegacyFactoryId)
+                {
+                    topChoice = dst;
+                }
+            }
+            else
+            {
+                // Increase the score
+                factoryScores[dst]++;
+
+                // Track the best option
+                if (factoryScores[dst] > topScore)
+                {
+                    topScore = factoryScores[dst];
+                    topChoice = dst;
+                }
+            }
+        }
+    }
+
+    return topChoice;
+}
+
+// Output layers create no tensor handles of their own (they use map/unmap on
+// the incoming handle), so the factory decision is deferred to the slot that
+// feeds the output.
+ITensorHandleFactory::FactoryId CalculateSlotOptionForOutput(BackendsMap& backends,
+                                                            OutputSlot& slot,
+                                                            TensorHandleFactoryRegistry& registry)
+{
+    return ITensorHandleFactory::DeferredFactoryId;
+}
+
+// For all handle factories supported on the source backend, we wish to find the one which requires the fewest copies
+// when considering all connections.
+ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap& backends,
+                                                    OutputSlot& outputSlot,
+                                                    TensorHandleFactoryRegistry& registry)
+{
+    // First ensure the source backend can support the TensorHandle API
+    Layer& layer = outputSlot.GetOwningLayer();
+    auto frmBackend = backends.find(layer.GetBackendId());
+    if (frmBackend == backends.end() ||
+        !frmBackend->second->SupportsTensorAllocatorAPI())
+    {
+        return ITensorHandleFactory::LegacyFactoryId;
+    }
+
+    // Connections to Output Layers require support for map/unmap on the TensorHandle.
+    bool requiresMapUnmap = false;
+    for (auto&& connection : outputSlot.GetConnections())
+    {
+        const Layer& connectedLayer = connection->GetOwningLayer();
+        if (connectedLayer.GetType() == LayerType::Output)
+        {
+            requiresMapUnmap = true;
+        }
+    }
+
+    IBackendInternal* srcBackend = frmBackend->second.get();
+    auto srcPrefs = srcBackend->GetHandleFactoryPreferences();
+
+    // Initialize the scores
+    std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
+    for (auto&& pref : srcPrefs)
+    {
+        if (requiresMapUnmap) // Only consider factories that support map/unmap if required
+        {
+            ITensorHandleFactory* factory = registry.GetFactory(pref);
+            if (!factory->SupportsMapUnmap())
+            {
+                // The current tensor handle factory does not support the map/unmap strategy, move to the next one
+                continue;
+            }
+        }
+
+        auto it = factoryScores.find(pref);
+        if (it == factoryScores.end())
+        {
+            // Add new score to the table
+            factoryScores[pref] = 0;
+        }
+    }
+
+    // Score each handle factory based on how many times it requires copies on the slot connections
+    for (auto&& connection : outputSlot.GetConnections())
+    {
+        const Layer& connectedLayer = connection->GetOwningLayer();
+
+        auto toBackend = backends.find(connectedLayer.GetBackendId());
+        BOOST_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
+
+        auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
+        for (auto&& src : srcPrefs)
+        {
+            if (factoryScores.find(src) == factoryScores.end()) // Don't consider excluded factories
+            {
+                continue;
+            }
+
+            for (auto&& dst : dstPrefs)
+            {
+                if (RequiresCopy(src, dst, registry))
+                {
+                    // A copy is required for this pairing, so increase the score; lower scores win
+                    factoryScores[src]++;
+                    break;
+                }
+            }
+        }
+    }
+
+    // Find the lowest score
+    int minScore = std::numeric_limits<int>::max();
+    for (auto it : factoryScores)
+    {
+        minScore = std::min(minScore, it.second);
+    }
+
+    // Collect factories matching the best (lowest) score
+    std::vector<ITensorHandleFactory::FactoryId> optimalFactories;
+    for (auto it : factoryScores)
+    {
+        if (it.second == minScore)
+        {
+            optimalFactories.push_back(it.first);
+        }
+    }
+
+    // For all compatible Factories matching the best score, find the preferred one for the current layer.
+    for (auto&& srcPref : srcPrefs)
+    {
+        for (auto&& comp : optimalFactories)
+        {
+            if (comp == srcPref)
+            {
+                return comp;
+            }
+        }
+    }
+
+    return ITensorHandleFactory::LegacyFactoryId;
+}
+
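+// Determine the compatibility strategy for a single connection, preferring
+// direct sharing of the source handle, then export/import across backends,
+// and finally an explicit copy via map/unmap.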
+MemoryStrategy CalculateStrategy(BackendsMap& backends,
+                                 ITensorHandleFactory::FactoryId srcFactoryId,
+                                 const Layer& layer,
+                                 const Layer& connectedLayer,
+                                 TensorHandleFactoryRegistry& registry)
+{
+    auto toBackend = backends.find(connectedLayer.GetBackendId());
+    BOOST_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
+
+    auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
+
+    // Legacy API check for backward compatibility
+    if (srcFactoryId == ITensorHandleFactory::LegacyFactoryId || dstPrefs.empty())
+    {
+        if (layer.GetBackendId() != connectedLayer.GetBackendId())
+        {
+            return MemoryStrategy::CopyToTarget;
+        }
+        else
+        {
+            return MemoryStrategy::DirectCompatibility;
+        }
+    }
+
+    // TensorHandleFactory API present, so perform more sophisticated strategy selection.
+    // Destination Output layers don't require a copy because they use map/unmap
+    if (connectedLayer.GetType() == LayerType::Output)
+    {
+        return MemoryStrategy::DirectCompatibility;
+    }
+
+    // Search for direct match in prefs
+    for (auto&& pref : dstPrefs)
+    {
+        if (pref == srcFactoryId)
+        {
+            return MemoryStrategy::DirectCompatibility;
+        }
+    }
+
+    // Search for export/import options
+    ITensorHandleFactory* srcFactory = registry.GetFactory(srcFactoryId);
+    if (srcFactory->SupportsExport())
+    {
+        for (auto&& pref : dstPrefs)
+        {
+            ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
+            if (dstFactory->SupportsImport())
+            {
+                return MemoryStrategy::ExportToTarget;
+            }
+        }
+    }
+
+    // Search for copy options via map/unmap
+    if (srcFactory->SupportsMapUnmap())
+    {
+        for (auto&& pref : dstPrefs)
+        {
+            ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
+            if (dstFactory->SupportsMapUnmap())
+            {
+                return MemoryStrategy::CopyToTarget;
+            }
+        }
+    }
+
+    return MemoryStrategy::Undefined;
+}
+
+// Select the TensorHandleFactories and the corresponding memory strategy
+OptimizationResult SelectTensorHandleStrategy(Graph& optGraph,
+                                              BackendsMap& backends,
+                                              TensorHandleFactoryRegistry& registry,
+                                              Optional<std::vector<std::string>&> errMessages)
+{
+    OptimizationResult result;
+
+    optGraph.ForEachLayer([&backends, &registry, &result, &errMessages](Layer* layer)
+    {
+        BOOST_ASSERT(layer);
+
+        // Let's make sure the backend is in our list of supported backends. Something went wrong during backend
+        // assignment if this check fails
+        BOOST_ASSERT(backends.find(layer->GetBackendId()) != backends.end());
+
+        // Check each output separately
+        for (unsigned int slotIdx = 0; slotIdx < layer->GetNumOutputSlots(); slotIdx++)
+        {
+            OutputSlot& outputSlot = layer->GetOutputSlot(slotIdx);
+
+            ITensorHandleFactory::FactoryId slotOption = ITensorHandleFactory::LegacyFactoryId;
+
+            // Calculate the factory to use which results in the fewest copies being made.
+            switch(layer->GetType())
+            {
+                case LayerType::Input:
+                    slotOption = CalculateSlotOptionForInput(backends, outputSlot, registry);
+                    break;
+                case LayerType::Output:
+                    slotOption = CalculateSlotOptionForOutput(backends, outputSlot, registry);
+                    break;
+                default:
+                    slotOption = CalculateSlotOption(backends, outputSlot, registry);
+                    break;
+            }
+            outputSlot.SetTensorHandleFactory(slotOption);
+
+            // Now determine the "best" memory strategy for each connection given the slotOption.
+            unsigned int connectionIdx = 0;
+            for (auto&& connection : outputSlot.GetConnections())
+            {
+                const Layer& connectedLayer = connection->GetOwningLayer();
+
+                MemoryStrategy strategy = CalculateStrategy(backends, slotOption, *layer, connectedLayer, registry);
+
+                if (strategy == MemoryStrategy::Undefined)
+                {
+                    result.m_Error = true;
+                    if (errMessages)
+                    {
+                        errMessages.value().emplace_back("Could not find valid strategy required for compatibility"
+                                                         " between backends.");
+                    }
+                    return;
+                }
+
+                outputSlot.SetMemoryStrategy(connectionIdx, strategy);
+
+                connectionIdx++;
+            }
+        }
+    });
+
+    return result;
+}
+
 IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
                               const std::vector<BackendId>& backendPreferences,
                               const IDeviceSpec& deviceSpec,
@@ -476,15 +839,19 @@
         return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
     }
 
+    // Create a registry for tensor handle factories and a map to temporarily hold the initialized backend objects
+    TensorHandleFactoryRegistry tensorHandleFactoryRegistry;
+    BackendsMap backends = CreateSupportedBackends(tensorHandleFactoryRegistry, backendSettings);
+
     // Assign an available backend to each layer
     Graph::Iterator firstLayer = optGraph.begin();
     Graph::Iterator lastLayer  = optGraph.end();
-    OptimizationResult assigBackendsResult = AssignBackends(optNetObjPtr,
-                                                            backendSettings,
-                                                            firstLayer,
-                                                            lastLayer,
-                                                            errMessages);
-    if (assigBackendsResult.m_Error)
+    OptimizationResult assignBackendsResult = AssignBackends(optNetObjPtr,
+                                                             backendSettings,
+                                                             firstLayer,
+                                                             lastLayer,
+                                                             errMessages);
+    if (assignBackendsResult.m_Error)
     {
         // Failed to assign a backend to each layer
         return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
@@ -496,6 +863,7 @@
     // Apply the backend-specific optimizations
     OptimizationResult backendOptimizationResult = ApplyBackendOptimizations(optNetObjPtr,
                                                                              backendSettings,
+                                                                             backends,
                                                                              errMessages);
     if (backendOptimizationResult.m_Error)
     {
@@ -510,13 +878,25 @@
         Optimizer::Pass(optGraph, MakeOptimizations(InsertDebugLayer()));
     }
 
-    optGraph.AddCopyLayers();
+    // Calculate the compatibility strategies for tensor handles
+    OptimizationResult strategyResult = SelectTensorHandleStrategy(optGraph,
+                                                                   backends,
+                                                                   tensorHandleFactoryRegistry,
+                                                                   errMessages);
+    if (strategyResult.m_Error)
+    {
+        // Failed to select the tensor handle strategies
+        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
+    }
+
+    // Based on the tensor handle strategy determined above, insert copy layers where required.
+    optGraph.AddCopyLayers(backends, tensorHandleFactoryRegistry);
 
     // Convert constants
     Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
     Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsHalfToFloat()));
 
-    // Run backend specific optimizations
+    // Run backend specific optimizations (deprecated)
     for (auto&& chosenBackend : backendSettings.m_SelectedBackends)
     {
         auto factoryFun = BackendRegistryInstance().GetFactory(chosenBackend);
diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp
index b90e3c2..8db968a 100644
--- a/src/armnn/Network.hpp
+++ b/src/armnn/Network.hpp
@@ -13,6 +13,7 @@
 
 #include <string>
 #include <vector>
+#include <map>
 #include <memory>
 
 #include "Layer.hpp"
@@ -229,4 +230,27 @@
     std::unique_ptr<Graph> m_Graph;
 };
 
+
+struct OptimizationResult
+{
+    bool m_Warning;
+    bool m_Error;
+
+    OptimizationResult()
+        : m_Warning(false)
+        , m_Error(false)
+    {}
+};
+
+using BackendsMap = std::map<BackendId, std::unique_ptr<class IBackendInternal>>;
+
+BackendsMap CreateSupportedBackends(TensorHandleFactoryRegistry& handleFactoryRegistry,
+                                    struct BackendSettings& backendSettings);
+
+OptimizationResult SelectTensorHandleStrategy(Graph& optGraph,
+                                              BackendsMap& backends,
+                                              TensorHandleFactoryRegistry& registry,
+                                              Optional<std::vector<std::string>&> errMessages);
+
 } // namespace armnn
diff --git a/src/armnn/Optimizer.cpp b/src/armnn/Optimizer.cpp
index 4d0aae8..0a31f84 100644
--- a/src/armnn/Optimizer.cpp
+++ b/src/armnn/Optimizer.cpp
@@ -28,6 +28,7 @@
         --it;
         for (auto&& optimization : optimizations)
         {
+            BOOST_ASSERT(*it);
             optimization->Run(graph, **it);
 
             if ((*it)->IsOutputUnconnected())
diff --git a/src/armnn/layers/ConcatLayer.cpp b/src/armnn/layers/ConcatLayer.cpp
index 1d2641c..24051a2 100644
--- a/src/armnn/layers/ConcatLayer.cpp
+++ b/src/armnn/layers/ConcatLayer.cpp
@@ -34,7 +34,8 @@
     return factory.CreateConcat(descriptor, PrepInfoAndDesc(descriptor, graph));
 }
 
-void ConcatLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory)
+template<typename FactoryType>
+void ConcatLayer::CreateTensors(const FactoryType& factory)
 {
     //If sub tensors are supported then the concat
     //just needs to make sure that the outputs of the prev layer
@@ -43,6 +44,8 @@
 
     if (factory.SupportsSubTensors())
     {
+        ITensorHandleFactory::FactoryId factoryId = GetOutputSlot(0).GetTensorHandleFactoryId();
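+        // The factory is recorded so that sub-tensors are only created for
+        // slots that share the concat output's tensor handle factory.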
+
         std::queue<ConcatLayer*> m_ConcatLayers;
 
         m_ConcatLayers.push(this);
@@ -66,7 +69,8 @@
                 auto CreateSubTensor = [&]()
                 {
                     // Make sure quantization parameters are in the same space
-                    if (parentInfo.IsTypeSpaceMatch(info))
+                    if (parentInfo.IsTypeSpaceMatch(info) &&
+                        factoryId == slot->GetTensorHandleFactoryId())
                     {
                         return factory.CreateSubTensorHandle(*parentTensor,
                                                              info.GetShape(),
@@ -114,6 +118,24 @@
     }
 }
 
+void ConcatLayer::CreateTensorHandles(const TensorHandleFactoryRegistry& registry,
+                                      const IWorkloadFactory& workloadFactory)
+{
+    OutputSlot& slot = GetOutputSlot(0);
+    ITensorHandleFactory::FactoryId factoryId = slot.GetTensorHandleFactoryId();
+
+    if (factoryId == ITensorHandleFactory::LegacyFactoryId)
+    {
+        CreateTensors(workloadFactory);
+    }
+    else
+    {
+        ITensorHandleFactory* handleFactory = registry.GetFactory(factoryId);
+        BOOST_ASSERT(handleFactory);
+        CreateTensors(*handleFactory);
+    }
+}
+
 ConcatLayer* ConcatLayer::Clone(Graph& graph) const
 {
     return CloneBase<ConcatLayer>(graph, m_Param, GetName());
diff --git a/src/armnn/layers/ConcatLayer.hpp b/src/armnn/layers/ConcatLayer.hpp
index 4268291..eb7d93c 100644
--- a/src/armnn/layers/ConcatLayer.hpp
+++ b/src/armnn/layers/ConcatLayer.hpp
@@ -22,9 +22,11 @@
 
     /// Set the outputs to be appropriate sub tensors of the input if sub tensors are supported
     /// otherwise creates tensor handlers.
-    /// @param [in] graph The graph where this layer can be found.
+    /// @param [in] registry Contains all the registered tensor handle factories available for use.
     /// @param [in] factory The workload factory which will create the workload.
-    virtual void CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory) override;
+    virtual void CreateTensorHandles(const TensorHandleFactoryRegistry& registry,
+                                     const IWorkloadFactory& factory) override;
 
     /// Creates a dynamically-allocated copy of this layer.
     /// @param [in] graph The graph into which this layer is being cloned.
@@ -50,6 +52,11 @@
 
     /// Default destructor
     ~ConcatLayer() = default;
+
+private:
+    template <typename FactoryType>
+    void CreateTensors(const FactoryType& factory);
+
 };
 
 } // namespace
diff --git a/src/armnn/layers/OutputLayer.hpp b/src/armnn/layers/OutputLayer.hpp
index b86f8e2..2aa2dbd 100644
--- a/src/armnn/layers/OutputLayer.hpp
+++ b/src/armnn/layers/OutputLayer.hpp
@@ -22,11 +22,12 @@
 
     /// Set the outputs to be appropriate sub tensors of the input if sub tensors are supported
     /// otherwise creates tensor handlers by default. Ignores parameters for Output type.
-    /// @param [in] graph The graph where this layer can be found.
+    /// @param [in] registry Contains all the registered tensor handle factories available for use.
     /// @param [in] factory The workload factory which will create the workload.
-    virtual void CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory) override
+    virtual void CreateTensorHandles(const TensorHandleFactoryRegistry& registry,
+                                     const IWorkloadFactory& factory) override
     {
-        boost::ignore_unused(graph, factory);
+        boost::ignore_unused(registry, factory);
     }
 
     /// Creates a dynamically-allocated copy of this layer.
diff --git a/src/armnn/layers/SplitterLayer.cpp b/src/armnn/layers/SplitterLayer.cpp
index 4a6b222..dc04b3f 100644
--- a/src/armnn/layers/SplitterLayer.cpp
+++ b/src/armnn/layers/SplitterLayer.cpp
@@ -32,7 +32,8 @@
     return factory.CreateSplitter(descriptor, PrepInfoAndDesc(descriptor, graph));
 }
 
-void SplitterLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory)
+template<typename FactoryType>
+void SplitterLayer::CreateTensors(const FactoryType& factory)
 {
     //If sub tensors are supported then all the "splitter" needs to do is to
     //set the outputs to be appropriate sub tensors of the input.
@@ -40,6 +41,7 @@
 
     if (useSubTensors)
     {
+        const OutputSlot* slot = GetInputSlots()[0].GetConnectedOutputSlot();
         const OutputHandler& outputHandler = GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();
 
         const TensorInfo& parentInfo = outputHandler.GetTensorInfo();
@@ -53,10 +55,13 @@
         {
             const TensorInfo& info = m_OutputHandlers[i].GetTensorInfo();
 
+            OutputSlot& outSlot = GetOutputSlot(i);
+            ITensorHandleFactory::FactoryId factoryId = outSlot.GetTensorHandleFactoryId();
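+            // A sub-tensor of the input is only viable when this output slot
+            // shares the parent (input) slot's tensor handle factory.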
             auto CreateSubTensor = [&]()
             {
                 // Make sure quantization parameters are in the same space
-                if (parentInfo.IsTypeSpaceMatch(info))
+                if (parentInfo.IsTypeSpaceMatch(info) &&
+                    factoryId == slot->GetTensorHandleFactoryId())
                 {
                     return factory.CreateSubTensorHandle(*inputData,
                                                          info.GetShape(),
@@ -95,6 +100,24 @@
     }
 }
 
+void SplitterLayer::CreateTensorHandles(const TensorHandleFactoryRegistry& registry,
+                                        const IWorkloadFactory& workloadFactory)
+{
+    OutputSlot& slot = GetOutputSlot(0);
+    ITensorHandleFactory::FactoryId factoryId = slot.GetTensorHandleFactoryId();
+
+    if (factoryId == ITensorHandleFactory::LegacyFactoryId)
+    {
+        CreateTensors(workloadFactory);
+    }
+    else
+    {
+        ITensorHandleFactory* handleFactory = registry.GetFactory(factoryId);
+        BOOST_ASSERT(handleFactory);
+        CreateTensors(*handleFactory);
+    }
+}
+
 SplitterLayer* SplitterLayer::Clone(Graph& graph) const
 {
     return CloneBase<SplitterLayer>(graph, m_Param, GetName());
diff --git a/src/armnn/layers/SplitterLayer.hpp b/src/armnn/layers/SplitterLayer.hpp
index 19b0556..9c684d4 100644
--- a/src/armnn/layers/SplitterLayer.hpp
+++ b/src/armnn/layers/SplitterLayer.hpp
@@ -22,9 +22,11 @@
 
     /// Set the outputs to be appropriate sub tensors of the input if sub tensors are supported
     /// otherwise creates tensor handlers.
-    /// @param [in] graph The graph where this layer can be found.
+    /// @param [in] registry Contains all the registered tensor handle factories available for use.
     /// @param [in] factory The workload factory which will create the workload.
-    virtual void CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory) override;
+    virtual void CreateTensorHandles(const TensorHandleFactoryRegistry& registry,
+                                     const IWorkloadFactory& factory) override;
 
     /// Creates a dynamically-allocated copy of this layer.
     /// @param [in] graph The graph into which this layer is being cloned.
@@ -50,6 +52,10 @@
 
     /// Default destructor
     ~SplitterLayer() = default;
+
+private:
+    template <typename FactoryType>
+    void CreateTensors(const FactoryType& factory);
 };
 
 } // namespace
diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp
index b071977..47af4a8 100644
--- a/src/armnn/test/CreateWorkload.hpp
+++ b/src/armnn/test/CreateWorkload.hpp
@@ -41,11 +41,13 @@
 }
 
 // Helper function to create tensor handlers for workloads, assuming they all use the same factory.
-void CreateTensorHandles(armnn::Graph& graph, armnn::IWorkloadFactory& factory)
+void CreateTensorHandles(armnn::Graph& graph,
+                         armnn::IWorkloadFactory& factory)
 {
+    TensorHandleFactoryRegistry tmpRegistry;
     for (auto&& layer : graph.TopologicalSort())
     {
-        layer->CreateTensorHandles(graph, factory);
+        layer->CreateTensorHandles(tmpRegistry, factory);
     }
 }
 
diff --git a/src/armnn/test/GraphTests.cpp b/src/armnn/test/GraphTests.cpp
index 44cfa2e..ae5ff22 100644
--- a/src/armnn/test/GraphTests.cpp
+++ b/src/armnn/test/GraphTests.cpp
@@ -14,6 +14,8 @@
 #include <Layer.hpp>
 
 #include <backendsCommon/CpuTensorHandle.hpp>
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/TensorHandleFactoryRegistry.hpp>
 
 
 /// Checks that first comes before second in the order.
@@ -477,10 +479,21 @@
         outputLayer->SetBackendId(armnn::Compute::CpuRef);
 
         softmaxLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+        // Set the memory strategies
+        inputLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
+        convLayer1->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
+        convLayer1->GetOutputSlot(0).SetMemoryStrategy(1, MemoryStrategy::DirectCompatibility);
+        convLayer2->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
+        concatLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
+        actLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
+        softmaxLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
     }
 
     armnn::TensorInfo m_TensorDesc;
     armnn::Graph m_Graph;
+    std::map<armnn::BackendId, std::unique_ptr<armnn::IBackendInternal>> m_Backends;
+    armnn::TensorHandleFactoryRegistry m_FactoryRegistry;
 
 private:
 
@@ -501,26 +514,26 @@
 BOOST_FIXTURE_TEST_CASE(AddCopyLayers, CopyLayersFixture)
 {
     const armnn::Graph origGraph(m_Graph);
-    m_Graph.AddCopyLayers();
+    m_Graph.AddCopyLayers(m_Backends, m_FactoryRegistry);
 
     TestGraphAfterAddingCopyLayers(m_Graph, origGraph);
 }
 
 BOOST_FIXTURE_TEST_CASE(AddCopyLayersSeveralTimes, CopyLayersFixture)
 {
-    m_Graph.AddCopyLayers();
+    m_Graph.AddCopyLayers(m_Backends, m_FactoryRegistry);
 
     // Calling AddCopyLayers() several times should not change the connections.
     const std::vector<Edge> edges = GetEdgeList(m_Graph);
     for (int i = 0; i < 4; ++i)
     {
-        m_Graph.AddCopyLayers();
+        m_Graph.AddCopyLayers(m_Backends, m_FactoryRegistry);
         const std::vector<Edge> otherEdges = GetEdgeList(m_Graph);
         BOOST_TEST((edges == otherEdges));
     }
 }
 
-BOOST_AUTO_TEST_CASE(CopyLayersAddedBetweenSameLayersHaveDifferentNames)
+BOOST_FIXTURE_TEST_CASE(CopyLayersAddedBetweenSameLayersHaveDifferentNames, CopyLayersFixture)
 {
     armnn::Graph graph;
 
@@ -542,7 +555,12 @@
     splitterLayer->GetOutputSlot(1).Connect(additionLayer->GetInputSlot(1));
     additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
 
-    graph.AddCopyLayers();
+    inputLayer->GetOutputSlot(0).SetMemoryStrategy(0, armnn::MemoryStrategy::DirectCompatibility);
+    splitterLayer->GetOutputSlot(0).SetMemoryStrategy(0, armnn::MemoryStrategy::CopyToTarget);
+    splitterLayer->GetOutputSlot(1).SetMemoryStrategy(0, armnn::MemoryStrategy::CopyToTarget);
+    additionLayer->GetOutputSlot(0).SetMemoryStrategy(0, armnn::MemoryStrategy::DirectCompatibility);
+
+    graph.AddCopyLayers(m_Backends, m_FactoryRegistry);
 
     std::vector<Edge> edges = GetEdgeList(graph);
     BOOST_CHECK(edges.size() == 6u);
diff --git a/src/armnn/test/TensorHandleStrategyTest.cpp b/src/armnn/test/TensorHandleStrategyTest.cpp
new file mode 100644
index 0000000..3bb1c68
--- /dev/null
+++ b/src/armnn/test/TensorHandleStrategyTest.cpp
@@ -0,0 +1,274 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#include <boost/test/unit_test.hpp>
+
+#include <armnn/LayerVisitorBase.hpp>
+
+#include <backendsCommon/IBackendContext.hpp>
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/IMemoryManager.hpp>
+#include <backendsCommon/ITensorHandleFactory.hpp>
+#include <backendsCommon/TensorHandleFactoryRegistry.hpp>
+
+#include <optimizations/Optimization.hpp>
+
+#include <Network.hpp>
+
+#include <vector>
+#include <string>
+
+using namespace armnn;
+
+class TestMemMgr : public IMemoryManager
+{
+public:
+    TestMemMgr() = default;
+
+    void Acquire() override {}
+    void Release() override {}
+};
+
+class TestFactory1 : public ITensorHandleFactory
+{
+public:
+    TestFactory1(std::weak_ptr<IMemoryManager> mgr, ITensorHandleFactory::FactoryId id)
+        : m_Id(id)
+        , m_MemMgr(mgr)
+    {}
+
+    std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent,
+                                                         TensorShape const& subTensorShape,
+                                                         unsigned int const* subTensorOrigin) const override
+    {
+        return nullptr;
+    }
+
+    std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override
+    {
+        return nullptr;
+    }
+
+    virtual const FactoryId GetId() const override { return m_Id; }
+
+    virtual bool SupportsSubTensors() const override { return true; }
+
+private:
+    FactoryId m_Id = "UninitializedId";
+
+    std::weak_ptr<IMemoryManager> m_MemMgr;
+};
+
+class TestBackendA : public IBackendInternal
+{
+public:
+    TestBackendA() = default;
+
+    const BackendId& GetId() const override { return m_Id; }
+
+    IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr& memoryManager = nullptr) const override
+    {
+        return IWorkloadFactoryPtr{};
+    }
+
+    IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
+    {
+        return ILayerSupportSharedPtr{};
+    }
+
+    std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override
+    {
+        return std::vector<ITensorHandleFactory::FactoryId>
+        {
+            "TestHandleFactoryA1",
+            "TestHandleFactoryA2",
+            "TestHandleFactoryB1"
+        };
+    }
+
+    void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override
+    {
+        auto mgr = std::make_shared<TestMemMgr>();
+
+        registry.RegisterMemoryManager(mgr);
+        registry.RegisterFactory(std::make_unique<TestFactory1>(mgr, "TestHandleFactoryA1"));
+        registry.RegisterFactory(std::make_unique<TestFactory1>(mgr, "TestHandleFactoryA2"));
+    }
+
+private:
+    BackendId m_Id = "BackendA";
+};
+
+class TestBackendB : public IBackendInternal
+{
+public:
+    TestBackendB() = default;
+
+    const BackendId& GetId() const override { return m_Id; }
+
+    IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr& memoryManager = nullptr) const override
+    {
+        return IWorkloadFactoryPtr{};
+    }
+
+    IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
+    {
+        return ILayerSupportSharedPtr{};
+    }
+
+    std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override
+    {
+        return std::vector<ITensorHandleFactory::FactoryId>
+        {
+            "TestHandleFactoryB1"
+        };
+    }
+
+    void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override
+    {
+        auto mgr = std::make_shared<TestMemMgr>();
+
+        registry.RegisterMemoryManager(mgr);
+        registry.RegisterFactory(std::make_unique<TestFactory1>(mgr, "TestHandleFactoryB1"));
+    }
+
+private:
+    BackendId m_Id = "BackendB";
+};
+
+class TestBackendC : public IBackendInternal
+{
+public:
+    TestBackendC() = default;
+
+    const BackendId& GetId() const override { return m_Id; }
+
+    IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr& memoryManager = nullptr) const override
+    {
+        return IWorkloadFactoryPtr{};
+    }
+
+    IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
+    {
+        return ILayerSupportSharedPtr{};
+    }
+
+    std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override
+    {
+        return std::vector<ITensorHandleFactory::FactoryId>{
+            "TestHandleFactoryC1"
+        };
+    }
+
+    void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override
+    {
+        auto mgr = std::make_shared<TestMemMgr>();
+
+        registry.RegisterMemoryManager(mgr);
+        registry.RegisterFactory(std::make_unique<TestFactory1>(mgr, "TestHandleFactoryC1"));
+    }
+
+private:
+    BackendId m_Id = "BackendC";
+};
+
+
+BOOST_AUTO_TEST_SUITE(TensorHandle)
+
+BOOST_AUTO_TEST_CASE(RegisterFactories)
+{
+    TestBackendA backendA;
+    TestBackendB backendB;
+
+    BOOST_TEST(backendA.GetHandleFactoryPreferences()[0] == "TestHandleFactoryA1");
+    BOOST_TEST(backendA.GetHandleFactoryPreferences()[1] == "TestHandleFactoryA2");
+    BOOST_TEST(backendA.GetHandleFactoryPreferences()[2] == "TestHandleFactoryB1");
+
+    TensorHandleFactoryRegistry registry;
+    backendA.RegisterTensorHandleFactories(registry);
+    backendB.RegisterTensorHandleFactories(registry);
+
+    BOOST_TEST((registry.GetFactory("Non-existing Factory") == nullptr));
+    BOOST_TEST((registry.GetFactory("TestHandleFactoryA1") != nullptr));
+    BOOST_TEST((registry.GetFactory("TestHandleFactoryA2") != nullptr));
+    BOOST_TEST((registry.GetFactory("TestHandleFactoryB1") != nullptr));
+}
+
+BOOST_AUTO_TEST_CASE(TensorHandleSelectionStrategy)
+{
+    auto backendA = std::make_unique<TestBackendA>();
+    auto backendB = std::make_unique<TestBackendB>();
+    auto backendC = std::make_unique<TestBackendC>();
+
+    TensorHandleFactoryRegistry registry;
+    backendA->RegisterTensorHandleFactories(registry);
+    backendB->RegisterTensorHandleFactories(registry);
+    backendC->RegisterTensorHandleFactories(registry);
+
+    BackendsMap backends;
+    backends["BackendA"] = std::move(backendA);
+    backends["BackendB"] = std::move(backendB);
+    backends["BackendC"] = std::move(backendC);
+
+    armnn::Graph graph;
+
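+    // Topology under test: input(A) -> softmax1(A) -> softmax2(B) -> softmax3(C) -> output(A)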
+    armnn::InputLayer* const inputLayer = graph.AddLayer<armnn::InputLayer>(0, "input");
+    inputLayer->SetBackendId("BackendA");
+
+    armnn::SoftmaxDescriptor smDesc;
+    armnn::SoftmaxLayer* const softmaxLayer1 = graph.AddLayer<armnn::SoftmaxLayer>(smDesc, "softmax1");
+    softmaxLayer1->SetBackendId("BackendA");
+
+    armnn::SoftmaxLayer* const softmaxLayer2 = graph.AddLayer<armnn::SoftmaxLayer>(smDesc, "softmax2");
+    softmaxLayer2->SetBackendId("BackendB");
+
+    armnn::SoftmaxLayer* const softmaxLayer3 = graph.AddLayer<armnn::SoftmaxLayer>(smDesc, "softmax3");
+    softmaxLayer3->SetBackendId("BackendC");
+
+    armnn::OutputLayer* const outputLayer = graph.AddLayer<armnn::OutputLayer>(0, "output");
+    outputLayer->SetBackendId("BackendA");
+
+    inputLayer->GetOutputSlot(0).Connect(softmaxLayer1->GetInputSlot(0));
+    softmaxLayer1->GetOutputSlot(0).Connect(softmaxLayer2->GetInputSlot(0));
+    softmaxLayer2->GetOutputSlot(0).Connect(softmaxLayer3->GetInputSlot(0));
+    softmaxLayer3->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+    graph.TopologicalSort();
+
+    std::vector<std::string> errors;
+    auto result = SelectTensorHandleStrategy(graph, backends, registry, errors);
+
+    BOOST_TEST(result.m_Error == false);
+    BOOST_TEST(result.m_Warning == false);
+
+    OutputSlot& inputLayerOut = inputLayer->GetOutputSlot(0);
+    OutputSlot& softmaxLayer1Out = softmaxLayer1->GetOutputSlot(0);
+    OutputSlot& softmaxLayer2Out = softmaxLayer2->GetOutputSlot(0);
+    OutputSlot& softmaxLayer3Out = softmaxLayer3->GetOutputSlot(0);
+
+    // Check that the correct factory was selected
+    BOOST_TEST(inputLayerOut.GetTensorHandleFactoryId() == "TestHandleFactoryA1");
+    BOOST_TEST(softmaxLayer1Out.GetTensorHandleFactoryId() == "TestHandleFactoryB1");
+    BOOST_TEST(softmaxLayer2Out.GetTensorHandleFactoryId() == "TestHandleFactoryB1");
+    BOOST_TEST(softmaxLayer3Out.GetTensorHandleFactoryId() == "TestHandleFactoryC1");
+
+    // Check that the correct strategy was selected
+    BOOST_TEST((inputLayerOut.GetMemoryStrategyForConnection(0) == MemoryStrategy::DirectCompatibility));
+    BOOST_TEST((softmaxLayer1Out.GetMemoryStrategyForConnection(0) == MemoryStrategy::DirectCompatibility));
+    BOOST_TEST((softmaxLayer2Out.GetMemoryStrategyForConnection(0) == MemoryStrategy::CopyToTarget));
+    BOOST_TEST((softmaxLayer3Out.GetMemoryStrategyForConnection(0) == MemoryStrategy::DirectCompatibility));
+
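+    // Only the CopyToTarget edge (softmax2 -> softmax3) should gain a MemCopy layer.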
+    graph.AddCopyLayers(backends, registry);
+    int count = 0;
+    graph.ForEachLayer([&count](Layer* layer)
+    {
+        if (layer->GetType() == LayerType::MemCopy)
+        {
+            count++;
+        }
+    });
+    BOOST_TEST(count == 1);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp b/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp
index f544c12..03bcf32 100644
--- a/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp
+++ b/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp
@@ -62,6 +62,7 @@
 template<typename IComputeTensorHandle>
 void CreateMemCopyWorkloads(IWorkloadFactory& factory)
 {
+    TensorHandleFactoryRegistry registry;
     Graph graph;
     RefWorkloadFactory refFactory;
 
@@ -79,10 +80,10 @@
     Connect(layer1, layer2, tensorInfo);
     Connect(layer2, output, tensorInfo);
 
-    input->CreateTensorHandles(graph, refFactory);
-    layer1->CreateTensorHandles(graph, factory);
-    layer2->CreateTensorHandles(graph, refFactory);
-    output->CreateTensorHandles(graph, refFactory);
+    input->CreateTensorHandles(registry, refFactory);
+    layer1->CreateTensorHandles(registry, factory);
+    layer2->CreateTensorHandles(registry, refFactory);
+    output->CreateTensorHandles(registry, refFactory);
 
     // make the workloads and check them
     auto workload1 = MakeAndCheckWorkload<CopyMemGenericWorkload>(*layer1, graph, factory);
diff --git a/src/backends/backendsCommon/CMakeLists.txt b/src/backends/backendsCommon/CMakeLists.txt
index e1e387b..bc1c15b 100644
--- a/src/backends/backendsCommon/CMakeLists.txt
+++ b/src/backends/backendsCommon/CMakeLists.txt
@@ -11,6 +11,8 @@
     CpuTensorHandle.hpp
     IBackendInternal.hpp
     IBackendContext.hpp
+    ITensorHandleFactory.cpp
+    ITensorHandleFactory.hpp
     LayerSupportBase.cpp
     LayerSupportBase.hpp
     IMemoryManager.hpp
@@ -22,6 +24,8 @@
     OptimizationViews.hpp
     OutputHandler.cpp
     OutputHandler.hpp
+    TensorHandleFactoryRegistry.cpp
+    TensorHandleFactoryRegistry.hpp
     WorkloadDataCollector.hpp
     WorkloadData.cpp
     WorkloadDataFwd.hpp
diff --git a/src/backends/backendsCommon/IBackendInternal.hpp b/src/backends/backendsCommon/IBackendInternal.hpp
index fe9d620..a0d6569 100644
--- a/src/backends/backendsCommon/IBackendInternal.hpp
+++ b/src/backends/backendsCommon/IBackendInternal.hpp
@@ -10,7 +10,11 @@
 
 #include <ISubgraphViewConverter.hpp>
 #include <SubgraphView.hpp>
+#include <optimizations/Optimization.hpp>
 
+#include "IBackendContext.hpp"
+#include "IMemoryManager.hpp"
+#include "ITensorHandleFactory.hpp"
 #include "OptimizationViews.hpp"
 
 #include <vector>
@@ -18,9 +22,7 @@
 namespace armnn
 {
 class IWorkloadFactory;
-class IBackendContext;
 class IMemoryManager;
-class Optimization;
 class ILayerSupport;
 
 class IBackendInternal : public IBackend
@@ -60,7 +62,10 @@
     }
 
     ARMNN_DEPRECATED_MSG("Use \"OptimizationViews OptimizeSubgraphView(const SubgraphView&)\" instead")
-    virtual Optimizations GetOptimizations() const = 0;
+    virtual Optimizations GetOptimizations() const
+    {
+        return Optimizations{};
+    }
 
     ARMNN_DEPRECATED_MSG("Use \"OptimizationViews OptimizeSubgraphView(const SubgraphView&)\" instead")
     virtual SubGraphUniquePtr OptimizeSubGraph(const SubGraph& subGraph, bool& optimizationAttempted) const
@@ -70,12 +75,19 @@
     }
     ARMNN_NO_DEPRECATE_WARN_END
 
-    virtual IMemoryManagerUniquePtr CreateMemoryManager() const = 0;
+
+    virtual IMemoryManagerUniquePtr CreateMemoryManager() const
+    {
+        return IMemoryManagerUniquePtr();
+    }
 
     virtual IWorkloadFactoryPtr CreateWorkloadFactory(
         const IMemoryManagerSharedPtr& memoryManager = nullptr) const = 0;
 
-    virtual IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const = 0;
+    virtual IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const
+    {
+        return IBackendContextPtr{};
+    }
 
     virtual ILayerSupportSharedPtr GetLayerSupport() const = 0;
 
@@ -107,6 +119,29 @@
         }
         return result;
     }
+
+    bool SupportsTensorAllocatorAPI() const { return !GetHandleFactoryPreferences().empty(); }
+
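+    /// Returns the first entry of GetHandleFactoryPreferences(), falling back to
+    /// LegacyFactoryId (i.e. tensor handle creation through the IWorkloadFactory)
+    /// for backends that pre-date the tensor allocator API.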
+    ITensorHandleFactory::FactoryId GetBackwardCompatibleFavoriteHandleFactory()
+    {
+        auto favorites = GetHandleFactoryPreferences();
+        if (favorites.empty())
+        {
+            return ITensorHandleFactory::LegacyFactoryId;
+        }
+        return favorites[0];
+    }
+
+    /// (Optional) Returns a vector of supported TensorHandleFactory ids in preference order.
+    virtual std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const
+    {
+        return std::vector<ITensorHandleFactory::FactoryId>();
+    }
+
+    /// (Optional) Register TensorHandleFactories.
+    /// Either this method must be implemented, or CreateMemoryManager() together with the
+    /// IWorkloadFactory::CreateTensorHandle()/CreateSubTensorHandle() methods.
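+    /// A minimal sketch of the new-style registration (the backend and factory
+    /// names here are illustrative, not part of the API):
+    ///
+    ///     void MyBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry)
+    ///     {
+    ///         auto mgr = std::make_shared<MyMemoryManager>();
+    ///         registry.RegisterMemoryManager(mgr);
+    ///         registry.RegisterFactory(std::make_unique<MyTensorHandleFactory>(mgr));
+    ///     }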
+    virtual void RegisterTensorHandleFactories(class TensorHandleFactoryRegistry& registry) {}
 };
 
 using IBackendInternalUniquePtr = std::unique_ptr<IBackendInternal>;
diff --git a/src/backends/backendsCommon/ITensorHandleFactory.cpp b/src/backends/backendsCommon/ITensorHandleFactory.cpp
new file mode 100644
index 0000000..91f5692
--- /dev/null
+++ b/src/backends/backendsCommon/ITensorHandleFactory.cpp
@@ -0,0 +1,14 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ITensorHandleFactory.hpp"
+
+namespace armnn
+{
+
+const ITensorHandleFactory::FactoryId ITensorHandleFactory::LegacyFactoryId = "armnn_legacy_factory";
+const ITensorHandleFactory::FactoryId ITensorHandleFactory::DeferredFactoryId = "armnn_deferred_factory";
+
+} // namespace armnn
diff --git a/src/backends/backendsCommon/ITensorHandleFactory.hpp b/src/backends/backendsCommon/ITensorHandleFactory.hpp
new file mode 100644
index 0000000..7685061
--- /dev/null
+++ b/src/backends/backendsCommon/ITensorHandleFactory.hpp
@@ -0,0 +1,49 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/Types.hpp>
+#include <armnn/IRuntime.hpp>
+
+namespace armnn
+{
+
+class ITensorHandleFactory
+{
+public:
+    using FactoryId = std::string;
+    static const FactoryId LegacyFactoryId;   // Use the workload factory to create the tensor handle
+    static const FactoryId DeferredFactoryId; // Some TensorHandleFactory decisions are deferred to run-time
+
+    virtual ~ITensorHandleFactory() {}
+
+
+    virtual std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent,
+                                                                 TensorShape const& subTensorShape,
+                                                                 unsigned int const* subTensorOrigin) const = 0;
+
+    virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const = 0;
+
+    virtual const FactoryId GetId() const = 0;
+
+    virtual bool SupportsSubTensors() const = 0;
+
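+    // Note: the capability queries below are final in this initial version: every
+    // factory is assumed to support map/unmap, and import/export are not supported yet.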
+    virtual bool SupportsMapUnmap() const final { return true; }
+
+    virtual bool SupportsExport() const final { return false; }
+
+    virtual bool SupportsImport() const final { return false; }
+};
+
+enum class MemoryStrategy
+{
+    Undefined,
+    DirectCompatibility,    // Only allocate the tensor handle using the assigned factory
+    CopyToTarget,           // Default + insert a MemCopy layer before the target
+    ExportToTarget,         // Default + insert an Import node
+};
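+
+// Example: if a producer's output lives in "TestHandleFactoryB1" memory but the
+// consuming backend can only read "TestHandleFactoryC1" memory (factory ids from
+// the unit tests), the connection is marked CopyToTarget and Graph::AddCopyLayers
+// inserts a MemCopy layer on it; connections whose two ends agree on a compatible
+// factory are marked DirectCompatibility.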
+
+} // namespace armnn
diff --git a/src/backends/backendsCommon/OutputHandler.cpp b/src/backends/backendsCommon/OutputHandler.cpp
index 2df2fb5..8f4942d 100644
--- a/src/backends/backendsCommon/OutputHandler.cpp
+++ b/src/backends/backendsCommon/OutputHandler.cpp
@@ -27,9 +27,9 @@
     m_TensorHandle = factory.CreateTensorHandle(m_TensorInfo);
 }
 
-void OutputHandler::CreateTensorHandles(const IWorkloadFactory& factory, DataLayout dataLayout)
+void OutputHandler::CreateTensorHandles(const ITensorHandleFactory& factory)
 {
-    m_TensorHandle = factory.CreateTensorHandle(m_TensorInfo, dataLayout);
+    m_TensorHandle = factory.CreateTensorHandle(m_TensorInfo);
 }
 
 void OutputHandler::CollectWorkloadOutputs(WorkloadDataCollector& dataCollector) const
diff --git a/src/backends/backendsCommon/OutputHandler.hpp b/src/backends/backendsCommon/OutputHandler.hpp
index 240b369..01e255d 100644
--- a/src/backends/backendsCommon/OutputHandler.hpp
+++ b/src/backends/backendsCommon/OutputHandler.hpp
@@ -5,6 +5,7 @@
 #pragma once
 
 #include "ITensorHandle.hpp"
+#include "ITensorHandleFactory.hpp"
 
 #include <armnn/Descriptors.hpp>
 #include <armnn/INetwork.hpp>
@@ -35,14 +36,10 @@
     /// @param tensorInfo - TensorInfo for the output.
     void SetTensorInfo(const TensorInfo& tensorInfo);
 
-    /// @brief - Creates tensor handlers used by the intermediate tensors. Does not allocate memory.
+    /// @brief - Creates tensor handles used by the intermediate tensors. Does not allocate memory.
     /// @param factory - Factory to be used for handler creation.
     void CreateTensorHandles(const IWorkloadFactory& factory);
-
-    /// @brief - Creates tensor handlers used by the intermediate tensors. Does not allocate memory.
-    /// @param factory - Factory to be used for handler creation.
-    /// @param dataLayout - Data Layout to be used for handler creation.
-    void CreateTensorHandles(const IWorkloadFactory& factory, DataLayout dataLayout);
+    void CreateTensorHandles(const ITensorHandleFactory& factory);
 
     /// @brief - Gets the matching TensorInfo for the output.
     /// @return - References to the output TensorInfo.
diff --git a/src/backends/backendsCommon/TensorHandleFactoryRegistry.cpp b/src/backends/backendsCommon/TensorHandleFactoryRegistry.cpp
new file mode 100644
index 0000000..4692b9f
--- /dev/null
+++ b/src/backends/backendsCommon/TensorHandleFactoryRegistry.cpp
@@ -0,0 +1,69 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "TensorHandleFactoryRegistry.hpp"
+#include "IMemoryManager.hpp"
+
+
+namespace armnn
+{
+
+void TensorHandleFactoryRegistry::RegisterFactory(std::unique_ptr<ITensorHandleFactory> newFactory)
+{
+    if (!newFactory)
+    {
+        return;
+    }
+
+    ITensorHandleFactory::FactoryId id = newFactory->GetId();
+
+    // Don't register duplicates
+    for (auto& registeredFactory : m_Factories)
+    {
+        if (id == registeredFactory->GetId())
+        {
+            return;
+        }
+    }
+
+    // Take ownership of the new factory
+    m_Factories.push_back(std::move(newFactory));
+}
+
+void TensorHandleFactoryRegistry::RegisterMemoryManager(std::shared_ptr<armnn::IMemoryManager> memoryManager)
+{
+    m_MemoryManagers.push_back(memoryManager);
+}
+
+ITensorHandleFactory* TensorHandleFactoryRegistry::GetFactory(ITensorHandleFactory::FactoryId id) const
+{
+    for (auto& factory : m_Factories)
+    {
+        if (factory->GetId() == id)
+        {
+            return factory.get();
+        }
+    }
+
+    return nullptr;
+}
+
+void TensorHandleFactoryRegistry::AquireMemory()
+{
+    for (auto& mgr : m_MemoryManagers)
+    {
+        mgr->Acquire();
+    }
+}
+
+void TensorHandleFactoryRegistry::ReleaseMemory()
+{
+    for (auto& mgr : m_MemoryManagers)
+    {
+        mgr->Release();
+    }
+}
+
+} // namespace armnn
diff --git a/src/backends/backendsCommon/TensorHandleFactoryRegistry.hpp b/src/backends/backendsCommon/TensorHandleFactoryRegistry.hpp
new file mode 100644
index 0000000..9e02985
--- /dev/null
+++ b/src/backends/backendsCommon/TensorHandleFactoryRegistry.hpp
@@ -0,0 +1,49 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ITensorHandleFactory.hpp"
+
+#include <memory>
+#include <vector>
+
+namespace armnn
+{
+
+// Forward declaration
+class IMemoryManager;
+
+/// Keeps track of the TensorHandleFactories and memory managers registered by the backends.
+class TensorHandleFactoryRegistry
+{
+public:
+    TensorHandleFactoryRegistry() = default;
+
+    TensorHandleFactoryRegistry(const TensorHandleFactoryRegistry& other) = delete;
+    TensorHandleFactoryRegistry(TensorHandleFactoryRegistry&& other) = delete;
+
+    /// Register a TensorHandleFactory and transfer ownership
+    void RegisterFactory(std::unique_ptr<ITensorHandleFactory> newFactory);
+
+    /// Register a memory manager with shared ownership
+    void RegisterMemoryManager(std::shared_ptr<IMemoryManager> memoryManager);
+
+    /// Find a TensorHandleFactory by Id
+    /// Returns nullptr if not found
+    ITensorHandleFactory* GetFactory(ITensorHandleFactory::FactoryId id) const;
+
+    /// Acquire the memory required for inference
+    void AquireMemory();
+
+    /// Release memory required for inference
+    void ReleaseMemory();
+
+private:
+    std::vector<std::unique_ptr<ITensorHandleFactory>> m_Factories;
+    std::vector<std::shared_ptr<IMemoryManager>> m_MemoryManagers;
+};
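+
+// Typical usage, as a minimal sketch (backendA stands for any IBackendInternal;
+// the factory id is one registered by the test backends above):
+//
+//     TensorHandleFactoryRegistry registry;
+//     backendA.RegisterTensorHandleFactories(registry); // backend adds factories + memory managers
+//     ITensorHandleFactory* factory = registry.GetFactory("TestHandleFactoryA1");
+//     registry.AquireMemory();  // before running inference
+//     registry.ReleaseMemory(); // once inference is done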
+
+} // namespace armnn
diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk
index 90d3d16..8df5ab9 100644
--- a/src/backends/backendsCommon/common.mk
+++ b/src/backends/backendsCommon/common.mk
@@ -10,10 +10,12 @@
 COMMON_SOURCES := \
     BackendRegistry.cpp \
     CpuTensorHandle.cpp \
+    ITensorHandleFactory.cpp \
     LayerSupportBase.cpp \
     MemCopyWorkload.cpp \
     OptimizationViews.cpp \
     OutputHandler.cpp \
+    TensorHandleFactoryRegistry.cpp \
     WorkloadData.cpp \
     WorkloadFactory.cpp \
     WorkloadUtils.cpp