COMPMID-605: Transition buffer memory manager
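
Introduce a second, cross-function memory manager per backend target so
that transition buffers (the tensors that carry data from the output of
one function to the input of the next) can share backing memory.
MemoryManagerContext now holds an intra-function manager (intra_mm), a
cross-function manager (cross_mm) and a cross-function memory group
(cross_group). Tensor handles gain free(), manage(), parent_handle() and
target() so they can be registered with a memory group, and the simple
lifetime managers now track blobs with their bound elements so that a
freed blob can be reused by later tensors. GraphManager wires the feature
up through detail::configure_transition_manager(), and transition memory
is acquired before and released after each workload execution. Both
memory managers are enabled by default in GraphConfig, so the examples no
longer set use_function_memory_manager explicitly.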

Change-Id: Ide7c6124eb19f13f15f517e62d705646a0cd1ecd
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/130184
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
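
Usage is unchanged relative to the bundled examples. A minimal sketch of
the relevant part of an example's do_setup(), assuming `graph`, `target`
and `target_hint` are set up as in examples/graph_alexnet.cpp:

    // Both memory managers now default to enabled, so only the tuner flag
    // needs to be set explicitly.
    GraphConfig config;
    config.use_tuner = (target == 2);                // enable the CL tuner for target 2
    // config.use_transition_memory_manager = false; // opt out of transition management
    graph.finalize(target_hint, config);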
diff --git a/arm_compute/graph/GraphContext.h b/arm_compute/graph/GraphContext.h
index 2f9ab66..1831cc2 100644
--- a/arm_compute/graph/GraphContext.h
+++ b/arm_compute/graph/GraphContext.h
@@ -38,8 +38,10 @@
 /** Contains structs required for memory management */
 struct MemoryManagerContext
 {
-    Target                                       target = { Target::UNSPECIFIED }; /**< Target */
-    std::shared_ptr<arm_compute::IMemoryManager> mm     = { nullptr };             /**< Memory manager */
+    Target                                       target      = { Target::UNSPECIFIED }; /**< Target */
+    std::shared_ptr<arm_compute::IMemoryManager> intra_mm    = { nullptr };             /**< Intra-function memory manager */
+    std::shared_ptr<arm_compute::IMemoryManager> cross_mm    = { nullptr };             /**< Cross-function memory manager */
+    std::shared_ptr<arm_compute::IMemoryGroup>   cross_group = { nullptr };             /**< Cross-function memory group */
 };
 
 /** Graph context **/
@@ -82,6 +84,11 @@
      * @return Management context for the target if exists else nullptr
      */
     MemoryManagerContext *memory_management_ctx(Target target);
+    /** Gets the memory managers map
+     *
+     * @return Memory manager contexts
+     */
+    std::map<Target, MemoryManagerContext> &memory_managers();
     /** Finalizes memory managers in graph context */
     void finalize();
 
diff --git a/arm_compute/graph/IDeviceBackend.h b/arm_compute/graph/IDeviceBackend.h
index fa6fbae..f28cb1a 100644
--- a/arm_compute/graph/IDeviceBackend.h
+++ b/arm_compute/graph/IDeviceBackend.h
@@ -61,6 +61,11 @@
      * @return True if the backend is supported else false
      */
     virtual bool is_backend_supported() = 0;
+    /** Gets a backend memory allocator
+     *
+     * @return Backend memory allocator
+     */
+    virtual IAllocator *backend_allocator() = 0;
     /** Create a backend Tensor
      *
      * @param[in] tensor The tensor we want to create a backend tensor for
diff --git a/arm_compute/graph/ITensorHandle.h b/arm_compute/graph/ITensorHandle.h
index cc7132e..261ebf5 100644
--- a/arm_compute/graph/ITensorHandle.h
+++ b/arm_compute/graph/ITensorHandle.h
@@ -25,9 +25,13 @@
 #define __ARM_COMPUTE_GRAPH_ITENSORHANDLE_H__
 
 #include "arm_compute/core/ITensor.h"
+#include "arm_compute/graph/Types.h"
 
 namespace arm_compute
 {
+// Forward declarations
+class IMemoryGroup;
+
 namespace graph
 {
 /** Tensor handle interface object */
@@ -38,10 +42,13 @@
     virtual ~ITensorHandle() = default;
     /** Allocates backend memory for the handle */
     virtual void allocate() = 0;
-    /** Backend tensor object accessor */
-    virtual arm_compute::ITensor &tensor() = 0;
-    /** Backend tensor object const accessor */
-    virtual const arm_compute::ITensor &tensor() const = 0;
+    /** Frees backend memory of the handle */
+    virtual void free() = 0;
+    /** Set backend tensor to be managed by a memory group
+     *
+     * @param[in] mg Memory group
+     */
+    virtual void manage(IMemoryGroup *mg) = 0;
     /** Maps backend tensor object
      *
      * @param[in] blocking Flags if the mapping operations should be blocking
@@ -58,11 +65,25 @@
      *          on the other hand if a sub-tensor is marked as unused then the parent tensor won't be released
      */
     virtual void release_if_unused() = 0;
+    /** Backend tensor object accessor */
+    virtual arm_compute::ITensor &tensor() = 0;
+    /** Backend tensor object const accessor */
+    virtual const arm_compute::ITensor &tensor() const = 0;
+    /** Returns the parent tensor handle if the handle is a sub-tensor, else this handle
+     *
+     * @return Parent tensor handle
+     */
+    virtual ITensorHandle *parent_handle() = 0;
     /** Checks if a backing tensor is a sub-tensor object or not
      *
      * @return True if the backend tensor is a sub-tensor else false
      */
     virtual bool is_subtensor() const = 0;
+    /** Returns target type
+     *
+     * @return Target type
+     */
+    virtual Target target() const = 0;
 };
 } // namespace graph
 } // namespace arm_compute
diff --git a/arm_compute/graph/Types.h b/arm_compute/graph/Types.h
index 02e5d92..b195ed7 100644
--- a/arm_compute/graph/Types.h
+++ b/arm_compute/graph/Types.h
@@ -76,10 +76,10 @@
 /** Graph configuration structure */
 struct GraphConfig
 {
-    bool use_function_memory_manager{ false };   /**< Use a memory manager to manage per-funcion auxilary memory */
-    bool use_transition_memory_manager{ false }; /**< Use a memory manager to manager transition buffer memory */
-    bool use_tuner{ false };                     /**< Use a tuner in tunable backends */
-    int  num_threads{ -1 };                      /**< Number of threads to use (thread capable backends), if 0 the backend will auto-initialize, if -1 the backend will stay as it is. */
+    bool use_function_memory_manager{ true };   /**< Use a memory manager to manage per-function auxiliary memory */
+    bool use_transition_memory_manager{ true }; /**< Use a memory manager to manage transition buffer memory */
+    bool use_tuner{ false };                    /**< Use a tuner in tunable backends */
+    int  num_threads{ -1 };                     /**< Number of threads to use (thread capable backends), if 0 the backend will auto-initialize, if -1 the backend will stay as it is. */
 };
 
 /**< Device target types */
diff --git a/arm_compute/graph/Workload.h b/arm_compute/graph/Workload.h
index 35066c4..e9368ee 100644
--- a/arm_compute/graph/Workload.h
+++ b/arm_compute/graph/Workload.h
@@ -24,7 +24,9 @@
 #ifndef __ARM_COMPUTE_GRAPH_WORKLOAD_H__
 #define __ARM_COMPUTE_GRAPH_WORKLOAD_H__
 
+#include "arm_compute/graph/GraphContext.h"
 #include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryGroup.h"
 
 #include <functional>
 #include <memory>
@@ -68,10 +70,8 @@
 struct ExecutionTask
 {
     // TODO (geopin01) : Support vector of functions?
-    std::unique_ptr<arm_compute::IFunction> task            = {}; /**< Task to execute */
-    INode                                  *node            = {}; /**< Node bound to this workload */
-    std::vector<ITensorHandle *>            commit_handles  = {}; /**< Handles needs to sync for this task to execute */
-    std::vector<ITensorHandle *>            release_handles = {}; /**< Handles that can be released after this node execution */
+    std::unique_ptr<arm_compute::IFunction> task = {}; /**< Task to execute */
+    INode                                  *node = {}; /**< Node bound to this workload */
 
     /** Function operator */
     void operator()();
@@ -83,10 +83,11 @@
 /** Execution workload */
 struct ExecutionWorkload
 {
-    std::vector<Tensor *>      inputs  = {};      /**< Input handles */
-    std::vector<Tensor *>      outputs = {};      /**< Output handles */
-    std::vector<ExecutionTask> tasks   = {};      /**< Execution workload */
-    Graph                     *graph   = nullptr; /**< Graph bound to the workload */
+    std::vector<Tensor *>      inputs  = {};          /**< Input handles */
+    std::vector<Tensor *>      outputs = {};          /**< Output handles */
+    std::vector<ExecutionTask> tasks   = {};          /**< Execution workload */
+    Graph                     *graph   = { nullptr }; /**< Graph bound to the workload */
+    GraphContext              *ctx     = { nullptr }; /**< Graph execution context */
 };
 } // namespace graph
 } // namespace arm_compute
diff --git a/arm_compute/graph/backends/CL/CLDeviceBackend.h b/arm_compute/graph/backends/CL/CLDeviceBackend.h
index 5adbe0e..ab39d0f 100644
--- a/arm_compute/graph/backends/CL/CLDeviceBackend.h
+++ b/arm_compute/graph/backends/CL/CLDeviceBackend.h
@@ -55,6 +55,7 @@
     void initialize_backend() override;
     void setup_backend_context(GraphContext &ctx) override;
     bool                           is_backend_supported() override;
+    IAllocator                    *backend_allocator() override;
     std::unique_ptr<ITensorHandle> create_tensor(const Tensor &tensor) override;
     std::unique_ptr<ITensorHandle> create_subtensor(ITensorHandle *parent, TensorShape shape, Coordinates coords, bool extend_parent) override;
     std::unique_ptr<arm_compute::IFunction> configure_node(INode &node, GraphContext &ctx) override;
diff --git a/arm_compute/graph/backends/CL/CLSubTensorHandle.h b/arm_compute/graph/backends/CL/CLSubTensorHandle.h
index 4be5842..0c515a1 100644
--- a/arm_compute/graph/backends/CL/CLSubTensorHandle.h
+++ b/arm_compute/graph/backends/CL/CLSubTensorHandle.h
@@ -52,18 +52,27 @@
     CLSubTensorHandle(CLSubTensorHandle &&) = default;
     /** Allow instances of this class to be moved */
     CLSubTensorHandle &operator=(CLSubTensorHandle &&) = default;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLSubTensorHandle(const CLSubTensorHandle &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLSubTensorHandle &operator=(const CLSubTensorHandle &) = delete;
 
     // Inherited overridden methods
-    void                        allocate() override;
+    void allocate() override;
+    void free() override;
+    void manage(IMemoryGroup *mg) override;
+    void map(bool blocking) override;
+    void                        unmap() override;
+    void                        release_if_unused() override;
     arm_compute::ITensor       &tensor() override;
     const arm_compute::ITensor &tensor() const override;
-    void map(bool blocking) override;
-    void unmap() override;
-    void release_if_unused() override;
-    bool is_subtensor() const override;
+    ITensorHandle              *parent_handle() override;
+    bool                        is_subtensor() const override;
+    Target                      target() const override;
 
 private:
-    arm_compute::CLSubTensor _sub_tensor; /**< Backend Sub-Tensor */
+    arm_compute::CLSubTensor _sub_tensor;    /**< Backend Sub-Tensor */
+    ITensorHandle           *_parent_handle; /**< Parent handle */
 };
 } // namespace backends
 } // namespace graph
diff --git a/arm_compute/graph/backends/CL/CLTensorHandle.h b/arm_compute/graph/backends/CL/CLTensorHandle.h
index 8f5a70c..2399732 100644
--- a/arm_compute/graph/backends/CL/CLTensorHandle.h
+++ b/arm_compute/graph/backends/CL/CLTensorHandle.h
@@ -51,13 +51,17 @@
     CLTensorHandle &operator=(CLTensorHandle &&) = default;
 
     // Inherited overridden methods
-    void                        allocate() override;
+    void allocate() override;
+    void free() override;
+    void manage(IMemoryGroup *mg) override;
+    void map(bool blocking) override;
+    void                        unmap() override;
+    void                        release_if_unused() override;
     arm_compute::ITensor       &tensor() override;
     const arm_compute::ITensor &tensor() const override;
-    void map(bool blocking) override;
-    void unmap() override;
-    void release_if_unused() override;
-    bool is_subtensor() const override;
+    ITensorHandle              *parent_handle() override;
+    bool                        is_subtensor() const override;
+    Target                      target() const override;
 
 private:
     arm_compute::CLTensor _tensor; /**< Backend Tensor */
diff --git a/arm_compute/graph/backends/GLES/GCDeviceBackend.h b/arm_compute/graph/backends/GLES/GCDeviceBackend.h
index be81a8f..dc0e2b0 100644
--- a/arm_compute/graph/backends/GLES/GCDeviceBackend.h
+++ b/arm_compute/graph/backends/GLES/GCDeviceBackend.h
@@ -45,6 +45,7 @@
     void initialize_backend() override;
     void setup_backend_context(GraphContext &ctx) override;
     bool                           is_backend_supported() override;
+    IAllocator                    *backend_allocator() override;
     std::unique_ptr<ITensorHandle> create_tensor(const Tensor &tensor) override;
     std::unique_ptr<ITensorHandle> create_subtensor(ITensorHandle *parent, TensorShape shape, Coordinates coords, bool extend_parent) override;
     std::unique_ptr<arm_compute::IFunction> configure_node(INode &node, GraphContext &ctx) override;
diff --git a/arm_compute/graph/backends/GLES/GCTensorHandle.h b/arm_compute/graph/backends/GLES/GCTensorHandle.h
index 774268f..29b0319 100644
--- a/arm_compute/graph/backends/GLES/GCTensorHandle.h
+++ b/arm_compute/graph/backends/GLES/GCTensorHandle.h
@@ -51,13 +51,17 @@
     GCTensorHandle &operator=(GCTensorHandle &&) = default;
 
     // Inherited overridden methods
-    void                        allocate() override;
+    void allocate() override;
+    void free() override;
+    void manage(IMemoryGroup *mg) override;
+    void map(bool blocking) override;
+    void                        unmap() override;
+    void                        release_if_unused() override;
     arm_compute::ITensor       &tensor() override;
     const arm_compute::ITensor &tensor() const override;
-    void map(bool blocking) override;
-    void unmap() override;
-    void release_if_unused() override;
-    bool is_subtensor() const override;
+    ITensorHandle              *parent_handle() override;
+    bool                        is_subtensor() const override;
+    Target                      target() const override;
 
 private:
     arm_compute::GCTensor _tensor; /**< Backend Tensor */
diff --git a/arm_compute/graph/backends/NEON/NEDeviceBackend.h b/arm_compute/graph/backends/NEON/NEDeviceBackend.h
index b23c83a..c1e2e0c 100644
--- a/arm_compute/graph/backends/NEON/NEDeviceBackend.h
+++ b/arm_compute/graph/backends/NEON/NEDeviceBackend.h
@@ -44,6 +44,7 @@
     void initialize_backend() override;
     void setup_backend_context(GraphContext &ctx) override;
     bool                           is_backend_supported() override;
+    IAllocator                    *backend_allocator() override;
     std::unique_ptr<ITensorHandle> create_tensor(const Tensor &tensor) override;
     std::unique_ptr<ITensorHandle> create_subtensor(ITensorHandle *parent, TensorShape shape, Coordinates coords, bool extend_parent) override;
     std::unique_ptr<arm_compute::IFunction> configure_node(INode &node, GraphContext &ctx) override;
diff --git a/arm_compute/graph/backends/NEON/NESubTensorHandle.h b/arm_compute/graph/backends/NEON/NESubTensorHandle.h
index 11dcec6..101d3e6 100644
--- a/arm_compute/graph/backends/NEON/NESubTensorHandle.h
+++ b/arm_compute/graph/backends/NEON/NESubTensorHandle.h
@@ -52,18 +52,27 @@
     NESubTensorHandle(NESubTensorHandle &&) = default;
     /** Allow instances of this class to be moved */
     NESubTensorHandle &operator=(NESubTensorHandle &&) = default;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NESubTensorHandle(const NESubTensorHandle &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NESubTensorHandle &operator=(const NESubTensorHandle &) = delete;
 
     // Inherited overridden methods
-    void                        allocate() override;
+    void allocate() override;
+    void free() override;
+    void manage(IMemoryGroup *mg) override;
+    void map(bool blocking) override;
+    void                        unmap() override;
+    void                        release_if_unused() override;
     arm_compute::ITensor       &tensor() override;
     const arm_compute::ITensor &tensor() const override;
-    void map(bool blocking) override;
-    void unmap() override;
-    void release_if_unused() override;
-    bool is_subtensor() const override;
+    ITensorHandle              *parent_handle() override;
+    bool                        is_subtensor() const override;
+    Target                      target() const override;
 
 private:
-    arm_compute::SubTensor _sub_tensor; /**< Backend Sub-Tensor */
+    arm_compute::SubTensor _sub_tensor;    /**< Backend Sub-Tensor */
+    ITensorHandle         *_parent_handle; /**< Parent handle */
 };
 } // namespace backends
 } // namespace graph
diff --git a/arm_compute/graph/backends/NEON/NETensorHandle.h b/arm_compute/graph/backends/NEON/NETensorHandle.h
index 06ccdd8..150e0c9 100644
--- a/arm_compute/graph/backends/NEON/NETensorHandle.h
+++ b/arm_compute/graph/backends/NEON/NETensorHandle.h
@@ -51,13 +51,17 @@
     NETensorHandle &operator=(NETensorHandle &&) = default;
 
     // Inherited overridden methods
-    void                        allocate() override;
+    void allocate() override;
+    void free() override;
+    void manage(IMemoryGroup *mg) override;
+    void map(bool blocking) override;
+    void                        unmap() override;
+    void                        release_if_unused() override;
     arm_compute::ITensor       &tensor() override;
     const arm_compute::ITensor &tensor() const override;
-    void map(bool blocking) override;
-    void unmap() override;
-    void release_if_unused() override;
-    bool is_subtensor() const override;
+    ITensorHandle              *parent_handle() override;
+    bool                        is_subtensor() const override;
+    Target                      target() const override;
 
 private:
     arm_compute::Tensor _tensor; /**< Backend Tensor */
diff --git a/arm_compute/graph/backends/Utils.h b/arm_compute/graph/backends/Utils.h
index b902d17..c7a50d9 100644
--- a/arm_compute/graph/backends/Utils.h
+++ b/arm_compute/graph/backends/Utils.h
@@ -88,7 +88,7 @@
 inline std::shared_ptr<IMemoryManager> get_memory_manager(GraphContext &ctx, Target target)
 {
     bool enabled = ctx.config().use_function_memory_manager && (ctx.memory_management_ctx(target) != nullptr);
-    return enabled ? ctx.memory_management_ctx(target)->mm : nullptr;
+    return enabled ? ctx.memory_management_ctx(target)->intra_mm : nullptr;
 }
 } // namespace backends
 } // namespace graph
diff --git a/arm_compute/graph/detail/CrossLayerMemoryManagerHelpers.h b/arm_compute/graph/detail/CrossLayerMemoryManagerHelpers.h
new file mode 100644
index 0000000..b7424c8
--- /dev/null
+++ b/arm_compute/graph/detail/CrossLayerMemoryManagerHelpers.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GRAPH_DETAIL_CROSS_LAYER_MEMORY_MANAGER_HELPERS_H__
+#define __ARM_COMPUTE_GRAPH_DETAIL_CROSS_LAYER_MEMORY_MANAGER_HELPERS_H__
+
+#include <vector>
+
+namespace arm_compute
+{
+namespace graph
+{
+// Forward declarations
+class Graph;
+class GraphContext;
+class ExecutionWorkload;
+class ITransMemoryManager;
+class ITensorHandle;
+
+namespace detail
+{
+/** Configures transition manager and execution workload
+ *
+ * @param[in]     g        Graph to configure
+ * @param[in]     ctx      Graph context
+ * @param[in,out] workload Workload to configure
+ */
+void configure_transition_manager(Graph &g, GraphContext &ctx, ExecutionWorkload &workload);
+} // namespace detail
+} // namespace graph
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_GRAPH_DETAIL_CROSS_LAYER_MEMORY_MANAGER_HELPERS_H__ */
diff --git a/arm_compute/runtime/ISimpleLifetimeManager.h b/arm_compute/runtime/ISimpleLifetimeManager.h
index 792ab0b..7942e40 100644
--- a/arm_compute/runtime/ISimpleLifetimeManager.h
+++ b/arm_compute/runtime/ISimpleLifetimeManager.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,7 +30,9 @@
 #include "arm_compute/runtime/Types.h"
 
 #include <cstddef>
+#include <list>
 #include <map>
+#include <set>
 #include <vector>
 
 namespace arm_compute
@@ -77,9 +79,19 @@
         bool   status; /**< Lifetime status */
     };
 
-    IMemoryGroup        *_active_group;                               /**< Active group */
-    std::vector<Element> _active_elements;                            /**< A map that contains the active elements */
-    std::map<IMemoryGroup *, std::vector<Element>> _finalized_groups; /**< A map that contains the finalized groups */
+    /** Blob struct */
+    struct Blob
+    {
+        void            *id;
+        size_t           max_size;
+        std::set<void *> bound_elements;
+    };
+
+    IMemoryGroup *_active_group;                                           /**< Active group */
+    std::map<void *, Element> _active_elements;                            /**< A map that contains the active elements */
+    std::list<Blob> _free_blobs;                                           /**< Free blobs */
+    std::list<Blob> _occupied_blobs;                                       /**< Occupied blobs */
+    std::map<IMemoryGroup *, std::map<void *, Element>> _finalized_groups; /**< A map that contains the finalized groups */
 };
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_ISIMPLELIFETIMEMANAGER_H__ */
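
The blob bookkeeping introduced above can be illustrated with a small,
self-contained sketch (the Blob fields mirror ISimpleLifetimeManager;
everything else is invented for illustration and is not the library
implementation). An object entering its lifetime occupies a free blob if
one exists, otherwise a new blob is created; when the lifetime ends, the
blob records the largest size bound to it and returns to the free list:

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <list>
    #include <set>

    struct Blob
    {
        void            *id;             // object currently occupying the blob
        size_t           max_size;       // largest size ever bound to this blob
        std::set<void *> bound_elements; // all objects that shared this blob
    };

    class LifetimeSketch
    {
    public:
        void start_lifetime(void *obj)
        {
            if(_free_blobs.empty())
            {
                // No free blob: create a new one for the object
                _occupied_blobs.emplace_front(Blob{ obj, 0, { obj } });
            }
            else
            {
                // Reuse the first free blob
                _occupied_blobs.splice(std::begin(_occupied_blobs), _free_blobs, std::begin(_free_blobs));
                _occupied_blobs.front().id = obj;
            }
        }
        void end_lifetime(void *obj, size_t size)
        {
            auto it = std::find_if(std::begin(_occupied_blobs), std::end(_occupied_blobs),
                                   [obj](const Blob & b) { return b.id == obj; });
            it->bound_elements.insert(obj);
            it->max_size = std::max(it->max_size, size);
            it->id       = nullptr;
            // Return the blob to the free list so later objects can reuse it
            _free_blobs.splice(std::begin(_free_blobs), _occupied_blobs, it);
        }
        size_t num_blobs() const
        {
            return _free_blobs.size() + _occupied_blobs.size();
        }

    private:
        std::list<Blob> _free_blobs;
        std::list<Blob> _occupied_blobs;
    };

    int main()
    {
        LifetimeSketch lm;
        int a = 0, b = 0;
        lm.start_lifetime(&a);     // creates blob 0
        lm.end_lifetime(&a, 1024); // blob 0 becomes free with max_size 1024
        lm.start_lifetime(&b);     // reuses blob 0
        lm.end_lifetime(&b, 2048); // max_size grows to 2048
        std::cout << "blobs: " << lm.num_blobs() << std::endl; // prints 1
    }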
diff --git a/examples/graph_alexnet.cpp b/examples/graph_alexnet.cpp
index 291a586..45c2b56 100644
--- a/examples/graph_alexnet.cpp
+++ b/examples/graph_alexnet.cpp
@@ -169,8 +169,7 @@
 
         // Finalize graph
         GraphConfig config;
-        config.use_function_memory_manager = true;
-        config.use_tuner                   = (target == 2);
+        config.use_tuner = (target == 2);
         graph.finalize(target_hint, config);
     }
     void do_run() override
diff --git a/examples/graph_googlenet.cpp b/examples/graph_googlenet.cpp
index 25e9e7f..deafe5a 100644
--- a/examples/graph_googlenet.cpp
+++ b/examples/graph_googlenet.cpp
@@ -135,8 +135,7 @@
 
         // Finalize graph
         GraphConfig config;
-        config.use_function_memory_manager = true;
-        config.use_tuner                   = (target == 2);
+        config.use_tuner = (target == 2);
         graph.finalize(target_hint, config);
     }
     void do_run() override
diff --git a/examples/graph_inception_v3.cpp b/examples/graph_inception_v3.cpp
index 01a4d0c..7fa0fc7 100644
--- a/examples/graph_inception_v3.cpp
+++ b/examples/graph_inception_v3.cpp
@@ -190,8 +190,7 @@
 
         // Finalize graph
         GraphConfig config;
-        config.use_function_memory_manager = true;
-        config.use_tuner                   = (target == 2);
+        config.use_tuner = (target == 2);
         graph.finalize(target_hint, config);
     }
 
diff --git a/examples/graph_inception_v4.cpp b/examples/graph_inception_v4.cpp
index 8f34035..4217c78 100644
--- a/examples/graph_inception_v4.cpp
+++ b/examples/graph_inception_v4.cpp
@@ -159,8 +159,7 @@
 
         // Finalize graph
         GraphConfig config;
-        config.use_function_memory_manager = true;
-        config.use_tuner                   = (target == 2);
+        config.use_tuner = (target == 2);
         graph.finalize(target_hint, config);
 #else  /* __aarch64__ */
         using namespace arm_compute;
diff --git a/examples/graph_lenet.cpp b/examples/graph_lenet.cpp
index 895d9aa..ea0916b 100644
--- a/examples/graph_lenet.cpp
+++ b/examples/graph_lenet.cpp
@@ -109,8 +109,7 @@
 
         // Finalize graph
         GraphConfig config;
-        config.use_function_memory_manager = true;
-        config.use_tuner                   = (target == 2);
+        config.use_tuner = (target == 2);
         graph.finalize(target_hint, config);
     }
     void do_run() override
diff --git a/examples/graph_mobilenet.cpp b/examples/graph_mobilenet.cpp
index 870e67d..813c0bf 100644
--- a/examples/graph_mobilenet.cpp
+++ b/examples/graph_mobilenet.cpp
@@ -167,8 +167,7 @@
 
         // Finalize graph
         GraphConfig config;
-        config.use_function_memory_manager = true;
-        config.use_tuner                   = (target == 2);
+        config.use_tuner = (target == 2);
         graph.finalize(target_hint, config);
     }
     void do_run() override
diff --git a/examples/graph_mobilenet_qasymm8.cpp b/examples/graph_mobilenet_qasymm8.cpp
index ddf6175..7edd182 100644
--- a/examples/graph_mobilenet_qasymm8.cpp
+++ b/examples/graph_mobilenet_qasymm8.cpp
@@ -169,8 +169,7 @@
 
         // Finalize graph
         GraphConfig config;
-        config.use_function_memory_manager = true;
-        config.use_tuner                   = (target == 2);
+        config.use_tuner = (target == 2);
         graph.finalize(target_hint, config);
     }
     void do_run() override
diff --git a/examples/graph_resnet50.cpp b/examples/graph_resnet50.cpp
index d0fa106..18a028d 100644
--- a/examples/graph_resnet50.cpp
+++ b/examples/graph_resnet50.cpp
@@ -126,8 +126,7 @@
 
         // Finalize graph
         GraphConfig config;
-        config.use_function_memory_manager = true;
-        config.use_tuner                   = (target == 2);
+        config.use_tuner = (target == 2);
         graph.finalize(target_hint, config);
     }
 
diff --git a/examples/graph_squeezenet.cpp b/examples/graph_squeezenet.cpp
index ff2487c..8ed43f7 100644
--- a/examples/graph_squeezenet.cpp
+++ b/examples/graph_squeezenet.cpp
@@ -171,8 +171,7 @@
 
         // Finalize graph
         GraphConfig config;
-        config.use_function_memory_manager = true;
-        config.use_tuner                   = (target == 2);
+        config.use_tuner = (target == 2);
         graph.finalize(target_hint, config);
     }
     void do_run() override
diff --git a/examples/graph_squeezenet_v1_1.cpp b/examples/graph_squeezenet_v1_1.cpp
index e1a1f66..529f4fe 100644
--- a/examples/graph_squeezenet_v1_1.cpp
+++ b/examples/graph_squeezenet_v1_1.cpp
@@ -176,8 +176,7 @@
 
         // Finalize graph
         GraphConfig config;
-        config.use_function_memory_manager = true;
-        config.use_tuner                   = (target == 2);
+        config.use_tuner = (target == 2);
         graph.finalize(target_hint, config);
     }
     void do_run() override
diff --git a/examples/graph_vgg16.cpp b/examples/graph_vgg16.cpp
index 9c2763f..44b4c4c 100644
--- a/examples/graph_vgg16.cpp
+++ b/examples/graph_vgg16.cpp
@@ -230,8 +230,7 @@
 
         // Finalize graph
         GraphConfig config;
-        config.use_function_memory_manager = true;
-        config.use_tuner                   = (target == 2);
+        config.use_tuner = (target == 2);
         graph.finalize(target_hint, config);
     }
     void do_run() override
diff --git a/examples/graph_vgg19.cpp b/examples/graph_vgg19.cpp
index 0684309..229112b 100644
--- a/examples/graph_vgg19.cpp
+++ b/examples/graph_vgg19.cpp
@@ -243,8 +243,7 @@
 
         // Finalize graph
         GraphConfig config;
-        config.use_function_memory_manager = true;
-        config.use_tuner                   = (target == 2);
+        config.use_tuner = (target == 2);
         graph.finalize(target_hint, config);
     }
     void do_run() override
diff --git a/src/graph/GraphContext.cpp b/src/graph/GraphContext.cpp
index 6fc45c0..3f31114 100644
--- a/src/graph/GraphContext.cpp
+++ b/src/graph/GraphContext.cpp
@@ -60,13 +60,24 @@
     return (_memory_managers.find(target) != std::end(_memory_managers)) ? &_memory_managers[target] : nullptr;
 }
 
+std::map<Target, MemoryManagerContext> &GraphContext::memory_managers()
+{
+    return _memory_managers;
+}
+
 void GraphContext::finalize()
 {
     for(auto &mm_obj : _memory_managers)
     {
-        if(mm_obj.second.mm != nullptr)
+        // Finalize intra layer memory manager
+        if(mm_obj.second.intra_mm != nullptr)
         {
-            mm_obj.second.mm->finalize();
+            mm_obj.second.intra_mm->finalize();
+        }
+        // Finalize cross layer memory manager
+        if(mm_obj.second.cross_mm != nullptr)
+        {
+            mm_obj.second.cross_mm->finalize();
         }
     }
 }
diff --git a/src/graph/GraphManager.cpp b/src/graph/GraphManager.cpp
index aac6488..a67e5b2 100644
--- a/src/graph/GraphManager.cpp
+++ b/src/graph/GraphManager.cpp
@@ -28,6 +28,7 @@
 #include "arm_compute/graph/Logger.h"
 #include "arm_compute/graph/PassManager.h"
 #include "arm_compute/graph/Utils.h"
+#include "arm_compute/graph/detail/CrossLayerMemoryManagerHelpers.h"
 #include "arm_compute/graph/detail/ExecutionHelpers.h"
 
 namespace arm_compute
@@ -72,41 +73,37 @@
     auto workload = detail::configure_all_nodes(graph, ctx);
     ARM_COMPUTE_ERROR_ON_MSG(workload.tasks.empty(), "Could not configure all nodes!");
 
+    // Allocate const tensors and call accessors
+    detail::allocate_const_tensors(graph);
+    detail::call_all_const_node_accessors(graph);
+
     // TODO (COMPMID-920) : Update prepare for NEON/GC
     if(forced_target == Target::CL)
     {
-        // Allocate const tensors and call accessors
-        detail::allocate_const_tensors(graph);
-        detail::call_all_const_node_accessors(graph);
-
         // Prepare graph
         detail::prepare_all_tasks(workload);
+    }
 
-        // Allocate all tensors
-        detail::allocate_all_tensors(graph);
-
-        // Finalize Graph context
-        ctx.finalize();
-
-        // Register graph
-        _workloads.insert(std::make_pair(graph.id(), std::move(workload)));
-        ARM_COMPUTE_LOG_GRAPH_VERBOSE("Created workload for graph with ID : " << graph.id().get() << std::endl);
+    // Setup tensor memory (Allocate all tensors or setup transition manager)
+    if(ctx.config().use_transition_memory_manager)
+    {
+        detail::configure_transition_manager(graph, ctx, workload);
     }
     else
     {
-        // Allocate all tensors
         detail::allocate_all_tensors(graph);
+    }
 
-        // Call accessors on all Const nodes
-        detail::call_all_const_node_accessors(graph);
+    // Finalize Graph context
+    ctx.finalize();
 
-        // Finalize Graph context
-        ctx.finalize();
+    // Register graph
+    _workloads.insert(std::make_pair(graph.id(), std::move(workload)));
+    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Created workload for graph with ID : " << graph.id().get() << std::endl);
 
-        // Register graph
-        _workloads.insert(std::make_pair(graph.id(), std::move(workload)));
-        ARM_COMPUTE_LOG_GRAPH_VERBOSE("Created workload for graph with ID : " << graph.id().get() << std::endl);
-
+    // TODO (COMPMID-920) : Update prepare for NEON/GC
+    if(forced_target != Target::CL)
+    {
         // Make first run
         execute_graph(graph);
 
diff --git a/src/graph/backends/CL/CLDeviceBackend.cpp b/src/graph/backends/CL/CLDeviceBackend.cpp
index 37cbcd7..7f2be67 100644
--- a/src/graph/backends/CL/CLDeviceBackend.cpp
+++ b/src/graph/backends/CL/CLDeviceBackend.cpp
@@ -37,6 +37,7 @@
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/runtime/BlobLifetimeManager.h"
 #include "arm_compute/runtime/CL/CLBufferAllocator.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "arm_compute/runtime/MemoryManagerOnDemand.h"
 #include "arm_compute/runtime/PoolManager.h"
@@ -107,8 +108,10 @@
     if(ctx.memory_management_ctx(Target::CL) == nullptr)
     {
         MemoryManagerContext mm_ctx;
-        mm_ctx.target = Target::CL;
-        mm_ctx.mm     = create_memory_manager(MemoryManagerAffinity::Buffer);
+        mm_ctx.target      = Target::CL;
+        mm_ctx.intra_mm    = create_memory_manager(MemoryManagerAffinity::Buffer);
+        mm_ctx.cross_mm    = create_memory_manager(MemoryManagerAffinity::Buffer);
+        mm_ctx.cross_group = std::make_shared<CLMemoryGroup>(mm_ctx.cross_mm);
 
         ctx.insert_memory_management_ctx(std::move(mm_ctx));
     }
@@ -119,6 +122,11 @@
     return arm_compute::opencl_is_available();
 }
 
+IAllocator *CLDeviceBackend::backend_allocator()
+{
+    return &_allocator;
+}
+
 std::unique_ptr<ITensorHandle> CLDeviceBackend::create_tensor(const Tensor &tensor)
 {
     // Get tensor descriptor
diff --git a/src/graph/backends/CL/CLSubTensorHandle.cpp b/src/graph/backends/CL/CLSubTensorHandle.cpp
index a1bc8a1..016dca7 100644
--- a/src/graph/backends/CL/CLSubTensorHandle.cpp
+++ b/src/graph/backends/CL/CLSubTensorHandle.cpp
@@ -32,11 +32,12 @@
 namespace backends
 {
 CLSubTensorHandle::CLSubTensorHandle(ITensorHandle *parent_handle, const TensorShape &shape, const Coordinates &coords, bool extend_parent)
-    : _sub_tensor()
+    : _sub_tensor(), _parent_handle(nullptr)
 {
     ARM_COMPUTE_ERROR_ON(!parent_handle);
     auto parent_tensor = arm_compute::utils::cast::polymorphic_downcast<ICLTensor *>(&parent_handle->tensor());
     _sub_tensor        = arm_compute::CLSubTensor(parent_tensor, shape, coords, extend_parent);
+    _parent_handle     = parent_handle;
 }
 
 void CLSubTensorHandle::allocate()
@@ -44,14 +45,15 @@
     // noop
 }
 
-const arm_compute::ITensor &CLSubTensorHandle::tensor() const
+void CLSubTensorHandle::free()
 {
-    return _sub_tensor;
+    // noop
 }
 
-arm_compute::ITensor &CLSubTensorHandle::tensor()
+void CLSubTensorHandle::manage(IMemoryGroup *mg)
 {
-    return _sub_tensor;
+    ARM_COMPUTE_UNUSED(mg);
+    // noop
 }
 
 void CLSubTensorHandle::map(bool blocking)
@@ -69,10 +71,31 @@
     // noop
 }
 
+const arm_compute::ITensor &CLSubTensorHandle::tensor() const
+{
+    return _sub_tensor;
+}
+
+arm_compute::ITensor &CLSubTensorHandle::tensor()
+{
+    return _sub_tensor;
+}
+
+ITensorHandle *CLSubTensorHandle::parent_handle()
+{
+    ARM_COMPUTE_ERROR_ON(_parent_handle == nullptr);
+    return _parent_handle->parent_handle();
+}
+
 bool CLSubTensorHandle::is_subtensor() const
 {
     return true;
 }
+
+Target CLSubTensorHandle::target() const
+{
+    return Target::CL;
+}
 } // namespace backends
 } // namespace graph
 } // namespace arm_compute
\ No newline at end of file
diff --git a/src/graph/backends/CL/CLTensorHandle.cpp b/src/graph/backends/CL/CLTensorHandle.cpp
index 563c4d9..219d9d0 100644
--- a/src/graph/backends/CL/CLTensorHandle.cpp
+++ b/src/graph/backends/CL/CLTensorHandle.cpp
@@ -23,6 +23,9 @@
  */
 #include "arm_compute/graph/backends/CL/CLTensorHandle.h"
 
+#include "arm_compute/core/utils/misc/Cast.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
+
 namespace arm_compute
 {
 namespace graph
@@ -40,14 +43,18 @@
     _tensor.allocator()->allocate();
 }
 
-const arm_compute::ITensor &CLTensorHandle::tensor() const
+void CLTensorHandle::free()
 {
-    return _tensor;
+    _tensor.allocator()->free();
 }
 
-arm_compute::ITensor &CLTensorHandle::tensor()
+void CLTensorHandle::manage(IMemoryGroup *mg)
 {
-    return _tensor;
+    if(mg != nullptr)
+    {
+        auto *cl_mg = arm_compute::utils::cast::polymorphic_downcast<CLMemoryGroup *>(mg);
+        cl_mg->manage(&_tensor);
+    }
 }
 
 void CLTensorHandle::map(bool blocking)
@@ -69,10 +76,30 @@
     }
 }
 
+const arm_compute::ITensor &CLTensorHandle::tensor() const
+{
+    return _tensor;
+}
+
+arm_compute::ITensor &CLTensorHandle::tensor()
+{
+    return _tensor;
+}
+
+ITensorHandle *CLTensorHandle::parent_handle()
+{
+    return this;
+}
+
 bool CLTensorHandle::is_subtensor() const
 {
     return false;
 }
+
+Target CLTensorHandle::target() const
+{
+    return Target::CL;
+}
 } // namespace backends
 } // namespace graph
 } // namespace arm_compute
\ No newline at end of file
diff --git a/src/graph/backends/GLES/GCDeviceBackend.cpp b/src/graph/backends/GLES/GCDeviceBackend.cpp
index 0185598..770cca5 100644
--- a/src/graph/backends/GLES/GCDeviceBackend.cpp
+++ b/src/graph/backends/GLES/GCDeviceBackend.cpp
@@ -36,6 +36,7 @@
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/runtime/BlobLifetimeManager.h"
 #include "arm_compute/runtime/GLES_COMPUTE/GCBufferAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h"
 #include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
 #include "arm_compute/runtime/MemoryManagerOnDemand.h"
 #include "arm_compute/runtime/PoolManager.h"
@@ -68,8 +69,10 @@
     if(ctx.memory_management_ctx(Target::GC) == nullptr)
     {
         MemoryManagerContext mm_ctx;
-        mm_ctx.target = Target::GC;
-        mm_ctx.mm     = create_memory_manager(MemoryManagerAffinity::Buffer);
+        mm_ctx.target      = Target::GC;
+        mm_ctx.intra_mm    = create_memory_manager(MemoryManagerAffinity::Buffer);
+        mm_ctx.cross_mm    = create_memory_manager(MemoryManagerAffinity::Buffer);
+        mm_ctx.cross_group = std::make_shared<GCMemoryGroup>(mm_ctx.cross_mm);
 
         ctx.insert_memory_management_ctx(std::move(mm_ctx));
     }
@@ -80,6 +83,11 @@
     return arm_compute::opengles31_is_available();
 }
 
+IAllocator *GCDeviceBackend::backend_allocator()
+{
+    return &_allocator;
+}
+
 std::unique_ptr<ITensorHandle> GCDeviceBackend::create_tensor(const Tensor &tensor)
 {
     // Get tensor descriptor
diff --git a/src/graph/backends/GLES/GCTensorHandle.cpp b/src/graph/backends/GLES/GCTensorHandle.cpp
index ae7c778..4e5c652 100644
--- a/src/graph/backends/GLES/GCTensorHandle.cpp
+++ b/src/graph/backends/GLES/GCTensorHandle.cpp
@@ -23,6 +23,9 @@
  */
 #include "arm_compute/graph/backends/GLES/GCTensorHandle.h"
 
+#include "arm_compute/core/utils/misc/Cast.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h"
+
 namespace arm_compute
 {
 namespace graph
@@ -40,14 +43,18 @@
     _tensor.allocator()->allocate();
 }
 
-const arm_compute::ITensor &GCTensorHandle::tensor() const
+void GCTensorHandle::free()
 {
-    return _tensor;
+    _tensor.allocator()->free();
 }
 
-arm_compute::ITensor &GCTensorHandle::tensor()
+void GCTensorHandle::manage(IMemoryGroup *mg)
 {
-    return _tensor;
+    if(mg != nullptr)
+    {
+        auto *gc_mg = arm_compute::utils::cast::polymorphic_downcast<GCMemoryGroup *>(mg);
+        gc_mg->manage(&_tensor);
+    }
 }
 
 void GCTensorHandle::map(bool blocking)
@@ -69,10 +76,30 @@
     }
 }
 
+const arm_compute::ITensor &GCTensorHandle::tensor() const
+{
+    return _tensor;
+}
+
+arm_compute::ITensor &GCTensorHandle::tensor()
+{
+    return _tensor;
+}
+
+ITensorHandle *GCTensorHandle::parent_handle()
+{
+    return this;
+}
+
 bool GCTensorHandle::is_subtensor() const
 {
     return false;
 }
+
+Target GCTensorHandle::target() const
+{
+    return Target::GC;
+}
 } // namespace backends
 } // namespace graph
 } // namespace arm_compute
\ No newline at end of file
diff --git a/src/graph/backends/NEON/NEDeviceBackend.cpp b/src/graph/backends/NEON/NEDeviceBackend.cpp
index def6c39..7c2db40 100644
--- a/src/graph/backends/NEON/NEDeviceBackend.cpp
+++ b/src/graph/backends/NEON/NEDeviceBackend.cpp
@@ -37,6 +37,7 @@
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/runtime/Allocator.h"
 #include "arm_compute/runtime/BlobLifetimeManager.h"
+#include "arm_compute/runtime/MemoryGroup.h"
 #include "arm_compute/runtime/MemoryManagerOnDemand.h"
 #include "arm_compute/runtime/OffsetLifetimeManager.h"
 #include "arm_compute/runtime/PoolManager.h"
@@ -74,8 +75,10 @@
     if(ctx.memory_management_ctx(Target::NEON) == nullptr)
     {
         MemoryManagerContext mm_ctx;
-        mm_ctx.target = Target::NEON;
-        mm_ctx.mm     = create_memory_manager(MemoryManagerAffinity::Buffer);
+        mm_ctx.target      = Target::NEON;
+        mm_ctx.intra_mm    = create_memory_manager(MemoryManagerAffinity::Offset);
+        mm_ctx.cross_mm    = create_memory_manager(MemoryManagerAffinity::Offset);
+        mm_ctx.cross_group = std::make_shared<MemoryGroup>(mm_ctx.cross_mm);
 
         ctx.insert_memory_management_ctx(std::move(mm_ctx));
     }
@@ -86,6 +89,11 @@
     return true;
 }
 
+IAllocator *NEDeviceBackend::backend_allocator()
+{
+    return &_allocator;
+}
+
 std::unique_ptr<ITensorHandle> NEDeviceBackend::create_tensor(const Tensor &tensor)
 {
     // Get tensor descriptor
diff --git a/src/graph/backends/NEON/NESubTensorHandle.cpp b/src/graph/backends/NEON/NESubTensorHandle.cpp
index c48ba6b..c0acedd 100644
--- a/src/graph/backends/NEON/NESubTensorHandle.cpp
+++ b/src/graph/backends/NEON/NESubTensorHandle.cpp
@@ -30,10 +30,11 @@
 namespace backends
 {
 NESubTensorHandle::NESubTensorHandle(ITensorHandle *parent_handle, const TensorShape &shape, const Coordinates &coords, bool extend_parent)
-    : _sub_tensor()
+    : _sub_tensor(), _parent_handle(nullptr)
 {
     ARM_COMPUTE_ERROR_ON(!parent_handle);
-    _sub_tensor = arm_compute::SubTensor(&parent_handle->tensor(), shape, coords, extend_parent);
+    _sub_tensor    = arm_compute::SubTensor(&parent_handle->tensor(), shape, coords, extend_parent);
+    _parent_handle = parent_handle;
 }
 
 void NESubTensorHandle::allocate()
@@ -41,14 +42,15 @@
     // noop
 }
 
-const arm_compute::ITensor &NESubTensorHandle::tensor() const
+void NESubTensorHandle::free()
 {
-    return _sub_tensor;
+    // noop
 }
 
-arm_compute::ITensor &NESubTensorHandle::tensor()
+void NESubTensorHandle::manage(IMemoryGroup *mg)
 {
-    return _sub_tensor;
+    ARM_COMPUTE_UNUSED(mg);
+    // noop
 }
 
 void NESubTensorHandle::map(bool blocking)
@@ -66,10 +68,31 @@
     // noop
 }
 
+const arm_compute::ITensor &NESubTensorHandle::tensor() const
+{
+    return _sub_tensor;
+}
+
+arm_compute::ITensor &NESubTensorHandle::tensor()
+{
+    return _sub_tensor;
+}
+
+ITensorHandle *NESubTensorHandle::parent_handle()
+{
+    ARM_COMPUTE_ERROR_ON(_parent_handle == nullptr);
+    return _parent_handle->parent_handle();
+}
+
 bool NESubTensorHandle::is_subtensor() const
 {
     return true;
 }
+
+Target NESubTensorHandle::target() const
+{
+    return Target::NEON;
+}
 } // namespace backends
 } // namespace graph
 } // namespace arm_compute
\ No newline at end of file
diff --git a/src/graph/backends/NEON/NETensorHandle.cpp b/src/graph/backends/NEON/NETensorHandle.cpp
index 8508ac9..5892116 100644
--- a/src/graph/backends/NEON/NETensorHandle.cpp
+++ b/src/graph/backends/NEON/NETensorHandle.cpp
@@ -23,6 +23,9 @@
  */
 #include "arm_compute/graph/backends/NEON/NETensorHandle.h"
 
+#include "arm_compute/core/utils/misc/Cast.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+
 namespace arm_compute
 {
 namespace graph
@@ -40,14 +43,18 @@
     _tensor.allocator()->allocate();
 }
 
-const arm_compute::ITensor &NETensorHandle::tensor() const
+void NETensorHandle::free()
 {
-    return _tensor;
+    _tensor.allocator()->free();
 }
 
-arm_compute::ITensor &NETensorHandle::tensor()
+void NETensorHandle::manage(IMemoryGroup *mg)
 {
-    return _tensor;
+    if(mg != nullptr)
+    {
+        auto *ne_mg = arm_compute::utils::cast::polymorphic_downcast<MemoryGroup *>(mg);
+        ne_mg->manage(&_tensor);
+    }
 }
 
 void NETensorHandle::map(bool blocking)
@@ -68,10 +75,30 @@
     }
 }
 
+const arm_compute::ITensor &NETensorHandle::tensor() const
+{
+    return _tensor;
+}
+
+arm_compute::ITensor &NETensorHandle::tensor()
+{
+    return _tensor;
+}
+
+ITensorHandle *NETensorHandle::parent_handle()
+{
+    return this;
+}
+
 bool NETensorHandle::is_subtensor() const
 {
     return false;
 }
+
+Target NETensorHandle::target() const
+{
+    return Target::NEON;
+}
 } // namespace backends
 } // namespace graph
 } // namespace arm_compute
\ No newline at end of file
diff --git a/src/graph/detail/CrossLayerMemoryManagerHelpers.cpp b/src/graph/detail/CrossLayerMemoryManagerHelpers.cpp
new file mode 100644
index 0000000..7fc5ca0
--- /dev/null
+++ b/src/graph/detail/CrossLayerMemoryManagerHelpers.cpp
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/detail/CrossLayerMemoryManagerHelpers.h"
+
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/GraphContext.h"
+#include "arm_compute/graph/GraphManager.h"
+#include "arm_compute/graph/INode.h"
+#include "arm_compute/graph/Tensor.h"
+#include "arm_compute/graph/Types.h"
+#include "arm_compute/graph/backends/BackendRegistry.h"
+
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/utils/misc/Cast.h"
+
+#include <algorithm>
+#include <map>
+
+namespace arm_compute
+{
+namespace graph
+{
+namespace detail
+{
+namespace
+{
+using HandleCountPair     = std::pair<ITensorHandle *, unsigned int>;
+using HandleCounter       = std::map<HandleCountPair::first_type, HandleCountPair::second_type>;
+using TargetHandleCounter = std::map<Target, HandleCounter>;
+
+/** Holds the managed IO tensor handles of a task */
+struct TaskHandles
+{
+    std::vector<std::pair<ITensorHandle *, IMemoryGroup *>> input_handles  = {}; /**< Input handles to a task */
+    std::vector<std::pair<ITensorHandle *, IMemoryGroup *>> output_handles = {}; /**< Output handles of a task */
+};
+
+/** Returns memory group depending on handle backend type
+ *
+ * @param[in] ctx    Graph context
+ * @param[in] handle Tensor handle
+ *
+ * @return Memory group
+ */
+IMemoryGroup *get_memory_group_from_handle(GraphContext &ctx, ITensorHandle *handle)
+{
+    ARM_COMPUTE_ERROR_ON(handle == nullptr);
+    return ctx.memory_management_ctx(handle->target())->cross_group.get();
+}
+
+/** Gets the handles of the const tensors of a graph
+ *
+ * @param[in] g Graph
+ *
+ * @return Handles of the graph's const tensors
+ */
+std::set<ITensorHandle *> get_const_handles(const Graph &g)
+{
+    std::set<NodeType> const_node_types = { NodeType::Input, NodeType::Output, NodeType::Const };
+
+    std::set<ITensorHandle *> const_tensors;
+
+    auto &nodes = g.nodes();
+    for(auto &node : nodes)
+    {
+        // If it's a const node:
+        if(node != nullptr && const_node_types.find(node->type()) != std::end(const_node_types))
+        {
+            // TODO (geopin01) : Create IO iterator wrappers
+            // Add all its inputs / outputs to the list of constant handles
+            for(unsigned int i = 0; i < node->num_inputs(); ++i)
+            {
+                if(node->input(i) != nullptr)
+                {
+                    const_tensors.insert(node->input(i)->handle()->parent_handle());
+                }
+            }
+            for(unsigned int i = 0; i < node->num_outputs(); ++i)
+            {
+                if(node->output(i) != nullptr)
+                {
+                    const_tensors.insert(node->output(i)->handle()->parent_handle());
+                }
+            }
+        }
+    }
+
+    return const_tensors;
+}
+
+/** Builds a list of all the transition handles (Handles that are used to link two nodes)
+ *
+ * @param[in] ctx           Graph context
+ * @param[in] task          Workload task
+ * @param[in] const_tensors Constant tensors
+ *
+ * @return List of transition handles
+ */
+TaskHandles get_transition_handles(GraphContext                    &ctx,
+                                   ExecutionTask                   &task,
+                                   const std::set<ITensorHandle *> &const_tensors)
+{
+    ARM_COMPUTE_ERROR_ON(task.node == nullptr || task.task == nullptr);
+    INode &node = *task.node;
+
+    TaskHandles transition_handles;
+
+    // Add input handles
+    for(unsigned int i = 0; i < node.input_edges().size(); ++i)
+    {
+        Edge *input_edge = node.input_edge(i);
+        // If this input is the output of another node
+        if(input_edge != nullptr && input_edge->tensor() != nullptr && const_tensors.find(input_edge->tensor()->handle()->parent_handle()) == std::end(const_tensors))
+        {
+            // Then add it to the list of transition buffers
+            ITensorHandle *tensor_handle = input_edge->tensor()->handle()->parent_handle();
+            IMemoryGroup *mm_group      = get_memory_group_from_handle(ctx, tensor_handle);
+            transition_handles.input_handles.push_back(std::make_pair(tensor_handle, mm_group));
+        }
+    }
+
+    // Add output handles
+    for(unsigned int i = 0; i < node.num_outputs(); ++i)
+    {
+        Tensor *output_tensor = node.output(i);
+        // If this output is used as an input for another node
+        if(output_tensor != nullptr && const_tensors.find(output_tensor->handle()->parent_handle()) == std::end(const_tensors))
+        {
+            ITensorHandle *tensor_handle = output_tensor->handle()->parent_handle();
+            IMemoryGroup *mm_group      = get_memory_group_from_handle(ctx, tensor_handle);
+            transition_handles.output_handles.push_back(std::make_pair(tensor_handle, mm_group));
+        }
+    }
+
+    return transition_handles;
+}
+
+/** Counts how many times each input handle is consumed, grouped by target
+ *
+ * @param[in]     task_handles   Managed IO handles of a task
+ * @param[in,out] handle_counter Data structure that keeps the handles reference count
+ */
+void count_input_handles_per_target(const TaskHandles &task_handles, TargetHandleCounter &handle_counter)
+{
+    for(const auto &handle : task_handles.input_handles)
+    {
+        ITensorHandle *key            = handle.first;
+        HandleCounter &target_counter = handle_counter[key->target()];
+        if(target_counter.find(key) == std::end(target_counter))
+        {
+            target_counter.emplace(std::make_pair(key, 1));
+        }
+        else
+        {
+            ++target_counter[key];
+        }
+    }
+}
+
+/** Calculates the lifetime of each tensor handle
+ *
+ * @param[in, out] tasks_handles Tensor handles for each task
+ * @param[in]      hc            Data structure that keeps the handles reference count
+ */
+void configure_handle_lifetime(std::vector<TaskHandles> &tasks_handles, const HandleCounter &hc)
+{
+    // Identify max number of tensors in flight
+    HandleCounter tensors_in_flight;
+
+    // Acquires the given handles and sets them as in flight if they aren't already
+    auto acquire = [&](std::vector<std::pair<ITensorHandle *, IMemoryGroup *>> &handles)
+    {
+        for(auto &handle : handles)
+        {
+            ITensorHandle *parent_handle = handle.first;
+            ARM_COMPUTE_ERROR_ON(parent_handle == nullptr);
+            // If the tensor is not already in flight:
+            if(tensors_in_flight.find(parent_handle) == std::end(tensors_in_flight))
+            {
+                ARM_COMPUTE_ERROR_ON(hc.find(parent_handle) == std::end(hc));
+                // Then add it to the list of in flight tensors
+                tensors_in_flight.insert(std::make_pair(parent_handle, hc.at(parent_handle)));
+                // Start of allocation's lifetime
+                parent_handle->manage(handle.second);
+            }
+        }
+    };
+
+    for(auto &task_handle : tasks_handles)
+    {
+        // Marking all the input and output tensors of the task as in flight
+        acquire(task_handle.input_handles);
+        acquire(task_handle.output_handles);
+
+        // Releasing the input tensors
+        for(auto &input_handle : task_handle.input_handles)
+        {
+            ITensorHandle *ihandle = input_handle.first;
+            ARM_COMPUTE_ERROR_ON(ihandle == nullptr);
+            ARM_COMPUTE_ERROR_ON(tensors_in_flight.find(ihandle) == std::end(tensors_in_flight));
+            --tensors_in_flight[ihandle];
+            if(tensors_in_flight[ihandle] <= 0)
+            {
+                // Remove tensor from tensors in flight
+                tensors_in_flight.erase(ihandle);
+                // End of allocation's lifetime
+                ihandle->allocate();
+            }
+        }
+    }
+}
+} // namespace
+
+void configure_transition_manager(Graph &g, GraphContext &ctx, ExecutionWorkload &workload)
+{
+    // Get const tensors (un-managed)
+    std::set<ITensorHandle *> const_tensors = get_const_handles(g);
+
+    std::vector<TaskHandles> tasks_handles;
+    TargetHandleCounter      target_handle_count;
+
+    // Count handles
+    for(auto &task : workload.tasks)
+    {
+        // Populates IO handles
+        tasks_handles.push_back(get_transition_handles(ctx, task, const_tensors));
+
+        // Count handles
+        count_input_handles_per_target(tasks_handles.back(), target_handle_count);
+    }
+
+    // Setup memory managers
+    for(auto &hc : target_handle_count)
+    {
+        MemoryManagerContext *mm_ctx = ctx.memory_management_ctx(hc.first);
+        if(mm_ctx != nullptr)
+        {
+            if(mm_ctx->cross_mm != nullptr && mm_ctx->cross_group != nullptr)
+            {
+                // Manage and allocate tensors
+                configure_handle_lifetime(tasks_handles, hc.second);
+            }
+        }
+    }
+}
+} // namespace detail
+} // namespace graph
+} // namespace arm_compute
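
A runnable sketch of the two-pass lifetime walk implemented above (toy
std::string buffer names stand in for tensor handles; the passes mirror
count_input_handles_per_target() and configure_handle_lifetime()):

    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    struct Task
    {
        std::vector<std::string> inputs;
        std::vector<std::string> outputs;
    };

    int main()
    {
        // t0 produces x; t1 consumes x and produces y; t2 consumes x and y
        std::vector<Task> tasks = { { {}, { "x" } }, { { "x" }, { "y" } }, { { "x", "y" }, {} } };

        // Pass 1: count the consumers of each buffer
        std::map<std::string, int> refcount;
        for(const auto &t : tasks)
        {
            for(const auto &in : t.inputs)
            {
                ++refcount[in];
            }
        }

        // Pass 2: open a lifetime at first use (manage), close it once the
        // last consumer has been visited (allocate)
        std::map<std::string, int> in_flight;
        auto acquire = [&](const std::vector<std::string> &handles)
        {
            for(const auto &h : handles)
            {
                if(in_flight.find(h) == in_flight.end())
                {
                    in_flight[h] = refcount[h];
                    std::cout << "manage(" << h << ")" << std::endl;
                }
            }
        };
        for(const auto &t : tasks)
        {
            acquire(t.inputs);
            acquire(t.outputs);
            for(const auto &h : t.inputs)
            {
                if(--in_flight[h] <= 0)
                {
                    in_flight.erase(h);
                    std::cout << "allocate(" << h << ")" << std::endl;
                }
            }
        }
        // Prints: manage(x), manage(y), allocate(x), allocate(y)
    }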
diff --git a/src/graph/detail/ExecutionHelpers.cpp b/src/graph/detail/ExecutionHelpers.cpp
index c130443..c370fdf 100644
--- a/src/graph/detail/ExecutionHelpers.cpp
+++ b/src/graph/detail/ExecutionHelpers.cpp
@@ -143,7 +143,9 @@
 {
     ExecutionWorkload workload;
     workload.graph = &g;
-    auto &nodes    = g.nodes();
+    workload.ctx   = &ctx;
+
+    auto &nodes = g.nodes();
 
     // Create tasks
     for(auto &node : nodes)
@@ -235,10 +237,31 @@
 
 void call_all_tasks(ExecutionWorkload &workload)
 {
+    ARM_COMPUTE_ERROR_ON(workload.ctx == nullptr);
+
+    // Acquire memory for the transition buffers
+    for(auto &mm_ctx : workload.ctx->memory_managers())
+    {
+        if(mm_ctx.second.cross_group != nullptr)
+        {
+            mm_ctx.second.cross_group->acquire();
+        }
+    }
+
+    // Execute tasks
     for(auto &task : workload.tasks)
     {
         task();
     }
+
+    // Release memory for the transition buffers
+    for(auto &mm_ctx : workload.ctx->memory_managers())
+    {
+        if(mm_ctx.second.cross_group != nullptr)
+        {
+            mm_ctx.second.cross_group->release();
+        }
+    }
 }
 
 void call_all_output_node_accessors(ExecutionWorkload &workload)
diff --git a/src/runtime/BlobLifetimeManager.cpp b/src/runtime/BlobLifetimeManager.cpp
index 3ca5071..2a4ab6e 100644
--- a/src/runtime/BlobLifetimeManager.cpp
+++ b/src/runtime/BlobLifetimeManager.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -57,15 +57,15 @@
     ARM_COMPUTE_ERROR_ON(!are_all_finalized());
     ARM_COMPUTE_ERROR_ON(_active_group == nullptr);
 
-    // Sort active group requirements in descending order.
-    std::sort(std::begin(_active_elements), std::end(_active_elements), [](const Element & a, const Element & b)
+    // Sort free blob requirements in descending order.
+    _free_blobs.sort([](const Blob & ba, const Blob & bb)
     {
-        return a.size > b.size;
+        return ba.max_size > bb.max_size;
     });
     std::vector<size_t> group_sizes;
-    std::transform(std::begin(_active_elements), std::end(_active_elements), std::back_inserter(group_sizes), [](const Element & e)
+    std::transform(std::begin(_free_blobs), std::end(_free_blobs), std::back_inserter(group_sizes), [](const Blob & b)
     {
-        return e.size;
+        return b.max_size;
     });
 
     // Update blob sizes
@@ -80,8 +80,14 @@
     // Calculate group mappings
     auto &group_mappings = _active_group->mappings();
     int   blob_idx       = 0;
-    for(auto &e : _active_elements)
+    for(auto &free_blob : _free_blobs)
     {
-        group_mappings[e.handle] = blob_idx++;
+        for(auto &bound_element_id : free_blob.bound_elements)
+        {
+            ARM_COMPUTE_ERROR_ON(_active_elements.find(bound_element_id) == std::end(_active_elements));
+            Element &bound_element               = _active_elements[bound_element_id];
+            group_mappings[bound_element.handle] = blob_idx;
+        }
+        ++blob_idx;
     }
 }
diff --git a/src/runtime/ISimpleLifetimeManager.cpp b/src/runtime/ISimpleLifetimeManager.cpp
index 2c64475..faaff8a 100644
--- a/src/runtime/ISimpleLifetimeManager.cpp
+++ b/src/runtime/ISimpleLifetimeManager.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -37,7 +37,7 @@
 using namespace arm_compute;
 
 ISimpleLifetimeManager::ISimpleLifetimeManager()
-    : _active_group(nullptr), _active_elements(), _finalized_groups()
+    : _active_group(nullptr), _active_elements(), _free_blobs(), _occupied_blobs(), _finalized_groups()
 {
 }
 
@@ -53,14 +53,21 @@
 void ISimpleLifetimeManager::start_lifetime(void *obj)
 {
     ARM_COMPUTE_ERROR_ON(obj == nullptr);
-    ARM_COMPUTE_ERROR_ON_MSG(std::find_if(std::begin(_active_elements), std::end(_active_elements), [&obj](const Element & e)
+    ARM_COMPUTE_ERROR_ON_MSG(_active_elements.find(obj) != std::end(_active_elements), "Memory object is already registered!");
+
+    // Check if there is a free blob
+    if(_free_blobs.empty())
     {
-        return obj == e.id;
-    }) != std::end(_active_elements),
-    "Memory object is already registered!");
+        _occupied_blobs.emplace_front(Blob{ obj, 0, { obj } });
+    }
+    else
+    {
+        _occupied_blobs.splice(std::begin(_occupied_blobs), _free_blobs, std::begin(_free_blobs));
+        _occupied_blobs.front().id = obj;
+    }
 
     // Insert object in groups and mark its finalized state to false
-    _active_elements.emplace_back(obj);
+    _active_elements.insert(std::make_pair(obj, obj));
 }
 
 void ISimpleLifetimeManager::end_lifetime(void *obj, void **handle, size_t size)
@@ -68,36 +75,50 @@
     ARM_COMPUTE_ERROR_ON(obj == nullptr);
 
     // Find object
-    auto it = std::find_if(std::begin(_active_elements), std::end(_active_elements), [&obj](const Element & e)
-    {
-        return obj == e.id;
-    });
-    ARM_COMPUTE_ERROR_ON(it == std::end(_active_elements));
+    auto active_object_it = _active_elements.find(obj);
+    ARM_COMPUTE_ERROR_ON(active_object_it == std::end(_active_elements));
 
     // Update object fields and mark object as complete
-    it->handle = handle;
-    it->size   = size;
-    it->status = true;
+    Element &el = active_object_it->second;
+    el.handle   = handle;
+    el.size     = size;
+    el.status   = true;
+
+    // Find object in the occupied lists
+    auto occupied_blob_it = std::find_if(std::begin(_occupied_blobs), std::end(_occupied_blobs), [&obj](const Blob & b)
+    {
+        return obj == b.id;
+    });
+    ARM_COMPUTE_ERROR_ON(occupied_blob_it == std::end(_occupied_blobs));
+
+    // Update occupied blob and return as free
+    occupied_blob_it->bound_elements.insert(obj);
+    occupied_blob_it->max_size = std::max(occupied_blob_it->max_size, size);
+    occupied_blob_it->id       = nullptr;
+    _free_blobs.splice(std::begin(_free_blobs), _occupied_blobs, occupied_blob_it);
 
     // Check if all object are finalized and reset active group
     if(are_all_finalized())
     {
-        // Update finalized groups
-        _finalized_groups[_active_group].insert(std::end(_finalized_groups[_active_group]), std::begin(_active_elements), std::end(_active_elements));
+        ARM_COMPUTE_ERROR_ON(!_occupied_blobs.empty());
 
         // Update blobs and group mappings
         update_blobs_and_mappings();
 
+        // Update finalized groups
+        _finalized_groups[_active_group] = std::move(_active_elements);
+
         // Reset state
         _active_elements.clear();
         _active_group = nullptr;
+        _free_blobs.clear();
     }
 }
 
 bool ISimpleLifetimeManager::are_all_finalized() const
 {
-    return !std::any_of(std::begin(_active_elements), std::end(_active_elements), [](const Element e)
+    return !std::any_of(std::begin(_active_elements), std::end(_active_elements), [](const std::pair<void *, Element> &e)
     {
-        return !e.status;
+        return !e.second.status;
     });
 }
diff --git a/src/runtime/OffsetLifetimeManager.cpp b/src/runtime/OffsetLifetimeManager.cpp
index 4540aea..d0b3bde 100644
--- a/src/runtime/OffsetLifetimeManager.cpp
+++ b/src/runtime/OffsetLifetimeManager.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -58,19 +58,24 @@
     ARM_COMPUTE_ERROR_ON(_active_group == nullptr);
 
     // Update blob size
-    size_t max_group_size = std::accumulate(std::begin(_active_elements), std::end(_active_elements), static_cast<size_t>(0), [](size_t s, const Element & e)
+    size_t max_group_size = std::accumulate(std::begin(_free_blobs), std::end(_free_blobs), static_cast<size_t>(0), [](size_t s, const Blob & b)
     {
-        return s + e.size;
+        return s + b.max_size;
     });
     _blob = std::max(_blob, max_group_size);
 
     // Calculate group mappings
     auto &group_mappings = _active_group->mappings();
     size_t offset         = 0;
-    for(auto &e : _active_elements)
+    for(auto &free_blob : _free_blobs)
     {
-        group_mappings[e.handle] = offset;
-        offset += e.size;
+        for(auto &bound_element_id : free_blob.bound_elements)
+        {
+            ARM_COMPUTE_ERROR_ON(_active_elements.find(bound_element_id) == std::end(_active_elements));
+            Element &bound_element               = _active_elements[bound_element_id];
+            group_mappings[bound_element.handle] = offset;
+        }
+        offset += free_blob.max_size;
         ARM_COMPUTE_ERROR_ON(offset > _blob);
     }
 }
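
As a worked example of the new offset mapping: with two free blobs whose
max_size values are 1024 and 512, every element bound to the first blob
maps to offset 0, every element bound to the second to offset 1024, and
the backing allocation grows to at least 1536 bytes. Previously each
active element received its own offset, so elements with disjoint
lifetimes could not share the same region of the pool.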