COMPMID-550: Adds support for branches in the graph API.

Change-Id: I778007c9221ce3156400284c4039b90245eb2b7f
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/90043
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
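
The sketch below shows roughly how the new branch support is meant to be driven. It is a hypothetical usage example: the BranchLayer constructor and the node-streaming operators live in headers this patch does not touch, and input_accessor/output_accessor are placeholders.

    // Two sub-graphs run on the same input, merged along depth (sketch).
    SubGraph branch_a{};
    branch_a << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    SubGraph branch_b{};
    branch_b << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS));

    Graph graph{};
    graph << TargetHint::OPENCL
          << Tensor(TensorInfo(TensorShape(28U, 28U, 64U), 1, DataType::F32), input_accessor)
          << BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(branch_a), std::move(branch_b))
          << Tensor(output_accessor); // depth-concatenated result: (28, 28, 128)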
diff --git a/src/graph/CL/CLMap.cpp b/src/graph/CL/CLMap.cpp
index 4892b96..5289ea9 100644
--- a/src/graph/CL/CLMap.cpp
+++ b/src/graph/CL/CLMap.cpp
@@ -23,20 +23,21 @@
  */
 #include "arm_compute/graph/CL/CLMap.h"
 
+#include "arm_compute/core/CL/ICLTensor.h"
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/Validate.h"
-#include "arm_compute/graph/Tensor.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/graph/ITensorObject.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
 
 using namespace arm_compute::graph;
 
-CLMap::CLMap(Tensor *tensor, bool blocking)
-    : _tensor(dynamic_cast<arm_compute::CLTensor *>(tensor->tensor())), _blocking(blocking)
+CLMap::CLMap(ITensorObject *tensor, bool blocking)
+    : _tensor(dynamic_cast<arm_compute::ICLTensor *>(tensor->tensor())), _blocking(blocking)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(_tensor);
 }
 
 void CLMap::run()
 {
-    _tensor->map(_blocking);
+    _tensor->map(arm_compute::CLScheduler::get().queue(), _blocking);
 }
diff --git a/src/graph/CL/CLUnmap.cpp b/src/graph/CL/CLUnmap.cpp
index ec7d865..31f2f19 100644
--- a/src/graph/CL/CLUnmap.cpp
+++ b/src/graph/CL/CLUnmap.cpp
@@ -23,20 +23,21 @@
  */
 #include "arm_compute/graph/CL/CLUnmap.h"
 
+#include "arm_compute/core/CL/ICLTensor.h"
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/Validate.h"
-#include "arm_compute/graph/Tensor.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/graph/ITensorObject.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
 
 using namespace arm_compute::graph;
 
-CLUnmap::CLUnmap(Tensor *tensor)
-    : _tensor(dynamic_cast<arm_compute::CLTensor *>(tensor->tensor()))
+CLUnmap::CLUnmap(ITensorObject *tensor)
+    : _tensor(dynamic_cast<arm_compute::ICLTensor *>(tensor->tensor()))
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(_tensor);
 }
 
 void CLUnmap::run()
 {
-    _tensor->unmap();
+    _tensor->unmap(arm_compute::CLScheduler::get().queue());
 }
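
Both stages now fetch the command queue themselves via CLScheduler instead of relying on a concrete CLTensor. A minimal sketch of what a map/unmap pair around a host-side accessor boils down to, assuming t is the graph tensor already cast to ICLTensor:

    arm_compute::ICLTensor *t = /* dynamic_cast of ITensorObject::tensor() */;
    t->map(arm_compute::CLScheduler::get().queue(), true /* blocking */);
    // ... host-side accessor reads or fills the mapped buffer ...
    t->unmap(arm_compute::CLScheduler::get().queue());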
diff --git a/src/graph/Graph.cpp b/src/graph/Graph.cpp
index 7dddb1c..b86330b 100644
--- a/src/graph/Graph.cpp
+++ b/src/graph/Graph.cpp
@@ -26,16 +26,18 @@
 #include "arm_compute/graph/CL/CLMap.h"
 #include "arm_compute/graph/CL/CLUnmap.h"
 #include "arm_compute/graph/INode.h"
+#include "arm_compute/graph/ITensorObject.h"
 #include "arm_compute/graph/Tensor.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/Tensor.h"
+#include "support/ToolchainSupport.h"
 
 using namespace arm_compute::graph;
 
 struct Stage
 {
-    Tensor                                 *_input;
-    Tensor                                 *_output;
+    ITensorObject                          *_input;
+    ITensorObject                          *_output;
     std::unique_ptr<arm_compute::IFunction> _function;
 };
 
@@ -48,20 +50,21 @@
      */
     void configure(GraphHints _next_hints);
 
-    GraphContext                         _ctx{};
-    std::vector<Stage>                   _pipeline{};
-    std::vector<std::unique_ptr<Tensor>> _tensors{};
-    std::vector<std::unique_ptr<INode>>  _nodes{};
-    GraphHints                           _current_hints{};
-    GraphHints                           _next_hints{};
-    std::unique_ptr<Tensor>              _graph_input{ nullptr };
-    std::unique_ptr<Tensor>              _graph_output{ nullptr };
-    std::unique_ptr<INode>               _current_node{ nullptr };
-    Tensor                              *_current_output{ nullptr };
+    GraphContext                                _ctx{};
+    std::vector<Stage>                          _pipeline{};
+    std::vector<std::unique_ptr<ITensorObject>> _tensors{};
+    std::vector<std::unique_ptr<INode>>         _nodes{};
+    GraphHints                                  _current_hints{};
+    GraphHints                                  _next_hints{};
+    std::unique_ptr<ITensorObject>              _graph_input{ nullptr };
+    std::unique_ptr<ITensorObject>              _graph_output{ nullptr };
+    std::unique_ptr<INode>                      _current_node{ nullptr };
+    ITensorObject                              *_current_output{ nullptr };
+    bool                                        _info_enabled{ false };
 
 private:
-    Tensor    *_current_input{ nullptr };
-    GraphHints _previous_hints{};
+    ITensorObject *_current_input{ nullptr };
+    GraphHints     _previous_hints{};
 };
 
 Graph::~Graph() //NOLINT
@@ -78,7 +81,7 @@
 {
     while(true)
     {
-        if(!_pimpl->_graph_input->call_accessor())
+        if(_pimpl->_graph_input->has_accessor() && !_pimpl->_graph_input->call_accessor())
         {
             return;
         }
@@ -88,7 +91,8 @@
             stage._function->run();
         }
 
-        if(!_pimpl->_graph_output->call_accessor())
+        if((_pimpl->_graph_output->has_accessor() && !_pimpl->_graph_output->call_accessor())
+           || (!_pimpl->_graph_output->has_accessor()))
         {
             return;
         }
@@ -126,9 +130,11 @@
         _current_output->set_target(TargetHint::NEON);
     }
 
-    // Update ctx and instantiate node
+    // Instantiate Node
     _ctx.hints()                                 = _current_hints;
-    std::unique_ptr<arm_compute::IFunction> func = _current_node->instantiate_node(_ctx, _current_input->tensor(), _current_output->tensor());
+    std::unique_ptr<arm_compute::IFunction> func = _current_node->instantiate_node(_ctx, _current_input, _current_output);
+
+    // Allocate current input
     _current_input->allocate();
 
     // Map input if needed
@@ -181,7 +187,7 @@
 }
 
 // Add a tensor with an Accessor (i.e. either the input or output of the graph)
-void Graph::add_tensor(std::unique_ptr<Tensor> tensor)
+void Graph::add_tensor_object(std::unique_ptr<ITensorObject> tensor)
 {
     // If it's the first Tensor added then it will be the input of the Graph.
     if(_pimpl->_graph_input == nullptr)
@@ -227,7 +233,13 @@
 
 Graph &arm_compute::graph::operator<<(Graph &graph, Tensor &&tensor)
 {
-    graph.add_tensor(arm_compute::support::cpp14::make_unique<Tensor>(std::move(tensor)));
+    graph.add_tensor_object(arm_compute::support::cpp14::make_unique<Tensor>(std::move(tensor)));
+    return graph;
+}
+
+Graph &arm_compute::graph::operator<<(Graph &graph, SubTensor &&sub_tensor)
+{
+    graph.add_tensor_object(arm_compute::support::cpp14::make_unique<SubTensor>(std::move(sub_tensor)));
     return graph;
 }
 
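
With the accessor checks above, a graph whose output has no accessor (e.g. a branch sub-graph writing into a SubTensor) runs its pipeline exactly once and returns. A condensed, logically equivalent sketch of Graph::run() after this patch, with input, output and pipeline standing in for the pimpl members:

    while(true)
    {
        if(input->has_accessor() && !input->call_accessor())
            return; // input accessor signalled end of stream
        for(auto &stage : pipeline)
            stage._function->run();
        if(!output->has_accessor() || !output->call_accessor())
            return; // no consumer, or the consumer signalled stop
    }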
diff --git a/src/graph/INode.cpp b/src/graph/INode.cpp
index 4b383f5..582f936 100644
--- a/src/graph/INode.cpp
+++ b/src/graph/INode.cpp
@@ -26,8 +26,6 @@
 #include "arm_compute/core/CL/OpenCL.h"
 #include "arm_compute/core/Validate.h"
 
-#include <ostream>
-
 using namespace arm_compute::graph;
 
 TargetHint INode::override_target_hint(TargetHint target_hint) const
diff --git a/src/graph/SubGraph.cpp b/src/graph/SubGraph.cpp
new file mode 100644
index 0000000..977cd4a
--- /dev/null
+++ b/src/graph/SubGraph.cpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/SubGraph.h"
+
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/INode.h"
+#include "arm_compute/graph/Tensor.h"
+
+using namespace arm_compute::graph;
+
+SubGraph::SubGraph()
+    : _nodes(), _input(nullptr), _output(nullptr)
+{
+}
+
+void SubGraph::add_node(std::unique_ptr<INode> node)
+{
+    _nodes.push_back(std::move(node));
+}
+
+void SubGraph::add_tensor_object(std::unique_ptr<ITensorObject> tensor)
+{
+    // If it's the first Tensor added then it will be the input of the sub-graph.
+    if(_input == nullptr)
+    {
+        _input = std::move(tensor);
+    }
+    else
+    {
+        _output = std::move(tensor);
+    }
+}
+
+std::unique_ptr<Graph> SubGraph::construct(TargetHint hint, std::unique_ptr<ITensorObject> input, std::unique_ptr<ITensorObject> output)
+{
+    auto graph = arm_compute::support::cpp14::make_unique<Graph>();
+
+    // Set hint
+    // TODO(geopin01): store hints of sub-graph
+    graph->hints().set_target_hint(hint);
+
+    // Configure input
+    if(_input == nullptr)
+    {
+        _input = std::move(input);
+    }
+    graph->add_tensor_object(std::move(_input));
+
+    // Construct nodes
+    for(auto &node : _nodes)
+    {
+        graph->add_node(std::move(node));
+    }
+
+    // Configure output
+    if(_output == nullptr)
+    {
+        _output = std::move(output);
+    }
+    graph->add_tensor_object(std::move(_output));
+
+    return graph;
+}
+
+bool SubGraph::has_input() const
+{
+    return _input != nullptr;
+}
+
+bool SubGraph::has_output() const
+{
+    return _output != nullptr;
+}
+
+SubGraph &arm_compute::graph::operator<<(SubGraph &graph, Tensor &&tensor)
+{
+    graph.add_tensor_object(arm_compute::support::cpp14::make_unique<Tensor>(std::move(tensor)));
+    return graph;
+}
+
+SubGraph &arm_compute::graph::operator<<(SubGraph &graph, SubTensor &&sub_tensor)
+{
+    graph.add_tensor_object(arm_compute::support::cpp14::make_unique<SubTensor>(std::move(sub_tensor)));
+    return graph;
+}
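
The intended calling pattern for SubGraph::construct, as BranchLayer uses it below: tensors declared inside the sub-graph take precedence, and the passed-in objects (typically SubTensors carved out of the enclosing graph's tensors) only fill the gaps. A sketch:

    std::unique_ptr<Graph> g = sg->construct(ctx.hints().target_hint(),
                                             std::move(in),   // used only if !sg->has_input()
                                             std::move(out)); // used only if !sg->has_output()
    g->run(); // executes once, since the SubTensor output has no accessor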
diff --git a/src/graph/SubTensor.cpp b/src/graph/SubTensor.cpp
index abf8506..da8de95 100644
--- a/src/graph/SubTensor.cpp
+++ b/src/graph/SubTensor.cpp
@@ -27,7 +27,9 @@
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/runtime/CL/CLSubTensor.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/SubTensor.h"
+#include "arm_compute/runtime/Tensor.h"
 #include "utils/TypePrinter.h"
 
 using namespace arm_compute::graph;
@@ -35,7 +37,7 @@
 namespace
 {
 template <typename SubTensorType, typename ParentTensorType>
-std::unique_ptr<ITensor> initialise_subtensor(ITensor *parent, TensorShape shape, Coordinates coords)
+std::unique_ptr<arm_compute::ITensor> initialise_subtensor(arm_compute::ITensor *parent, TensorShape shape, Coordinates coords)
 {
     auto ptensor   = dynamic_cast<ParentTensorType *>(parent);
     auto subtensor = arm_compute::support::cpp14::make_unique<SubTensorType>(ptensor, shape, coords);
@@ -44,41 +46,44 @@
 } // namespace
 
 SubTensor::SubTensor()
-    : _target(TargetHint::DONT_CARE), _coords(), _info(), _parent(nullptr), _subtensor(nullptr)
+    : _target(TargetHint::DONT_CARE), _tensor_shape(), _coords(), _parent(nullptr), _subtensor(nullptr)
 {
 }
 
 SubTensor::SubTensor(Tensor &parent, TensorShape tensor_shape, Coordinates coords)
-    : _target(TargetHint::DONT_CARE), _coords(coords), _info(), _parent(nullptr), _subtensor(nullptr)
+    : _target(TargetHint::DONT_CARE), _tensor_shape(tensor_shape), _coords(coords), _parent(nullptr), _subtensor(nullptr)
 {
     ARM_COMPUTE_ERROR_ON(parent.tensor() == nullptr);
     _parent = parent.tensor();
-    _info   = SubTensorInfo(parent.tensor()->info(), tensor_shape, coords);
     _target = parent.target();
 
     instantiate_subtensor();
 }
 
-SubTensor::SubTensor(ITensor *parent, TensorShape tensor_shape, Coordinates coords, TargetHint target)
-    : _target(target), _coords(coords), _info(), _parent(parent), _subtensor(nullptr)
+SubTensor::SubTensor(arm_compute::ITensor *parent, TensorShape tensor_shape, Coordinates coords, TargetHint target)
+    : _target(target), _tensor_shape(tensor_shape), _coords(coords), _parent(parent), _subtensor(nullptr)
 {
     ARM_COMPUTE_ERROR_ON(parent == nullptr);
-    _info = SubTensorInfo(parent->info(), tensor_shape, coords);
-
     instantiate_subtensor();
 }
 
-void SubTensor::set_info(SubTensorInfo &&info)
+bool SubTensor::call_accessor()
 {
-    _info = info;
+    return true;
 }
 
-const SubTensorInfo &SubTensor::info() const
+bool SubTensor::has_accessor() const
 {
-    return _info;
+    return false;
 }
 
-ITensor *SubTensor::tensor()
+arm_compute::ITensor *SubTensor::set_target(TargetHint target)
+{
+    ARM_COMPUTE_ERROR_ON(target != _target);
+    return (target == _target) ? _subtensor.get() : nullptr;
+}
+
+arm_compute::ITensor *SubTensor::tensor()
 {
     return _subtensor.get();
 }
@@ -88,15 +93,20 @@
     return _target;
 }
 
+void SubTensor::allocate()
+{
+    // NOP for sub-tensors
+}
+
 void SubTensor::instantiate_subtensor()
 {
     switch(_target)
     {
         case TargetHint::OPENCL:
-            _subtensor = initialise_subtensor<arm_compute::CLSubTensor, arm_compute::ICLTensor>(_parent, _info.tensor_shape(), _coords);
+            _subtensor = initialise_subtensor<arm_compute::CLSubTensor, arm_compute::ICLTensor>(_parent, _tensor_shape, _coords);
             break;
         case TargetHint::NEON:
-            _subtensor = initialise_subtensor<arm_compute::SubTensor, arm_compute::ITensor>(_parent, _info.tensor_shape(), _coords);
+            _subtensor = initialise_subtensor<arm_compute::SubTensor, arm_compute::ITensor>(_parent, _tensor_shape, _coords);
             break;
         default:
             ARM_COMPUTE_ERROR("Invalid TargetHint");
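
SubTensor is a non-owning view: allocate() is a NOP because storage belongs to the parent, has_accessor() reports false (so Graph::run() stops after a single pass), and call_accessor() trivially succeeds. A sketch of carving a depth slice out of a parent tensor, using the Tensor-based constructor above with assumed shapes:

    Tensor    parent = /* graph tensor of shape (28, 28, 96) */;
    SubTensor slice(parent,
                    TensorShape(28U, 28U, 32U), // view shape
                    Coordinates(0, 0, 64));     // start at depth 64
    slice.allocate();                           // NOP: no storage of its own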
diff --git a/src/graph/Tensor.cpp b/src/graph/Tensor.cpp
index 31dd4e8..f85fe27 100644
--- a/src/graph/Tensor.cpp
+++ b/src/graph/Tensor.cpp
@@ -35,7 +35,7 @@
 namespace
 {
 template <typename TensorType>
-std::unique_ptr<ITensor> initialise_tensor(TensorInfo &info)
+std::unique_ptr<arm_compute::ITensor> initialise_tensor(TensorInfo &info)
 {
     auto tensor = arm_compute::support::cpp14::make_unique<TensorType>();
     tensor->allocator()->init(info);
@@ -43,7 +43,7 @@
 }
 
 template <typename TensorType>
-void tensor_allocate(ITensor &tensor)
+void tensor_allocate(arm_compute::ITensor &tensor)
 {
     auto itensor = dynamic_cast<TensorType *>(&tensor);
     ARM_COMPUTE_ERROR_ON_NULLPTR(itensor);
@@ -85,7 +85,12 @@
     return retval;
 }
 
-ITensor *Tensor::tensor()
+bool Tensor::has_accessor() const
+{
+    return (_accessor != nullptr);
+}
+
+arm_compute::ITensor *Tensor::tensor()
 {
     return _tensor.get();
 }
@@ -95,7 +100,7 @@
     return _info;
 }
 
-ITensor *Tensor::set_target(TargetHint target)
+arm_compute::ITensor *Tensor::set_target(TargetHint target)
 {
     if(_tensor != nullptr)
     {
diff --git a/src/graph/nodes/ActivationLayer.cpp b/src/graph/nodes/ActivationLayer.cpp
index 5cd2a0b..5e75c28 100644
--- a/src/graph/nodes/ActivationLayer.cpp
+++ b/src/graph/nodes/ActivationLayer.cpp
@@ -36,7 +36,7 @@
 namespace
 {
 template <typename ActivationType, typename TensorType, TargetHint target_hint>
-std::unique_ptr<arm_compute::IFunction> instantiate_function(ITensor *input, ITensor *output, const ActivationLayerInfo &activation_info)
+std::unique_ptr<arm_compute::IFunction> instantiate_function(arm_compute::ITensor *input, arm_compute::ITensor *output, const ActivationLayerInfo &activation_info)
 {
     auto activation = arm_compute::support::cpp14::make_unique<ActivationType>();
     activation->configure(
@@ -48,18 +48,18 @@
 }
 
 template <TargetHint                    target_hint>
-std::unique_ptr<arm_compute::IFunction> instantiate(ITensor *input, ITensor *output, const ActivationLayerInfo &activation_info);
+std::unique_ptr<arm_compute::IFunction> instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output, const ActivationLayerInfo &activation_info);
 
 template <>
-std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::OPENCL>(ITensor *input, ITensor *output, const ActivationLayerInfo &activation_info)
+std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::OPENCL>(arm_compute::ITensor *input, arm_compute::ITensor *output, const ActivationLayerInfo &activation_info)
 {
-    return instantiate_function<arm_compute::CLActivationLayer, arm_compute::CLTensor, TargetHint::OPENCL>(input, output, activation_info);
+    return instantiate_function<arm_compute::CLActivationLayer, arm_compute::ICLTensor, TargetHint::OPENCL>(input, output, activation_info);
 }
 
 template <>
-std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::NEON>(ITensor *input, ITensor *output, const ActivationLayerInfo &activation_info)
+std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::NEON>(arm_compute::ITensor *input, arm_compute::ITensor *output, const ActivationLayerInfo &activation_info)
 {
-    return instantiate_function<arm_compute::NEActivationLayer, arm_compute::Tensor, TargetHint::NEON>(input, output, activation_info);
+    return instantiate_function<arm_compute::NEActivationLayer, arm_compute::ITensor, TargetHint::NEON>(input, output, activation_info);
 }
 } // namespace
 
@@ -68,25 +68,29 @@
 {
 }
 
-std::unique_ptr<arm_compute::IFunction> ActivationLayer::instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output)
+std::unique_ptr<arm_compute::IFunction> ActivationLayer::instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output)
 {
+    ARM_COMPUTE_ERROR_ON(input == nullptr || input->tensor() == nullptr);
+    ARM_COMPUTE_ERROR_ON(output == nullptr || output->tensor() == nullptr);
+
     std::unique_ptr<arm_compute::IFunction> func;
     _target_hint = ctx.hints().target_hint();
 
+    arm_compute::ITensor *in  = input->tensor();
+    arm_compute::ITensor *out = output->tensor();
+
     if(_target_hint == TargetHint::OPENCL)
     {
-        func = instantiate<TargetHint::OPENCL>(input, output, _activation_info);
-        ARM_COMPUTE_LOG("Instantiating CLActivationLayer");
+        func = instantiate<TargetHint::OPENCL>(in, out, _activation_info);
     }
     else
     {
-        func = instantiate<TargetHint::NEON>(input, output, _activation_info);
-        ARM_COMPUTE_LOG("Instantiating NEActivationLayer");
+        func = instantiate<TargetHint::NEON>(in, out, _activation_info);
     }
 
-    ARM_COMPUTE_LOG(" Data Type: " << input->info()->data_type()
-                    << " Input shape: " << input->info()->tensor_shape()
-                    << " Output shape: " << output->info()->tensor_shape()
+    ARM_COMPUTE_LOG(" Data Type: " << in->info()->data_type()
+                    << " Input shape: " << in->info()->tensor_shape()
+                    << " Output shape: " << out->info()->tensor_shape()
                     << " Activation function: " << _activation_info.activation()
                     << " a: " << _activation_info.a()
                     << " b: " << _activation_info.b()
diff --git a/src/graph/nodes/BatchNormalizationLayer.cpp b/src/graph/nodes/BatchNormalizationLayer.cpp
index a6a990f..25e9e9b 100644
--- a/src/graph/nodes/BatchNormalizationLayer.cpp
+++ b/src/graph/nodes/BatchNormalizationLayer.cpp
@@ -36,7 +36,7 @@
 namespace
 {
 template <typename BatchBatchNormalizationLayer, typename TensorType, TargetHint target_hint>
-std::unique_ptr<arm_compute::IFunction> instantiate_function(ITensor *input, ITensor *output, Tensor &mean, Tensor &var, Tensor &beta, Tensor &gamma, float epsilon)
+std::unique_ptr<arm_compute::IFunction> instantiate_function(arm_compute::ITensor *input, arm_compute::ITensor *output, Tensor &mean, Tensor &var, Tensor &beta, Tensor &gamma, float epsilon)
 {
     auto norm = arm_compute::support::cpp14::make_unique<BatchBatchNormalizationLayer>();
     norm->configure(
@@ -52,58 +52,65 @@
 }
 
 template <TargetHint                    target_hint>
-std::unique_ptr<arm_compute::IFunction> instantiate(ITensor *input, ITensor *output, Tensor &mean, Tensor &var, Tensor &beta, Tensor &gamma, float epsilon);
+std::unique_ptr<arm_compute::IFunction> instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output, Tensor &mean, Tensor &var, Tensor &beta, Tensor &gamma, float epsilon);
 
 template <>
-std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::OPENCL>(ITensor *input, ITensor *output, Tensor &mean, Tensor &var, Tensor &beta, Tensor &gamma, float epsilon)
+std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::OPENCL>(arm_compute::ITensor *input, arm_compute::ITensor *output, Tensor &mean, Tensor &var, Tensor &beta, Tensor &gamma,
+                                                                        float epsilon)
 {
     return instantiate_function<arm_compute::CLBatchNormalizationLayer, arm_compute::ICLTensor, TargetHint::OPENCL>(input, output, mean, var, beta, gamma, epsilon);
 }
 
 template <>
-std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::NEON>(ITensor *input, ITensor *output, Tensor &mean, Tensor &var, Tensor &beta, Tensor &gamma, float epsilon)
+std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::NEON>(arm_compute::ITensor *input, arm_compute::ITensor *output, Tensor &mean, Tensor &var, Tensor &beta, Tensor &gamma, float epsilon)
 {
     return instantiate_function<arm_compute::NEBatchNormalizationLayer, arm_compute::ITensor, TargetHint::NEON>(input, output, mean, var, beta, gamma, epsilon);
 }
 } // namespace
 
-std::unique_ptr<arm_compute::IFunction> BatchNormalizationLayer::instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output)
+std::unique_ptr<arm_compute::IFunction> BatchNormalizationLayer::instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output)
 {
+    ARM_COMPUTE_ERROR_ON(input == nullptr || input->tensor() == nullptr);
+    ARM_COMPUTE_ERROR_ON(output == nullptr || output->tensor() == nullptr);
+
     std::unique_ptr<arm_compute::IFunction> func;
     _target_hint = ctx.hints().target_hint();
 
-    unsigned int batch_norm_size = input->info()->dimension(2);
+    arm_compute::ITensor *in  = input->tensor();
+    arm_compute::ITensor *out = output->tensor();
+
+    unsigned int batch_norm_size = in->info()->dimension(2);
     if(_mean.tensor() == nullptr)
     {
-        _mean.set_info(TensorInfo(TensorShape(batch_norm_size), input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position()));
+        _mean.set_info(TensorInfo(TensorShape(batch_norm_size), in->info()->num_channels(), in->info()->data_type(), in->info()->fixed_point_position()));
     }
     if(_var.tensor() == nullptr)
     {
-        _var.set_info(TensorInfo(TensorShape(batch_norm_size), input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position()));
+        _var.set_info(TensorInfo(TensorShape(batch_norm_size), in->info()->num_channels(), in->info()->data_type(), in->info()->fixed_point_position()));
     }
     if(_beta.tensor() == nullptr)
     {
-        _beta.set_info(TensorInfo(TensorShape(batch_norm_size), input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position()));
+        _beta.set_info(TensorInfo(TensorShape(batch_norm_size), in->info()->num_channels(), in->info()->data_type(), in->info()->fixed_point_position()));
     }
     if(_gamma.tensor() == nullptr)
     {
-        _gamma.set_info(TensorInfo(TensorShape(batch_norm_size), input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position()));
+        _gamma.set_info(TensorInfo(TensorShape(batch_norm_size), in->info()->num_channels(), in->info()->data_type(), in->info()->fixed_point_position()));
     }
 
     if(_target_hint == TargetHint::OPENCL)
     {
-        func = instantiate<TargetHint::OPENCL>(input, output, _mean, _var, _beta, _gamma, _epsilon);
+        func = instantiate<TargetHint::OPENCL>(in, out, _mean, _var, _beta, _gamma, _epsilon);
         ARM_COMPUTE_LOG("Instantiating CLBatchNormalizationLayer");
     }
     else
     {
-        func = instantiate<TargetHint::NEON>(input, output, _mean, _var, _beta, _gamma, _epsilon);
+        func = instantiate<TargetHint::NEON>(in, out, _mean, _var, _beta, _gamma, _epsilon);
         ARM_COMPUTE_LOG("Instantiating NEBatchNormalizationLayer");
     }
 
-    ARM_COMPUTE_LOG(" Data Type: " << input->info()->data_type()
-                    << " Input shape: " << input->info()->tensor_shape()
-                    << " Output shape: " << output->info()->tensor_shape()
+    ARM_COMPUTE_LOG(" Data Type: " << in->info()->data_type()
+                    << " Input shape: " << in->info()->tensor_shape()
+                    << " Output shape: " << out->info()->tensor_shape()
                     << std::endl);
 
     return func;
diff --git a/src/graph/nodes/BranchLayer.cpp b/src/graph/nodes/BranchLayer.cpp
new file mode 100644
index 0000000..28f58c6
--- /dev/null
+++ b/src/graph/nodes/BranchLayer.cpp
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/nodes/BranchLayer.h"
+
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/SubGraph.h"
+#include "arm_compute/graph/Tensor.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "support/ToolchainSupport.h"
+#include "utils/TypePrinter.h"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+
+using namespace arm_compute::graph;
+
+namespace
+{
+void depth_concatenate_output_info(ITensorInfo *info, ITensorInfo *sub_tensor_info)
+{
+    ARM_COMPUTE_ERROR_ON(info == nullptr);
+    ARM_COMPUTE_ERROR_ON(sub_tensor_info == nullptr);
+
+    TensorShape        info_shape            = info->tensor_shape();
+    const TensorShape &sub_tensor_info_shape = sub_tensor_info->tensor_shape();
+
+    // Update parent info and valid region
+    if(info_shape.total_size() == 0)
+    {
+        arm_compute::auto_init_if_empty(*info,
+                                        sub_tensor_info->tensor_shape(),
+                                        sub_tensor_info->num_channels(),
+                                        sub_tensor_info->data_type(), sub_tensor_info->fixed_point_position());
+        info->set_valid_region(sub_tensor_info->valid_region());
+    }
+    else
+    {
+        ARM_COMPUTE_ERROR_ON(info->num_channels() != sub_tensor_info->num_channels());
+        ARM_COMPUTE_ERROR_ON(info->data_type() != sub_tensor_info->data_type());
+        ARM_COMPUTE_ERROR_ON(info->fixed_point_position() != sub_tensor_info->fixed_point_position());
+
+        // Concatenate depth
+        ARM_COMPUTE_ERROR_ON(info_shape.x() != sub_tensor_info_shape.x());
+        ARM_COMPUTE_ERROR_ON(info_shape.y() != sub_tensor_info_shape.y());
+        info_shape.set(2, info_shape.z() + sub_tensor_info_shape.z());
+        info->set_tensor_shape(info_shape);
+
+        // Update valid region
+        arm_compute::ValidRegion info_valid_region = info->valid_region();
+        info_valid_region.shape.set(2, info_shape.z());
+        arm_compute::ValidRegion updated_region = arm_compute::intersect_valid_regions(info_valid_region, sub_tensor_info->valid_region());
+        info->set_valid_region(updated_region);
+    }
+}
+} // namespace
+
+/** Branch function */
+class BranchFunction final : public arm_compute::IFunction
+{
+public:
+    /** Default Constructor */
+    BranchFunction()
+        : _graphs()
+    {
+    }
+    /** Registers graph to be executed by the branch function
+     *
+     * @param[in] graph Graph to register
+     */
+    void register_graph(std::unique_ptr<Graph> graph)
+    {
+        _graphs.push_back(std::move(graph));
+    }
+    // Inherited methods overridden:
+    void run() override
+    {
+        for(auto &g : _graphs)
+        {
+            ARM_COMPUTE_ERROR_ON(g.get() == nullptr);
+            g->run();
+        }
+    }
+
+private:
+    std::vector<std::unique_ptr<Graph>> _graphs;
+};
+
+std::unique_ptr<arm_compute::IFunction> BranchLayer::instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output)
+{
+    ARM_COMPUTE_ERROR_ON(_branch_merge_method != BranchMergeMethod::DEPTH_CONCATENATE);
+    ARM_COMPUTE_UNUSED(_branch_merge_method);
+    ARM_COMPUTE_ERROR_ON(input == nullptr || input->tensor() == nullptr);
+    ARM_COMPUTE_ERROR_ON(output == nullptr || output->tensor() == nullptr);
+
+    // Create branch function
+    auto func = arm_compute::support::cpp14::make_unique<BranchFunction>();
+
+    // Track the concatenated output TensorInfo and the running depth offset
+    TensorInfo out_info;
+    int        depth = 0;
+
+    // Construct all sub-graphs given the input/output
+    for(auto &sg : _sub_graphs)
+    {
+        ARM_COMPUTE_ERROR_ON(sg.get() == nullptr);
+
+        // IO buffers
+        std::unique_ptr<ITensorObject> in;
+        std::unique_ptr<ITensorObject> out;
+        SubTensor                     *out_sub_tensor = nullptr;
+
+        // Create input sub-tensor
+        if(!sg->has_input())
+        {
+            ARM_COMPUTE_ERROR_ON(dynamic_cast<Tensor *>(input) == nullptr);
+            in = arm_compute::support::cpp14::make_unique<SubTensor>(*dynamic_cast<Tensor *>(input),
+                                                                     input->tensor()->info()->tensor_shape(),
+                                                                     Coordinates());
+        }
+
+        // Create output sub-tensor
+        if(!sg->has_output())
+        {
+            ARM_COMPUTE_ERROR_ON(dynamic_cast<Tensor *>(output) == nullptr);
+            out = arm_compute::support::cpp14::make_unique<SubTensor>(*dynamic_cast<Tensor *>(output),
+                                                                      output->tensor()->info()->tensor_shape(),
+                                                                      Coordinates(0, 0, depth));
+            out_sub_tensor = dynamic_cast<SubTensor *>(out.get());
+        }
+
+        // Construct sub_graph
+        auto g = sg->construct(ctx.hints().target_hint(), std::move(in), std::move(out));
+
+        // Register graph to function
+        func->register_graph(std::move(g));
+
+        // Update and track depth
+        if(out_sub_tensor != nullptr)
+        {
+            ARM_COMPUTE_ERROR_ON(out_sub_tensor->tensor() == nullptr);
+            depth += out_sub_tensor->tensor()->info()->tensor_shape()[2];
+            depth_concatenate_output_info(&out_info, out_sub_tensor->tensor()->info());
+        }
+    }
+
+    // Auto-init output
+    arm_compute::auto_init_if_empty(*output->tensor()->info(),
+                                    out_info.tensor_shape(),
+                                    out_info.num_channels(),
+                                    out_info.data_type(),
+                                    out_info.fixed_point_position());
+
+    return std::move(func);
+}
\ No newline at end of file
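
Worked example of the depth bookkeeping above, for two branches that declare no outputs of their own:

    // branch 0 produces (28, 28, 64) -> SubTensor at Coordinates(0, 0, 0),  depth becomes 64
    // branch 1 produces (28, 28, 32) -> SubTensor at Coordinates(0, 0, 64), depth becomes 96
    // depth_concatenate_output_info() folds both into out_info = (28, 28, 96),
    // and auto_init_if_empty() then writes that shape into the real output tensor.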
diff --git a/src/graph/nodes/ConvolutionLayer.cpp b/src/graph/nodes/ConvolutionLayer.cpp
index b47be8d..303780f 100644
--- a/src/graph/nodes/ConvolutionLayer.cpp
+++ b/src/graph/nodes/ConvolutionLayer.cpp
@@ -67,7 +67,8 @@
 
 // Instantiate GEMM based convolution layer
 template <typename ConvolutionType, typename TensorType, TargetHint target_hint>
-std::unique_ptr<arm_compute::IFunction> instantiate_function(ITensor *input, ITensor *weights, ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info)
+std::unique_ptr<arm_compute::IFunction> instantiate_function(arm_compute::ITensor *input, arm_compute::ITensor *weights, arm_compute::ITensor *biases, arm_compute::ITensor *output,
+                                                             const PadStrideInfo &conv_info, const WeightsInfo &weights_info)
 {
     auto conv = arm_compute::support::cpp14::make_unique<ConvolutionType>();
     conv->configure(
@@ -81,7 +82,8 @@
 
 // Instantiate direct convolution layer
 template <typename ConvolutionType, typename TensorType, TargetHint target_hint>
-std::unique_ptr<arm_compute::IFunction> instantiate_direct_function(ITensor *input, ITensor *weights, ITensor *biases, ITensor *output, const PadStrideInfo &conv_info)
+std::unique_ptr<arm_compute::IFunction> instantiate_direct_function(arm_compute::ITensor *input, arm_compute::ITensor *weights, arm_compute::ITensor *biases, arm_compute::ITensor *output,
+                                                                    const PadStrideInfo &conv_info)
 {
     auto conv = arm_compute::support::cpp14::make_unique<ConvolutionType>();
     conv->configure(
@@ -94,11 +96,13 @@
 }
 
 template <TargetHint                    target_hint>
-std::unique_ptr<arm_compute::IFunction> instantiate(ITensor *input, ITensor *weights, ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
+std::unique_ptr<arm_compute::IFunction> instantiate(arm_compute::ITensor *input, arm_compute::ITensor *weights, arm_compute::ITensor *biases, arm_compute::ITensor *output,
+                                                    const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
                                                     ConvolutionMethodHint conv_method);
 
 template <>
-std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::OPENCL>(ITensor *input, ITensor *weights, ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
+std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::OPENCL>(arm_compute::ITensor *input, arm_compute::ITensor *weights, arm_compute::ITensor *biases, arm_compute::ITensor *output,
+                                                                        const PadStrideInfo &conv_info,
                                                                         const WeightsInfo    &weights_info,
                                                                         ConvolutionMethodHint conv_method)
 {
@@ -113,7 +117,8 @@
 }
 
 template <>
-std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::NEON>(ITensor *input, ITensor *weights, ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
+std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::NEON>(arm_compute::ITensor *input, arm_compute::ITensor *weights, arm_compute::ITensor *biases, arm_compute::ITensor *output,
+                                                                      const PadStrideInfo &conv_info,
                                                                       const WeightsInfo    &weights_info,
                                                                       ConvolutionMethodHint conv_method)
 {
@@ -169,18 +174,25 @@
     std::vector<std::unique_ptr<IFunction>> _convolutions;
 };
 
-std::unique_ptr<arm_compute::IFunction> ConvolutionLayer::instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output)
+std::unique_ptr<arm_compute::IFunction> ConvolutionLayer::instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output)
 {
+    ARM_COMPUTE_ERROR_ON(input == nullptr || input->tensor() == nullptr);
+    ARM_COMPUTE_ERROR_ON(output == nullptr || output->tensor() == nullptr);
+
+    arm_compute::ITensor *in  = input->tensor();
+    arm_compute::ITensor *out = output->tensor();
+
     // Set weights and biases info
     if(_weights.tensor() == nullptr)
     {
-        _weights.set_info(TensorInfo(TensorShape(_conv_width, _conv_height, input->info()->dimension(2) / _num_groups, _ofm),
-                                     input->info()->num_channels(), input->info()->data_type(),
-                                     input->info()->fixed_point_position()));
+        _weights.set_info(TensorInfo(TensorShape(_conv_width, _conv_height, in->info()->dimension(2) / _num_groups, _ofm),
+                                     in->info()->num_channels(),
+                                     in->info()->data_type(),
+                                     in->info()->fixed_point_position()));
     }
     if(_biases.tensor() == nullptr)
     {
-        _biases.set_info(TensorInfo(TensorShape(_ofm), input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position()));
+        _biases.set_info(TensorInfo(TensorShape(_ofm), in->info()->num_channels(), in->info()->data_type(), in->info()->fixed_point_position()));
     }
 
     std::unique_ptr<arm_compute::IFunction> func;
@@ -196,20 +208,20 @@
     _biases.set_target(_target_hint);
 
     // Calculate output shape
-    TensorShape output_shape = calculate_convolution_layer_output_shape(input->info()->tensor_shape(), _weights.info().tensor_shape(), _conv_info);
+    TensorShape output_shape = calculate_convolution_layer_output_shape(in->info()->tensor_shape(), _weights.info().tensor_shape(), _conv_info);
 
     // Output auto inizialitation if not yet initialized
-    arm_compute::auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
+    arm_compute::auto_init_if_empty(*out->info(), output_shape, 1, in->info()->data_type(), in->info()->fixed_point_position());
 
     // Create appropriate convolution function
     if(_num_groups == 1)
     {
-        func = instantiate_convolution(input, output, conv_method_hint);
+        func = instantiate_convolution(in, out, conv_method_hint);
         ARM_COMPUTE_LOG("Instantiating CLConvolutionLayer");
     }
     else
     {
-        func = instantiate_grouped_convolution(input, output, conv_method_hint);
+        func = instantiate_grouped_convolution(in, out, conv_method_hint);
         ARM_COMPUTE_LOG("Instantiating NEConvolutionLayer");
     }
 
@@ -224,11 +236,11 @@
         _biases.allocate_and_fill_if_needed();
     }
 
-    ARM_COMPUTE_LOG(" Data Type: " << input->info()->data_type()
-                    << " Input Shape: " << input->info()->tensor_shape()
+    ARM_COMPUTE_LOG(" Data Type: " << in->info()->data_type()
+                    << " Input Shape: " << in->info()->tensor_shape()
                     << " Weights shape: " << _weights.info().tensor_shape()
                     << " Biases Shape: " << _biases.info().tensor_shape()
-                    << " Output Shape: " << output->info()->tensor_shape()
+                    << " Output Shape: " << out->info()->tensor_shape()
                     << " PadStrideInfo: " << _conv_info
                     << " Groups: " << _num_groups
                     << " WeightsInfo: " << _weights_info
diff --git a/src/graph/nodes/DepthConcatenateLayer.cpp b/src/graph/nodes/DepthConcatenateLayer.cpp
new file mode 100644
index 0000000..2171db3
--- /dev/null
+++ b/src/graph/nodes/DepthConcatenateLayer.cpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <algorithm>
+#include <vector>
+
+#include "arm_compute/graph/nodes/DepthConcatenateLayer.h"
+
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLDepthConcatenate.h"
+#include "arm_compute/runtime/NEON/functions/NEDepthConcatenate.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "support/ToolchainSupport.h"
+#include "utils/TypePrinter.h"
+
+using namespace arm_compute::graph;
+
+namespace
+{
+template <typename DepthConcatenationType, typename TensorType, TargetHint hint>
+std::unique_ptr<arm_compute::IFunction> instantiate_function(std::vector<arm_compute::ITensor *> inputs, arm_compute::ITensor *output)
+{
+    auto                      depth_concat = arm_compute::support::cpp14::make_unique<DepthConcatenationType>();
+    std::vector<TensorType *> casted_inputs;
+    std::transform(inputs.begin(), inputs.end(), std::back_inserter(casted_inputs), [](arm_compute::ITensor * input)
+    {
+        return dynamic_cast<TensorType *>(input);
+    });
+    depth_concat->configure(
+        casted_inputs,
+        dynamic_cast<TensorType *>(output));
+
+    return std::move(depth_concat);
+}
+
+template <TargetHint                    hint>
+std::unique_ptr<arm_compute::IFunction> instantiate(std::vector<arm_compute::ITensor *> inputs, arm_compute::ITensor *output);
+
+template <>
+std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::OPENCL>(std::vector<arm_compute::ITensor *> inputs, arm_compute::ITensor *output)
+{
+    return instantiate_function<arm_compute::CLDepthConcatenate, arm_compute::ICLTensor, TargetHint::OPENCL>(std::move(inputs), output);
+}
+
+template <>
+std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::NEON>(std::vector<arm_compute::ITensor *> inputs, arm_compute::ITensor *output)
+{
+    return instantiate_function<arm_compute::NEDepthConcatenate, arm_compute::ITensor, TargetHint::NEON>(std::move(inputs), output);
+}
+} // namespace
+
+std::unique_ptr<arm_compute::IFunction> DepthConcatenateLayer::instantiate_node(GraphContext &ctx, std::vector<arm_compute::ITensor *> inputs, arm_compute::ITensor *output)
+{
+    std::unique_ptr<arm_compute::IFunction> func;
+    _hint   = ctx.hints().target_hint();
+    _inputs = std::move(inputs);
+    _output = output;
+
+    if(_hint == TargetHint::OPENCL)
+    {
+        func = instantiate<TargetHint::OPENCL>(_inputs, _output);
+    }
+    else
+    {
+        func = instantiate<TargetHint::NEON>(_inputs, _output);
+    }
+    return func;
+}
+
+void DepthConcatenateLayer::print_info()
+{
+    if(_hint == TargetHint::OPENCL)
+    {
+        std::cout << "Instantiating CLDepthConcatenate";
+    }
+    else
+    {
+        std::cout << "Instantiating NEDepthConcatenate";
+    }
+
+    for(const auto &i : _inputs)
+    {
+        std::cout << " Input: " << i->info()->tensor_shape();
+    }
+    std::cout << " Output: " << _output->info()->tensor_shape();
+}
diff --git a/src/graph/nodes/FloorLayer.cpp b/src/graph/nodes/FloorLayer.cpp
index 722cfdf..3224799 100644
--- a/src/graph/nodes/FloorLayer.cpp
+++ b/src/graph/nodes/FloorLayer.cpp
@@ -36,7 +36,7 @@
 namespace
 {
 template <typename FloorType, typename TensorType, TargetHint hint>
-std::unique_ptr<arm_compute::IFunction> instantiate_function(ITensor *input, ITensor *output)
+std::unique_ptr<arm_compute::IFunction> instantiate_function(arm_compute::ITensor *input, arm_compute::ITensor *output)
 {
     auto floorlayer = arm_compute::support::cpp14::make_unique<FloorType>();
     floorlayer->configure(
@@ -47,40 +47,46 @@
 }
 
 template <TargetHint                    target_hint>
-std::unique_ptr<arm_compute::IFunction> instantiate(ITensor *input, ITensor *output);
+std::unique_ptr<arm_compute::IFunction> instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output);
 
 template <>
-std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::OPENCL>(ITensor *input, ITensor *output)
+std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::OPENCL>(arm_compute::ITensor *input, arm_compute::ITensor *output)
 {
     return instantiate_function<arm_compute::CLFloor, arm_compute::ICLTensor, TargetHint::OPENCL>(input, output);
 }
 
 template <>
-std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::NEON>(ITensor *input, ITensor *output)
+std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::NEON>(arm_compute::ITensor *input, arm_compute::ITensor *output)
 {
     return instantiate_function<arm_compute::NEFloor, arm_compute::ITensor, TargetHint::NEON>(input, output);
 }
 } // namespace
 
-std::unique_ptr<arm_compute::IFunction> FloorLayer::instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output)
+std::unique_ptr<arm_compute::IFunction> FloorLayer::instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output)
 {
+    ARM_COMPUTE_ERROR_ON(input == nullptr || input->tensor() == nullptr);
+    ARM_COMPUTE_ERROR_ON(output == nullptr || output->tensor() == nullptr);
+
     std::unique_ptr<arm_compute::IFunction> func;
     _target_hint = ctx.hints().target_hint();
 
+    arm_compute::ITensor *in  = input->tensor();
+    arm_compute::ITensor *out = output->tensor();
+
     if(_target_hint == TargetHint::OPENCL)
     {
-        func = instantiate<TargetHint::OPENCL>(input, output);
+        func = instantiate<TargetHint::OPENCL>(in, out);
         ARM_COMPUTE_LOG("Instantiating CLFloorLayer");
     }
     else
     {
-        func = instantiate<TargetHint::NEON>(input, output);
+        func = instantiate<TargetHint::NEON>(in, out);
         ARM_COMPUTE_LOG("Instantiating NEFloorLayer");
     }
 
-    ARM_COMPUTE_LOG(" Data Type: " << input->info()->data_type()
-                    << " Input shape: " << input->info()->tensor_shape()
-                    << " Output shape: " << output->info()->tensor_shape()
+    ARM_COMPUTE_LOG(" Data Type: " << in->info()->data_type()
+                    << " Input shape: " << in->info()->tensor_shape()
+                    << " Output shape: " << out->info()->tensor_shape()
                     << std::endl);
 
     return func;
diff --git a/src/graph/nodes/FullyConnectedLayer.cpp b/src/graph/nodes/FullyConnectedLayer.cpp
index 6b21810..fa5ead8 100644
--- a/src/graph/nodes/FullyConnectedLayer.cpp
+++ b/src/graph/nodes/FullyConnectedLayer.cpp
@@ -45,7 +45,7 @@
     return TensorShape(output_neurons, batches);
 }
 template <typename FullyConnectedType, typename TensorType, TargetHint target_hint>
-std::unique_ptr<arm_compute::IFunction> instantiate_function(ITensor *input, Tensor &weights, Tensor &biases, ITensor *output)
+std::unique_ptr<arm_compute::IFunction> instantiate_function(arm_compute::ITensor *input, Tensor &weights, Tensor &biases, arm_compute::ITensor *output)
 {
     bool weights_are_loaded = weights.tensor() != nullptr;
     bool biases_are_loaded  = biases.tensor() != nullptr;
@@ -69,27 +69,33 @@
 }
 
 template <TargetHint                    target_hint>
-std::unique_ptr<arm_compute::IFunction> instantiate(ITensor *input, Tensor &weights, Tensor &biases, ITensor *output);
+std::unique_ptr<arm_compute::IFunction> instantiate(arm_compute::ITensor *input, Tensor &weights, Tensor &biases, arm_compute::ITensor *output);
 
 template <>
-std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::OPENCL>(ITensor *input, Tensor &weights, Tensor &biases, ITensor *output)
+std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::OPENCL>(arm_compute::ITensor *input, Tensor &weights, Tensor &biases, arm_compute::ITensor *output)
 {
-    return instantiate_function<arm_compute::CLFullyConnectedLayer, arm_compute::CLTensor, TargetHint::OPENCL>(input, weights, biases, output);
+    return instantiate_function<arm_compute::CLFullyConnectedLayer, arm_compute::ICLTensor, TargetHint::OPENCL>(input, weights, biases, output);
 }
 
 template <>
-std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::NEON>(ITensor *input, Tensor &weights, Tensor &biases, ITensor *output)
+std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::NEON>(arm_compute::ITensor *input, Tensor &weights, Tensor &biases, arm_compute::ITensor *output)
 {
-    return instantiate_function<arm_compute::NEFullyConnectedLayer, arm_compute::Tensor, TargetHint::NEON>(input, weights, biases, output);
+    return instantiate_function<arm_compute::NEFullyConnectedLayer, arm_compute::ITensor, TargetHint::NEON>(input, weights, biases, output);
 }
 } // namespace
 
-std::unique_ptr<arm_compute::IFunction> FullyConnectedLayer::instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output)
+std::unique_ptr<arm_compute::IFunction> FullyConnectedLayer::instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output)
 {
+    ARM_COMPUTE_ERROR_ON(input == nullptr || input->tensor() == nullptr);
+    ARM_COMPUTE_ERROR_ON(output == nullptr || output->tensor() == nullptr);
+
+    arm_compute::ITensor *in  = input->tensor();
+    arm_compute::ITensor *out = output->tensor();
+
     if(_weights.tensor() == nullptr)
     {
         unsigned int num_weights    = 1;
-        unsigned int num_dimensions = input->info()->num_dimensions();
+        unsigned int num_dimensions = in->info()->num_dimensions();
         // Ignore the batch dimension if there is one:
         if(num_dimensions == 2 || num_dimensions == 4)
         {
@@ -97,39 +103,37 @@
         }
         for(unsigned int i = 0; i < num_dimensions; i++)
         {
-            num_weights *= input->info()->dimension(i);
+            num_weights *= in->info()->dimension(i);
         }
-        _weights.set_info(TensorInfo(TensorShape(num_weights, _num_neurons), input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position()));
+        _weights.set_info(TensorInfo(TensorShape(num_weights, _num_neurons), in->info()->num_channels(), in->info()->data_type(), in->info()->fixed_point_position()));
     }
     if(_biases.tensor() == nullptr)
     {
-        _biases.set_info(TensorInfo(TensorShape(_num_neurons), input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position()));
+        _biases.set_info(TensorInfo(TensorShape(_num_neurons), in->info()->num_channels(), in->info()->data_type(), in->info()->fixed_point_position()));
     }
 
     // Auto configure output
-    arm_compute::auto_init_if_empty(*output->info(),
-                                    calculate_fullyconnected_layer_output_shape(input->info()->tensor_shape(), _num_neurons),
-                                    input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position());
+    arm_compute::auto_init_if_empty(*out->info(),
+                                    calculate_fullyconnected_layer_output_shape(in->info()->tensor_shape(), _num_neurons),
+                                    in->info()->num_channels(), in->info()->data_type(), in->info()->fixed_point_position());
 
     std::unique_ptr<arm_compute::IFunction> func;
     _target_hint = ctx.hints().target_hint();
 
     if(_target_hint == TargetHint::OPENCL)
     {
-        func = instantiate<TargetHint::OPENCL>(input, _weights, _biases, output);
-        ARM_COMPUTE_LOG("Instantiating CLFullyConnectedLayer");
+        func = instantiate<TargetHint::OPENCL>(in, _weights, _biases, out);
     }
     else
     {
-        func = instantiate<TargetHint::NEON>(input, _weights, _biases, output);
-        ARM_COMPUTE_LOG("Instantiating NEFullyConnectedLayer");
+        func = instantiate<TargetHint::NEON>(in, _weights, _biases, out);
     }
 
-    ARM_COMPUTE_LOG(" Type: " << input->info()->data_type()
-                    << " Input Shape: " << input->info()->tensor_shape()
+    ARM_COMPUTE_LOG(" Type: " << in->info()->data_type()
+                    << " Input Shape: " << in->info()->tensor_shape()
                     << " Weights shape: " << _weights.info().tensor_shape()
                     << " Biases Shape: " << _biases.info().tensor_shape()
-                    << " Output Shape: " << output->info()->tensor_shape()
+                    << " Output Shape: " << out->info()->tensor_shape()
                     << std::endl);
 
     return func;
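
Worked example of the weights-shape inference above (values assumed): a 4D input of shape (7, 7, 512, N) has its batch dimension ignored, so num_weights = 7 * 7 * 512 = 25088, and with _num_neurons = 4096 the weights tensor is initialised to TensorShape(25088, 4096).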
diff --git a/src/graph/nodes/L2NormalizeLayer.cpp b/src/graph/nodes/L2NormalizeLayer.cpp
index 46d1552..7abc69c 100644
--- a/src/graph/nodes/L2NormalizeLayer.cpp
+++ b/src/graph/nodes/L2NormalizeLayer.cpp
@@ -36,7 +36,7 @@
 namespace
 {
 template <typename L2NormalizeType, typename TensorType, TargetHint hint>
-std::unique_ptr<arm_compute::IFunction> instantiate_function(ITensor *input, ITensor *output, unsigned int axis, float epsilon)
+std::unique_ptr<arm_compute::IFunction> instantiate_function(arm_compute::ITensor *input, arm_compute::ITensor *output, unsigned int axis, float epsilon)
 {
     auto l2norm = arm_compute::support::cpp14::make_unique<L2NormalizeType>();
     l2norm->configure(
@@ -49,40 +49,46 @@
 }
 
 template <TargetHint                    target_hint>
-std::unique_ptr<arm_compute::IFunction> instantiate(ITensor *input, ITensor *output, unsigned int axis, float epsilon);
+std::unique_ptr<arm_compute::IFunction> instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output, unsigned int axis, float epsilon);
 
 template <>
-std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::OPENCL>(ITensor *input, ITensor *output, unsigned int axis, float epsilon)
+std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::OPENCL>(arm_compute::ITensor *input, arm_compute::ITensor *output, unsigned int axis, float epsilon)
 {
     return instantiate_function<arm_compute::CLL2Normalize, arm_compute::ICLTensor, TargetHint::OPENCL>(input, output, axis, epsilon);
 }
 
 template <>
-std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::NEON>(ITensor *input, ITensor *output, unsigned int axis, float epsilon)
+std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::NEON>(arm_compute::ITensor *input, arm_compute::ITensor *output, unsigned int axis, float epsilon)
 {
     return instantiate_function<arm_compute::NEL2Normalize, arm_compute::ITensor, TargetHint::NEON>(input, output, axis, epsilon);
 }
 } // namespace
 
-std::unique_ptr<arm_compute::IFunction> L2NormalizeLayer::instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output)
+std::unique_ptr<arm_compute::IFunction> L2NormalizeLayer::instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output)
 {
+    ARM_COMPUTE_ERROR_ON(input == nullptr || input->tensor() == nullptr);
+    ARM_COMPUTE_ERROR_ON(output == nullptr || output->tensor() == nullptr);
+
     std::unique_ptr<arm_compute::IFunction> func;
     _target_hint = ctx.hints().target_hint();
 
+    arm_compute::ITensor *in  = input->tensor();
+    arm_compute::ITensor *out = output->tensor();
+
     if(_target_hint == TargetHint::OPENCL)
     {
-        func = instantiate<TargetHint::OPENCL>(input, output, _axis, _epsilon);
+        func = instantiate<TargetHint::OPENCL>(in, out, _axis, _epsilon);
         ARM_COMPUTE_LOG("Instantiating CLL2NormalizeLayer");
     }
     else
     {
-        func = instantiate<TargetHint::NEON>(input, output, _axis, _epsilon);
+        func = instantiate<TargetHint::NEON>(in, out, _axis, _epsilon);
         ARM_COMPUTE_LOG("Instantiating NEL2NormalizeLayer");
     }
 
-    ARM_COMPUTE_LOG(" Data Type: " << input->info()->data_type()
-                    << " Input shape: " << input->info()->tensor_shape()
-                    << " Output shape: " << output->info()->tensor_shape()
+    ARM_COMPUTE_LOG(" Data Type: " << in->info()->data_type()
+                    << " Input shape: " << in->info()->tensor_shape()
+                    << " Output shape: " << out->info()->tensor_shape()
                     << std::endl);
 
     return func;
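
The instantiate<TargetHint> primary template plus two explicit specialisations is the dispatch idiom every node file in this patch shares: the hint is only known at runtime, so instantiate_node branches on it and each branch names a fully specialised, statically typed factory. A compilable sketch with stand-in types (IFunction mirrors arm_compute::IFunction; CLFunc/NEFunc are placeholders, and std::make_unique stands in for arm_compute::support::cpp14::make_unique):

    #include <iostream>
    #include <memory>

    enum class TargetHint { OPENCL, NEON };

    struct IFunction
    {
        virtual ~IFunction() = default;
        virtual void run() = 0;
    };
    struct CLFunc : IFunction { void run() override { std::cout << "OpenCL path\n"; } };
    struct NEFunc : IFunction { void run() override { std::cout << "NEON path\n"; } };

    // Primary template: declared but never defined, so only the explicit
    // specialisations below can be called.
    template <TargetHint hint>
    std::unique_ptr<IFunction> instantiate();

    template <>
    std::unique_ptr<IFunction> instantiate<TargetHint::OPENCL>()
    {
        return std::make_unique<CLFunc>();
    }

    template <>
    std::unique_ptr<IFunction> instantiate<TargetHint::NEON>()
    {
        return std::make_unique<NEFunc>();
    }

    int main()
    {
        // The hint is runtime data, so the caller branches on it and each
        // branch calls a fully specialised factory, as in the hunks above.
        TargetHint hint = TargetHint::NEON;
        auto func = (hint == TargetHint::OPENCL) ? instantiate<TargetHint::OPENCL>()
                                                 : instantiate<TargetHint::NEON>();
        func->run(); // prints "NEON path"
    }
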
diff --git a/src/graph/nodes/NormalizationLayer.cpp b/src/graph/nodes/NormalizationLayer.cpp
index 47f0891..319a425 100644
--- a/src/graph/nodes/NormalizationLayer.cpp
+++ b/src/graph/nodes/NormalizationLayer.cpp
@@ -36,7 +36,7 @@
 namespace
 {
 template <typename NormalizationType, typename TensorType, TargetHint target_hint>
-std::unique_ptr<arm_compute::IFunction> instantiate_function(ITensor *input, ITensor *output, const NormalizationLayerInfo &norm_info)
+std::unique_ptr<arm_compute::IFunction> instantiate_function(arm_compute::ITensor *input, arm_compute::ITensor *output, const NormalizationLayerInfo &norm_info)
 {
     auto norm = arm_compute::support::cpp14::make_unique<NormalizationType>();
     norm->configure(
@@ -48,18 +48,18 @@
 }
 
 template <TargetHint                    target_hint>
-std::unique_ptr<arm_compute::IFunction> instantiate(ITensor *input, ITensor *output, const NormalizationLayerInfo &norm_info);
+std::unique_ptr<arm_compute::IFunction> instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output, const NormalizationLayerInfo &norm_info);
 
 template <>
-std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::OPENCL>(ITensor *input, ITensor *output, const NormalizationLayerInfo &norm_info)
+std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::OPENCL>(arm_compute::ITensor *input, arm_compute::ITensor *output, const NormalizationLayerInfo &norm_info)
 {
-    return instantiate_function<arm_compute::CLNormalizationLayer, arm_compute::CLTensor, TargetHint::OPENCL>(input, output, norm_info);
+    return instantiate_function<arm_compute::CLNormalizationLayer, arm_compute::ICLTensor, TargetHint::OPENCL>(input, output, norm_info);
 }
 
 template <>
-std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::NEON>(ITensor *input, ITensor *output, const NormalizationLayerInfo &norm_info)
+std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::NEON>(arm_compute::ITensor *input, arm_compute::ITensor *output, const NormalizationLayerInfo &norm_info)
 {
-    return instantiate_function<arm_compute::NENormalizationLayer, arm_compute::Tensor, TargetHint::NEON>(input, output, norm_info);
+    return instantiate_function<arm_compute::NENormalizationLayer, arm_compute::ITensor, TargetHint::NEON>(input, output, norm_info);
 }
 } // namespace
 
@@ -68,25 +68,29 @@
 {
 }
 
-std::unique_ptr<arm_compute::IFunction> NormalizationLayer::instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output)
+std::unique_ptr<arm_compute::IFunction> NormalizationLayer::instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output)
 {
+    ARM_COMPUTE_ERROR_ON(input == nullptr || input->tensor() == nullptr);
+    ARM_COMPUTE_ERROR_ON(output == nullptr || output->tensor() == nullptr);
+
     std::unique_ptr<arm_compute::IFunction> func;
     _target_hint = ctx.hints().target_hint();
 
+    arm_compute::ITensor *in  = input->tensor();
+    arm_compute::ITensor *out = output->tensor();
+
     if(_target_hint == TargetHint::OPENCL)
     {
-        func = instantiate<TargetHint::OPENCL>(input, output, _norm_info);
-        ARM_COMPUTE_LOG("Instantiating CLNormalizationLayer");
+        func = instantiate<TargetHint::OPENCL>(in, out, _norm_info);
     }
     else
     {
-        func = instantiate<TargetHint::NEON>(input, output, _norm_info);
-        ARM_COMPUTE_LOG("Instantiating NENormalizationLayer");
+        func = instantiate<TargetHint::NEON>(in, out, _norm_info);
     }
 
-    ARM_COMPUTE_LOG(" Data Type: " << input->info()->data_type()
-                    << " Input shape: " << input->info()->tensor_shape()
-                    << " Output shape: " << output->info()->tensor_shape()
+    ARM_COMPUTE_LOG(" Data Type: " << in->info()->data_type()
+                    << " Input shape: " << in->info()->tensor_shape()
+                    << " Output shape: " << out->info()->tensor_shape()
                     << " Normalization info: " << _norm_info
                     << std::endl);
 
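
The quieter change in these hunks is the TensorType template argument moving from the concrete classes (arm_compute::CLTensor, arm_compute::Tensor) to the interfaces (arm_compute::ICLTensor, arm_compute::ITensor). The configure bodies are elided by the hunks, but assuming TensorType is used there in a dynamic_cast (consistent with the CLMap/CLUnmap changes earlier in this patch), casting against the interface is what lets sub-tensor views created for branches flow through. A sketch with a hypothetical, simplified hierarchy:

    #include <cassert>

    // Hypothetical simplification of the arm_compute tensor hierarchy.
    struct ITensor     { virtual ~ITensor() = default; };
    struct ICLTensor   : ITensor {};   // OpenCL tensor interface
    struct CLTensor    : ICLTensor {}; // concrete OpenCL tensor
    struct CLSubTensor : ICLTensor {}; // view into a parent tensor, used by branches

    // If configure casts its inputs with dynamic_cast<TensorType *>, the
    // choice of TensorType decides which tensors a node accepts.
    template <typename TensorType>
    bool accepts(ITensor *t)
    {
        return dynamic_cast<TensorType *>(t) != nullptr;
    }

    int main()
    {
        CLSubTensor sub;
        assert(accepts<ICLTensor>(&sub)); // interface: sub-tensor views pass
        assert(!accepts<CLTensor>(&sub)); // concrete type: views would be rejected
    }
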
diff --git a/src/graph/nodes/PoolingLayer.cpp b/src/graph/nodes/PoolingLayer.cpp
index 317cf4d..904ba18 100644
--- a/src/graph/nodes/PoolingLayer.cpp
+++ b/src/graph/nodes/PoolingLayer.cpp
@@ -36,7 +36,7 @@
 namespace
 {
 template <typename PoolingType, typename TensorType, TargetHint target_hint>
-std::unique_ptr<arm_compute::IFunction> instantiate_function(ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info)
+std::unique_ptr<arm_compute::IFunction> instantiate_function(arm_compute::ITensor *input, arm_compute::ITensor *output, const PoolingLayerInfo &pool_info)
 {
     auto pool = arm_compute::support::cpp14::make_unique<PoolingType>();
     pool->configure(
@@ -48,18 +48,18 @@
 }
 
 template <TargetHint                    target_hint>
-std::unique_ptr<arm_compute::IFunction> instantiate(ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info);
+std::unique_ptr<arm_compute::IFunction> instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output, const PoolingLayerInfo &pool_info);
 
 template <>
-std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::OPENCL>(ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info)
+std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::OPENCL>(arm_compute::ITensor *input, arm_compute::ITensor *output, const PoolingLayerInfo &pool_info)
 {
-    return instantiate_function<arm_compute::CLPoolingLayer, arm_compute::CLTensor, TargetHint::OPENCL>(input, output, pool_info);
+    return instantiate_function<arm_compute::CLPoolingLayer, arm_compute::ICLTensor, TargetHint::OPENCL>(input, output, pool_info);
 }
 
 template <>
-std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::NEON>(ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info)
+std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::NEON>(arm_compute::ITensor *input, arm_compute::ITensor *output, const PoolingLayerInfo &pool_info)
 {
-    return instantiate_function<arm_compute::NEPoolingLayer, arm_compute::Tensor, TargetHint::NEON>(input, output, pool_info);
+    return instantiate_function<arm_compute::NEPoolingLayer, arm_compute::ITensor, TargetHint::NEON>(input, output, pool_info);
 }
 } // namespace
 
@@ -68,25 +68,29 @@
 {
 }
 
-std::unique_ptr<arm_compute::IFunction> PoolingLayer::instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output)
+std::unique_ptr<arm_compute::IFunction> PoolingLayer::instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output)
 {
+    ARM_COMPUTE_ERROR_ON(input == nullptr || input->tensor() == nullptr);
+    ARM_COMPUTE_ERROR_ON(output == nullptr || output->tensor() == nullptr);
+
     std::unique_ptr<arm_compute::IFunction> func;
     _target_hint = ctx.hints().target_hint();
 
+    arm_compute::ITensor *in  = input->tensor();
+    arm_compute::ITensor *out = output->tensor();
+
     if(_target_hint == TargetHint::OPENCL)
     {
-        func = instantiate<TargetHint::OPENCL>(input, output, _pool_info);
-        ARM_COMPUTE_LOG("Instantiating CLPoolingLayer");
+        func = instantiate<TargetHint::OPENCL>(in, out, _pool_info);
     }
     else
     {
-        func = instantiate<TargetHint::NEON>(input, output, _pool_info);
-        ARM_COMPUTE_LOG("Instantiating NEPoolingLayer");
+        func = instantiate<TargetHint::NEON>(in, out, _pool_info);
     }
 
-    ARM_COMPUTE_LOG(" Data Type: " << input->info()->data_type()
-                    << " Input shape: " << input->info()->tensor_shape()
-                    << " Output shape: " << output->info()->tensor_shape()
+    ARM_COMPUTE_LOG(" Data Type: " << in->info()->data_type()
+                    << " Input shape: " << in->info()->tensor_shape()
+                    << " Output shape: " << out->info()->tensor_shape()
                     << " Pooling info: " << _pool_info << std::endl);
 
     return func;
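
Each converted instantiate_node also gains the same pair of guards before touching its arguments. The combined check leans on the left-to-right short-circuit of ||: input->tensor() is only evaluated once input itself is known to be non-null, so a single ARM_COMPUTE_ERROR_ON can validate both the wrapper and its backing tensor. A small sketch, with assert standing in for ARM_COMPUTE_ERROR_ON and stand-in types:

    #include <cassert>

    struct ITensor {};
    struct ITensorObject
    {
        ITensor *backing;
        ITensor *tensor() { return backing; }
    };

    void instantiate_node(ITensorObject *input, ITensorObject *output)
    {
        // input->tensor() only runs once input is known to be non-null, so the
        // combined expression never dereferences a null wrapper.
        assert(!(input == nullptr || input->tensor() == nullptr));
        assert(!(output == nullptr || output->tensor() == nullptr));
        // ... unwrap and dispatch as in the hunks above ...
    }

    int main()
    {
        ITensor t;
        ITensorObject in{ &t }, out{ &t };
        instantiate_node(&in, &out);
    }
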
diff --git a/src/graph/nodes/SoftmaxLayer.cpp b/src/graph/nodes/SoftmaxLayer.cpp
index 8628244..e3345f1 100644
--- a/src/graph/nodes/SoftmaxLayer.cpp
+++ b/src/graph/nodes/SoftmaxLayer.cpp
@@ -36,7 +36,7 @@
 namespace
 {
 template <typename SoftmaxType, typename TensorType, TargetHint hint>
-std::unique_ptr<arm_compute::IFunction> instantiate_function(ITensor *input, ITensor *output)
+std::unique_ptr<arm_compute::IFunction> instantiate_function(arm_compute::ITensor *input, arm_compute::ITensor *output)
 {
     auto softmax = arm_compute::support::cpp14::make_unique<SoftmaxType>();
     softmax->configure(
@@ -47,40 +47,44 @@
 }
 
 template <TargetHint                    target_hint>
-std::unique_ptr<arm_compute::IFunction> instantiate(ITensor *input, ITensor *output);
+std::unique_ptr<arm_compute::IFunction> instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output);
 
 template <>
-std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::OPENCL>(ITensor *input, ITensor *output)
+std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::OPENCL>(arm_compute::ITensor *input, arm_compute::ITensor *output)
 {
-    return instantiate_function<arm_compute::CLSoftmaxLayer, arm_compute::CLTensor, TargetHint::OPENCL>(input, output);
+    return instantiate_function<arm_compute::CLSoftmaxLayer, arm_compute::ICLTensor, TargetHint::OPENCL>(input, output);
 }
 
 template <>
-std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::NEON>(ITensor *input, ITensor *output)
+std::unique_ptr<arm_compute::IFunction> instantiate<TargetHint::NEON>(arm_compute::ITensor *input, arm_compute::ITensor *output)
 {
-    return instantiate_function<arm_compute::NESoftmaxLayer, arm_compute::Tensor, TargetHint::NEON>(input, output);
+    return instantiate_function<arm_compute::NESoftmaxLayer, arm_compute::ITensor, TargetHint::NEON>(input, output);
 }
 } // namespace
 
-std::unique_ptr<arm_compute::IFunction> SoftmaxLayer::instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output)
+std::unique_ptr<arm_compute::IFunction> SoftmaxLayer::instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output)
 {
+    ARM_COMPUTE_ERROR_ON(input == nullptr || input->tensor() == nullptr);
+    ARM_COMPUTE_ERROR_ON(output == nullptr || output->tensor() == nullptr);
+
     std::unique_ptr<arm_compute::IFunction> func;
     _target_hint = ctx.hints().target_hint();
 
+    arm_compute::ITensor *in  = input->tensor();
+    arm_compute::ITensor *out = output->tensor();
+
     if(_target_hint == TargetHint::OPENCL)
     {
-        func = instantiate<TargetHint::OPENCL>(input, output);
-        ARM_COMPUTE_LOG("Instantiating CLSoftmaxLayer");
+        func = instantiate<TargetHint::OPENCL>(in, out);
     }
     else
     {
-        func = instantiate<TargetHint::NEON>(input, output);
-        ARM_COMPUTE_LOG("Instantiating NESoftmaxLayer");
+        func = instantiate<TargetHint::NEON>(in, out);
     }
 
-    ARM_COMPUTE_LOG(" Data Type: " << input->info()->data_type()
-                    << " Input shape: " << input->info()->tensor_shape()
-                    << " Output shape: " << output->info()->tensor_shape()
+    ARM_COMPUTE_LOG(" Data Type: " << in->info()->data_type()
+                    << " Input shape: " << in->info()->tensor_shape()
+                    << " Output shape: " << out->info()->tensor_shape()
                     << std::endl);
 
     return func;