diff --git a/src/core/CL/cl_kernels/batchnormalization_layer.cl b/src/core/CL/cl_kernels/batchnormalization_layer.cl
index f7aa5eb..fbffefb 100644
--- a/src/core/CL/cl_kernels/batchnormalization_layer.cl
+++ b/src/core/CL/cl_kernels/batchnormalization_layer.cl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -123,7 +123,7 @@
     numerator = SUB_OP(data, numerator);
     x_bar     = MUL_OP(numerator, denominator);
 
-    gamma_vec = *((__global DATA_TYPE *)(gamma.ptr + current_slice * beta.stride_x));
+    gamma_vec = *((__global DATA_TYPE *)(gamma.ptr + current_slice * gamma.stride_x));
     beta_vec  = *((__global DATA_TYPE *)(beta.ptr + current_slice * beta.stride_x));
 
     VSTORE(VEC_SIZE)
diff --git a/src/core/SubTensorInfo.cpp b/src/core/SubTensorInfo.cpp
index 7a4886f..0150a95 100644
--- a/src/core/SubTensorInfo.cpp
+++ b/src/core/SubTensorInfo.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,17 +30,49 @@
 
 using namespace arm_compute;
 
+namespace
+{
+/** Grow a parent shape so that a sub-tensor placed at the given coordinates fits inside it
+ *
+ * @param parent_shape Current parent shape (total size is 0 when the parent is not configured yet)
+ * @param shape        Sub-tensor shape
+ * @param coords       Sub-tensor coordinates inside the parent tensor; x and y must both be 0
+ *
+ * @return Copy of @p parent_shape in which every dimension smaller than coords[i] + shape[i] is enlarged to that value
+ */
+TensorShape extend_parent_shape(TensorShape parent_shape, TensorShape shape, Coordinates coords)
+{
+    // The sub-tensor must start at the origin of the x-y plane
+    ARM_COMPUTE_ERROR_ON((coords.x() != 0) || (coords.y() != 0));
+
+    // A configured parent cannot be extended along x or y: a mismatch in either dimension is an error
+    ARM_COMPUTE_ERROR_ON((parent_shape.total_size() != 0) && ((parent_shape.x() != shape.x()) || (parent_shape.y() != shape.y())));
+
+    // Enlarge every dimension the sub-tensor would overflow; non-positive extents are never written to the shape
+    for(unsigned int i = 0; i < TensorShape::num_max_dimensions; ++i)
+    {
+        const int dimension_extend = coords[i] + static_cast<int>(shape[i]);
+        if((dimension_extend > static_cast<int>(parent_shape[i])) && (dimension_extend > 0))
+        {
+            parent_shape.set(i, static_cast<size_t>(dimension_extend));
+        }
+    }
+
+    return parent_shape;
+}
+} // namespace
+
 SubTensorInfo::SubTensorInfo()
-    : _parent(nullptr), _tensor_shape(), _coords(), _valid_region{ Coordinates(), _tensor_shape }
+    : _parent(nullptr), _tensor_shape(), _coords(), _valid_region{ Coordinates(), _tensor_shape }, _extend_parent(false) // No parent extension for a default-constructed info
 {
 }
 
-SubTensorInfo::SubTensorInfo(ITensorInfo *parent, TensorShape tensor_shape, Coordinates coords)
-    : _parent(parent), _tensor_shape(tensor_shape), _coords(coords), _valid_region{ Coordinates(), _tensor_shape }
+SubTensorInfo::SubTensorInfo(ITensorInfo *parent, TensorShape tensor_shape, Coordinates coords, bool extend_parent)
+    : _parent(parent), _tensor_shape(tensor_shape), _coords(coords), _valid_region{ Coordinates(), _tensor_shape }, _extend_parent(extend_parent)
 {
     ARM_COMPUTE_ERROR_ON(parent == nullptr);
     // Check if subtensor is valid if parent is configured
-    if(parent->tensor_shape().total_size() != 0)
+    if(parent->tensor_shape().total_size() != 0 && !_extend_parent)
     {
         ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR(parent->tensor_shape(), coords, tensor_shape);
     }
@@ -63,11 +95,19 @@
 ITensorInfo &SubTensorInfo::set_tensor_shape(TensorShape shape)
 {
     ARM_COMPUTE_ERROR_ON(_parent == nullptr);
+    // Validate the new shape against a configured parent, or grow the parent when parent extension is enabled
     // Check if subtensor is valid if parent is configured
-    if(_parent->tensor_shape().total_size() != 0)
+    if(_parent->tensor_shape().total_size() != 0 && !_extend_parent)
     {
         ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR(_parent->tensor_shape(), _coords, shape);
     }
+    else if(_extend_parent) // Grow the parent shape to fit this sub-tensor and reset the parent's valid region to the grown shape
+    {
+        ARM_COMPUTE_ERROR_ON((_parent->data_type() == DataType::UNKNOWN) && (_parent->format() == Format::UNKNOWN)); // Parent needs at least a data type or a format
+        TensorShape parent_extended_shape = extend_parent_shape(_parent->tensor_shape(), shape, _coords);
+        _parent->set_tensor_shape(parent_extended_shape);
+        _parent->set_valid_region(ValidRegion{ Coordinates(), parent_extended_shape });
+    }
     _tensor_shape = shape;
     return *this;
 }
@@ -76,6 +116,7 @@
 {
     ARM_COMPUTE_ERROR_ON(_parent == nullptr);
     ARM_COMPUTE_ERROR_ON(!_parent->is_resizable());
+    ARM_COMPUTE_ERROR_ON(_parent->total_size() == 0);
 
     // Extend parent padding if required
     return _parent->extend_padding(padding);
diff --git a/src/core/TensorInfo.cpp b/src/core/TensorInfo.cpp
index 2008217..24988e2 100644
--- a/src/core/TensorInfo.cpp
+++ b/src/core/TensorInfo.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2018 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -364,6 +364,8 @@
         _total_size                           = _tensor_shape[idx_last_dimension] * _strides_in_bytes[idx_last_dimension];
     }
 
+    std::tie(_strides_in_bytes, _offset_first_element_in_bytes, _total_size) = calculate_padding_requirements(_padding);
+
     _valid_region = ValidRegion{ Coordinates(), _tensor_shape };
     return *this;
 }
diff --git a/src/graph/SubTensor.cpp b/src/graph/SubTensor.cpp
index 2edeb3b..2e640dd 100644
--- a/src/graph/SubTensor.cpp
+++ b/src/graph/SubTensor.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -37,21 +37,21 @@
 namespace
 {
 template <typename SubTensorType, typename ParentTensorType>
-std::unique_ptr<arm_compute::ITensor> initialise_subtensor(arm_compute::ITensor *parent, TensorShape shape, Coordinates coords)
+std::unique_ptr<arm_compute::ITensor> initialise_subtensor(arm_compute::ITensor *parent, TensorShape shape, Coordinates coords, bool extend_parent)
 {
     auto ptensor   = dynamic_cast<ParentTensorType *>(parent);
-    auto subtensor = arm_compute::support::cpp14::make_unique<SubTensorType>(ptensor, shape, coords);
+    auto subtensor = arm_compute::support::cpp14::make_unique<SubTensorType>(ptensor, shape, coords, extend_parent); // NOTE(review): ptensor is nullptr when the cast fails; it is not checked here
     return std::move(subtensor);
 }
 } // namespace
 
 SubTensor::SubTensor()
-    : _target(TargetHint::DONT_CARE), _tensor_shape(), _coords(), _parent(nullptr), _subtensor(nullptr)
+    : _target(TargetHint::DONT_CARE), _tensor_shape(), _coords(), _parent(nullptr), _subtensor(nullptr), _extend_parent(false) // No parent extension for a default-constructed sub-tensor
 {
 }
 
-SubTensor::SubTensor(Tensor &parent, TensorShape tensor_shape, Coordinates coords)
-    : _target(TargetHint::DONT_CARE), _tensor_shape(tensor_shape), _coords(coords), _parent(nullptr), _subtensor(nullptr)
+SubTensor::SubTensor(Tensor &parent, TensorShape tensor_shape, Coordinates coords, bool extend_parent)
+    : _target(TargetHint::DONT_CARE), _tensor_shape(tensor_shape), _coords(coords), _parent(nullptr), _subtensor(nullptr), _extend_parent(extend_parent)
 {
     ARM_COMPUTE_ERROR_ON(parent.tensor() == nullptr);
     _parent = parent.tensor();
@@ -60,8 +60,8 @@
     instantiate_subtensor();
 }
 
-SubTensor::SubTensor(arm_compute::ITensor *parent, TensorShape tensor_shape, Coordinates coords, TargetHint target)
-    : _target(target), _tensor_shape(tensor_shape), _coords(coords), _parent(parent), _subtensor(nullptr)
+SubTensor::SubTensor(arm_compute::ITensor *parent, TensorShape tensor_shape, Coordinates coords, TargetHint target, bool extend_parent)
+    : _target(target), _tensor_shape(tensor_shape), _coords(coords), _parent(parent), _subtensor(nullptr), _extend_parent(extend_parent)
 {
     ARM_COMPUTE_ERROR_ON(parent == nullptr);
     instantiate_subtensor();
@@ -108,10 +108,10 @@
     switch(_target)
     {
         case TargetHint::OPENCL:
-            _subtensor = initialise_subtensor<arm_compute::CLSubTensor, arm_compute::ICLTensor>(_parent, _tensor_shape, _coords);
+            _subtensor = initialise_subtensor<arm_compute::CLSubTensor, arm_compute::ICLTensor>(_parent, _tensor_shape, _coords, _extend_parent);
             break;
         case TargetHint::NEON:
-            _subtensor = initialise_subtensor<arm_compute::SubTensor, arm_compute::ITensor>(_parent, _tensor_shape, _coords);
+            _subtensor = initialise_subtensor<arm_compute::SubTensor, arm_compute::ITensor>(_parent, _tensor_shape, _coords, _extend_parent);
             break;
         default:
             ARM_COMPUTE_ERROR("Invalid TargetHint");
diff --git a/src/graph/nodes/BranchLayer.cpp b/src/graph/nodes/BranchLayer.cpp
index 6352bfc..7a20a56 100644
--- a/src/graph/nodes/BranchLayer.cpp
+++ b/src/graph/nodes/BranchLayer.cpp
@@ -37,46 +37,6 @@
 
 using namespace arm_compute::graph;
 
-namespace
-{
-void depth_concatenate_output_info(ITensorInfo *info, ITensorInfo *sub_tensor_info)
-{
-    ARM_COMPUTE_ERROR_ON(info == nullptr);
-    ARM_COMPUTE_ERROR_ON(sub_tensor_info == nullptr);
-
-    TensorShape        info_shape            = info->tensor_shape();
-    const TensorShape &sub_tensor_info_shape = sub_tensor_info->tensor_shape();
-
-    // Update parent info and valid region
-    if(info_shape.total_size() == 0)
-    {
-        arm_compute::auto_init_if_empty(*info,
-                                        sub_tensor_info->tensor_shape(),
-                                        sub_tensor_info->num_channels(),
-                                        sub_tensor_info->data_type(), sub_tensor_info->fixed_point_position(), sub_tensor_info->quantization_info());
-        info->set_valid_region(sub_tensor_info->valid_region());
-    }
-    else
-    {
-        ARM_COMPUTE_ERROR_ON(info->num_channels() != sub_tensor_info->num_channels());
-        ARM_COMPUTE_ERROR_ON(info->data_type() != sub_tensor_info->data_type());
-        ARM_COMPUTE_ERROR_ON(info->fixed_point_position() != sub_tensor_info->fixed_point_position());
-
-        // Concatenate depth
-        ARM_COMPUTE_ERROR_ON(info_shape.x() != sub_tensor_info_shape.x());
-        ARM_COMPUTE_ERROR_ON(info_shape.y() != sub_tensor_info_shape.y());
-        info_shape.set(2, info_shape.z() + sub_tensor_info_shape.z());
-        info->set_tensor_shape(info_shape);
-
-        // Update valid region
-        arm_compute::ValidRegion info_valid_region = info->valid_region();
-        info_valid_region.shape.set(2, info_shape.z());
-        arm_compute::ValidRegion updated_region = arm_compute::intersect_valid_regions(info_valid_region, sub_tensor_info->valid_region());
-        info->set_valid_region(updated_region);
-    }
-}
-} // namespace
-
 /** Branch function */
 class BranchFunction final : public arm_compute::IFunction
 {
@@ -117,9 +77,8 @@
     // Create branch function
     auto func = arm_compute::support::cpp14::make_unique<BranchFunction>();
 
-    // Track output SubTensorInfo and depth
-    TensorInfo out_info;
-    int        depth = 0;
+    // Track output depth
+    int depth = 0;
 
     // Constuct all sub-graphs given the input/output
     for(auto &sg : _sub_graphs)
@@ -143,10 +102,13 @@
         // Create output sub-tensor
         if(!sg->has_output())
         {
-            ARM_COMPUTE_ERROR_ON(dynamic_cast<Tensor *>(output) == nullptr);
-            out = arm_compute::support::cpp14::make_unique<SubTensor>(*dynamic_cast<Tensor *>(output),
-                                                                      output->tensor()->info()->tensor_shape(),
-                                                                      Coordinates(0, 0, depth));
+            ARM_COMPUTE_ERROR_ON((dynamic_cast<Tensor *>(output) == nullptr) && (dynamic_cast<SubTensor *>(output) == nullptr));
+
+            out = arm_compute::support::cpp14::make_unique<SubTensor>(output->tensor(),
+                                                                      TensorShape(),
+                                                                      Coordinates(0, 0, depth),
+                                                                      output->target(),
+                                                                      true);
             out_sub_tensor = dynamic_cast<SubTensor *>(out.get());
         }
 
@@ -161,17 +123,8 @@
         {
             ARM_COMPUTE_ERROR_ON(out_sub_tensor->tensor() == nullptr);
             depth += out_sub_tensor->tensor()->info()->tensor_shape()[2];
-            depth_concatenate_output_info(&out_info, out_sub_tensor->tensor()->info());
         }
     }
 
-    // Auto-init output
-    arm_compute::auto_init_if_empty(*output->tensor()->info(),
-                                    out_info.tensor_shape(),
-                                    out_info.num_channels(),
-                                    out_info.data_type(),
-                                    out_info.fixed_point_position(),
-                                    out_info.quantization_info());
-
     return std::move(func);
 }
\ No newline at end of file
diff --git a/src/graph/nodes/ConvolutionLayer.cpp b/src/graph/nodes/ConvolutionLayer.cpp
index 53d06ea..f292b89 100644
--- a/src/graph/nodes/ConvolutionLayer.cpp
+++ b/src/graph/nodes/ConvolutionLayer.cpp
@@ -106,13 +106,16 @@
                                                                         const WeightsInfo    &weights_info,
                                                                         ConvolutionMethodHint conv_method)
 {
-    if(conv_method == ConvolutionMethodHint::GEMM)
+    if((conv_method == ConvolutionMethodHint::DIRECT)
+       && arm_compute::CLDirectConvolutionLayer::validate(input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), conv_info)) // NOLINT
     {
-        return instantiate_function<arm_compute::CLConvolutionLayer, arm_compute::ICLTensor, TargetHint::OPENCL>(input, weights, biases, output, conv_info, weights_info);
+        ARM_COMPUTE_LOG_GRAPH_INFO("Instantiating CLDirectConvolutionLayer");
+        return instantiate_direct_function<arm_compute::CLDirectConvolutionLayer, arm_compute::ICLTensor, TargetHint::OPENCL>(input, weights, biases, output, conv_info);
     }
     else
     {
-        return instantiate_direct_function<arm_compute::CLDirectConvolutionLayer, arm_compute::ICLTensor, TargetHint::OPENCL>(input, weights, biases, output, conv_info);
+        ARM_COMPUTE_LOG_GRAPH_INFO("Instantiating CLConvolutionLayer");
+        return instantiate_function<arm_compute::CLConvolutionLayer, arm_compute::ICLTensor, TargetHint::OPENCL>(input, weights, biases, output, conv_info, weights_info);
     }
 }
 
@@ -122,13 +125,16 @@
                                                                       const WeightsInfo    &weights_info,
                                                                       ConvolutionMethodHint conv_method)
 {
-    if(conv_method == ConvolutionMethodHint::GEMM)
+    if((conv_method == ConvolutionMethodHint::DIRECT)
+       && arm_compute::NEDirectConvolutionLayer::validate(input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), conv_info)) // NOLINT
     {
-        return instantiate_function<arm_compute::NEConvolutionLayer, arm_compute::ITensor, TargetHint::NEON>(input, weights, biases, output, conv_info, weights_info);
+        ARM_COMPUTE_LOG_GRAPH_INFO("Instantiating NEDirectConvolutionLayer");
+        return instantiate_direct_function<arm_compute::NEDirectConvolutionLayer, arm_compute::ITensor, TargetHint::NEON>(input, weights, biases, output, conv_info);
     }
     else
     {
-        return instantiate_direct_function<arm_compute::NEDirectConvolutionLayer, arm_compute::ITensor, TargetHint::NEON>(input, weights, biases, output, conv_info);
+        ARM_COMPUTE_LOG_GRAPH_INFO("Instantiating NEConvolutionLayer");
+        return instantiate_function<arm_compute::NEConvolutionLayer, arm_compute::ITensor, TargetHint::NEON>(input, weights, biases, output, conv_info, weights_info);
     }
 }
 } // namespace
@@ -258,12 +264,10 @@
     std::unique_ptr<arm_compute::IFunction> func;
     if(_target_hint == TargetHint::OPENCL)
     {
-        ARM_COMPUTE_LOG_GRAPH_INFO("Instantiating CLConvolutionLayer");
         func = instantiate<TargetHint::OPENCL>(input, _weights.tensor(), _biases.tensor(), output, _conv_info, _weights_info, conv_method_hint);
     }
     else
     {
-        ARM_COMPUTE_LOG_GRAPH_INFO("Instantiating NEConvolutionLayer");
         func = instantiate<TargetHint::NEON>(input, _weights.tensor(), _biases.tensor(), output, _conv_info, _weights_info, conv_method_hint);
     }
     return func;
@@ -325,12 +329,10 @@
         // Instantiate convolution function
         if(_target_hint == TargetHint::OPENCL)
         {
-            ARM_COMPUTE_LOG_GRAPH_INFO("Instantiating CLConvolutionLayer");
             func = instantiate<TargetHint::OPENCL>(_is[i].tensor(), _ws[i].tensor(), _bs[i].tensor(), _os[i].tensor(), _conv_info, _weights_info, conv_method_hint);
         }
         else
         {
-            ARM_COMPUTE_LOG_GRAPH_INFO("Instantiating NEConvolutionLayer");
             func = instantiate<TargetHint::NEON>(_is[i].tensor(), _ws[i].tensor(), _bs[i].tensor(), _os[i].tensor(), _conv_info, _weights_info, conv_method_hint);
         }
 
diff --git a/src/runtime/CL/CLSubTensor.cpp b/src/runtime/CL/CLSubTensor.cpp
index b228c0a..5f58024 100644
--- a/src/runtime/CL/CLSubTensor.cpp
+++ b/src/runtime/CL/CLSubTensor.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,11 +29,11 @@
 
 using namespace arm_compute;
 
-CLSubTensor::CLSubTensor(ICLTensor *parent, const TensorShape &tensor_shape, const Coordinates &coords)
+CLSubTensor::CLSubTensor(ICLTensor *parent, const TensorShape &tensor_shape, const Coordinates &coords, bool extend_parent)
     : _parent(nullptr), _info()
 {
     ARM_COMPUTE_ERROR_ON(parent == nullptr);
-    _info   = SubTensorInfo(parent->info(), tensor_shape, coords);
+    _info   = SubTensorInfo(parent->info(), tensor_shape, coords, extend_parent); // With extend_parent set, SubTensorInfo::set_tensor_shape() grows the parent instead of validating against it
     _parent = parent;
 }
 
diff --git a/src/runtime/SubTensor.cpp b/src/runtime/SubTensor.cpp
index 32924be..c5b8f33 100644
--- a/src/runtime/SubTensor.cpp
+++ b/src/runtime/SubTensor.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,11 +27,11 @@
 
 using namespace arm_compute;
 
-SubTensor::SubTensor(ITensor *parent, const TensorShape &tensor_shape, const Coordinates &coords)
+SubTensor::SubTensor(ITensor *parent, const TensorShape &tensor_shape, const Coordinates &coords, bool extend_parent)
     : _parent(nullptr), _info()
 {
     ARM_COMPUTE_ERROR_ON(parent == nullptr);
-    _info   = SubTensorInfo(parent->info(), tensor_shape, coords);
+    _info   = SubTensorInfo(parent->info(), tensor_shape, coords, extend_parent); // With extend_parent set, SubTensorInfo::set_tensor_shape() grows the parent instead of validating against it
     _parent = parent;
 }
 
