COMPMID-797 Integrate MobileNet QASYMM8 with the new graph.

Change-Id: I4df63ec2f4eb27a8a6eec2bea27741bf8dec6910
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/126966
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/arm_compute/graph/GraphBuilder.h b/arm_compute/graph/GraphBuilder.h
index e01cfb1..bbbbefb 100644
--- a/arm_compute/graph/GraphBuilder.h
+++ b/arm_compute/graph/GraphBuilder.h
@@ -109,13 +109,17 @@
      * @param[in] method                (Optional) Convolution method to use
      * @param[in] weights_accessor      (Optional) Accessor of the weights node data
      * @param[in] bias_accessor         (Optional) Accessor of the bias node data
+     * @param[in] weights_quant_info    (Optional) Weights quantization info
+     * @param[in] out_quant_info        (Optional) Output quantization info
      *
      * @return Node ID of the created node, EmptyNodeID in case of error
      */
     static NodeID add_convolution_node(Graph &g, NodeParams params, NodeIdxPair input,
                                        Size2D kernel_spatial_extend, unsigned int depth, PadStrideInfo conv_info,
                                        unsigned int num_groups = 1, ConvolutionMethod method = ConvolutionMethod::DEFAULT,
-                                       ITensorAccessorUPtr weights_accessor = nullptr, ITensorAccessorUPtr bias_accessor = nullptr);
+                                       ITensorAccessorUPtr weights_accessor = nullptr, ITensorAccessorUPtr bias_accessor = nullptr,
+                                       const QuantizationInfo weights_quant_info = QuantizationInfo(),
+                                       const QuantizationInfo out_quant_info     = QuantizationInfo());
     /** Adds a depth concatenate node to the graph
      *
      * @param[in] g      Graph to add the node to
@@ -135,13 +139,14 @@
      * @param[in] method                (Optional) Convolution method to use
      * @param[in] weights_accessor      (Optional) Accessor of the weights node data
      * @param[in] bias_accessor         (Optional) Accessor of the bias node data
+     * @param[in] quant_info            (Optional) Weights quantization info
      *
      * @return Node ID of the created node, EmptyNodeID in case of error
      */
     static NodeID add_depthwise_convolution_node(Graph &g, NodeParams params, NodeIdxPair input,
                                                  Size2D kernel_spatial_extend, PadStrideInfo conv_info,
                                                  DepthwiseConvolutionMethod method    = DepthwiseConvolutionMethod::DEFAULT,
-                                                 ITensorAccessorUPtr weights_accessor = nullptr, ITensorAccessorUPtr bias_accessor = nullptr);
+                                                 ITensorAccessorUPtr weights_accessor = nullptr, ITensorAccessorUPtr bias_accessor = nullptr, const QuantizationInfo quant_info = QuantizationInfo());
     /** Adds an element-wise layer node to the graph
      *
      * @param[in] g         Graph to add the node to
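
Note: a rough caller-side sketch of the extended builder signature follows; the node name, accessor variables, and quantization values are illustrative placeholders, not taken from the model or the patch:

    // Hypothetical usage of the extended signature (values are made up).
    NodeParams params{ "Conv2d_0", Target::NEON };
    NodeID conv_nid = GraphBuilder::add_convolution_node(
        g, params, { input_nid, 0 },
        Size2D(3, 3), 32, PadStrideInfo(2, 2, 0, 1),
        1 /* num_groups */, ConvolutionMethod::DEFAULT,
        std::move(weights_accessor), std::move(bias_accessor),
        QuantizationInfo(0.03f, 122),   // weights_quant_info
        QuantizationInfo(0.02f, 128));  // out_quant_info
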
diff --git a/arm_compute/graph/TensorDescriptor.h b/arm_compute/graph/TensorDescriptor.h
index fc0095e..785c493 100644
--- a/arm_compute/graph/TensorDescriptor.h
+++ b/arm_compute/graph/TensorDescriptor.h
@@ -37,18 +37,20 @@
     TensorDescriptor() = default;
     /** Constructor
      *
-     * @param[in] tensor_shape     Tensor shape
-     * @param[in] tensor_data_type Tensor data type
-     * @param[in] tensor_target    Target to allocate the tensor for
+     * @param[in] tensor_shape      Tensor shape
+     * @param[in] tensor_data_type  Tensor data type
+     * @param[in] tensor_quant_info Tensor quantization info
+     * @param[in] tensor_target     Target to allocate the tensor for
      */
-    TensorDescriptor(TensorShape tensor_shape, DataType tensor_data_type, Target tensor_target = Target::UNSPECIFIED)
-        : shape(tensor_shape), data_type(tensor_data_type), target(tensor_target)
+    TensorDescriptor(TensorShape tensor_shape, DataType tensor_data_type, QuantizationInfo tensor_quant_info = QuantizationInfo(), Target tensor_target = Target::UNSPECIFIED)
+        : shape(tensor_shape), data_type(tensor_data_type), quant_info(tensor_quant_info), target(tensor_target)
     {
     }
 
-    TensorShape shape{};                        /**< Tensor shape */
-    DataType    data_type{ DataType::UNKNOWN }; /**< Data type */
-    Target      target{ Target::UNSPECIFIED };  /**< Target */
+    TensorShape      shape{};                        /**< Tensor shape */
+    DataType         data_type{ DataType::UNKNOWN }; /**< Data type */
+    QuantizationInfo quant_info{};                   /**< Quantization info */
+    Target           target{ Target::UNSPECIFIED };  /**< Target */
 };
 } // namespace graph
 } // namespace arm_compute
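
Note: the extended descriptor constructor in use — a minimal sketch assuming a QASYMM8 network input; the scale and offset are placeholders for a real calibration:

    // 224x224x3 QASYMM8 input; quantization values are placeholders.
    TensorDescriptor desc(TensorShape(224U, 224U, 3U, 1U),
                          DataType::QASYMM8,
                          QuantizationInfo(0.0078125f, 128),
                          Target::NEON);
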
diff --git a/arm_compute/graph/TypePrinter.h b/arm_compute/graph/TypePrinter.h
index d9bc837..ed57830 100644
--- a/arm_compute/graph/TypePrinter.h
+++ b/arm_compute/graph/TypePrinter.h
@@ -308,6 +308,14 @@
 
     return os;
 }
+
+/** Formatted output of the QuantizationInfo type. */
+inline ::std::ostream &operator<<(::std::ostream &os, const QuantizationInfo &quantization_info)
+{
+    os << "Scale:" << quantization_info.scale << "~"
+       << "Offset:" << quantization_info.offset;
+    return os;
+}
 } // namespace graph
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_GRAPH_TYPE_PRINTER_H__ */
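
Note: with this overload the graph logger can stream quantization parameters directly. A minimal sketch, assuming the overload is brought into scope with using-directives:

    using namespace arm_compute;
    using namespace arm_compute::graph;

    QuantizationInfo qinfo(0.0078125f, 128);
    std::cout << qinfo << "\n"; // prints: Scale:0.0078125~Offset:128
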
diff --git a/arm_compute/graph/backends/ValidateHelpers.h b/arm_compute/graph/backends/ValidateHelpers.h
index ca01295..68a718a 100644
--- a/arm_compute/graph/backends/ValidateHelpers.h
+++ b/arm_compute/graph/backends/ValidateHelpers.h
@@ -70,12 +70,18 @@
     ARM_COMPUTE_RETURN_ERROR_ON(node.num_outputs() != 1);
 
     // Extract IO and info
-    arm_compute::ITensorInfo *input          = get_backing_tensor_info(node.input(0));
-    arm_compute::ITensorInfo *weights        = get_backing_tensor_info(node.input(1));
-    arm_compute::ITensorInfo *biases         = get_backing_tensor_info(node.input(2));
-    arm_compute::ITensorInfo *output         = get_backing_tensor_info(node.output(0));
-    const PadStrideInfo       conv_info      = node.convolution_info();
-    const ConvolutionMethod   conv_algorithm = node.convolution_method();
+    arm_compute::ITensorInfo *input   = get_backing_tensor_info(node.input(0));
+    arm_compute::ITensorInfo *weights = get_backing_tensor_info(node.input(1));
+    arm_compute::ITensorInfo *biases  = get_backing_tensor_info(node.input(2));
+    arm_compute::ITensorInfo *output  = get_backing_tensor_info(node.output(0));
+
+    if(is_data_type_quantized_asymmetric(input->data_type()))
+    {
+        biases->set_data_type(DataType::S32);
+    }
+
+    const PadStrideInfo     conv_info      = node.convolution_info();
+    const ConvolutionMethod conv_algorithm = node.convolution_method();
 
     // Validate function
     Status status{};
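
Note: the S32 retyping reflects how asymmetric 8-bit convolutions carry their bias: accumulation happens in 32-bit integers, and a float bias is quantized with the product of the input and weights scales. A worked sketch with made-up values (the formula is the standard one for asymmetric quantization, not quoted from this patch):

    #include <cmath>

    // b_q = round(b / (input_scale * weights_scale)); numbers are illustrative.
    const float   input_scale   = 0.0078125f;
    const float   weights_scale = 0.03f;
    const float   bias_f        = 1.5f;
    const int32_t bias_q        = static_cast<int32_t>(
        std::lround(bias_f / (input_scale * weights_scale))); // == 6400
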
diff --git a/arm_compute/graph/frontend/Layers.h b/arm_compute/graph/frontend/Layers.h
index 22133b8..2e7c50e 100644
--- a/arm_compute/graph/frontend/Layers.h
+++ b/arm_compute/graph/frontend/Layers.h
@@ -160,28 +160,34 @@
 public:
     /** Construct a convolution layer.
      *
-     * @param[in] conv_width  Convolution width.
-     * @param[in] conv_height Convolution height.
-     * @param[in] ofm         Output feature map.
-     * @param[in] weights     Accessor to get kernel weights from.
-     * @param[in] bias        Accessor to get kernel bias from.
-     * @param[in] conv_info   Padding and stride information.
-     * @param[in] num_groups  (Optional) Number of groups. Default: 1.
+     * @param[in] conv_width         Convolution width.
+     * @param[in] conv_height        Convolution height.
+     * @param[in] ofm                Output feature map.
+     * @param[in] weights            Accessor to get kernel weights from.
+     * @param[in] bias               Accessor to get kernel bias from.
+     * @param[in] conv_info          Padding and stride information.
+     * @param[in] num_groups         (Optional) Number of groups. Default: 1.
+     * @param[in] weights_quant_info (Optional) Weights quantization info
+     * @param[in] out_quant_info     (Optional) Output quantization info
      */
-    ConvolutionLayer(unsigned int        conv_width,
-                     unsigned int        conv_height,
-                     unsigned int        ofm,
-                     ITensorAccessorUPtr weights,
-                     ITensorAccessorUPtr bias,
-                     PadStrideInfo       conv_info,
-                     unsigned int        num_groups = 1)
+    ConvolutionLayer(unsigned int           conv_width,
+                     unsigned int           conv_height,
+                     unsigned int           ofm,
+                     ITensorAccessorUPtr    weights,
+                     ITensorAccessorUPtr    bias,
+                     PadStrideInfo          conv_info,
+                     unsigned int           num_groups         = 1,
+                     const QuantizationInfo weights_quant_info = QuantizationInfo(),
+                     const QuantizationInfo out_quant_info     = QuantizationInfo())
         : _conv_width(conv_width),
           _conv_height(conv_height),
           _ofm(ofm),
           _conv_info(std::move(conv_info)),
           _num_groups(num_groups),
           _weights(std::move(weights)),
-          _bias(std::move(bias))
+          _bias(std::move(bias)),
+          _weights_quant_info(std::move(weights_quant_info)),
+          _out_quant_info(std::move(out_quant_info))
     {
     }
 
@@ -192,17 +198,19 @@
         return GraphBuilder::add_convolution_node(s.graph(), common_params, input,
                                                   Size2D(_conv_width, _conv_height), _ofm, _conv_info, _num_groups,
                                                   s.hints().convolution_method_hint,
-                                                  std::move(_weights), std::move(_bias));
+                                                  std::move(_weights), std::move(_bias), std::move(_weights_quant_info), std::move(_out_quant_info));
     }
 
 private:
-    unsigned int        _conv_width;
-    unsigned int        _conv_height;
-    unsigned int        _ofm;
-    const PadStrideInfo _conv_info;
-    unsigned int        _num_groups;
-    ITensorAccessorUPtr _weights;
-    ITensorAccessorUPtr _bias;
+    unsigned int           _conv_width;
+    unsigned int           _conv_height;
+    unsigned int           _ofm;
+    const PadStrideInfo    _conv_info;
+    unsigned int           _num_groups;
+    ITensorAccessorUPtr    _weights;
+    ITensorAccessorUPtr    _bias;
+    const QuantizationInfo _weights_quant_info;
+    const QuantizationInfo _out_quant_info;
 };
 
 /** Depthwise Convolution Layer */
@@ -216,17 +224,20 @@
      * @param[in] weights     Accessor to get kernel weights from.
      * @param[in] bias        Accessor to get kernel bias from.
      * @param[in] conv_info   Padding and stride information.
+     * @param[in] quant_info  (Optional) Quantization info used for weights
      */
-    DepthwiseConvolutionLayer(unsigned int        conv_width,
-                              unsigned int        conv_height,
-                              ITensorAccessorUPtr weights,
-                              ITensorAccessorUPtr bias,
-                              PadStrideInfo       conv_info)
+    DepthwiseConvolutionLayer(unsigned int           conv_width,
+                              unsigned int           conv_height,
+                              ITensorAccessorUPtr    weights,
+                              ITensorAccessorUPtr    bias,
+                              PadStrideInfo          conv_info,
+                              const QuantizationInfo quant_info = QuantizationInfo())
         : _conv_width(conv_width),
           _conv_height(conv_height),
           _conv_info(std::move(conv_info)),
           _weights(std::move(weights)),
-          _bias(std::move(bias))
+          _bias(std::move(bias)),
+          _quant_info(std::move(quant_info))
     {
     }
 
@@ -237,15 +248,16 @@
         return GraphBuilder::add_depthwise_convolution_node(s.graph(), common_params,
                                                             input, Size2D(_conv_width, _conv_height), _conv_info,
                                                             s.hints().depthwise_convolution_method_hint,
-                                                            std::move(_weights), std::move(_bias));
+                                                            std::move(_weights), std::move(_bias), std::move(_quant_info));
     }
 
 private:
-    unsigned int        _conv_width;
-    unsigned int        _conv_height;
-    const PadStrideInfo _conv_info;
-    ITensorAccessorUPtr _weights;
-    ITensorAccessorUPtr _bias;
+    unsigned int           _conv_width;
+    unsigned int           _conv_height;
+    const PadStrideInfo    _conv_info;
+    ITensorAccessorUPtr    _weights;
+    ITensorAccessorUPtr    _bias;
+    const QuantizationInfo _quant_info;
 };
 
 /** Flatten Layer */
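
Note: frontend usage mirrors the builder change. A sketch of a quantized convolution in stream syntax; the paths and quantization values are placeholders:

    // Quantization values and file names are placeholders.
    graph << ConvolutionLayer(
                 3U, 3U, 32U,
                 get_weights_accessor(data_path, "Conv2d_0_weights.npy"),
                 get_weights_accessor(data_path, "Conv2d_0_bias.npy"),
                 PadStrideInfo(2U, 2U, 0U, 1U),
                 1U,                             // num_groups
                 QuantizationInfo(0.03f, 122),   // weights
                 QuantizationInfo(0.02f, 128));  // output
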
diff --git a/arm_compute/graph/nodes/ConvolutionLayerNode.h b/arm_compute/graph/nodes/ConvolutionLayerNode.h
index 70fefbe..d029895 100644
--- a/arm_compute/graph/nodes/ConvolutionLayerNode.h
+++ b/arm_compute/graph/nodes/ConvolutionLayerNode.h
@@ -36,10 +36,11 @@
 public:
     /** Constructor
      *
-     * @param[in] info   Convolution layer attributes
-     * @param[in] method (Optional) Convolution method to use
+     * @param[in] info           Convolution layer attributes
+     * @param[in] method         (Optional) Convolution method to use
+     * @param[in] out_quant_info (Optional) Output quantization info
      */
-    ConvolutionLayerNode(PadStrideInfo info, ConvolutionMethod method = ConvolutionMethod::DEFAULT);
+    ConvolutionLayerNode(PadStrideInfo info, ConvolutionMethod method = ConvolutionMethod::DEFAULT, QuantizationInfo out_quant_info = QuantizationInfo());
     /** Sets the convolution layer method to use
      *
      * @param[in] method Method to use for convolution
@@ -78,6 +79,7 @@
 private:
     PadStrideInfo     _info;
     ConvolutionMethod _method;
+    QuantizationInfo  _out_quant_info;
 };
 } // namespace graph
 } // namespace arm_compute
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
index 5e05e93..3c9fb0e 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
@@ -106,6 +106,8 @@
     int32_t                            _b_offset;
     bool                               _run_vector_matrix_multiplication;
     bool                               _dot_product_path;
+    bool                               _is_first_run;
+    bool                               _reshape_b_only_on_first_run;
 };
 }
 #endif /*__ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H__ */
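
Note: the two new members cache state for the "reshape B only on first run" optimization, which the caller requests through GEMMInfo. A configure-time sketch, assuming the three-bool GEMMInfo constructor and constant weights:

    // GEMMInfo(is_a_reshaped, is_b_reshaped, reshape_b_only_on_first_run)
    GEMMInfo gemm_info(false, false, true); // B (weights) is constant
    NEGEMMLowpMatrixMultiplyCore gemmlowp;
    gemmlowp.configure(&a, &b, &output, gemm_info);
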
diff --git a/examples/graph_mobilenet_qasymm8.cpp b/examples/graph_mobilenet_qasymm8.cpp
index cb49ffd..ddf6175 100644
--- a/examples/graph_mobilenet_qasymm8.cpp
+++ b/examples/graph_mobilenet_qasymm8.cpp
@@ -28,6 +28,7 @@
 
 #include <cstdlib>
 
+using namespace arm_compute;
 using namespace arm_compute::utils;
 using namespace arm_compute::graph::frontend;
 using namespace arm_compute::graph_utils;
@@ -40,8 +41,6 @@
 class GraphMobileNetQASYMM8Example : public Example
 {
 public:
-    //FIXME: Missing quantization info to the tensor descriptor (Giorgio is working on it)
-#if 0
     void do_setup(int argc, char **argv) override
     {
         std::string data_path; /* Path to the trainable data */
@@ -93,8 +92,8 @@
         };
 
         // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
-        const int  int_target_hint = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
-        TargetHint target_hint     = set_target_hint(int_target_hint);
+        const int target      = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
+        Target    target_hint = set_target_hint(target);
 
         // Parse arguments
         if(argc < 2)
@@ -123,54 +122,56 @@
         }
 
         graph << target_hint
-              << arm_compute::graph::Tensor(TensorInfo(TensorShape(224U, 224U, 3U, 1U), 1, DataType::QASYMM8, in_quant_info),
-                                            get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/" + input))
+              << DepthwiseConvolutionMethod::OPTIMIZED_3x3 // FIXME(COMPMID-1073): Add heuristics to automatically call the optimized 3x3 method
+              << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::QASYMM8, in_quant_info),
+                            get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/" + input))
               << ConvolutionLayer(
                   3U, 3U, 32U,
                   get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Conv2d_0_weights.npy"),
                   get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Conv2d_0_bias.npy"),
                   PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR),
-                  1, WeightsInfo(),
-                  conv_weights_quant_info.at(0),
-                  mid_quant_info)
-              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f))
-              << get_dwsc_node(data_path, "Conv2d_1", 64U, PadStrideInfo(1U, 1U, 1U, 1U), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(0), point_weights_quant_info.at(0))
-              << get_dwsc_node(data_path, "Conv2d_2", 128U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(1),
-                               point_weights_quant_info.at(1))
-              << get_dwsc_node(data_path, "Conv2d_3", 128U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(2),
-                               point_weights_quant_info.at(2))
-              << get_dwsc_node(data_path, "Conv2d_4", 256U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(3),
-                               point_weights_quant_info.at(3))
-              << get_dwsc_node(data_path, "Conv2d_5", 256U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(4),
-                               point_weights_quant_info.at(4))
-              << get_dwsc_node(data_path, "Conv2d_6", 512U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(5),
-                               point_weights_quant_info.at(5))
-              << get_dwsc_node(data_path, "Conv2d_7", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(6),
-                               point_weights_quant_info.at(6))
-              << get_dwsc_node(data_path, "Conv2d_8", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(7),
-                               point_weights_quant_info.at(7))
-              << get_dwsc_node(data_path, "Conv2d_9", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(8),
-                               point_weights_quant_info.at(8))
-              << get_dwsc_node(data_path, "Conv2d_10", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(9),
-                               point_weights_quant_info.at(9))
-              << get_dwsc_node(data_path, "Conv2d_11", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(10),
-                               point_weights_quant_info.at(10))
-              << get_dwsc_node(data_path, "Conv2d_12", 1024U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(11),
-                               point_weights_quant_info.at(11))
-              << get_dwsc_node(data_path, "Conv2d_13", 1024U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(12),
+                  1, conv_weights_quant_info.at(0), mid_quant_info)
+              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f));
+        graph << get_dwsc_node(data_path, "Conv2d_1", 64U, PadStrideInfo(1U, 1U, 1U, 1U), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(0), point_weights_quant_info.at(0));
+        graph << get_dwsc_node(data_path, "Conv2d_2", 128U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(1),
+                               point_weights_quant_info.at(1));
+        graph << get_dwsc_node(data_path, "Conv2d_3", 128U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(2),
+                               point_weights_quant_info.at(2));
+        graph << get_dwsc_node(data_path, "Conv2d_4", 256U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(3),
+                               point_weights_quant_info.at(3));
+        graph << get_dwsc_node(data_path, "Conv2d_5", 256U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(4),
+                               point_weights_quant_info.at(4));
+        graph << get_dwsc_node(data_path, "Conv2d_6", 512U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(5),
+                               point_weights_quant_info.at(5));
+        graph << get_dwsc_node(data_path, "Conv2d_7", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(6),
+                               point_weights_quant_info.at(6));
+        graph << get_dwsc_node(data_path, "Conv2d_8", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(7),
+                               point_weights_quant_info.at(7));
+        graph << get_dwsc_node(data_path, "Conv2d_9", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(8),
+                               point_weights_quant_info.at(8));
+        graph << get_dwsc_node(data_path, "Conv2d_10", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(9),
+                               point_weights_quant_info.at(9));
+        graph << get_dwsc_node(data_path, "Conv2d_11", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(10),
+                               point_weights_quant_info.at(10));
+        graph << get_dwsc_node(data_path, "Conv2d_12", 1024U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(11),
+                               point_weights_quant_info.at(11));
+        graph << get_dwsc_node(data_path, "Conv2d_13", 1024U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(12),
                                point_weights_quant_info.at(12))
               << PoolingLayer(PoolingLayerInfo(PoolingType::AVG))
               << ConvolutionLayer(
                   1U, 1U, 1001U,
                   get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Logits_Conv2d_1c_1x1_weights.npy"),
                   get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Logits_Conv2d_1c_1x1_bias.npy"),
-                  PadStrideInfo(1U, 1U, 0U, 0U), 1, WeightsInfo(), conv_weights_quant_info.at(1))
+                  PadStrideInfo(1U, 1U, 0U, 0U), 1, conv_weights_quant_info.at(1))
               << ReshapeLayer(TensorShape(1001U))
               << SoftmaxLayer()
-              << arm_compute::graph::Tensor(get_output_accessor(label, 5));
+              << OutputLayer(get_output_accessor(label, 5));
 
-        // In order to enable the OpenCL tuner, graph_init() has to be called only when all nodes have been instantiated
-        graph.graph_init(int_target_hint == 2);
+        // Finalize graph
+        GraphConfig config;
+        config.use_function_memory_manager = true;
+        config.use_tuner                   = (target == 2);
+        graph.finalize(target_hint, config);
     }
     void do_run() override
     {
@@ -179,7 +180,7 @@
     }
 
 private:
-    Graph graph{};
+    Stream graph{ 0, "MobileNetV1_QASYMM8" };
 
     /** This function produces a depthwise separable convolution node (i.e. depthwise + pointwise layers) with ReLU6 activation after each layer.
      *
@@ -199,29 +200,23 @@
                               QuantizationInfo depth_weights_quant_info, QuantizationInfo point_weights_quant_info)
     {
         std::string total_path = "/cnn_data/mobilenet_qasymm8_model/" + param_path + "_";
-        SubGraph    sg;
+        SubStream   sg(graph);
 
         sg << DepthwiseConvolutionLayer(
                3U, 3U,
                get_weights_accessor(data_path, total_path + "depthwise_weights.npy"),
                get_weights_accessor(data_path, total_path + "depthwise_bias.npy"),
-               dwc_pad_stride_info,
-               true,
-               depth_weights_quant_info)
+               dwc_pad_stride_info, depth_weights_quant_info)
            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f))
            << ConvolutionLayer(
                1U, 1U, conv_filt,
                get_weights_accessor(data_path, total_path + "pointwise_weights.npy"),
                get_weights_accessor(data_path, total_path + "pointwise_bias.npy"),
-               conv_pad_stride_info,
-               1, WeightsInfo(),
-               point_weights_quant_info)
+               conv_pad_stride_info, 1, point_weights_quant_info)
            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f));
 
         return BranchLayer(std::move(sg));
     }
-#endif /* if 0 */
-    Stream graph { 0, "MobileNetV1_QASYMM8" };
 };
 /** Main program for MobileNetQASYMM8
  *
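
Note: the example is re-enabled on the new frontend: Graph/SubGraph/graph_init() give way to Stream/SubStream/finalize(). The skeleton of the migration, with the layers elided and accessor variables as placeholders:

    Stream graph{ 0, "MobileNetV1_QASYMM8" };
    graph << target_hint
          << InputLayer(TensorDescriptor(shape, DataType::QASYMM8, in_quant_info),
                        std::move(input_accessor))
          /* ... layers ... */
          << OutputLayer(std::move(output_accessor));

    GraphConfig config;
    config.use_tuner = (target == 2);
    graph.finalize(target_hint, config);
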
diff --git a/src/core/TensorInfo.cpp b/src/core/TensorInfo.cpp
index 539d0f8..676938a 100644
--- a/src/core/TensorInfo.cpp
+++ b/src/core/TensorInfo.cpp
@@ -322,7 +322,7 @@
 {
     _data_type = data_type;
     _format    = Format::UNKNOWN;
-    return *this;
+    return set_tensor_shape(tensor_shape()); // Force total size and strides to update
 }
 
 ITensorInfo &TensorInfo::set_num_channels(int num_channels)
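
Note: this fix matters for the bias retyping above: switching QASYMM8 (1 byte per element) biases to S32 (4 bytes) changes the element size, so byte strides and total size must be recomputed. A sketch of the failure mode the one-line change addresses:

    TensorInfo info(TensorShape(32U), 1, DataType::QASYMM8); // 1 byte/element
    info.set_data_type(DataType::S32);                       // 4 bytes/element
    // Without re-running set_tensor_shape(), total_size() and
    // strides_in_bytes() would still describe the 1-byte layout.
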
diff --git a/src/graph/GraphBuilder.cpp b/src/graph/GraphBuilder.cpp
index 0d1bdc3..4ad34e7 100644
--- a/src/graph/GraphBuilder.cpp
+++ b/src/graph/GraphBuilder.cpp
@@ -206,7 +206,9 @@
 NodeID GraphBuilder::add_convolution_node(Graph &g, NodeParams params, NodeIdxPair input,
                                           Size2D kernel_spatial_extend, unsigned int depth, PadStrideInfo conv_info,
                                           unsigned int num_groups, ConvolutionMethod method,
-                                          ITensorAccessorUPtr weights_accessor, ITensorAccessorUPtr bias_accessor)
+                                          ITensorAccessorUPtr weights_accessor, ITensorAccessorUPtr bias_accessor,
+                                          const QuantizationInfo weights_quant_info,
+                                          const QuantizationInfo out_quant_info)
 {
     CHECK_NODEIDX_PAIR(input, g);
     ARM_COMPUTE_ERROR_ON(depth == 0);
@@ -220,7 +222,13 @@
     // Create weights node
     TensorDescriptor w_desc = input_tensor_desc;
     w_desc.shape            = TensorShape(kernel_spatial_extend.width, kernel_spatial_extend.height, w_desc.shape.z() / num_groups, depth);
-    NodeID w_nid            = add_const_node_with_name(g, params, "Weights", w_desc, std::move(weights_accessor));
+
+    if(!weights_quant_info.empty())
+    {
+        w_desc.quant_info = weights_quant_info;
+    }
+
+    NodeID w_nid = add_const_node_with_name(g, params, "Weights", w_desc, std::move(weights_accessor));
 
     // Create bias nodes
     NodeID b_nid = EmptyNodeID;
@@ -234,7 +242,7 @@
     if(num_groups == 1)
     {
         // Create convolution node and connect
-        NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, method);
+        NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, method, out_quant_info);
         g.add_connection(input.node_id, input.index, conv_nid, 0);
         g.add_connection(w_nid, 0, conv_nid, 1);
         if(has_bias)
@@ -270,7 +278,7 @@
 
 NodeID GraphBuilder::add_depthwise_convolution_node(Graph &g, NodeParams params, NodeIdxPair input, Size2D kernel_spatial_extend, PadStrideInfo conv_info,
                                                     DepthwiseConvolutionMethod method,
-                                                    ITensorAccessorUPtr weights_accessor, ITensorAccessorUPtr bias_accessor)
+                                                    ITensorAccessorUPtr weights_accessor, ITensorAccessorUPtr bias_accessor, const QuantizationInfo quant_info)
 {
     CHECK_NODEIDX_PAIR(input, g);
     ARM_COMPUTE_ERROR_ON((kernel_spatial_extend.width == 0) || (kernel_spatial_extend.height == 0));
@@ -283,7 +291,13 @@
     // Create weights node
     TensorDescriptor w_desc = input_tensor_desc;
     w_desc.shape            = TensorShape(kernel_spatial_extend.width, kernel_spatial_extend.height, w_desc.shape.z());
-    NodeID w_nid            = add_const_node_with_name(g, params, "Weights", w_desc, std::move(weights_accessor));
+
+    if(!quant_info.empty())
+    {
+        w_desc.quant_info = quant_info;
+    }
+
+    NodeID w_nid = add_const_node_with_name(g, params, "Weights", w_desc, std::move(weights_accessor));
 
     // Create bias nodes
     NodeID b_nid = EmptyNodeID;
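
Note: the empty() guard keeps the weights descriptor inheriting the input's quantization unless the caller overrides it. A sketch of that convention, assuming empty() tests for the default zero scale/offset:

    QuantizationInfo none;          // default: scale 0, offset 0
    QuantizationInfo q(0.1f, 5);
    // none.empty() == true  -> treated as "not provided"
    // q.empty()    == false -> overrides the inherited descriptor
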
diff --git a/src/graph/backends/CL/CLDeviceBackend.cpp b/src/graph/backends/CL/CLDeviceBackend.cpp
index f10eb33..92cb693 100644
--- a/src/graph/backends/CL/CLDeviceBackend.cpp
+++ b/src/graph/backends/CL/CLDeviceBackend.cpp
@@ -126,7 +126,7 @@
     ARM_COMPUTE_ERROR_ON(tensor_desc.target != Target::CL);
 
     // Create backend tensor handle
-    TensorInfo info(tensor_desc.shape, 1, tensor_desc.data_type);
+    TensorInfo info(tensor_desc.shape, 1, tensor_desc.data_type, tensor_desc.quant_info);
     auto       backend_tensor_handle = support::cpp14::make_unique<CLTensorHandle>(info);
 
     return std::move(backend_tensor_handle);
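
Note: the same one-line propagation lands in all three device backends (CL here, GLES and NEON below), so a quantized descriptor survives into the backing TensorInfo. A sketch with placeholder calibration values:

    TensorDescriptor desc(TensorShape(224U, 224U, 3U, 1U), DataType::QASYMM8,
                          QuantizationInfo(0.0078125f, 128), Target::CL);
    TensorInfo info(desc.shape, 1, desc.data_type, desc.quant_info);
    // info.quantization_info() now matches desc.quant_info
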
diff --git a/src/graph/backends/CL/CLFunctionsFactory.cpp b/src/graph/backends/CL/CLFunctionsFactory.cpp
index 1b448fe..ad73a79 100644
--- a/src/graph/backends/CL/CLFunctionsFactory.cpp
+++ b/src/graph/backends/CL/CLFunctionsFactory.cpp
@@ -154,10 +154,16 @@
     ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
 
     // Extract IO and info
-    ICLTensor              *input          = get_backing_tensor(node.input(0));
-    ICLTensor              *weights        = get_backing_tensor(node.input(1));
-    ICLTensor              *biases         = get_backing_tensor(node.input(2));
-    ICLTensor              *output         = get_backing_tensor(node.output(0));
+    ICLTensor *input   = get_backing_tensor(node.input(0));
+    ICLTensor *weights = get_backing_tensor(node.input(1));
+    ICLTensor *biases  = get_backing_tensor(node.input(2));
+    ICLTensor *output  = get_backing_tensor(node.output(0));
+
+    if(is_data_type_quantized_asymmetric(input->info()->data_type()))
+    {
+        biases->info()->set_data_type(DataType::S32);
+    }
+
     const PadStrideInfo     conv_info      = node.convolution_info();
     const ConvolutionMethod conv_algorithm = node.convolution_method();
 
@@ -190,6 +196,8 @@
     // Log info
     ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
                                << " Data Type: " << input->info()->data_type()
+                               << " Input QuantInfo: " << input->info()->quantization_info()
+                               << " Weights QuantInfo: " << weights->info()->quantization_info()
                                << " Input shape: " << input->info()->tensor_shape()
                                << " Weights shape: " << weights->info()->tensor_shape()
                                << " Output shape: " << output->info()->tensor_shape()
@@ -251,10 +259,16 @@
     ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
 
     // Extract IO and info
-    ICLTensor                       *input         = get_backing_tensor(node.input(0));
-    ICLTensor                       *weights       = get_backing_tensor(node.input(1));
-    ICLTensor                       *biases        = get_backing_tensor(node.input(2));
-    ICLTensor                       *output        = get_backing_tensor(node.output(0));
+    ICLTensor *input   = get_backing_tensor(node.input(0));
+    ICLTensor *weights = get_backing_tensor(node.input(1));
+    ICLTensor *biases  = get_backing_tensor(node.input(2));
+    ICLTensor *output  = get_backing_tensor(node.output(0));
+
+    if(is_data_type_quantized_asymmetric(input->info()->data_type()))
+    {
+        biases->info()->set_data_type(DataType::S32);
+    }
+
     const PadStrideInfo              conv_info     = node.convolution_info();
     const DepthwiseConvolutionMethod dwc_algorithm = node.depthwise_convolution_method();
 
@@ -275,6 +289,8 @@
     // Log info
     ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
                                << " Data Type: " << input->info()->data_type()
+                               << " Input QuantInfo: " << input->info()->quantization_info()
+                               << " Weights QuantInfo: " << weights->info()->quantization_info()
                                << " Input shape: " << input->info()->tensor_shape()
                                << " Weights shape: " << weights->info()->tensor_shape()
                                << " Output shape: " << output->info()->tensor_shape()
diff --git a/src/graph/backends/GLES/GCDeviceBackend.cpp b/src/graph/backends/GLES/GCDeviceBackend.cpp
index 8cd9994..a55215f 100644
--- a/src/graph/backends/GLES/GCDeviceBackend.cpp
+++ b/src/graph/backends/GLES/GCDeviceBackend.cpp
@@ -87,7 +87,7 @@
     ARM_COMPUTE_ERROR_ON(tensor_desc.target != Target::GC);
 
     // Create backend tensor handle
-    TensorInfo info(tensor_desc.shape, 1, tensor_desc.data_type);
+    TensorInfo info(tensor_desc.shape, 1, tensor_desc.data_type, tensor_desc.quant_info);
     auto       backend_tensor_handle = support::cpp14::make_unique<GCTensorHandle>(info);
 
     return std::move(backend_tensor_handle);
diff --git a/src/graph/backends/GLES/GCFunctionsFactory.cpp b/src/graph/backends/GLES/GCFunctionsFactory.cpp
index 12e7c04..d3c5737 100644
--- a/src/graph/backends/GLES/GCFunctionsFactory.cpp
+++ b/src/graph/backends/GLES/GCFunctionsFactory.cpp
@@ -154,10 +154,16 @@
     ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
 
     // Extract IO and info
-    IGCTensor              *input          = get_backing_tensor(node.input(0));
-    IGCTensor              *weights        = get_backing_tensor(node.input(1));
-    IGCTensor              *biases         = get_backing_tensor(node.input(2));
-    IGCTensor              *output         = get_backing_tensor(node.output(0));
+    IGCTensor *input   = get_backing_tensor(node.input(0));
+    IGCTensor *weights = get_backing_tensor(node.input(1));
+    IGCTensor *biases  = get_backing_tensor(node.input(2));
+    IGCTensor *output  = get_backing_tensor(node.output(0));
+
+    if(is_data_type_quantized_asymmetric(input->info()->data_type()))
+    {
+        biases->info()->set_data_type(DataType::S32);
+    }
+
     const PadStrideInfo     conv_info      = node.convolution_info();
     const ConvolutionMethod conv_algorithm = node.convolution_method();
 
@@ -180,6 +186,8 @@
     // Log info
     ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
                                << " Data Type: " << input->info()->data_type()
+                               << " Input QuantInfo: " << input->info()->quantization_info()
+                               << " Weights QuantInfo: " << weights->info()->quantization_info()
                                << " Input shape: " << input->info()->tensor_shape()
                                << " Weights shape: " << weights->info()->tensor_shape()
                                << " Output shape: " << output->info()->tensor_shape()
@@ -241,10 +249,16 @@
     ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
 
     // Extract IO and info
-    IGCTensor                       *input         = get_backing_tensor(node.input(0));
-    IGCTensor                       *weights       = get_backing_tensor(node.input(1));
-    IGCTensor                       *biases        = get_backing_tensor(node.input(2));
-    IGCTensor                       *output        = get_backing_tensor(node.output(0));
+    IGCTensor *input   = get_backing_tensor(node.input(0));
+    IGCTensor *weights = get_backing_tensor(node.input(1));
+    IGCTensor *biases  = get_backing_tensor(node.input(2));
+    IGCTensor *output  = get_backing_tensor(node.output(0));
+
+    if(is_data_type_quantized_asymmetric(input->info()->data_type()))
+    {
+        biases->info()->set_data_type(DataType::S32);
+    }
+
     const PadStrideInfo              conv_info     = node.convolution_info();
     const DepthwiseConvolutionMethod dwc_algorithm = node.depthwise_convolution_method();
 
@@ -264,6 +278,8 @@
     // Log info
     ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
                                << " Data Type: " << input->info()->data_type()
+                               << " Input QuantInfo: " << input->info()->quantization_info()
+                               << " Weights QuantInfo: " << weights->info()->quantization_info()
                                << " Input shape: " << input->info()->tensor_shape()
                                << " Weights shape: " << weights->info()->tensor_shape()
                                << " Output shape: " << output->info()->tensor_shape()
diff --git a/src/graph/backends/NEON/NEDeviceBackend.cpp b/src/graph/backends/NEON/NEDeviceBackend.cpp
index aaf0582..9123196 100644
--- a/src/graph/backends/NEON/NEDeviceBackend.cpp
+++ b/src/graph/backends/NEON/NEDeviceBackend.cpp
@@ -93,7 +93,7 @@
     ARM_COMPUTE_ERROR_ON(tensor_desc.target != Target::NEON);
 
     // Create backend tensor handle
-    TensorInfo info(tensor_desc.shape, 1, tensor_desc.data_type);
+    TensorInfo info(tensor_desc.shape, 1, tensor_desc.data_type, tensor_desc.quant_info);
     auto       backend_tensor_handle = support::cpp14::make_unique<NETensorHandle>(info);
 
     return std::move(backend_tensor_handle);
diff --git a/src/graph/backends/NEON/NEFunctionFactory.cpp b/src/graph/backends/NEON/NEFunctionFactory.cpp
index 228af9c..906378c 100644
--- a/src/graph/backends/NEON/NEFunctionFactory.cpp
+++ b/src/graph/backends/NEON/NEFunctionFactory.cpp
@@ -140,10 +140,16 @@
     ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
 
     // Extract IO and info
-    ITensor                *input          = get_backing_tensor(node.input(0));
-    ITensor                *weights        = get_backing_tensor(node.input(1));
-    ITensor                *biases         = get_backing_tensor(node.input(2));
-    ITensor                *output         = get_backing_tensor(node.output(0));
+    ITensor *input   = get_backing_tensor(node.input(0));
+    ITensor *weights = get_backing_tensor(node.input(1));
+    ITensor *biases  = get_backing_tensor(node.input(2));
+    ITensor *output  = get_backing_tensor(node.output(0));
+
+    if(is_data_type_quantized_asymmetric(input->info()->data_type()))
+    {
+        biases->info()->set_data_type(DataType::S32);
+    }
+
     const PadStrideInfo     conv_info      = node.convolution_info();
     const ConvolutionMethod conv_algorithm = node.convolution_method();
 
@@ -175,6 +181,8 @@
     // Log info
     ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
                                << " Data Type: " << input->info()->data_type()
+                               << " Input QuantInfo: " << input->info()->quantization_info()
+                               << " Weights QuantInfo: " << weights->info()->quantization_info()
                                << " Input shape: " << input->info()->tensor_shape()
                                << " Weights shape: " << weights->info()->tensor_shape()
                                << " Output shape: " << output->info()->tensor_shape()
@@ -234,10 +242,16 @@
     ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
 
     // Extract IO and info
-    ITensor                         *input         = get_backing_tensor(node.input(0));
-    ITensor                         *weights       = get_backing_tensor(node.input(1));
-    ITensor                         *biases        = get_backing_tensor(node.input(2));
-    ITensor                         *output        = get_backing_tensor(node.output(0));
+    ITensor *input   = get_backing_tensor(node.input(0));
+    ITensor *weights = get_backing_tensor(node.input(1));
+    ITensor *biases  = get_backing_tensor(node.input(2));
+    ITensor *output  = get_backing_tensor(node.output(0));
+
+    if(is_data_type_quantized_asymmetric(input->info()->data_type()))
+    {
+        biases->info()->set_data_type(DataType::S32);
+    }
+
     const PadStrideInfo              conv_info     = node.convolution_info();
     const DepthwiseConvolutionMethod dwc_algorithm = node.depthwise_convolution_method();
 
@@ -258,6 +272,8 @@
     // Log info
     ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
                                << " Data Type: " << input->info()->data_type()
+                               << " Input QuantInfo: " << input->info()->quantization_info()
+                               << " Weights QuantInfo: " << weights->info()->quantization_info()
                                << " Input shape: " << input->info()->tensor_shape()
                                << " Weights shape: " << weights->info()->tensor_shape()
                                << " Output shape: " << output->info()->tensor_shape()
diff --git a/src/graph/nodes/ConvolutionLayerNode.cpp b/src/graph/nodes/ConvolutionLayerNode.cpp
index 4617284..eb0c6a1 100644
--- a/src/graph/nodes/ConvolutionLayerNode.cpp
+++ b/src/graph/nodes/ConvolutionLayerNode.cpp
@@ -31,8 +31,8 @@
 {
 namespace graph
 {
-ConvolutionLayerNode::ConvolutionLayerNode(PadStrideInfo info, ConvolutionMethod method)
-    : _info(std::move(info)), _method(method)
+ConvolutionLayerNode::ConvolutionLayerNode(PadStrideInfo info, ConvolutionMethod method, QuantizationInfo out_quant_info)
+    : _info(std::move(info)), _method(method), _out_quant_info(out_quant_info)
 {
     _input_edges.resize(3, EmptyEdgeID);
     _outputs.resize(1, NullTensorID);
@@ -90,6 +90,12 @@
     TensorDescriptor output_info  = src->desc();
     TensorShape      output_shape = compute_output_shape(src->desc().shape, weights->desc().shape, _info);
     output_info.shape             = output_shape;
+
+    if(!_out_quant_info.empty())
+    {
+        output_info.quant_info = _out_quant_info;
+    }
+
     return output_info;
 }
 
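Note: without the override, a convolution's output descriptor would inherit the input's (scale, offset), which is generally wrong for QASYMM8 since each layer's output has its own calibration. A construction sketch with a hypothetical output calibration:

    ConvolutionLayerNode node(PadStrideInfo(2, 2, 0, 1),
                              ConvolutionMethod::DEFAULT,
                              QuantizationInfo(0.02f, 128)); // hypothetical
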
diff --git a/src/graph/nodes/SoftmaxLayerNode.cpp b/src/graph/nodes/SoftmaxLayerNode.cpp
index 4c21ac6..b6241e6 100644
--- a/src/graph/nodes/SoftmaxLayerNode.cpp
+++ b/src/graph/nodes/SoftmaxLayerNode.cpp
@@ -63,7 +63,10 @@
     const Tensor *src = input(0);
     ARM_COMPUTE_ERROR_ON(src == nullptr);
 
-    return src->desc();
+    TensorDescriptor out_desc = src->desc();
+    out_desc.quant_info       = QuantizationInfo(1.f / 256.f, 0);
+
+    return out_desc;
 }
 
 Status SoftmaxLayerNode::validate()
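
Note: the fixed (1/256, 0) quantization is the conventional choice for 8-bit softmax: outputs are probabilities in [0, 1], and scale 1/256 with zero offset spreads that range over the full uint8 domain:

    // real = (q - offset) * scale = q / 256, with q in [0, 255]
    // => representable outputs cover [0, 255/256] in steps of 1/256
    const QuantizationInfo softmax_qinfo(1.f / 256.f, 0);
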
diff --git a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
index 5f747bc..711b006 100644
--- a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
@@ -102,7 +102,10 @@
         matrix_b = &_tmp_b;
 
         _memory_group.manage(&_tmp_a);
-        _memory_group.manage(&_tmp_b);
+        if(!_reshape_b_only_on_first_run)
+        {
+            _memory_group.manage(&_tmp_b);
+        }
 
         // Configure interleave kernel
         _mtx_a_reshape_kernel.configure(a, &_tmp_a, mult_interleave4x4_height);
@@ -119,7 +122,10 @@
     {
         TensorInfo info_vector_sum_col(compute_reductionA_shape(*b->info()), 1, DataType::S32);
         _vector_sum_col.allocator()->init(info_vector_sum_col);
-        _memory_group.manage(&_vector_sum_col);
+        if(!_reshape_b_only_on_first_run)
+        {
+            _memory_group.manage(&_vector_sum_col);
+        }
 
         // Configure Matrix B reduction kernel
         _mtx_b_reduction_kernel.configure(b, &_vector_sum_col);
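
Note: the guards exclude buffers that must persist across run() calls from the memory group; managed buffers are recycled after each run, which would clobber a B reshape (or B column-sum) computed only once. The pattern, restated with comments:

    if(!_reshape_b_only_on_first_run)
    {
        _memory_group.manage(&_tmp_b); // scratch: safe to recycle each run
    }
    // else: _tmp_b keeps its contents between runs, so it owns its memory
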
diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp
index 18e6e91..e0859be 100644
--- a/src/runtime/NEON/functions/NEGEMM.cpp
+++ b/src/runtime/NEON/functions/NEGEMM.cpp
@@ -107,7 +107,10 @@
 
             // Manage intermediate buffers
             _memory_group.manage(&_tmp_a);
-            _memory_group.manage(&_tmp_b);
+            if(!_reshape_b_only_on_first_run)
+            {
+                _memory_group.manage(&_tmp_b);
+            }
 
             int m = a->info()->dimension(1);
             int n = b->info()->dimension(0);
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index 7f25c2e..3c48d69 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
@@ -109,14 +109,6 @@
         ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
     }
 
-    // Checks performed when biases are present
-    if(append_bias)
-    {
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases);
-        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(3));
-        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
-    }
-
     if(transpose1xW)
     {
         TensorInfo weights_reshaped = weights->clone()->set_tensor_shape(get_reshaped_weights_shape(weights, append_bias));
@@ -344,7 +336,7 @@
     _memory_group.manage(&_input_im2col_reshaped);
 
     // Create tensor (interleave) to prepare input tensor for GEMM
-    if(!_is_fully_connected_convolution && !run_optimised)
+    if(!_is_fully_connected_convolution && !run_optimised && _is_interleaved)
     {
         TensorShape shape_interleaved(shape_im2col);
         shape_interleaved.set(0, shape_interleaved.x() * 4);
@@ -362,7 +354,9 @@
     TensorInfo info_gemm(shape_gemm, 1, gemm_data_type, input->info()->fixed_point_position());
     info_gemm.set_quantization_info(output->info()->quantization_info());
     _gemm_output.allocator()->init(info_gemm);
-    _memory_group.manage(&_gemm_output);
+
+    // FIXME: enabling memory manager for _gemm_output gives incorrect results (maybe bound to the assembly kernel in GEMMLowp?)
+    //  _memory_group.manage(&_gemm_output);
 
     // Configure kernels
     // Configure im2col
@@ -491,7 +485,7 @@
         reshaped_weights->set_tensor_shape(get_reshaped_weights_shape_conv(weights, append_bias, is_fully_connected_convolution));
         ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayerReshapeWeights::validate(weights, biases, reshaped_weights.get(), !is_fully_connected_convolution /* 1xW transpose */));
     }
-    else
+    else if(!is_quantized)
     {
         TensorShape reshaped_weights_shape;
 
diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
index 7372c6c..cbec73f 100644
--- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
@@ -43,19 +43,19 @@
 NEGEMMLowpMatrixMultiplyCore::NEGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager)
     : _memory_group(std::move(memory_manager)), _asm_glue_unsigned(), _asm_glue_signed(), _mm_kernel(nullptr), _mtx_a_reshape_kernel(nullptr), _mtx_b_reshape_kernel(nullptr), _mtx_a_reduction_kernel(),
       _mtx_b_reduction_kernel(), _offset_contribution_kernel(), _vector_sum_col(), _vector_sum_row(), _tmp_a(), _tmp_b(), _workspace(), _a_offset(0), _b_offset(0), _run_vector_matrix_multiplication(false),
-      _dot_product_path(false)
+      _dot_product_path(false), _is_first_run(true), _reshape_b_only_on_first_run(false)
 {
 }
 
 void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, ITensor *output, const GEMMInfo &gemm_info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);
-    ARM_COMPUTE_UNUSED(gemm_info);
     ARM_COMPUTE_ERROR_THROW_ON(NEGEMMLowpMatrixMultiplyCore::validate(a->info(), b->info(), output->info(), gemm_info));
 
     _a_offset                         = a->info()->quantization_info().offset;
     _b_offset                         = b->info()->quantization_info().offset;
     _run_vector_matrix_multiplication = a->info()->dimension(1) < 2;
+    _reshape_b_only_on_first_run      = gemm_info.reshape_b_only_on_first_run();
 
 #ifdef __aarch64__
     switch(a->info()->data_type())
@@ -98,7 +98,10 @@
             _tmp_a.allocator()->init(info_a);
             _tmp_b.allocator()->init(info_b);
             _memory_group.manage(&_tmp_a);
-            _memory_group.manage(&_tmp_b);
+            if(!_reshape_b_only_on_first_run)
+            {
+                _memory_group.manage(&_tmp_b);
+            }
 
             // Configure interleave kernel
             {
@@ -129,7 +132,10 @@
         TensorInfo info_vector_sum_col(compute_reductionA_shape(*b->info()), 1, DataType::S32);
 
         _vector_sum_col.allocator()->init(info_vector_sum_col);
-        _memory_group.manage(&_vector_sum_col);
+        if(!_reshape_b_only_on_first_run)
+        {
+            _memory_group.manage(&_vector_sum_col);
+        }
 
         // Configure Matrix B reduction kernel
         _mtx_b_reduction_kernel.configure(b, &_vector_sum_col, a->info()->dimension(0), false);
@@ -252,7 +258,7 @@
             NEScheduler::get().schedule(_mtx_a_reshape_kernel.get(), Window::DimY);
         }
 
-        if(_mtx_b_reshape_kernel)
+        if(_mtx_b_reshape_kernel && (_is_first_run || !_reshape_b_only_on_first_run))
         {
             NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY);
         }
@@ -278,7 +284,7 @@
     }
 
     // Run matrix B reduction kernel only if _a_offset is not equal to 0
-    if(_a_offset != 0)
+    if(_a_offset != 0 && (_is_first_run || !_reshape_b_only_on_first_run))
     {
         NEScheduler::get().schedule(&_mtx_b_reduction_kernel, Window::DimX);
     }
@@ -287,4 +293,6 @@
     NEScheduler::get().schedule(&_offset_contribution_kernel, Window::DimY);
 
     _memory_group.release();
+
+    _is_first_run = false;
 }
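
Note: run() now pairs with configure(): the B reshape and B reduction kernels are scheduled on the first run only (or on every run when the flag is off), and _is_first_run flips at the end. The idiom, reduced to a standalone sketch independent of the library:

    struct Gemm
    {
        bool is_first_run                = true;
        bool reshape_b_only_on_first_run = false;

        void run()
        {
            if(is_first_run || !reshape_b_only_on_first_run)
            {
                // reshape / reduce the constant B matrix
            }
            // ... matrix multiply, offset contribution ...
            is_first_run = false;
        }
    };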