COMPMID-1104 Add fast math hint in the graph API

Change-Id: I83db135fa94c6884e080f0229a9b6430d908c029
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/129823
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/arm_compute/graph/GraphBuilder.h b/arm_compute/graph/GraphBuilder.h
index bbbbefb..aea28eb 100644
--- a/arm_compute/graph/GraphBuilder.h
+++ b/arm_compute/graph/GraphBuilder.h
@@ -99,6 +99,8 @@
                                                ITensorAccessorUPtr beta_accessor = nullptr, ITensorAccessorUPtr gamma_accessor = nullptr);
     /** Adds a convolution layer node to the graph
      *
+     * TODO (COMPMID-1113): Add a graph descriptor for convolution layer node
+     *
      * @param[in] g                     Graph to add the node to
      * @param[in] params                Common node parameters
      * @param[in] input                 Input to the convolution layer node as a NodeID-Index pair
@@ -107,6 +109,7 @@
      * @param[in] conv_info             Convolution layer information
      * @param[in] num_groups            (Optional) Number of groups for a grouped convolution. Defaults to 1
      * @param[in] method                (Optional) Convolution method to use
+     * @param[in] fast_math_hint        (Optional) Fast math hint
      * @param[in] weights_accessor      (Optional) Accessor of the weights node data
      * @param[in] bias_accessor         (Optional) Accessor of the bias node data
      * @param[in] weights_quant_info    (Optional) Weights quantization info
@@ -116,7 +119,7 @@
      */
     static NodeID add_convolution_node(Graph &g, NodeParams params, NodeIdxPair input,
                                        Size2D kernel_spatial_extend, unsigned int depth, PadStrideInfo conv_info,
-                                       unsigned int num_groups = 1, ConvolutionMethod method = ConvolutionMethod::DEFAULT,
+                                       unsigned int num_groups = 1, ConvolutionMethod method = ConvolutionMethod::DEFAULT, FastMathHint fast_math_hint = FastMathHint::DISABLED,
                                        ITensorAccessorUPtr weights_accessor = nullptr, ITensorAccessorUPtr bias_accessor = nullptr,
                                        const QuantizationInfo weights_quant_info = QuantizationInfo(),
                                        const QuantizationInfo out_quant_info     = QuantizationInfo());
diff --git a/arm_compute/graph/TypePrinter.h b/arm_compute/graph/TypePrinter.h
index 0ecd57d..6babd39 100644
--- a/arm_compute/graph/TypePrinter.h
+++ b/arm_compute/graph/TypePrinter.h
@@ -295,6 +295,24 @@
     return os;
 }
 
+/** Formatted output of the FastMathHint type. */
+inline ::std::ostream &operator<<(::std::ostream &os, const FastMathHint &hint)
+{
+    switch(hint)
+    {
+        case FastMathHint::ENABLED:
+            os << "ENABLED";
+            break;
+        case FastMathHint::DISABLED:
+            os << "DISABLED";
+            break;
+        default:
+            ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
+    }
+
+    return os;
+}
+
 /** Formatted output of the DepthwiseConvolutionMethod type. */
 inline ::std::ostream &operator<<(::std::ostream &os, const DepthwiseConvolutionMethod &method)
 {
diff --git a/arm_compute/graph/Types.h b/arm_compute/graph/Types.h
index b195ed7..a910610 100644
--- a/arm_compute/graph/Types.h
+++ b/arm_compute/graph/Types.h
@@ -116,6 +116,13 @@
     OPTIMIZED_3x3, /**< Optimized 3x3 direct depthwise convolution */
 };
 
+/** Enable or disable fast math for Convolution layer */
+enum class FastMathHint
+{
+    ENABLED,  /**< Fast math enabled for Convolution layer */
+    DISABLED, /**< Fast math disabled for Convolution layer */
+};
+
 /** Supported nodes */
 enum class NodeType
 {
diff --git a/arm_compute/graph/backends/ValidateHelpers.h b/arm_compute/graph/backends/ValidateHelpers.h
index c203e8c..db3f8ba 100644
--- a/arm_compute/graph/backends/ValidateHelpers.h
+++ b/arm_compute/graph/backends/ValidateHelpers.h
@@ -83,6 +83,7 @@
 
     const PadStrideInfo     conv_info      = node.convolution_info();
     const ConvolutionMethod conv_algorithm = node.convolution_method();
+    //const bool              fast_math      = node.fast_math_hint() == FastMathHint::ENABLED;      // FIXME (COMPMID-1138): uncomment once NEON and GLES support fast_math
 
     // Validate function
     Status status{};
@@ -95,7 +96,7 @@
             status = GEMMConvolutionLayer::validate(input, weights, biases, output, conv_info);
             break;
         case ConvolutionMethod::WINOGRAD:
-            status = WinogradConvolutionLayer::validate(input, weights, biases, output, conv_info);
+            status = WinogradConvolutionLayer::validate(input, weights, biases, output, conv_info /*, fast_math*/);
             break;
         case ConvolutionMethod::DEFAULT:
             status = ConvolutionLayer::validate(input, weights, biases, output, conv_info);
@@ -107,7 +108,7 @@
     // If validation fails try the Default approach
     if(!bool(status))
     {
-        status = ConvolutionLayer::validate(input, weights, biases, output, conv_info);
+        status = ConvolutionLayer::validate(input, weights, biases, output, conv_info /*, fast_math*/);
         if(bool(status))
         {
             ARM_COMPUTE_LOG_GRAPH_INFO("Switched ConvolutionLayer method of node with ID : "
diff --git a/arm_compute/graph/frontend/IStreamOperators.h b/arm_compute/graph/frontend/IStreamOperators.h
index 350d78f..4d680f9 100644
--- a/arm_compute/graph/frontend/IStreamOperators.h
+++ b/arm_compute/graph/frontend/IStreamOperators.h
@@ -96,6 +96,18 @@
     s.hints().depthwise_convolution_method_hint = depthwise_convolution_method_hint;
     return s;
 }
+/** Overloaded stream operator to provide a fast math hint to the graph
+ *
+ * @param[in, out] s              Stream to provide the hint to
+ * @param[in]      fast_math_hint Convolution method hint to be considered
+ *
+ * @return Updated stream
+ */
+inline IStream &operator<<(IStream &s, FastMathHint fast_math_hint)
+{
+    s.hints().fast_math_hint = fast_math_hint;
+    return s;
+}
 } // namespace frontend
 } // namespace graph
 } // namespace arm_compute
diff --git a/arm_compute/graph/frontend/Layers.h b/arm_compute/graph/frontend/Layers.h
index 54cf515..d122a7a 100644
--- a/arm_compute/graph/frontend/Layers.h
+++ b/arm_compute/graph/frontend/Layers.h
@@ -197,7 +197,7 @@
         NodeParams  common_params = { name(), s.hints().target_hint };
         return GraphBuilder::add_convolution_node(s.graph(), common_params, input,
                                                   Size2D(_conv_width, _conv_height), _ofm, _conv_info, _num_groups,
-                                                  s.hints().convolution_method_hint,
+                                                  s.hints().convolution_method_hint, s.hints().fast_math_hint,
                                                   std::move(_weights), std::move(_bias), std::move(_weights_quant_info), std::move(_out_quant_info));
     }
 
diff --git a/arm_compute/graph/frontend/Types.h b/arm_compute/graph/frontend/Types.h
index 6cf7460..4789361 100644
--- a/arm_compute/graph/frontend/Types.h
+++ b/arm_compute/graph/frontend/Types.h
@@ -45,6 +45,7 @@
 using graph::PoolingType;
 using graph::Target;
 using graph::ConvolutionMethod;
+using graph::FastMathHint;
 using graph::DepthwiseConvolutionMethod;
 using graph::TensorDescriptor;
 using graph::DimensionRoundingType;
@@ -63,6 +64,7 @@
     Target                     target_hint                       = { Target::UNSPECIFIED };                 /**< Target execution hint */
     ConvolutionMethod          convolution_method_hint           = { ConvolutionMethod::DEFAULT };          /**< Convolution method hint */
     DepthwiseConvolutionMethod depthwise_convolution_method_hint = { DepthwiseConvolutionMethod::DEFAULT }; /**< Depthwise Convolution method hint */
+    FastMathHint               fast_math_hint                    = { FastMathHint::DISABLED };              /**< Fast math hint */
 };
 } // namespace frontend
 } // namespace graph
diff --git a/arm_compute/graph/nodes/ConvolutionLayerNode.h b/arm_compute/graph/nodes/ConvolutionLayerNode.h
index d1186a8..aca6028 100644
--- a/arm_compute/graph/nodes/ConvolutionLayerNode.h
+++ b/arm_compute/graph/nodes/ConvolutionLayerNode.h
@@ -38,9 +38,11 @@
      *
      * @param[in] info           Convolution layer attributes
      * @param[in] method         (Optional) Convolution method to use
+     * @param[in] fast_math_hint (Optional) Fast math hint
      * @param[in] out_quant_info (Optional) Output quantization info
      */
-    ConvolutionLayerNode(PadStrideInfo info, ConvolutionMethod method = ConvolutionMethod::DEFAULT, QuantizationInfo out_quant_info = QuantizationInfo());
+    ConvolutionLayerNode(PadStrideInfo info, ConvolutionMethod method = ConvolutionMethod::DEFAULT, FastMathHint fast_math_hint = FastMathHint::DISABLED,
+                         QuantizationInfo out_quant_info = QuantizationInfo());
     /** Sets the convolution layer method to use
      *
      * @param[in] method Method to use for convolution
@@ -51,9 +53,19 @@
      * @note This is an indication on which convolution layer implementation to use,
      *       if it fails to be created the library's heuristic approach will be used
      *
-     * @return Convolution layer method do be used by the node
+     * @return Convolution layer method to be used by the node
      */
     ConvolutionMethod convolution_method() const;
+    /** Sets the fast math fast hint
+     *
+     * @param[in] hint Hint to use for convolution
+     */
+    void set_fast_math_hint(FastMathHint hint);
+    /** Fast math hint accessor
+     *
+     * @return Fast math hint to be used by the node
+     */
+    FastMathHint fast_math_hint() const;
     /** Convolution metadata accessor
      *
      * @return Convolution information
@@ -80,6 +92,7 @@
 private:
     PadStrideInfo     _info;
     ConvolutionMethod _method;
+    FastMathHint      _fast_math_hint;
     QuantizationInfo  _out_quant_info;
 };
 } // namespace graph
diff --git a/examples/graph_alexnet.cpp b/examples/graph_alexnet.cpp
index 45c2b56..b97ca54 100644
--- a/examples/graph_alexnet.cpp
+++ b/examples/graph_alexnet.cpp
@@ -37,7 +37,7 @@
 /** Example demonstrating how to implement AlexNet's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 class GraphAlexnetExample : public Example
 {
@@ -60,40 +60,51 @@
         const bool        is_neon              = (target_hint == Target::NEON);
         ConvolutionMethod convolution_5x5_hint = is_neon ? ConvolutionMethod::GEMM : ConvolutionMethod::DIRECT;
         ConvolutionMethod convolution_3x3_hint = is_neon ? ConvolutionMethod::GEMM : ConvolutionMethod::DEFAULT;
+        FastMathHint      fast_math_hint       = FastMathHint::DISABLED;
 
         // Parse arguments
         if(argc < 2)
         {
             // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 2)
         {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 3)
         {
             data_path = argv[2];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No image provided: using random values\n\n";
         }
         else if(argc == 4)
         {
             data_path = argv[2];
             image     = argv[3];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
             std::cout << "No text file with labels provided: skipping output accessor\n\n";
         }
-        else
+        else if(argc == 5)
         {
             data_path = argv[2];
             image     = argv[3];
             label     = argv[4];
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
+            std::cout << "No fast math info provided: disabling fast math\n\n";
+        }
+        else
+        {
+            data_path      = argv[2];
+            image          = argv[3];
+            label          = argv[4];
+            fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
         }
 
         graph << target_hint
+              << fast_math_hint
               << InputLayer(TensorDescriptor(TensorShape(227U, 227U, 3U, 1U), DataType::F32),
                             get_input_accessor(image, std::move(preprocessor)))
               // Layer 1
@@ -185,7 +196,7 @@
 /** Main program for AlexNet
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 int main(int argc, char **argv)
 {
diff --git a/examples/graph_googlenet.cpp b/examples/graph_googlenet.cpp
index deafe5a..bac3c7c 100644
--- a/examples/graph_googlenet.cpp
+++ b/examples/graph_googlenet.cpp
@@ -36,7 +36,7 @@
 /** Example demonstrating how to implement Googlenet's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 class GraphGooglenetExample : public Example
 {
@@ -55,40 +55,51 @@
         const int         target           = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
         Target            target_hint      = set_target_hint(target);
         ConvolutionMethod convolution_hint = target_hint == Target::NEON ? ConvolutionMethod::GEMM : ConvolutionMethod::DEFAULT;
+        FastMathHint      fast_math_hint   = FastMathHint::DISABLED;
 
         // Parse arguments
         if(argc < 2)
         {
             // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 2)
         {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 3)
         {
             data_path = argv[2];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No image provided: using random values\n\n";
         }
         else if(argc == 4)
         {
             data_path = argv[2];
             image     = argv[3];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
             std::cout << "No text file with labels provided: skipping output accessor\n\n";
         }
-        else
+        else if(argc == 5)
         {
             data_path = argv[2];
             image     = argv[3];
             label     = argv[4];
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
+            std::cout << "No fast math info provided: disabling fast math\n\n";
+        }
+        else
+        {
+            data_path      = argv[2];
+            image          = argv[3];
+            label          = argv[4];
+            fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
         }
 
         graph << target_hint
+              << fast_math_hint
               << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::F32),
                             get_input_accessor(image, std::move(preprocessor)))
               << ConvolutionLayer(
@@ -206,7 +217,7 @@
 /** Main program for Googlenet
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 int main(int argc, char **argv)
 {
diff --git a/examples/graph_inception_v3.cpp b/examples/graph_inception_v3.cpp
index 7fa0fc7..8e30fd9 100644
--- a/examples/graph_inception_v3.cpp
+++ b/examples/graph_inception_v3.cpp
@@ -36,7 +36,7 @@
 /** Example demonstrating how to implement InceptionV3's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 class InceptionV3Example : public Example
 {
@@ -54,41 +54,53 @@
         const int         target           = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
         Target            target_hint      = set_target_hint(target);
         ConvolutionMethod convolution_hint = target_hint == Target::NEON ? ConvolutionMethod::GEMM : ConvolutionMethod::DEFAULT;
+        FastMathHint      fast_math_hint   = FastMathHint::DISABLED;
 
         // Parse arguments
         if(argc < 2)
         {
             // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 2)
         {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 3)
         {
             data_path = argv[2];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No image provided: using random values\n\n";
         }
         else if(argc == 4)
         {
             data_path = argv[2];
             image     = argv[3];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
             std::cout << "No text file with labels provided: skipping output accessor\n\n";
         }
-        else
+        else if(argc == 5)
         {
             data_path = argv[2];
             image     = argv[3];
             label     = argv[4];
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
+            std::cout << "No fast math info provided: disabling fast math\n\n";
+        }
+        else
+        {
+            data_path      = argv[2];
+            image          = argv[3];
+            label          = argv[4];
+            fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
         }
 
-        graph << target_hint << InputLayer(TensorDescriptor(TensorShape(299U, 299U, 3U, 1U), DataType::F32),
-                                           get_input_accessor(image, std::move(preprocessor), false))
+        graph << target_hint
+              << fast_math_hint
+              << InputLayer(TensorDescriptor(TensorShape(299U, 299U, 3U, 1U), DataType::F32),
+                            get_input_accessor(image, std::move(preprocessor), false))
               << ConvolutionLayer(3U, 3U, 32U,
                                   get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_weights.npy"),
                                   std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
@@ -759,7 +771,7 @@
 /** Main program for Inception V3
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 int main(int argc, char **argv)
 {
diff --git a/examples/graph_inception_v4.cpp b/examples/graph_inception_v4.cpp
index 4217c78..827370e 100644
--- a/examples/graph_inception_v4.cpp
+++ b/examples/graph_inception_v4.cpp
@@ -36,7 +36,7 @@
 /** Example demonstrating how to implement InceptionV4's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 class InceptionV4Example final : public Example
 {
@@ -57,41 +57,53 @@
         const int         target           = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
         Target            target_hint      = set_target_hint(target);
         ConvolutionMethod convolution_hint = target_hint == Target::NEON ? ConvolutionMethod::GEMM : ConvolutionMethod::DEFAULT;
+        FastMathHint      fast_math_hint   = FastMathHint::DISABLED;
 
         // Parse arguments
         if(argc < 2)
         {
             // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 2)
         {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 3)
         {
             data_path = argv[2];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No image provided: using random values\n\n";
         }
         else if(argc == 4)
         {
             data_path = argv[2];
             image     = argv[3];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
             std::cout << "No text file with labels provided: skipping output accessor\n\n";
         }
-        else
+        else if(argc == 5)
         {
             data_path = argv[2];
             image     = argv[3];
             label     = argv[4];
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
+            std::cout << "No fast math info provided: disabling fast math\n\n";
+        }
+        else
+        {
+            data_path      = argv[2];
+            image          = argv[3];
+            label          = argv[4];
+            fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
         }
 
-        graph << target_hint << InputLayer(TensorDescriptor(TensorShape(299U, 299U, 3U, 1U), DataType::F32),
-                                           get_input_accessor(image, std::move(preprocessor), false))
+        graph << target_hint
+              << fast_math_hint
+              << InputLayer(TensorDescriptor(TensorShape(299U, 299U, 3U, 1U), DataType::F32),
+                            get_input_accessor(image, std::move(preprocessor), false))
               // Conv2d_1a_3x3
               << ConvolutionLayer(3U, 3U, 32U,
                                   get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_weights.npy"),
@@ -737,7 +749,7 @@
 /** Main program for Inception V4
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 int main(int argc, char **argv)
 {
diff --git a/examples/graph_lenet.cpp b/examples/graph_lenet.cpp
index ea0916b..92be2d4 100644
--- a/examples/graph_lenet.cpp
+++ b/examples/graph_lenet.cpp
@@ -36,7 +36,7 @@
 /** Example demonstrating how to implement LeNet's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] batches )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] batches, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 class GraphLenetExample : public Example
 {
@@ -50,34 +50,45 @@
         const int target      = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
         Target    target_hint = set_target_hint(target);
 
+        FastMathHint fast_math_hint = FastMathHint::DISABLED;
+
         // Parse arguments
         if(argc < 2)
         {
             // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [batches]\n\n";
+            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [batches] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 2)
         {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [batches]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [batches] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 3)
         {
             //Do something with argv[1]
             data_path = argv[2];
-            std::cout << "Usage: " << argv[0] << " [path_to_data] [batches]\n\n";
+            std::cout << "Usage: " << argv[0] << " [path_to_data] [batches] [fast_math_hint]\n\n";
             std::cout << "No number of batches where specified, thus will use the default : " << batches << "\n\n";
         }
+        else if(argc == 4)
+        {
+            data_path = argv[2];
+            batches   = std::strtol(argv[3], nullptr, 0);
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [fast_math_hint]\n\n";
+            std::cout << "No fast math info provided: disabling fast math\n\n";
+        }
         else
         {
             //Do something with argv[1] and argv[2]
-            data_path = argv[2];
-            batches   = std::strtol(argv[3], nullptr, 0);
+            data_path      = argv[2];
+            batches        = std::strtol(argv[3], nullptr, 0);
+            fast_math_hint = (std::strtol(argv[4], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
         }
 
         //conv1 << pool1 << conv2 << pool2 << fc1 << act1 << fc2 << smx
         graph << target_hint
+              << fast_math_hint
               << InputLayer(TensorDescriptor(TensorShape(28U, 28U, 1U, batches), DataType::F32), get_input_accessor(""))
               << ConvolutionLayer(
                   5U, 5U, 20U,
@@ -125,7 +136,7 @@
 /** Main program for LeNet
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] batches )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] batches, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 int main(int argc, char **argv)
 {
diff --git a/examples/graph_mobilenet.cpp b/examples/graph_mobilenet.cpp
index 813c0bf..7bfc680 100644
--- a/examples/graph_mobilenet.cpp
+++ b/examples/graph_mobilenet.cpp
@@ -35,7 +35,7 @@
 /** Example demonstrating how to implement MobileNet's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] data layout, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 class GraphMobilenetExample : public Example
 {
@@ -54,6 +54,7 @@
         Target                     target_hint                = set_target_hint(target);
         ConvolutionMethod          convolution_hint           = ConvolutionMethod::GEMM;
         DepthwiseConvolutionMethod depthwise_convolution_hint = DepthwiseConvolutionMethod::OPTIMIZED_3x3;
+        FastMathHint               fast_math_hint             = FastMathHint::DISABLED;
 
         // Set model to execute. 0 (MobileNetV1_1.0_224), 1 (MobileNetV1_0.75_160)
         int model_id = (argc > 2) ? std::strtol(argv[2], nullptr, 10) : 0;
@@ -72,33 +73,33 @@
         if(argc < 2)
         {
             // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [model] [layout] [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " [target] [model] [layout] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No model ID provided: using MobileNetV1_1.0_224\n\n";
             std::cout << "No data layout provided: using NCHW\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 2)
         {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [model] [layout] [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [model] [layout] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No model ID provided: using MobileNetV1_1.0_224\n\n";
             std::cout << "No data layout provided: using NCHW\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 3)
         {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [layout] [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [layout] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data layout provided: using NCHW\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 4)
         {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 5)
         {
             data_path = argv[4];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No image provided: using random values\n\n";
             std::cout << "No text file with labels provided: skipping output accessor\n\n";
         }
@@ -106,14 +107,23 @@
         {
             data_path = argv[4];
             image     = argv[5];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
             std::cout << "No text file with labels provided: skipping output accessor\n\n";
         }
-        else
+        else if(argc == 7)
         {
             data_path = argv[4];
             image     = argv[5];
             label     = argv[6];
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
+            std::cout << "No fast math info provided: disabling fast math\n\n";
+        }
+        else
+        {
+            data_path      = argv[4];
+            image          = argv[5];
+            label          = argv[6];
+            fast_math_hint = (std::strtol(argv[7], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
         }
 
         // Add model path to data path
@@ -125,6 +135,7 @@
         graph << target_hint
               << convolution_hint
               << depthwise_convolution_hint
+              << fast_math_hint
               << InputLayer(input_descriptor,
                             get_input_accessor(image, std::move(preprocessor), false))
               << ConvolutionLayer(
@@ -225,7 +236,9 @@
  *                             [optional] Model ID (0 = MobileNetV1_1.0_224, 1 = MobileNetV1_0.75_160),
  *                             [optional] Path to the weights folder,
  *                             [optional] image,
- *                             [optional] labels )
+ *                             [optional] labels,
+ *                             [optional] data layout,
+ *                             [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 int main(int argc, char **argv)
 {
diff --git a/examples/graph_mobilenet_qasymm8.cpp b/examples/graph_mobilenet_qasymm8.cpp
index 7edd182..2801209 100644
--- a/examples/graph_mobilenet_qasymm8.cpp
+++ b/examples/graph_mobilenet_qasymm8.cpp
@@ -36,7 +36,7 @@
 /** Example demonstrating how to implement QASYMM8 MobileNet's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] npy_input, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] npy_input, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 class GraphMobileNetQASYMM8Example : public Example
 {
@@ -92,37 +92,48 @@
         };
 
         // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
-        const int target      = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
-        Target    target_hint = set_target_hint(target);
+        const int    target         = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
+        Target       target_hint    = set_target_hint(target);
+        FastMathHint fast_math_hint = FastMathHint::DISABLED;
 
         // Parse arguments
         if(argc < 2)
         {
             // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [npy_input] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [npy_input] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 2)
         {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [npy_input] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [npy_input] [labels] [fast_math_hint]\n\n";
             std::cout << "No input provided: using random values\n\n";
         }
         else if(argc == 4)
         {
             data_path = argv[2];
             input     = argv[3];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
             std::cout << "No text file with labels provided: skipping output accessor\n\n";
         }
-        else
+        else if(argc == 5)
         {
             data_path = argv[2];
             input     = argv[3];
             label     = argv[4];
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
+            std::cout << "No fast math info provided: disabling fast math\n\n";
+        }
+        else
+        {
+            data_path      = argv[2];
+            input          = argv[3];
+            label          = argv[4];
+            fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
         }
 
         graph << target_hint
               << DepthwiseConvolutionMethod::OPTIMIZED_3x3 // FIXME(COMPMID-1073): Add heuristics to automatically call the optimized 3x3 method
+              << fast_math_hint
               << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::QASYMM8, in_quant_info),
                             get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/" + input))
               << ConvolutionLayer(
@@ -220,7 +231,7 @@
 /** Main program for MobileNetQASYMM8
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] npy_input, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] npy_input, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 int main(int argc, char **argv)
 {
diff --git a/examples/graph_resnet50.cpp b/examples/graph_resnet50.cpp
index 18a028d..6fac372 100644
--- a/examples/graph_resnet50.cpp
+++ b/examples/graph_resnet50.cpp
@@ -35,7 +35,7 @@
 /** Example demonstrating how to implement Microsoft's ResNet50 network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 class GraphResNet50Example : public Example
 {
@@ -55,40 +55,51 @@
         const int         target           = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
         Target            target_hint      = set_target_hint(target);
         ConvolutionMethod convolution_hint = target_hint == Target::NEON ? ConvolutionMethod::GEMM : ConvolutionMethod::DEFAULT;
+        FastMathHint      fast_math_hint   = FastMathHint::DISABLED;
 
         // Parse arguments
         if(argc < 2)
         {
             // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 2)
         {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 3)
         {
             data_path = argv[2];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No image provided: using random values\n\n";
         }
         else if(argc == 4)
         {
             data_path = argv[2];
             image     = argv[3];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
             std::cout << "No text file with labels provided: skipping output accessor\n\n";
         }
-        else
+        else if(argc == 5)
         {
             data_path = argv[2];
             image     = argv[3];
             label     = argv[4];
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
+            std::cout << "No fast math info provided: disabling fast math\n\n";
+        }
+        else
+        {
+            data_path      = argv[2];
+            image          = argv[3];
+            label          = argv[4];
+            fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
         }
 
         graph << target_hint
+              << fast_math_hint
               << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::F32),
                             get_input_accessor(image, std::move(preprocessor), false /* Do not convert to BGR */))
               << ConvolutionLayer(
@@ -243,7 +254,7 @@
 /** Main program for ResNet50
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 int main(int argc, char **argv)
 {
diff --git a/examples/graph_squeezenet.cpp b/examples/graph_squeezenet.cpp
index 8ed43f7..2627c96 100644
--- a/examples/graph_squeezenet.cpp
+++ b/examples/graph_squeezenet.cpp
@@ -37,7 +37,7 @@
 /** Example demonstrating how to implement Squeezenet's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 class GraphSqueezenetExample : public Example
 {
@@ -56,40 +56,51 @@
         const int         target           = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
         Target            target_hint      = set_target_hint(target);
         ConvolutionMethod convolution_hint = target_hint == Target::NEON ? ConvolutionMethod::GEMM : ConvolutionMethod::DEFAULT;
+        FastMathHint      fast_math_hint   = FastMathHint::DISABLED;
 
         // Parse arguments
         if(argc < 2)
         {
             // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 2)
         {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 3)
         {
             data_path = argv[2];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No image provided: using random values\n\n";
         }
         else if(argc == 4)
         {
             data_path = argv[2];
             image     = argv[3];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
             std::cout << "No text file with labels provided: skipping output accessor\n\n";
         }
-        else
+        else if(argc == 5)
         {
             data_path = argv[2];
             image     = argv[3];
             label     = argv[4];
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
+            std::cout << "No fast math info provided: disabling fast math\n\n";
+        }
+        else
+        {
+            data_path      = argv[2];
+            image          = argv[3];
+            label          = argv[4];
+            fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
         }
 
         graph << target_hint
+              << fast_math_hint
               << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::F32),
                             get_input_accessor(image, std::move(preprocessor)))
               << ConvolutionLayer(
@@ -209,7 +220,7 @@
 /** Main program for Squeezenet v1.0
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 int main(int argc, char **argv)
 {
diff --git a/examples/graph_squeezenet_v1_1.cpp b/examples/graph_squeezenet_v1_1.cpp
index 529f4fe..c4a5433 100644
--- a/examples/graph_squeezenet_v1_1.cpp
+++ b/examples/graph_squeezenet_v1_1.cpp
@@ -40,7 +40,7 @@
 /** Example demonstrating how to implement Squeezenet's v1.1 network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 class GraphSqueezenet_v1_1Example : public Example
 {
@@ -59,40 +59,51 @@
         const int         target           = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
         Target            target_hint      = set_target_hint(target);
         ConvolutionMethod convolution_hint = target_hint == Target::NEON ? ConvolutionMethod::GEMM : ConvolutionMethod::DEFAULT;
+        FastMathHint      fast_math_hint   = FastMathHint::DISABLED;
 
         // Parse arguments
         if(argc < 2)
         {
             // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 2)
         {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 3)
         {
             data_path = argv[2];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No image provided: using random values\n\n";
         }
         else if(argc == 4)
         {
             data_path = argv[2];
             image     = argv[3];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
             std::cout << "No text file with labels provided: skipping output accessor\n\n";
         }
-        else
+        else if(argc == 5)
         {
             data_path = argv[2];
             image     = argv[3];
             label     = argv[4];
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
+            std::cout << "No fast math info provided: disabling fast math\n\n";
+        }
+        else
+        {
+            data_path      = argv[2];
+            image          = argv[3];
+            label          = argv[4];
+            fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
         }
 
         graph << target_hint
+              << fast_math_hint
               << InputLayer(TensorDescriptor(TensorShape(227U, 227U, 3U, 1U), DataType::F32),
                             get_input_accessor(image, std::move(preprocessor)))
               << ConvolutionMethod::DIRECT
@@ -214,7 +225,7 @@
 /** Main program for Squeezenet v1.1
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 int main(int argc, char **argv)
 {
diff --git a/examples/graph_vgg16.cpp b/examples/graph_vgg16.cpp
index 44b4c4c..6db4e38 100644
--- a/examples/graph_vgg16.cpp
+++ b/examples/graph_vgg16.cpp
@@ -35,7 +35,7 @@
 /** Example demonstrating how to implement VGG16's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 class GraphVGG16Example : public Example
 {
@@ -57,40 +57,51 @@
 
         ConvolutionMethod first_convolution3x3_hint = is_opencl ? ConvolutionMethod::DIRECT : ConvolutionMethod::GEMM;
         ConvolutionMethod convolution3x3_hint       = ConvolutionMethod::DEFAULT;
+        FastMathHint      fast_math_hint            = FastMathHint::DISABLED;
 
         // Parse arguments
         if(argc < 2)
         {
             // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 2)
         {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 3)
         {
             data_path = argv[2];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No image provided: using random values\n\n";
         }
         else if(argc == 4)
         {
             data_path = argv[2];
             image     = argv[3];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
             std::cout << "No text file with labels provided: skipping output accessor\n\n";
         }
-        else
+        else if(argc == 5)
         {
             data_path = argv[2];
             image     = argv[3];
             label     = argv[4];
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
+            std::cout << "No fast math info provided: disabling fast math\n\n";
+        }
+        else
+        {
+            data_path      = argv[2];
+            image          = argv[3];
+            label          = argv[4];
+            fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
         }
 
         graph << target_hint
+              << fast_math_hint
               << first_convolution3x3_hint
               << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::F32),
                             get_input_accessor(image, std::move(preprocessor)))
@@ -246,7 +257,7 @@
 /** Main program for VGG16
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 int main(int argc, char **argv)
 {
diff --git a/examples/graph_vgg19.cpp b/examples/graph_vgg19.cpp
index 229112b..5a281ea 100644
--- a/examples/graph_vgg19.cpp
+++ b/examples/graph_vgg19.cpp
@@ -35,7 +35,7 @@
 /** Example demonstrating how to implement VGG19's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 class GraphVGG19Example : public Example
 {
@@ -51,9 +51,10 @@
         std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb);
 
         // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
-        const int  target      = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
-        Target     target_hint = set_target_hint(target);
-        const bool is_opencl   = target_hint == Target::CL;
+        const int    target         = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
+        Target       target_hint    = set_target_hint(target);
+        FastMathHint fast_math_hint = FastMathHint::DISABLED;
+        const bool   is_opencl      = target_hint == Target::CL;
 
         ConvolutionMethod first_convolution3x3_hint = is_opencl ? ConvolutionMethod::DIRECT : ConvolutionMethod::GEMM;
         ConvolutionMethod convolution3x3_hint       = ConvolutionMethod::DEFAULT;
@@ -62,36 +63,46 @@
         if(argc < 2)
         {
             // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 2)
         {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No data folder provided: using random values\n\n";
         }
         else if(argc == 3)
         {
             data_path = argv[2];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n";
             std::cout << "No image provided: using random values\n\n";
         }
         else if(argc == 4)
         {
             data_path = argv[2];
             image     = argv[3];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels]\n\n";
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
             std::cout << "No text file with labels provided: skipping output accessor\n\n";
         }
-        else
+        else if(argc == 5)
         {
             data_path = argv[2];
             image     = argv[3];
             label     = argv[4];
+            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
+            std::cout << "No fast math info provided: disabling fast math\n\n";
+        }
+        else
+        {
+            data_path      = argv[2];
+            image          = argv[3];
+            label          = argv[4];
+            fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
         }
 
         graph << target_hint
               << first_convolution3x3_hint
+              << fast_math_hint
               << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::F32),
                             get_input_accessor(image, std::move(preprocessor)))
               // Layer 1
@@ -259,7 +270,7 @@
 /** Main program for VGG19
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 int main(int argc, char **argv)
 {
diff --git a/src/graph/GraphBuilder.cpp b/src/graph/GraphBuilder.cpp
index 56b31c7..df94d0b 100644
--- a/src/graph/GraphBuilder.cpp
+++ b/src/graph/GraphBuilder.cpp
@@ -81,7 +81,7 @@
 }
 
 NodeID create_grouped_convolution(Graph &g, NodeParams &params, NodeIdxPair input, NodeID weights, NodeID bias,
-                                  PadStrideInfo conv_info, ConvolutionMethod method, unsigned int num_groups)
+                                  PadStrideInfo conv_info, ConvolutionMethod method, FastMathHint fast_math_hint, unsigned int num_groups)
 {
     bool has_bias = (bias != EmptyNodeID);
 
@@ -102,7 +102,7 @@
     std::vector<NodeIdxPair> convolution_outputs;
     for(unsigned int i = 0; i < num_groups; ++i)
     {
-        NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, method);
+        NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, method, fast_math_hint);
         g.add_connection(input_split, i, conv_nid, 0);
         g.add_connection(weights_split, i, conv_nid, 1);
         if(has_bias)
@@ -205,7 +205,7 @@
 
 NodeID GraphBuilder::add_convolution_node(Graph &g, NodeParams params, NodeIdxPair input,
                                           Size2D kernel_spatial_extend, unsigned int depth, PadStrideInfo conv_info,
-                                          unsigned int num_groups, ConvolutionMethod method,
+                                          unsigned int num_groups, ConvolutionMethod method, FastMathHint fast_math_hint,
                                           ITensorAccessorUPtr weights_accessor, ITensorAccessorUPtr bias_accessor,
                                           const QuantizationInfo weights_quant_info,
                                           const QuantizationInfo out_quant_info)
@@ -245,7 +245,7 @@
     if(num_groups == 1)
     {
         // Create convolution node and connect
-        NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, method, out_quant_info);
+        NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, method, fast_math_hint, out_quant_info);
         g.add_connection(input.node_id, input.index, conv_nid, 0);
         g.add_connection(w_nid, 0, conv_nid, 1);
         if(has_bias)
@@ -258,7 +258,7 @@
     }
     else
     {
-        return create_grouped_convolution(g, params, input, w_nid, b_nid, conv_info, method, num_groups);
+        return create_grouped_convolution(g, params, input, w_nid, b_nid, conv_info, method, fast_math_hint, num_groups);
     }
 }
 
diff --git a/src/graph/backends/CL/CLFunctionsFactory.cpp b/src/graph/backends/CL/CLFunctionsFactory.cpp
index ece6364..4626cb5 100644
--- a/src/graph/backends/CL/CLFunctionsFactory.cpp
+++ b/src/graph/backends/CL/CLFunctionsFactory.cpp
@@ -166,6 +166,7 @@
 
     const PadStrideInfo     conv_info      = node.convolution_info();
     const ConvolutionMethod conv_algorithm = node.convolution_method();
+    const bool              fast_math      = node.fast_math_hint() == FastMathHint::ENABLED;
 
     // Create and configure function (we assume that functions have been validated before creation)
     std::shared_ptr<IMemoryManager> mm = get_memory_manager(ctx, Target::CL);
@@ -175,7 +176,7 @@
     if(conv_algorithm == ConvolutionMethod::WINOGRAD)
     {
         std::tie(func, func_name) = create_named_memory_managed_function<CLWinogradConvolutionLayer>(
-                                        std::string("CLWinogradConvolutionLayer"), mm, input, weights, biases, output, conv_info);
+                                        std::string("CLWinogradConvolutionLayer"), mm, input, weights, biases, output, conv_info, ActivationLayerInfo(), fast_math);
     }
     else if(conv_algorithm == ConvolutionMethod::DIRECT)
     {
@@ -190,7 +191,7 @@
     else
     {
         std::tie(func, func_name) = create_named_memory_managed_function<CLConvolutionLayer>(std::string("CLConvolutionLayer"), mm,
-                                                                                             input, weights, biases, output, conv_info);
+                                                                                             input, weights, biases, output, conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), fast_math);
     }
 
     // Log info
diff --git a/src/graph/nodes/ConvolutionLayerNode.cpp b/src/graph/nodes/ConvolutionLayerNode.cpp
index eaf1f7f..6c31a6b 100644
--- a/src/graph/nodes/ConvolutionLayerNode.cpp
+++ b/src/graph/nodes/ConvolutionLayerNode.cpp
@@ -32,8 +32,8 @@
 {
 namespace graph
 {
-ConvolutionLayerNode::ConvolutionLayerNode(PadStrideInfo info, ConvolutionMethod method, QuantizationInfo out_quant_info)
-    : _info(std::move(info)), _method(method), _out_quant_info(out_quant_info)
+ConvolutionLayerNode::ConvolutionLayerNode(PadStrideInfo info, ConvolutionMethod method, FastMathHint fast_math_hint, QuantizationInfo out_quant_info)
+    : _info(std::move(info)), _method(method), _fast_math_hint(fast_math_hint), _out_quant_info(out_quant_info)
 {
     _input_edges.resize(3, EmptyEdgeID);
     _outputs.resize(1, NullTensorID);
@@ -49,6 +49,16 @@
     return _method;
 }
 
+void ConvolutionLayerNode::set_fast_math_hint(FastMathHint hint)
+{
+    _fast_math_hint = hint;
+}
+
+FastMathHint ConvolutionLayerNode::fast_math_hint() const
+{
+    return _fast_math_hint;
+}
+
 PadStrideInfo ConvolutionLayerNode::convolution_info() const
 {
     return _info;