Enable fast_math in CpuFullyConnected

ONCPUML-529
* Add support for passing fast_math for fullyconnected layers via fc_info.
* Add support for passing fast_math to run ACL benchmark graphs.
* Add validation test and accuracy tests (updated fixtures).
Note: absolute and relative tolerances for fast math mode are set based on experimental data.

Signed-off-by: cfRod <crefeda.rodrigues@arm.com>
Change-Id: Ib107d6264d3ae5e36555334f39a13e678f8618df
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6521
Reviewed-by: SiCong Li <sicong.li@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index bff672c..47df44c 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -1578,6 +1578,7 @@
     bool       transpose_weights{ true };                  /**<  Transpose weights if true. */
     bool       are_weights_reshaped{ false };              /**<  Reshape the weights tensor if false. */
     bool       retain_internal_weights{ false };           /**<  Retain internal reshaped weights. */
+    bool       enable_fast_math{ false };                  /**<  Enable fast math computation. */
     /* Other parameters */
     bool fp_mixed_precision{ false }; /**<  Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. */
 
@@ -2107,6 +2108,14 @@
     {
         return _fast_math;
     };
+    /** Set fast math flag
+     *
+     * @param[in] fast_math Flag to set
+     */
+    void set_fast_math(bool fast_math)
+    {
+        _fast_math = fast_math;
+    }
     /** Flag which specifies whether to broadcast the shape of the bias tensor.
      *
      * @return True if the shape of the bias tensor is to be broadcasted.
diff --git a/arm_compute/graph/GraphBuilder.h b/arm_compute/graph/GraphBuilder.h
index 14ad057..cb88c0e 100644
--- a/arm_compute/graph/GraphBuilder.h
+++ b/arm_compute/graph/GraphBuilder.h
@@ -295,13 +295,15 @@
      * @param[in] bias_nid       (Optional) Node ID of the bias node data. Defaults to EmptyNodeID
      * @param[in] fc_info        (Optional) Fully connected layer metadata
      * @param[in] out_quant_info (Optional) Output quantization info
+     * @param[in] fast_math_hint (Optional) Fast math hint
      *
      * @return Node ID of the created node, EmptyNodeID in case of error
      */
     static NodeID add_fully_connected_layer(Graph &g, NodeParams params, NodeIdxPair input, unsigned int num_outputs,
                                             NodeID weights_nid, NodeID bias_nid = EmptyNodeID,
                                             const FullyConnectedLayerInfo fc_info        = FullyConnectedLayerInfo(),
-                                            const QuantizationInfo       &out_quant_info = QuantizationInfo());
+                                            const QuantizationInfo       &out_quant_info = QuantizationInfo(),
+                                            FastMathHint                  fast_math_hint = FastMathHint::Disabled);
     /** Adds a fully connected layer node to the graph
      *
      * @param[in] g                  Graph to add the layer to
@@ -313,6 +315,7 @@
      * @param[in] fc_info            (Optional) Fully connected layer metadata
      * @param[in] weights_quant_info (Optional) Weights quantization info
      * @param[in] out_quant_info     (Optional) Output quantization info
+     * @param[in] fast_math_hint     (Optional) Fast math hint
      *
      * @return Node ID of the created node, EmptyNodeID in case of error
      */
@@ -320,7 +323,8 @@
                                             ITensorAccessorUPtr weights_accessor = nullptr, ITensorAccessorUPtr bias_accessor = nullptr,
                                             const FullyConnectedLayerInfo fc_info            = FullyConnectedLayerInfo(),
                                             const QuantizationInfo       &weights_quant_info = QuantizationInfo(),
-                                            const QuantizationInfo       &out_quant_info     = QuantizationInfo());
+                                            const QuantizationInfo       &out_quant_info     = QuantizationInfo(),
+                                            FastMathHint                  fast_math_hint     = FastMathHint::Disabled);
     /** Adds a generate proposals layer node to the graph
      *
      * @param[in] g       Graph to add the layer to
diff --git a/arm_compute/graph/backends/FunctionHelpers.h b/arm_compute/graph/backends/FunctionHelpers.h
index 6aec3f6..55af056 100644
--- a/arm_compute/graph/backends/FunctionHelpers.h
+++ b/arm_compute/graph/backends/FunctionHelpers.h
@@ -1096,7 +1096,8 @@
     typename TargetInfo::TensorType *weights = get_backing_tensor<TargetInfo>(node.input(1));
     typename TargetInfo::TensorType *biases  = get_backing_tensor<TargetInfo>(node.input(2));
     typename TargetInfo::TensorType *output  = get_backing_tensor<TargetInfo>(node.output(0));
-    const FullyConnectedLayerInfo    fc_info = node.info();
+    FullyConnectedLayerInfo          fc_info = node.info();
+    fc_info.enable_fast_math                 = (node.fast_math_hint() == FastMathHint::Enabled);
 
     ARM_COMPUTE_ERROR_ON(input == nullptr);
     ARM_COMPUTE_ERROR_ON(weights == nullptr);
diff --git a/arm_compute/graph/frontend/Layers.h b/arm_compute/graph/frontend/Layers.h
index bf68b26..fe0539b 100644
--- a/arm_compute/graph/frontend/Layers.h
+++ b/arm_compute/graph/frontend/Layers.h
@@ -776,7 +776,7 @@
         {
             return GraphBuilder::add_fully_connected_layer(s.graph(), common_params, input, _num_outputs,
                                                            std::move(_weights), std::move(_bias), _fc_info,
-                                                           std::move(_weights_quant_info), std::move(_out_quant_info));
+                                                           std::move(_weights_quant_info), std::move(_out_quant_info), s.hints().fast_math_hint);
         }
         else
         {
@@ -785,7 +785,7 @@
             NodeID bias_nid = (_bias_ss == nullptr) ? EmptyNodeID : _bias_ss->tail_node();
             return GraphBuilder::add_fully_connected_layer(s.graph(), common_params, input, _num_outputs,
                                                            _weights_ss->tail_node(), bias_nid, _fc_info,
-                                                           std::move(_out_quant_info));
+                                                           std::move(_out_quant_info), s.hints().fast_math_hint);
         }
     }
 
diff --git a/arm_compute/graph/nodes/FullyConnectedLayerNode.h b/arm_compute/graph/nodes/FullyConnectedLayerNode.h
index a7712f4..9ade62b 100644
--- a/arm_compute/graph/nodes/FullyConnectedLayerNode.h
+++ b/arm_compute/graph/nodes/FullyConnectedLayerNode.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -39,10 +39,22 @@
      * @param[in] num_outputs    Number of neurons in the layer
      * @param[in] out_quant_info (Optional) Output quantization info
      * @param[in] fc_info        (Optional) Additional information about the fully connected layer
+     * @param[in] fast_math_hint (Optional) Fast math hint
      */
     FullyConnectedLayerNode(unsigned int            num_outputs,
                             QuantizationInfo        out_quant_info = QuantizationInfo(),
-                            FullyConnectedLayerInfo fc_info        = FullyConnectedLayerInfo());
+                            FullyConnectedLayerInfo fc_info        = FullyConnectedLayerInfo(),
+                            FastMathHint            fast_math_hint = FastMathHint::Disabled);
+    /** Sets the fast math hint
+     *
+     * @param[in] hint Hint to use for fullyconnected layer
+     */
+    void set_fast_math_hint(FastMathHint hint);
+    /** Fast math hint accessor
+     *
+     * @return Fast math hint to be used by the node
+     */
+    FastMathHint fast_math_hint() const;
     /** Sets fused activation
      *
      * @param[in] fused_activation Fused activation to set
@@ -94,6 +106,7 @@
     unsigned int            _num_outputs;
     QuantizationInfo        _out_quant_info;
     FullyConnectedLayerInfo _info;
+    FastMathHint            _fast_math_hint;
 };
 } // namespace graph
 } // namespace arm_compute