COMPMID-1451: Fuse activation in DepthwiseConvolution.

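Teach the NodeFusionMutator to fold a following ActivationLayer into a
DepthwiseConvolutionLayerNode. The fusion helper now takes a
precondition callback so fusion can be restricted per node type; for
depthwise convolution it is currently enabled only when the node output
is QASYMM8. The GLES backend function factory forwards the fused
activation (and a default depth multiplier of 1) to the backend
function.

A minimal before/after sketch of the graph rewrite (illustrative):

    Before: DepthwiseConvolutionLayer -> ActivationLayer(RELU) -> ...
    After:  DepthwiseConvolutionLayer [fused RELU] -> ...
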
Change-Id: Id964d9068e18aaa13ab8adcbf7a9375b034ea6c3
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/154651
Tested-by: bsgcomp <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
diff --git a/src/graph/backends/GLES/GCFunctionsFactory.cpp b/src/graph/backends/GLES/GCFunctionsFactory.cpp
index 02a0567..7df659e 100644
--- a/src/graph/backends/GLES/GCFunctionsFactory.cpp
+++ b/src/graph/backends/GLES/GCFunctionsFactory.cpp
@@ -171,8 +171,12 @@
         biases->info()->set_data_type(DataType::S32);
     }
 
-    const PadStrideInfo              conv_info     = node.convolution_info();
-    const DepthwiseConvolutionMethod dwc_algorithm = node.depthwise_convolution_method();
+    const PadStrideInfo              conv_info        = node.convolution_info();
+    const DepthwiseConvolutionMethod dwc_algorithm    = node.depthwise_convolution_method();
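+    // The depthwise node does not carry a depth multiplier yet, so the default of 1 is passed through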
+    const unsigned int               depth_multiplier = 1;
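+    // Activation fused into the node by the NodeFusionMutator; disabled if nothing was fused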
+    const ActivationLayerInfo        fused_act        = node.fused_activation();
 
     // Create and configure function (we assume that functions have been validated before creation)
     std::unique_ptr<IFunction> func;
@@ -181,7 +183,7 @@
     {
         std::tie(func, func_name) = create_named_function<GCDepthwiseConvolutionLayerFunctions::DepthwiseConvolutionLayer3x3>(
                                         std::string("DepthwiseConvolutionLayer3x3"),
-                                        input, weights, biases, output, conv_info);
+                                        input, weights, biases, output, conv_info, depth_multiplier, fused_act);
     }
     else
     {
@@ -197,6 +199,7 @@
                                << " Input shape: " << input->info()->tensor_shape()
                                << " Weights shape: " << weights->info()->tensor_shape()
                                << " Output shape: " << output->info()->tensor_shape()
+                               << (fused_act.enabled() ? " " + to_string(fused_act.activation()) : "")
                                << std::endl);
     return func;
 }
diff --git a/src/graph/mutators/NodeFusionMutator.cpp b/src/graph/mutators/NodeFusionMutator.cpp
index 7e66ce0..98c3a56 100644
--- a/src/graph/mutators/NodeFusionMutator.cpp
+++ b/src/graph/mutators/NodeFusionMutator.cpp
@@ -39,12 +39,15 @@
 namespace detail
 {
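+/** Fuses a node of type N with a following activation node when the given precondition is satisfied */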
 template <typename N>
-void fuse_node_with_activation(Graph &g, const std::set<Activation> &supported_fused_activations)
+void fuse_node_with_activation(Graph                              &g,
+                               const std::set<Activation>         &supported_fused_activations,
+                               std::function<bool(INode &)> const &prec)
 {
     // Not interested in the order of nodes
     for(auto &node : g.nodes())
     {
-        // Check if the node is batch norm and not a branching node
+        // Check if the node is of type N and not a branching node
         if(node && node->type() == N::node_type && node->output_edges().size() == 1)
         {
             auto output_edge_id = *node->output_edges().begin();
@@ -57,6 +59,11 @@
 
                 ARM_COMPUTE_ERROR_ON(act_node->output(0) == nullptr || n_node->output(0) == nullptr);
 
+                // Check that the given precondition is satisfied
+                if(!prec(*n_node))
+                {
+                    continue;
+                }
                 // Check if activation is supported for fusion
                 if(supported_fused_activations.count(act_node->activation_info().activation()) == 0)
                 {
@@ -110,8 +117,22 @@
     // Supported activations when fusing
     const std::set<Activation> supported_fused_activations = { Activation::RELU, Activation::BOUNDED_RELU, Activation::LU_BOUNDED_RELU };
 
-    detail::fuse_node_with_activation<BatchNormalizationLayerNode>(g, supported_fused_activations);
-    detail::fuse_node_with_activation<ConvolutionLayerNode>(g, supported_fused_activations);
+    // Preconditions
+    auto empty_prec = [](INode &)
+    {
+        return true;
+    };
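+    // Fuse activation into depthwise convolution only when the node output is quantized (QASYMM8)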
+    auto qasymm8_prec = [](INode & n)
+    {
+        ARM_COMPUTE_ERROR_ON(n.output(0) == nullptr);
+        return n.output(0)->desc().data_type == DataType::QASYMM8;
+    };
+
+    // Fusion mutations
+    detail::fuse_node_with_activation<BatchNormalizationLayerNode>(g, supported_fused_activations, empty_prec);
+    detail::fuse_node_with_activation<ConvolutionLayerNode>(g, supported_fused_activations, empty_prec);
+    detail::fuse_node_with_activation<DepthwiseConvolutionLayerNode>(g, supported_fused_activations, qasymm8_prec);
 }
 } // namespace graph
 } // namespace arm_compute
diff --git a/src/graph/nodes/DepthwiseConvolutionLayerNode.cpp b/src/graph/nodes/DepthwiseConvolutionLayerNode.cpp
index 1a6f8d3..02d1632 100644
--- a/src/graph/nodes/DepthwiseConvolutionLayerNode.cpp
+++ b/src/graph/nodes/DepthwiseConvolutionLayerNode.cpp
@@ -33,7 +33,7 @@
 namespace graph
 {
 DepthwiseConvolutionLayerNode::DepthwiseConvolutionLayerNode(PadStrideInfo info, DepthwiseConvolutionMethod method)
-    : _info(std::move(info)), _method(method)
+    : _info(std::move(info)), _method(method), _fused_activation()
 {
     _input_edges.resize(3, EmptyEdgeID);
     _outputs.resize(1, NullTensorID);
@@ -54,6 +54,16 @@
     return _info;
 }
 
+ActivationLayerInfo DepthwiseConvolutionLayerNode::fused_activation() const
+{
+    return _fused_activation;
+}
+
+void DepthwiseConvolutionLayerNode::set_fused_activation(ActivationLayerInfo fused_activation)
+{
+    _fused_activation = fused_activation;
+}
+
 TensorDescriptor DepthwiseConvolutionLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
                                                                           const TensorDescriptor &weights_descriptor,
                                                                           const PadStrideInfo    &info)
@@ -100,7 +110,8 @@
 
 NodeType DepthwiseConvolutionLayerNode::type() const
 {
-    return NodeType::DepthwiseConvolutionLayer;
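+    // Return the class' node_type constant so it stays in sync with the N::node_type checks in NodeFusionMutator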
+    return DepthwiseConvolutionLayerNode::node_type;
 }
 
 void DepthwiseConvolutionLayerNode::accept(INodeVisitor &v)