COMPMID-1451: Fuse activation in DepthwiseConvolution.

Change-Id: Id964d9068e18aaa13ab8adcbf7a9375b034ea6c3
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/154651
Tested-by: bsgcomp <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
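
This patch threads an optional ActivationLayerInfo through the depthwise convolution entry points: the CL, GLES_COMPUTE and NEON runtime functions gain an act_info parameter on configure()/validate(), and the graph DepthwiseConvolutionLayerNode plus the backend function helpers carry a fused activation so a trailing ActivationLayer can be folded into the convolution. Below is a minimal usage sketch against the NEON runtime; the shapes, the F32 3x3 stride-1 configuration and the plain ReLU are illustrative assumptions, not part of the patch itself.

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Illustrative 32x32x16 F32 input with a 3x3 depthwise kernel, stride 1, pad 1.
        Tensor input{}, weights{}, biases{}, output{};
        input.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32));
        biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
        output.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));

        NEDepthwiseConvolutionLayer3x3 dwc{};
        // New in this patch: the trailing ActivationLayerInfo fuses the activation into the
        // depthwise convolution function instead of requiring a separate NEActivationLayer.
        dwc.configure(&input, &weights, &biases, &output,
                      PadStrideInfo(1, 1, 1, 1),
                      1 /* depth_multiplier */,
                      ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

        input.allocator()->allocate();
        weights.allocator()->allocate();
        biases.allocator()->allocate();
        output.allocator()->allocate();

        dwc.run();
        return 0;
    }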
diff --git a/arm_compute/graph/backends/FunctionHelpers.h b/arm_compute/graph/backends/FunctionHelpers.h
index a1cadcb..1968ec3 100644
--- a/arm_compute/graph/backends/FunctionHelpers.h
+++ b/arm_compute/graph/backends/FunctionHelpers.h
@@ -397,8 +397,10 @@
         biases->info()->set_data_type(DataType::S32);
     }
 
-    const PadStrideInfo              conv_info     = node.convolution_info();
-    const DepthwiseConvolutionMethod dwc_algorithm = node.depthwise_convolution_method();
+    const PadStrideInfo              conv_info        = node.convolution_info();
+    const DepthwiseConvolutionMethod dwc_algorithm    = node.depthwise_convolution_method();
+    const unsigned int               depth_multiplier = 1;
+    const ActivationLayerInfo        fused_act        = node.fused_activation();
 
     // Create and configure function (we assume that functions have been validated before creation)
     std::unique_ptr<IFunction> func;
@@ -407,13 +409,13 @@
     {
         std::tie(func, func_name) = create_named_function<typename DepthwiseConvolutionLayerFunctions::DepthwiseConvolutionLayer3x3>(
                                         std::string("DepthwiseConvolutionLayer3x3"),
-                                        input, weights, biases, output, conv_info);
+                                        input, weights, biases, output, conv_info, depth_multiplier, fused_act);
     }
     else
     {
         std::tie(func, func_name) = create_named_function<typename DepthwiseConvolutionLayerFunctions::GenericDepthwiseConvolutionLayer>(
                                         std::string("DepthwiseConvolutionLayer"),
-                                        input, weights, biases, output, conv_info);
+                                        input, weights, biases, output, conv_info, depth_multiplier, fused_act);
     }
 
     // Log info
@@ -431,6 +433,7 @@
                                << " Input shape: " << input->info()->tensor_shape()
                                << " Weights shape: " << weights->info()->tensor_shape()
                                << " Output shape: " << output->info()->tensor_shape()
+                               << (fused_act.enabled() ? " " + to_string(fused_act.activation()) : "")
                                << std::endl);
     return func;
 }
diff --git a/arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h b/arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h
index 1a173c5..7fa44b7 100644
--- a/arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h
+++ b/arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h
@@ -58,6 +58,16 @@
      * @return Convolution information
      */
     PadStrideInfo convolution_info() const;
+    /** Returns fused activation
+     *
+     * @return Fused activation
+     */
+    ActivationLayerInfo fused_activation() const;
+    /** Sets fused activation
+     *
+     * @param[in] fused_activation Fused activation to set
+     */
+    void set_fused_activation(ActivationLayerInfo fused_activation);
     /** Computes depthwise convolution output descriptor
      *
      * @param[in] input_descriptor   Input descriptor
@@ -76,9 +86,13 @@
     TensorDescriptor configure_output(size_t idx) const override;
     void accept(INodeVisitor &v) override;
 
+public:
+    static constexpr NodeType node_type = NodeType::DepthwiseConvolutionLayer;
+
 private:
     PadStrideInfo              _info;
     DepthwiseConvolutionMethod _method;
+    ActivationLayerInfo        _fused_activation;
 };
 } // namespace graph
 } // namespace arm_compute
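
The node_type constant and the fused_activation()/set_fused_activation() accessors are the hooks a graph mutator needs in order to fold a trailing ActivationLayer node into the depthwise convolution node. The helper below is a hypothetical sketch using only the accessors added here; the fuse_activation_into name and the surrounding pass that would locate the activation node are assumptions, not library API.

    #include "arm_compute/core/Types.h"
    #include "arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h"

    namespace
    {
    // Hypothetical helper: record the activation of a following ActivationLayer node on the
    // depthwise convolution node itself, so the backend creates a single fused function.
    void fuse_activation_into(arm_compute::graph::DepthwiseConvolutionLayerNode &dwc_node,
                              const arm_compute::ActivationLayerInfo            &act_info)
    {
        // Only fuse once; a node that already carries a fused activation is left untouched.
        if(dwc_node.type() == arm_compute::graph::DepthwiseConvolutionLayerNode::node_type
           && !dwc_node.fused_activation().enabled())
        {
            dwc_node.set_fused_activation(act_info);
        }
    }
    } // namespace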
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
index 0547a6a..96a0d23 100644
--- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
@@ -35,6 +35,7 @@
 #include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
 #include "arm_compute/runtime/IFunction.h"
 
 namespace arm_compute
@@ -121,8 +122,10 @@
      * @param[out]     output           Destination tensor. Data type supported: same as @p input.
      * @param[in]      conv_info        Padding and stride information to use for the convolution.
      * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
      */
-    void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+    void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo());
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer
      *
@@ -133,10 +136,12 @@
      * @param[in] output           Destination tensor. Data type supported: same as @p input.
      * @param[in] conv_info        Padding and stride information to use for the convolution.
      * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+                           unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo());
 
     // Inherited methods overriden:
     void run() override;
@@ -148,6 +153,7 @@
     CLGEMMMatrixVectorMultiplyKernel          _v2mm_kernel;
     CLDepthwiseVectorToTensorKernel           _vector_to_tensor_kernel;
     CLDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
+    CLActivationLayer                         _activationlayer_function;
     CLFillBorderKernel                        _v2mm_input_fill_border;
     CLFillBorderKernel                        _v2mm_weights_fill_border;
     CLTensor                                  _input_reshaped;
@@ -156,6 +162,7 @@
     CLTensor                                  _output_reshaped;
     bool                                      _is_prepared;
     bool                                      _is_quantized;
+    bool                                      _is_activationlayer_enabled;
     const ICLTensor                          *_original_weights;
 };
 }
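
On the CL side, validate() gains the same optional parameter, so a caller can check whether a given fused activation is accepted before configuring anything. A minimal sketch, assuming a plain F32 3x3 case and an upper-bounded ReLU (ReLU6); the shapes and the activation choice are illustrative.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h"

    using namespace arm_compute;

    int main()
    {
        // Initialise the CL runtime before querying the backend.
        CLScheduler::get().default_init();

        const TensorInfo input(TensorShape(56U, 56U, 24U), 1, DataType::F32);
        const TensorInfo weights(TensorShape(3U, 3U, 24U), 1, DataType::F32);
        const TensorInfo biases(TensorShape(24U), 1, DataType::F32);
        const TensorInfo output(TensorShape(56U, 56U, 24U), 1, DataType::F32);

        // Upper-bounded ReLU (ReLU6) to be fused into the depthwise convolution.
        const ActivationLayerInfo act(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f);

        const Status status = CLDepthwiseConvolutionLayer::validate(&input, &weights, &biases, &output,
                                                                    PadStrideInfo(1, 1, 1, 1),
                                                                    1 /* depth_multiplier */, act);
        return bool(status) ? 0 : 1;
    }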
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.h
index c994856..5eccc4d 100644
--- a/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.h
@@ -28,6 +28,7 @@
 #include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h"
 #include "arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h"
 #include "arm_compute/runtime/IFunction.h"
 
 namespace arm_compute
@@ -54,8 +55,10 @@
      * @param[out]     output           Destination tensor. Data type supported: same as @p input.
      * @param[in]      conv_info        Padding and stride information to use for the convolution.
      * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
      */
-    void configure(IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+    void configure(IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info,
+                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo());
 
     // Inherited methods overridden:
     void run() override final;
@@ -64,6 +67,9 @@
     std::unique_ptr<IGCKernel> _kernel;
     GCFillBorderKernel         _border_handler;
     GCTensorShiftKernel        _shift_handler;
+    GCActivationLayer          _activationlayer_function;
+
+    bool _is_activationlayer_enabled;
 };
 }
 #endif /*__ARM_COMPUTE_GCDEPTHWISECONVOLUTION_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index b7398f6..288d513 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -35,6 +35,7 @@
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/IMemoryManager.h"
 #include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
 #include "arm_compute/runtime/NEON/functions/NEPermute.h"
 #include "arm_compute/runtime/Tensor.h"
 
@@ -62,8 +63,10 @@
      * @param[out]     output           Destination tensor. Data type supported: same as @p input.
      * @param[in]      conv_info        Padding and stride information to use for the convolution.
      * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
      */
-    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
+                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo());
 
     /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer3x3
      *
@@ -74,10 +77,12 @@
      * @param[in] output           Destination tensor. Data type supported: same as @p input.
      * @param[in] conv_info        Padding and stride information to use for the convolution.
      * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+                           unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo());
 
     // Inherited methods overriden:
     void run() override;
@@ -89,6 +94,7 @@
     NEPermute                                 _permute_input;
     NEPermute                                 _permute_weights;
     NEPermute                                 _permute_output;
+    NEActivationLayer                         _activationlayer_function;
     Tensor                                    _accumulator;
     Tensor                                    _permuted_input;
     Tensor                                    _permuted_weights;
@@ -100,6 +106,7 @@
     bool                                      _is_nchw;
     bool                                      _is_first_run;
     bool                                      _permute;
+    bool                                      _is_activationlayer_enabled;
 };
 
 /** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernels:
@@ -132,8 +139,10 @@
      *                                  Data type supported: Same as @p input, S32 when input is QASYMM8.
      * @param[in]      conv_info        Padding and stride information to use for the convolution.
      * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
      */
-    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
+                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo());
 
     /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer
      *
@@ -144,10 +153,12 @@
      *                             Data type supported: Same as @p input, S32 when input is QASYMM8.
      * @param[in] conv_info        Padding and stride information to use for the convolution.
      * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+                           unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo());
 
     // Inherited methods overriden:
     void run() override;
@@ -164,6 +175,7 @@
     NEPermute                                 _permute_input;
     NEPermute                                 _permute_weights;
     NEPermute                                 _permute_output;
+    NEActivationLayer                         _activationlayer_function;
     Tensor                                    _input_reshaped;
     Tensor                                    _weights_reshaped;
     Tensor                                    _v2mm_output;
@@ -174,6 +186,7 @@
     bool                                      _is_prepared;
     bool                                      _is_quantized;
     bool                                      _is_nhwc;
+    bool                                      _is_activationlayer_enabled;
     const ITensor                            *_original_weights;
 };
 }
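
The generic NEDepthwiseConvolutionLayer documents S32 biases when the input is QASYMM8, and the new act_info parameter applies to that quantized path as well. A hedged sketch of a quantized configuration; the scales, offsets and shapes are made-up values, and whether a particular activation can actually be fused for QASYMM8 remains subject to validate().

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // QASYMM8 input/weights/output with S32 biases, as documented above.
        Tensor input{}, weights{}, biases{}, output{};
        input.allocator()->init(TensorInfo(TensorShape(28U, 28U, 32U), 1, DataType::QASYMM8, QuantizationInfo(0.02f, 128)));
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 32U), 1, DataType::QASYMM8, QuantizationInfo(0.01f, 140)));
        biases.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::S32));
        output.allocator()->init(TensorInfo(TensorShape(28U, 28U, 32U), 1, DataType::QASYMM8, QuantizationInfo(0.05f, 0)));

        NEDepthwiseConvolutionLayer dwc{};
        // Fused lower/upper-bounded ReLU; previously this required a separate activation layer.
        dwc.configure(&input, &weights, &biases, &output,
                      PadStrideInfo(1, 1, 1, 1),
                      1 /* depth_multiplier */,
                      ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f, 0.f));
        return 0;
    }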