COMPMID-908 - Merge Activation layer with Convolution Layer (NEON, CL, GLES)

Change-Id: Iab06d0768ecf805b841e601185608aae88cf9166
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/120874
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
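
Example usage of the new fused-activation argument (a minimal C++ sketch; the
tensor shapes, strides and the choice of ReLU are illustrative and not taken
from this patch):

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        // Illustrative shapes: 32x32x3 input, 16 OFMs, 3x3 kernel, F32
        CLTensor src, weights, biases, dst;
        src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 3U), 1, DataType::F32));
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 16U), 1, DataType::F32));
        biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));

        // Pass an ActivationLayerInfo through the new act_info parameter so the
        // activation is applied by the convolution function itself
        CLConvolutionLayer conv;
        conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1),
                       WeightsInfo(), Size2D(1U, 1U),
                       ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

        src.allocator()->allocate();
        weights.allocator()->allocate();
        biases.allocator()->allocate();
        dst.allocator()->allocate();

        // ... fill src/weights/biases ...

        conv.run();
        CLScheduler::get().sync();
        return 0;
    }
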
diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
index 5f383b2..7df7659 100644
--- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
@@ -24,10 +24,9 @@
 #ifndef __ARM_COMPUTE_CLCONVOLUTIONLAYER_H__
 #define __ARM_COMPUTE_CLCONVOLUTIONLAYER_H__
 
-#include "arm_compute/runtime/IFunction.h"
-
 #include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h"
 #include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"
+#include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/IMemoryManager.h"
 
 #include <memory>
@@ -57,9 +56,10 @@
      * @param[in]  conv_info    Contains padding and stride information described in @ref PadStrideInfo.
      * @param[in]  weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. Data type supported: Same as @p input.
      * @param[in]  dilation     (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in]  act_info     (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
-                   const Size2D &dilation = Size2D(1U, 1U));
+                   const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLConvolutionLayer
      *
      * @param[in] input        Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -72,11 +72,12 @@
      * @param[in] conv_info    Contains padding and stride information described in @ref PadStrideInfo.
      * @param[in] weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. Data type supported: Same as @p input.
      * @param[in] dilation     (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in] act_info     (Optional) Activation layer information in case of a fused activation.
      *
      * @return a status
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
-                           const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U));
+                           const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will return the convolution called by @ref CLConvolutionLayer
      *
      * @param[in] input        Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -88,14 +89,14 @@
      *                         Data types supported: Same as @p input.
      * @param[in] conv_info    Contains padding and stride information described in @ref PadStrideInfo.
      * @param[in] weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. Data type supported: Same as @p input.
+     * @param[in] act_info     Activation layer information in case of a fused activation.
      * @param[in] gpu_target   Specifies the @p GPUTarget.
      * @param[in] dilation     (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
      *
      * @return a status
      */
     static ConvolutionMethod get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
-                                                    const WeightsInfo &weights_info, const GPUTarget gpu_target, const Size2D &dilation = Size2D(1U, 1U));
-
+                                                    const WeightsInfo &weights_info, const ActivationLayerInfo &act_info, const GPUTarget gpu_target, const Size2D &dilation = Size2D(1U, 1U));
     // Inherited methods overridden:
     void run() override;
 
diff --git a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
index 205c90c..4363224 100644
--- a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,6 +27,7 @@
 #include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
 #include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
 #include "arm_compute/runtime/IFunction.h"
 
 #include <memory>
@@ -53,8 +54,9 @@
      * @param[out] output    Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
      *                       Data types supported: Same as @p input.
      * @param[in]  conv_info Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in]  act_info  (Optional) Activation layer information in case of a fused activation.
      */
-    void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
+    void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLDirectConvolutionLayer
      *
      * @param[in] input     Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -65,10 +67,12 @@
      * @param[in] output    Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
      *                      Data types supported: Same as @p input.
      * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in] act_info  (Optional) Activation layer information in case of a fused activation.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+                           const ActivationLayerInfo &act_info = ActivationLayerInfo());
 
     // Inherited methods overridden:
     void run() override;
@@ -76,6 +80,9 @@
 private:
     CLDirectConvolutionLayerKernel _direct_conv_kernel;
     CLFillBorderKernel             _input_border_handler;
+    CLActivationLayer              _activationlayer_function;
+
+    bool _is_activationlayer_enabled;
 };
 }
 #endif /* __ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
index 91d35ca..9bfbb53 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
@@ -36,6 +36,7 @@
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/CL/CLMemoryGroup.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
 #include "arm_compute/runtime/CL/functions/CLGEMM.h"
 #include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
 #include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
@@ -125,9 +126,10 @@
      * @param[in]  weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. If this is not part of the fully connected layer the weights
      *                          tensor has also been transposed with CLGEMMTranspose1xWKernel. Data type supported: Same as @p input.
      * @param[in]  dilation     (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in]  act_info     (Optional) Activation layer information in case of a fused activation.
      */
     void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
-                   const Size2D &dilation = Size2D(1U, 1U));
+                   const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer.
      *
      * @param[in]  input        Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -142,11 +144,12 @@
      * @param[in]  weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. If this is not part of the fully connected layer the weights
      *                          tensor has also been transposed with CLGEMMTranspose1xWKernel. Data type supported: Same as @p input.
      * @param[in]  dilation     (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in]  act_info     (Optional) Activation layer information in case of a fused activation.
      *
      * @return a status
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
-                           const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U));
+                           const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo());
 
     // Inherited methods overridden:
     void run() override;
@@ -179,6 +182,7 @@
     CLGEMMLowpMatrixMultiplyCore                        _mm_gemmlowp;
     CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage;
     CLCol2ImKernel                                      _col2im_kernel;
+    CLActivationLayer                                   _activationlayer_function;
 
     const ICLTensor *_original_weights;
 
@@ -189,6 +193,7 @@
 
     bool _is_quantized;
     bool _is_first_run;
+    bool _is_activationlayer_enabled;
 };
 }
 #endif /* __ARM_COMPUTE_CLGEMMCONVOLUTIONLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h
index 14de169..2cf1f77 100644
--- a/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h
@@ -27,6 +27,7 @@
 #include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h"
 #include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
 #include "arm_compute/runtime/CL/functions/CLGEMM.h"
 #include "arm_compute/runtime/CL/functions/CLWinogradInputTransform.h"
 #include "arm_compute/runtime/IFunction.h"
@@ -60,8 +61,10 @@
      * @param[out] output    Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
      *                       Data types supported: Same as @p input.
      * @param[in]  conv_info Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in]  act_info  (Optional) Activation layer information in case of a fused activation.
      */
-    void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
+    void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                   const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradConvolutionLayer
      *
      * @note: This function only works with 3x3 kernels and unit strides
@@ -74,10 +77,12 @@
      * @param[out] output    Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
      *                       Data types supported: Same as @p input.
      * @param[in]  conv_info Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in]  act_info  (Optional) Activation layer information in case of a fused activation.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+                           const ActivationLayerInfo &act_info = ActivationLayerInfo());
 
     // Inherited methods overridden:
     void run() override;
@@ -88,10 +93,12 @@
     CLWinogradInputTransform        _input_transform;
     CLWinogradFilterTransformKernel _filter_transform;
     CLWinogradOutputTransformKernel _output_transform;
+    CLActivationLayer               _activationlayer_function;
     CLTensor                        _input0;
     CLTensor                        _input1;
     CLTensor                        _batched_mm_output;
     bool                            _is_first_run;
+    bool                            _is_activationlayer_enabled;
 };
 }
 #endif /* __ARM_COMPUTE_CLWINOGRADCONVOLUTIONLAYER_H__ */