COMPMID-2245: Extend NEFuseBatchNormalization to support DepthwiseConvolution weights

Change-Id: I2ee4aebfd69865290ed6c78dd17ff1299353317e
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1371
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
diff --git a/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h b/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h
index 5e62869..3a2f6cc 100644
--- a/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h
+++ b/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h
@@ -52,37 +52,43 @@
     ~NEFuseBatchNormalization() = default;
     /** Set the input and output tensors.
      *
-     * @param[in]  conv_weights  Convolution layer weights tensor. Data type supported: F16/F32
-     * @param[in]  bn_mean       Batch normalization layer mean tensor. Same as @p conv_weights
-     * @param[in]  bn_var        Batch normalization layer variance tensor. Same as @p conv_weights
-     * @param[out] fused_weights Output fused weights tensor. Same as @p conv_weights
-     * @param[out] fused_bias    Output fused bias tensor. Same as @p conv_weights
-     * @param[in]  conv_bias     (Optional) Convolution layer bias tensor. Same as @p conv_weights
-     * @param[in]  bn_beta       (Optional) Batch normalization layer beta tensor. Same as @p conv_weights
-     * @param[in]  bn_gamma      (Optional) Batch normalization layer gamma tensor. Same as @p conv_weights
+     * @param[in]  input_weights Input weights tensor for convolution or depthwise convolution layer. Data types supported: F16/F32. Data layouts supported: NCHW, NHWC
+     * @param[in]  bn_mean       Batch normalization layer mean tensor. Same as @p input_weights
+     * @param[in]  bn_var        Batch normalization layer variance tensor. Same as @p input_weights
+     * @param[out] fused_weights (Optional) Output fused weights tensor. It can be a nullptr in case of in-place computation. Same as @p input_weights
+     * @param[out] fused_bias    (Optional) Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights
+     * @param[in]  input_bias    (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights
+     * @param[in]  bn_beta       (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights
+     *                           @note if nullptr, bn_beta is set to 0.0
+     * @param[in]  bn_gamma      (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights
+     *                           @note if nullptr, bn_gamma is set to 1.0
      * @param[in]  epsilon       (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
+     * @param[in]  fbn_type      (Optional) Fused batch normalization type. Defaults to FuseBatchNormalizationType::CONVOLUTION.
      */
-    void configure(const ITensor *conv_weights, const ITensor *bn_mean, const ITensor *bn_var, ITensor *fused_weights, ITensor *fused_bias,
-                   const ITensor *conv_bias = nullptr, const ITensor *bn_beta = nullptr, const ITensor *bn_gamma = nullptr,
-                   float epsilon = 0.001f);
+    void configure(const ITensor *input_weights, const ITensor *bn_mean, const ITensor *bn_var, ITensor *fused_weights, ITensor *fused_bias,
+                   const ITensor *input_bias = nullptr, const ITensor *bn_beta = nullptr, const ITensor *bn_gamma = nullptr,
+                   float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
     /** Static function to check if given info will lead to a valid configuration of @ref NEFuseBatchNormalization
      *
-     * @param[in] conv_weights  Convolution layer weights tensor. Data type supported: F16/F32
-     * @param[in] bn_mean       Batch normalization layer mean tensor. Same as @p conv_weights
-     * @param[in] bn_var        Batch normalization layer variance tensor. Same as @p conv_weights
-     * @param[in] fused_weights Output fused weights tensor. Same as @p conv_weights
-     * @param[in] fused_bias    Output fused bias tensor. Same as @p conv_weights
-     * @param[in] conv_bias     (Optional) Convolution layer bias tensor. Same as @p conv_weights
-     * @param[in] bn_beta       (Optional) Batch normalization layer beta tensor. Same as @p conv_weights
-     * @param[in] bn_gamma      (Optional) Batch normalization layer gamma tensor. Same as @p conv_weights
+     * @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data types supported: F16/F32. Data layouts supported: NCHW, NHWC
+     * @param[in] bn_mean       Batch normalization layer mean tensor info. Same as @p input_weights
+     * @param[in] bn_var        Batch normalization layer variance tensor info. Same as @p input_weights
+     * @param[in] fused_weights (Optional) Output fused weights tensor info. It can be a nullptr in case of in-place computation. Same as @p input_weights
+     * @param[in] fused_bias    (Optional) Output fused bias tensor info. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights
+     * @param[in] input_bias    (Optional) Input bias tensor info for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights
+     * @param[in] bn_beta       (Optional) Batch normalization layer beta tensor info. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights
+     *                          @note if nullptr, bn_beta is set to 0.0
+     * @param[in] bn_gamma      (Optional) Batch normalization layer gamma tensor info. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights
+     *                          @note if nullptr, bn_gamma is set to 1.0
      * @param[in] epsilon       (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
+     * @param[in] fbn_type      (Optional) Fused batch normalization type. Defaults to FuseBatchNormalizationType::CONVOLUTION.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *conv_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var,
+    static Status validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var,
                            const ITensorInfo *fused_weights, const ITensorInfo *fused_bias,
-                           const ITensorInfo *conv_bias = nullptr, const ITensorInfo *bn_beta = nullptr, const ITensorInfo *bn_gamma = nullptr,
-                           float epsilon = 0.001f);
+                           const ITensorInfo *input_bias = nullptr, const ITensorInfo *bn_beta = nullptr, const ITensorInfo *bn_gamma = nullptr,
+                           float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
 
     // Inherited methods overridden:
     void run() override;