COMPMID-417: Add in-place support for batch-normalization.

Change-Id: I4b0c9348f3bc2addc198a76fadd1b583abf42b60
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/84434
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
diff --git a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
index b0b5c12..041b9e7 100644
--- a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
@@ -45,16 +45,19 @@
     NEBatchNormalizationLayer();
     /** Set the input and output tensors.
      *
-     * @param[in]  input   Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM].
-     *                     The rest are optional and used for representing batches. Data types supported: QS8/F32.
-     * @param[in]  mean    Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
-     * @param[in]  var     Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
-     * @param[in]  gamma   Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
-     * @param[in]  beta    Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
-     * @param[in]  epsilon Small value to avoid division with zero.
-     * @param[out] output  Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
+     * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place
+     *
+     * @param[in, out] input   Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
+     *                         3 lower dimensions represent a single input with dimensions [width, height, FM].
+     *                         The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32.
+     * @param[in]      mean    Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]      var     Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]      gamma   Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]      beta    Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]      epsilon Small value to avoid division with zero.
+     * @param[out]     output  Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
      */
-    void configure(const ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon);
+    void configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon);
 
     // Inherited methods overridden:
     void run() override;