COMPMID-417 NEON/CL MeanStdDev bugfix using FillBorderKernel

Change-Id: Ic48ba7f69783d0e1e80611264e2bc67d1732436e
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/81293
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
diff --git a/arm_compute/runtime/NEON/functions/NEMeanStdDev.h b/arm_compute/runtime/NEON/functions/NEMeanStdDev.h
index 3770b2a..e8bf8df 100644
--- a/arm_compute/runtime/NEON/functions/NEMeanStdDev.h
+++ b/arm_compute/runtime/NEON/functions/NEMeanStdDev.h
@@ -24,6 +24,7 @@
 #ifndef __ARM_COMPUTE_NEMEANSTDDEV_H__
 #define __ARM_COMPUTE_NEMEANSTDDEV_H__
 
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
 #include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/IFunction.h"
@@ -44,17 +45,18 @@
     NEMeanStdDev();
     /** Initialise the kernel's inputs and outputs.
      *
-     * @param[in]  input  Input image. Data type supported: U8.
-     * @param[out] mean   Output average pixel value.
-     * @param[out] stddev (Optional) Output standard deviation of pixel values.
+     * @param[in, out] input  Input image. Data types supported: U8. (Written to only when its border is filled.)
+     * @param[out]     mean   Output average pixel value.
+     * @param[out]     stddev (Optional) Output standard deviation of pixel values.
      */
-    void configure(const IImage *input, float *mean, float *stddev = nullptr);
+    void configure(IImage *input, float *mean, float *stddev = nullptr);
 
     // Inherited methods overridden:
     void run() override;
 
 private:
     NEMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that standard deviation calculation. */
+    NEFillBorderKernel _fill_border_kernel; /**< Kernel that fills the input tensor's borders with zeroes. */
     uint64_t           _global_sum;         /**< Variable that holds the global sum among calls in order to ease reduction */
     uint64_t           _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */
 };