COMPMID-647: Exclude padding pixels from averaging factor.

Adds support for excluding padding pixels when computing the scaling
factor used for average pooling.

Change-Id: Ia13fbfeae235aff564db74191613921848231a01
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/93715
Reviewed-by: Robert Hughes <robert.hughes@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
index 9d7c751..0a57a26 100644
--- a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
@@ -66,14 +66,14 @@
      * @param[in] window_input Input region on which to execute the kernel.
      * @param[in] window       Output region on which to execute the kernel.
      */
-    template <PoolingType pooling_type>
+    template <PoolingType pooling_type, bool exclude_padding = false>
     void pooling2_f32(const Window &window_input, const Window &window);
     /** Function to perform 2x2 pooling for float16_t.
      *
      * @param[in] window_input Input region on which to execute the kernel.
      * @param[in] window       Output region on which to execute the kernel.
      */
-    template <PoolingType pooling_type>
+    template <PoolingType pooling_type, bool exclude_padding = false>
     void pooling2_f16(const Window &window_input, const Window &window);
 
     /** Function to perform 2x2 pooling for 8bit fixed point.
@@ -95,14 +95,14 @@
      * @param[in] window_input Input region on which to execute the kernel.
      * @param[in] window       Output region on which to execute the kernel.
      */
-    template <PoolingType pooling_type>
+    template <PoolingType pooling_type, bool exclude_padding = false>
     void pooling3_f32(const Window &window_input, const Window &window);
     /** Function to perform 3x3 pooling.
      *
      * @param[in] window_input Input region on which to execute the kernel.
      * @param[in] window       Output region on which to execute the kernel.
      */
-    template <PoolingType pooling_type>
+    template <PoolingType pooling_type, bool exclude_padding = false>
     void pooling3_f16(const Window &window_input, const Window &window);
     /** Function to perform 3x3 pooling for 8bit fixed point.
      *
@@ -123,14 +123,14 @@
      * @param[in] window_input Input region on which to execute the kernel.
      * @param[in] window       Output region on which to execute the kernel.
      */
-    template <PoolingType pooling_type>
+    template <PoolingType pooling_type, bool exclude_padding = false>
     void pooling7_f32(const Window &window_input, const Window &window);
     /** Function to perform NxN pooling.
      *
      * @param[in] window_input Input region on which to execute the kernel.
      * @param[in] window       Output region on which to execute the kernel.
      */
-    template <PoolingType pooling_type>
+    template <PoolingType pooling_type, bool exclude_padding = false>
     void poolingN_f32(const Window &window_input, const Window &window);
     /** Common signature for all the specialised Pooling functions
      *