COMPMID-463 - Extended Pooling Layer on NEON to support Global Pooling

Change-Id: I8ae44187624deeab3d40d878e7b34ff651f1dad0
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/89834
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
index 2a0ecf8..9d7c751 100644
--- a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
@@ -48,6 +48,8 @@
     ~NEPoolingLayerKernel() = default;
     /** Set the input and output tensors.
      *
+     * @note QS8, QS16 and F16 are supported for pool sizes 2 and 3 only
+     *
      * @param[in]  input     Source tensor. Data types supported: QS8/QS16/F16/F32.
      * @param[out] output    Destination tensor. Data types supported: Same as @p input.
      * @param[in]  pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
@@ -123,6 +125,13 @@
      */
     template <PoolingType pooling_type>
     void pooling7_f32(const Window &window_input, const Window &window);
+    /** Function to perform generic NxN pooling on F32 data.
+     *
+     * @param[in] window_input Input region on which to execute the kernel.
+     * @param[in] window       Output region on which to execute the kernel.
+     */
+    template <PoolingType pooling_type>
+    void poolingN_f32(const Window &window_input, const Window &window);
     /** Common signature for all the specialised Pooling functions
      *
      * @param[in] window_input Input region on which to execute the kernel.