COMPMID-802 Add NHWC data format support for NEON im2col.

Change-Id: I86e678179106a2b83d1c6a7cfe562df91b0f9eb2
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/124000
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
diff --git a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h
index ecfce24..5aa803f 100644
--- a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h
+++ b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h
@@ -111,7 +111,7 @@
     void run(const Window &window, const ThreadInfo &info) override;
 
 private:
-    /** Template function to run the im2col optimised for the fully connected layer case
+    /** Template function to run the im2col optimised for the fully connected and flatten layer cases
      *
      * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
      */
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index c3d5b64..e174227 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -107,13 +107,6 @@
 
     return shape_vector_sum_row;
 }
-inline TensorShape compute_im2col_shape(const ITensorInfo &input)
-{
-    TensorShape shape_im2col{ input.tensor_shape() };
-    shape_im2col.collapse(3);
-
-    return shape_im2col;
-}
 inline TensorShape compute_col2im_shape(const ITensorInfo &input, std::pair<unsigned int, unsigned int> convolved_dims)
 {
     TensorShape col2im_shape{ input.tensor_shape() };
@@ -159,7 +152,25 @@
 
     return scale_out_shape;
 }
-inline TensorShape compute_im2col_shape(const ITensorInfo *input, const int num_input_dimensions = 3)
+inline TensorShape compute_im2col_conv_shape(const ITensorInfo *input, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation)
+{
+    // The output shape will be the 2D shape used as input for GEMM [ IFM * kernel_area (+1 if has_bias), num_elems_per_out_channel ]
+
+    TensorShape output_shape{ input->tensor_shape() };
+
+    const DataLayout data_layout = input->data_layout();
+    const int        width_idx   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+    const int        height_idx  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+    const int        channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+
+    std::pair<unsigned int, unsigned int> out_dims = scaled_dimensions(output_shape[width_idx], output_shape[height_idx], kernel_dims.width, kernel_dims.height, conv_info, dilation);
+    output_shape.set(width_idx, (output_shape[channel_idx] * kernel_dims.area() + (has_bias ? 1 : 0)));
+    output_shape.set(height_idx, (out_dims.first * out_dims.second));
+    output_shape.set(channel_idx, 1);
+
+    return output_shape;
+}
+inline TensorShape compute_im2col_fc_shape(const ITensorInfo *input, const int num_input_dimensions = 3)
 {
     TensorShape output_shape{ input->tensor_shape() };
 
@@ -167,6 +178,21 @@
 
     return output_shape;
 }
+inline TensorShape compute_im2col_flatten_shape(const ITensorInfo *input)
+{
+    // The output shape will be the flattened version of the input (i.e. [ width * height * channels, 1, 1, ... ] ). Used for FlattenLayer.
+
+    ARM_COMPUTE_ERROR_ON(input->num_dimensions() < 3);
+
+    TensorShape output_shape{ input->tensor_shape() };
+
+    const size_t flatten_shape = input->dimension(0) * input->dimension(1) * input->dimension(2);
+    output_shape.set(0, flatten_shape);
+    output_shape.remove_dimension(1);
+    output_shape.remove_dimension(1);
+
+    return output_shape;
+}
 inline TensorShape compute_interleave_custom_shape(const TensorShape &input, const int x_interleave, const int y_interleave)
 {
     TensorShape output_shape{ input };
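
Note: the arithmetic in compute_im2col_conv_shape can be sanity-checked with a small standalone program. The sketch below is illustrative only; convolved_dims and the example sizes are assumptions (equivalent to scaled_dimensions() with no padding and unit dilation), not library code.

// Standalone sketch mirroring the shape arithmetic of compute_im2col_conv_shape.
#include <cstdio>
#include <utility>

// Simplified stand-in for scaled_dimensions(): no padding, unit dilation.
static std::pair<unsigned, unsigned> convolved_dims(unsigned w, unsigned h,
                                                    unsigned kw, unsigned kh,
                                                    unsigned stride_x, unsigned stride_y)
{
    return { (w - kw) / stride_x + 1, (h - kh) / stride_y + 1 };
}

int main()
{
    // Example input: width=3, height=3, channels=2, kernel 2x2, stride 1, no bias.
    const unsigned w = 3, h = 3, c = 2, kw = 2, kh = 2;
    const bool     has_bias = false;

    const auto out = convolved_dims(w, h, kw, kh, 1, 1);

    // im2col matrix used as GEMM input:
    //   columns = IFM * kernel_area (+1 if bias), rows = number of convolved output elements.
    const unsigned im2col_w = c * kw * kh + (has_bias ? 1 : 0); // 2 * 4 = 8
    const unsigned im2col_h = out.first * out.second;           // 2 * 2 = 4

    std::printf("im2col shape: [%u, %u, 1]\n", im2col_w, im2col_h); // prints [8, 4, 1]
    return 0;
}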
diff --git a/arm_compute/runtime/NEON/functions/NEIm2Col.h b/arm_compute/runtime/NEON/functions/NEIm2Col.h
index cf4999b..caa8a01 100644
--- a/arm_compute/runtime/NEON/functions/NEIm2Col.h
+++ b/arm_compute/runtime/NEON/functions/NEIm2Col.h
@@ -26,6 +26,7 @@
 
 #include "arm_compute/runtime/NEON/INESimpleFunction.h"
 
+#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
 #include "arm_compute/core/Size2D.h"
 #include "arm_compute/core/Types.h"
 
@@ -34,9 +35,11 @@
 class ITensor;
 
 /** Basic function to run @ref NEIm2ColKernel */
-class NEIm2Col : public INESimpleFunction
+class NEIm2Col : public IFunction
 {
 public:
+    /** Default constructor */
+    NEIm2Col();
     /** Configure the im2col NEON kernel
      *
      * @param[in]  input              The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
@@ -46,9 +49,10 @@
      * @param[in]  kernel_dims        The kernel dimensions (width and height).
      * @param[in]  conv_info          Contains padding and stride information described in @ref PadStrideInfo.
      * @param[in]  has_bias           In case biases are provided expands the matrix with 1.
-     * @param[in]  is_fully_connected Determines whether this kernel will be called by @ref NEFullyConnectedLayer in order to validate the arguments
+     * @param[in]  is_fully_connected (Optional) Determines whether this function will be called by @ref NEFullyConnectedLayer in order to validate the arguments
+     * @param[in]  is_flatten         (Optional) Determines whether this function will be called by @ref NEFlattenLayer in order to validate the arguments
      */
-    void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, bool is_fully_connected = false);
+    void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, bool is_fully_connected = false, bool is_flatten = false);
     /** Static function to check if given info will lead to a valid configuration of @ref NEIm2Col
      *
      * @param[in] input              The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
@@ -58,11 +62,19 @@
      * @param[in] kernel_dims        The kernel dimensions (width and height).
      * @param[in] conv_info          Contains padding and stride information described in @ref PadStrideInfo.
      * @param[in] has_bias           In case biases are provided expands the matrix with 1.
-     * @param[in] is_fully_connected Determines whether this kernel will be called by @ref NEFullyConnectedLayer in order to validate the arguments
+     * @param[in] is_fully_connected Determines whether this function will be called by @ref NEFullyConnectedLayer in order to validate the arguments
+     * @param[in] is_flatten         Determines whether this function will be called by @ref NEFlattenLayer in order to validate the arguments
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, bool is_fully_connected);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, bool is_fully_connected, bool is_flatten);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    NEIm2ColKernel _kernel;
+    unsigned int   _y_dim;
 };
 }
 #endif /* __ARM_COMPUTE_NEIM2COL_H__ */
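
Note: a minimal usage sketch of the reworked NEIm2Col function with an NHWC input follows. The tensor sizes, the [C, W, H, N] shape ordering for NHWC, and the availability of TensorInfo::set_data_layout() at this version are assumptions for illustration; only the configure() signature comes from this patch.

// Illustrative only: not part of the patch or the library's test suite.
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEIm2Col.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src, dst;

    // 16x16 spatial, 8 input channels, batch 1, stored NHWC (assumed [C, W, H, N] ordering).
    TensorInfo src_info(TensorShape(8U, 16U, 16U, 1U), 1, DataType::F32);
    src_info.set_data_layout(DataLayout::NHWC);
    src.allocator()->init(src_info);

    // The function is expected to auto-initialise the output shape via the
    // ShapeCalculator helpers, so dst starts from an empty TensorInfo here.
    NEIm2Col im2col;
    im2col.configure(&src, &dst, Size2D(3U, 3U), PadStrideInfo(1, 1, 1, 1), /* has_bias */ false);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    im2col.run();
    return 0;
}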