COMPMID-1277 - Optimizing CLIm2ColKernel for NHWC.

This patch includes:

- Im2Col optimizations for NHWC using a new data layout
- Refactoring of CLIm2ColKernel, adding a validation method and auto-initialization
- Removed im2col_reduced from CLIm2ColKernel and created a new kernel CLFlattenLayerKernel

Change-Id: I1620640b6796baa268324b33ae92cdd8de53e27c
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/141241
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index 1e5b9af..0a2a535 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -215,19 +215,13 @@
 
     return output_shape;
 }
-inline TensorShape compute_im2col_fc_shape(const ITensorInfo *input, const int num_input_dimensions = 3)
+inline TensorShape compute_flatten_shape(const ITensorInfo *input)
 {
+    // The output shape will be the flattened version of the input (i.e. [ width * height * channels, num_batches, ... ] ). Used for FlattenLayer and FullyConnectedLayer.
+
     TensorShape output_shape{ input->tensor_shape() };
 
-    output_shape.collapse(num_input_dimensions);
-
-    return output_shape;
-}
-inline TensorShape compute_im2col_flatten_shape(const ITensorInfo *input)
-{
-    // The output shape will be the flatten version of the input (i.e. [ width * height * channels, 1, 1, ... ] ). Used for FlattenLayer.
-    TensorShape output_shape{ input->tensor_shape() };
-    output_shape.collapse(3, 0);
+    output_shape.collapse(3);
 
     return output_shape;
 }