COMPMID-3128: Test improvement for GEMMConvolutionLayer on CL and NEON (Function-level)

Cleaning up GEMMConvolutionLayer tests by doing the following:

- Remove unnecessary configuration tests
- Remove redundant tests
  > Redundant shapes
  > For large shapes there are already tests for each internal kernel/function
- Test NHWC with a 1x1 kernel and 1x1 stride to stress _skip_im2col
- Stimulate gemm3d to skip col2im
- Test asymmetric padding
- Test batch size equal to one and different from one
- Test fully connected convolution
- Test with a few different padding values
- Test 1D kernel
- Test with FLOOR rounding policy

Change-Id: I88e7009b8e9c991994ed264476c16a79a0de4a68
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3150
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Sang-Hoon Park <sang-hoon.park@arm.com>
diff --git a/tests/datasets/LargeConvolutionLayerDataset.h b/tests/datasets/LargeConvolutionLayerDataset.h
index 40a8855..20a73b8 100644
--- a/tests/datasets/LargeConvolutionLayerDataset.h
+++ b/tests/datasets/LargeConvolutionLayerDataset.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -197,31 +197,19 @@
 class LargeConvolutionLayerDataset final : public ConvolutionLayerDataset
 {
 public:
+    /** Shapes taken from use-cases such as AlexNet, MobileNet, SqueezeNet, etc. */
     LargeConvolutionLayerDataset()
     {
         // Batch size 1
         add_config(TensorShape(227U, 227U, 3U), TensorShape(11U, 11U, 3U, 96U), TensorShape(96U), TensorShape(55U, 55U, 96U), PadStrideInfo(4, 4, 0, 0));
         add_config(TensorShape(27U, 27U, 96U), TensorShape(5U, 5U, 96U, 256U), TensorShape(256U), TensorShape(27U, 27U, 256U), PadStrideInfo(1, 1, 2, 2));
         add_config(TensorShape(13U, 13U, 256U), TensorShape(1U, 1U, 256U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U), PadStrideInfo(1, 1, 0, 0));
-        add_config(TensorShape(13U, 13U, 384U), TensorShape(1U, 1U, 384U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U), PadStrideInfo(1, 1, 0, 0));
         add_config(TensorShape(224U, 224U, 3U), TensorShape(7U, 7U, 3U, 64U), TensorShape(64U), TensorShape(112U, 112U, 64U), PadStrideInfo(2, 2, 3, 3));
-        add_config(TensorShape(28U, 28U, 256U), TensorShape(1U, 1U, 256U, 64U), TensorShape(64U), TensorShape(28U, 28U, 64U), PadStrideInfo(1, 1, 0, 0));
         // Batch size 4
         add_config(TensorShape(227U, 227U, 3U, 4U), TensorShape(11U, 11U, 3U, 96U), TensorShape(96U), TensorShape(55U, 55U, 96U, 4U), PadStrideInfo(4, 4, 0, 0));
         add_config(TensorShape(27U, 27U, 96U, 4U), TensorShape(5U, 5U, 96U, 256U), TensorShape(256U), TensorShape(27U, 27U, 256U, 4U), PadStrideInfo(1, 1, 2, 2));
         add_config(TensorShape(13U, 13U, 256U, 4U), TensorShape(1U, 1U, 256U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 4U), PadStrideInfo(1, 1, 0, 0));
-        add_config(TensorShape(13U, 13U, 384U, 4U), TensorShape(1U, 1U, 384U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 4U), PadStrideInfo(1, 1, 0, 0));
         add_config(TensorShape(224U, 224U, 3U, 4U), TensorShape(7U, 7U, 3U, 64U), TensorShape(64U), TensorShape(112U, 112U, 64U, 4U), PadStrideInfo(2, 2, 3, 3));
-        add_config(TensorShape(28U, 28U, 256U, 4U), TensorShape(1U, 1U, 256U, 64U), TensorShape(64U), TensorShape(28U, 28U, 64U, 4U), PadStrideInfo(1, 1, 0, 0));
-        // Batch size 8
-        add_config(TensorShape(227U, 227U, 3U, 8U), TensorShape(11U, 11U, 3U, 96U), TensorShape(96U), TensorShape(55U, 55U, 96U, 8U), PadStrideInfo(4, 4, 0, 0));
-        add_config(TensorShape(27U, 27U, 96U, 8U), TensorShape(5U, 5U, 96U, 256U), TensorShape(256U), TensorShape(27U, 27U, 256U, 8U), PadStrideInfo(1, 1, 2, 2));
-        add_config(TensorShape(13U, 13U, 256U, 8U), TensorShape(1U, 1U, 256U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 8U), PadStrideInfo(1, 1, 0, 0));
-        add_config(TensorShape(13U, 13U, 384U, 8U), TensorShape(1U, 1U, 384U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 8U), PadStrideInfo(1, 1, 0, 0));
-        add_config(TensorShape(224U, 224U, 3U, 8U), TensorShape(7U, 7U, 3U, 64U), TensorShape(64U), TensorShape(112U, 112U, 64U, 8U), PadStrideInfo(2, 2, 3, 3));
-        add_config(TensorShape(28U, 28U, 256U, 8U), TensorShape(1U, 1U, 256U, 64U), TensorShape(64U), TensorShape(28U, 28U, 64U, 8U), PadStrideInfo(1, 1, 0, 0));
-        // Arbitrary batch size
-        add_config(TensorShape(227U, 227U, 3U, 5U), TensorShape(11U, 11U, 3U, 96U), TensorShape(96U), TensorShape(55U, 55U, 96U, 5U), PadStrideInfo(4, 4, 0, 0));
     }
 };
 
@@ -240,11 +228,6 @@
         add_config(TensorShape(27U, 27U, 96U, 4U), TensorShape(5U, 5U, 24U, 256U), TensorShape(256U), TensorShape(27U, 27U, 256U, 4U), PadStrideInfo(1, 1, 2, 2));
         add_config(TensorShape(13U, 13U, 256U, 4U), TensorShape(3U, 3U, 128U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 4U), PadStrideInfo(1, 1, 1, 1));
         add_config(TensorShape(13U, 13U, 384U, 4U), TensorShape(3U, 3U, 128U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 4U), PadStrideInfo(1, 1, 1, 1));
-        // Batch size 8
-        add_config(TensorShape(227U, 227U, 4U, 8U), TensorShape(11U, 11U, 2U, 96U), TensorShape(96U), TensorShape(55U, 55U, 96U, 8U), PadStrideInfo(4, 4, 0, 0));
-        add_config(TensorShape(27U, 27U, 96U, 8U), TensorShape(5U, 5U, 24U, 256U), TensorShape(256U), TensorShape(27U, 27U, 256U, 8U), PadStrideInfo(1, 1, 2, 2));
-        add_config(TensorShape(13U, 13U, 256U, 8U), TensorShape(3U, 3U, 128U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 8U), PadStrideInfo(1, 1, 1, 1));
-        add_config(TensorShape(13U, 13U, 384U, 8U), TensorShape(3U, 3U, 128U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 8U), PadStrideInfo(1, 1, 1, 1));
     }
 };
 } // namespace datasets