COMPMID-1318: Implementing Winograd 7x7 NHWC on OpenCL -- Part I

Change-Id: I94c3c886718076c6eee09be37a074a4bb0e54809
Signed-off-by: giuros01 <giuseppe.rossini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/868
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/tests/datasets/WinogradInputTransformDataset.h b/tests/datasets/WinogradInputTransformDataset.h
index 23efcf0..ac9baba 100644
--- a/tests/datasets/WinogradInputTransformDataset.h
+++ b/tests/datasets/WinogradInputTransformDataset.h
@@ -196,6 +196,39 @@
     }
 };
 
+class SmallWinogradInputTransformDataset2x2_7x7 final : public WinogradInputTransformDataset
+{
+public:
+    SmallWinogradInputTransformDataset2x2_7x7()
+    {
+        add_config(TensorShape(27U, 13U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(9U, 9U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW));
+        add_config(TensorShape(27U, 13U, 2U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(27U, 13U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW));
+        add_config(TensorShape(27U, 13U, 3U, 4U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(9U, 9U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW));
+    }
+};
+
+class SmallWinogradInputTransformDataset2x1_7x1 final : public WinogradInputTransformDataset
+{
+public:
+    SmallWinogradInputTransformDataset2x1_7x1()
+    {
+        add_config(TensorShape(23U, 31U), WinogradInfo(Size2D(2U, 1U), Size2D(7U, 1U), Size2D(9U, 9U), PadStrideInfo(1, 1, 2, 0), DataLayout::NCHW));
+        add_config(TensorShape(27U, 13U, 2U), WinogradInfo(Size2D(2U, 1U), Size2D(7U, 1U), Size2D(27U, 13U), PadStrideInfo(1, 1, 1, 0), DataLayout::NCHW));
+        add_config(TensorShape(27U, 31U, 3U, 4U), WinogradInfo(Size2D(2U, 1U), Size2D(7U, 1U), Size2D(9U, 9U), PadStrideInfo(1, 1, 2, 0), DataLayout::NCHW));
+    }
+};
+
+class SmallWinogradInputTransformDataset1x2_1x7 final : public WinogradInputTransformDataset
+{
+public:
+    SmallWinogradInputTransformDataset1x2_1x7()
+    {
+        add_config(TensorShape(23U, 31U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 7U), Size2D(9U, 9U), PadStrideInfo(1, 1, 0, 2), DataLayout::NCHW));
+        add_config(TensorShape(27U, 13U, 2U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 7U), Size2D(27U, 13U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW));
+        add_config(TensorShape(27U, 31U, 3U, 4U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 7U), Size2D(9U, 9U), PadStrideInfo(1, 1, 0, 2), DataLayout::NCHW));
+    }
+};
+
 class LargeWinogradInputTransformDataset2x2_3x3 final : public WinogradInputTransformDataset
 {
 public:
@@ -339,6 +372,55 @@
         add_config(TensorShape(83U, 72U, 14U, 5U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 5U), Size2D(83U, 72U), PadStrideInfo(1, 1, 0, 2), DataLayout::NCHW));
     }
 };
+
+class LargeWinogradInputTransformDataset1x2_1x7 final : public WinogradInputTransformDataset
+{
+public:
+    LargeWinogradInputTransformDataset1x2_1x7()
+    {
+        add_config(TensorShape(23U, 31U, 3U, 5U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 7U), Size2D(9U, 9U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW));
+        add_config(TensorShape(128U, 64U, 1U, 3U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 7U), Size2D(128U, 64U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW));
+        add_config(TensorShape(27U, 13U, 2U, 4U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 7U), Size2D(27U, 13U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW));
+        add_config(TensorShape(14U, 14U, 512U, 2U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 7U), Size2D(14U, 14U), PadStrideInfo(1, 1, 0, 2), DataLayout::NCHW));
+        add_config(TensorShape(42U, 37U, 8U, 15U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 7U), Size2D(42U, 37U), PadStrideInfo(1, 1, 0, 2), DataLayout::NCHW));
+        add_config(TensorShape(57U, 60U, 13U, 8U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 7U), Size2D(57U, 60U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW));
+        add_config(TensorShape(128U, 64U, 21U, 13U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 7U), Size2D(128U, 64U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW));
+        add_config(TensorShape(83U, 72U, 14U, 5U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 7U), Size2D(83U, 72U), PadStrideInfo(1, 1, 0, 2), DataLayout::NCHW));
+    }
+};
+
+class LargeWinogradInputTransformDataset2x1_7x1 final : public WinogradInputTransformDataset
+{
+public:
+    LargeWinogradInputTransformDataset2x1_7x1()
+    {
+        add_config(TensorShape(23U, 31U, 3U, 5U), WinogradInfo(Size2D(2U, 1U), Size2D(7U, 1U), Size2D(9U, 9U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW));
+        add_config(TensorShape(128U, 64U, 1U, 3U), WinogradInfo(Size2D(2U, 1U), Size2D(7U, 1U), Size2D(128U, 64U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW));
+        add_config(TensorShape(27U, 13U, 2U, 4U), WinogradInfo(Size2D(2U, 1U), Size2D(7U, 1U), Size2D(27U, 13U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW));
+        add_config(TensorShape(14U, 14U, 512U, 2U), WinogradInfo(Size2D(2U, 1U), Size2D(7U, 1U), Size2D(14U, 14U), PadStrideInfo(1, 1, 0, 2), DataLayout::NCHW));
+        add_config(TensorShape(42U, 37U, 8U, 15U), WinogradInfo(Size2D(2U, 1U), Size2D(7U, 1U), Size2D(42U, 37U), PadStrideInfo(1, 1, 0, 2), DataLayout::NCHW));
+        add_config(TensorShape(57U, 60U, 13U, 8U), WinogradInfo(Size2D(2U, 1U), Size2D(7U, 1U), Size2D(57U, 60U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW));
+        add_config(TensorShape(128U, 64U, 21U, 13U), WinogradInfo(Size2D(2U, 1U), Size2D(7U, 1U), Size2D(128U, 64U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW));
+        add_config(TensorShape(83U, 72U, 14U, 5U), WinogradInfo(Size2D(2U, 1U), Size2D(7U, 1U), Size2D(83U, 72U), PadStrideInfo(1, 1, 0, 2), DataLayout::NCHW));
+    }
+};
+
+class LargeWinogradInputTransformDataset2x2_7x7 final : public WinogradInputTransformDataset
+{
+public:
+    LargeWinogradInputTransformDataset2x2_7x7()
+    {
+        add_config(TensorShape(27U, 13U, 3U, 5U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(9U, 9U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW));
+        add_config(TensorShape(128U, 64U, 1U, 3U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(128U, 64U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW));
+        add_config(TensorShape(27U, 13U, 2U, 4U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(27U, 13U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW));
+        add_config(TensorShape(14U, 14U, 512U, 2U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(14U, 14U), PadStrideInfo(1, 1, 0, 2), DataLayout::NCHW));
+        add_config(TensorShape(42U, 37U, 8U, 15U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(42U, 37U), PadStrideInfo(1, 1, 0, 2), DataLayout::NCHW));
+        add_config(TensorShape(57U, 60U, 13U, 8U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(57U, 60U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW));
+        add_config(TensorShape(128U, 64U, 21U, 13U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(128U, 64U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW));
+        add_config(TensorShape(83U, 72U, 14U, 5U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(83U, 72U), PadStrideInfo(1, 1, 0, 2), DataLayout::NCHW));
+    }
+};
+
 } // namespace datasets
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/CL/Winograd.cpp b/tests/validation/CL/Winograd.cpp
index e744473..dd759b6 100644
--- a/tests/validation/CL/Winograd.cpp
+++ b/tests/validation/CL/Winograd.cpp
@@ -81,6 +81,11 @@
                                                     framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x1_5x1(),
                                                                                datasets::SmallWinogradInputTransformDataset1x4_1x5())))));
 
+const auto SmallWinogradInputTransformDatasetNHWC_FP32 = framework::dataset::concat(SmallWinogradInputTransformDatasetNHWC,
+                                                         framework::dataset::concat(datasets::SmallWinogradInputTransformDataset1x2_1x7(),
+                                                         framework::dataset::concat(datasets::SmallWinogradInputTransformDataset2x1_7x1(),
+                                                                                    datasets::SmallWinogradInputTransformDataset2x2_7x7())));
+
 const auto LargeWinogradInputTransformDatasetNCHW =
            framework::dataset::concat(datasets::LargeWinogradInputTransformDataset2x2_3x3(),
            framework::dataset::concat(datasets::LargeWinogradInputTransformDataset2x1_3x1(),
@@ -98,6 +103,12 @@
            framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x1_5x1(),
                                       datasets::LargeWinogradInputTransformDataset1x4_1x5())));
 
+const auto LargeWinogradInputTransformDatasetNHWC_FP32 =
+           framework::dataset::concat(LargeWinogradInputTransformDatasetNHWC,
+           framework::dataset::concat(datasets::LargeWinogradInputTransformDataset1x2_1x7(),
+           framework::dataset::concat(datasets::LargeWinogradInputTransformDataset2x1_7x1(),
+                                     (datasets::LargeWinogradInputTransformDataset2x2_7x7()))));
+
 // Filter transform
 const auto SmallWinogradFilterTransformDatasetNCHW =
            framework::dataset::concat(combine(datasets::Small3x3Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U), Size2D(4U, 4U) })),
@@ -113,7 +124,8 @@
            framework::dataset::concat(combine(datasets::Small1x3Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })),
            framework::dataset::concat(combine(datasets::Small5x5Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })),
            framework::dataset::concat(combine(datasets::Small5x1Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 1U) })),
-                                      combine(datasets::Small1x5Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })))))));
+                                     (combine(datasets::Small1x5Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) }))))))));
+
 
 const auto LargeWinogradFilterTransformDatasetNCHW =
            framework::dataset::concat(combine(datasets::Large3x3Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U), Size2D(4U, 4U) })),
@@ -252,14 +264,14 @@
 }
 TEST_SUITE_END() // FP16
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradInputTransformFixtureFP32, framework::DatasetMode::PRECOMMIT, combine(combine(SmallWinogradInputTransformDatasetNHWC,
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradInputTransformFixtureFP32, framework::DatasetMode::PRECOMMIT, combine(combine(SmallWinogradInputTransformDatasetNHWC_FP32,
                                                                                                                      framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                                                                                                                      framework::dataset::make("DataType", { DataType::F32 })))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradInputTransformFixtureFP32, framework::DatasetMode::NIGHTLY, combine(combine(LargeWinogradInputTransformDatasetNHWC,
+FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradInputTransformFixtureFP32, framework::DatasetMode::NIGHTLY, combine(combine(LargeWinogradInputTransformDatasetNHWC_FP32,
                                                                                                                    framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                                                                                                                    framework::dataset::make("DataType", { DataType::F32 })))
 {
diff --git a/tests/validation/reference/Winograd.cpp b/tests/validation/reference/Winograd.cpp
index 294993b..f09b220 100644
--- a/tests/validation/reference/Winograd.cpp
+++ b/tests/validation/reference/Winograd.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -181,6 +181,7 @@
         { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1), WinogradTransformType::INPUT), imatrix4x4_5x5 },
         { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(7, 1), WinogradTransformType::INPUT), imatrix2x1_7x7 },
         { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 7), WinogradTransformType::INPUT), imatrix2x1_7x7 },
+        { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(7, 7), WinogradTransformType::INPUT), imatrix2x1_7x7 },
         { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5), WinogradTransformType::INPUT), imatrix4x4_5x5 },
         { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::FILTER), fmatrix2x2_3x3 },
         { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::FILTER), fmatrix4x4_3x3 },