COMPMID-1339 - Implementing Winograd Convolution Layer 1x5 and 5x1 kernels on OpenCL NCHW

Change-Id: Ia293cd89651146a0e27e5f7c74ca9c924807e83c
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/138707
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
diff --git a/tests/validation/CL/Winograd.cpp b/tests/validation/CL/Winograd.cpp
index 501afac..849d0c1 100644
--- a/tests/validation/CL/Winograd.cpp
+++ b/tests/validation/CL/Winograd.cpp
@@ -65,7 +65,9 @@
            framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x4_3x3(),
            framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x1_3x1(),
            framework::dataset::concat(datasets::SmallWinogradInputTransformDataset1x4_1x3(),
-                                      datasets::SmallWinogradInputTransformDataset4x4_5x5()))))));
+           framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x4_5x5(),
+           framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x1_5x1(),
+                                      datasets::SmallWinogradInputTransformDataset1x4_1x5()))))))));
 
 const auto SmallWinogradInputTransformDatasetNHWC = framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x4_3x3(),
                                                                                datasets::SmallWinogradInputTransformDataset4x4_5x5());
@@ -77,7 +79,9 @@
            framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x4_3x3(),
            framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x1_3x1(),
            framework::dataset::concat(datasets::LargeWinogradInputTransformDataset1x4_1x3(),
-                                      datasets::LargeWinogradInputTransformDataset4x4_5x5()))))));
+           framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x4_5x5(),
+           framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x1_5x1(),
+                                      datasets::LargeWinogradInputTransformDataset1x4_1x5()))))))));
 
 const auto LargeWinogradInputTransformDatasetNHWC =
            framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x4_3x3(),
@@ -88,7 +92,9 @@
            framework::dataset::concat(combine(datasets::Small3x3Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U), Size2D(4U, 4U) })),
            framework::dataset::concat(combine(datasets::Small3x1Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 1U), Size2D(4U, 1U) })),
            framework::dataset::concat(combine(datasets::Small1x3Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 2U), Size2D(1U, 4U) })),
-                                      combine(datasets::Small5x5Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })))));
+           framework::dataset::concat(combine(datasets::Small5x5Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })),
+           framework::dataset::concat(combine(datasets::Small5x1Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 1U) })),
+                                      combine(datasets::Small1x5Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })))))));
 
 const auto SmallWinogradFilterTransformDatasetNHWC =
            framework::dataset::concat(combine(datasets::Small3x3Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })),
@@ -98,7 +104,9 @@
            framework::dataset::concat(combine(datasets::Large3x3Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U), Size2D(4U, 4U) })),
            framework::dataset::concat(combine(datasets::Large3x1Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 1U), Size2D(4U, 1U) })),
            framework::dataset::concat(combine(datasets::Large1x3Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 2U), Size2D(1U, 4U) })),
-                                      combine(datasets::Large5x5Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })))));
+           framework::dataset::concat(combine(datasets::Large5x5Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })),
+           framework::dataset::concat(combine(datasets::Large5x1Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 1U) })),
+                                      combine(datasets::Large1x5Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })))))));
 
 const auto LargeWinogradFilterTransformDatasetNHWC =
            framework::dataset::concat(combine(datasets::Large3x3Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })),
@@ -643,6 +651,54 @@
 }
 TEST_SUITE_END() // Conv5x5
 
+TEST_SUITE(Conv5x1)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(),
+                                               framework::dataset::make("DataType", { DataType::F32 })),
+                                               framework::dataset::make("ActivationLayerInfo", { ActivationLayerInfo() })),
+                                               framework::dataset::make("DataLayout", { DataLayout::NCHW })))
+
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(),
+                                               framework::dataset::make("DataType", { DataType::F32 })),
+                                               framework::dataset::make("ActivationLayerInfo", { ActivationLayerInfo() })),
+                                               framework::dataset::make("DataLayout", { DataLayout::NCHW })))
+
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
+}
+TEST_SUITE_END() // Conv5x1
+
+TEST_SUITE(Conv1x5)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(),
+                                               framework::dataset::make("DataType", { DataType::F32 })),
+                                               framework::dataset::make("ActivationLayerInfo", { ActivationLayerInfo() })),
+                                               framework::dataset::make("DataLayout", { DataLayout::NCHW })))
+
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(),
+                                               framework::dataset::make("DataType", { DataType::F32 })),
+                                               framework::dataset::make("ActivationLayerInfo", { ActivationLayerInfo() })),
+                                               framework::dataset::make("DataLayout", { DataLayout::NCHW })))
+
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
+}
+TEST_SUITE_END() // Conv1x5
+
 TEST_SUITE_END() // ConvolutionLayer
 TEST_SUITE_END() // Winograd
 TEST_SUITE_END() // CL
diff --git a/tests/validation/reference/Winograd.cpp b/tests/validation/reference/Winograd.cpp
index 5be4fe2..026b300 100644
--- a/tests/validation/reference/Winograd.cpp
+++ b/tests/validation/reference/Winograd.cpp
@@ -148,6 +148,8 @@
         { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3), WinogradTransformType::INPUT), imatrix2x2_3x3 },
         { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3), WinogradTransformType::INPUT), imatrix4x4_3x3 },
         { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::INPUT), imatrix4x4_5x5 },
+        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1), WinogradTransformType::INPUT), imatrix4x4_5x5 },
+        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5), WinogradTransformType::INPUT), imatrix4x4_5x5 },
         { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::FILTER), fmatrix2x2_3x3 },
         { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::FILTER), fmatrix4x4_3x3 },
         { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1), WinogradTransformType::FILTER), fmatrix2x2_3x3 },
@@ -155,6 +157,8 @@
         { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3), WinogradTransformType::FILTER), fmatrix2x2_3x3 },
         { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3), WinogradTransformType::FILTER), fmatrix4x4_3x3 },
         { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::FILTER), fmatrix4x4_5x5 },
+        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1), WinogradTransformType::FILTER), fmatrix4x4_5x5 },
+        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5), WinogradTransformType::FILTER), fmatrix4x4_5x5 },
         { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::OUTPUT), omatrix2x2_3x3 },
         { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::OUTPUT), omatrix4x4_3x3 },
         { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1), WinogradTransformType::OUTPUT), omatrix2x2_3x3 },
@@ -162,6 +166,8 @@
         { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3), WinogradTransformType::OUTPUT), omatrix2x2_3x3 },
         { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3), WinogradTransformType::OUTPUT), omatrix4x4_3x3 },
         { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::OUTPUT), omatrix4x4_5x5 },
+        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1), WinogradTransformType::OUTPUT), omatrix4x4_5x5 },
+        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5), WinogradTransformType::OUTPUT), omatrix4x4_5x5 },
     };
 
     // Find transformation matrix