COMPMID-2575 Implement Winograd 7x1/1x7 with FP16

Change-Id: I3851418bba75fb0cb8c244c88828af019008067a
Signed-off-by: giuros01 <giuseppe.rossini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1984
Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/tests/datasets/WinogradOutputTransformDataset.h b/tests/datasets/WinogradOutputTransformDataset.h
index fe0b6e7..d15a16e 100644
--- a/tests/datasets/WinogradOutputTransformDataset.h
+++ b/tests/datasets/WinogradOutputTransformDataset.h
@@ -222,22 +222,6 @@
         add_config(TensorShape(7U, 16U, 8U, 3U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 5U), Size2D(8U, 10U), PadStrideInfo(1, 1, 0, 0), DataLayout::NHWC));
         add_config(TensorShape(24U, 42U, 8U, 2U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 5U), Size2D(14U, 14U), PadStrideInfo(1, 1, 0, 1), DataLayout::NHWC));
         add_config(TensorShape(7U, 24U, 8U, 5U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 5U), Size2D(8U, 10U), PadStrideInfo(1, 1, 0, 2), DataLayout::NHWC));
-    }
-};
-
-class SmallWinogradOutputTransformDatasetNHWC_F32 : public SmallWinogradOutputTransformDatasetNHWC_F16
-{
-public:
-    SmallWinogradOutputTransformDatasetNHWC_F32()
-        : SmallWinogradOutputTransformDatasetNHWC_F16()
-    {
-        // (2x2, 7x7)
-        add_config(TensorShape(13U, 4U, 64U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(9U, 9U), PadStrideInfo(1, 1, 0, 0), DataLayout::NHWC));
-        add_config(TensorShape(7U, 6U, 64U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(10U, 11U), PadStrideInfo(1, 1, 0, 0), DataLayout::NHWC));
-        add_config(TensorShape(5U, 360U, 64U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(53U, 33U), PadStrideInfo(1, 1, 0, 1), DataLayout::NHWC));
-        add_config(TensorShape(7U, 2U, 64U, 3U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(8U, 10U), PadStrideInfo(1, 1, 0, 0), DataLayout::NHWC));
-        add_config(TensorShape(24U, 25U, 64U, 2U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 1), DataLayout::NHWC));
-        add_config(TensorShape(7U, 2U, 64U, 5U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(8U, 10U), PadStrideInfo(1, 1, 0, 0), DataLayout::NHWC));
 
         // (2x1, 7x1)
         add_config(TensorShape(13U, 18U, 8U), WinogradInfo(Size2D(2U, 1U), Size2D(7U, 1U), Size2D(9U, 9U), PadStrideInfo(1, 1, 0, 0), DataLayout::NHWC));
@@ -257,6 +241,22 @@
     }
 };
 
+class SmallWinogradOutputTransformDatasetNHWC_F32 : public SmallWinogradOutputTransformDatasetNHWC_F16
+{
+public:
+    SmallWinogradOutputTransformDatasetNHWC_F32()
+        : SmallWinogradOutputTransformDatasetNHWC_F16()
+    {
+        // (2x2, 7x7)
+        add_config(TensorShape(13U, 4U, 64U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(9U, 9U), PadStrideInfo(1, 1, 0, 0), DataLayout::NHWC));
+        add_config(TensorShape(7U, 6U, 64U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(10U, 11U), PadStrideInfo(1, 1, 0, 0), DataLayout::NHWC));
+        add_config(TensorShape(5U, 360U, 64U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(53U, 33U), PadStrideInfo(1, 1, 0, 1), DataLayout::NHWC));
+        add_config(TensorShape(7U, 2U, 64U, 3U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(8U, 10U), PadStrideInfo(1, 1, 0, 0), DataLayout::NHWC));
+        add_config(TensorShape(24U, 25U, 64U, 2U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 1), DataLayout::NHWC));
+        add_config(TensorShape(7U, 2U, 64U, 5U), WinogradInfo(Size2D(2U, 2U), Size2D(7U, 7U), Size2D(8U, 10U), PadStrideInfo(1, 1, 0, 0), DataLayout::NHWC));
+    }
+};
+
 class LargeWinogradOutputTransformDatasetNCHW : public WinogradOutputTransformDataset
 {
 public:
@@ -376,14 +376,7 @@
         add_config(TensorShape(13U, 784U, 8U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 5U), Size2D(56U, 56U), PadStrideInfo(1, 1, 0, 1), DataLayout::NHWC));
         add_config(TensorShape(32U, 3024U, 8U, 2U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 5U), Size2D(112U, 112U), PadStrideInfo(1, 1, 0, 0), DataLayout::NHWC));
         add_config(TensorShape(13U, 784U, 8U, 5U), WinogradInfo(Size2D(1U, 4U), Size2D(1U, 5U), Size2D(56U, 56U), PadStrideInfo(1, 1, 0, 1), DataLayout::NHWC));
-    }
-};
 
-class LargeWinogradOutputTransformDatasetNHWC_F32 : public LargeWinogradOutputTransformDatasetNHWC_F16
-{
-public:
-    LargeWinogradOutputTransformDatasetNHWC_F32()
-    {
         // (2x1, 7x1)
         add_config(TensorShape(32U, 6160U, 8U), WinogradInfo(Size2D(2U, 1U), Size2D(7U, 1U), Size2D(112U, 112U), PadStrideInfo(1, 1, 2, 0), DataLayout::NHWC));
         add_config(TensorShape(13U, 1456U, 8U), WinogradInfo(Size2D(2U, 1U), Size2D(7U, 1U), Size2D(56U, 56U), PadStrideInfo(1, 1, 1, 0), DataLayout::NHWC));
@@ -397,6 +390,15 @@
         add_config(TensorShape(13U, 1456U, 8U, 5U), WinogradInfo(Size2D(1U, 2U), Size2D(1U, 7U), Size2D(56U, 56U), PadStrideInfo(1, 1, 0, 1), DataLayout::NHWC));
     }
 };
+
+class LargeWinogradOutputTransformDatasetNHWC_F32 : public LargeWinogradOutputTransformDatasetNHWC_F16
+{
+public:
+    LargeWinogradOutputTransformDatasetNHWC_F32()
+        : LargeWinogradOutputTransformDatasetNHWC_F16()
+    {
+    }
+};
 } // namespace datasets
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/CL/Winograd.cpp b/tests/validation/CL/Winograd.cpp
index 5894d7f..de8b8af 100644
--- a/tests/validation/CL/Winograd.cpp
+++ b/tests/validation/CL/Winograd.cpp
@@ -61,7 +61,7 @@
 RelativeTolerance<half_float::half> rel_tolerance_f16(half(0.2)); /**< Tolerance value for comparing reference's output against implementation's output for FP16 data types */
 constexpr float                     tolerance_num   = 0.05f;  /**< Tolerance number */
 constexpr float                     abs_tolerance_convolution_layer_f16   = 2.5f;  /**< Tolerance number */
-constexpr float                      tolerance_num_convolution_f16 = 0.15f;                 /**< Tolerance number */
+constexpr float                      tolerance_num_f16 = 0.15f;                 /**< Tolerance number */
 
 // Input transform
 const auto SmallWinogradInputTransformDatasetNCHW =
@@ -80,12 +80,12 @@
                                                     framework::dataset::concat(datasets::SmallWinogradInputTransformDataset1x4_1x3(),
                                                     framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x4_5x5(),
                                                     framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x1_5x1(),
-                                                                               datasets::SmallWinogradInputTransformDataset1x4_1x5())))));
+                                                    framework::dataset::concat(datasets::SmallWinogradInputTransformDataset1x4_1x5(),
+                                                    framework::dataset::concat(datasets::SmallWinogradInputTransformDataset2x1_7x1(),
+                                                                               datasets::SmallWinogradInputTransformDataset1x2_1x7())))))));
 
 const auto SmallWinogradInputTransformDatasetNHWC_FP32 = framework::dataset::concat(SmallWinogradInputTransformDatasetNHWC,
-                                                         framework::dataset::concat(datasets::SmallWinogradInputTransformDataset1x2_1x7(),
-                                                         framework::dataset::concat(datasets::SmallWinogradInputTransformDataset2x1_7x1(),
-                                                                                    datasets::SmallWinogradInputTransformDataset2x2_7x7())));
+                                                                                    datasets::SmallWinogradInputTransformDataset2x2_7x7());
 
 const auto LargeWinogradInputTransformDatasetNCHW =
            framework::dataset::concat(datasets::LargeWinogradInputTransformDataset2x2_3x3(),
@@ -96,7 +96,9 @@
            framework::dataset::concat(datasets::LargeWinogradInputTransformDataset1x4_1x3(),
            framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x4_5x5(),
            framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x1_5x1(),
-                                      datasets::LargeWinogradInputTransformDataset1x4_1x5()))))))));
+           framework::dataset::concat(datasets::LargeWinogradInputTransformDataset1x4_1x5(),
+           framework::dataset::concat(datasets::LargeWinogradInputTransformDataset1x2_1x7(),
+                                      datasets::LargeWinogradInputTransformDataset2x1_7x1()))))))))));
 
 const auto LargeWinogradInputTransformDatasetNHWC =
            framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x4_3x3(),
@@ -106,9 +108,7 @@
 
 const auto LargeWinogradInputTransformDatasetNHWC_FP32 =
            framework::dataset::concat(LargeWinogradInputTransformDatasetNHWC,
-           framework::dataset::concat(datasets::LargeWinogradInputTransformDataset1x2_1x7(),
-           framework::dataset::concat(datasets::LargeWinogradInputTransformDataset2x1_7x1(),
-                                     (datasets::LargeWinogradInputTransformDataset2x2_7x7()))));
+                                      (datasets::LargeWinogradInputTransformDataset2x2_7x7()));
 
 // Filter transform
 const auto SmallWinogradFilterTransformDatasetNCHW =
@@ -125,13 +125,13 @@
            framework::dataset::concat(combine(datasets::Small1x3Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })),
            framework::dataset::concat(combine(datasets::Small5x5Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })),
            framework::dataset::concat(combine(datasets::Small5x1Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 1U) })),
-                                     (combine(datasets::Small1x5Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) }))))))));
+           framework::dataset::concat(combine(datasets::Small1x5Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })),
+           framework::dataset::concat(combine(datasets::Small1x7Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 2U) })),
+                                      combine(datasets::Small7x1Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 1U) })))))))));
 
 const auto SmallWinogradFilterTransformDatasetNHWC_F32 =
            framework::dataset::concat(SmallWinogradFilterTransformDatasetNHWC_F16,
-           framework::dataset::concat(combine(datasets::Small7x7Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U) })),
-           framework::dataset::concat(combine(datasets::Small7x1Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 1U) })),
-                                      combine(datasets::Small1x7Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 2U) })))));
+                                      combine(datasets::Small7x7Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U) })));
 
 const auto LargeWinogradFilterTransformDatasetNCHW =
            framework::dataset::concat(combine(datasets::Large3x3Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U), Size2D(4U, 4U) })),
@@ -147,13 +147,13 @@
            framework::dataset::concat(combine(datasets::Large1x3Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })),
            framework::dataset::concat(combine(datasets::Large5x5Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })),
            framework::dataset::concat(combine(datasets::Large5x1Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 1U) })),
-                                      combine(datasets::Large1x5Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })))))));
+           framework::dataset::concat(combine(datasets::Large1x5Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })),
+           framework::dataset::concat(combine(datasets::Large7x1Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 1U) })),
+                                      combine(datasets::Large1x7Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 2U) })))))))));
 
 const auto LargeWinogradFilterTransformDatasetNHWC_F32 =
            framework::dataset::concat(LargeWinogradFilterTransformDatasetNHWC_F16,
-           framework::dataset::concat(combine(datasets::Large7x7Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U) })),
-           framework::dataset::concat(combine(datasets::Large7x1Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 1U) })),
-                                      combine(datasets::Large1x7Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 2U) })))));
+                                      combine(datasets::Large7x7Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U) })));
 
 // Output transform
 const auto SmallWinogradOutputTransformDatasetNCHW = datasets::SmallWinogradOutputTransformDatasetNCHW();
@@ -269,14 +269,14 @@
                                                                                                                      framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                                                                                                                      framework::dataset::make("DataType", { DataType::F16 })))
 {
-    validate(CLAccessor(_target), _reference, tolerance_f16);
+    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16);
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradInputTransformFixtureFP16, framework::DatasetMode::NIGHTLY, combine(combine(LargeWinogradInputTransformDatasetNHWC,
                                                                                                                    framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                                                                                                                    framework::dataset::make("DataType", { DataType::F16 })))
 {
-    validate(CLAccessor(_target), _reference, tolerance_f16);
+    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16);
 }
 TEST_SUITE_END() // FP16
 TEST_SUITE(FP32)
@@ -385,7 +385,7 @@
                                        framework::dataset::make("DataType", { DataType::F16 })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f16);
+    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16);
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradFilterTransformFixtureFP16, framework::DatasetMode::NIGHTLY,
@@ -394,7 +394,7 @@
                                        framework::dataset::make("DataType", { DataType::F16 })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f16);
+    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16);
 }
 TEST_SUITE_END() // FP16
 TEST_SUITE(FP32)
@@ -524,7 +524,7 @@
                                framework::dataset::make("ActivationInfo",{ ActivationLayerInfo() }) ))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f16);
+    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16);
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradOutputTransformFixtureFP16, framework::DatasetMode::NIGHTLY,
@@ -533,7 +533,7 @@
                                framework::dataset::make("ActivationInfo",{ ActivationLayerInfo() }) ))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f16);
+    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16);
 }
 TEST_SUITE_END() // FP16
 TEST_SUITE(FP32)
@@ -754,7 +754,7 @@
                                                framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_convolution_f16);
+    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
@@ -776,7 +776,7 @@
                                        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_convolution_f16);
+    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
@@ -798,7 +798,7 @@
                                        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_convolution_f16);
+    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
@@ -821,7 +821,7 @@
 
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_convolution_f16);
+    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
@@ -845,7 +845,7 @@
 
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_convolution_f16);
+    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
@@ -869,7 +869,7 @@
 
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_convolution_f16);
+    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
@@ -884,6 +884,30 @@
 }
 TEST_SUITE_END() // Conv1x5
 
+TEST_SUITE(Conv1x7)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(),
+                                               framework::dataset::make("DataType", { DataType::F16 })),
+                                       ActivationFunctionsSmallDataset),
+                                               framework::dataset::make("DataLayout", { DataLayout::NHWC })))
+
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x7Dataset(),
+                                               framework::dataset::make("DataType", { DataType::F16 })),
+                                               ActivationFunctionsDataset),
+                                               framework::dataset::make("DataLayout", { DataLayout::NHWC })))
+
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
+}
+TEST_SUITE_END() // Conv1x7
+
 TEST_SUITE_END() // FP16
 
 TEST_SUITE_END() // ConvolutionLayer