Integrate new Winograd APIs from MLTech

Resolves: COMPMID-5400
Signed-off-by: Ramy Elgammal <ramy.elgammal@arm.com>
Change-Id: Ib4428436dd7a6e40d8b2d8a2f8dac1b079154551
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7894
Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/Android.bp b/Android.bp
index 16e67ad..9d056f9 100644
--- a/Android.bp
+++ b/Android.bp
@@ -340,27 +340,30 @@
         "src/core/NEON/kernels/convolution/common/qasymm8.cpp",
         "src/core/NEON/kernels/convolution/common/qsymm8.cpp",
         "src/core/NEON/kernels/convolution/common/utils.cpp",
+        "src/core/NEON/kernels/convolution/winograd/input_transforms/arm_fp32_1x8.cpp",
+        "src/core/NEON/kernels/convolution/winograd/input_transforms/arm_fp32_4x4.cpp",
+        "src/core/NEON/kernels/convolution/winograd/input_transforms/arm_fp32_6x6.cpp",
+        "src/core/NEON/kernels/convolution/winograd/input_transforms_fp16.cpp",
+        "src/core/NEON/kernels/convolution/winograd/input_transforms_fp32.cpp",
+        "src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_1x2_1x7.cpp",
+        "src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_1x4_1x5.cpp",
+        "src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_1x6_1x3.cpp",
+        "src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_2x2_3x3.cpp",
+        "src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_2x2_5x5.cpp",
+        "src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_4x4_3x3.cpp",
+        "src/core/NEON/kernels/convolution/winograd/output_transforms_fp16.cpp",
+        "src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp",
         "src/core/NEON/kernels/convolution/winograd/padding.cpp",
-        "src/core/NEON/kernels/convolution/winograd/winograd.cpp",
-        "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_1x8_fp32_fp32_integers.cpp",
-        "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_4x4_fp16_fp16_integers.cpp",
-        "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_4x4_fp32_fp32_integers.cpp",
-        "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp16_fp16_integers.cpp",
-        "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp32_fp32_integers.cpp",
-        "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2_7_fp32_fp32_integers.cpp",
-        "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2x2_3x3_fp32_fp32_integers.cpp",
-        "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2x2_5x5_fp32_fp32_integers.cpp",
-        "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4_5_fp32_fp32_integers.cpp",
-        "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4x4_3x3_fp16_fp16_integers.cpp",
-        "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4x4_3x3_fp32_fp32_integers.cpp",
-        "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_6_3_fp32_fp32_integers.cpp",
-        "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2_7_fp32_fp32_integers.cpp",
-        "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2x2_3x3_fp32_fp32_integers.cpp",
-        "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2x2_5x5_fp32_fp32_integers.cpp",
-        "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4_5_fp32_fp32_integers.cpp",
-        "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4x4_3x3_fp16_fp16_integers.cpp",
-        "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4x4_3x3_fp32_fp32_integers.cpp",
-        "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_6_3_fp32_fp32_integers.cpp",
+        "src/core/NEON/kernels/convolution/winograd/weight_transforms/arm_fp32_2x2_3x3.cpp",
+        "src/core/NEON/kernels/convolution/winograd/weight_transforms/arm_fp32_2x2_5x5.cpp",
+        "src/core/NEON/kernels/convolution/winograd/weight_transforms/arm_fp32_4x4_3x3.cpp",
+        "src/core/NEON/kernels/convolution/winograd/weight_transforms/cpp_fp32_1x2_1x7.cpp",
+        "src/core/NEON/kernels/convolution/winograd/weight_transforms/cpp_fp32_1x4_1x5.cpp",
+        "src/core/NEON/kernels/convolution/winograd/weight_transforms/cpp_fp32_1x6_1x3.cpp",
+        "src/core/NEON/kernels/convolution/winograd/weight_transforms_fp16.cpp",
+        "src/core/NEON/kernels/convolution/winograd/weight_transforms_fp32.cpp",
+        "src/core/NEON/kernels/convolution/winograd/winograd_fp16.cpp",
+        "src/core/NEON/kernels/convolution/winograd/winograd_fp32.cpp",
         "src/core/Rounding.cpp",
         "src/core/Size2D.cpp",
         "src/core/Size3D.cpp",
@@ -1184,6 +1187,11 @@
                 "src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp",
                 "src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp",
                 "src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp",
+                "src/core/NEON/kernels/convolution/winograd/input_transforms/a64_fp16_6x6.cpp",
+                "src/core/NEON/kernels/convolution/winograd/input_transforms/a64_fp32_6x6.cpp",
+                "src/core/NEON/kernels/convolution/winograd/input_transforms/sve_fp32_6x6.cpp",
+                "src/core/NEON/kernels/convolution/winograd/output_transforms/a64_fp16_4x4_3x3.cpp",
+                "src/core/NEON/kernels/convolution/winograd/weight_transforms/a64_fp16_4x4_3x3.cpp",
                 
             ],
         },