COMPMID-3108: Add Winograd 3x3,4x4 FP16 support for NEON

Change-Id: I20680dc74a3d709297539e2132417308a7aecc9d
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3159
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/NEON/kernels/convolution/winograd/padding.cpp b/src/core/NEON/kernels/convolution/winograd/padding.cpp
index 46fe57c..04aa472 100644
--- a/src/core/NEON/kernels/convolution/winograd/padding.cpp
+++ b/src/core/NEON/kernels/convolution/winograd/padding.cpp
@@ -85,6 +85,15 @@
   unsigned int, unsigned int, unsigned int, unsigned int, float
 );
 
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC  // Compiled only when this feature macro is defined.
+template void copy_and_pad_tile(  // Explicit instantiation of the copy_and_pad_tile template for __fp16 elements.
+    unsigned int, unsigned int, unsigned int,
+    const __fp16 *, unsigned int, unsigned int,  // NOTE(review): presumably the input pointer and its row/column strides - confirm against the template definition.
+    __fp16 *, unsigned int, unsigned int,  // NOTE(review): presumably the output pointer and its row/column strides - confirm.
+    unsigned int, unsigned int, unsigned int, unsigned int, __fp16  // NOTE(review): trailing __fp16 is presumably the padding value - confirm.
+);
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
 template <unsigned int TileRows, unsigned int TileCols>
 void CopyCropped<TileRows, TileCols>::execute(
   const size_t size,
@@ -163,4 +172,21 @@
   unsigned int crop_right
 );
 
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC  // Compiled only when this feature macro is defined.
+template void crop_and_copy_tile(  // Explicit instantiation of the crop_and_copy_tile template for __fp16 elements.
+    unsigned int tile_rows,
+    unsigned int tile_cols,
+    unsigned int n_channels,
+    const __fp16 *inptr,  // Read-only source data.
+    unsigned int in_row_stride,
+    unsigned int in_col_stride,
+    __fp16 *outptr,  // Writable destination data.
+    unsigned int out_row_stride,
+    unsigned int out_col_stride,
+    unsigned int crop_top,  // NOTE(review): crop_* units (elements vs. bytes) are not visible here - confirm against the template definition.
+    unsigned int crop_left,
+    unsigned int crop_bottom,
+    unsigned int crop_right
+);
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 }  // namespace padding