COMPMID-3160: Add Bfloat16 support in NEGEMMConvolutionLayer

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I0e449306c138a562ffc1455e76ec44b2fd059d85
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2860
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/Android.bp b/Android.bp
index 12cdb64..3dd91d3 100644
--- a/Android.bp
+++ b/Android.bp
@@ -352,6 +352,7 @@
         "src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp",
         "src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp",
         "src/core/NEON/kernels/NEYOLOLayerKernel.cpp",
+        "src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp",
         "src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp",
         "src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp",
         "src/core/NEON/kernels/arm_gemm/gemm_int16.cpp",
@@ -769,6 +770,8 @@
                 "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_16x4/generic.cpp",
                 "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_16x4/a55.cpp",
                 "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_16x4/generic.cpp",
+                "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_12x8/generic.cpp",
+                "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_12x8/generic.cpp",
                 "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_12x8/generic.cpp",
                 "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_12x8/generic.cpp",
                 "src/core/NEON/kernels/arm_gemm/kernels/a64_native_fp32_mla_16x4/generic.cpp",
@@ -788,20 +791,28 @@
                 "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_4x6/generic.cpp",
                 "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_4x8/a55.cpp",
                 "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_4x8/generic.cpp",
+                "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_4VLx4/generic.cpp",
+                "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_mmla_4VLx4/generic.cpp",
+                "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_mmla_6VLx2/generic.cpp",
+                "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_mmla_8VLx2/generic.cpp",
                 "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_4VLx4/generic.cpp",
                 "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_4VLx4/generic.cpp",
                 "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_4VLx4/generic.cpp",
                 "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_4VLx4/generic.cpp",
+                "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_3VLx8/generic.cpp",
+                "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_3VLx8/generic.cpp",
                 "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_3VLx8/generic.cpp",
                 "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_3VLx8/generic.cpp",
                 "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_3VLx8/generic.cpp",
                 "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_3VLx8/generic.cpp",
                 "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_3VLx8/generic.cpp",
                 "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_3VLx8/generic.cpp",
+                "src/core/NEON/kernels/arm_gemm/kernels/sve_native_bf16fp32_dot_4VLx4/generic.cpp",
                 "src/core/NEON/kernels/arm_gemm/kernels/sve_native_fp16_mla_4VLx4/generic.cpp",
                 "src/core/NEON/kernels/arm_gemm/kernels/sve_native_fp32_mla_4VLx4/generic.cpp",
                 "src/core/NEON/kernels/arm_gemm/kernels/sve_native_s8s32_dot_4VLx4/generic.cpp",
                 "src/core/NEON/kernels/arm_gemm/kernels/sve_native_u8u32_dot_4VLx4/generic.cpp",
+                "src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_1VLx8/generic.cpp",
                 "src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_1VLx8/generic.cpp",
                 "src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_1VLx8/generic.cpp",