Pooling changes to enable fp16 in armv8a multi_isa builds

    * Changes in filelist.json moving the fp16 pooling files from the common attribute to the fp16 attribute

    * Changes in kernel CpuPool2dAssemblyWrapperKernel: replaced
      __ARM_FEATURE_FP16_VECTOR_ARITHMETIC with ENABLE_FP16_KERNELS to
      make sure the fp16 kernels are compiled in for multi_isa=1

    * Partially resolves MLCE-1102

Change-Id: I327154ec5b1ddfb9f54d9096f00c35b3e05c678a
Signed-off-by: Pablo Marquez Tello <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10662
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/filelist.json b/filelist.json
index 6559ed2..d8c1692 100644
--- a/filelist.json
+++ b/filelist.json
@@ -1945,16 +1945,11 @@
           "neon": {
             "common": [
               "src/core/NEON/kernels/arm_conv/pooling/kernels/cpp_nhwc_1x1_stride_any_depthfirst/generic.cpp",
-              "src/core/NEON/kernels/arm_conv/pooling/pooling_fp16.cpp",
               "src/core/NEON/kernels/arm_conv/pooling/pooling_fp32.cpp",
               "src/core/NEON/kernels/arm_conv/pooling/pooling_s8.cpp",
               "src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp",
               "src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp",
               "src/core/NEON/kernels/arm_conv/pooling/pooling_u8q.cpp",
-              "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp",
-              "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_generic_depthfirst/generic.cpp",
-              "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
-              "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_generic_depthfirst/generic.cpp",
               "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp",
               "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_generic_depthfirst/generic.cpp",
               "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
@@ -1971,7 +1966,14 @@
               "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_max_generic_depthfirst/generic.cpp"
             ],
             "nchw": [ "src/cpu/kernels/pool2d/neon/nchw/all.cpp" ],
-            "fp16": [ "src/cpu/kernels/pool2d/neon/fp16.cpp" ],
+            "fp16": [
+                "src/cpu/kernels/pool2d/neon/fp16.cpp",
+                "src/core/NEON/kernels/arm_conv/pooling/pooling_fp16.cpp",
+                "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp",
+                "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_generic_depthfirst/generic.cpp",
+                "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
+                "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_generic_depthfirst/generic.cpp"
+             ],
             "fp32": [ "src/cpu/kernels/pool2d/neon/fp32.cpp" ],
             "qasymm8":[ "src/cpu/kernels/pool2d/neon/qasymm8.cpp" ],
             "qasymm8_signed":["src/cpu/kernels/pool2d/neon/qasymm8_signed.cpp"]
diff --git a/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp b/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp
index a161c80..9ba2451 100644
--- a/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp
+++ b/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022 Arm Limited.
+ * Copyright (c) 2021-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -79,11 +79,11 @@
                 create_arm_pooling<int8_t, int8_t>(src, dst, info, cpu_info);
             }
             break;
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#if defined(ENABLE_FP16_KERNELS)
         case DataType::F16:
             create_arm_pooling<float16_t, float16_t>(src, dst, info, cpu_info);
             break;
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+#endif // defined(ENABLE_FP16_KERNELS)
         case DataType::F32:
             create_arm_pooling<float, float>(src, dst, info, cpu_info);
             break;