Changes to enable FP16 in armv8a multi_isa

* This is the initial patch to start working on enabling fp16 in all
  multi_isa builds. More changes are required in the way we register
  the kernels using the macro REGISTER_FP16_NEON.

* In this patch we add the capability to build the fp16 files listed in
  filelist.json with the correct arch option to enable FP16.

* This patch is a required step towards building a universal multi_isa
  binary where fp16 is enabled.

* Enable REGISTER_FP16_NEON macro for all builds by removing
  __ARM_FEATURE_FP16_VECTOR_ARITHMETIC guard from the macro definition.
  The macro has to be used across all types of builds.

Change-Id: I99f4c273f6ee04cad3c097e5e374200f48568fa9
Signed-off-by: Pablo Marquez Tello <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10682
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/SConscript b/SConscript
index 9069df9..31e7a5b 100644
--- a/SConscript
+++ b/SConscript
@@ -82,7 +82,7 @@
 #         A list of static objects
 #         A list of shared objects
 
-def build_lib_objects():
+def build_multiisa_lib_objects():
     lib_static_objs = [] # static objects
     lib_shared_objs = [] # shared objects
 
@@ -93,20 +93,30 @@
 
     # Build all the common files for the base architecture
     if env['arch'] == 'armv8a':
-        lib_static_objs += build_obj_list(filedefs["armv8-a"], lib_files, static=True)
-        lib_shared_objs += build_obj_list(filedefs["armv8-a"], lib_files, static=False)
+        lib_static_objs += build_obj_list(filedefs["armv8-a"], misa_lib_files, static=True)
+        lib_shared_objs += build_obj_list(filedefs["armv8-a"], misa_lib_files, static=False)
     else:
-        lib_static_objs += build_obj_list(filedefs["armv8.2-a"], lib_files, static=True)
-        lib_shared_objs += build_obj_list(filedefs["armv8.2-a"], lib_files, static=False)
+        lib_static_objs += build_obj_list(filedefs["armv8.2-a"], misa_lib_files, static=True)
+        lib_shared_objs += build_obj_list(filedefs["armv8.2-a"], misa_lib_files, static=False)
+
+    # Build the FP16 specific files
+    lib_static_objs += build_obj_list(filedefs["armv8.2-a"], misa_lib_files_neon_fp16, static=True)
+    lib_shared_objs += build_obj_list(filedefs["armv8.2-a"], misa_lib_files_neon_fp16, static=False)
 
     # Build the SVE specific files
-    lib_static_objs += build_obj_list(filedefs["armv8.2-a-sve"], lib_files_sve, static=True)
-    lib_shared_objs += build_obj_list(filedefs["armv8.2-a-sve"], lib_files_sve, static=False)
+    lib_static_objs += build_obj_list(filedefs["armv8.2-a-sve"], misa_lib_files_sve, static=True)
+    lib_shared_objs += build_obj_list(filedefs["armv8.2-a-sve"], misa_lib_files_sve, static=False)
+    lib_static_objs += build_obj_list(filedefs["armv8.2-a-sve"], misa_lib_files_sve_fp16, static=True)
+    lib_shared_objs += build_obj_list(filedefs["armv8.2-a-sve"], misa_lib_files_sve_fp16, static=False)
+
 
     # Build the SVE2 specific files
     arm_compute_env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVE2'])
-    lib_static_objs += build_obj_list(filedefs["armv8.6-a-sve2"], lib_files_sve2, static=True)
-    lib_shared_objs += build_obj_list(filedefs["armv8.6-a-sve2"], lib_files_sve2, static=False)
+    lib_static_objs += build_obj_list(filedefs["armv8.6-a-sve2"], misa_lib_files_sve2, static=True)
+    lib_shared_objs += build_obj_list(filedefs["armv8.6-a-sve2"], misa_lib_files_sve2, static=False)
+    lib_static_objs += build_obj_list(filedefs["armv8.6-a-sve2"], misa_lib_files_sve2_fp16, static=True)
+    lib_shared_objs += build_obj_list(filedefs["armv8.6-a-sve2"], misa_lib_files_sve2_fp16, static=False)
+
 
     return lib_static_objs, lib_shared_objs
 
@@ -284,29 +294,29 @@
     return attrs
 
 
-def get_operator_backend_files(filelist, operators, backend='', techs=[], attrs=[]):
+def get_operator_backend_files(filelist, operators, backend='', techs=[], attrs=[], include_common=True):
     files = { "common" : [] }
-
     # Early return if filelist is empty
     if backend not in filelist:
         return files
-
     # Iterate over operators and create the file lists to compiler
     for operator in operators:
         if operator in filelist[backend]['operators']:
-            files['common'] += filelist[backend]['operators'][operator]["files"]["common"]
+            if include_common :
+                files['common'] += filelist[backend]['operators'][operator]["files"]["common"]
             for tech in techs:
                 if tech in filelist[backend]['operators'][operator]["files"]:
                     # Add tech as a key to dictionary if not there
                     if tech not in files:
                         files[tech] = []
-
                     # Add tech files to the tech file list
                     tech_files = filelist[backend]['operators'][operator]["files"][tech]
-                    files[tech] += tech_files.get('common', [])
+                    if include_common:
+                        files[tech] += tech_files.get('common', [])
                     for attr in attrs:
                         files[tech] += tech_files.get(attr, [])
 
+
     # Remove duplicates if they exist
     return {k: list(set(v)) for k,v in files.items()}
 
@@ -608,6 +618,17 @@
 lib_files_sve = []
 lib_files_sve2 = []
 
+# the variables below are used for the multi_isa builds
+# please note that the variable names without the _fp16 suffix
+# do not hold any fp16 files.
+
+misa_lib_files = lib_files
+misa_lib_files_sve = []
+misa_lib_files_sve2 = []
+misa_lib_files_neon_fp16 = []
+misa_lib_files_sve_fp16 = []
+misa_lib_files_sve2_fp16 = []
+
 if env['neon']:
     # build winograd/depthwise sources for either v7a / v8a
     arm_compute_env.Append(CPPPATH = ["src/core/NEON/kernels/arm_gemm",
@@ -620,8 +641,6 @@
                                       "arm_compute/core/NEON/kernels/assembly/",
                                       "src/cpu/kernels/assembly/"])
 
-    lib_files += filelist['cpu']['common']
-
     # Setup SIMD file list to include
     simd = ['neon']
     if env['multi_isa']:
@@ -636,7 +655,6 @@
     else:
         attrs = get_attrs_list(env, env['data_type_support'], env['data_layout_support'])
 
-
     if env['fixed_format_kernels']:
         attrs.append("fixed_format_kernels")
 
@@ -644,19 +662,46 @@
     cpu_operators = custom_operators if use_custom_ops else filelist['cpu']['operators'].keys()
     cpu_ops_to_build = resolve_operator_dependencies(filelist, cpu_operators, 'cpu')
 
-    cpu_files = get_operator_backend_files(filelist, cpu_ops_to_build, 'cpu', simd, attrs)
+    if env['multi_isa']:
+        misa_lib_files += filelist['cpu']['common']
 
-    # Shared among ALL CPU files
-    lib_files += cpu_files.get('common', [])
+        # For multi_isa builds we need to build fp16 files for armv8.2-a+fp16 so we filter them out of cpu_files removing the attribute fp16
+        attrs.remove('fp16')
+        cpu_files = get_operator_backend_files(filelist, cpu_ops_to_build, 'cpu', simd, attrs)
 
-    # Arm® Neon™ specific files
-    lib_files += cpu_files.get('neon', [])
+        # Shared among ALL CPU files
+        misa_lib_files += cpu_files.get('common', [])
 
-    # SVE files only
-    lib_files_sve = cpu_files.get('sve', [])
+        # Arm® Neon™ specific files
+        misa_lib_files += cpu_files.get('neon', [])
 
-    # SVE2 files only
-    lib_files_sve2 = cpu_files.get('sve2', [])
+        # Get all the fp16 files
+        fp16_cpu_files = get_operator_backend_files(filelist, cpu_ops_to_build, 'cpu', simd, ['fp16'],False)
+
+        misa_lib_files_neon_fp16 = fp16_cpu_files.get('neon',[])
+        misa_lib_files_sve_fp16 = fp16_cpu_files.get('sve',[])
+        misa_lib_files_sve2_fp16 = fp16_cpu_files.get('sve2',[])
+
+        # SVE files only minus FP16
+        misa_lib_files_sve = cpu_files.get('sve', [])
+
+        # SVE2 files only minus FP16
+        misa_lib_files_sve2 = cpu_files.get('sve2', [])
+    else:
+        lib_files += filelist['cpu']['common']
+
+        # Non multi_isa build
+        cpu_files = get_operator_backend_files(filelist, cpu_ops_to_build, 'cpu', simd, attrs)
+
+        # Shared among ALL CPU files
+        lib_files += cpu_files.get('common', [])
+
+        # Arm® Neon™ specific files
+        lib_files += cpu_files.get('neon', [])
+
+        lib_files_sve = cpu_files.get('sve', [])
+
+        lib_files_sve2 = cpu_files.get('sve2', [])
 
     graph_files += Glob('src/graph/backends/NEON/*.cpp')
 
@@ -674,7 +719,7 @@
 
 
 if (env['multi_isa']):
-    lib_static_objs, lib_shared_objs = build_lib_objects()
+    lib_static_objs, lib_shared_objs = build_multiisa_lib_objects()
 
 
 # STATIC library build.