Add Multi ISA support for SCons build System (part #1)

- Enhance the SCons build system to support V8
  SVE/SVE2 architectures in a single binary

- Add additional filedefs.json to include build definitions

Resolves: COMPMID-4921

Signed-off-by: Motti Gondabi <motti.gondabi@arm.com>
Change-Id: Ie3c0ef444303270ba560ca3f43c6e22d50b86679
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6689
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/SConscript b/SConscript
index 7e90101..afff4e5 100644
--- a/SConscript
+++ b/SConscript
@@ -19,6 +19,7 @@
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
+
 import collections
 import os.path
 import re
@@ -44,11 +45,42 @@
     Default(obj)
     return obj
 
+# @brief Generates SVE/SVE2 shared object files for a specific V8 architecture.
+#
+# @param  sources    The target source files
+# @param  arch_v8_info  A (name, info) tuple: the architecture name and a dict
+#                       with optional 'cxxflags' and 'cppdefines' lists.
+#
+# @return A list of objects for the corresponding architecture.
+def build_multi_isa_objs(sources, arch_v8_info):
 
-def build_sve_objs(sources):
+    arch_v8 = arch_v8_info[0]
+
+    # Create a temp environment
     tmp_env = arm_compute_env.Clone()
-    tmp_env.Append(CXXFLAGS = "-march=armv8.2-a+sve+fp16")
-    obj = tmp_env.SharedObject(sources)
+
+    if 'cxxflags' in arch_v8_info[1] and len(arch_v8_info[1]['cxxflags']) > 0:
+        tmp_env.Append(CXXFLAGS = arch_v8_info[1]['cxxflags'])
+    if 'cppdefines' in arch_v8_info[1] and len(arch_v8_info[1]['cppdefines']) > 0:
+        tmp_env.Append(CPPDEFINES = arch_v8_info[1]['cppdefines'])
+
+    if 'sve' in arch_v8:
+        # Toggle SVE/SVE2 specific extensions
+        tmp_env.Append(CPPDEFINES = ['ENABLE_SVE', 'ARM_COMPUTE_ENABLE_SVE'])
+        if 'sve2' in arch_v8:
+            tmp_env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVE2'])
+    else:
+        # FIXME: The NEON flags should be always defined for CPU.
+        #        however, build fails when SVE/SVE2 & NEON flags
+        #        defined together.
+        tmp_env.Append(CPPDEFINES = ['ENABLE_NEON', 'ARM_COMPUTE_ENABLE_NEON'])
+
+    # we must differentiate the file object names
+    # as we accumulate the set.
+    obj = []
+    for src in sources:
+        obj += tmp_env.SharedObject(target='{}-{}'.format(src, arch_v8), source=src)
+
     Default(obj)
     return obj
 
@@ -422,6 +454,11 @@
 
 arm_compute_env.Append(LIBS = ['dl'])
 
+# Load build definitions file
+with (open(Dir('#').path + '/filedefs.json')) as fd:
+    filedefs = json.load(fd)
+
+
 with (open(Dir('#').path + '/filelist.json')) as fp:
     filelist = json.load(fp)
 
@@ -472,8 +509,9 @@
 
     graph_files += Glob('src/graph/backends/CL/*.cpp')
 
-sve_o = []
+multi_isa_objs_list = []
 lib_files_sve = []
+
 if env['neon']:
     # build winograd/depthwise sources for either v7a / v8a
     arm_compute_env.Append(CPPPATH = ["src/core/NEON/kernels/convolution/common/",
@@ -481,14 +519,14 @@
                                       "src/core/NEON/kernels/convolution/depthwise/",
                                       "src/core/NEON/kernels/assembly/",
                                       "arm_compute/core/NEON/kernels/assembly/",
-                                      "src/cpu/kernels/assembly/",])
+                                      "src/cpu/kernels/assembly/"])
 
     lib_files += filelist['cpu']['common']
 
     # Setup SIMD file list to include
     simd = []
-    if 'sve' in env['arch'] or env['fat_binary']: simd += ['sve']
-    if 'sve' not in env['arch'] or env['fat_binary']: simd += ['neon']
+    if 'sve' in env['arch'] or env['multi_isa']: simd += ['sve']
+    if 'sve' not in env['arch'] or env['multi_isa']: simd += ['neon']
 
     # Get attributes
     if(use_custom_ops):
@@ -501,6 +539,7 @@
     cpu_ops_to_build = resolve_operator_dependencies(filelist, cpu_operators, 'cpu')
 
     cpu_files = get_operator_backend_files(filelist, cpu_ops_to_build, 'cpu', simd, attrs)
+
     lib_files += cpu_files.get('common', [])
     lib_files += cpu_files.get('neon', [])
     lib_files_sve += cpu_files.get('sve', [])
@@ -520,17 +559,21 @@
 Export('bootcode_o')
 
 # Build static libraries
-if (env['fat_binary']):
-    sve_o = build_sve_objs(lib_files_sve)
-    arm_compute_a = build_library('arm_compute-static', arm_compute_env, lib_files + sve_o, static=True)
+if (env['multi_isa']):
+    # Available architecture
+    arch_v8s = filedefs['cpu']['arch']
+    for arch_v8_info in arch_v8s.items():
+        multi_isa_objs_list += build_multi_isa_objs(lib_files_sve, arch_v8_info)
+
+    arm_compute_a = build_library('arm_compute-static', arm_compute_env, lib_files + multi_isa_objs_list, static=True)
 else:
     arm_compute_a = build_library('arm_compute-static', arm_compute_env, lib_files + lib_files_sve, static=True)
 Export('arm_compute_a')
 
 # Build shared libraries
 if env['os'] != 'bare_metal' and not env['standalone']:
-    if (env['fat_binary']):
-        arm_compute_so = build_library('arm_compute', arm_compute_env, lib_files + sve_o, static=False)
+    if (env['multi_isa']):
+        arm_compute_so = build_library('arm_compute', arm_compute_env, lib_files + multi_isa_objs_list, static=False)
     else:
         arm_compute_so = build_library('arm_compute', arm_compute_env, lib_files + lib_files_sve, static=False)
 
diff --git a/SConstruct b/SConstruct
index 400228c..2a8403f 100644
--- a/SConstruct
+++ b/SConstruct
@@ -99,7 +99,7 @@
     BoolVariable("examples", "Build example programs", True),
     BoolVariable("gemm_tuner", "Build gemm_tuner programs", True),
     BoolVariable("Werror", "Enable/disable the -Werror compilation flag", True),
-    BoolVariable("fat_binary", "Build fat binary version of library. Note works only for armv8.2-a", False),
+    BoolVariable("multi_isa", "Build Multi ISA binary version of library. Note works only for armv8.2-a", False),
     BoolVariable("standalone", "Builds the tests as standalone executables, links statically with libgcc, libstdc++ and libarm_compute", False),
     BoolVariable("opencl", "Enable OpenCL support", True),
     BoolVariable("neon", "Enable Arm® Neon™ support", False),
@@ -250,40 +250,63 @@
 
 # Add architecture specific flags
 prefix = ""
-if 'v7a' in env['arch']:
-    env.Append(CXXFLAGS = ['-march=armv7-a', '-mthumb', '-mfpu=neon'])
-    if (env['os'] == 'android' or env['os'] == 'tizen') and not 'hf' in env['arch']:
-        env.Append(CXXFLAGS = ['-mfloat-abi=softfp'])
-    else:
-        env.Append(CXXFLAGS = ['-mfloat-abi=hard'])
-elif 'v8' in env['arch']:
-    if 'sve2' in env['arch']:
-        env.Append(CXXFLAGS = ['-march=armv8.2-a+sve2+fp16+dotprod'])
-        env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVE2'])
-    elif 'sve' in env['arch']:
-        env.Append(CXXFLAGS = ['-march=armv8.2-a+sve+fp16+dotprod'])
-    elif 'armv8r64' in env['arch']:
-        env.Append(CXXFLAGS = ['-march=armv8.4-a'])
-    elif 'v8.' in env['arch']:
-        env.Append(CXXFLAGS = ['-march=armv8.2-a+fp16']) # explicitly enable fp16 extension otherwise __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is undefined
-    else:
-        env.Append(CXXFLAGS = ['-march=armv8-a'])
+if env['multi_isa']:
+    # assert arch version is v8
+    if 'v8' not in env['arch']:
+        print("Currently Multi ISA binary is only supported for arm v8 family")
+        Exit(1)
 
     if 'v8.6-a' in env['arch']:
-        env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_I8MM', 'ARM_COMPUTE_ENABLE_BF16'])
         if "disable_mmla_fp" not in env['custom_options']:
             env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVEF32MM'])
-    if 'v8.' in env['arch']:
-        env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_FP16'])
 
-elif 'x86' in env['arch']:
-    if env['estate'] == '32':
-        env.Append(CCFLAGS = ['-m32'])
-        env.Append(LINKFLAGS = ['-m32'])
+else: # NONE "multi_isa" builds
+
+    if 'sve' in env['arch']:
+        env.Append(CPPDEFINES = ['ENABLE_SVE', 'ARM_COMPUTE_ENABLE_SVE'])
+        if 'sve2' in env['arch']:
+            env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVE2'])
     else:
-        env.Append(CXXFLAGS = ['-fPIC'])
-        env.Append(CCFLAGS = ['-m64'])
-        env.Append(LINKFLAGS = ['-m64'])
+        # FIXME: The NEON flags should be always defined for CPU.
+        #        however, build fails when SVE/SVE2 & NEON flags
+        #        defined together.
+        env.Append(CPPDEFINES = ['ENABLE_NEON', 'ARM_COMPUTE_ENABLE_NEON'])
+    
+
+    if 'v7a' in env['arch']:
+        env.Append(CXXFLAGS = ['-march=armv7-a', '-mthumb', '-mfpu=neon'])
+        if (env['os'] == 'android' or env['os'] == 'tizen') and not 'hf' in env['arch']:
+            env.Append(CXXFLAGS = ['-mfloat-abi=softfp'])
+        else:
+            env.Append(CXXFLAGS = ['-mfloat-abi=hard'])
+    elif 'v8' in env['arch']:
+        # Preserve the V8 archs for non-multi-ISA variants
+        if 'sve2' in env['arch']:
+            env.Append(CXXFLAGS = ['-march=armv8.2-a+sve2+fp16+dotprod'])
+        elif 'sve' in env['arch']:
+            env.Append(CXXFLAGS = ['-march=armv8.2-a+sve+fp16+dotprod'])
+        elif 'armv8r64' in env['arch']:
+            env.Append(CXXFLAGS = ['-march=armv8.4-a'])
+        elif 'v8.' in env['arch']:
+            env.Append(CXXFLAGS = ['-march=armv8.2-a+fp16']) # explicitly enable fp16 extension otherwise __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is undefined
+        else:
+            env.Append(CXXFLAGS = ['-march=armv8-a'])
+
+        if 'v8.6-a' in env['arch']:
+            env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_I8MM', 'ARM_COMPUTE_ENABLE_BF16'])
+            if "disable_mmla_fp" not in env['custom_options']:
+                env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVEF32MM'])
+        if 'v8.' in env['arch']:
+            env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_FP16'])
+
+    elif 'x86' in env['arch']:
+        if env['estate'] == '32':
+            env.Append(CCFLAGS = ['-m32'])
+            env.Append(LINKFLAGS = ['-m32'])
+        else:
+            env.Append(CXXFLAGS = ['-fPIC'])
+            env.Append(CCFLAGS = ['-m64'])
+            env.Append(LINKFLAGS = ['-m64'])
 
 # Define toolchain
 prefix = ""
@@ -307,11 +330,6 @@
         elif env['os'] == 'tizen':
             prefix = "aarch64-tizen-linux-gnu-"
 
-if 'sve' in env['arch']:
-    env.Append(CXXFLAGS = ['-DENABLE_SVE', '-DARM_COMPUTE_ENABLE_SVE'])
-else:
-    env.Append(CXXFLAGS = ['-DENABLE_NEON', '-DARM_COMPUTE_ENABLE_NEON'])
-
 if env['build'] == 'native':
     prefix = ""
 
@@ -355,15 +373,6 @@
         if not version_at_least(compiler_ver, '7.0.0') and env['os'] == 'bare_metal':
             env.Append(LINKFLAGS = ['-fstack-protector-strong'])
 
-if env['fat_binary']:
-    if env['arch'] != 'armv8.2-a':
-        print("Currently fat binary is only supported with armv8.2-a")
-        Exit(1)
-    env.Append(CXXFLAGS = ['-DENABLE_NEON', '-DARM_COMPUTE_ENABLE_NEON',
-                           '-DENABLE_SVE', '-DARM_COMPUTE_ENABLE_SVE',
-                           '-DARM_COMPUTE_ENABLE_FP16', '-DARM_COMPUTE_ENABLE_BF16',
-                           '-DARM_COMPUTE_ENABLE_I8MM', '-DARM_COMPUTE_ENABLE_SVEF32MM'])
-
 if env['high_priority'] and env['build_config']:
     print("The high priority library cannot be built in conjuction with a user-specified build configuration")
     Exit(1)
diff --git a/docs/user_guide/introduction.dox b/docs/user_guide/introduction.dox
index a8c9926..d685a49 100644
--- a/docs/user_guide/introduction.dox
+++ b/docs/user_guide/introduction.dox
@@ -85,7 +85,7 @@
             - Linux armv7a: gcc-linaro-7.2.1-2017.11-x86_64_arm-linux-gnueabihf
             - Linux arm64-v8a: gcc-linaro-7.2.1-2017.11-x86_64_aarch64-linux-gnu
             - Linux arm64-v8.2-a: gcc-linaro-7.2.1-2017.11-x86_64_aarch64-linux-gnu
-            - Linux arm64-v8.2-a (fat binary): gcc-arm-10.2-2020.11-x86_64-aarch64-none-linux-gnu
+            - Linux arm64-v8.2-a (multi-ISA binary): gcc-arm-10.2-2020.11-x86_64-aarch64-none-linux-gnu
             - Linux armv8.2a-sve: gcc-arm-10.2-2020.11-x86_64-aarch64-none-linux-gnu
             - Android armv7a: clang++ / libc++ NDK r20b
             - Android am64-v8a: clang++ / libc++ NDK r20b
diff --git a/docs/user_guide/library.dox b/docs/user_guide/library.dox
index fc08dbc..7a45fe9 100644
--- a/docs/user_guide/library.dox
+++ b/docs/user_guide/library.dox
@@ -555,9 +555,9 @@
 - Providing information to the caller required by the computation (e.g., memory requirements)
 - Allocation of any required auxiliary memory if it isn't given by its caller explicitly
 
-@subsection architecture_experimental_build_fat_binary Build fat binary
+@subsection architecture_experimental_build_multi_isa Build multi-ISA binary
 
-Selecting fat_binary when building Compute Library, will create a library that contains all the supported ISA features. 
+Selecting multi_isa when building Compute Library will create a library that contains all the supported ISA features.
 Based on the CPU support, the appropriate kernel will be selected at runtime for execution. Currently this option is
 only supported with armv8.2-a as the base architecture.
 
diff --git a/filedefs.json b/filedefs.json
new file mode 100644
index 0000000..0bc030e
--- /dev/null
+++ b/filedefs.json
@@ -0,0 +1,41 @@
+{
+    "cpu": {
+        "arch" : {
+            "armv8-a": {
+                "cxxflags": ["-march=armv8.2-a"]
+            },
+            "armv8.2-a": {
+                "cxxflags": ["-march=armv8.2-a+fp16"],
+                "cppdefines": ["ARM_COMPUTE_ENABLE_FP16"]
+            },
+            "armv8.2-a-sve": {
+                "cxxflags": ["-march=armv8.2-a+sve+fp16+dotprod"],
+                "cppdefines": ["ARM_COMPUTE_ENABLE_FP16", "ARM_COMPUTE_ENABLE_BF16",
+                               "ARM_COMPUTE_ENABLE_I8MM", "ARM_COMPUTE_ENABLE_SVEF32MM"]
+            },
+            "armv8.2-a-sve2": {
+                "cxxflags": ["-march=armv8.2-a+sve2+fp16+dotprod"],
+                "cppdefines": ["ARM_COMPUTE_ENABLE_FP16", "ARM_COMPUTE_ENABLE_BF16",
+                               "ARM_COMPUTE_ENABLE_I8MM", "ARM_COMPUTE_ENABLE_SVEF32MM"]
+            },
+            "armv8r64": {
+                "cxxflags": ["-march=armv8.4-a"]
+            },
+            "armv8.6-a": {
+                "cxxflags": ["-march=armv8.6-a+fp16"],
+                "cppdefines": ["ARM_COMPUTE_ENABLE_FP16"]
+            },
+            "armv8.6-a-sve": {
+                "cxxflags": ["-march=armv8.6-a+sve+fp16+dotprod"],
+                "cppdefines": ["ARM_COMPUTE_ENABLE_FP16", "ARM_COMPUTE_ENABLE_BF16",
+                               "ARM_COMPUTE_ENABLE_I8MM"]
+            },
+            "armv8.6-a-sve2": {
+                "cxxflags": ["-march=armv8.6-a+sve2+fp16+dotprod"],
+                "cppdefines": ["ARM_COMPUTE_ENABLE_FP16", "ARM_COMPUTE_ENABLE_BF16",
+                               "ARM_COMPUTE_ENABLE_I8MM"]
+
+            }
+        }
+    }
+}
diff --git a/src/cpu/kernels/add/generic/sve/fp16.cpp b/src/cpu/kernels/add/generic/sve/fp16.cpp
index 71056a0..28f4d2b 100644
--- a/src/cpu/kernels/add/generic/sve/fp16.cpp
+++ b/src/cpu/kernels/add/generic/sve/fp16.cpp
@@ -21,6 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
+#if defined(ARM_COMPUTE_ENABLE_SVE)
 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
 
 #include "src/cpu/kernels/add/generic/sve/impl.h"
@@ -36,3 +37,4 @@
 }
 } // namespace arm_compute
 #endif /* (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
+#endif /* #if defined(ARM_COMPUTE_ENABLE_SVE) */
\ No newline at end of file