SCons build system refactoring (phase #2).

* Add kernel selection at build time
* Modify filelist.json to allow files separation
  as part of our kernel decoupling process.

Issues to address after this change will be merged:
   (1) Remove SVE/SVE2 defines from already decoupled kernels
   (2) Adapt the new file list structure (filelist.json)

resolves COMPMID-4996 and COMPMID-5048

Change-Id: I8c17a9d6b150bbc7d8c1f2ed38060be82b6aa904
Signed-off-by: Motti Gondabi <motti.gondabi@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7006
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/SConscript b/SConscript
index 45e8818..8cc734d 100644
--- a/SConscript
+++ b/SConscript
@@ -1,3 +1,6 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
 # Copyright (c) 2016-2021 Arm Limited.
 #
 # SPDX-License-Identifier: MIT
@@ -45,48 +48,66 @@
     Default(obj)
     return obj
 
-# @brief Generates SVE/SVE2 shared object files for a specific V8 architechture.
+
+
+
+# @brief Create a list of object from a given file list.
 #
-# @param  sources    The target source files
-# @param  arch_info  A Tuple represents the architecture info
-#                    such as the compiler flags and defines.
+# @param  arch_info      A dictionary represents the architecture info such as the
+#                        compiler flags and defines (filedefs.json).
+#
+# @param  sources        A list of files to build
 #
 # @return A list of objects for the corresponding architecture.
-#
-def build_multi_isa_objs(sources, arch_v8_info):
 
-    # Get the march
-    arch_v8 = arch_v8_info[0]
+def build_obj_list(arch_info, sources, static=False):
 
-    # Create a temp environment
+    # Clone environment
     tmp_env = arm_compute_env.Clone()
 
-    if 'cxxflags' in arch_v8_info[1] and len(arch_v8_info[1]['cxxflags']) > 0:
-        tmp_env.Append(CXXFLAGS = arch_v8_info[1]['cxxflags'])
-    if 'cppdefines' in arch_v8_info[1] and len(arch_v8_info[1]['cppdefines']) > 0:
-        tmp_env.Append(CPPDEFINES = arch_v8_info[1]['cppdefines'])
+    # Append architecture spec
+    if 'cxxflags' in arch_info and len(arch_info['cxxflags']) > 0:
+        tmp_env.Append(CXXFLAGS = arch_info['cxxflags'])
 
-    if 'sve' in arch_v8:
-        tmp_env.Append(CPPDEFINES = ['ENABLE_SVE', 'ARM_COMPUTE_ENABLE_SVE'])
-        if 'sve2' in arch_v8:
-            tmp_env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVE2'])
+    # Build and return objects
+    if static:
+        objs = tmp_env.StaticObject(sources)
     else:
-        tmp_env.Append(CPPDEFINES = ['ENABLE_NEON', 'ARM_COMPUTE_ENABLE_NEON'])
-
-    # we must differentiate the file object names
-    # as we accumulate the set.
-    objs = []
-    for src in sources:
-        objs += tmp_env.SharedObject('{}-{}.o'.format(src.replace('.cpp', ''), arch_v8), src)
-
+        objs = tmp_env.SharedObject(sources)
+    
     tmp_env.Default(objs)
     return objs
 
+# @brief Build multi-ISA files with the respective architecture.
+#
+# @return Two distinct lists:
+#         A list of static objects
+#         A list of shared objects
 
-def build_objs(sources):
-    obj = arm_compute_env.SharedObject(sources)
-    Default(obj)
-    return obj
+def build_lib_objects():
+    lib_static_objs = [] # static objects
+    lib_shared_objs = [] # shared objects
+
+    arm_compute_env.Append(CPPDEFINES = ['ENABLE_NEON', 'ARM_COMPUTE_ENABLE_NEON',
+                           'ENABLE_SVE', 'ARM_COMPUTE_ENABLE_SVE',
+                           'ARM_COMPUTE_ENABLE_FP16', 'ARM_COMPUTE_ENABLE_BF16',
+                           'ARM_COMPUTE_ENABLE_I8MM', 'ARM_COMPUTE_ENABLE_SVEF32MM'])
+
+    # Build all the common files for the base architecture
+    lib_static_objs += build_obj_list(filedefs["armv8.2-a"], lib_files, static=True)
+    lib_shared_objs += build_obj_list(filedefs["armv8.2-a"], lib_files, static=False)
+    
+    # Build the SVE specific files
+    lib_static_objs += build_obj_list(filedefs["armv8.2-a-sve"], lib_files_sve, static=True)
+    lib_shared_objs += build_obj_list(filedefs["armv8.2-a-sve"], lib_files_sve, static=False)
+
+    # Build the SVE2 specific files
+    arm_compute_env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVE2'])
+    lib_static_objs += build_obj_list(filedefs["armv8.6-a-sve2"], lib_files_sve2, static=True)
+    lib_shared_objs += build_obj_list(filedefs["armv8.6-a-sve2"], lib_files_sve2, static=False)
+
+    return lib_static_objs, lib_shared_objs
+
 
 
 def build_library(name, build_env, sources, static=False, libs=[]):
@@ -214,6 +235,7 @@
 
     # Manage execution state
     attrs += ['estate32' if (env['estate'] == 'auto' and 'v7a' in env['arch']) or '32' in env['estate'] else 'estate64']
+
     return attrs
 
 
@@ -461,6 +483,7 @@
 # Load build definitions file
 with (open(Dir('#').path + '/filedefs.json')) as fd:
     filedefs = json.load(fd)
+    filedefs = filedefs['cpu']['arch']
 
 
 with (open(Dir('#').path + '/filelist.json')) as fp:
@@ -513,8 +536,9 @@
 
     graph_files += Glob('src/graph/backends/CL/*.cpp')
 
-multi_isa_objs_list = []
+
 lib_files_sve = []
+lib_files_sve2 = []
 
 if env['neon']:
     # build winograd/depthwise sources for either v7a / v8a
@@ -528,9 +552,12 @@
     lib_files += filelist['cpu']['common']
 
     # Setup SIMD file list to include
-    simd = []
-    if 'sve' in env['arch'] or env['multi_isa']: simd += ['sve']
-    if 'sve' not in env['arch'] or env['multi_isa']: simd += ['neon']
+    simd = ['neon']
+    if env['multi_isa']:
+        simd += ['sve', 'sve2']
+    else:
+        if 'sve' in env['arch']: simd += ['sve']
+        if 'sve2' in env['arch']: simd += ['sve2']
 
     # Get attributes
     if(use_custom_ops):
@@ -544,9 +571,17 @@
 
     cpu_files = get_operator_backend_files(filelist, cpu_ops_to_build, 'cpu', simd, attrs)
 
+    # Shared among ALL CPU files
     lib_files += cpu_files.get('common', [])
+
+    # Arm® Neon™ specific files
     lib_files += cpu_files.get('neon', [])
-    lib_files_sve += cpu_files.get('sve', [])
+
+    # SVE files only
+    lib_files_sve = cpu_files.get('sve', [])
+
+    # SVE2 files only
+    lib_files_sve2 = cpu_files.get('sve2', [])
 
     graph_files += Glob('src/graph/backends/NEON/*.cpp')
 
@@ -562,45 +597,39 @@
     bootcode_o = build_bootcode_objs(bootcode_files)
 Export('bootcode_o')
 
-# STATIC library build.
-#
-# Multi-ISA: We split the library into 2 static libs: arm_compute-static.a and arm_compute_multi_isa-static.a
-# to avoid 'argument list too long' error which being thrown due to OS string length limitation at link time.
-#
-# For single arch build: No change and we produce a single arm_compute-static.a
+
 if (env['multi_isa']):
-    # Get v8 variants
-    arch_v8s = filedefs['cpu']['arch']
-    for arch_v8_info in arch_v8s.items():
-        multi_isa_objs_list += build_multi_isa_objs(lib_files_sve, arch_v8_info)
+    lib_static_objs, lib_shared_objs = build_lib_objects()
+
+
+# STATIC library build.
+if (env['multi_isa']):
+    arm_compute_a = build_library('arm_compute-static', arm_compute_env, lib_static_objs, static=True)
+else:
+    if 'sve2' in env['arch']:
+        lib_files += lib_files_sve
+        lib_files += lib_files_sve2
+    elif 'sve' in env['arch']:
+        lib_files += lib_files_sve
 
     arm_compute_a = build_library('arm_compute-static', arm_compute_env, lib_files, static=True)
-    arm_compute_multi_isa_a = build_library('arm_compute_multi_isa-static', arm_compute_env, multi_isa_objs_list, static=True)
-    Export('arm_compute_multi_isa_a')
-else:
-    arm_compute_a = build_library('arm_compute-static', arm_compute_env, lib_files + lib_files_sve, static=True)
-    arm_compute_multi_isa_a = None
 
 Export('arm_compute_a')
 
 # SHARED library build.
-#
-# Multi-ISA: we leverage the prior static build such that the resulting
-# "libarm_compute.so" is combined from "arm_compute_multi_isa-static.a" and "lib_files" objects.
-#
-# For single arch build: No change.
 if env['os'] != 'bare_metal' and not env['standalone']:
     if (env['multi_isa']):
-        arm_compute_so = build_library('arm_compute', arm_compute_env, lib_files, static=False, libs=[arm_compute_multi_isa_a])
+
+        arm_compute_so = build_library('arm_compute', arm_compute_env, lib_shared_objs, static=False)
     else:
-        arm_compute_so = build_library('arm_compute', arm_compute_env, lib_files + lib_files_sve, static=False)
+        arm_compute_so = build_library('arm_compute', arm_compute_env, lib_files, static=False)
 
     Export('arm_compute_so')
 
 # Generate dummy core lib for backwards compatibility
 if env['os'] == 'macos':
     # macos static library archiver fails if given an empty list of files
-    arm_compute_core_a = build_library('arm_compute_core-static', arm_compute_env, [lib_files], static=True)
+    arm_compute_core_a = build_library('arm_compute_core-static', arm_compute_env, lib_files, static=True)
 else:
     arm_compute_core_a = build_library('arm_compute_core-static', arm_compute_env, [], static=True)
 
@@ -615,7 +644,7 @@
 # Build graph libraries
 arm_compute_graph_env.Append(CXXFLAGS = ['-Wno-redundant-move', '-Wno-pessimizing-move'])
 
-arm_compute_graph_a = build_library('arm_compute_graph-static', arm_compute_graph_env, graph_files, static=True, libs = [ arm_compute_a, arm_compute_multi_isa_a ])
+arm_compute_graph_a = build_library('arm_compute_graph-static', arm_compute_graph_env, graph_files, static=True, libs = [ arm_compute_a ])
 Export('arm_compute_graph_a')
 
 if env['os'] != 'bare_metal' and not env['standalone']:
@@ -624,7 +653,7 @@
     Export('arm_compute_graph_so')
 
 if env['standalone']:
-    alias = arm_compute_env.Alias("arm_compute", [arm_compute_a, arm_compute_multi_isa_a])
+    alias = arm_compute_env.Alias("arm_compute", [arm_compute_a])
 else:
     alias = arm_compute_env.Alias("arm_compute", [arm_compute_a, arm_compute_so])
 
diff --git a/SConstruct b/SConstruct
index ff53229..bae197c 100644
--- a/SConstruct
+++ b/SConstruct
@@ -249,12 +249,12 @@
     print("ERROR: armv7a architecture has only 32-bit execution state")
     Exit(1)
 
+env.Append(CPPDEFINES = ['ENABLE_NEON', 'ARM_COMPUTE_ENABLE_NEON'])
+
 if 'sve' in env['arch']:
     env.Append(CPPDEFINES = ['ENABLE_SVE', 'ARM_COMPUTE_ENABLE_SVE'])
     if 'sve2' in env['arch']:
         env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVE2'])
-else:
-    env.Append(CPPDEFINES = ['ENABLE_NEON', 'ARM_COMPUTE_ENABLE_NEON'])
 
 # Add architecture specific flags
 prefix = ""
diff --git a/filedefs.json b/filedefs.json
index 0bc030e..76dccff 100644
--- a/filedefs.json
+++ b/filedefs.json
@@ -34,8 +34,7 @@
                 "cxxflags": ["-march=armv8.6-a+sve2+fp16+dotprod"],
                 "cppdefines": ["ARM_COMPUTE_ENABLE_FP16", "ARM_COMPUTE_ENABLE_BF16",
                                "ARM_COMPUTE_ENABLE_I8MM"]
-
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/filelist.json b/filelist.json
index 37c0f1d..d6b6f47 100644
--- a/filelist.json
+++ b/filelist.json
@@ -873,22 +873,24 @@
           "common": [
             "src/cpu/operators/CpuAdd.cpp",
             "src/cpu/kernels/CpuAddKernel.cpp",
-            "src/runtime/NEON/functions/NEArithmeticAddition.cpp",
-            "src/cpu/kernels/add/generic/neon/qasymm8.cpp",
-            "src/cpu/kernels/add/generic/neon/qasymm8_signed.cpp",
-            "src/cpu/kernels/add/generic/neon/qsymm16.cpp"
+            "src/runtime/NEON/functions/NEArithmeticAddition.cpp"
           ],
           "neon": {
-            "common": [ "src/cpu/kernels/add/generic/neon/impl.cpp" ],
+            "common": ["src/cpu/kernels/add/generic/neon/impl.cpp"],
             "fp32":["src/cpu/kernels/add/generic/neon/fp32.cpp"],
             "fp16":["src/cpu/kernels/add/generic/neon/fp16.cpp"],
-            "integer":["src/cpu/kernels/add/generic/neon/integer.cpp"]
+            "integer":["src/cpu/kernels/add/generic/neon/integer.cpp"],
+            "qasymm8": ["src/cpu/kernels/add/generic/neon/qasymm8.cpp"],
+            "qasymm8_signed": ["src/cpu/kernels/add/generic/neon/qasymm8_signed.cpp"],
+            "qsymm16": ["src/cpu/kernels/add/generic/neon/qsymm16.cpp"]
           },
           "sve": {
             "common": [ "src/cpu/kernels/add/generic/sve/impl.cpp" ],
-            "fp32":["src/cpu/kernels/add/generic/sve/fp32.cpp"],
-            "fp16":["src/cpu/kernels/add/generic/sve/fp16.cpp"],
             "integer":["src/cpu/kernels/add/generic/sve/integer.cpp"],
+            "fp32":["src/cpu/kernels/add/generic/sve/fp32.cpp"],
+            "fp16":["src/cpu/kernels/add/generic/sve/fp16.cpp"]
+          },
+          "sve2": {
             "qasymm8": [ "src/cpu/kernels/add/generic/sve2/qasymm8.cpp" ],
             "qasymm8_signed": [ "src/cpu/kernels/add/generic/sve2/qasymm8_signed.cpp" ],
             "qsymm16": [ "src/cpu/kernels/add/generic/sve2/qsymm16.cpp" ]
@@ -1024,38 +1026,42 @@
             "src/cpu/kernels/CpuCol2ImKernel.cpp",
             "src/cpu/kernels/CpuIm2ColKernel.cpp",
             "src/cpu/kernels/CpuWeightsReshapeKernel.cpp",
-            "src/core/NEON/kernels/convolution/common/padding.cpp",
-            "src/core/NEON/kernels/convolution/common/qasymm8.cpp",
-            "src/core/NEON/kernels/convolution/common/qsymm8.cpp",
-            "src/core/NEON/kernels/convolution/common/utils.cpp",
-            "src/core/NEON/kernels/convolution/winograd/padding.cpp",
-            "src/core/NEON/kernels/convolution/winograd/winograd.cpp",
-            "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_1x8_fp32_fp32_integers.cpp",
-            "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_4x4_fp16_fp16_integers.cpp",
-            "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_4x4_fp32_fp32_integers.cpp",
-            "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp16_fp16_integers.cpp",
-            "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp32_fp32_integers.cpp",
-            "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2_7_fp32_fp32_integers.cpp",
-            "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2x2_3x3_fp32_fp32_integers.cpp",
-            "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2x2_5x5_fp32_fp32_integers.cpp",
-            "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4_5_fp32_fp32_integers.cpp",
-            "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4x4_3x3_fp16_fp16_integers.cpp",
-            "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4x4_3x3_fp32_fp32_integers.cpp",
-            "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_6_3_fp32_fp32_integers.cpp",
-            "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2_7_fp32_fp32_integers.cpp",
-            "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2x2_3x3_fp32_fp32_integers.cpp",
-            "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2x2_5x5_fp32_fp32_integers.cpp",
-            "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4_5_fp32_fp32_integers.cpp",
-            "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4x4_3x3_fp16_fp16_integers.cpp",
-            "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4x4_3x3_fp32_fp32_integers.cpp",
-            "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_6_3_fp32_fp32_integers.cpp",
             "src/runtime/NEON/functions/NEConvolutionLayer.cpp",
             "src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp",
             "src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp",
             "src/runtime/NEON/functions/NEGEMMConv2d.cpp",
             "src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp",
             "src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp"
-          ]
+          ],
+          "neon": {
+            "common": [
+              "src/core/NEON/kernels/convolution/common/padding.cpp",
+              "src/core/NEON/kernels/convolution/common/qasymm8.cpp",
+              "src/core/NEON/kernels/convolution/common/qsymm8.cpp",
+              "src/core/NEON/kernels/convolution/common/utils.cpp",
+              "src/core/NEON/kernels/convolution/winograd/padding.cpp",
+              "src/core/NEON/kernels/convolution/winograd/winograd.cpp",
+              "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_1x8_fp32_fp32_integers.cpp",
+              "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_4x4_fp16_fp16_integers.cpp",
+              "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_4x4_fp32_fp32_integers.cpp",
+              "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp16_fp16_integers.cpp",
+              "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp32_fp32_integers.cpp",
+              "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2_7_fp32_fp32_integers.cpp",
+              "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2x2_3x3_fp32_fp32_integers.cpp",
+              "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2x2_5x5_fp32_fp32_integers.cpp",
+              "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4_5_fp32_fp32_integers.cpp",
+              "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4x4_3x3_fp16_fp16_integers.cpp",
+              "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4x4_3x3_fp32_fp32_integers.cpp",
+              "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_6_3_fp32_fp32_integers.cpp",
+              "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2_7_fp32_fp32_integers.cpp",
+              "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2x2_3x3_fp32_fp32_integers.cpp",
+              "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2x2_5x5_fp32_fp32_integers.cpp",
+              "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4_5_fp32_fp32_integers.cpp",
+              "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4x4_3x3_fp16_fp16_integers.cpp",
+              "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4x4_3x3_fp32_fp32_integers.cpp",
+              "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_6_3_fp32_fp32_integers.cpp" 
+            ]
+          }
         }
       },
       "Copy": {
@@ -1079,7 +1085,7 @@
             "fp32":    [ "src/cpu/kernels/crop/generic/neon/fp32.cpp" ],
             "fp16":    [ "src/cpu/kernels/crop/generic/neon/fp16.cpp" ],
             "integer": [ "src/cpu/kernels/crop/generic/neon/integer.cpp"   ]
-          }  
+          }
         }
       },
       "Deconv2d": {
@@ -1114,68 +1120,72 @@
             "src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp",
             "src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp",
             "src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.cpp",
-            "src/core/NEON/kernels/convolution/common/padding.cpp",
-            "src/core/NEON/kernels/convolution/common/qasymm8.cpp",
-            "src/core/NEON/kernels/convolution/common/qsymm8.cpp",
-            "src/core/NEON/kernels/convolution/common/utils.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp16.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8q.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8s8u8q.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/interleaves/8b_mla.cpp",
-            "src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_generic_output9_mla_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_s8q_3x3_dot.cpp",
-            "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_u8q_3x3_dot.cpp"
+            "src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp"
           ],
+          "neon": {
+            "common": [
+              "src/core/NEON/kernels/convolution/common/padding.cpp",
+              "src/core/NEON/kernels/convolution/common/qasymm8.cpp",
+              "src/core/NEON/kernels/convolution/common/qsymm8.cpp",
+              "src/core/NEON/kernels/convolution/common/utils.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp16.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8q.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8s8u8q.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/interleaves/8b_mla.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_generic_output9_mla_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_s8q_3x3_dot.cpp",
+              "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_u8q_3x3_dot.cpp"
+              ]
+          },
           "sve": {
             "common": [
               "src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_8b_mla.cpp",
@@ -1388,92 +1398,94 @@
             "src/cpu/operators/CpuGemm.cpp",
             "src/cpu/operators/CpuGemmLowpOutputStage.cpp",
             "src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp",
-            "src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp",
-            "src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp",
-            "src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp",
-            "src/core/NEON/kernels/arm_gemm/gemm_int16.cpp",
-            "src/core/NEON/kernels/arm_gemm/gemm_int8.cpp",
-            "src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp",
-            "src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp",
-            "src/core/NEON/kernels/arm_gemm/gemm_uint16.cpp",
-            "src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp",
-            "src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp",
-            "src/core/NEON/kernels/arm_gemm/mergeresults-fp16.cpp",
-            "src/core/NEON/kernels/arm_gemm/mergeresults.cpp",
-            "src/core/NEON/kernels/arm_gemm/misc.cpp",
-            "src/core/NEON/kernels/arm_gemm/quantized.cpp",
-            "src/core/NEON/kernels/arm_gemm/rowsum_indirect_s8.cpp",
-            "src/core/NEON/kernels/arm_gemm/rowsum_indirect_u8.cpp",
-            "src/core/NEON/kernels/arm_gemm/transform.cpp",
             "src/runtime/NEON/functions/NEGEMM.cpp",
             "src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp",
-            "src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_8x12/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/a55r1.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/x1.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_8x12/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/a55r1.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/x1.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/a55r1.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/x1.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_dot_6x16/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_mmla_6x16/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/a55.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/a55.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/a55.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/a55.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_4x24/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_6x16/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/a55.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_mmla_4x16/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/a55.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_mmla_6x16/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/a55.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_mmla_6x16/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/a55.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_mmla_4x16/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/a55.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_mmla_6x16/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/x1.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/a510.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/a510.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/a510.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a53.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55r1.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/x1.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x6/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_6x4/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_8x4/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/a55.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/a55.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/a55.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/generic.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/a55.cpp",
-            "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/generic.cpp"
+            "src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp"
           ],
           "neon": {
+            "common": [
+              "src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp",
+              "src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp",
+              "src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp",
+              "src/core/NEON/kernels/arm_gemm/gemm_int16.cpp",
+              "src/core/NEON/kernels/arm_gemm/gemm_int8.cpp",
+              "src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp",
+              "src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp",
+              "src/core/NEON/kernels/arm_gemm/gemm_uint16.cpp",
+              "src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp",
+              "src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp",
+              "src/core/NEON/kernels/arm_gemm/mergeresults-fp16.cpp",
+              "src/core/NEON/kernels/arm_gemm/mergeresults.cpp",
+              "src/core/NEON/kernels/arm_gemm/misc.cpp",
+              "src/core/NEON/kernels/arm_gemm/quantized.cpp",
+              "src/core/NEON/kernels/arm_gemm/rowsum_indirect_s8.cpp",
+              "src/core/NEON/kernels/arm_gemm/rowsum_indirect_u8.cpp",
+              "src/core/NEON/kernels/arm_gemm/transform.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_8x12/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/a55r1.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/x1.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_8x12/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/a55r1.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/x1.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/a55r1.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/x1.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_dot_6x16/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_mmla_6x16/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/a55.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/a55.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/a55.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/a55.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_4x24/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_6x16/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/a55.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_mmla_4x16/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/a55.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_mmla_6x16/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/a55.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_mmla_6x16/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/a55.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_mmla_4x16/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/a55.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_mmla_6x16/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/x1.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/a510.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/a510.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/a510.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a53.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55r1.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/x1.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x6/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_6x4/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_8x4/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/a55.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/a55.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/a55.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/generic.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/a55.cpp",
+              "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/generic.cpp"    
+            ],
             "estate32": [
               "src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/a53.cpp",
               "src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/a55r1.cpp",
@@ -1677,36 +1689,38 @@
             "src/cpu/operators/CpuPool2d.cpp",
             "src/cpu/kernels/CpuPool2dKernel.cpp",
             "src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp",
-            "src/core/NEON/kernels/arm_conv/pooling/kernels/cpp_nhwc_1x1_stride_any_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/pooling/pooling_fp16.cpp",
-            "src/core/NEON/kernels/arm_conv/pooling/pooling_fp32.cpp",
-            "src/core/NEON/kernels/arm_conv/pooling/pooling_s8.cpp",
-            "src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp",
-            "src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp",
-            "src/core/NEON/kernels/arm_conv/pooling/pooling_u8q.cpp",
-            "src/runtime/NEON/functions/NEPoolingLayer.cpp",
-            "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_generic_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_generic_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_generic_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_generic_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_avg_generic_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_generic_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_avg_generic_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_max_generic_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_avg_generic_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_generic_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_avg_generic_depthfirst/generic.cpp",
-            "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_max_generic_depthfirst/generic.cpp",
             "src/cpu/kernels/pool2d/neon/qasymm8.cpp",
-            "src/cpu/kernels/pool2d/neon/qasymm8_signed.cpp"
+            "src/cpu/kernels/pool2d/neon/qasymm8_signed.cpp",
+            "src/runtime/NEON/functions/NEPoolingLayer.cpp"
           ],
           "neon": {
+            "common": [
+              "src/core/NEON/kernels/arm_conv/pooling/kernels/cpp_nhwc_1x1_stride_any_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/pooling/pooling_fp16.cpp",
+              "src/core/NEON/kernels/arm_conv/pooling/pooling_fp32.cpp",
+              "src/core/NEON/kernels/arm_conv/pooling/pooling_s8.cpp",
+              "src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp",
+              "src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp",
+              "src/core/NEON/kernels/arm_conv/pooling/pooling_u8q.cpp",
+              "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_generic_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_generic_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_generic_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_generic_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_avg_generic_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_generic_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_avg_generic_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_max_generic_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_avg_generic_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_generic_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_avg_generic_depthfirst/generic.cpp",
+              "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_max_generic_depthfirst/generic.cpp"  
+            ],
             "nchw": [ "src/cpu/kernels/pool2d/neon/nchw/all.cpp" ],
             "fp16": [ "src/cpu/kernels/pool2d/neon/fp16.cpp" ],
             "fp32": [ "src/cpu/kernels/pool2d/neon/fp32.cpp" ]
diff --git a/src/cpu/kernels/add/generic/sve/fp16.cpp b/src/cpu/kernels/add/generic/sve/fp16.cpp
index 28f4d2b..7dc3142 100644
--- a/src/cpu/kernels/add/generic/sve/fp16.cpp
+++ b/src/cpu/kernels/add/generic/sve/fp16.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
diff --git a/src/cpu/kernels/add/generic/sve/fp32.cpp b/src/cpu/kernels/add/generic/sve/fp32.cpp
index 8f651b3..5383b88 100644
--- a/src/cpu/kernels/add/generic/sve/fp32.cpp
+++ b/src/cpu/kernels/add/generic/sve/fp32.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,4 +36,4 @@
 }
 }
 } // namespace arm_compute
-#endif //ARM_COMPUTE_ENABLE_SVE
+#endif //ARM_COMPUTE_ENABLE_SVE
\ No newline at end of file
diff --git a/src/cpu/kernels/add/generic/sve/impl.cpp b/src/cpu/kernels/add/generic/sve/impl.cpp
index 52429bb..615f346 100644
--- a/src/cpu/kernels/add/generic/sve/impl.cpp
+++ b/src/cpu/kernels/add/generic/sve/impl.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -133,4 +133,4 @@
 #endif /* (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
 } // namespace cpu
 } // namespace arm_compute
-#endif // ARM_COMPUTE_ENABLE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
\ No newline at end of file
diff --git a/src/cpu/kernels/add/generic/sve/impl.h b/src/cpu/kernels/add/generic/sve/impl.h
index 59f39e9..219dbc9 100644
--- a/src/cpu/kernels/add/generic/sve/impl.h
+++ b/src/cpu/kernels/add/generic/sve/impl.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -37,4 +37,4 @@
 } // namespace cpu
 } // namespace arm_compute
 #endif // SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-#endif // ARM_COMPUTE_ENABLE_SVE
+#endif // ARM_COMPUTE_ENABLE_SVE
\ No newline at end of file
diff --git a/src/cpu/kernels/add/generic/sve/integer.cpp b/src/cpu/kernels/add/generic/sve/integer.cpp
index d197717..1d25d34 100644
--- a/src/cpu/kernels/add/generic/sve/integer.cpp
+++ b/src/cpu/kernels/add/generic/sve/integer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -46,4 +46,4 @@
 }
 }
 } // namespace arm_compute
-#endif //(ARM_COMPUTE_ENABLE_SVE)
+#endif //(ARM_COMPUTE_ENABLE_SVE)
\ No newline at end of file
diff --git a/src/cpu/kernels/add/generic/sve2/qasymm8.cpp b/src/cpu/kernels/add/generic/sve2/qasymm8.cpp
index c61089e..5fe9b95 100644
--- a/src/cpu/kernels/add/generic/sve2/qasymm8.cpp
+++ b/src/cpu/kernels/add/generic/sve2/qasymm8.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,7 +22,6 @@
  * SOFTWARE.
  */
 #if defined(ARM_COMPUTE_ENABLE_SVE2)
-
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/ITensor.h"
 #include "arm_compute/core/Types.h"
@@ -180,4 +179,4 @@
 }
 } // namespace cpu
 } // namespace arm_compute
-#endif //ARM_COMPUTE_ENABLE_SVE2
+#endif //ARM_COMPUTE_ENABLE_SVE2
\ No newline at end of file
diff --git a/src/cpu/kernels/add/generic/sve2/qasymm8_signed.cpp b/src/cpu/kernels/add/generic/sve2/qasymm8_signed.cpp
index 9ac138a..9135dfd 100644
--- a/src/cpu/kernels/add/generic/sve2/qasymm8_signed.cpp
+++ b/src/cpu/kernels/add/generic/sve2/qasymm8_signed.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -178,4 +178,4 @@
 }
 } // namespace cpu
 } // namespace arm_compute
-#endif //ARM_COMPUTE_ENABLE_SVE2
+#endif //(ARM_COMPUTE_ENABLE_SVE2)
\ No newline at end of file
diff --git a/src/cpu/kernels/add/generic/sve2/qsymm16.cpp b/src/cpu/kernels/add/generic/sve2/qsymm16.cpp
index f148872..723d2a6 100644
--- a/src/cpu/kernels/add/generic/sve2/qsymm16.cpp
+++ b/src/cpu/kernels/add/generic/sve2/qsymm16.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -153,4 +153,4 @@
 }
 } // namespace cpu
 } // namespace arm_compute
-#endif //ARM_COMPUTE_ENABLE_SVE2
+#endif //ARM_COMPUTE_ENABLE_SVE2
\ No newline at end of file