Decouple CpuAddKernel

1- NEON supported data types are: fp32, fp16, u8, s16, s32, q8, q_s8, q16
2- SVE supported data types are: fp32, fp16, u8, s16, s32
3- SVE2 supported data types are: q8, q_s8, q16
4- Rearrange SVE folder structure

** Need to remove guards and add testing once the multi-ISA build system and validation tests are available

Resolves COMPMID-4635
Change-Id: I90e4f6a219478aa9ad5c4a6b9858496afa8af42d
Signed-off-by: Dana Zlotnik <dana.zlotnik@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6711
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/cpu/kernels/CpuAddKernel.cpp b/src/cpu/kernels/CpuAddKernel.cpp
index 73c1fda..0c3540f 100644
--- a/src/cpu/kernels/CpuAddKernel.cpp
+++ b/src/cpu/kernels/CpuAddKernel.cpp
@@ -30,9 +30,7 @@
 #include "src/core/common/Registrars.h"
 #include "src/core/helpers/AutoConfiguration.h"
 #include "src/core/helpers/WindowHelpers.h"
-#include "src/cpu/kernels/add/neon/list.h"
-#include "src/cpu/kernels/add/sve/list.h"
-
+#include "src/cpu/kernels/add/list.h"
 #include <array>
 
 namespace arm_compute
@@ -67,7 +65,7 @@
         {
             return (data.dt == DataType::QASYMM8) && data.ci.has_sve2();
         },
-        REGISTER_QASYMM8_SVE(arm_compute::cpu::add_qasymm8_sve)
+        REGISTER_QASYMM8_SVE2(arm_compute::cpu::add_qasymm8_sve2)
     },
     {
         "sve2_qs8_add",
@@ -75,7 +73,7 @@
         {
             return (data.dt == DataType::QASYMM8_SIGNED) && data.ci.has_sve2();
         },
-        REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::add_qasymm8_signed_sve)
+        REGISTER_QASYMM8_SIGNED_SVE2(arm_compute::cpu::add_qasymm8_signed_sve2)
     },
     {
         "sve2_qs16_add",
@@ -83,7 +81,7 @@
         {
             return (data.dt == DataType::QSYMM16) && data.ci.has_sve2();
         },
-        REGISTER_QSYMM16_SVE(arm_compute::cpu::add_qsymm16_sve)
+        REGISTER_QSYMM16_SVE2(arm_compute::cpu::add_qsymm16_sve2)
     },
 #endif /* !defined(ARM_COMPUTE_ENABLE_SVE2) */
 #if defined(ARM_COMPUTE_ENABLE_SVE)
@@ -93,7 +91,7 @@
         {
             return (data.dt == DataType::F32) && data.ci.has_sve();
         },
-        REGISTER_FP32_SVE(arm_compute::cpu::add_same_sve<float>)
+        REGISTER_FP32_SVE(arm_compute::cpu::add_fp32_sve)
     },
     {
         "sve_fp16_add",
@@ -101,7 +99,7 @@
         {
             return (data.dt == DataType::F16) && data.ci.has_sve();
         },
-        REGISTER_FP16_SVE(arm_compute::cpu::add_same_sve<float16_t>)
+        REGISTER_FP16_SVE(arm_compute::cpu::add_fp16_sve)
     },
     {
         "sve_u8_add",
@@ -109,7 +107,7 @@
         {
             return (data.dt == DataType::U8) && data.ci.has_sve();
         },
-        REGISTER_INTEGER_SVE(arm_compute::cpu::add_same_sve<uint8_t>)
+        REGISTER_INTEGER_SVE(arm_compute::cpu::add_u8_sve)
     },
     {
         "sve_s16_add",
@@ -117,7 +115,7 @@
         {
             return (data.dt == DataType::S16) && data.ci.has_sve();
         },
-        REGISTER_INTEGER_SVE(arm_compute::cpu::add_same_sve<int16_t>)
+        REGISTER_INTEGER_SVE(arm_compute::cpu::add_s16_sve)
     },
     {
         "sve_s32_add",
@@ -125,14 +123,14 @@
         {
             return (data.dt == DataType::S32) && data.ci.has_sve();
         },
-        REGISTER_INTEGER_SVE(arm_compute::cpu::add_same_sve<int32_t>)
+        REGISTER_INTEGER_SVE(arm_compute::cpu::add_s32_sve)
     },
 #endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
 #if defined(ARM_COMPUTE_ENABLE_NEON)
     {
         "neon_fp32_add",
         [](const AddSelectorData & data) { return (data.dt == DataType::F32); },
-        REGISTER_FP32_NEON(arm_compute::cpu::add_same_neon<float>)
+        REGISTER_FP32_NEON(arm_compute::cpu::add_fp32_neon)
     },
 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     {
@@ -141,23 +139,23 @@
         {
             return (data.dt == DataType::F16) && data.ci.has_fp16();
         },
-        REGISTER_FP16_NEON(arm_compute::cpu::add_same_neon<float16_t>)
+        REGISTER_FP16_NEON(arm_compute::cpu::add_fp16_neon)
     },
 #endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
     {
         "neon_u8_add",
         [](const AddSelectorData & data) { return (data.dt == DataType::U8); },
-        REGISTER_INTEGER_NEON(arm_compute::cpu::add_same_neon<uint8_t>)
+        REGISTER_INTEGER_NEON(arm_compute::cpu::add_u8_neon)
     },
     {
         "neon_s16_add",
         [](const AddSelectorData & data) { return (data.dt == DataType::S16); },
-        REGISTER_INTEGER_NEON(arm_compute::cpu::add_same_neon<int16_t>)
+        REGISTER_INTEGER_NEON(arm_compute::cpu::add_s16_neon)
     },
     {
         "neon_s32_add",
         [](const AddSelectorData & data) { return (data.dt == DataType::S32); },
-        REGISTER_INTEGER_NEON(arm_compute::cpu::add_same_neon<int32_t>)
+        REGISTER_INTEGER_NEON(arm_compute::cpu::add_s32_neon)
     },
 #endif /*  defined(ARM_COMPUTE_ENABLE_NEON) */
 #if defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE)
@@ -295,12 +293,12 @@
 size_t CpuAddKernel::get_mws(const CPUInfo &platform, size_t thread_count) const
 {
     ARM_COMPUTE_UNUSED(thread_count);
-    // Tuning results that gave optimized results in performance investigation 
-    if (platform.get_cpu_model() == CPUModel::A73 ) 
+    // Tuning results that gave optimized results in performance investigation
+    if(platform.get_cpu_model() == CPUModel::A73)
     {
         return 10240;
     }
-    else 
+    else
     {
         return 9216;
     }