Decouple CpuAddKernel

1- NEON supported data types are: fp32, fp16, u8, s16, s32, q8, q_s8, q16
2- SVE supported data types are: fp32, fp16, u8, s16, s32
3- SVE2 supported data types are: q8, q_s8, q16
4- Re-arrange SVE folder structure

** Need to remove guards and add testing once the multi-ISA build system and validation tests are available

Resolves COMPMID-4635
Change-Id: I90e4f6a219478aa9ad5c4a6b9858496afa8af42d
Signed-off-by: Dana Zlotnik <dana.zlotnik@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6711
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/common/Registrars.h b/src/core/common/Registrars.h
index 65f6c70..c7fbf7f 100644
--- a/src/core/common/Registrars.h
+++ b/src/core/common/Registrars.h
@@ -32,6 +32,12 @@
 #define REGISTER_FP16_SVE(func_name) nullptr
 #endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
 
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
+#define REGISTER_FP16_SVE2(func_name) &(func_name)
+#else /* !defined(ARM_COMPUTE_ENABLE_SVE2) */
+#define REGISTER_FP16_SVE2(func_name) nullptr
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
+
 #if defined(ARM_COMPUTE_ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
 #define REGISTER_FP16_NEON(func_name) &(func_name)
 #else /* !defined(ARM_COMPUTE_ENABLE_NEON) */
@@ -41,6 +47,7 @@
 #else /* !defined(ENABLE_FP16_KERNELS) */
 #define REGISTER_FP16_NEON(func_name) nullptr
 #define REGISTER_FP16_SVE(func_name) nullptr
+#define REGISTER_FP16_SVE2(func_name) nullptr
 #endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
 
 #if defined(ENABLE_FP32_KERNELS)
@@ -51,6 +58,12 @@
 #define REGISTER_FP32_SVE(func_name) nullptr
 #endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
 
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
+#define REGISTER_FP32_SVE2(func_name) &(func_name)
+#else /* !defined(ARM_COMPUTE_ENABLE_SVE2) */
+#define REGISTER_FP32_SVE2(func_name) nullptr
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
+
 #if defined(ARM_COMPUTE_ENABLE_NEON)
 #define REGISTER_FP32_NEON(func_name) &(func_name)
 #else /* !defined(ARM_COMPUTE_ENABLE_NEON) */
@@ -60,6 +73,7 @@
 #else /* defined(ENABLE_FP32_KERNELS) */
 #define REGISTER_FP32_NEON(func_name) nullptr
 #define REGISTER_FP32_SVE(func_name) nullptr
+#define REGISTER_FP32_SVE2(func_name) nullptr
 #endif /* defined(ENABLE_FP32_KERNELS) */
 
 #if defined(ENABLE_QASYMM8_SIGNED_KERNELS)
@@ -72,9 +86,16 @@
 #define REGISTER_QASYMM8_SIGNED_SVE(func_name) nullptr
 #endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
 
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
+#define REGISTER_QASYMM8_SIGNED_SVE2(func_name) &(func_name)
+#else /* !defined(ARM_COMPUTE_ENABLE_SVE2) */
+#define REGISTER_QASYMM8_SIGNED_SVE2(func_name) nullptr
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
+
 #else /* defined(ENABLE_QASYMM8_SIGNED_KERNELS) */
 #define REGISTER_QASYMM8_SIGNED_NEON(func_name) nullptr
 #define REGISTER_QASYMM8_SIGNED_SVE(func_name) nullptr
+#define REGISTER_QASYMM8_SIGNED_SVE2(func_name) nullptr
 #endif /* defined(ENABLE_QASYMM8_SIGNED_KERNELS) */
 
 #if defined(ENABLE_QASYMM8_KERNELS)
@@ -86,9 +107,16 @@
 #define REGISTER_QASYMM8_SVE(func_name) nullptr
 #endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
 
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
+#define REGISTER_QASYMM8_SVE2(func_name) &(func_name)
+#else /* !defined(ARM_COMPUTE_ENABLE_SVE2) */
+#define REGISTER_QASYMM8_SVE2(func_name) nullptr
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
+
 #else /* defined(ENABLE_QASYMM8_KERNELS) */
 #define REGISTER_QASYMM8_NEON(func_name) nullptr
 #define REGISTER_QASYMM8_SVE(func_name) nullptr
+#define REGISTER_QASYMM8_SVE2(func_name) nullptr
 #endif /* defined(ENABLE_QASYMM8_KERNELS) */
 
 #if defined(ENABLE_QSYMM16_KERNELS)
@@ -101,9 +129,16 @@
 #define REGISTER_QSYMM16_SVE(func_name) nullptr
 #endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
 
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
+#define REGISTER_QSYMM16_SVE2(func_name) &(func_name)
+#else /* !defined(ARM_COMPUTE_ENABLE_SVE2) */
+#define REGISTER_QSYMM16_SVE2(func_name) nullptr
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
+
 #else /* defined(ENABLE_QSYMM16_KERNELS) */
 #define REGISTER_QSYMM16_NEON(func_name) nullptr
 #define REGISTER_QSYMM16_SVE(func_name) nullptr
+#define REGISTER_QSYMM16_SVE2(func_name) nullptr
 #endif /* defined(ENABLE_QSYMM16_KERNELS) */
 
 #if defined(ENABLE_INTEGER_KERNELS)
@@ -114,6 +149,12 @@
 #define REGISTER_INTEGER_SVE(func_name) nullptr
 #endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
 
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
+#define REGISTER_INTEGER_SVE2(func_name) &(func_name)
+#else /* !defined(ARM_COMPUTE_ENABLE_SVE2) */
+#define REGISTER_INTEGER_SVE2(func_name) nullptr
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
+
 #if defined(ARM_COMPUTE_ENABLE_NEON)
 #define REGISTER_INTEGER_NEON(func_name) &(func_name)
 #else /* !defined(ARM_COMPUTE_ENABLE_NEON) */
@@ -123,6 +164,7 @@
 #else /* defined(ENABLE_INTEGER_KERNELS) */
 #define REGISTER_INTEGER_NEON(func_name) nullptr
 #define REGISTER_INTEGER_SVE(func_name) nullptr
+#define REGISTER_INTEGER_SVE2(func_name) nullptr
 #endif /* defined(ENABLE_INTEGER_KERNELS) */
 
 #endif /* SRC_CORE_COMMON_REGISTRARS_H */
diff --git a/src/cpu/kernels/CpuAddKernel.cpp b/src/cpu/kernels/CpuAddKernel.cpp
index 73c1fda..0c3540f 100644
--- a/src/cpu/kernels/CpuAddKernel.cpp
+++ b/src/cpu/kernels/CpuAddKernel.cpp
@@ -30,9 +30,7 @@
 #include "src/core/common/Registrars.h"
 #include "src/core/helpers/AutoConfiguration.h"
 #include "src/core/helpers/WindowHelpers.h"
-#include "src/cpu/kernels/add/neon/list.h"
-#include "src/cpu/kernels/add/sve/list.h"
-
+#include "src/cpu/kernels/add/list.h"
 #include <array>
 
 namespace arm_compute
@@ -67,7 +65,7 @@
         {
             return (data.dt == DataType::QASYMM8) && data.ci.has_sve2();
         },
-        REGISTER_QASYMM8_SVE(arm_compute::cpu::add_qasymm8_sve)
+        REGISTER_QASYMM8_SVE2(arm_compute::cpu::add_qasymm8_sve2)
     },
     {
         "sve2_qs8_add",
@@ -75,7 +73,7 @@
         {
             return (data.dt == DataType::QASYMM8_SIGNED) && data.ci.has_sve2();
         },
-        REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::add_qasymm8_signed_sve)
+        REGISTER_QASYMM8_SIGNED_SVE2(arm_compute::cpu::add_qasymm8_signed_sve2)
     },
     {
         "sve2_qs16_add",
@@ -83,7 +81,7 @@
         {
             return (data.dt == DataType::QSYMM16) && data.ci.has_sve2();
         },
-        REGISTER_QSYMM16_SVE(arm_compute::cpu::add_qsymm16_sve)
+        REGISTER_QSYMM16_SVE2(arm_compute::cpu::add_qsymm16_sve2)
     },
 #endif /* !defined(ARM_COMPUTE_ENABLE_SVE2) */
 #if defined(ARM_COMPUTE_ENABLE_SVE)
@@ -93,7 +91,7 @@
         {
             return (data.dt == DataType::F32) && data.ci.has_sve();
         },
-        REGISTER_FP32_SVE(arm_compute::cpu::add_same_sve<float>)
+        REGISTER_FP32_SVE(arm_compute::cpu::add_fp32_sve)
     },
     {
         "sve_fp16_add",
@@ -101,7 +99,7 @@
         {
             return (data.dt == DataType::F16) && data.ci.has_sve();
         },
-        REGISTER_FP16_SVE(arm_compute::cpu::add_same_sve<float16_t>)
+        REGISTER_FP16_SVE(arm_compute::cpu::add_fp16_sve)
     },
     {
         "sve_u8_add",
@@ -109,7 +107,7 @@
         {
             return (data.dt == DataType::U8) && data.ci.has_sve();
         },
-        REGISTER_INTEGER_SVE(arm_compute::cpu::add_same_sve<uint8_t>)
+        REGISTER_INTEGER_SVE(arm_compute::cpu::add_u8_sve)
     },
     {
         "sve_s16_add",
@@ -117,7 +115,7 @@
         {
             return (data.dt == DataType::S16) && data.ci.has_sve();
         },
-        REGISTER_INTEGER_SVE(arm_compute::cpu::add_same_sve<int16_t>)
+        REGISTER_INTEGER_SVE(arm_compute::cpu::add_s16_sve)
     },
     {
         "sve_s32_add",
@@ -125,14 +123,14 @@
         {
             return (data.dt == DataType::S32) && data.ci.has_sve();
         },
-        REGISTER_INTEGER_SVE(arm_compute::cpu::add_same_sve<int32_t>)
+        REGISTER_INTEGER_SVE(arm_compute::cpu::add_s32_sve)
     },
 #endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
 #if defined(ARM_COMPUTE_ENABLE_NEON)
     {
         "neon_fp32_add",
         [](const AddSelectorData & data) { return (data.dt == DataType::F32); },
-        REGISTER_FP32_NEON(arm_compute::cpu::add_same_neon<float>)
+        REGISTER_FP32_NEON(arm_compute::cpu::add_fp32_neon)
     },
 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     {
@@ -141,23 +139,23 @@
         {
             return (data.dt == DataType::F16) && data.ci.has_fp16();
         },
-        REGISTER_FP16_NEON(arm_compute::cpu::add_same_neon<float16_t>)
+        REGISTER_FP16_NEON(arm_compute::cpu::add_fp16_neon)
     },
 #endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
     {
         "neon_u8_add",
         [](const AddSelectorData & data) { return (data.dt == DataType::U8); },
-        REGISTER_INTEGER_NEON(arm_compute::cpu::add_same_neon<uint8_t>)
+        REGISTER_INTEGER_NEON(arm_compute::cpu::add_u8_neon)
     },
     {
         "neon_s16_add",
         [](const AddSelectorData & data) { return (data.dt == DataType::S16); },
-        REGISTER_INTEGER_NEON(arm_compute::cpu::add_same_neon<int16_t>)
+        REGISTER_INTEGER_NEON(arm_compute::cpu::add_s16_neon)
     },
     {
         "neon_s32_add",
         [](const AddSelectorData & data) { return (data.dt == DataType::S32); },
-        REGISTER_INTEGER_NEON(arm_compute::cpu::add_same_neon<int32_t>)
+        REGISTER_INTEGER_NEON(arm_compute::cpu::add_s32_neon)
     },
 #endif /*  defined(ARM_COMPUTE_ENABLE_NEON) */
 #if defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE)
@@ -295,12 +293,12 @@
 size_t CpuAddKernel::get_mws(const CPUInfo &platform, size_t thread_count) const
 {
     ARM_COMPUTE_UNUSED(thread_count);
-    // Tuning results that gave optimized results in performance investigation 
-    if (platform.get_cpu_model() == CPUModel::A73 ) 
+    // Tuning results that gave optimized results in performance investigation
+    if(platform.get_cpu_model() == CPUModel::A73)
     {
         return 10240;
     }
-    else 
+    else
     {
         return 9216;
     }
diff --git a/src/cpu/kernels/add/sve/impl.h b/src/cpu/kernels/add/generic/neon/fp16.cpp
similarity index 72%
copy from src/cpu/kernels/add/sve/impl.h
copy to src/cpu/kernels/add/generic/neon/fp16.cpp
index 32ff5d0..12d4a46 100644
--- a/src/cpu/kernels/add/sve/impl.h
+++ b/src/cpu/kernels/add/generic/neon/fp16.cpp
@@ -21,20 +21,18 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-#define SRC_CORE_SVE_KERNELS_ADD_IMPL_H
+#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
 
-#if defined(ARM_COMPUTE_ENABLE_SVE)
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/Traits.h"
+#include "src/cpu/kernels/add/generic/neon/impl.h"
 
 namespace arm_compute
 {
 namespace cpu
 {
-template <typename ScalarType>
-void add_same_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
-} // namespace cpu
+void add_fp16_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+{
+    return add_same_neon<float16_t>(src0, src1, dst, policy, window);
+}
+}
 } // namespace arm_compute
-#endif // defined(ARM_COMPUTE_ENABLE_SVE)
-#endif // SRC_CORE_SVE_KERNELS_ADD_IMPL_H
\ No newline at end of file
+#endif /* (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
diff --git a/src/cpu/kernels/add/sve/impl.h b/src/cpu/kernels/add/generic/neon/fp32.cpp
similarity index 72%
copy from src/cpu/kernels/add/sve/impl.h
copy to src/cpu/kernels/add/generic/neon/fp32.cpp
index 32ff5d0..3563162 100644
--- a/src/cpu/kernels/add/sve/impl.h
+++ b/src/cpu/kernels/add/generic/neon/fp32.cpp
@@ -21,20 +21,16 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-#define SRC_CORE_SVE_KERNELS_ADD_IMPL_H
 
-#if defined(ARM_COMPUTE_ENABLE_SVE)
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/Traits.h"
+#include "src/cpu/kernels/add/generic/neon/impl.h"
 
 namespace arm_compute
 {
 namespace cpu
 {
-template <typename ScalarType>
-void add_same_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
-} // namespace cpu
+void add_fp32_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+{
+    return add_same_neon<float>(src0, src1, dst, policy, window);
+}
+}
 } // namespace arm_compute
-#endif // defined(ARM_COMPUTE_ENABLE_SVE)
-#endif // SRC_CORE_SVE_KERNELS_ADD_IMPL_H
\ No newline at end of file
diff --git a/src/cpu/kernels/add/neon/list.h b/src/cpu/kernels/add/generic/neon/impl.cpp
similarity index 85%
rename from src/cpu/kernels/add/neon/list.h
rename to src/cpu/kernels/add/generic/neon/impl.cpp
index 379bd32..ad3e445 100644
--- a/src/cpu/kernels/add/neon/list.h
+++ b/src/cpu/kernels/add/generic/neon/impl.cpp
@@ -21,26 +21,15 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef SRC_CORE_NEON_KERNELS_ADD_LIST_H
-#define SRC_CORE_NEON_KERNELS_ADD_LIST_H
 
-#include "arm_compute/core/Types.h"
+#include "src/cpu/kernels/add/generic/neon/impl.h"
+#include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/utils/misc/Traits.h"
 #include "src/core/NEON/wrapper/wrapper.h"
-
 namespace arm_compute
 {
 namespace cpu
 {
-#define DECLARE_ADD_KERNEL(func_name) \
-    void func_name(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
-
-DECLARE_ADD_KERNEL(add_qasymm8_neon);
-DECLARE_ADD_KERNEL(add_qasymm8_signed_neon);
-DECLARE_ADD_KERNEL(add_qsymm16_neon);
-
-#undef DECLARE_ADD_KERNEL
-
 template <typename ScalarType>
 void add_same_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
 {
@@ -138,6 +127,15 @@
         input1, input2, output);
     }
 }
+
+template void add_same_neon<float>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
+template void add_same_neon<uint8_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
+template void add_same_neon<int32_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
+template void add_same_neon<int16_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
+
+#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
+template void add_same_neon<float16_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
+#endif /* (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
+
 } // namespace cpu
 } // namespace arm_compute
-#endif // SRC_CORE_NEON_KERNELS_ADD_LIST_H
diff --git a/src/cpu/kernels/add/sve/impl.h b/src/cpu/kernels/add/generic/neon/impl.h
similarity index 77%
copy from src/cpu/kernels/add/sve/impl.h
copy to src/cpu/kernels/add/generic/neon/impl.h
index 32ff5d0..07afdda 100644
--- a/src/cpu/kernels/add/sve/impl.h
+++ b/src/cpu/kernels/add/generic/neon/impl.h
@@ -21,20 +21,17 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-#define SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-
-#if defined(ARM_COMPUTE_ENABLE_SVE)
+#ifndef SRC_CORE_NEON_KERNELS_ADD_IMPL_H
+#define SRC_CORE_NEON_KERNELS_ADD_IMPL_H
+#include "arm_compute/core/ITensor.h"
 #include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/Traits.h"
-
+#include "arm_compute/core/Window.h"
 namespace arm_compute
 {
 namespace cpu
 {
 template <typename ScalarType>
-void add_same_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
+void add_same_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
 } // namespace cpu
 } // namespace arm_compute
-#endif // defined(ARM_COMPUTE_ENABLE_SVE)
-#endif // SRC_CORE_SVE_KERNELS_ADD_IMPL_H
\ No newline at end of file
+#endif // SRC_CORE_NEON_KERNELS_ADD_IMPL_H
\ No newline at end of file
diff --git a/src/cpu/kernels/add/sve/impl.h b/src/cpu/kernels/add/generic/neon/integer.cpp
similarity index 64%
copy from src/cpu/kernels/add/sve/impl.h
copy to src/cpu/kernels/add/generic/neon/integer.cpp
index 32ff5d0..62c19e6 100644
--- a/src/cpu/kernels/add/sve/impl.h
+++ b/src/cpu/kernels/add/generic/neon/integer.cpp
@@ -21,20 +21,26 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-#define SRC_CORE_SVE_KERNELS_ADD_IMPL_H
 
-#if defined(ARM_COMPUTE_ENABLE_SVE)
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/Traits.h"
+#include "src/cpu/kernels/add/generic/neon/impl.h"
 
 namespace arm_compute
 {
 namespace cpu
 {
-template <typename ScalarType>
-void add_same_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
-} // namespace cpu
+void add_u8_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+{
+    return add_same_neon<uint8_t>(src0, src1, dst, policy, window);
+}
+
+void add_s16_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+{
+    return add_same_neon<int16_t>(src0, src1, dst, policy, window);
+}
+
+void add_s32_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+{
+    return add_same_neon<int32_t>(src0, src1, dst, policy, window);
+}
+}
 } // namespace arm_compute
-#endif // defined(ARM_COMPUTE_ENABLE_SVE)
-#endif // SRC_CORE_SVE_KERNELS_ADD_IMPL_H
\ No newline at end of file
diff --git a/src/cpu/kernels/add/neon/qasymm8.cpp b/src/cpu/kernels/add/generic/neon/qasymm8.cpp
similarity index 100%
rename from src/cpu/kernels/add/neon/qasymm8.cpp
rename to src/cpu/kernels/add/generic/neon/qasymm8.cpp
diff --git a/src/cpu/kernels/add/neon/qasymm8_signed.cpp b/src/cpu/kernels/add/generic/neon/qasymm8_signed.cpp
similarity index 100%
rename from src/cpu/kernels/add/neon/qasymm8_signed.cpp
rename to src/cpu/kernels/add/generic/neon/qasymm8_signed.cpp
diff --git a/src/cpu/kernels/add/neon/qsymm16.cpp b/src/cpu/kernels/add/generic/neon/qsymm16.cpp
similarity index 100%
rename from src/cpu/kernels/add/neon/qsymm16.cpp
rename to src/cpu/kernels/add/generic/neon/qsymm16.cpp
diff --git a/src/cpu/kernels/add/sve/impl.h b/src/cpu/kernels/add/generic/sve/fp16.cpp
similarity index 72%
copy from src/cpu/kernels/add/sve/impl.h
copy to src/cpu/kernels/add/generic/sve/fp16.cpp
index 32ff5d0..71056a0 100644
--- a/src/cpu/kernels/add/sve/impl.h
+++ b/src/cpu/kernels/add/generic/sve/fp16.cpp
@@ -21,20 +21,18 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-#define SRC_CORE_SVE_KERNELS_ADD_IMPL_H
+#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
 
-#if defined(ARM_COMPUTE_ENABLE_SVE)
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/Traits.h"
+#include "src/cpu/kernels/add/generic/sve/impl.h"
 
 namespace arm_compute
 {
 namespace cpu
 {
-template <typename ScalarType>
-void add_same_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
-} // namespace cpu
+void add_fp16_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+{
+    return add_same_sve<float16_t>(src0, src1, dst, policy, window);
+}
+}
 } // namespace arm_compute
-#endif // defined(ARM_COMPUTE_ENABLE_SVE)
-#endif // SRC_CORE_SVE_KERNELS_ADD_IMPL_H
\ No newline at end of file
+#endif /* (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
diff --git a/src/cpu/kernels/add/sve/impl.h b/src/cpu/kernels/add/generic/sve/fp32.cpp
similarity index 74%
copy from src/cpu/kernels/add/sve/impl.h
copy to src/cpu/kernels/add/generic/sve/fp32.cpp
index 32ff5d0..8f651b3 100644
--- a/src/cpu/kernels/add/sve/impl.h
+++ b/src/cpu/kernels/add/generic/sve/fp32.cpp
@@ -21,20 +21,19 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-#define SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-
 #if defined(ARM_COMPUTE_ENABLE_SVE)
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/Traits.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "src/cpu/kernels/add/generic/sve/impl.h"
 
 namespace arm_compute
 {
 namespace cpu
 {
-template <typename ScalarType>
-void add_same_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
-} // namespace cpu
+void add_fp32_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+{
+    return add_same_sve<float>(src0, src1, dst, policy, window);
+}
+}
 } // namespace arm_compute
-#endif // defined(ARM_COMPUTE_ENABLE_SVE)
-#endif // SRC_CORE_SVE_KERNELS_ADD_IMPL_H
\ No newline at end of file
+#endif //ARM_COMPUTE_ENABLE_SVE
diff --git a/src/cpu/kernels/add/sve/impl.cpp b/src/cpu/kernels/add/generic/sve/impl.cpp
similarity index 95%
rename from src/cpu/kernels/add/sve/impl.cpp
rename to src/cpu/kernels/add/generic/sve/impl.cpp
index f8e16a5..52429bb 100644
--- a/src/cpu/kernels/add/sve/impl.cpp
+++ b/src/cpu/kernels/add/generic/sve/impl.cpp
@@ -21,17 +21,13 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
+#include "src/cpu/kernels/add/generic/sve/impl.h"
 #include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/Types.h"
 #include "arm_compute/core/utils/misc/Traits.h"
-#include "src/core/NEON/wrapper/intrinsics/intrinsics.h"
-
 #include "src/core/NEON/SVEMath.h"
-#include "src/cpu/kernels/add/sve/impl.h"
+#include "src/core/NEON/wrapper/intrinsics/intrinsics.h"
 #include <arm_sve.h>
-
 namespace arm_compute
 {
 namespace cpu
@@ -128,12 +124,13 @@
         input1, input2, output);
     }
 }
-
 template void add_same_sve<float>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
-template void add_same_sve<float16_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
 template void add_same_sve<uint8_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
 template void add_same_sve<int16_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
 template void add_same_sve<int32_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
+#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
+template void add_same_sve<float16_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
+#endif /* (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
 } // namespace cpu
 } // namespace arm_compute
-#endif /* defined(__ARM_FEATURE_SVE) */
\ No newline at end of file
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/cpu/kernels/add/sve/impl.h b/src/cpu/kernels/add/generic/sve/impl.h
similarity index 93%
rename from src/cpu/kernels/add/sve/impl.h
rename to src/cpu/kernels/add/generic/sve/impl.h
index 32ff5d0..59f39e9 100644
--- a/src/cpu/kernels/add/sve/impl.h
+++ b/src/cpu/kernels/add/generic/sve/impl.h
@@ -21,12 +21,12 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
+#if defined(ARM_COMPUTE_ENABLE_SVE)
 #ifndef SRC_CORE_SVE_KERNELS_ADD_IMPL_H
 #define SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-
-#if defined(ARM_COMPUTE_ENABLE_SVE)
+#include "arm_compute/core/ITensor.h"
 #include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/Traits.h"
+#include "arm_compute/core/Window.h"
 
 namespace arm_compute
 {
@@ -36,5 +36,5 @@
 void add_same_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
 } // namespace cpu
 } // namespace arm_compute
-#endif // defined(ARM_COMPUTE_ENABLE_SVE)
-#endif // SRC_CORE_SVE_KERNELS_ADD_IMPL_H
\ No newline at end of file
+#endif // SRC_CORE_SVE_KERNELS_ADD_IMPL_H
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/cpu/kernels/add/sve/impl.h b/src/cpu/kernels/add/generic/sve/integer.cpp
similarity index 62%
copy from src/cpu/kernels/add/sve/impl.h
copy to src/cpu/kernels/add/generic/sve/integer.cpp
index 32ff5d0..d197717 100644
--- a/src/cpu/kernels/add/sve/impl.h
+++ b/src/cpu/kernels/add/generic/sve/integer.cpp
@@ -21,20 +21,29 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-#define SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-
 #if defined(ARM_COMPUTE_ENABLE_SVE)
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/Traits.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "src/cpu/kernels/add/generic/sve/impl.h"
 
 namespace arm_compute
 {
 namespace cpu
 {
-template <typename ScalarType>
-void add_same_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
-} // namespace cpu
+void add_u8_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+{
+    return add_same_sve<uint8_t>(src0, src1, dst, policy, window);
+}
+
+void add_s16_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+{
+    return add_same_sve<int16_t>(src0, src1, dst, policy, window);
+}
+
+void add_s32_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+{
+    return add_same_sve<int32_t>(src0, src1, dst, policy, window);
+}
+}
 } // namespace arm_compute
-#endif // defined(ARM_COMPUTE_ENABLE_SVE)
-#endif // SRC_CORE_SVE_KERNELS_ADD_IMPL_H
\ No newline at end of file
+#endif //(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/cpu/kernels/add/sve/qasymm8.cpp b/src/cpu/kernels/add/generic/sve2/qasymm8.cpp
similarity index 98%
rename from src/cpu/kernels/add/sve/qasymm8.cpp
rename to src/cpu/kernels/add/generic/sve2/qasymm8.cpp
index 888ad87..c61089e 100644
--- a/src/cpu/kernels/add/sve/qasymm8.cpp
+++ b/src/cpu/kernels/add/generic/sve2/qasymm8.cpp
@@ -22,6 +22,7 @@
  * SOFTWARE.
  */
 #if defined(ARM_COMPUTE_ENABLE_SVE2)
+
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/ITensor.h"
 #include "arm_compute/core/Types.h"
@@ -34,7 +35,7 @@
 {
 namespace cpu
 {
-void add_qasymm8_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+void add_qasymm8_sve2(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
 {
     ARM_COMPUTE_UNUSED(policy);
 
@@ -179,4 +180,4 @@
 }
 } // namespace cpu
 } // namespace arm_compute
-#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
\ No newline at end of file
+#endif //ARM_COMPUTE_ENABLE_SVE2
diff --git a/src/cpu/kernels/add/sve/qasymm8_signed.cpp b/src/cpu/kernels/add/generic/sve2/qasymm8_signed.cpp
similarity index 97%
rename from src/cpu/kernels/add/sve/qasymm8_signed.cpp
rename to src/cpu/kernels/add/generic/sve2/qasymm8_signed.cpp
index 3b922c6..9ac138a 100644
--- a/src/cpu/kernels/add/sve/qasymm8_signed.cpp
+++ b/src/cpu/kernels/add/generic/sve2/qasymm8_signed.cpp
@@ -34,7 +34,7 @@
 {
 namespace cpu
 {
-void add_qasymm8_signed_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+void add_qasymm8_signed_sve2(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
 {
     ARM_COMPUTE_UNUSED(policy);
 
@@ -178,4 +178,4 @@
 }
 } // namespace cpu
 } // namespace arm_compute
-#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
\ No newline at end of file
+#endif //ARM_COMPUTE_ENABLE_SVE2
diff --git a/src/cpu/kernels/add/sve/qsymm16.cpp b/src/cpu/kernels/add/generic/sve2/qsymm16.cpp
similarity index 97%
rename from src/cpu/kernels/add/sve/qsymm16.cpp
rename to src/cpu/kernels/add/generic/sve2/qsymm16.cpp
index eef5d24..f148872 100644
--- a/src/cpu/kernels/add/sve/qsymm16.cpp
+++ b/src/cpu/kernels/add/generic/sve2/qsymm16.cpp
@@ -34,7 +34,7 @@
 {
 namespace cpu
 {
-void add_qsymm16_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+void add_qsymm16_sve2(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
 {
     ARM_COMPUTE_UNUSED(policy);
 
@@ -153,4 +153,4 @@
 }
 } // namespace cpu
 } // namespace arm_compute
-#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
\ No newline at end of file
+#endif //ARM_COMPUTE_ENABLE_SVE2
diff --git a/src/cpu/kernels/add/sve/list.h b/src/cpu/kernels/add/list.h
similarity index 64%
rename from src/cpu/kernels/add/sve/list.h
rename to src/cpu/kernels/add/list.h
index 4529a9f..9d7c9a6 100644
--- a/src/cpu/kernels/add/sve/list.h
+++ b/src/cpu/kernels/add/list.h
@@ -21,16 +21,11 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef SRC_CORE_SVE_KERNELS_ADD_LIST_H
-#define SRC_CORE_SVE_KERNELS_ADD_LIST_H
+#ifndef SRC_CORE_KERNELS_ADD_LIST_H
+#define SRC_CORE_KERNELS_ADD_LIST_H
 
-#if defined(ARM_COMPUTE_ENABLE_SVE)
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/Traits.h"
-#include "src/core/NEON/SVEMath.h"
-#include "src/core/NEON/wrapper/intrinsics/intrinsics.h"
-#include "src/cpu/kernels/add/sve/impl.h"
-#include <arm_sve.h>
+#include "src/cpu/kernels/add/generic/neon/impl.h"
+#include "src/cpu/kernels/add/generic/sve/impl.h"
 
 namespace arm_compute
 {
@@ -39,13 +34,25 @@
 #define DECLARE_ADD_KERNEL(func_name) \
     void func_name(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
 
-DECLARE_ADD_KERNEL(add_qasymm8_sve);
-DECLARE_ADD_KERNEL(add_qasymm8_signed_sve);
-DECLARE_ADD_KERNEL(add_qsymm16_sve);
+DECLARE_ADD_KERNEL(add_qasymm8_neon);
+DECLARE_ADD_KERNEL(add_qasymm8_signed_neon);
+DECLARE_ADD_KERNEL(add_qsymm16_neon);
+DECLARE_ADD_KERNEL(add_fp32_neon);
+DECLARE_ADD_KERNEL(add_fp16_neon);
+DECLARE_ADD_KERNEL(add_u8_neon);
+DECLARE_ADD_KERNEL(add_s16_neon);
+DECLARE_ADD_KERNEL(add_s32_neon);
+DECLARE_ADD_KERNEL(add_fp32_sve);
+DECLARE_ADD_KERNEL(add_fp16_sve);
+DECLARE_ADD_KERNEL(add_u8_sve);
+DECLARE_ADD_KERNEL(add_s16_sve);
+DECLARE_ADD_KERNEL(add_s32_sve);
+DECLARE_ADD_KERNEL(add_qasymm8_sve2);
+DECLARE_ADD_KERNEL(add_qasymm8_signed_sve2);
+DECLARE_ADD_KERNEL(add_qsymm16_sve2);
 
 #undef DECLARE_ADD_KERNEL
 
 } // namespace cpu
 } // namespace arm_compute
-#endif // defined(ARM_COMPUTE_ENABLE_SVE)
-#endif // SRC_CORE_SVE_KERNELS_ADD_LIST_H
\ No newline at end of file
+#endif // SRC_CORE_KERNELS_ADD_LIST_H
\ No newline at end of file