Decouple CpuAddKernel
1- NEON supported data types are: fp32, fp16, u8, s16, s32, q8, q_s8, q16
2- SVE supported data types are: fp32, fp16, u8, s16, s32
3- SVE2 supported data types are: q8, q_s8, q16
4- Re-arrange SVE folder structure
** Need to remove guards and add testing once the multi-ISA build system and validation tests are available
Resolves COMPMID-4635
Change-Id: I90e4f6a219478aa9ad5c4a6b9858496afa8af42d
Signed-off-by: Dana Zlotnik <dana.zlotnik@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6711
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/common/Registrars.h b/src/core/common/Registrars.h
index 65f6c70..c7fbf7f 100644
--- a/src/core/common/Registrars.h
+++ b/src/core/common/Registrars.h
@@ -32,6 +32,12 @@
#define REGISTER_FP16_SVE(func_name) nullptr
#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
+#define REGISTER_FP16_SVE2(func_name) &(func_name)
+#else /* !defined(ARM_COMPUTE_ENABLE_SVE2) */
+#define REGISTER_FP16_SVE2(func_name) nullptr
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
+
#if defined(ARM_COMPUTE_ENABLE_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
#define REGISTER_FP16_NEON(func_name) &(func_name)
#else /* !defined(ARM_COMPUTE_ENABLE_NEON) */
@@ -41,6 +47,7 @@
#else /* !defined(ENABLE_FP16_KERNELS) */
#define REGISTER_FP16_NEON(func_name) nullptr
#define REGISTER_FP16_SVE(func_name) nullptr
+#define REGISTER_FP16_SVE2(func_name) nullptr
#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
#if defined(ENABLE_FP32_KERNELS)
@@ -51,6 +58,12 @@
#define REGISTER_FP32_SVE(func_name) nullptr
#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
+#define REGISTER_FP32_SVE2(func_name) &(func_name)
+#else /* !defined(ARM_COMPUTE_ENABLE_SVE2) */
+#define REGISTER_FP32_SVE2(func_name) nullptr
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
+
#if defined(ARM_COMPUTE_ENABLE_NEON)
#define REGISTER_FP32_NEON(func_name) &(func_name)
#else /* !defined(ARM_COMPUTE_ENABLE_NEON) */
@@ -60,6 +73,7 @@
#else /* defined(ENABLE_FP32_KERNELS) */
#define REGISTER_FP32_NEON(func_name) nullptr
#define REGISTER_FP32_SVE(func_name) nullptr
+#define REGISTER_FP32_SVE2(func_name) nullptr
#endif /* defined(ENABLE_FP32_KERNELS) */
#if defined(ENABLE_QASYMM8_SIGNED_KERNELS)
@@ -72,9 +86,16 @@
#define REGISTER_QASYMM8_SIGNED_SVE(func_name) nullptr
#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
+#define REGISTER_QASYMM8_SIGNED_SVE2(func_name) &(func_name)
+#else /* !defined(ARM_COMPUTE_ENABLE_SVE2) */
+#define REGISTER_QASYMM8_SIGNED_SVE2(func_name) nullptr
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
+
#else /* defined(ENABLE_QASYMM8_SIGNED_KERNELS) */
#define REGISTER_QASYMM8_SIGNED_NEON(func_name) nullptr
#define REGISTER_QASYMM8_SIGNED_SVE(func_name) nullptr
+#define REGISTER_QASYMM8_SIGNED_SVE2(func_name) nullptr
#endif /* defined(ENABLE_QASYMM8_SIGNED_KERNELS) */
#if defined(ENABLE_QASYMM8_KERNELS)
@@ -86,9 +107,16 @@
#define REGISTER_QASYMM8_SVE(func_name) nullptr
#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
+#define REGISTER_QASYMM8_SVE2(func_name) &(func_name)
+#else /* !defined(ARM_COMPUTE_ENABLE_SVE2) */
+#define REGISTER_QASYMM8_SVE2(func_name) nullptr
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
+
#else /* defined(ENABLE_QASYMM8_KERNELS) */
#define REGISTER_QASYMM8_NEON(func_name) nullptr
#define REGISTER_QASYMM8_SVE(func_name) nullptr
+#define REGISTER_QASYMM8_SVE2(func_name) nullptr
#endif /* defined(ENABLE_QASYMM8_KERNELS) */
#if defined(ENABLE_QSYMM16_KERNELS)
@@ -101,9 +129,16 @@
#define REGISTER_QSYMM16_SVE(func_name) nullptr
#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
+#define REGISTER_QSYMM16_SVE2(func_name) &(func_name)
+#else /* !defined(ARM_COMPUTE_ENABLE_SVE2) */
+#define REGISTER_QSYMM16_SVE2(func_name) nullptr
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
+
#else /* defined(ENABLE_QSYMM16_KERNELS) */
#define REGISTER_QSYMM16_NEON(func_name) nullptr
#define REGISTER_QSYMM16_SVE(func_name) nullptr
+#define REGISTER_QSYMM16_SVE2(func_name) nullptr
#endif /* defined(ENABLE_QSYMM16_KERNELS) */
#if defined(ENABLE_INTEGER_KERNELS)
@@ -114,6 +149,12 @@
#define REGISTER_INTEGER_SVE(func_name) nullptr
#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
+#if defined(ARM_COMPUTE_ENABLE_SVE2)
+#define REGISTER_INTEGER_SVE2(func_name) &(func_name)
+#else /* !defined(ARM_COMPUTE_ENABLE_SVE2) */
+#define REGISTER_INTEGER_SVE2(func_name) nullptr
+#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
+
#if defined(ARM_COMPUTE_ENABLE_NEON)
#define REGISTER_INTEGER_NEON(func_name) &(func_name)
#else /* !defined(ARM_COMPUTE_ENABLE_NEON) */
@@ -123,6 +164,7 @@
#else /* defined(ENABLE_INTEGER_KERNELS) */
#define REGISTER_INTEGER_NEON(func_name) nullptr
#define REGISTER_INTEGER_SVE(func_name) nullptr
+#define REGISTER_INTEGER_SVE2(func_name) nullptr
#endif /* defined(ENABLE_INTEGER_KERNELS) */
#endif /* SRC_CORE_COMMON_REGISTRARS_H */
diff --git a/src/cpu/kernels/CpuAddKernel.cpp b/src/cpu/kernels/CpuAddKernel.cpp
index 73c1fda..0c3540f 100644
--- a/src/cpu/kernels/CpuAddKernel.cpp
+++ b/src/cpu/kernels/CpuAddKernel.cpp
@@ -30,9 +30,7 @@
#include "src/core/common/Registrars.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
-#include "src/cpu/kernels/add/neon/list.h"
-#include "src/cpu/kernels/add/sve/list.h"
-
+#include "src/cpu/kernels/add/list.h"
#include <array>
namespace arm_compute
@@ -67,7 +65,7 @@
{
return (data.dt == DataType::QASYMM8) && data.ci.has_sve2();
},
- REGISTER_QASYMM8_SVE(arm_compute::cpu::add_qasymm8_sve)
+ REGISTER_QASYMM8_SVE2(arm_compute::cpu::add_qasymm8_sve2)
},
{
"sve2_qs8_add",
@@ -75,7 +73,7 @@
{
return (data.dt == DataType::QASYMM8_SIGNED) && data.ci.has_sve2();
},
- REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::add_qasymm8_signed_sve)
+ REGISTER_QASYMM8_SIGNED_SVE2(arm_compute::cpu::add_qasymm8_signed_sve2)
},
{
"sve2_qs16_add",
@@ -83,7 +81,7 @@
{
return (data.dt == DataType::QSYMM16) && data.ci.has_sve2();
},
- REGISTER_QSYMM16_SVE(arm_compute::cpu::add_qsymm16_sve)
+ REGISTER_QSYMM16_SVE2(arm_compute::cpu::add_qsymm16_sve2)
},
#endif /* !defined(ARM_COMPUTE_ENABLE_SVE2) */
#if defined(ARM_COMPUTE_ENABLE_SVE)
@@ -93,7 +91,7 @@
{
return (data.dt == DataType::F32) && data.ci.has_sve();
},
- REGISTER_FP32_SVE(arm_compute::cpu::add_same_sve<float>)
+ REGISTER_FP32_SVE(arm_compute::cpu::add_fp32_sve)
},
{
"sve_fp16_add",
@@ -101,7 +99,7 @@
{
return (data.dt == DataType::F16) && data.ci.has_sve();
},
- REGISTER_FP16_SVE(arm_compute::cpu::add_same_sve<float16_t>)
+ REGISTER_FP16_SVE(arm_compute::cpu::add_fp16_sve)
},
{
"sve_u8_add",
@@ -109,7 +107,7 @@
{
return (data.dt == DataType::U8) && data.ci.has_sve();
},
- REGISTER_INTEGER_SVE(arm_compute::cpu::add_same_sve<uint8_t>)
+ REGISTER_INTEGER_SVE(arm_compute::cpu::add_u8_sve)
},
{
"sve_s16_add",
@@ -117,7 +115,7 @@
{
return (data.dt == DataType::S16) && data.ci.has_sve();
},
- REGISTER_INTEGER_SVE(arm_compute::cpu::add_same_sve<int16_t>)
+ REGISTER_INTEGER_SVE(arm_compute::cpu::add_s16_sve)
},
{
"sve_s32_add",
@@ -125,14 +123,14 @@
{
return (data.dt == DataType::S32) && data.ci.has_sve();
},
- REGISTER_INTEGER_SVE(arm_compute::cpu::add_same_sve<int32_t>)
+ REGISTER_INTEGER_SVE(arm_compute::cpu::add_s32_sve)
},
#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
#if defined(ARM_COMPUTE_ENABLE_NEON)
{
"neon_fp32_add",
[](const AddSelectorData & data) { return (data.dt == DataType::F32); },
- REGISTER_FP32_NEON(arm_compute::cpu::add_same_neon<float>)
+ REGISTER_FP32_NEON(arm_compute::cpu::add_fp32_neon)
},
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
{
@@ -141,23 +139,23 @@
{
return (data.dt == DataType::F16) && data.ci.has_fp16();
},
- REGISTER_FP16_NEON(arm_compute::cpu::add_same_neon<float16_t>)
+ REGISTER_FP16_NEON(arm_compute::cpu::add_fp16_neon)
},
#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
{
"neon_u8_add",
[](const AddSelectorData & data) { return (data.dt == DataType::U8); },
- REGISTER_INTEGER_NEON(arm_compute::cpu::add_same_neon<uint8_t>)
+ REGISTER_INTEGER_NEON(arm_compute::cpu::add_u8_neon)
},
{
"neon_s16_add",
[](const AddSelectorData & data) { return (data.dt == DataType::S16); },
- REGISTER_INTEGER_NEON(arm_compute::cpu::add_same_neon<int16_t>)
+ REGISTER_INTEGER_NEON(arm_compute::cpu::add_s16_neon)
},
{
"neon_s32_add",
[](const AddSelectorData & data) { return (data.dt == DataType::S32); },
- REGISTER_INTEGER_NEON(arm_compute::cpu::add_same_neon<int32_t>)
+ REGISTER_INTEGER_NEON(arm_compute::cpu::add_s32_neon)
},
#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */
#if defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE)
@@ -295,12 +293,12 @@
size_t CpuAddKernel::get_mws(const CPUInfo &platform, size_t thread_count) const
{
ARM_COMPUTE_UNUSED(thread_count);
- // Tuning results that gave optimized results in performance investigation
- if (platform.get_cpu_model() == CPUModel::A73 )
+ // Tuning results that gave optimized results in performance investigation
+ if(platform.get_cpu_model() == CPUModel::A73)
{
return 10240;
}
- else
+ else
{
return 9216;
}
diff --git a/src/cpu/kernels/add/sve/impl.h b/src/cpu/kernels/add/generic/neon/fp16.cpp
similarity index 72%
copy from src/cpu/kernels/add/sve/impl.h
copy to src/cpu/kernels/add/generic/neon/fp16.cpp
index 32ff5d0..12d4a46 100644
--- a/src/cpu/kernels/add/sve/impl.h
+++ b/src/cpu/kernels/add/generic/neon/fp16.cpp
@@ -21,20 +21,18 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-#define SRC_CORE_SVE_KERNELS_ADD_IMPL_H
+#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
-#if defined(ARM_COMPUTE_ENABLE_SVE)
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/Traits.h"
+#include "src/cpu/kernels/add/generic/neon/impl.h"
namespace arm_compute
{
namespace cpu
{
-template <typename ScalarType>
-void add_same_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
-} // namespace cpu
+void add_fp16_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+{
+ return add_same_neon<float16_t>(src0, src1, dst, policy, window);
+}
+}
} // namespace arm_compute
-#endif // defined(ARM_COMPUTE_ENABLE_SVE)
-#endif // SRC_CORE_SVE_KERNELS_ADD_IMPL_H
\ No newline at end of file
+#endif /* (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
diff --git a/src/cpu/kernels/add/sve/impl.h b/src/cpu/kernels/add/generic/neon/fp32.cpp
similarity index 72%
copy from src/cpu/kernels/add/sve/impl.h
copy to src/cpu/kernels/add/generic/neon/fp32.cpp
index 32ff5d0..3563162 100644
--- a/src/cpu/kernels/add/sve/impl.h
+++ b/src/cpu/kernels/add/generic/neon/fp32.cpp
@@ -21,20 +21,16 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-#define SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-#if defined(ARM_COMPUTE_ENABLE_SVE)
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/Traits.h"
+#include "src/cpu/kernels/add/generic/neon/impl.h"
namespace arm_compute
{
namespace cpu
{
-template <typename ScalarType>
-void add_same_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
-} // namespace cpu
+void add_fp32_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+{
+ return add_same_neon<float>(src0, src1, dst, policy, window);
+}
+}
} // namespace arm_compute
-#endif // defined(ARM_COMPUTE_ENABLE_SVE)
-#endif // SRC_CORE_SVE_KERNELS_ADD_IMPL_H
\ No newline at end of file
diff --git a/src/cpu/kernels/add/neon/list.h b/src/cpu/kernels/add/generic/neon/impl.cpp
similarity index 85%
rename from src/cpu/kernels/add/neon/list.h
rename to src/cpu/kernels/add/generic/neon/impl.cpp
index 379bd32..ad3e445 100644
--- a/src/cpu/kernels/add/neon/list.h
+++ b/src/cpu/kernels/add/generic/neon/impl.cpp
@@ -21,26 +21,15 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_CORE_NEON_KERNELS_ADD_LIST_H
-#define SRC_CORE_NEON_KERNELS_ADD_LIST_H
-#include "arm_compute/core/Types.h"
+#include "src/cpu/kernels/add/generic/neon/impl.h"
+#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/utils/misc/Traits.h"
#include "src/core/NEON/wrapper/wrapper.h"
-
namespace arm_compute
{
namespace cpu
{
-#define DECLARE_ADD_KERNEL(func_name) \
- void func_name(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
-
-DECLARE_ADD_KERNEL(add_qasymm8_neon);
-DECLARE_ADD_KERNEL(add_qasymm8_signed_neon);
-DECLARE_ADD_KERNEL(add_qsymm16_neon);
-
-#undef DECLARE_ADD_KERNEL
-
template <typename ScalarType>
void add_same_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
{
@@ -138,6 +127,15 @@
input1, input2, output);
}
}
+
+template void add_same_neon<float>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
+template void add_same_neon<uint8_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
+template void add_same_neon<int32_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
+template void add_same_neon<int16_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
+
+#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
+template void add_same_neon<float16_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
+#endif /* (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
+
} // namespace cpu
} // namespace arm_compute
-#endif // SRC_CORE_NEON_KERNELS_ADD_LIST_H
diff --git a/src/cpu/kernels/add/sve/impl.h b/src/cpu/kernels/add/generic/neon/impl.h
similarity index 77%
copy from src/cpu/kernels/add/sve/impl.h
copy to src/cpu/kernels/add/generic/neon/impl.h
index 32ff5d0..07afdda 100644
--- a/src/cpu/kernels/add/sve/impl.h
+++ b/src/cpu/kernels/add/generic/neon/impl.h
@@ -21,20 +21,17 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-#define SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-
-#if defined(ARM_COMPUTE_ENABLE_SVE)
+#ifndef SRC_CORE_NEON_KERNELS_ADD_IMPL_H
+#define SRC_CORE_NEON_KERNELS_ADD_IMPL_H
+#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/Traits.h"
-
+#include "arm_compute/core/Window.h"
namespace arm_compute
{
namespace cpu
{
template <typename ScalarType>
-void add_same_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
+void add_same_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
} // namespace cpu
} // namespace arm_compute
-#endif // defined(ARM_COMPUTE_ENABLE_SVE)
-#endif // SRC_CORE_SVE_KERNELS_ADD_IMPL_H
\ No newline at end of file
+#endif // SRC_CORE_NEON_KERNELS_ADD_IMPL_H
\ No newline at end of file
diff --git a/src/cpu/kernels/add/sve/impl.h b/src/cpu/kernels/add/generic/neon/integer.cpp
similarity index 64%
copy from src/cpu/kernels/add/sve/impl.h
copy to src/cpu/kernels/add/generic/neon/integer.cpp
index 32ff5d0..62c19e6 100644
--- a/src/cpu/kernels/add/sve/impl.h
+++ b/src/cpu/kernels/add/generic/neon/integer.cpp
@@ -21,20 +21,26 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-#define SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-#if defined(ARM_COMPUTE_ENABLE_SVE)
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/Traits.h"
+#include "src/cpu/kernels/add/generic/neon/impl.h"
namespace arm_compute
{
namespace cpu
{
-template <typename ScalarType>
-void add_same_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
-} // namespace cpu
+void add_u8_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+{
+ return add_same_neon<uint8_t>(src0, src1, dst, policy, window);
+}
+
+void add_s16_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+{
+ return add_same_neon<int16_t>(src0, src1, dst, policy, window);
+}
+
+void add_s32_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+{
+ return add_same_neon<int32_t>(src0, src1, dst, policy, window);
+}
+}
} // namespace arm_compute
-#endif // defined(ARM_COMPUTE_ENABLE_SVE)
-#endif // SRC_CORE_SVE_KERNELS_ADD_IMPL_H
\ No newline at end of file
diff --git a/src/cpu/kernels/add/neon/qasymm8.cpp b/src/cpu/kernels/add/generic/neon/qasymm8.cpp
similarity index 100%
rename from src/cpu/kernels/add/neon/qasymm8.cpp
rename to src/cpu/kernels/add/generic/neon/qasymm8.cpp
diff --git a/src/cpu/kernels/add/neon/qasymm8_signed.cpp b/src/cpu/kernels/add/generic/neon/qasymm8_signed.cpp
similarity index 100%
rename from src/cpu/kernels/add/neon/qasymm8_signed.cpp
rename to src/cpu/kernels/add/generic/neon/qasymm8_signed.cpp
diff --git a/src/cpu/kernels/add/neon/qsymm16.cpp b/src/cpu/kernels/add/generic/neon/qsymm16.cpp
similarity index 100%
rename from src/cpu/kernels/add/neon/qsymm16.cpp
rename to src/cpu/kernels/add/generic/neon/qsymm16.cpp
diff --git a/src/cpu/kernels/add/sve/impl.h b/src/cpu/kernels/add/generic/sve/fp16.cpp
similarity index 72%
copy from src/cpu/kernels/add/sve/impl.h
copy to src/cpu/kernels/add/generic/sve/fp16.cpp
index 32ff5d0..71056a0 100644
--- a/src/cpu/kernels/add/sve/impl.h
+++ b/src/cpu/kernels/add/generic/sve/fp16.cpp
@@ -21,20 +21,18 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-#define SRC_CORE_SVE_KERNELS_ADD_IMPL_H
+#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
-#if defined(ARM_COMPUTE_ENABLE_SVE)
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/Traits.h"
+#include "src/cpu/kernels/add/generic/sve/impl.h"
namespace arm_compute
{
namespace cpu
{
-template <typename ScalarType>
-void add_same_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
-} // namespace cpu
+void add_fp16_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+{
+ return add_same_sve<float16_t>(src0, src1, dst, policy, window);
+}
+}
} // namespace arm_compute
-#endif // defined(ARM_COMPUTE_ENABLE_SVE)
-#endif // SRC_CORE_SVE_KERNELS_ADD_IMPL_H
\ No newline at end of file
+#endif /* (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
diff --git a/src/cpu/kernels/add/sve/impl.h b/src/cpu/kernels/add/generic/sve/fp32.cpp
similarity index 74%
copy from src/cpu/kernels/add/sve/impl.h
copy to src/cpu/kernels/add/generic/sve/fp32.cpp
index 32ff5d0..8f651b3 100644
--- a/src/cpu/kernels/add/sve/impl.h
+++ b/src/cpu/kernels/add/generic/sve/fp32.cpp
@@ -21,20 +21,19 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-#define SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-
#if defined(ARM_COMPUTE_ENABLE_SVE)
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/Traits.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "src/cpu/kernels/add/generic/sve/impl.h"
namespace arm_compute
{
namespace cpu
{
-template <typename ScalarType>
-void add_same_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
-} // namespace cpu
+void add_fp32_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+{
+ return add_same_sve<float>(src0, src1, dst, policy, window);
+}
+}
} // namespace arm_compute
-#endif // defined(ARM_COMPUTE_ENABLE_SVE)
-#endif // SRC_CORE_SVE_KERNELS_ADD_IMPL_H
\ No newline at end of file
+#endif //ARM_COMPUTE_ENABLE_SVE
diff --git a/src/cpu/kernels/add/sve/impl.cpp b/src/cpu/kernels/add/generic/sve/impl.cpp
similarity index 95%
rename from src/cpu/kernels/add/sve/impl.cpp
rename to src/cpu/kernels/add/generic/sve/impl.cpp
index f8e16a5..52429bb 100644
--- a/src/cpu/kernels/add/sve/impl.cpp
+++ b/src/cpu/kernels/add/generic/sve/impl.cpp
@@ -21,17 +21,13 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
+#include "src/cpu/kernels/add/generic/sve/impl.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/Traits.h"
-#include "src/core/NEON/wrapper/intrinsics/intrinsics.h"
-
#include "src/core/NEON/SVEMath.h"
-#include "src/cpu/kernels/add/sve/impl.h"
+#include "src/core/NEON/wrapper/intrinsics/intrinsics.h"
#include <arm_sve.h>
-
namespace arm_compute
{
namespace cpu
@@ -128,12 +124,13 @@
input1, input2, output);
}
}
-
template void add_same_sve<float>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
-template void add_same_sve<float16_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
template void add_same_sve<uint8_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
template void add_same_sve<int16_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
template void add_same_sve<int32_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
+#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
+template void add_same_sve<float16_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
+#endif /* (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
} // namespace cpu
} // namespace arm_compute
-#endif /* defined(__ARM_FEATURE_SVE) */
\ No newline at end of file
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/cpu/kernels/add/sve/impl.h b/src/cpu/kernels/add/generic/sve/impl.h
similarity index 93%
rename from src/cpu/kernels/add/sve/impl.h
rename to src/cpu/kernels/add/generic/sve/impl.h
index 32ff5d0..59f39e9 100644
--- a/src/cpu/kernels/add/sve/impl.h
+++ b/src/cpu/kernels/add/generic/sve/impl.h
@@ -21,12 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#ifndef SRC_CORE_SVE_KERNELS_ADD_IMPL_H
#define SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-
-#if defined(ARM_COMPUTE_ENABLE_SVE)
+#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/Traits.h"
+#include "arm_compute/core/Window.h"
namespace arm_compute
{
@@ -36,5 +36,5 @@
void add_same_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
} // namespace cpu
} // namespace arm_compute
-#endif // defined(ARM_COMPUTE_ENABLE_SVE)
-#endif // SRC_CORE_SVE_KERNELS_ADD_IMPL_H
\ No newline at end of file
+#endif // SRC_CORE_SVE_KERNELS_ADD_IMPL_H
+#endif // ARM_COMPUTE_ENABLE_SVE
diff --git a/src/cpu/kernels/add/sve/impl.h b/src/cpu/kernels/add/generic/sve/integer.cpp
similarity index 62%
copy from src/cpu/kernels/add/sve/impl.h
copy to src/cpu/kernels/add/generic/sve/integer.cpp
index 32ff5d0..d197717 100644
--- a/src/cpu/kernels/add/sve/impl.h
+++ b/src/cpu/kernels/add/generic/sve/integer.cpp
@@ -21,20 +21,29 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-#define SRC_CORE_SVE_KERNELS_ADD_IMPL_H
-
#if defined(ARM_COMPUTE_ENABLE_SVE)
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/Traits.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "src/cpu/kernels/add/generic/sve/impl.h"
namespace arm_compute
{
namespace cpu
{
-template <typename ScalarType>
-void add_same_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window);
-} // namespace cpu
+void add_u8_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+{
+ return add_same_sve<uint8_t>(src0, src1, dst, policy, window);
+}
+
+void add_s16_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+{
+ return add_same_sve<int16_t>(src0, src1, dst, policy, window);
+}
+
+void add_s32_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+{
+ return add_same_sve<int32_t>(src0, src1, dst, policy, window);
+}
+}
} // namespace arm_compute
-#endif // defined(ARM_COMPUTE_ENABLE_SVE)
-#endif // SRC_CORE_SVE_KERNELS_ADD_IMPL_H
\ No newline at end of file
+#endif //(ARM_COMPUTE_ENABLE_SVE)
diff --git a/src/cpu/kernels/add/sve/qasymm8.cpp b/src/cpu/kernels/add/generic/sve2/qasymm8.cpp
similarity index 98%
rename from src/cpu/kernels/add/sve/qasymm8.cpp
rename to src/cpu/kernels/add/generic/sve2/qasymm8.cpp
index 888ad87..c61089e 100644
--- a/src/cpu/kernels/add/sve/qasymm8.cpp
+++ b/src/cpu/kernels/add/generic/sve2/qasymm8.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
#if defined(ARM_COMPUTE_ENABLE_SVE2)
+
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
@@ -34,7 +35,7 @@
{
namespace cpu
{
-void add_qasymm8_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+void add_qasymm8_sve2(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
{
ARM_COMPUTE_UNUSED(policy);
@@ -179,4 +180,4 @@
}
} // namespace cpu
} // namespace arm_compute
-#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
\ No newline at end of file
+#endif //ARM_COMPUTE_ENABLE_SVE2
diff --git a/src/cpu/kernels/add/sve/qasymm8_signed.cpp b/src/cpu/kernels/add/generic/sve2/qasymm8_signed.cpp
similarity index 97%
rename from src/cpu/kernels/add/sve/qasymm8_signed.cpp
rename to src/cpu/kernels/add/generic/sve2/qasymm8_signed.cpp
index 3b922c6..9ac138a 100644
--- a/src/cpu/kernels/add/sve/qasymm8_signed.cpp
+++ b/src/cpu/kernels/add/generic/sve2/qasymm8_signed.cpp
@@ -34,7 +34,7 @@
{
namespace cpu
{
-void add_qasymm8_signed_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+void add_qasymm8_signed_sve2(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
{
ARM_COMPUTE_UNUSED(policy);
@@ -178,4 +178,4 @@
}
} // namespace cpu
} // namespace arm_compute
-#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
\ No newline at end of file
+#endif //ARM_COMPUTE_ENABLE_SVE2
diff --git a/src/cpu/kernels/add/sve/qsymm16.cpp b/src/cpu/kernels/add/generic/sve2/qsymm16.cpp
similarity index 97%
rename from src/cpu/kernels/add/sve/qsymm16.cpp
rename to src/cpu/kernels/add/generic/sve2/qsymm16.cpp
index eef5d24..f148872 100644
--- a/src/cpu/kernels/add/sve/qsymm16.cpp
+++ b/src/cpu/kernels/add/generic/sve2/qsymm16.cpp
@@ -34,7 +34,7 @@
{
namespace cpu
{
-void add_qsymm16_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
+void add_qsymm16_sve2(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
{
ARM_COMPUTE_UNUSED(policy);
@@ -153,4 +153,4 @@
}
} // namespace cpu
} // namespace arm_compute
-#endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */
\ No newline at end of file
+#endif //ARM_COMPUTE_ENABLE_SVE2
diff --git a/src/cpu/kernels/add/sve/list.h b/src/cpu/kernels/add/list.h
similarity index 64%
rename from src/cpu/kernels/add/sve/list.h
rename to src/cpu/kernels/add/list.h
index 4529a9f..9d7c9a6 100644
--- a/src/cpu/kernels/add/sve/list.h
+++ b/src/cpu/kernels/add/list.h
@@ -21,16 +21,11 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_CORE_SVE_KERNELS_ADD_LIST_H
-#define SRC_CORE_SVE_KERNELS_ADD_LIST_H
+#ifndef SRC_CORE_KERNELS_ADD_LIST_H
+#define SRC_CORE_KERNELS_ADD_LIST_H
-#if defined(ARM_COMPUTE_ENABLE_SVE)
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/Traits.h"
-#include "src/core/NEON/SVEMath.h"
-#include "src/core/NEON/wrapper/intrinsics/intrinsics.h"
-#include "src/cpu/kernels/add/sve/impl.h"
-#include <arm_sve.h>
+#include "src/cpu/kernels/add/generic/neon/impl.h"
+#include "src/cpu/kernels/add/generic/sve/impl.h"
namespace arm_compute
{
@@ -39,13 +34,25 @@
#define DECLARE_ADD_KERNEL(func_name) \
void func_name(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
-DECLARE_ADD_KERNEL(add_qasymm8_sve);
-DECLARE_ADD_KERNEL(add_qasymm8_signed_sve);
-DECLARE_ADD_KERNEL(add_qsymm16_sve);
+DECLARE_ADD_KERNEL(add_qasymm8_neon);
+DECLARE_ADD_KERNEL(add_qasymm8_signed_neon);
+DECLARE_ADD_KERNEL(add_qsymm16_neon);
+DECLARE_ADD_KERNEL(add_fp32_neon);
+DECLARE_ADD_KERNEL(add_fp16_neon);
+DECLARE_ADD_KERNEL(add_u8_neon);
+DECLARE_ADD_KERNEL(add_s16_neon);
+DECLARE_ADD_KERNEL(add_s32_neon);
+DECLARE_ADD_KERNEL(add_fp32_sve);
+DECLARE_ADD_KERNEL(add_fp16_sve);
+DECLARE_ADD_KERNEL(add_u8_sve);
+DECLARE_ADD_KERNEL(add_s16_sve);
+DECLARE_ADD_KERNEL(add_s32_sve);
+DECLARE_ADD_KERNEL(add_qasymm8_sve2);
+DECLARE_ADD_KERNEL(add_qasymm8_signed_sve2);
+DECLARE_ADD_KERNEL(add_qsymm16_sve2);
#undef DECLARE_ADD_KERNEL
} // namespace cpu
} // namespace arm_compute
-#endif // defined(ARM_COMPUTE_ENABLE_SVE)
-#endif // SRC_CORE_SVE_KERNELS_ADD_LIST_H
\ No newline at end of file
+#endif // SRC_CORE_KERNELS_ADD_LIST_H
\ No newline at end of file