Add SVE2 kernels for quantized elementwise operations
Partially implements: COMPMID-3872
Change-Id: I76d81f2b8aa343f9d830298bc931e410c7c901bc
Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4842
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
index 29ae903..4d67ec3 100644
--- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
@@ -30,6 +30,7 @@
#include "src/core/NEON/NEFixedPoint.h"
#include "src/core/NEON/wrapper/wrapper.h"
#include "src/core/SVE/kernels/elementwise/impl/elementwise_list.h"
+#include "src/core/SVE/kernels/elementwise/impl/elementwise_quantized_list.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
@@ -717,6 +718,7 @@
}
}
+#if !defined(__ARM_FEATURE_SVE2)
void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window,
uint8_t (*scalar_func)(const float &, const float &, UniformQuantizationInfo),
int (*broadcast_func)(int, int, int, const uint8_t *, float32x4x4_t, uint8_t *, int32x4_t, float32x4_t,
@@ -1038,6 +1040,7 @@
input1, input2, output);
}
}
+#endif /* !defined(__ARM_FEATURE_SVE2) */
template <ComparisonOperation op, typename InputScalarType, typename InputVectorType>
void elementwise_comp_op_8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
@@ -1143,9 +1146,14 @@
{ "op_F32_F32_F32", &elementwise_arithm_op<op, typename wrapper::traits::neon_vector<float, 4>> },
{ "op_S32_S32_S32", &elementwise_arithm_op<op, typename wrapper::traits::neon_vector<int32_t, 4>> },
#endif /* defined(__ARM_FEATURE_SVE) */
- { "op_S16_S16_S16", &elementwise_arithm_op<op, typename wrapper::traits::neon_vector<int16_t, 8>> },
+#if defined(__ARM_FEATURE_SVE2)
+ { "op_QASYMM8_QASYMM8_QASYMM8", &arm_compute::cpu::sve::elementwise_arithmetic_quantized_op<op, uint8_t> },
+ { "op_QASYMM8_SIGNED_QASYMM8_SIGNED_QASYMM8_SIGNED", &arm_compute::cpu::sve::elementwise_arithmetic_quantized_op<op, int8_t> },
+#else /* defined(__ARM_FEATURE_SVE2) */
{ "op_QASYMM8_QASYMM8_QASYMM8", &elementwise_arithm_op_quantized<op> },
- { "op_QASYMM8_SIGNED_QASYMM8_SIGNED_QASYMM8_SIGNED", &elementwise_arithm_op_quantized_signed<op> }
+ { "op_QASYMM8_SIGNED_QASYMM8_SIGNED_QASYMM8_SIGNED", &elementwise_arithm_op_quantized_signed<op> },
+#endif /* defined(__ARM_FEATURE_SVE2) */
+ { "op_S16_S16_S16", &elementwise_arithm_op<op, typename wrapper::traits::neon_vector<int16_t, 8>> },
};
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#if defined(__ARM_FEATURE_SVE)
@@ -1175,8 +1183,13 @@
{ "op_S16_S16_U8", &elementwise_comp_op_16<op, int16_t, int16x8_t> },
{ "op_S32_S32_U8", &elementwise_comp_op_32<op, int32_t, int32x4_t> },
#endif /* defined(__ARM_FEATURE_SVE) */
+#if defined(__ARM_FEATURE_SVE2)
+ { "op_QASYMM8_SIGNED_QASYMM8_SIGNED_U8", &arm_compute::cpu::sve::elementwise_comparison_quantized_op<op, int8_t> },
+ { "op_QASYMM8_QASYMM8_U8", &arm_compute::cpu::sve::elementwise_comparison_quantized_op<op, uint8_t> }
+#else /* defined(__ARM_FEATURE_SVE2) */
{ "op_QASYMM8_SIGNED_QASYMM8_SIGNED_U8", &elementwise_comp_op_quantized_signed<op> },
{ "op_QASYMM8_QASYMM8_U8", &elementwise_comp_op_quantized<op> }
+#endif /* defined(__ARM_FEATURE_SVE2) */
};
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#if defined(__ARM_FEATURE_SVE)