Add SME2 implementation of softmax for FP32
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: I8a63610cfb9ccff89dec6045d023439fc19b027a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11357
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/cpu/kernels/CpuSoftmaxKernel.cpp b/src/cpu/kernels/CpuSoftmaxKernel.cpp
index 54ff858..a088fb6 100644
--- a/src/cpu/kernels/CpuSoftmaxKernel.cpp
+++ b/src/cpu/kernels/CpuSoftmaxKernel.cpp
@@ -50,6 +50,12 @@
{
/* Softmax */
static const std::vector<typename CpuSoftmaxKernel::SoftmaxKernel> available_kernels = {
+#ifdef ARM_COMPUTE_ENABLE_SME2
+ {"sme2_fp32_softmax",
+ [](const SoftmaxKernelDataTypeISASelectorData &data)
+ { return (!data.is_log && data.dt == DataType::F32 && data.isa.sme2); },
+ REGISTER_FP32_NEON(sme2_fp32_softmax)},
+#endif // ARM_COMPUTE_ENABLE_SME2
{"neon_fp32_softmax",
[](const SoftmaxKernelDataTypeISASelectorData &data) { return (!data.is_log && data.dt == DataType::F32); },
REGISTER_FP32_NEON(neon_fp32_softmax<false>)},