Use Neon™ kernels for FP Bilinear Resize for SVE
Removes FP Bilinear SVE kernels and uses Neon™ kernels instead
Resolves: COMPMID-5449
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Change-Id: I8e01de44bd884cb6578ca0b9358509b69bc31ca2
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8100
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/cpu/kernels/CpuScaleKernel.cpp b/src/cpu/kernels/CpuScaleKernel.cpp
index e230dfa..c9e858f 100644
--- a/src/cpu/kernels/CpuScaleKernel.cpp
+++ b/src/cpu/kernels/CpuScaleKernel.cpp
@@ -52,62 +52,68 @@
{
{
"sve_fp16_scale",
- [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.sve && data.isa.fp16; },
+ [](const ScaleKernelDataTypeISASelectorData & data)
+ {
+ return data.dt == DataType::F16 && data.isa.sve && data.isa.fp16 && data.interpolation_policy != InterpolationPolicy::BILINEAR;
+ },
REGISTER_FP16_SVE(arm_compute::cpu::fp16_sve_scale)
},
{
"sve_fp32_scale",
- [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32 && data.isa.sve; },
+ [](const ScaleKernelDataTypeISASelectorData & data)
+ {
+ return data.dt == DataType::F32 && data.isa.sve && data.interpolation_policy != InterpolationPolicy::BILINEAR;
+ },
REGISTER_FP32_SVE(arm_compute::cpu::fp32_sve_scale)
},
{
"sve_qu8_scale",
- [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8 && data.isa.sve; },
+ [](const ScaleKernelDataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8 && data.isa.sve; },
REGISTER_QASYMM8_SVE(arm_compute::cpu::qasymm8_sve_scale)
},
{
"sve_qs8_scale",
- [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.isa.sve; },
+ [](const ScaleKernelDataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.isa.sve; },
REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::qasymm8_signed_sve_scale)
},
{
"sve_u8_scale",
- [](const DataTypeISASelectorData & data) { return data.dt == DataType::U8 && data.isa.sve; },
+ [](const ScaleKernelDataTypeISASelectorData & data) { return data.dt == DataType::U8 && data.isa.sve; },
REGISTER_INTEGER_SVE(arm_compute::cpu::u8_sve_scale)
},
{
"sve_s16_scale",
- [](const DataTypeISASelectorData & data) { return data.dt == DataType::S16 && data.isa.sve; },
+ [](const ScaleKernelDataTypeISASelectorData & data) { return data.dt == DataType::S16 && data.isa.sve; },
REGISTER_INTEGER_SVE(arm_compute::cpu::s16_sve_scale)
},
{
"neon_fp16_scale",
- [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.fp16; },
+ [](const ScaleKernelDataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.fp16; },
REGISTER_FP16_NEON(arm_compute::cpu::common_neon_scale<float16_t>)
},
{
"neon_fp32_scale",
- [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32; },
+ [](const ScaleKernelDataTypeISASelectorData & data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::common_neon_scale<float>)
},
{
"neon_qu8_scale",
- [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8; },
+ [](const ScaleKernelDataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8; },
REGISTER_QASYMM8_NEON(arm_compute::cpu::qasymm8_neon_scale)
},
{
"neon_qs8_scale",
- [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; },
+ [](const ScaleKernelDataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; },
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::qasymm8_signed_neon_scale)
},
{
"neon_u8_scale",
- [](const DataTypeISASelectorData & data) { return data.dt == DataType::U8; },
+ [](const ScaleKernelDataTypeISASelectorData & data) { return data.dt == DataType::U8; },
REGISTER_INTEGER_NEON(arm_compute::cpu::u8_neon_scale)
},
{
"neon_s16_scale",
- [](const DataTypeISASelectorData & data) { return data.dt == DataType::S16; },
+ [](const ScaleKernelDataTypeISASelectorData & data) { return data.dt == DataType::S16; },
REGISTER_INTEGER_NEON(arm_compute::cpu::s16_neon_scale)
},
};
@@ -115,7 +121,7 @@
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dx, const ITensorInfo *dy,
const ITensorInfo *offsets, ITensorInfo *dst, const ScaleKernelInfo &info)
{
- const auto *uk = CpuScaleKernel::get_implementation(DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() });
+ const auto *uk = CpuScaleKernel::get_implementation(ScaleKernelDataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa(), info.interpolation_policy });
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
@@ -174,7 +180,7 @@
dst,
info));
- const auto *uk = CpuScaleKernel::get_implementation(DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() });
+ const auto *uk = CpuScaleKernel::get_implementation(ScaleKernelDataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa(), info.interpolation_policy });
ARM_COMPUTE_ERROR_ON_NULLPTR(uk);
_run_method = uk->ukernel;