Optimize CPU add layer on quantized data
* Use fixed-point arithmetic where possible.
* Various optimization for the FP32-based implementation.
This implementation is kept as the fall-back solution
in case of unrealistic quantization parameters that exceed
the range of fixed-point solution.
Resolves: COMPMID-5458
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: I221d2d3801ecaae4fe0b7cf6ae8ef00ca3743665
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8317
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/tests/validation/NEON/ArithmeticAddition.cpp b/tests/validation/NEON/ArithmeticAddition.cpp
index f94e329..322cd7b 100644
--- a/tests/validation/NEON/ArithmeticAddition.cpp
+++ b/tests/validation/NEON/ArithmeticAddition.cpp
@@ -89,7 +89,7 @@
}
DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, concat(concat(
- combine(combine(framework::dataset::make("CpuExt", std::string("NEON")),
+ combine(combine(combine(framework::dataset::make("CpuExt", std::string("NEON")),
framework::dataset::make("DataType", { DataType::F32,
DataType::F16,
DataType::U8,
@@ -100,21 +100,24 @@
DataType::QSYMM16
})),
framework::dataset::make("CanInterpretAs1D", {true, false})),
- combine(combine(framework::dataset::make("CpuExt", std::string("SVE")),
+ framework::dataset::make("CanUseFixedpoint", {true, false})),
+ combine(combine(combine(framework::dataset::make("CpuExt", std::string("SVE")),
framework::dataset::make("DataType", { DataType::F32,
DataType::F16,
DataType::U8,
DataType::S16,
DataType::S32
})),
- framework::dataset::make("CanInterpretAs1D", {true, false}))),
- combine(combine(framework::dataset::make("CpuExt", std::string("SVE2")),
+ framework::dataset::make("CanInterpretAs1D", {true, false})),
+ framework::dataset::make("CanUseFixedpoint", {true, false}))),
+ combine(combine(combine(framework::dataset::make("CpuExt", std::string("SVE2")),
framework::dataset::make("DataType", { DataType::QASYMM8,
DataType::QASYMM8_SIGNED,
DataType::QSYMM16
})),
- framework::dataset::make("CanInterpretAs1D", {false}))),
- cpu_ext, data_type, can_interpret_inputs_as_1d_array)
+ framework::dataset::make("CanInterpretAs1D", {false})),
+ framework::dataset::make("CanUseFixedpoint", {true, false}))),
+ cpu_ext, data_type, can_interpret_inputs_as_1d_array, can_use_fixedpoint)
{
using namespace cpu::kernels;
@@ -124,18 +127,23 @@
cpu_isa.sve2 = (cpu_ext == "SVE2");
cpu_isa.fp16 = (data_type == DataType::F16);
- const auto *selected_impl = CpuAddKernel::get_implementation(CpuAddKernelDataTypeISASelectorData{data_type, cpu_isa, can_interpret_inputs_as_1d_array}, cpu::KernelSelectionType::Preferred);
+ const auto *selected_impl = CpuAddKernel::get_implementation(CpuAddKernelDataTypeISASelectorData{data_type, cpu_isa, can_interpret_inputs_as_1d_array, can_use_fixedpoint}, cpu::KernelSelectionType::Preferred);
ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl);
bool float_or_integer = (data_type == DataType::F32 || data_type == DataType::F16 || data_type == DataType::U8 ||
data_type == DataType::S16 || data_type == DataType::S32);
+ bool qasymm8_any = (data_type == DataType::QASYMM8 || data_type == DataType::QASYMM8_SIGNED);
std::string expected;
if(can_interpret_inputs_as_1d_array && float_or_integer)
{
expected = "neon_" + cpu_impl_dt(data_type) + "_add_as_1d_array";
}
+ else if(qasymm8_any && can_use_fixedpoint)
+ {
+ expected = "neon_" + cpu_impl_dt(data_type) + "_add_fixedpoint";
+ }
else
{
expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_add";