Improve start-up time in gemmlowp reshaped rhs only.
- Does not Pass RESULT_MULTIPLIER, RESULT_SHIFT When PER_CHANNEL_QUANTIZATION is defined.
Resolves COMPMID-5499
Signed-off-by: Adnan AlSinan <adnan.alsinan@arm.com>
Change-Id: Ie433eaf83c003a6d5ccfeb89eb2783528dc2b48e
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8316
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel.cpp b/src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel.cpp
index 386c13e..90084ea 100644
--- a/src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel.cpp
+++ b/src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel.cpp
@@ -376,8 +376,17 @@
build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(a_offset * b_offset * src0->dimension(0)));
build_opts.add_option_if(bias != nullptr, "-DADD_BIAS");
build_opts.add_option("-DRESULT_OFFSET=" + support::cpp11::to_string(output_stage.gemmlowp_offset));
- build_opts.add_option("-DRESULT_MULTIPLIER=" + support::cpp11::to_string(output_stage.gemmlowp_multipliers[0]));
- build_opts.add_option("-DRESULT_SHIFT=" + support::cpp11::to_string(output_stage.gemmlowp_shifts[0]));
+ // In case of _is_quantized_per_channel, RESULT_MULTIPLIER and RESULT_SHIFT are not utilized, but they are passed as a part of T_QUANTIZE8 macro.
+ if(!_is_quantized_per_channel)
+ {
+ build_opts.add_option("-DRESULT_MULTIPLIER=" + support::cpp11::to_string(output_stage.gemmlowp_multipliers[0]));
+ build_opts.add_option("-DRESULT_SHIFT=" + support::cpp11::to_string(output_stage.gemmlowp_shifts[0]));
+ }
+ else
+ {
+ build_opts.add_option("-DRESULT_MULTIPLIER=0");
+ build_opts.add_option("-DRESULT_SHIFT=0");
+ }
build_opts.add_option_if(_is_quantized_per_channel, "-DPER_CHANNEL_QUANTIZATION");
const int min = output_stage.gemmlowp_min_bound;