Fix fully connected and matmul mismatches
* There is an issue with quantized fully connected and matmul
when the lower bound of bounded ReLU is negative.
* Use int32_t for the calculation of min/max quantized value
rather than PixelValue to avoid this issue.
Partially resolves: COMPMID-5996
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: I7b22e9d56a2441fc6a4c5c4e627f57d6e00d6ff1
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9502
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/tests/validation/fixtures/MatMulFixture.h b/tests/validation/fixtures/MatMulFixture.h
index f8f038a..1571902 100644
--- a/tests/validation/fixtures/MatMulFixture.h
+++ b/tests/validation/fixtures/MatMulFixture.h
@@ -27,6 +27,7 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/utils/quantization/AsymmHelpers.h"
#include "tests/framework/Fixture.h"
#include "tests/validation/reference/ActivationLayer.h"
#include "tests/validation/reference/GEMM.h"
@@ -162,16 +163,16 @@
template <typename TT>
typename std::enable_if<!std::is_integral<TT>::value, SimpleTensor<TT>>::type
- compute_reference_gemm(const SimpleTensor<TT> &a, const SimpleTensor<TT> &b, const SimpleTensor<TT> &c, float alpha, float beta, const ActivationLayerInfo &act_info, const QuantizationInfo &o_qinfo)
+ compute_reference_gemm(const SimpleTensor<TT> &a, const SimpleTensor<TT> &b, const SimpleTensor<TT> &c, float alpha, float beta, const QuantizationInfo &o_qinfo)
{
- ARM_COMPUTE_UNUSED(act_info, o_qinfo);
+ ARM_COMPUTE_UNUSED(o_qinfo);
return reference::gemm(a, b, c, alpha, beta);
}
template <typename TT>
typename std::enable_if<std::is_integral<TT>::value, SimpleTensor<TT>>::type
- compute_reference_gemm(const SimpleTensor<TT> &a, const SimpleTensor<TT> &b, const SimpleTensor<TT> &c, float alpha, float beta, const ActivationLayerInfo &act_info, const QuantizationInfo &o_qinfo)
+ compute_reference_gemm(const SimpleTensor<TT> &a, const SimpleTensor<TT> &b, const SimpleTensor<TT> &c, float alpha, float beta, const QuantizationInfo &o_qinfo)
{
ARM_COMPUTE_UNUSED(alpha, beta);
@@ -187,17 +188,12 @@
std::vector<int32_t> output_multipliers{ output_multiplier };
std::vector<int32_t> output_shifts{ output_shift };
- PixelValue output_min{};
- PixelValue output_max{};
- std::tie(output_min, output_max) = quantization::get_quantized_asymmetric_output_min_max(
- o_qinfo, act_info, a.data_type());
-
const auto tmp = reference::gemmlowp_matrix_multiply_core<int32_t>(
a, b, c.shape(), aq.offset, bq.offset);
auto output = reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, TT>(
tmp, output_multipliers, output_shifts, oq.offset,
- output_min.get<int32_t>(), output_max.get<int32_t>());
+ std::numeric_limits<int32_t>::lowest(), std::numeric_limits<int32_t>::max());
output.quantization_info(o_qinfo);
return output;
@@ -253,7 +249,7 @@
// Setting beta to 0 will effectively disable C for the
// computation of the reference: alpha * A * B + 0 * C
// Use transposed tensors if boolean enabled else use original tensors
- auto result = compute_reference_gemm<T>((transpose_a) ? a_transposed : a, (transpose_b) ? b_transposed : b, c, 1.0f, 0.f, act_info, o_qinfo);
+ auto result = compute_reference_gemm<T>((transpose_a) ? a_transposed : a, (transpose_b) ? b_transposed : b, c, 1.0f, 0.f, o_qinfo);
result = reference::activation_layer<T>(result, act_info, o_qinfo);