COMPMID-3241: Fix hidden scale in NEQLSTMLayer

- Fix wrong data types in LSTMParams
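- Add an ignore_epsilon option to the quantization multiplier computation; when
  set, the epsilon tolerance is treated as zero and a multiplier whose right
  shift would exceed 31 is returned as zero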
- Ignore epsilon when computing the hidden state scale multiplier in NEQLSTMLayer

Change-Id: Ia0b2f523b1c2ad325f3523439a8eea051d81958c
Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3058
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/utils/quantization/AsymmHelpers.cpp b/src/core/utils/quantization/AsymmHelpers.cpp
index f923518..8e0e92c 100644
--- a/src/core/utils/quantization/AsymmHelpers.cpp
+++ b/src/core/utils/quantization/AsymmHelpers.cpp
@@ -36,7 +36,7 @@
 constexpr int64_t fixed_point_one_Q0 = (1LL << 31);
 constexpr float   epsilon            = 0.00001f;
 
-Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift)
+Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon)
 {
     if(multiplier >= 1.f)
     {
@@ -46,19 +46,22 @@
     }
     else
     {
-        return calculate_quantized_multiplier_less_than_one(multiplier, quant_multiplier, shift);
+        return calculate_quantized_multiplier_less_than_one(multiplier, quant_multiplier, shift, ignore_epsilon);
     }
 }
 
 Status calculate_quantized_multiplier_less_than_one(float    multiplier,
                                                     int32_t *quant_multiplier,
-                                                    int32_t *right_shift)
+                                                    int32_t *right_shift,
+                                                    bool     ignore_epsilon)
 {
+    const float internal_epsilon = ignore_epsilon ? 0.0f : epsilon;
+
     ARM_COMPUTE_RETURN_ERROR_ON(quant_multiplier == nullptr);
     ARM_COMPUTE_RETURN_ERROR_ON(right_shift == nullptr);
-    ARM_COMPUTE_RETURN_ERROR_ON(multiplier < -epsilon);
-    ARM_COMPUTE_RETURN_ERROR_ON(multiplier > 1.0f + epsilon);
-    if(std::fabs(0.0f - multiplier) < epsilon)
+    ARM_COMPUTE_RETURN_ERROR_ON(multiplier < -internal_epsilon);
+    ARM_COMPUTE_RETURN_ERROR_ON(multiplier > 1.0f + internal_epsilon);
+    if(std::fabs(0.0f - multiplier) < internal_epsilon)
     {
         *quant_multiplier = 0;
         *right_shift      = 0;
@@ -75,6 +78,13 @@
         q_fixed /= 2;
         --*right_shift;
     }
+
+    if(ignore_epsilon && *right_shift > 31)
+    {
+        *right_shift = 0;
+        q_fixed      = 0;
+    }
+
     ARM_COMPUTE_RETURN_ERROR_ON(*right_shift < 0);
     ARM_COMPUTE_RETURN_ERROR_ON(q_fixed > std::numeric_limits<int32_t>::max());
     *quant_multiplier = static_cast<int32_t>(q_fixed);
diff --git a/src/runtime/NEON/functions/NEQLSTMLayer.cpp b/src/runtime/NEON/functions/NEQLSTMLayer.cpp
index 3aa77b2..b02fab2 100644
--- a/src/runtime/NEON/functions/NEQLSTMLayer.cpp
+++ b/src/runtime/NEON/functions/NEQLSTMLayer.cpp
@@ -341,7 +341,7 @@
     _output_gate.allocator()->allocate();
     _input_gate.allocator()->allocate();
     const float hidden_state_scale = std::pow(2, -15) / lstm_params.hidden_state_scale() * std::pow(2, -15);
-    quantization::calculate_quantized_multiplier(hidden_state_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift);
+    quantization::calculate_quantized_multiplier(hidden_state_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift, /* ignore_epsilon */ true);
     gemmlowp_info.gemmlowp_offset  = lstm_params.hidden_state_zero();
     gemmlowp_info.output_data_type = output_state_in->info()->data_type();
     _hidden_outstage.configure(&_hidden_mul_res, nullptr, output_state_out, gemmlowp_info);
@@ -622,7 +622,7 @@
     const TensorInfo hidden_mul_res(TensorShape(num_units, batch_size), 1, DataType::S32);
     ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplicationKernel::validate(&output_gate_info, &input_gate_info, &hidden_mul_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO));
     const float hidden_state_scale = std::pow(2, -15) / lstm_params.hidden_state_scale() * std::pow(2, -15);
-    ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(hidden_state_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift));
+    ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(hidden_state_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift, /* ignore_epsilon */ true));
     gemmlowp_info.gemmlowp_offset = lstm_params.hidden_state_zero();
     ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOutputStage::validate(&hidden_mul_res, nullptr, output_state_out, gemmlowp_info));