COMPMID-3281: Implement QSYMM16 Layer Normalization for NEON QLSTM

- Reference kernel is modified to use the same algorithm as NEON kernel.
- NEON kernel is implemented.
- Tests for validation and run are added.

Change-Id: I3533bc2bd12c6e9cc75d837ecf193f74ceddf796
Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2948
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
diff --git a/arm_compute/core/utils/quantization/AsymmHelpers.h b/arm_compute/core/utils/quantization/AsymmHelpers.h
index 0f0ec72..a7bbf9b 100644
--- a/arm_compute/core/utils/quantization/AsymmHelpers.h
+++ b/arm_compute/core/utils/quantization/AsymmHelpers.h
@@ -128,7 +128,7 @@
  *
  * @return The multiplied value
  */
-int32_t multiply_by_quantized_multipler(int32_t input, int32_t qmul, int32_t shift);
+int32_t multiply_by_quantized_multiplier(int32_t input, int32_t qmul, int32_t shift);
 
 /** Compute the value multiplied the power-of-two
  *
@@ -137,7 +137,18 @@
  *
  * @return The multiplied value
  */
-int32_t saturating_rounding_multiply_by_pow2(int exponent, int32_t v);
+int32_t saturating_rounding_multiply_by_pow2(int32_t exponent, int32_t v);
+
+/** Compute quantized multiplier and shift for the inverse square root of input.
+ *  Using 3-bit fixed point and 5 iterations of the Newton-Raphson method.
+ *
+ * @param[in]  input           Input to use
+ * @param[in]  reverse_shift   -1 to reverse the shift direction
+ * @param[out] output_inv_sqrt Quantized multiplier for inverse square root
+ * @param[out] output_shift    Shift for inverse square root
+ *
+ */
+void get_invsqrt_quantized_multiplier_exp(int32_t input, int32_t reverse_shift, int32_t &output_inv_sqrt, int32_t &output_shift);
 
 } // namespace quantization
 } // namespace arm_compute