Enable quantized data types for  CpuElementwiseUnary on Armv7a

- Adding fallback functions neon_qasymm8_signed_elementwise_unary() and
  neon_qasymm8_elementwise_unary()
- They would be called in case target is not aarch64

Resolves: COMPMID-5994
Change-Id: Id0db1e7cb0fe92f1eaef0b3a9ed2bea01b3f2a15
Signed-off-by: Ramy Elgammal <ramy.elgammal@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9416
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/tests/validation/NEON/ElementwiseExpLayer.cpp b/tests/validation/NEON/ElementwiseExpLayer.cpp
index e8940c5..31cd786 100644
--- a/tests/validation/NEON/ElementwiseExpLayer.cpp
+++ b/tests/validation/NEON/ElementwiseExpLayer.cpp
@@ -46,8 +46,15 @@
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 RelativeTolerance<float> tolerance_fp16(0.01f);
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#if defined(__aarch64__)
 constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0);
 constexpr AbsoluteTolerance<int8_t>  tolerance_qasymm8_signed(0);
+#else  // #if !defined(__aarch64__)
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); // There is difference of 1, because quantizing in reference uses round policy "TO_NEAREST_UP", where the armv7a neon kernel uses "TO_ZERO"
+constexpr AbsoluteTolerance<int8_t>  tolerance_qasymm8_signed(1);
+#endif // #if !defined(__aarch64__)
+
 } // namespace
 TEST_SUITE(NEON)
 TEST_SUITE(ExpLayer)
@@ -111,7 +118,6 @@
     validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
 }
 TEST_SUITE_END() // QASYMM8_SIGNED
-
 TEST_SUITE_END() // Quantized
 
 TEST_SUITE_END() // ExpLayer