COMPMID-2407: Add (logistic and tanh) activation support for QSYMM16 for NEON

Change-Id: Ib89c9cfe12975e51d1710af736c73ce79e667363
Signed-off-by: giuros01 <giuseppe.rossini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1412
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Manuel Bottini <manuel.bottini@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
diff --git a/arm_compute/core/NEON/NESymm.h b/arm_compute/core/NEON/NESymm.h
index 0479753..364a317 100644
--- a/arm_compute/core/NEON/NESymm.h
+++ b/arm_compute/core/NEON/NESymm.h
@@ -102,5 +102,52 @@
 
     return out_s16;
 }
+
+/** Dequantize a neon vector holding 8 16-bit quantized values.
+ *
+ * @param[in] qv    Input values to be dequantized.
+ * @param[in] scale Quantization scale
+ *
+ * @return Dequantized values in a neon vector
+ */
+inline float32x4x2_t vdequantize_int16(const int16x8_t &qv, float scale)
+{
+    const float32x4_t   vscale = vdupq_n_f32(scale);
+    const float32x4x2_t vdequantized_input =
+    {
+        {
+            vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(qv))), vscale),
+            vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(qv))), vscale)
+        }
+    };
+    return vdequantized_input;
+}
+
+/** Quantize a neon vector holding 8 floating point values.
+ *
+ * @param[in] qv    Input values to be quantized.
+ * @param[in] scale Quantization scale
+ *
+ * @return A neon vector holding the quantized values
+ */
+inline int16x8_t vquantize_int16(const float32x4x2_t &qv, float scale)
+{
+    const float32x4_t vinvscale = vdupq_n_f32(1.f / scale);
+
+    const int32x4x2_t rf =
+    {
+        {
+#ifdef __aarch64__
+            vcvtnq_s32_f32(vmulq_f32(qv.val[0], vinvscale)),
+            vcvtnq_s32_f32(vmulq_f32(qv.val[1], vinvscale))
+#else  //__aarch64__
+            vcvtq_s32_f32(vmulq_f32(qv.val[0], vinvscale)),
+            vcvtq_s32_f32(vmulq_f32(qv.val[1], vinvscale))
+#endif //__aarch64__
+        }
+    };
+    return vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1]));
+}
+
 } // namespace arm_compute
 #endif // __ARM_COMPUTE_NESYMM_H__