COMPMID-2408: Add QSYMM16 support to ElementwiseAddition on NEON
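
QSYMM16 is a 16-bit symmetric quantized type: values are stored as int16_t
with a per-tensor scale and a zero point fixed at 0. This patch teaches the
test library to fill QSYMM16 tensors, adds QSYMM16 <-> float conversion
helpers to the validation framework, specializes the reference
arithmetic_operation to compute quantized additions through a
dequantize/add/requantize round trip, and adds QSYMM16 validation cases for
NEArithmeticAddition.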

Change-Id: I22991e9369ffba9b51a94522ff4977933e887b94
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1352
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
diff --git a/tests/AssetsLibrary.h b/tests/AssetsLibrary.h
index 366c145..5c8019b 100644
--- a/tests/AssetsLibrary.h
+++ b/tests/AssetsLibrary.h
@@ -646,6 +646,7 @@
             break;
         }
         case DataType::S16:
+        case DataType::QSYMM16:
         {
             std::uniform_int_distribution<int16_t> distribution_s16(std::numeric_limits<int16_t>::lowest(), std::numeric_limits<int16_t>::max());
             fill(tensor, distribution_s16, seed_offset);
@@ -745,6 +746,7 @@
             break;
         }
         case DataType::S16:
+        case DataType::QSYMM16:
         {
             const auto                         converted_pairs = detail::convert_range_pair<int16_t>(excluded_range_pairs);
             RangedUniformDistribution<int16_t> distribution_s16(std::numeric_limits<int16_t>::lowest(),
@@ -820,6 +822,7 @@
             break;
         }
         case DataType::S16:
+        case DataType::QSYMM16:
         {
             ARM_COMPUTE_ERROR_ON(!(std::is_same<int16_t, D>::value));
             std::uniform_int_distribution<int16_t> distribution_s16(low, high);
diff --git a/tests/Utils.h b/tests/Utils.h
index d6e4a88..a14b30b 100644
--- a/tests/Utils.h
+++ b/tests/Utils.h
@@ -363,6 +363,7 @@
             *reinterpret_cast<uint16_t *>(ptr) = value;
             break;
         case DataType::S16:
+        case DataType::QSYMM16:
             *reinterpret_cast<int16_t *>(ptr) = value;
             break;
         case DataType::U32:
diff --git a/tests/validation/Helpers.cpp b/tests/validation/Helpers.cpp
index 31d6bfa..360859e 100644
--- a/tests/validation/Helpers.cpp
+++ b/tests/validation/Helpers.cpp
@@ -132,6 +132,32 @@
     return dst;
 }
 
+template <>
+SimpleTensor<int16_t> convert_to_symmetric(const SimpleTensor<float> &src, const QuantizationInfo &quantization_info)
+{
+    SimpleTensor<int16_t>          dst{ src.shape(), DataType::QSYMM16, 1, quantization_info };
+    const UniformQuantizationInfo &qinfo = quantization_info.uniform();
+
+    for(int i = 0; i < src.num_elements(); ++i)
+    {
+        dst[i] = quantize_qsymm16(src[i], qinfo);
+    }
+    return dst;
+}
+
+template <>
+SimpleTensor<float> convert_from_symmetric(const SimpleTensor<int16_t> &src)
+{
+    const UniformQuantizationInfo &quantization_info = src.quantization_info().uniform();
+    SimpleTensor<float>            dst{ src.shape(), DataType::F32, 1, QuantizationInfo(), src.data_layout() };
+
+    for(int i = 0; i < src.num_elements(); ++i)
+    {
+        dst[i] = dequantize_qsymm16(src[i], quantization_info);
+    }
+    return dst;
+}
+
 template <typename T>
 void matrix_multiply(const SimpleTensor<T> &a, const SimpleTensor<T> &b, SimpleTensor<T> &out)
 {
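
For context: the two specializations above delegate the per-element work to
the library's quantize_qsymm16()/dequantize_qsymm16() helpers. A minimal,
self-contained sketch of that symmetric 16-bit round trip (the _sketch names
are illustrative, not library API, and the library's rounding policy may
differ):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // QSYMM16 stores a float as int16_t with a per-tensor scale; the zero
    // point is fixed at 0, so the representable range is symmetric.
    inline int16_t quantize_qsymm16_sketch(float value, float scale)
    {
        int q = static_cast<int>(std::lround(value / scale));
        q     = std::max(-32768, std::min(32767, q)); // saturate to int16_t range
        return static_cast<int16_t>(q);
    }

    inline float dequantize_qsymm16_sketch(int16_t value, float scale)
    {
        return static_cast<float>(value) * scale;
    }
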
diff --git a/tests/validation/Helpers.h b/tests/validation/Helpers.h
index 2e8c667..44dd7a9 100644
--- a/tests/validation/Helpers.h
+++ b/tests/validation/Helpers.h
@@ -194,6 +194,25 @@
  */
 SimpleTensor<uint8_t> convert_to_asymmetric(const SimpleTensor<float> &src, const QuantizationInfo &quantization_info);
 
+/** Convert quantized simple tensor into float using tensor quantization information.
+ *
+ * @param[in] src Quantized tensor.
+ *
+ * @return Float tensor.
+ */
+template <typename T>
+SimpleTensor<float> convert_from_symmetric(const SimpleTensor<T> &src);
+
+/** Convert a float simple tensor into a quantized one using the specified quantization information.
+ *
+ * @param[in] src               Float tensor.
+ * @param[in] quantization_info Quantization information.
+ *
+ * @return Quantized tensor.
+ */
+template <typename T>
+SimpleTensor<T> convert_to_symmetric(const SimpleTensor<float> &src, const QuantizationInfo &quantization_info);
+
 /** Matrix multiply between 2 float simple tensors
  *
  * @param[in]  a   Input tensor A
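
A hedged usage sketch of the two converters declared above (shape and scale
values are illustrative; assumes the int16_t/QSYMM16 specializations from
Helpers.cpp):

    SimpleTensor<float> src{ TensorShape(4U, 4U), DataType::F32 };
    // ... fill src with float data ...

    // Round trip through QSYMM16 with scale 1/32768.
    SimpleTensor<int16_t> q16 = convert_to_symmetric<int16_t>(src, QuantizationInfo(1.f / 32768.f));
    SimpleTensor<float>   fp  = convert_from_symmetric<int16_t>(q16);
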
diff --git a/tests/validation/NEON/ArithmeticAddition.cpp b/tests/validation/NEON/ArithmeticAddition.cpp
index 4a72dfc..8d8a327 100644
--- a/tests/validation/NEON/ArithmeticAddition.cpp
+++ b/tests/validation/NEON/ArithmeticAddition.cpp
@@ -45,7 +45,7 @@
 {
 #ifndef __aarch64__
 constexpr AbsoluteTolerance<float> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
-#endif //__aarch64__
+#endif                                                   //__aarch64__
 
 /** Input data sets **/
 const auto ArithmeticAdditionU8Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8)), framework::dataset::make("DataType",
@@ -60,6 +60,8 @@
                                                    framework::dataset::make("DataType", DataType::F32));
 const auto ArithmeticAdditionQASYMM8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::QASYMM8)),
                                                       framework::dataset::make("DataType", DataType::QASYMM8));
+const auto ArithmeticAdditionQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16), framework::dataset::make("DataType", DataType::QSYMM16)),
+                                                      framework::dataset::make("DataType", DataType::QSYMM16));
 } // namespace
 
 TEST_SUITE(NEON)
@@ -275,9 +277,9 @@
                        framework::DatasetMode::PRECOMMIT,
                        combine(combine(combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionQASYMM8Dataset),
                                                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
-                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })),
-                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
-                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) })))
+                                               framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
+                                       framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+                               framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })))
 {
     // Validate output
 #ifdef __aarch64__
@@ -287,6 +289,42 @@
 #endif //__aarch64__
 }
 TEST_SUITE_END() // QASYMM8
+TEST_SUITE(QSYMM16)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+               shape, policy)
+{
+    // Create tensors
+    Tensor ref_src1 = create_tensor<Tensor>(shape, DataType::QSYMM16);
+    Tensor ref_src2 = create_tensor<Tensor>(shape, DataType::QSYMM16);
+    Tensor dst      = create_tensor<Tensor>(shape, DataType::QSYMM16);
+
+    // Create and Configure function
+    NEArithmeticAddition add;
+    add.configure(&ref_src1, &ref_src2, &dst, policy);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    validate(ref_src1.info()->padding(), PaddingSize());
+    validate(ref_src2.info()->padding(), PaddingSize());
+    validate(dst.info()->padding(), PaddingSize());
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       NEArithmeticAdditionQuantizedFixture<int16_t>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionQSYMM16Dataset),
+                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+                                               framework::dataset::make("Src0QInfo", { QuantizationInfo(1.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
+                                       framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
+                               framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // QSYMM16
 TEST_SUITE_END() // Quantized
 
 TEST_SUITE_END() // ArithmeticAddition
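
Outside the test framework, configuring NEArithmeticAddition for QSYMM16
follows the same pattern as the Configuration test above. A minimal runtime
sketch (shapes and scales are illustrative):

    Tensor src1, src2, dst;
    src1.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::QSYMM16, QuantizationInfo(1.f / 32768.f)));
    src2.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::QSYMM16, QuantizationInfo(2.f / 32768.f)));
    dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::QSYMM16, QuantizationInfo(5.f / 32768.f)));

    NEArithmeticAddition add;
    add.configure(&src1, &src2, &dst, ConvertPolicy::SATURATE);

    src1.allocator()->allocate();
    src2.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src1 and src2 with quantized data, then:
    add.run();
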
diff --git a/tests/validation/reference/ArithmeticOperations.cpp b/tests/validation/reference/ArithmeticOperations.cpp
index a6205af..abd4f31 100644
--- a/tests/validation/reference/ArithmeticOperations.cpp
+++ b/tests/validation/reference/ArithmeticOperations.cpp
@@ -124,8 +124,32 @@
     }
 }
 
-template SimpleTensor<int16_t> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<int16_t> &src1, const SimpleTensor<int16_t> &src2, SimpleTensor<int16_t> &dst,
-                                                    ConvertPolicy convert_policy);
+template <>
+SimpleTensor<int16_t> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<int16_t> &src1, const SimpleTensor<int16_t> &src2, SimpleTensor<int16_t> &dst, ConvertPolicy convert_policy)
+{
+    Coordinates id_src1{};
+    Coordinates id_src2{};
+    Coordinates id_dst{};
+
+    if(dst.data_type() == DataType::QSYMM16)
+    {
+        SimpleTensor<float> src1_tmp = convert_from_symmetric<int16_t>(src1);
+        SimpleTensor<float> src2_tmp = convert_from_symmetric<int16_t>(src2);
+        SimpleTensor<float> dst_tmp(TensorShape::broadcast_shape(src1.shape(), src2.shape()), dst.data_type());
+
+        BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1_tmp, src2_tmp, dst_tmp, convert_policy, id_src1, id_src2, id_dst);
+
+        dst = convert_to_symmetric<int16_t>(dst_tmp, dst.quantization_info());
+        return dst;
+    }
+    else
+    {
+        // DataType::S16
+        BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1, src2, dst, convert_policy, id_src1, id_src2, id_dst);
+        return dst;
+    }
+}
+
 template SimpleTensor<int8_t> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<int8_t> &src1, const SimpleTensor<int8_t> &src2, SimpleTensor<int8_t> &dst, ConvertPolicy convert_policy);
 template SimpleTensor<half> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<half> &src1, const SimpleTensor<half> &src2, SimpleTensor<half> &dst, ConvertPolicy convert_policy);
 template SimpleTensor<float> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<float> &src1, const SimpleTensor<float> &src2, SimpleTensor<float> &dst, ConvertPolicy convert_policy);
@@ -133,7 +157,7 @@
 template <typename T>
 SimpleTensor<T> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, DataType dst_data_type, ConvertPolicy convert_policy)
 {
-    ARM_COMPUTE_ERROR_ON_MSG(dst_data_type == DataType::QASYMM8, "For QASYMM8, the quantized output tensor should be passed directly.");
+    ARM_COMPUTE_ERROR_ON_MSG(is_data_type_quantized(dst_data_type), "For quantized input data types, the quantized output tensor should be passed directly.");
 
     SimpleTensor<T> dst(TensorShape::broadcast_shape(src1.shape(), src2.shape()), dst_data_type);
     arithmetic_operation<T>(op, src1, src2, dst, convert_policy);
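
As a concrete trace of the specialized reference path with the scales used in
RunSmall: an input value q1 = 16384 at scale 1/32768 dequantizes to 0.5, and
q2 = 8192 at scale 2/32768 also dequantizes to 0.5; the float addition gives
1.0, which requantizes at the output scale 5/32768 to round(1.0 * 32768 / 5) =
6554 (dequantizing back to about 1.00006). Sums outside roughly [-5.0, 5.0) at
that output scale saturate to the int16_t limits, which is the behaviour
ConvertPolicy::SATURATE exercises.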