COMPMID-2235: Extend type support for CL/NEON DequantizationLayer.

Adds support for:
- QSYMM8

Change-Id: Ia0b839fc844ce0f968dad1b69a001f9a660dbcd5
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1378
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp
index f4ceca8..2e6ceb4 100644
--- a/src/core/CL/CLHelpers.cpp
+++ b/src/core/CL/CLHelpers.cpp
@@ -37,11 +37,11 @@
     switch(dt)
     {
         case DataType::U8:
-            return "uchar";
-        case DataType::S8:
-            return "char";
         case DataType::QASYMM8:
             return "uchar";
+        case DataType::S8:
+        case DataType::QSYMM8:
+            return "char";
         case DataType::U16:
             return "ushort";
         case DataType::S16:
@@ -69,11 +69,11 @@
     switch(dt)
     {
         case DataType::U8:
-            return "uchar";
-        case DataType::S8:
-            return "char";
         case DataType::QASYMM8:
             return "uchar";
+        case DataType::S8:
+        case DataType::QSYMM8:
+            return "char";
         case DataType::U16:
             return "ushort";
         case DataType::F16:
@@ -100,6 +100,7 @@
     {
         case DataType::U8:
         case DataType::S8:
+        case DataType::QSYMM8:
         case DataType::QASYMM8:
             return "8";
         case DataType::U16:
@@ -241,6 +242,7 @@
         case DataType::U8:
         case DataType::S8:
         case DataType::QASYMM8:
+        case DataType::QSYMM8:
             return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR>();
         case DataType::U16:
         case DataType::S16:
diff --git a/src/core/CL/cl_kernels/dequantization_layer.cl b/src/core/CL/cl_kernels/dequantization_layer.cl
index 7307700..ad3ed35 100644
--- a/src/core/CL/cl_kernels/dequantization_layer.cl
+++ b/src/core/CL/cl_kernels/dequantization_layer.cl
@@ -23,16 +23,17 @@
  */
 #include "helpers.h"
 
-#if defined(VEC_SIZE) && defined(DATA_TYPE) && defined(SCALE) && defined(OFFSET)
+#if defined(VEC_SIZE) && defined(DATA_TYPE_SRC) && defined(DATA_TYPE_DST) && defined(SCALE) && defined(OFFSET)
 
 /** This performs the dequantization of 8-bit unsigned integers to floating point.
  *
- * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float
+ * @note Source datatype should be given as a preprocessor argument using -DDATA_TYPE_SRC=type. e.g. -DDATA_TYPE_SRC=char
+ * @note Destination datatype should be given as a preprocessor argument using -DDATA_TYPE_DST=type. e.g. -DDATA_TYPE_DST=float
  * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
  * @note Quantization scale of input tensor is passed in with -DSCALE=scale.
  * @note Quantization offset of input tensor is passed in with -DOFFSET=offset.
  *
- * @param[in]  input_ptr                            Pointer to the source tensor. Supported data types: QASYMM8
+ * @param[in]  input_ptr                            Pointer to the source tensor. Supported data types: QASYMM8/QSYMM8
  * @param[in]  input_stride_x                       Stride of the source tensor in X dimension (in bytes)
  * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in]  input_stride_y                       Stride of the source tensor in Y dimension (in bytes)
@@ -66,7 +67,7 @@
 
     // Load data
     VEC_DATA_TYPE(int, VEC_SIZE)
-    val = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)input.ptr), VEC_DATA_TYPE(int, VEC_SIZE));
+    val = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_SRC *)input.ptr), VEC_DATA_TYPE(int, VEC_SIZE));
 
     // Create scale and offset vectors
     const VEC_DATA_TYPE(float, VEC_SIZE)
@@ -81,10 +82,10 @@
 
     // Store result
     VSTORE(VEC_SIZE)
-    (CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), 0, (__global DATA_TYPE *)output.ptr);
+    (CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_DST, VEC_SIZE)), 0, (__global DATA_TYPE_DST *)output.ptr);
 #else  // !defined(VEC_SIZE) || !defined(LAST_ACCESSED_X)
-    *((__global DATA_TYPE *)(output.ptr)) = (DATA_TYPE)((float)((int)(*((__global uchar *)(input.ptr))) - (int)(OFFSET)) * (float)(SCALE));
+    *((__global DATA_TYPE_DST *)(output.ptr)) = (DATA_TYPE_DST)((float)((int)(*((__global DATA_TYPE_SRC *)(input.ptr))) - (int)(OFFSET)) * (float)(SCALE));
 #endif // defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
 }
 
-#endif // defined(VEC_SIZE) && defined(DATA_TYPE) && defined(SCALE) && defined(OFFSET)
\ No newline at end of file
+#endif // defined(VEC_SIZE) && defined(DATA_TYPE_SRC) && defined(DATA_TYPE_DST) && defined(SCALE) && defined(OFFSET)
diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
index 0b06683..e383bc4 100644
--- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
@@ -40,7 +40,7 @@
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QSYMM8);
 
     if(output->tensor_shape().total_size() > 0)
     {
@@ -95,15 +95,19 @@
     }
     ICLKernel::configure_internal(win);
 
-    const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
+    const UniformQuantizationInfo qinfo   = input->info()->quantization_info().uniform();
+    const int                     qoffset = is_data_type_quantized_asymmetric(input->info()->data_type()) ? qinfo.offset : 0;
 
     // Create kernel
     CLBuildOptions build_opts;
     build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(qinfo.scale));
-    build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(qinfo.offset));
+    build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(qoffset));
     build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
-    build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type()));
+    build_opts.add_option("-DDATA_TYPE_SRC=" + get_cl_type_from_data_type(input->info()->data_type()));
+    build_opts.add_option("-DDATA_TYPE_DST=" + get_cl_type_from_data_type(output->info()->data_type()));
     build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(output_width_x - vec_size_x, 0)));
+
+    // Build the OpenCL kernel using the options assembled above
     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("dequantization_layer", build_opts.options()));
 }