COMPMID-2225: Add interface support for new quantized data types.

Add support for:
- QSYMM8: 8-bit quantized symmetric
- QSYMM8_PER_CHANNEL: 8-bit quantized symmetric with per-channel quantization

Change-Id: I00c4ff98e44af37419470af61419ee95d0de2463
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1236
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp
index d601dfc..65e6561 100644
--- a/src/core/CL/kernels/CLActivationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp
@@ -122,42 +122,43 @@
     int                a_const_int                       = 0;
     int                b_const_int                       = 0;
 
+    const bool is_quantized_asymmetric = is_data_type_quantized_asymmetric(dt);
     // Create quantized version of constants a, b if needed
-    if(is_data_type_quantized(dt))
+    if(is_quantized_asymmetric)
     {
-        a_const_int = input->info()->quantization_info().quantize(a_const, RoundingPolicy::TO_NEAREST_UP);
-        b_const_int = input->info()->quantization_info().quantize(b_const, RoundingPolicy::TO_NEAREST_UP);
+        const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+        a_const_int                           = quantize_qasymm8(a_const, iq_info);
+        b_const_int                           = quantize_qasymm8(b_const, iq_info);
     }
 
-    const bool is_logistic_activation_quantized = is_data_type_quantized_asymmetric(dt) && act_info.activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC;
+    const bool is_logistic_activation_quantized = is_quantized_asymmetric && act_info.activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC;
     // Set build options
     CLBuildOptions build_opts;
     build_opts.add_option_if(!is_logistic_activation_quantized, "-DACT=" + lower_string(string_from_activation_func(act_info.activation())));
     build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(dt)));
     build_opts.add_option(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
 
-    if(is_data_type_quantized(dt))
+    if(is_quantized_asymmetric)
     {
         build_opts.add_option(("-DA_VAL=" + support::cpp11::to_string(a_const_int)));
         build_opts.add_option(("-DB_VAL=" + support::cpp11::to_string(b_const_int)));
 
-        const int   o1 = input->info()->quantization_info().offset;
-        const float s1 = input->info()->quantization_info().scale;
+        const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+
         // Quantized value of 0 corresponds to the offset o1
-        build_opts.add_option(("-DCONST_0=" + support::cpp11::to_string(o1)));
-        build_opts.add_option(("-DS1_VAL=" + float_to_string_with_full_precision(s1)));
-        build_opts.add_option(("-DO1_VAL=" + support::cpp11::to_string(o1)));
+        build_opts.add_option(("-DCONST_0=" + support::cpp11::to_string(iq_info.offset)));
+        build_opts.add_option(("-DS1_VAL=" + float_to_string_with_full_precision(iq_info.scale)));
+        build_opts.add_option(("-DO1_VAL=" + support::cpp11::to_string(iq_info.offset)));
 
         // Set scale and offset of the input and output if they have different quantization info
-        if(is_data_type_quantized_asymmetric(dt) && output != nullptr)
+        if(is_quantized_asymmetric && output != nullptr)
         {
-            const float s2 = output->info()->quantization_info().scale;
-            const int   o2 = output->info()->quantization_info().offset;
+            const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
 
-            if(o1 != o2 || s1 != s2)
+            if(iq_info != oq_info)
             {
-                build_opts.add_option(("-DS2_VAL=" + float_to_string_with_full_precision(s2)));
-                build_opts.add_option(("-DO2_VAL=" + support::cpp11::to_string(o2)));
+                build_opts.add_option(("-DS2_VAL=" + float_to_string_with_full_precision(oq_info.scale)));
+                build_opts.add_option(("-DO2_VAL=" + support::cpp11::to_string(oq_info.offset)));
             }
         }
     }
@@ -171,7 +172,7 @@
 
     // Create kernel
     std::string kernel_name = std::string("activation_layer");
-    if(is_data_type_quantized_asymmetric(dt))
+    if(is_quantized_asymmetric)
     {
         kernel_name += is_logistic_activation_quantized ? std::string("_logistic_qa8") : std::string("_qa8");
     }
diff --git a/src/core/CL/kernels/CLComparisonKernel.cpp b/src/core/CL/kernels/CLComparisonKernel.cpp
index 4f44851..628f9f1 100644
--- a/src/core/CL/kernels/CLComparisonKernel.cpp
+++ b/src/core/CL/kernels/CLComparisonKernel.cpp
@@ -134,10 +134,13 @@
     build_opts.emplace("-DOP_NAME=" + lower_string(operation_name));
     if(is_data_type_quantized_asymmetric(input1->info()->data_type()))
     {
-        build_opts.emplace("-DOFFSET_IN1=" + support::cpp11::to_string(input1->info()->quantization_info().offset));
-        build_opts.emplace("-DOFFSET_IN2=" + support::cpp11::to_string(input2->info()->quantization_info().offset));
-        build_opts.emplace("-DSCALE_IN1=" + float_to_string_with_full_precision(input1->info()->quantization_info().scale));
-        build_opts.emplace("-DSCALE_IN2=" + float_to_string_with_full_precision(input2->info()->quantization_info().scale));
+        const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform();
+        const UniformQuantizationInfo iq2_info = input2->info()->quantization_info().uniform();
+
+        build_opts.emplace("-DOFFSET_IN1=" + support::cpp11::to_string(iq1_info.offset));
+        build_opts.emplace("-DOFFSET_IN2=" + support::cpp11::to_string(iq2_info.offset));
+        build_opts.emplace("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale));
+        build_opts.emplace("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale));
         kernel_name += "_quantized";
     }
 
diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
index 1cae371..5e1bbe9 100644
--- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
@@ -99,10 +99,13 @@
     build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
     if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info())
     {
-        build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().offset));
-        build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset));
-        build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().scale));
-        build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale));
+        const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+        const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
+
+        build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset));
+        build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
+        build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale));
+        build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale));
     }
 
     // Create kernel
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
index cd25bb1..615327a 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
@@ -251,30 +251,34 @@
 
     if(is_qasymm)
     {
-        float multiplier        = _input->info()->quantization_info().scale * _weights->info()->quantization_info().scale / _output->info()->quantization_info().scale;
+        const UniformQuantizationInfo iq_info = _input->info()->quantization_info().uniform();
+        const UniformQuantizationInfo wq_info = _weights->info()->quantization_info().uniform();
+        const UniformQuantizationInfo oq_info = _output->info()->quantization_info().uniform();
+
+        float multiplier        = iq_info.scale * wq_info.scale / oq_info.scale;
         int   output_multiplier = 0;
         int   output_shift      = 0;
         quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
 
         build_opts.add_option("-DCONV_STRIDE_Y=" + support::cpp11::to_string(_conv_stride_y));
-        build_opts.add_option("-DINPUT_OFFSET=" + support::cpp11::to_string(-_input->info()->quantization_info().offset));
-        build_opts.add_option("-DWEIGHTS_OFFSET=" + support::cpp11::to_string(-_weights->info()->quantization_info().offset));
-        build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(_output->info()->quantization_info().offset));
-        build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(9 * input->info()->quantization_info().offset * weights->info()->quantization_info().offset));
+        build_opts.add_option("-DINPUT_OFFSET=" + support::cpp11::to_string(-iq_info.offset));
+        build_opts.add_option("-DWEIGHTS_OFFSET=" + support::cpp11::to_string(-wq_info.offset));
+        build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(oq_info.offset));
+        build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(9 * iq_info.offset * wq_info.offset));
         build_opts.add_option("-DOUTPUT_MULTIPLIER=" + support::cpp11::to_string(output_multiplier));
         build_opts.add_option("-DOUTPUT_SHIFT=" + support::cpp11::to_string(output_shift));
 
         if(act_info.enabled())
         {
-            const int a_val = output->info()->quantization_info().quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP);
-            const int b_val = output->info()->quantization_info().quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP);
-            const int o1    = output->info()->quantization_info().offset;
+            const int a_val = quantize_qasymm8(act_info.a(), oq_info);
+            const int b_val = quantize_qasymm8(act_info.b(), oq_info);
+            const int o1    = oq_info.offset;
 
             build_opts.add_option("-DA_VAL=" + support::cpp11::to_string(a_val));
             build_opts.add_option("-DB_VAL=" + support::cpp11::to_string(b_val));
             build_opts.add_option("-DCONST_0=" + support::cpp11::to_string(o1));
 
-            const float s1 = input->info()->quantization_info().scale;
+            const float s1 = iq_info.scale;
             build_opts.add_option("-DS1_VAL=" + float_to_string_with_full_precision(s1));
             build_opts.add_option("-DO1_VAL=" + support::cpp11::to_string(o1));
         }
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
index 758e99b..e32faa1 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
@@ -213,30 +213,34 @@
 
     if(is_qasymm)
     {
-        float multiplier        = _input->info()->quantization_info().scale * _weights->info()->quantization_info().scale / _output->info()->quantization_info().scale;
+        const UniformQuantizationInfo iq_info = _input->info()->quantization_info().uniform();
+        const UniformQuantizationInfo wq_info = _weights->info()->quantization_info().uniform();
+        const UniformQuantizationInfo oq_info = _output->info()->quantization_info().uniform();
+
+        float multiplier        = iq_info.scale * wq_info.scale / oq_info.scale;
         int   output_multiplier = 0;
         int   output_shift      = 0;
         quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
 
         build_opts.add_option("-DSRC_DIM_1=" + support::cpp11::to_string(_input->info()->dimension(1)));
-        build_opts.add_option("-DINPUT_OFFSET=" + support::cpp11::to_string(-_input->info()->quantization_info().offset));
-        build_opts.add_option("-DWEIGHTS_OFFSET=" + support::cpp11::to_string(-_weights->info()->quantization_info().offset));
-        build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(_output->info()->quantization_info().offset));
-        build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(9 * input->info()->quantization_info().offset * weights->info()->quantization_info().offset));
+        build_opts.add_option("-DINPUT_OFFSET=" + support::cpp11::to_string(-iq_info.offset));
+        build_opts.add_option("-DWEIGHTS_OFFSET=" + support::cpp11::to_string(-wq_info.offset));
+        build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(oq_info.offset));
+        build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(9 * iq_info.offset * wq_info.offset));
         build_opts.add_option("-DOUTPUT_MULTIPLIER=" + support::cpp11::to_string(output_multiplier));
         build_opts.add_option("-DOUTPUT_SHIFT=" + support::cpp11::to_string(output_shift));
 
         if(act_info.enabled())
         {
-            const int a_val = output->info()->quantization_info().quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP);
-            const int b_val = output->info()->quantization_info().quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP);
-            const int o1    = output->info()->quantization_info().offset;
+            const int a_val = quantize_qasymm8(act_info.a(), oq_info);
+            const int b_val = quantize_qasymm8(act_info.b(), oq_info);
+            const int o1    = oq_info.offset;
 
             build_opts.add_option("-DA_VAL=" + support::cpp11::to_string(a_val));
             build_opts.add_option("-DB_VAL=" + support::cpp11::to_string(b_val));
             build_opts.add_option("-DCONST_0=" + support::cpp11::to_string(o1));
 
-            const float s1 = input->info()->quantization_info().scale;
+            const float s1 = iq_info.scale;
             build_opts.add_option("-DS1_VAL=" + float_to_string_with_full_precision(s1));
             build_opts.add_option("-DO1_VAL=" + support::cpp11::to_string(o1));
         }
diff --git a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
index 28d4ff2..0312a57 100644
--- a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp
@@ -72,9 +72,10 @@
     _input  = input;
     _output = output;
 
-    const DataLayout data_layout = input->info()->data_layout();
-    const size_t     idx_w       = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
-    const size_t     idx_h       = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+    const DataLayout              data_layout = input->info()->data_layout();
+    const size_t                  idx_w       = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+    const size_t                  idx_h       = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+    const UniformQuantizationInfo qinfo       = input->info()->quantization_info().uniform();
 
     // Create kernel
     CLBuildOptions build_opts;
@@ -96,7 +97,7 @@
     build_opts.add_option("-D" + string_from_data_layout(input->info()->data_layout()));
     build_opts.add_option_if(has_bias, "-DHAS_BIAS");
     build_opts.add_option_if_else(is_data_type_quantized_asymmetric(input->info()->data_type()),
-                                  "-DPAD_VALUE=" + support::cpp11::to_string(input->info()->quantization_info().offset),
+                                  "-DPAD_VALUE=" + support::cpp11::to_string(qinfo.offset),
                                   "-DPAD_VALUE=0");
 
     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("depthwise_im2col", build_opts.options()));
diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
index 78cc559..0b06683 100644
--- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
@@ -95,10 +95,12 @@
     }
     ICLKernel::configure_internal(win);
 
+    const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
+
     // Create kernel
     CLBuildOptions build_opts;
-    build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(input->info()->quantization_info().scale));
-    build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(input->info()->quantization_info().offset));
+    build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(qinfo.scale));
+    build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(qinfo.offset));
     build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
     build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type()));
     build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(output_width_x - vec_size_x, 0)));
diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
index 12affa9..3e158a5 100644
--- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
@@ -452,16 +452,20 @@
     // Set static kernel arguments
     if(is_data_type_quantized_asymmetric(data_type))
     {
+        const UniformQuantizationInfo iqinfo = _input->info()->quantization_info().uniform();
+        const UniformQuantizationInfo wqinfo = _weights->info()->quantization_info().uniform();
+        const UniformQuantizationInfo oqinfo = _output->info()->quantization_info().uniform();
+
         int output_multiplier = 0;
         int output_shift      = 0;
 
-        float multiplier = _input->info()->quantization_info().scale * _weights->info()->quantization_info().scale / _output->info()->quantization_info().scale;
+        float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale;
         ARM_COMPUTE_THROW_ON_ERROR(quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift));
 
         unsigned int idx = 3 * num_arguments_per_3D_tensor() + ((_biases != nullptr) ? num_arguments_per_1D_tensor() : 0) + 1;
-        _kernel.setArg(idx++, -_input->info()->quantization_info().offset);
-        _kernel.setArg(idx++, -_weights->info()->quantization_info().offset);
-        _kernel.setArg(idx++, _output->info()->quantization_info().offset);
+        _kernel.setArg(idx++, -iqinfo.offset);
+        _kernel.setArg(idx++, -wqinfo.offset);
+        _kernel.setArg(idx++, oqinfo.offset);
         _kernel.setArg(idx++, output_multiplier);
         _kernel.setArg(idx++, output_shift);
     }
diff --git a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
index 414b040..1d9c715 100644
--- a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
+++ b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
@@ -134,12 +134,16 @@
     build_opts.add_option("-DOP=" + operation_string);
     if(is_data_type_quantized_asymmetric(input1.data_type()))
     {
-        build_opts.add_option("-DOFFSET_IN1=" + support::cpp11::to_string(input1.quantization_info().offset));
-        build_opts.add_option("-DOFFSET_IN2=" + support::cpp11::to_string(input2.quantization_info().offset));
-        build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(output.quantization_info().offset));
-        build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input1.quantization_info().scale));
-        build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(input2.quantization_info().scale));
-        build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output.quantization_info().scale));
+        const UniformQuantizationInfo iq1info = input1.quantization_info().uniform();
+        const UniformQuantizationInfo iq2info = input2.quantization_info().uniform();
+        const UniformQuantizationInfo oqinfo  = output.quantization_info().uniform();
+
+        build_opts.add_option("-DOFFSET_IN1=" + support::cpp11::to_string(iq1info.offset));
+        build_opts.add_option("-DOFFSET_IN2=" + support::cpp11::to_string(iq2info.offset));
+        build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(oqinfo.offset));
+        build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1info.scale));
+        build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2info.scale));
+        build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oqinfo.scale));
     }
     return build_opts;
 }
diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
index 11a4292..0ff2f13 100644
--- a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -104,9 +104,12 @@
     // Add static arguments
     if(is_quantized)
     {
+        const UniformQuantizationInfo iq0_info = _input0->info()->quantization_info().uniform();
+        const UniformQuantizationInfo iq1_info = _input1->info()->quantization_info().uniform();
+
         unsigned int idx = num_arguments_per_3D_tensor() + num_arguments_per_2D_tensor() + num_arguments_per_1D_tensor();
-        _kernel.setArg<int>(idx++, -_input0->info()->quantization_info().offset);
-        _kernel.setArg<int>(idx++, -_input1->info()->quantization_info().offset);
+        _kernel.setArg<int>(idx++, -iq0_info.offset);
+        _kernel.setArg<int>(idx++, -iq1_info.offset);
     }
 
     // Configure kernel window
diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp
index e3f2a96..4da3e24 100644
--- a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp
@@ -133,10 +133,13 @@
 
     if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info())
     {
-        build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().offset));
-        build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset));
-        build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().scale));
-        build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale));
+        const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+        const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
+
+        build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset));
+        build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
+        build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale));
+        build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale));
     }
 
     // Create kernel
diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp
index 8caa927..10d6e68 100644
--- a/src/core/CL/kernels/CLIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLIm2ColKernel.cpp
@@ -162,10 +162,11 @@
     const std::pair<unsigned int, unsigned int> convolved_dims = scaled_dimensions(input_width, input_height, kernel_dims.width, kernel_dims.height, conv_info, dilation);
 
     // Im2Col configuration
-    std::string    kernel_name = "im2col_generic_";
-    CLBuildOptions build_opts;
-    unsigned int   num_elems_processed_per_iteration = 1;
-    bool           is_padding_required_nchw          = false;
+    std::string                   kernel_name = "im2col_generic_";
+    CLBuildOptions                build_opts;
+    unsigned int                  num_elems_processed_per_iteration = 1;
+    bool                          is_padding_required_nchw          = false;
+    const UniformQuantizationInfo qinfo                             = input->quantization_info().uniform();
 
     build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
     build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(input->element_size()));
@@ -185,7 +186,7 @@
     build_opts.add_option("-DDILATION_X=" + support::cpp11::to_string(dilation.x()));
     build_opts.add_option("-DDILATION_Y=" + support::cpp11::to_string(dilation.y()));
     build_opts.add_option_if(num_groups > 1, "-DNUM_GROUPS=" + support::cpp11::to_string(num_groups));
-    build_opts.add_option_if_else(is_data_type_quantized(data_type), "-DPAD_VALUE=" + support::cpp11::to_string(input->quantization_info().offset), "-DPAD_VALUE=0");
+    build_opts.add_option_if_else(is_data_type_quantized(data_type), "-DPAD_VALUE=" + support::cpp11::to_string(qinfo.offset), "-DPAD_VALUE=0");
     build_opts.add_option_if(has_bias, "-DHAS_BIAS");
 
     if(data_layout == DataLayout::NHWC)
diff --git a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp
index 9033016..b255ba3 100644
--- a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp
+++ b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp
@@ -123,8 +123,9 @@
     std::string kernel_name = "normalize_planar_yuv_layer_";
     if(is_data_type_quantized(dt))
     {
-        build_opts.add_option(("-DOFFSET=" + support::cpp11::to_string(input->info()->quantization_info().offset)));
-        build_opts.add_option(("-DSCALE=" + support::cpp11::to_string(input->info()->quantization_info().scale)));
+        const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
+        build_opts.add_option(("-DOFFSET=" + support::cpp11::to_string(qinfo.offset)));
+        build_opts.add_option(("-DSCALE=" + support::cpp11::to_string(qinfo.scale)));
         kernel_name += "q8_";
     }
 
diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
index dda9b16..050bbb8 100644
--- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
+++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
@@ -181,12 +181,16 @@
     CLBuildOptions build_opts;
     if(is_quantized)
     {
-        build_opts.add_option("-DOFFSET_IN1=" + support::cpp11::to_string(input1->info()->quantization_info().offset));
-        build_opts.add_option("-DOFFSET_IN2=" + support::cpp11::to_string(input2->info()->quantization_info().offset));
-        build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(output->info()->quantization_info().offset));
-        build_opts.add_option("-DSCALE_IN1=" + support::cpp11::to_string(input1->info()->quantization_info().scale));
-        build_opts.add_option("-DSCALE_IN2=" + support::cpp11::to_string(input2->info()->quantization_info().scale));
-        build_opts.add_option("-DSCALE_OUT=" + support::cpp11::to_string(output->info()->quantization_info().scale));
+        const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform();
+        const UniformQuantizationInfo iq2_info = input2->info()->quantization_info().uniform();
+        const UniformQuantizationInfo oq_info  = output->info()->quantization_info().uniform();
+
+        build_opts.add_option("-DOFFSET_IN1=" + support::cpp11::to_string(iq1_info.offset));
+        build_opts.add_option("-DOFFSET_IN2=" + support::cpp11::to_string(iq2_info.offset));
+        build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(oq_info.offset));
+        build_opts.add_option("-DSCALE_IN1=" + support::cpp11::to_string(iq1_info.scale));
+        build_opts.add_option("-DSCALE_IN2=" + support::cpp11::to_string(iq2_info.scale));
+        build_opts.add_option("-DSCALE_OUT=" + support::cpp11::to_string(oq_info.scale));
         kernel_name += "_quantized";
     }
     else
diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
index 7ccbda9..8eaf5bf 100644
--- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
@@ -205,10 +205,13 @@
 
     if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info())
     {
-        build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().offset));
-        build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset));
-        build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().scale));
-        build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale));
+        const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+        const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
+
+        build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset));
+        build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
+        build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale));
+        build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale));
     }
 
     // Check output dimensions
diff --git a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
index 374b22e..22d4e33 100644
--- a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
@@ -93,10 +93,12 @@
     }
     ICLKernel::configure_internal(win);
 
+    const UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform();
+
     // Create kernel
     CLBuildOptions build_opts;
-    build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(output->info()->quantization_info().scale));
-    build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(output->info()->quantization_info().offset));
+    build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(qinfo.scale));
+    build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(qinfo.offset));
     build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
     build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
     build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(input_width_x - vec_size_x, 0)));
diff --git a/src/core/CL/kernels/CLRangeKernel.cpp b/src/core/CL/kernels/CLRangeKernel.cpp
index eb8822b..a22f5cb 100644
--- a/src/core/CL/kernels/CLRangeKernel.cpp
+++ b/src/core/CL/kernels/CLRangeKernel.cpp
@@ -116,8 +116,9 @@
     build_opts.add_option("-DSTEP=" + support::cpp11::to_string(step));
     if(is_data_type_quantized_asymmetric(output->info()->data_type()))
     {
-        build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(output->info()->quantization_info().offset));
-        build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale));
+        const UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform();
+        build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(qinfo.offset));
+        build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(qinfo.scale));
         kernel_name += "_quantized";
     }
     // Create kernel
diff --git a/src/core/CL/kernels/CLScaleKernel.cpp b/src/core/CL/kernels/CLScaleKernel.cpp
index cd89d1c..488313f 100644
--- a/src/core/CL/kernels/CLScaleKernel.cpp
+++ b/src/core/CL/kernels/CLScaleKernel.cpp
@@ -206,8 +206,9 @@
     build_opts.add_option_if_else(sampling_policy == SamplingPolicy::CENTER, "-DSAMPLING_POLICY_CENTER", "-DSAMPLING_POLICY_TOP_LEFT");
     if(call_quantized_kernel)
     {
-        build_opts.add_option("-DSCALE=" + support::cpp11::to_string(input->info()->quantization_info().scale));
-        build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(input->info()->quantization_info().offset));
+        const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
+        build_opts.add_option("-DSCALE=" + support::cpp11::to_string(qinfo.scale));
+        build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(qinfo.offset));
     }
 
     std::string interpolation_name = string_from_interpolation_policy(policy);
diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
index e2d9881..a9c0870 100644
--- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
@@ -233,15 +233,16 @@
     _output = output;
     _sum    = sum;
 
-    const DataType dt                 = input->info()->data_type();
-    const size_t   reduction_dim_size = input->info()->dimension(0);
+    const DataType                dt                 = input->info()->data_type();
+    const UniformQuantizationInfo qinfo              = input->info()->quantization_info().uniform();
+    const size_t                  reduction_dim_size = input->info()->dimension(0);
 
     // Set build options
     CLBuildOptions build_opts;
     build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(dt));
     build_opts.add_option_if(dt == DataType::F16, "-DUSE_F16");
     build_opts.add_option_if(is_data_type_float(dt) && (beta != 1.0f), "-DBETA=" + float_to_string_with_full_precision(beta));
-    build_opts.add_options_if(is_data_type_quantized_asymmetric(dt), prepare_quantized_softmax_build_options(input->info()->quantization_info().scale, beta).options());
+    build_opts.add_options_if(is_data_type_quantized_asymmetric(dt), prepare_quantized_softmax_build_options(qinfo.scale, beta).options());
 
     cl::NDRange lws_hint(cl::NullRange);
     std::string kernel_name = is_data_type_quantized_asymmetric(dt) ? std::string("softmax_layer_max_shift_exp_sum_quantized_serial") :
@@ -338,9 +339,10 @@
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, sum, output);
 
     // Note: output should always have a scale of 1/256 and offset 0
-    const QuantizationInfo allowed_quantization_info = QuantizationInfo(1.F / 256, 0);
-    const bool             is_quantized_asymmetric   = (input->info()->data_type() == DataType::S32);
-    const DataType         output_data_type          = is_quantized_asymmetric ? DataType::QASYMM8 : input->info()->data_type();
+    const QuantizationInfo        allowed_quantization_info = QuantizationInfo(1.F / 256, 0);
+    const bool                    is_quantized_asymmetric   = (input->info()->data_type() == DataType::S32);
+    const DataType                output_data_type          = is_quantized_asymmetric ? DataType::QASYMM8 : input->info()->data_type();
+    const UniformQuantizationInfo qinfo                     = input->info()->quantization_info().uniform();
 
     // Output auto initialization if not yet initialized
     auto_init_if_empty(*output->info(),
@@ -357,7 +359,7 @@
     CLBuildOptions build_opts;
     build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
     build_opts.add_options_if(is_quantized_asymmetric,
-                              prepare_quantized_softmax_build_options(input->info()->quantization_info().scale, beta).options());
+                              prepare_quantized_softmax_build_options(qinfo.scale, beta).options());
 
     // Create kernel
     std::string kernel_name = is_quantized_asymmetric ? "softmax_layer_norm_quantized" : "softmax_layer_norm";
diff --git a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp
index 5f266c5..bd4ff2c 100644
--- a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp
+++ b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp
@@ -116,12 +116,16 @@
     const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(output->info(), input1->info(), input2->info());
     if(is_data_type_quantized_asymmetric(input1->info()->data_type()) && have_different_qinfo)
     {
-        build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input1->info()->quantization_info().offset));
-        build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input1->info()->quantization_info().scale));
-        build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(input2->info()->quantization_info().offset));
-        build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(input2->info()->quantization_info().scale));
-        build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset));
-        build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale));
+        const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform();
+        const UniformQuantizationInfo iq2_info = input2->info()->quantization_info().uniform();
+        const UniformQuantizationInfo oq_info  = output->info()->quantization_info().uniform();
+
+        build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq1_info.offset));
+        build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale));
+        build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(iq2_info.offset));
+        build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale));
+        build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
+        build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale));
     }
 
     // Create kernel
diff --git a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp
index 54edaaf..a3ac102 100644
--- a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp
+++ b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp
@@ -138,16 +138,22 @@
     const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(output->info(), input1->info(), input2->info(), input3->info(), input4->info());
     if(is_data_type_quantized_asymmetric(input1->info()->data_type()) && have_different_qinfo)
     {
-        build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input1->info()->quantization_info().offset));
-        build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input1->info()->quantization_info().scale));
-        build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(input2->info()->quantization_info().offset));
-        build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(input2->info()->quantization_info().scale));
-        build_opts.add_option("-DOFFSET_IN3=" + float_to_string_with_full_precision(input3->info()->quantization_info().offset));
-        build_opts.add_option("-DSCALE_IN3=" + float_to_string_with_full_precision(input3->info()->quantization_info().scale));
-        build_opts.add_option("-DOFFSET_IN4=" + float_to_string_with_full_precision(input4->info()->quantization_info().offset));
-        build_opts.add_option("-DSCALE_IN4=" + float_to_string_with_full_precision(input4->info()->quantization_info().scale));
-        build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset));
-        build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale));
+        const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform();
+        const UniformQuantizationInfo iq2_info = input2->info()->quantization_info().uniform();
+        const UniformQuantizationInfo iq3_info = input3->info()->quantization_info().uniform();
+        const UniformQuantizationInfo iq4_info = input4->info()->quantization_info().uniform();
+        const UniformQuantizationInfo oq_info  = output->info()->quantization_info().uniform();
+
+        build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq1_info.offset));
+        build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale));
+        build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(iq2_info.offset));
+        build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale));
+        build_opts.add_option("-DOFFSET_IN3=" + float_to_string_with_full_precision(iq3_info.offset));
+        build_opts.add_option("-DSCALE_IN3=" + float_to_string_with_full_precision(iq3_info.scale));
+        build_opts.add_option("-DOFFSET_IN4=" + float_to_string_with_full_precision(iq4_info.offset));
+        build_opts.add_option("-DSCALE_IN4=" + float_to_string_with_full_precision(iq4_info.scale));
+        build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
+        build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale));
     }
 
     // Create kernel
diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
index 6c32cd2..b577944 100644
--- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
@@ -109,10 +109,13 @@
 
     if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info())
     {
-        build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().offset));
-        build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset));
-        build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().scale));
-        build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale));
+        const UniformQuantizationInfo iqinfo = input->info()->quantization_info().uniform();
+        const UniformQuantizationInfo oqinfo = output->info()->quantization_info().uniform();
+
+        build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iqinfo.offset));
+        build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oqinfo.offset));
+        build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iqinfo.scale));
+        build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oqinfo.scale));
     }
 
     // Create kernel
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index bc6a281..3f71553 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -30,7 +30,6 @@
 #include "arm_compute/core/NEON/NEFixedPoint.h"
 #include "arm_compute/core/NEON/NEMath.h"
 #include "arm_compute/core/NEON/wrapper/wrapper.h"
-#include "arm_compute/core/QAsymm8.h"
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/core/Validate.h"
@@ -320,15 +319,15 @@
     Iterator input(_input, win_collapsed);
     Iterator output(_output, win_collapsed);
 
-    const QuantizationInfo qi_in    = _input->info()->quantization_info();
-    const QuantizationInfo qi_out   = _output->info()->quantization_info();
-    const qasymm8x16_t     va       = vdupq_n_u8(sqcvt_qasymm8_f32(_act_info.a(), qi_in.scale, qi_in.offset));
-    const qasymm8x16_t     vb       = vdupq_n_u8(sqcvt_qasymm8_f32(_act_info.b(), qi_in.scale, qi_in.offset));
-    const qasymm8_t        a        = sqcvt_qasymm8_f32(_act_info.a(), qi_in.scale, qi_in.offset);
-    const qasymm8_t        b        = sqcvt_qasymm8_f32(_act_info.b(), qi_in.scale, qi_in.offset);
-    const qasymm8_t        const_0  = sqcvt_qasymm8_f32(0.f, qi_in.scale, qi_in.offset);
-    const qasymm8x16_t     vconst_0 = vdupq_n_u8(const_0);
-    const auto             vconst_1 = vdupq_n_f32(1.f);
+    const UniformQuantizationInfo qi_in    = _input->info()->quantization_info().uniform();
+    const UniformQuantizationInfo qi_out   = _output->info()->quantization_info().uniform();
+    const qasymm8x16_t            va       = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in));
+    const qasymm8x16_t            vb       = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in));
+    const qasymm8_t               a        = quantize_qasymm8(_act_info.a(), qi_in);
+    const qasymm8_t               b        = quantize_qasymm8(_act_info.b(), qi_in);
+    const qasymm8_t               const_0  = quantize_qasymm8(0.f, qi_in);
+    const qasymm8x16_t            vconst_0 = vdupq_n_u8(const_0);
+    const auto                    vconst_1 = vdupq_n_f32(1.f);
 
     // Initialise scale/offset for re-quantization
     float       s  = qi_in.scale / qi_out.scale;
@@ -415,9 +414,9 @@
             }
             else if(act == ActivationFunction::LOGISTIC)
             {
-                float tmp_f = scvt_f32_qasymm8(in, qi_in.scale, qi_in.offset);
+                float tmp_f = dequantize_qasymm8(in, qi_in);
                 tmp_f       = 1.f / (1.f + std::exp(-tmp_f));
-                tmp         = sqcvt_qasymm8_f32(tmp_f, qi_out.scale, qi_out.offset);
+                tmp         = quantize_qasymm8(tmp_f, qi_out);
             }
             else
             {
diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
index ca79a0a..164026c 100644
--- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
@@ -165,25 +165,26 @@
     const auto window_end_x          = static_cast<int>(window.x().end());
     const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
 
-    const float output_scale  = out->info()->quantization_info().scale;
-    const int   output_offset = out->info()->quantization_info().offset;
+    const UniformQuantizationInfo iq1_info = in1->info()->quantization_info().uniform();
+    const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform();
+    const UniformQuantizationInfo oq_info  = out->info()->quantization_info().uniform();
 
-    const float32x4_t vscale1    = vdupq_n_f32(in1->info()->quantization_info().scale);
-    const float32x4_t vscale2    = vdupq_n_f32(in2->info()->quantization_info().scale);
-    const float32x4_t invvscaleo = vdupq_n_f32(1.f / output_scale);
-    const int32x4_t   voffset1   = vdupq_n_s32(in1->info()->quantization_info().offset);
-    const int32x4_t   voffset2   = vdupq_n_s32(in2->info()->quantization_info().offset);
-    const float32x4_t voffseto   = vdupq_n_f32(output_offset);
+    const float32x4_t vscale1    = vdupq_n_f32(iq1_info.scale);
+    const float32x4_t vscale2    = vdupq_n_f32(iq2_info.scale);
+    const float32x4_t invvscaleo = vdupq_n_f32(1.f / oq_info.scale);
+    const int32x4_t   voffset1   = vdupq_n_s32(iq1_info.offset);
+    const int32x4_t   voffset2   = vdupq_n_s32(iq2_info.offset);
+    const float32x4_t voffseto   = vdupq_n_f32(oq_info.offset);
 
     if(is_broadcast_across_x)
     {
-        const bool             is_broadcast_input_2 = input2_win.x().step() == 0;
-        Window                 broadcast_win        = is_broadcast_input_2 ? input2_win : input1_win;
-        Window                 non_broadcast_win    = !is_broadcast_input_2 ? input2_win : input1_win;
-        const ITensor         *broadcast_tensor     = is_broadcast_input_2 ? in2 : in1;
-        const ITensor         *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1;
-        const QuantizationInfo broadcast_qinfo      = broadcast_tensor->info()->quantization_info();
-        const QuantizationInfo non_broadcast_qinfo  = non_broadcast_tensor->info()->quantization_info();
+        const bool                    is_broadcast_input_2 = input2_win.x().step() == 0;
+        Window                        broadcast_win        = is_broadcast_input_2 ? input2_win : input1_win;
+        Window                        non_broadcast_win    = !is_broadcast_input_2 ? input2_win : input1_win;
+        const ITensor                *broadcast_tensor     = is_broadcast_input_2 ? in2 : in1;
+        const ITensor                *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1;
+        const UniformQuantizationInfo broadcast_qinfo      = broadcast_tensor->info()->quantization_info().uniform();
+        const UniformQuantizationInfo non_broadcast_qinfo  = non_broadcast_tensor->info()->quantization_info().uniform();
 
         // Clear X Dimension on execution window as we handle manually
         non_broadcast_win.set(Window::DimX, Window::Dimension(0, 1, 1));
@@ -252,7 +253,7 @@
             for(; x < window_end_x; ++x)
             {
                 const float afs   = static_cast<int32_t>(*(non_broadcast_input_ptr + x) - non_broadcast_qinfo.offset) * non_broadcast_qinfo.scale;
-                *(output_ptr + x) = out->info()->quantization_info().quantize((afs + bfs), RoundingPolicy::TO_NEAREST_UP);
+                *(output_ptr + x) = quantize_qasymm8((afs + bfs), oq_info);
             }
         },
         broadcast_input, non_broadcast_input, output);
@@ -263,9 +264,6 @@
         input1_win.set(Window::DimX, Window::Dimension(0, 1, 1));
         input2_win.set(Window::DimX, Window::Dimension(0, 1, 1));
 
-        const QuantizationInfo input1_qinfo = in1->info()->quantization_info();
-        const QuantizationInfo input2_qinfo = in2->info()->quantization_info();
-
         Iterator input1(in1, input1_win);
         Iterator input2(in2, input2_win);
         Iterator output(out, win);
@@ -328,9 +326,9 @@
             // Compute left-over elements
             for(; x < window_end_x; ++x)
             {
-                const float afs   = static_cast<int32_t>((*(input1_ptr + x)) - input1_qinfo.offset) * input1_qinfo.scale;
-                const float bfs   = static_cast<int32_t>((*(input2_ptr + x)) - input2_qinfo.offset) * input2_qinfo.scale;
-                *(output_ptr + x) = out->info()->quantization_info().quantize((afs + bfs), RoundingPolicy::TO_NEAREST_UP);
+                const float afs   = static_cast<int32_t>((*(input1_ptr + x)) - iq1_info.offset) * iq1_info.scale;
+                const float bfs   = static_cast<int32_t>((*(input2_ptr + x)) - iq2_info.offset) * iq2_info.scale;
+                *(output_ptr + x) = quantize_qasymm8((afs + bfs), oq_info);
             }
         },
         input1, input2, output);
diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
index 45e1562..8874b52 100644
--- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
@@ -87,10 +87,14 @@
     Iterator input2(in2, window.broadcast_if_dimension_le_one(in2->info()->tensor_shape()));
     Iterator output(out, window);
 
+    const UniformQuantizationInfo iq1_info = in1->info()->quantization_info().uniform();
+    const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform();
+    const UniformQuantizationInfo oq_info  = out->info()->quantization_info().uniform();
+
     execute_window_loop(window, [&](const Coordinates &)
     {
-        const float32x4x4_t ta1 = vdequantize(vld1q_u8(reinterpret_cast<const qasymm8_t *>(input1.ptr())), in1->info()->quantization_info());
-        const float32x4x4_t ta2 = vdequantize(vld1q_u8(reinterpret_cast<const qasymm8_t *>(input2.ptr())), in2->info()->quantization_info());
+        const float32x4x4_t ta1 = vdequantize(vld1q_u8(reinterpret_cast<const qasymm8_t *>(input1.ptr())), iq1_info);
+        const float32x4x4_t ta2 = vdequantize(vld1q_u8(reinterpret_cast<const qasymm8_t *>(input2.ptr())), iq2_info);
 
         const float32x4x4_t ta3 =
         {
@@ -102,7 +106,7 @@
             }
         };
 
-        const uint8x16_t result = vquantize(ta3, out->info()->quantization_info());
+        const uint8x16_t result = vquantize(ta3, oq_info);
 
         vst1q_u8(reinterpret_cast<qasymm8_t *>(output.ptr()), result);
     },
diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp
index b360e9e..c9c70d6 100644
--- a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp
@@ -53,9 +53,9 @@
     Iterator input(in, window);
     Iterator output(out, window);
 
-    const DataType          dt           = in->info()->data_type();
-    const QuantizationInfo &input_qinfo  = in->info()->quantization_info();
-    const QuantizationInfo &output_qinfo = out->info()->quantization_info();
+    const DataType                dt           = in->info()->data_type();
+    const UniformQuantizationInfo input_qinfo  = in->info()->quantization_info().uniform();
+    const UniformQuantizationInfo output_qinfo = out->info()->quantization_info().uniform();
     if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo)
     {
         execute_window_loop(window, [&](const Coordinates &)
diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
index fdafc2d..385be04 100644
--- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
@@ -51,8 +51,8 @@
     static void convolve(const Window &window, unsigned int num_elems_written_per_iteration,
                          const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation)
     {
-        const int input_offset   = -input->info()->quantization_info().offset;
-        const int weights_offset = -weights->info()->quantization_info().offset;
+        const int input_offset   = -input->info()->quantization_info().uniform().offset;
+        const int weights_offset = -weights->info()->quantization_info().uniform().offset;
 
         const int          input_stride_x  = input->info()->strides_in_bytes().x();
         const int          input_stride_y  = input->info()->strides_in_bytes().y();
diff --git a/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp b/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp
index 88f8b31..53789e2 100644
--- a/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp
@@ -92,7 +92,7 @@
     auto zero = static_cast<T>(0);
     if(std::is_same<T, uint8_t>::value)
     {
-        zero = _input->info()->quantization_info().offset;
+        zero = _input->info()->quantization_info().uniform().offset;
     }
 
     execute_window_loop(window_out, [&](const Coordinates & id)
diff --git a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
index 1520225..a6dc097 100644
--- a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
@@ -97,7 +97,7 @@
 template <typename T>
 void run_dequantization(const ITensor *input, ITensor *output, const Window &window)
 {
-    const QuantizationInfo &qinfo = input->info()->quantization_info();
+    const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
 
     const int  window_step_x  = 16;
     const auto window_start_x = static_cast<int>(window.x().start());
@@ -129,7 +129,7 @@
         for(; x < window_end_x; ++x)
         {
             uint8_t val    = *(in_ptr + x);
-            *(out_ptr + x) = static_cast<T>(qinfo.dequantize(val));
+            *(out_ptr + x) = static_cast<T>(dequantize_qasymm8(val, qinfo));
         }
     },
     in, out);
diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
index 33457e1..0fe05d2 100644
--- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
@@ -142,9 +142,9 @@
 }
 
 template <ArithmeticOperation op>
-inline uint8_t elementwise_arithm_op_quantized_scalar(const float &a, const float &b, QuantizationInfo qinfo)
+inline uint8_t elementwise_arithm_op_quantized_scalar(const float &a, const float &b, UniformQuantizationInfo qinfo)
 {
-    return qinfo.quantize(elementwise_arithm_op_scalar<op>(a, b), RoundingPolicy::TO_NEAREST_UP);
+    return quantize_qasymm8(elementwise_arithm_op_scalar<op>(a, b), qinfo);
 }
 
 template <ArithmeticOperation op, typename VectorType>
@@ -253,7 +253,7 @@
 }
 
 template <ComparisonOperation op>
-inline uint8_t elementwise_comp_op_quantized_scalar(const float &a, const float &b, QuantizationInfo qinfo)
+inline uint8_t elementwise_comp_op_quantized_scalar(const float &a, const float &b, UniformQuantizationInfo qinfo)
 {
     ARM_COMPUTE_UNUSED(qinfo);
     return elementwise_comp_op_scalar<op>(a, b);
@@ -567,7 +567,7 @@
 }
 
 void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window,
-                              uint8_t (*scalar_func)(const float &, const float &, QuantizationInfo),
+                              uint8_t (*scalar_func)(const float &, const float &, UniformQuantizationInfo),
                               int (*broadcast_func)(int, int, int, const uint8_t *, float32x4x4_t, uint8_t *, int32x4_t, float32x4_t,
                                                     float32x4_t, float32x4_t, const bool),
                               int (*neon_func)(int, int, int, const uint8_t *, const uint8_t *, uint8_t *,
@@ -587,12 +587,11 @@
     const auto window_end_x          = static_cast<int>(window.x().end());
     const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
 
-    const float output_scale  = out->info()->quantization_info().scale;
-    const int   output_offset = out->info()->quantization_info().offset;
+    const UniformQuantizationInfo output_qinfo = out->info()->quantization_info().uniform();
 
     // Output quantization info (add 0.5 to round toward the nearest integer - 0.5 rounds away from zero)
-    const float32x4_t voffseto   = vdupq_n_f32(output_offset + 0.5f);
-    const float32x4_t invvscaleo = vdupq_n_f32(1.f / output_scale);
+    const float32x4_t voffseto   = vdupq_n_f32(output_qinfo.offset + 0.5f);
+    const float32x4_t invvscaleo = vdupq_n_f32(1.f / output_qinfo.scale);
 
     if(is_broadcast_across_x)
     {
@@ -603,8 +602,8 @@
         const ITensor *broadcast_tensor     = is_broadcast_input_2 ? in2 : in1;
         const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1;
 
-        const QuantizationInfo broadcast_qinfo     = broadcast_tensor->info()->quantization_info();
-        const QuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info();
+        const UniformQuantizationInfo broadcast_qinfo     = broadcast_tensor->info()->quantization_info().uniform();
+        const UniformQuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info().uniform();
 
         const int32x4_t   voffset_non_broadcast = vdupq_n_s32(non_broadcast_qinfo.offset);
         const float32x4_t vscale_non_broadcast  = vdupq_n_f32(non_broadcast_qinfo.scale);
@@ -628,31 +627,30 @@
                                       voffset_non_broadcast, vscale_non_broadcast, voffseto, invvscaleo, !is_broadcast_input_2);
             for(; x < window_end_x; ++x)
             {
-                const float afs   = scvt_f32_qasymm8(*(non_broadcast_input_ptr + x), non_broadcast_qinfo.scale, non_broadcast_qinfo.offset);
-                const float bfs   = scvt_f32_qasymm8(broadcast_value, broadcast_qinfo.scale, broadcast_qinfo.offset);
-                *(output_ptr + x) = (*scalar_func)(!is_broadcast_input_2 ? bfs : afs, !is_broadcast_input_2 ? afs : bfs,
-                                                   out->info()->quantization_info());
+                const float afs   = dequantize_qasymm8(*(non_broadcast_input_ptr + x), non_broadcast_qinfo);
+                const float bfs   = dequantize_qasymm8(broadcast_value, broadcast_qinfo);
+                *(output_ptr + x) = (*scalar_func)(!is_broadcast_input_2 ? bfs : afs, !is_broadcast_input_2 ? afs : bfs, output_qinfo);
             }
         },
         broadcast_input, non_broadcast_input, output);
     }
     else
     {
+        const UniformQuantizationInfo input1_qinfo = in1->info()->quantization_info().uniform();
+        const UniformQuantizationInfo input2_qinfo = in2->info()->quantization_info().uniform();
+
         // Input1 quantization info
-        const int32x4_t   voffset1 = vdupq_n_s32(in1->info()->quantization_info().offset);
-        const float32x4_t vscale1  = vdupq_n_f32(in1->info()->quantization_info().scale);
+        const int32x4_t   voffset1 = vdupq_n_s32(input1_qinfo.offset);
+        const float32x4_t vscale1  = vdupq_n_f32(input1_qinfo.scale);
 
         // Input2 quantization info
-        const int32x4_t   voffset2 = vdupq_n_s32(in2->info()->quantization_info().offset);
-        const float32x4_t vscale2  = vdupq_n_f32(in2->info()->quantization_info().scale);
+        const int32x4_t   voffset2 = vdupq_n_s32(input2_qinfo.offset);
+        const float32x4_t vscale2  = vdupq_n_f32(input2_qinfo.scale);
 
         // Clear X Dimension on execution window as we handle manually
         input1_win.set(Window::DimX, Window::Dimension(0, 1, 1));
         input2_win.set(Window::DimX, Window::Dimension(0, 1, 1));
 
-        const QuantizationInfo input1_qinfo = in1->info()->quantization_info();
-        const QuantizationInfo input2_qinfo = in2->info()->quantization_info();
-
         Iterator input1(in1, input1_win);
         Iterator input2(in2, input2_win);
         Iterator output(out, win);
@@ -667,9 +665,9 @@
                                  vscale1, vscale2, voffseto, invvscaleo);
             for(; x < window_end_x; ++x)
             {
-                const float afs   = scvt_f32_qasymm8(*(input1_ptr + x), input1_qinfo.scale, input1_qinfo.offset);
-                const float bfs   = scvt_f32_qasymm8(*(input2_ptr + x), input2_qinfo.scale, input2_qinfo.offset);
-                *(output_ptr + x) = (*scalar_func)(afs, bfs, out->info()->quantization_info());
+                const float afs   = dequantize_qasymm8(*(input1_ptr + x), input1_qinfo);
+                const float bfs   = dequantize_qasymm8(*(input2_ptr + x), input2_qinfo);
+                *(output_ptr + x) = (*scalar_func)(afs, bfs, output_qinfo);
             }
         },
         input1, input2, output);
diff --git a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp
index e699bac..d45e3ce 100644
--- a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp
+++ b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp
@@ -27,12 +27,9 @@
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/ITensor.h"
 #include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/core/Window.h"
-#include "arm_compute/core/Window.h"
 
 #include "support/ToolchainSupport.h"
 
diff --git a/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp
index cba3390..0e77ead 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp
@@ -179,8 +179,8 @@
     Iterator in2(_input1, window_w);
     Iterator out(_output, window_out);
 
-    const int input_offset   = -_input0->info()->quantization_info().offset;
-    const int weights_offset = -_input1->info()->quantization_info().offset;
+    const int input_offset   = -_input0->info()->quantization_info().uniform().offset;
+    const int weights_offset = -_input1->info()->quantization_info().uniform().offset;
 
     const int input_w          = _input0->info()->dimension(0);
     const int input_h          = _input0->info()->dimension(1);
diff --git a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp
index b8e204c..8efab7d 100644
--- a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp
@@ -112,11 +112,11 @@
     uint8_t *output_ptr = _output->buffer() + _output->info()->offset_first_element_in_bytes() + _height_offset * _output->info()->strides_in_bytes()[Window::DimY];
 
     // Create iterators
-    Iterator                input(_input, window);
-    Iterator                output(_output, window);
-    const DataType          dt           = _input->info()->data_type();
-    const QuantizationInfo &input_qinfo  = _input->info()->quantization_info();
-    const QuantizationInfo &output_qinfo = _output->info()->quantization_info();
+    Iterator                       input(_input, window);
+    Iterator                       output(_output, window);
+    const DataType                 dt           = _input->info()->data_type();
+    const UniformQuantizationInfo &input_qinfo  = _input->info()->quantization_info().uniform();
+    const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform();
     if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo)
     {
         execute_window_loop(window, [&](const Coordinates &)
diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp
index 34af0cf..874259b 100644
--- a/src/core/NEON/kernels/NEIm2ColKernel.cpp
+++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp
@@ -279,7 +279,7 @@
     const int pad_top        = _conv_info.pad_top();
     const int stride_x       = _conv_info.stride().first;
     const int stride_y       = _conv_info.stride().second;
-    const int pad_value      = is_data_type_quantized(_input->info()->data_type()) ? _input->info()->quantization_info().offset : 0;
+    const int pad_value      = is_data_type_quantized(_input->info()->data_type()) ? _input->info()->quantization_info().uniform().offset : 0;
 
     Window window_in_out(window);
     // The first three dimensions of the input and output are increased by the inner loops
diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
index fa16484..c313b23 100644
--- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
+++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
@@ -174,7 +174,7 @@
 }
 
 void mul_saturate_QASYMM8_QASYMM8_QASYMM8_n(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale,
-                                            const QuantizationInfo &input1_qua_info, const QuantizationInfo &input2_qua_info, const QuantizationInfo &output_qua_info)
+                                            const UniformQuantizationInfo &input1_qua_info, const UniformQuantizationInfo &input2_qua_info, const UniformQuantizationInfo &output_qua_info)
 {
     const auto input1 = static_cast<const qasymm8_t *__restrict>(input1_ptr);
     const auto input2 = static_cast<const qasymm8_t *__restrict>(input2_ptr);
@@ -187,7 +187,7 @@
     const float32x4x4_t in1_f32x4x4 = vdequantize(input1_q, input1_qua_info);
     const float32x4x4_t in2_f32x4x4 = vdequantize(input2_q, input2_qua_info);
 
-    const QuantizationInfo tmp_qua_info = QuantizationInfo(output_qua_info.scale / scale, output_qua_info.offset);
+    const UniformQuantizationInfo tmp_qua_info = { output_qua_info.scale / scale, output_qua_info.offset };
 
     const float32x4x4_t out_f32x4x4 =
     {
@@ -660,7 +660,7 @@
         execute_window_loop(collapsed, [&](const Coordinates &)
         {
             (*_func_qasymm8)(input1.ptr(), input2.ptr(), output.ptr(), _scale,
-                             _input1->info()->quantization_info(), _input2->info()->quantization_info(), _output->info()->quantization_info());
+                             _input1->info()->quantization_info().uniform(), _input2->info()->quantization_info().uniform(), _output->info()->quantization_info().uniform());
             collapsed.slide_window_slice_3D(slice_input1);
             collapsed.slide_window_slice_3D(slice_input2);
         },
diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
index ac2ffa1..62c9ca0 100644
--- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
@@ -562,6 +562,10 @@
 
     const int scale_step_x = (pool_stride_x == 1) ? 2 : 1;
 
+    const UniformQuantizationInfo input_qinfo          = _input->info()->quantization_info().uniform();
+    const UniformQuantizationInfo output_qinfo         = _output->info()->quantization_info().uniform();
+    const bool                    have_different_qinfo = input_qinfo != output_qinfo;
+
     execute_window_loop(window, [&](const Coordinates & id)
     {
         const auto top_data    = vld1q_u8(reinterpret_cast<const uint8_t *>(input_top_ptr + input.offset()));
@@ -640,9 +644,7 @@
             }
         }
 
-        const QuantizationInfo &input_qinfo  = _input->info()->quantization_info();
-        const QuantizationInfo &output_qinfo = _output->info()->quantization_info();
-        if(input_qinfo != output_qinfo)
+        if(have_different_qinfo)
         {
             const auto requantized_output = vquantize(vdequantize(vcombine_u8(lower_res, upper_res), input_qinfo), output_qinfo);
             lower_res                     = vget_low_u8(requantized_output);
@@ -814,8 +816,8 @@
     const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
     const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
 
-    const QuantizationInfo &input_qinfo  = _input->info()->quantization_info();
-    const QuantizationInfo &output_qinfo = _output->info()->quantization_info();
+    const UniformQuantizationInfo &input_qinfo  = _input->info()->quantization_info().uniform();
+    const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform();
 
     const uint8_t *const input_top_ptr    = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
     const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
@@ -1598,6 +1600,9 @@
     const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
     const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
 
+    const UniformQuantizationInfo &input_qinfo  = _input->info()->quantization_info().uniform();
+    const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform();
+
     execute_window_loop(window, [&](const Coordinates & id)
     {
         uint8_t res = 0;
@@ -1671,11 +1676,7 @@
         }
 
         // Store result
-        const QuantizationInfo &input_qinfo  = _input->info()->quantization_info();
-        const QuantizationInfo &output_qinfo = _output->info()->quantization_info();
-        res                                  = (input_qinfo != output_qinfo) ? sqcvt_qasymm8_f32(scvt_f32_qasymm8(res, input_qinfo.scale, input_qinfo.offset), output_qinfo.scale,
-                                                                                                 output_qinfo.offset) :
-                                               res;
+        res                                          = (input_qinfo != output_qinfo) ? quantize_qasymm8(dequantize_qasymm8(res, input_qinfo), output_qinfo) : res;
         *(reinterpret_cast<uint8_t *>(output.ptr())) = res;
     },
     input, output);
@@ -1698,9 +1699,9 @@
     const int upper_bound_w = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_right);
     const int upper_bound_h = _input->info()->dimension(2) + (exclude_padding ? 0 : pool_pad_bottom);
 
-    const float32x4_t       half_scale_v = vdupq_n_f32(0.5f);
-    const QuantizationInfo &input_qinfo  = _input->info()->quantization_info();
-    const QuantizationInfo &output_qinfo = _output->info()->quantization_info();
+    const float32x4_t             half_scale_v = vdupq_n_f32(0.5f);
+    const UniformQuantizationInfo input_qinfo  = _input->info()->quantization_info().uniform();
+    const UniformQuantizationInfo output_qinfo = _output->info()->quantization_info().uniform();
 
     execute_window_loop(window, [&](const Coordinates & id)
     {
diff --git a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp
index 4deeb1c..0aa34cd 100644
--- a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp
@@ -107,6 +107,7 @@
     const auto     window_start_x = static_cast<int>(window.x().start());
     const auto     window_end_x   = static_cast<int>(window.x().end());
 
+    const UniformQuantizationInfo uqinfo = qinfo.uniform();
 #ifdef __aarch64__
     constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_EVEN;
 #else  //__aarch64__
@@ -127,12 +128,12 @@
         int x = window_start_x;
         for(; x <= (window_end_x - window_step); x += window_step)
         {
-            wrapper::vstore(&output_ptr[x], vquantize(load_value(&input_ptr[x]), qinfo));
+            wrapper::vstore(&output_ptr[x], vquantize(load_value(&input_ptr[x]), uqinfo));
         }
         // Compute left-over elements
         for(; x < window_end_x; ++x)
         {
-            output_ptr[x] = qinfo.quantize(input_ptr[x], rounding_policy);
+            output_ptr[x] = quantize_qasymm8(input_ptr[x], uqinfo, rounding_policy);
         }
     },
     input, output);
diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
index c6e8536..1bfef27 100644
--- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
@@ -542,6 +542,9 @@
     inline void operator()(Iterator &input, Iterator &output, Window &in_slice, Window &out_slice, const TensorInfo &in_info, const ReductionOperation op)
     {
         ARM_COMPUTE_UNUSED(out_slice);
+
+        const UniformQuantizationInfo iq_info = in_info.quantization_info().uniform();
+
         auto vec_res_value1 = vdupq_n_u32(static_cast<uint32_t>(0.f));
         auto vec_res_value2 = vdupq_n_u32(static_cast<uint32_t>(0.f));
         auto vec_res_value3 = vdupq_n_u32(static_cast<uint32_t>(0.f));
@@ -584,8 +587,8 @@
                 }
                 case ReductionOperation::PROD:
                 {
-                    const auto offset32x4f_4 = vdupq_n_f32(in_info.quantization_info().offset);
-                    const auto scale32x4f_4  = vdupq_n_f32(in_info.quantization_info().scale);
+                    const auto offset32x4f_4 = vdupq_n_f32(iq_info.offset);
+                    const auto scale32x4f_4  = vdupq_n_f32(iq_info.scale);
 
                     const auto temp16x8t_1 = vmovl_u8(vget_low_u8(vec_elements));
                     const auto temp16x8t_2 = vmovl_u8(vget_high_u8(vec_elements));
@@ -673,7 +676,7 @@
                 res *= wrapper::vgetlane(carry_res, 3);
 
                 //re-quantize result
-                res             = sqcvt_qasymm8_f32(res, in_info.quantization_info().scale, in_info.quantization_info().offset);
+                res             = quantize_qasymm8(res, iq_info);
                 *(output.ptr()) = static_cast<uint8_t>(res);
                 break;
             }
@@ -877,6 +880,8 @@
     {
         ARM_COMPUTE_UNUSED(out_slice);
 
+        const UniformQuantizationInfo iq_info = in_info.quantization_info().uniform();
+
         execute_window_loop(in_slice, [&](const Coordinates &)
         {
             uint32x4x4_t vec_res_idx{ { 0 } };
@@ -932,8 +937,8 @@
                     }
                     case ReductionOperation::PROD:
                     {
-                        const auto offset32x4f_4 = vdupq_n_f32(in_info.quantization_info().offset);
-                        const auto scale32x4f_4  = vdupq_n_f32(in_info.quantization_info().scale);
+                        const auto offset32x4f_4 = vdupq_n_f32(iq_info.offset);
+                        const auto scale32x4f_4  = vdupq_n_f32(iq_info.scale);
 
                         const auto temp16x8t_1 = vmovl_u8(vget_low_u8(vec_elements));
                         const auto temp16x8t_2 = vmovl_u8(vget_high_u8(vec_elements));
@@ -1004,8 +1009,8 @@
             }
             else if(op == ReductionOperation::PROD)
             {
-                const auto offset32x4f_4 = vdupq_n_f32(in_info.quantization_info().offset);
-                const auto iscale32x4f_4 = vinvq_f32(vdupq_n_f32(in_info.quantization_info().scale));
+                const auto offset32x4f_4 = vdupq_n_f32(iq_info.offset);
+                const auto iscale32x4f_4 = vinvq_f32(vdupq_n_f32(iq_info.scale));
 
                 //re-quantize
                 vec_res_value1_f = vaddq_f32(vmulq_f32(vec_res_value1_f, iscale32x4f_4), offset32x4f_4);
diff --git a/src/core/NEON/kernels/NEReverseKernel.cpp b/src/core/NEON/kernels/NEReverseKernel.cpp
index 36398cf..99328de 100644
--- a/src/core/NEON/kernels/NEReverseKernel.cpp
+++ b/src/core/NEON/kernels/NEReverseKernel.cpp
@@ -31,7 +31,6 @@
 #include "arm_compute/core/NEON/NEFixedPoint.h"
 #include "arm_compute/core/NEON/NEMath.h"
 #include "arm_compute/core/NEON/wrapper/wrapper.h"
-#include "arm_compute/core/QAsymm8.h"
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/core/Validate.h"
diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp
index 003f472..e99b97b 100644
--- a/src/core/NEON/kernels/NEScaleKernel.cpp
+++ b/src/core/NEON/kernels/NEScaleKernel.cpp
@@ -218,7 +218,7 @@
     const int input_height = input->info()->dimension(2);
 
     T border_value;
-    if(use_padding && border_mode != BorderMode::REPLICATE )
+    if(use_padding && border_mode != BorderMode::REPLICATE)
     {
         // configure() sets top border to 0 for BorderMode::REPLICATE and border_value is not needed in execute_window_loop() for REPLICATE
         border_value = *reinterpret_cast<T *>(input->buffer() + input->info()->offset_first_element_in_bytes() - stride_w);
@@ -235,9 +235,9 @@
 
     int border_size = (border_mode == BorderMode::UNDEFINED) ? 0 : 1;
 
-    const bool             is_quantized = (input->info()->data_type() == DataType::QASYMM8);
-    const QuantizationInfo iq_info      = input->info()->quantization_info();
-    const QuantizationInfo oq_info      = output->info()->quantization_info();
+    const bool                    is_quantized = (input->info()->data_type() == DataType::QASYMM8);
+    const UniformQuantizationInfo iq_info      = input->info()->quantization_info().uniform();
+    const UniformQuantizationInfo oq_info      = output->info()->quantization_info().uniform();
 
     execute_window_loop(window, [&](const Coordinates & id)
     {
@@ -295,11 +295,11 @@
             //dequantize quantized input
             if(is_quantized)
             {
-                float inp00 = iq_info.dequantize(a00);
-                float inp01 = iq_info.dequantize(a01);
-                float inp10 = iq_info.dequantize(a10);
-                float inp11 = iq_info.dequantize(a11);
-                res         = static_cast<T>(oq_info.quantize((inp00 * w1 + inp01 * w2 + inp10 * w3 + inp11 * w4), RoundingPolicy::TO_NEAREST_UP));
+                float inp00 = dequantize_qasymm8(a00, iq_info);
+                float inp01 = dequantize_qasymm8(a01, iq_info);
+                float inp10 = dequantize_qasymm8(a10, iq_info);
+                float inp11 = dequantize_qasymm8(a11, iq_info);
+                res         = static_cast<T>(quantize_qasymm8((inp00 * w1 + inp01 * w2 + inp10 * w3 + inp11 * w4), oq_info));
             }
             else
             {
@@ -651,9 +651,9 @@
     const size_t in_stide_in_bytes = _input->info()->strides_in_bytes()[1];
     const size_t in_stride         = in_stide_in_bytes / _input->info()->element_size();
 
-    const bool             is_quantized = (_input->info()->data_type() == DataType::QASYMM8);
-    const QuantizationInfo iq_info      = _input->info()->quantization_info();
-    const QuantizationInfo oq_info      = _output->info()->quantization_info();
+    const bool                    is_quantized = (_input->info()->data_type() == DataType::QASYMM8);
+    const UniformQuantizationInfo iq_info      = _input->info()->quantization_info().uniform();
+    const UniformQuantizationInfo oq_info      = _output->info()->quantization_info().uniform();
 
     switch(_input->info()->data_type())
     {
diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
index e9417ec..4144a18 100644
--- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
+++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -595,7 +595,7 @@
     const int start_x     = in.info()->valid_region().anchor.x();
     const int input_width = in.info()->valid_region().shape.x();
 
-    const float scale_beta = -beta * in.info()->quantization_info().scale;
+    const float scale_beta = -beta * in.info()->quantization_info().uniform().scale;
 
     Iterator in_it(&in, window);
     Iterator max_it(&max, window);
diff --git a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp
index aea6875..28f655c 100644
--- a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp
@@ -112,11 +112,11 @@
     uint8_t *output_ptr = _output->buffer() + _output->info()->offset_first_element_in_bytes() + _width_offset * _output->info()->strides_in_bytes()[0];
 
     // Create iterators
-    Iterator                input(_input, window);
-    Iterator                output(_output, window);
-    const DataType          dt           = _input->info()->data_type();
-    const QuantizationInfo &input_qinfo  = _input->info()->quantization_info();
-    const QuantizationInfo &output_qinfo = _output->info()->quantization_info();
+    Iterator                       input(_input, window);
+    Iterator                       output(_output, window);
+    const DataType                 dt           = _input->info()->data_type();
+    const UniformQuantizationInfo &input_qinfo  = _input->info()->quantization_info().uniform();
+    const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform();
     if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo)
     {
         execute_window_loop(window, [&](const Coordinates &)
diff --git a/src/core/NEON/kernels/NEYOLOLayerKernel.cpp b/src/core/NEON/kernels/NEYOLOLayerKernel.cpp
index 09a4a11..383c2b8 100644
--- a/src/core/NEON/kernels/NEYOLOLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEYOLOLayerKernel.cpp
@@ -30,7 +30,6 @@
 #include "arm_compute/core/NEON/NEFixedPoint.h"
 #include "arm_compute/core/NEON/NEMath.h"
 #include "arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h"
-#include "arm_compute/core/QAsymm8.h"
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/core/Validate.h"