COMPMID-2225: Add interface support for new quantized data types.

Add support for:
-QSYMM8, 8-bit quantized symmetric
-QSYMM8_PER_CHANNEL, 8-bit quantized symmetric with per channel quantization

Change-Id: I00c4ff98e44af37419470af61419ee95d0de2463
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1236
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/runtime/CL/CLSubTensor.cpp b/src/runtime/CL/CLSubTensor.cpp
index d0e7d76..0f36250 100644
--- a/src/runtime/CL/CLSubTensor.cpp
+++ b/src/runtime/CL/CLSubTensor.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -58,6 +58,11 @@
     return _parent->cl_buffer();
 }
 
+CLQuantization CLSubTensor::quantization() const
+{
+    return _parent->quantization();
+}
+
 ICLTensor *CLSubTensor::parent()
 {
     return _parent;
diff --git a/src/runtime/CL/CLTensor.cpp b/src/runtime/CL/CLTensor.cpp
index dd27738..732689e 100644
--- a/src/runtime/CL/CLTensor.cpp
+++ b/src/runtime/CL/CLTensor.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -47,6 +47,11 @@
     return _allocator.cl_data();
 }
 
+CLQuantization CLTensor::quantization() const
+{
+    return _allocator.quantization();
+}
+
 CLTensorAllocator *CLTensor::allocator()
 {
     return &_allocator;
diff --git a/src/runtime/CL/CLTensorAllocator.cpp b/src/runtime/CL/CLTensorAllocator.cpp
index 101e4f1..63aa1ba 100644
--- a/src/runtime/CL/CLTensorAllocator.cpp
+++ b/src/runtime/CL/CLTensorAllocator.cpp
@@ -34,6 +34,14 @@
 
 namespace
 {
+/** Helper function used to allocate the backing memory of a tensor
+ *
+ * @param[in] context   OpenCL context to use
+ * @param[in] size      Size of the allocation
+ * @param[in] alignment Alignment of the allocation
+ *
+ * @return A wrapped memory region
+ */
 std::unique_ptr<ICLMemoryRegion> allocate_region(const cl::Context &context, size_t size, cl_uint alignment)
 {
     // Try fine-grain SVM
@@ -54,13 +62,49 @@
     }
     return region;
 }
+/** Clears quantization arrays
+ *
+ * @param[out] scale  Quantization scale array (reset to an empty array)
+ * @param[out] offset Quantization offset array (reset to an empty array)
+ */
+void clear_quantization_arrays(CLFloatArray &scale, CLInt32Array &offset)
+{
+    // Clear arrays
+    scale  = CLFloatArray();
+    offset = CLInt32Array();
+}
+/** Helper function used to create quantization data arrays
+ *
+ * @param[out] scale    Quantization scale array (allocated and filled with the scales from @p qinfo)
+ * @param[out] offset   Quantization offset array (currently only cleared; offsets are not yet populated)
+ * @param[in]      qinfo    Quantization info
+ * @param[in]      pad_size Pad size to use in case array needs to be padded for computation purposes
+ *
+ * @note The function returns void; the allocated and filled arrays are passed back through the output parameters
+ */
+void populate_quantization_info(CLFloatArray &scale, CLInt32Array &offset, const QuantizationInfo &qinfo, size_t pad_size)
+{
+    clear_quantization_arrays(scale, offset);
+
+    // Create scale array
+    const size_t num_elements = qinfo.scale.size();
+    const size_t element_size = sizeof(decltype(qinfo.scale)::value_type);
+    scale                     = CLFloatArray(num_elements + pad_size);
+    scale.resize(num_elements);
+    CLScheduler::get().queue().enqueueWriteBuffer(scale.cl_buffer(), CL_TRUE, 0, num_elements * element_size, qinfo.scale.data());
+}
 } // namespace
 
 CLTensorAllocator::CLTensorAllocator(CLTensor *owner)
-    : _associated_memory_group(nullptr), _memory(), _mapping(nullptr), _owner(owner)
+    : _associated_memory_group(nullptr), _memory(), _mapping(nullptr), _owner(owner), _scale(), _offset()
 {
 }
 
+CLQuantization CLTensorAllocator::quantization() const
+{
+    return { &_scale, &_offset };
+}
+
 uint8_t *CLTensorAllocator::data()
 {
     return _mapping;
@@ -73,6 +117,7 @@
 
 void CLTensorAllocator::allocate()
 {
+    // Allocate tensor backing memory
     if(_associated_memory_group == nullptr)
     {
         if(_memory.region() != nullptr && _memory.cl_region()->cl_data().get() != nullptr)
@@ -91,6 +136,15 @@
     {
         _associated_memory_group->finalize_memory(_owner, _memory, info().total_size());
     }
+
+    // Allocate and fill the quantization parameter arrays
+    if(info().data_type() == DataType::QSYMM8_PER_CHANNEL)
+    {
+        const size_t pad_size = 0;
+        populate_quantization_info(_scale, _offset, info().quantization_info(), pad_size);
+    }
+
+    // Lock allocator
     info().set_is_resizable(false);
 }
 
@@ -98,6 +152,7 @@
 {
     _mapping = nullptr;
     _memory.set_region(nullptr);
+    clear_quantization_arrays(_scale, _offset);
     info().set_is_resizable(true);
 }
 
diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
index 97b0a01..e912740 100644
--- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
@@ -130,7 +130,7 @@
     PixelValue &&zero_value(0.f);
     if(is_data_type_quantized_asymmetric(input->info()->data_type()))
     {
-        zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().offset));
+        zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().uniform().offset));
     }
     _border_handler.configure(input_to_use, _kernel->border_size(), BorderMode::CONSTANT, zero_value);
 }
@@ -288,6 +288,10 @@
         const size_t patch_size = weights_w * weights_h + ((append_bias) ? 1 : 0);
         const size_t conv_size  = conv_w * conv_h;
 
+        const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+        const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
+        const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
+
         // Im2Col configuration
         TensorShape shape_im2col = input->info()->tensor_shape();
         shape_im2col.set(0, patch_size);
@@ -319,9 +323,9 @@
         // Output staged configuration
         if(_is_quantized)
         {
-            const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input->info()->quantization_info() : output->info()->quantization_info();
+            const UniformQuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? iq_info : oq_info;
 
-            float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output_quant_info.scale;
+            float multiplier = iq_info.scale * wq_info.scale / output_quant_info.scale;
             int   output_multiplier;
             int   output_shift;
             quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
@@ -334,8 +338,8 @@
         PixelValue zero_w(static_cast<int32_t>(0));
         if(_is_quantized)
         {
-            zero_in = PixelValue(static_cast<int32_t>(input->info()->quantization_info().offset));
-            zero_w  = PixelValue(static_cast<int32_t>(weights->info()->quantization_info().offset));
+            zero_in = PixelValue(static_cast<int32_t>(iq_info.offset));
+            zero_w  = PixelValue(static_cast<int32_t>(wq_info.offset));
         }
         BorderSize border_size = _v2mm_kernel.border_size();
         _v2mm_input_fill_border.configure(&_input_reshaped, border_size, BorderMode::CONSTANT, zero_in);
diff --git a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
index c451bd4..bfc6ff1 100644
--- a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -49,7 +49,7 @@
     PixelValue &&zero_value(0.f);
     if(is_data_type_quantized_asymmetric(input->info()->data_type()))
     {
-        zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().offset));
+        zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().uniform().offset));
     }
     _input_border_handler.configure(input, _direct_conv_kernel.border_size(), BorderMode::CONSTANT, zero_value);
 
diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
index 7b9229c..87d4c56 100644
--- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
@@ -41,10 +41,13 @@
 {
     if(is_data_type_quantized_asymmetric(input.data_type()))
     {
+        const UniformQuantizationInfo iq_info = input.quantization_info().uniform();
+        const UniformQuantizationInfo wq_info = weights.quantization_info().uniform();
+
         // Since we need negative offsets for computing convolution, we need to change QuantizationInfo()
         // Extract and negate input and weights offset
-        const QuantizationInfo input_quantization_info(input.quantization_info().scale, -input.quantization_info().offset);
-        const QuantizationInfo weights_quantization_info(weights.quantization_info().scale, -weights.quantization_info().offset);
+        const QuantizationInfo input_quantization_info(iq_info.scale, -iq_info.offset);
+        const QuantizationInfo weights_quantization_info(wq_info.scale, -wq_info.offset);
 
         // Validate gemmlowp function
         ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyCore::validate(&input.clone()->set_quantization_info(input_quantization_info),
@@ -88,8 +91,8 @@
         const QuantizationInfo input_quantization_info   = input->info()->quantization_info();
         const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
 
-        input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset));
-        weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset));
+        input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
+        weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
 
         // Configure gemmlowp function
         _mm_gemmlowp.configure(input, weights, nullptr, output);
@@ -230,11 +233,15 @@
     // Configure output stage for asymmetric quantized types
     if(_is_quantized)
     {
-        float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output->info()->quantization_info().scale;
+        const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+        const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
+        const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
+
+        float multiplier = iq_info.scale * wq_info.scale / oq_info.scale;
         int   output_multiplier;
         int   output_shift;
         quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
-        _gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier, output_shift, output->info()->quantization_info().offset);
+        _gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier, output_shift, oq_info.offset);
         _gemmlowp_output.allocator()->allocate();
     }
 }
diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
index 03d516f..4e518fc 100644
--- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
@@ -115,8 +115,8 @@
         const QuantizationInfo input_quantization_info   = input->info()->quantization_info();
         const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
 
-        input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset));
-        weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset));
+        input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
+        weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
 
         _mm_gemmlowp.configure(input, weights, biases, output, gemm_info);
 
@@ -151,8 +151,8 @@
 
         std::unique_ptr<ITensorInfo> input_qa   = input->clone();
         std::unique_ptr<ITensorInfo> weights_qa = weights->clone();
-        input_qa->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset));
-        weights_qa->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset));
+        input_qa->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
+        weights_qa->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
 
         // Perform validation step on GEMMLowp
         return CLGEMMLowpMatrixMultiplyCore::validate(input_qa.get(), weights_qa.get(), biases, output, gemm_info);
@@ -190,6 +190,10 @@
     const unsigned int kernel_width  = weights->info()->dimension(idx_width);
     const unsigned int kernel_height = weights->info()->dimension(idx_height);
 
+    const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+    const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
+    const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
+
     _is_prepared                = weights_info.retain_internal_weights();
     _original_weights           = weights;
     _is_quantized               = is_data_type_quantized_asymmetric(input->info()->data_type());
@@ -281,9 +285,9 @@
     // Configure output stage for quantized case
     if(_is_quantized)
     {
-        const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input->info()->quantization_info() : output->info()->quantization_info();
+        const auto output_quant_info = (output->info()->total_size() == 0) ? iq_info : oq_info;
 
-        const float multiplier        = (input->info()->quantization_info().scale * weights->info()->quantization_info().scale) / output_quant_info.scale;
+        const float multiplier        = (iq_info.scale * wq_info.scale) / output_quant_info.scale;
         int         output_multiplier = 0;
         int         output_shift      = 0;
         quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
@@ -298,8 +302,8 @@
 
         if(_is_activationlayer_enabled && supported_acts.count(act_info.activation()) != 0)
         {
-            const int a_const_int = output_quant_info.quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP);
-            const int b_const_int = output_quant_info.quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP);
+            const int a_const_int = quantize_qasymm8(act_info.a(), output_quant_info);
+            const int b_const_int = quantize_qasymm8(act_info.b(), output_quant_info);
 
             min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? output_quant_info.offset : b_const_int;
             max_activation = act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU ? 255 : a_const_int;
@@ -387,6 +391,10 @@
     // In case of F16, fused bias will be used in GEMM
     const bool run_addition = (skip_im2col) && (append_bias) && (data_type != DataType::F16);
 
+    const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
+    const UniformQuantizationInfo wq_info = weights->quantization_info().uniform();
+    const UniformQuantizationInfo oq_info = output->quantization_info().uniform();
+
     ARM_COMPUTE_RETURN_ERROR_ON((weights->dimension(idx_channel) * num_groups) != input->dimension(idx_channel));
     ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);
 
@@ -468,9 +476,9 @@
 
     if(is_quantized)
     {
-        const QuantizationInfo output_quant_info = (output->total_size() == 0) ? input->quantization_info() : output->quantization_info();
+        const auto output_quant_info = (output->total_size() == 0) ? iq_info : oq_info;
 
-        const float multiplier        = (input->quantization_info().scale * weights->quantization_info().scale) / output_quant_info.scale;
+        const float multiplier        = (iq_info.scale * wq_info.scale) / output_quant_info.scale;
         int         output_multiplier = 0;
         int         output_shift      = 0;
 
@@ -486,8 +494,8 @@
 
         if(is_activationlayer_enabled && supported_acts.count(act_info.activation()) != 0)
         {
-            const int a_const_int = output_quant_info.quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP);
-            const int b_const_int = output_quant_info.quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP);
+            const int a_const_int = quantize_qasymm8(act_info.a(), output_quant_info);
+            const int b_const_int = quantize_qasymm8(act_info.b(), output_quant_info);
 
             min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? output_quant_info.offset : b_const_int;
             max_activation = act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU ? 255 : a_const_int;
diff --git a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp
index bcb91e0..36a120e 100644
--- a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp
@@ -277,11 +277,15 @@
 
     if(_is_quantized)
     {
-        float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / _gemmlowp_final.info()->quantization_info().scale;
+        const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+        const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
+        const UniformQuantizationInfo oq_info = _gemmlowp_final.info()->quantization_info().uniform();
+
+        float multiplier = iq_info.scale * wq_info.scale / oq_info.scale;
         int   output_multiplier(0);
         int   output_shift(0);
         quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
-        _gemmlowp_output_stage.configure(&_gemmlowp_final, nullptr, output_stage_output, output_multiplier, output_shift, _gemmlowp_final.info()->quantization_info().offset);
+        _gemmlowp_output_stage.configure(&_gemmlowp_final, nullptr, output_stage_output, output_multiplier, output_shift, oq_info.offset);
         _gemmlowp_final.allocator()->allocate();
     }
 
diff --git a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
index 049db1d..875e3a2 100644
--- a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
@@ -77,8 +77,8 @@
     _is_prepared                 = false;
     _original_b                  = b;
     _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
-    _a_offset                    = a->info()->quantization_info().offset;
-    _b_offset                    = b->info()->quantization_info().offset;
+    _a_offset                    = a->info()->quantization_info().uniform().offset;
+    _b_offset                    = b->info()->quantization_info().uniform().offset;
 
     // Get the GPU target
     const GPUTarget gpu_target = CLScheduler::get().target();
@@ -213,8 +213,8 @@
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(), "Matrix A already reshaped is not supported");
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(), "Matrix B already reshaped is not supported");
 
-    int32_t a_offset = a->quantization_info().offset;
-    int32_t b_offset = b->quantization_info().offset;
+    int32_t a_offset = a->quantization_info().uniform().offset;
+    int32_t b_offset = b->quantization_info().uniform().offset;
 
     const ITensorInfo *matrix_a_info = a;
     const ITensorInfo *matrix_b_info = b;
diff --git a/src/runtime/CL/functions/CLPoolingLayer.cpp b/src/runtime/CL/functions/CLPoolingLayer.cpp
index cbe1ce3..086017a 100644
--- a/src/runtime/CL/functions/CLPoolingLayer.cpp
+++ b/src/runtime/CL/functions/CLPoolingLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -45,7 +45,7 @@
     PixelValue pixel_value(0.f);
     if(is_data_type_quantized_asymmetric(input->info()->data_type()) && !pool_info.exclude_padding())
     {
-        pixel_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().offset));
+        pixel_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().uniform().offset));
     }
     switch(input->info()->data_layout())
     {