COMPMID-2060: Support different qinfo in PoolingLayer

CL and Neon back ends now support different qinfos

Change-Id: I638d5f258ab2f99b40659601b4c5398d2c34c43b
Signed-off-by: Pablo Tello <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/927
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
diff --git a/src/core/CL/cl_kernels/pooling_layer_quantized.cl b/src/core/CL/cl_kernels/pooling_layer_quantized.cl
index 198250b..919b76e 100644
--- a/src/core/CL/cl_kernels/pooling_layer_quantized.cl
+++ b/src/core/CL/cl_kernels/pooling_layer_quantized.cl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,6 +23,19 @@
  */
 #include "helpers.h"
 
+#if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT)
+#define VEC_FLOAT(VEC_SIZE)        \
+    VEC_DATA_TYPE(float, VEC_SIZE) \
+#define VEC_INT(VEC_SIZE) VEC_DATA_TYPE(int, VEC_SIZE) #define VEC_UCHAR(VEC_SIZE) VEC_DATA_TYPE(uchar, VEC_SIZE) #define CONVERT_RTE(x, type)(convert_##type##_rte((x)))
+#define CONVERT_DOWN(x, type) CONVERT_RTE(x, type)
+#define REQUANTIZE(VEC_SIZE, input, in_offset, out_offset, in_scale, out_scale, res)                                                                                  \
+    {                                                                                                                                                                 \
+        const VEC_FLOAT(VEC_SIZE) in_f32  = (CONVERT(input, VEC_FLOAT(VEC_SIZE)) - (VEC_FLOAT(VEC_SIZE))((float)in_offset)) * (VEC_FLOAT(VEC_SIZE))((float)in_scale); \
+        const VEC_FLOAT(VEC_SIZE) out_f32 = in_f32 / ((VEC_FLOAT(VEC_SIZE))(float)out_scale) + ((VEC_FLOAT(VEC_SIZE))((float)out_offset));                            \
+        res                               = CONVERT_SAT(CONVERT_DOWN(out_f32, VEC_INT(VEC_SIZE)), VEC_UCHAR(VEC_SIZE));                                               \
+    }
+#endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */
+
 #if defined(POOL_AVG)
 #define POOL_OP(x, y) ((x) + (y))
 #else /* defined(POOL_AVG) */
@@ -118,8 +131,22 @@
     res = round(DIV_OP(res, calculate_avg_scale(POOL_SIZE_X, POOL_SIZE_Y, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y)));
 #endif /* defined(POOL_AVG) */
 
-    // Store result
-    *(__global uchar *)output.ptr = convert_uchar(res);
+    uchar result_u8 = convert_uchar(res);
+
+#if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT)
+
+    const float result_f32   = convert_float(result_u8);
+    const float input_offset = (float)OFFSET_IN1;
+    const float input_scale  = (float)SCALE_IN1;
+    const float scale_out    = (float)SCALE_OUT;
+    const float offset_out   = (float)OFFSET_OUT;
+    const float in_f32       = (result_f32 - input_offset) * input_scale;
+    const float out_f32      = in_f32 / scale_out + offset_out;
+    result_u8                = convert_uchar_sat(convert_int_rte(out_f32));
+
+#endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */
+
+    *(__global uchar *)output.ptr = result_u8;
 }
 
 int calculate_avg_scale_nhwc(const int pool_size_x, const int pool_size_y, int upper_bound_w, int upper_bound_h,
@@ -217,6 +244,11 @@
     vdata = convert_int8(round(DIV_OP_NHWC(vdata, calculate_avg_scale_nhwc(POOL_SIZE_X, POOL_SIZE_Y, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y))));
 #endif /* defined(POOL_AVG) */
 
+    uchar8 out_u8 = convert_uchar8(vdata);
+#if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT)
+    REQUANTIZE(8, out_u8, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT, out_u8);
+#endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */
+
     // Store result
-    vstore8(convert_uchar8(vdata), 0, (__global uchar *)output.ptr);
-}
\ No newline at end of file
+    vstore8(out_u8, 0, (__global uchar *)output.ptr);
+}
diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
index 7081688..7ccbda9 100644
--- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
@@ -78,7 +78,6 @@
     {
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
         TensorInfo out_info(TensorInfo(compute_pool_shape(*input, pool_info), 1, output->data_type()));
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &out_info);
     }
@@ -201,6 +200,17 @@
     const int pool_pad_top  = pad_stride_info.pad_top();
     const int pool_pad_left = pad_stride_info.pad_left();
 
+    // Set build options
+    CLBuildOptions build_opts;
+
+    if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info())
+    {
+        build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().offset));
+        build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset));
+        build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().scale));
+        build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale));
+    }
+
     // Check output dimensions
     auto_init(input->info(), output->info(), pool_info);
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info));
@@ -212,8 +222,6 @@
 
     const DataType data_type = input->info()->data_type();
 
-    // Set build options
-    CLBuildOptions build_opts;
     build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
     build_opts.add_option("-DPOOL_" + string_from_pooling_type(pool_type));
     build_opts.add_option("-DSTRIDE_X=" + support::cpp11::to_string(pool_stride_x));
@@ -222,6 +230,7 @@
     build_opts.add_option("-DPAD_Y=" + support::cpp11::to_string(pool_pad_top));
     build_opts.add_option("-DPOOL_SIZE_X=" + support::cpp11::to_string(pool_size_x));
     build_opts.add_option("-DPOOL_SIZE_Y=" + support::cpp11::to_string(pool_size_y));
+
     build_opts.add_option_if(data_type == DataType::F16, "-DFP16");
 
     // Create kernel