COMPMID-3034: Add NERequantizationLayerKernel

Change-Id: I3f098c3c2c2031d8cbe7326eab88a4e78bda867f
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2704
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Sang-Hoon Park <sang-hoon.park@arm.com>
diff --git a/arm_compute/core/NEON/NEMath.h b/arm_compute/core/NEON/NEMath.h
index 54f8252..3905f67 100644
--- a/arm_compute/core/NEON/NEMath.h
+++ b/arm_compute/core/NEON/NEMath.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -173,6 +173,15 @@
  */
 float32x4x4_t convert_int8x16_to_float32x4x4(const int8x16_t &in);
 
+/** Converts the specified 16-element vector (type given by the template parameter) to float32x4x4_t
+ *
+ * @param[in] in Vector of 16 elements to be converted (specializations are provided for uint8x16_t and int8x16_t)
+ *
+ * @return Converted vector of float
+ */
+template <typename T>
+float32x4x4_t convert_to_float32x4x4(const T &in);
+
 /** Converts from two float32x4x3_t to just one uint8x8x3_t
  *
  * @param[in]  in1 First input vector of float to be converted
diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl
index 5d8b82c..49870d0 100644
--- a/arm_compute/core/NEON/NEMath.inl
+++ b/arm_compute/core/NEON/NEMath.inl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -345,6 +345,18 @@
     return out;
 }
 
+template <>
+inline float32x4x4_t convert_to_float32x4x4(const uint8x16_t &in) // Specialization for uint8x16_t input
+{
+    return convert_uint8x16_to_float32x4x4(in); // Delegates to the dedicated uint8x16_t converter
+}
+
+template <>
+inline float32x4x4_t convert_to_float32x4x4(const int8x16_t &in) // Specialization for int8x16_t input
+{
+    return convert_int8x16_to_float32x4x4(in); // Delegates to the dedicated int8x16_t converter
+}
+
 inline void convert_float32x4x3_to_uint8x8x3(const float32x4x3_t &in1, const float32x4x3_t &in2, uint8x8x3_t &out)
 {
     out.val[0] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[0])),
diff --git a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
index 1a9b533..087e767 100644
--- a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,16 +56,16 @@
     ~NEQuantizationLayerKernel() = default;
     /** Set the input, output.
      *
-     * @param[in]  input  Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: F32/F16.
-     * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM16.
+     * @param[in]  input  Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
+     * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
      *
      * @note Output auto initialization is not supported by this kernel
      */
     void configure(const ITensor *input, ITensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref NEQuantizationLayerKernel
      *
-     * @param[in] input  Input tensor info. Data types supported: F32/F16.
-     * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM16.
+     * @param[in] input  Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
+     * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
      *
      * @return a status
      */
@@ -80,7 +80,7 @@
      * @param[in] window Region on which to execute the kernel.
      */
     using QuantizationFunctionExecutorPtr = void (NEQuantizationLayerKernel::*)(const Window &window);
-    /** Function to apply QASYMM8 quantization on a tensor.
+    /** Function to apply QASYMM8 or QASYMM8_SIGNED quantization on a tensor.
      *
      * @param[in] window Region on which to execute the kernel.
      */
diff --git a/arm_compute/core/QuantizationInfo.h b/arm_compute/core/QuantizationInfo.h
index 06ba665..f859beb 100644
--- a/arm_compute/core/QuantizationInfo.h
+++ b/arm_compute/core/QuantizationInfo.h
@@ -516,5 +516,49 @@
 {
     return dequantize_qasymm16(value, qinfo.uniform());
 }
+
+/*
+ * When requantizing a quantized input tensor to an output tensor with a different quantization,
+ * instead of applying a dequantization followed by a quantization, we just compute a new scale and
+ * offset.
+ *
+ * Assuming:
+ *   - q_i as input quantized value
+ *   - q_o as output quantized value
+ *   - z_i as input quantization offset value
+ *   - z_o as output quantization offset value
+ *   - s_i as input quantization scale value
+ *   - s_o as output quantization scale value
+ *   - z_n as new quantization offset value
+ *   - s_n as new quantization scale value
+ *
+ * q_o = ( q_i - z_i ) * s_i / s_o + z_o
+ *
+ * We can rewrite the formula as:
+ *
+ * q_o = ( q_i * s_i / s_o ) - z_i * s_i / s_o + z_o
+ *
+ * q_o = q_i / s_n + z_n
+ *
+ * Where:
+ *
+ * s_n = s_o / s_i
+ *
+ * z_n = - z_i * s_i / s_o + z_o
+ *
+ */
+inline UniformQuantizationInfo compute_requantization_scale_offset(const UniformQuantizationInfo &uqinfo_in, const UniformQuantizationInfo &uqinfo_out)
+{
+    // New scale: s_n = s_o / s_i
+    const float requant_scale = uqinfo_out.scale / uqinfo_in.scale;
+
+    // New offset: z_n = z_o - z_i * s_i / s_o. The input-offset term is evaluated in the float
+    // domain and converted to int32_t only once, at the very end, to limit the precision loss.
+    const auto    scaled_in_offset = static_cast<float>(uqinfo_in.offset) * uqinfo_in.scale / uqinfo_out.scale;
+    const int32_t requant_offset   = uqinfo_out.offset - static_cast<int32_t>(scaled_in_offset);
+
+    return UniformQuantizationInfo(requant_scale, requant_offset);
+}
+
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_QUANTIZATION_INFO_H */
diff --git a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
index 1cf83e8..fc317be 100644
--- a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -48,14 +48,14 @@
     NEQuantizationLayer() = default;
     /** Set the input and output tensors.
      *
-     * @param[in]  input  Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: F32/F16.
-     * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM16
+     * @param[in]  input  Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
+     * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16
      */
     void configure(const ITensor *input, ITensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref NEQuantizationLayer
      *
-     * @param[in] input  Input tensor info. The dimensions over the third will be interpreted as batches. Data types supported: F32/F16.
-     * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM16
+     * @param[in] input  Input tensor info. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
+     * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16
      *
      * @return a status
      */