COMPMID-2306: CLDepthwiseConvolution: support for QUANT8_PER_CHANNEL_SYMM - Reference

This patch modifies the reference implementation and the fixtures of the
depthwise convolution layer to support QSYMM8_PER_CHANNEL quantization.

Change-Id: I28adb5c110308b1024a213bec2d35a89180a46dc
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2063
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
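
Note: the crux of the change is that the requantization multiplier can no longer be
computed once per tensor; with QSYMM8_PER_CHANNEL weights every channel carries its
own scale, so the multiplier must be derived per channel inside the loop. Below is a
minimal, self-contained sketch of that per-channel requantization. The helper
functions reimplement the gemmlowp-style fixed-point math that the reference code
obtains from calculate_quantized_multiplier_less_than_one, asymm_int_mult and
asymm_rounding_divide_by_pow2; all names and values here are illustrative only.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// Split a real multiplier in [0, 1) into a Q0.31 fixed-point multiplier and a right
// shift, mirroring what calculate_quantized_multiplier_less_than_one produces.
static void quantize_multiplier(float multiplier, int32_t *quant_mult, int *shift)
{
    int         exponent    = 0;
    const float significand = std::frexp(multiplier, &exponent); // multiplier = significand * 2^exponent
    long long   q           = std::llround(significand * static_cast<float>(1LL << 31));
    if(q == (1LL << 31))
    {
        q /= 2;
        ++exponent;
    }
    *quant_mult = static_cast<int32_t>(q);
    *shift      = -exponent;
}

// Rounding "doubling high" multiply, standing in for asymm_int_mult.
static int32_t doubling_high_mul(int32_t a, int32_t b)
{
    const int64_t ab    = static_cast<int64_t>(a) * static_cast<int64_t>(b);
    const int32_t nudge = (ab >= 0) ? (1 << 30) : (1 - (1 << 30));
    return static_cast<int32_t>((ab + nudge) >> 31);
}

// Rounding divide by 2^exponent, standing in for asymm_rounding_divide_by_pow2.
static int32_t rounding_divide_by_pow2(int32_t x, int exponent)
{
    const int32_t mask      = (int32_t(1) << exponent) - 1;
    const int32_t remainder = x & mask;
    const int32_t threshold = (mask >> 1) + ((x < 0) ? 1 : 0);
    return (x >> exponent) + ((remainder > threshold) ? 1 : 0);
}

// Requantize one int32 accumulator belonging to input channel z.
static uint8_t requantize_per_channel(int32_t acc, unsigned int z, float input_scale,
                                      const std::vector<float> &weights_scales, bool per_channel,
                                      float output_scale, int32_t output_offset)
{
    // Per-channel weights carry one scale per channel; QASYMM8 weights carry a single one.
    const float weights_scale = per_channel ? weights_scales[z] : weights_scales[0];
    const float multiplier    = input_scale * weights_scale / output_scale;

    int32_t output_multiplier = 0;
    int     output_shift      = 0;
    quantize_multiplier(multiplier, &output_multiplier, &output_shift);

    int32_t val = rounding_divide_by_pow2(doubling_high_mul(acc, output_multiplier), output_shift);
    val += output_offset;
    return static_cast<uint8_t>(std::min<int32_t>(std::max<int32_t>(val, 0), 255)); // QASYMM8 range
}
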
diff --git a/tests/validation/reference/DepthwiseConvolutionLayer.cpp b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
index b1d2b92..7458f81 100644
--- a/tests/validation/reference/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
@@ -40,7 +40,9 @@
 {
 namespace reference
 {
-/** Perform a depthwise convolution
+namespace
+{
+/** Perform a depthwise convolution for floating-point types
  *
  * - Three-dimensional tensors
  * - Third dimension is the number of channels
@@ -48,9 +50,9 @@
  * - Padding, stride and output shape "match"
  *
  */
-template <typename T, typename TB>
-SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info,
-                                      unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
+template <typename T>
+SimpleTensor<T> depthwise_convolution_fp(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<T> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info,
+                                         unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
 {
     ARM_COMPUTE_UNUSED(out_quant_info);
 
@@ -114,7 +116,7 @@
                             }
                         }
 
-                        dst[out_pos++] = saturate_cast<T>(val + *static_cast<const TB *>(biases(Coordinates(out_z))));
+                        dst[out_pos++] = saturate_cast<T>(val + *static_cast<const T *>(biases(Coordinates(out_z))));
                     }
                 }
             }
@@ -124,26 +126,32 @@
     return dst;
 }
 
-template <>
-SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &biases, const TensorShape &dst_shape,
-                                            const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
+/** Perform a quantized depthwise convolution
+ *
+ * - Three-dimensional tensors
+ * - Third dimension is the number of channels
+ * - Depths of input tensor and filter are equal
+ * - Padding, stride and output shape "match"
+ * - QASYMM8 input, output
+ * - QASYMM8 or QSYMM8_PER_CHANNEL filter
+ *
+ */
+template <typename T, typename TW, typename TB>
+SimpleTensor<T> depthwise_convolution_quantized(const SimpleTensor<T> &src, const SimpleTensor<TW> &weights, const SimpleTensor<int32_t> &biases, const TensorShape &dst_shape,
+                                                const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
 {
     // If no explicit quantization has been set, use the same as src
     const QuantizationInfo &dst_qinfo = out_quant_info.uniform().empty() ? src.quantization_info() : out_quant_info;
-    SimpleTensor<uint8_t>   dst{ dst_shape, src.data_type(), 1, dst_qinfo };
+    SimpleTensor<T>         dst{ dst_shape, src.data_type(), 1, dst_qinfo };
 
     // Create reference
     const int   input_offset   = -src.quantization_info().uniform().offset;
     const float input_scale    = src.quantization_info().uniform().scale;
     const int   weights_offset = -weights.quantization_info().uniform().offset;
-    const float weights_scale  = weights.quantization_info().uniform().scale;
     const int   output_offset  = dst_qinfo.uniform().offset;
     const float output_scale   = dst_qinfo.uniform().scale;
 
-    int         output_multiplier = 0;
-    int         output_shift      = 0;
-    const float multiplier        = input_scale * weights_scale / output_scale;
-    arm_compute::quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
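+    // With per-channel quantization there is one weights scale per channel, so keep the
+    // whole scale vector and derive each channel's multiplier inside the loop below.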
+    const std::vector<float> weights_scale_vec = weights.quantization_info().scale();
 
     // Compute reference
     const int filter_width  = weights.shape().x();
@@ -173,11 +181,19 @@
     const int maximum_x = input_width + pad_left + pad_right - static_cast<int>(patch_width);
     const int maximum_y = input_height + pad_top + pad_bottom - static_cast<int>(patch_height);
 
+    const bool is_quantized_per_channel = is_data_type_quantized_per_channel(weights.data_type());
+
     int out_pos = 0;
     for(int r = 0; r < num_batches; ++r)
     {
         for(int z = 0; z < input_depth; ++z)
         {
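+            // Pick this channel's weights scale (per-channel) or the single tensor scale
+            // (QASYMM8) and turn it into a fixed-point multiplier and shift.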
+            int         output_multiplier = 0;
+            int         output_shift      = 0;
+            const float weights_scale     = (is_quantized_per_channel) ? weights_scale_vec[z] : weights_scale_vec[0];
+            const float multiplier        = input_scale * weights_scale / output_scale;
+            arm_compute::quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
+
             for(unsigned int m = 0; m < depth_multiplier; ++m)
             {
                 const int     out_z    = z * depth_multiplier + m;
@@ -197,8 +213,8 @@
                             {
                                 coords.set(0, i);
                                 coords.set(1, j);
-                                const auto    in_val = tensor_elem_at<uint8_t>(src, coords, BorderMode::CONSTANT, -input_offset);
-                                const uint8_t w_val  = *(weights.data() + filter_offset);
+                                const auto in_val = tensor_elem_at<T>(src, coords, BorderMode::CONSTANT, -input_offset);
+                                const TW   w_val  = *(weights.data() + filter_offset);
                                 val += (in_val + input_offset) * (w_val + weights_offset);
                                 ++filter_offset;
                             }
@@ -206,8 +222,7 @@
                         val += bias_val;
                         val = asymm_rounding_divide_by_pow2(asymm_int_mult(val, output_multiplier), output_shift);
                         val += output_offset;
-                        val = std::max<int32_t>(val, 0);
-                        val = std::min<int32_t>(val, 255);
+                        val = utility::clamp<int32_t>(val, 0, 255);
 
                         // Store the result
                         dst[out_pos++] = val;
@@ -219,12 +234,35 @@
 
     return dst;
 }
+} // namespace
 
-template SimpleTensor<float> depthwise_convolution(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &biases, const TensorShape &dst_shape,
-                                                   const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info);
+template <>
+SimpleTensor<float> depthwise_convolution(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &biases, const TensorShape &dst_shape,
+                                          const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
+{
+    return depthwise_convolution_fp(src, weights, biases, dst_shape, conv_info, depth_multiplier, dilation, out_quant_info);
+}
 
-template SimpleTensor<half> depthwise_convolution(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &biases, const TensorShape &dst_shape,
-                                                  const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info);
+template <>
+SimpleTensor<half> depthwise_convolution(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &biases, const TensorShape &dst_shape,
+                                         const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
+{
+    return depthwise_convolution_fp(src, weights, biases, dst_shape, conv_info, depth_multiplier, dilation, out_quant_info);
+}
+
+template <>
+SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &biases, const TensorShape &dst_shape,
+                                            const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
+{
+    return depthwise_convolution_quantized<uint8_t, uint8_t, int32_t>(src, weights, biases, dst_shape, conv_info, depth_multiplier, dilation, out_quant_info);
+}
+
+template <>
+SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, const SimpleTensor<int8_t> &weights, const SimpleTensor<int32_t> &biases, const TensorShape &dst_shape,
+                                            const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
+{
+    return depthwise_convolution_quantized<uint8_t, int8_t, int32_t>(src, weights, biases, dst_shape, conv_info, depth_multiplier, dilation, out_quant_info);
+}
 } // namespace reference
 } // namespace validation
 } // namespace test
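
For reference, a hypothetical call site exercising the new uint8/int8 specialization
could look like the following; shapes, scales and offsets are invented for
illustration, and SimpleTensor, QuantizationInfo and DataType are the test framework
types used throughout the diff above.

// Hypothetical call site (illustrative shapes/scales; not part of the patch).
const TensorShape      src_shape(9, 9, 4);  // W x H x C
const TensorShape      w_shape(3, 3, 4);
const TensorShape      b_shape(4);
const TensorShape      dst_shape(7, 7, 4);  // 3x3 kernel, stride 1, no padding

const QuantizationInfo src_qinfo(0.5f, 10); // QASYMM8 scale/offset
const QuantizationInfo w_qinfo(std::vector<float>{ 0.1f, 0.2f, 0.3f, 0.4f }); // one scale per channel

SimpleTensor<uint8_t> src{ src_shape, DataType::QASYMM8, 1, src_qinfo };
SimpleTensor<int8_t>  weights{ w_shape, DataType::QSYMM8_PER_CHANNEL, 1, w_qinfo };
SimpleTensor<int32_t> biases{ b_shape, DataType::S32 };

// ... fill src, weights and biases ...

SimpleTensor<uint8_t> dst = reference::depthwise_convolution(
    src, weights, biases, dst_shape, PadStrideInfo(1, 1, 0, 0),
    /* depth_multiplier */ 1, Size2D(1, 1), QuantizationInfo(0.5f, 10));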