COMPMID-661: Add QASYMM8 support (and basic tests) to CLDepthwiseConvolution3x3 kernel (#28)

Change-Id: I51bebe74e3814c1245812ad575fe7854d460674f
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/109864
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
diff --git a/tests/validation/CL/DepthwiseConvolution.cpp b/tests/validation/CL/DepthwiseConvolution.cpp
index 5f1bde8..ccd9c36 100644
--- a/tests/validation/CL/DepthwiseConvolution.cpp
+++ b/tests/validation/CL/DepthwiseConvolution.cpp
@@ -42,7 +42,8 @@
 {
 namespace
 {
-constexpr RelativeTolerance<float> tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
+constexpr RelativeTolerance<float>   tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
+constexpr RelativeTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */
 } // namespace
 
 TEST_SUITE(CL)
@@ -52,11 +53,13 @@
 using CLDepthwiseConvolutionFixture = DepthwiseConvolutionValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolution, T>;
 
 TEST_SUITE(Generic)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionFixture<float>, framework::DatasetMode::PRECOMMIT, datasets::SmallDepthwiseConvolutionDataset())
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionDataset(), framework::dataset::make("DataType",
+                                                                                                            DataType::F32)))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionFixture<float>, framework::DatasetMode::NIGHTLY, datasets::LargeDepthwiseConvolutionDataset())
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionDataset(), framework::dataset::make("DataType",
+                                                                                                                DataType::F32)))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
@@ -65,16 +68,44 @@
 template <typename T>
 using CLDepthwiseConvolutionFixture3x3 = DepthwiseConvolutionValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolution3x3, T>;
 
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
 TEST_SUITE(W3x3)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionFixture3x3<float>, framework::DatasetMode::PRECOMMIT, datasets::SmallDepthwiseConvolutionDataset3x3())
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionFixture3x3<float>, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionDataset3x3(), framework::dataset::make("DataType",
+                                                                                                               DataType::F32)))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionFixture3x3<float>, framework::DatasetMode::NIGHTLY, datasets::LargeDepthwiseConvolutionDataset3x3())
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionFixture3x3<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionDataset3x3(), framework::dataset::make("DataType",
+                                                                                                                   DataType::F32)))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
 TEST_SUITE_END()
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+template <typename T>
+using CLDepthwiseConvolutionQuantizedFixture3x3 = DepthwiseConvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLDepthwiseConvolution3x3, T>;
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+TEST_SUITE(W3x3)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionQuantizedFixture3x3<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDepthwiseConvolutionDataset3x3(),
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127) })))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionDataset3x3(),
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127) })))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+TEST_SUITE_END()
 
 TEST_SUITE_END()
 TEST_SUITE_END()
diff --git a/tests/validation/CPP/ConvolutionLayer.cpp b/tests/validation/CPP/ConvolutionLayer.cpp
index 95852b0..a767912 100644
--- a/tests/validation/CPP/ConvolutionLayer.cpp
+++ b/tests/validation/CPP/ConvolutionLayer.cpp
@@ -55,8 +55,8 @@
 {
     const T *in_ptr  = in.data() + i_offset;
     const T *w_ptr   = weights.data() + w_offset;
-    const T *b_ptr   = bias.data() + b_offset;
-    T       *out_ptr = out.data() + o_offset;
+    const TB *b_ptr   = bias.data() + b_offset;
+    T        *out_ptr = out.data() + o_offset;
 
     const int half_width_weights  = width_weights / 2;
     const int half_height_weights = height_weights / 2;
diff --git a/tests/validation/CPP/DepthwiseConvolution.cpp b/tests/validation/CPP/DepthwiseConvolution.cpp
index e29d014..ad06538 100644
--- a/tests/validation/CPP/DepthwiseConvolution.cpp
+++ b/tests/validation/CPP/DepthwiseConvolution.cpp
@@ -26,8 +26,13 @@
 #include "ConvolutionLayer.h"
 #include "Utils.h"
 
+#include "tests/validation/CPP/Utils.h"
+#include "tests/validation/CPP/UtilsQuantizedAsymm.h"
+#include "tests/validation/FixedPoint.h"
 #include "tests/validation/Helpers.h"
 
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+
 namespace arm_compute
 {
 namespace test
@@ -44,8 +49,8 @@
  * - Padding, stride and output shape "match"
  *
  */
-template <typename T>
-SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<T> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info)
+template <typename T, typename TB>
+SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info)
 {
     // Create reference
     SimpleTensor<T> dst{ dst_shape, src.data_type(), 1, src.fixed_point_position() };
@@ -97,7 +102,7 @@
                     }
                     coords.set(0, x);
                     coords.set(1, y);
-                    dst[out_pos++] = saturate_cast<T>(val + *static_cast<const T *>(biases(Coordinates(z))));
+                    dst[out_pos++] = saturate_cast<T>(val + *static_cast<const TB *>(biases(Coordinates(z))));
                 }
             }
         }
@@ -106,6 +111,78 @@
     return dst;
 }
 
+template <>
+SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &biases, const TensorShape &dst_shape,
+                                            const PadStrideInfo &conv_info)
+{
+    // QASYMM8 reference: accumulate offset-corrected products in 32 bits, then requantize into the output tensor created here
+    SimpleTensor<uint8_t> dst{ dst_shape, src.data_type(), 1, src.fixed_point_position(), src.quantization_info() };
+
+    const int   input_offset   = -src.quantization_info().offset; // negated so it can be added to the raw input value
+    const float input_scale    = src.quantization_info().scale;
+    const int   weights_offset = -weights.quantization_info().offset; // negated so it can be added to the raw weight value
+    const float weights_scale  = weights.quantization_info().scale;
+    const int   output_offset  = dst.quantization_info().offset;
+    const float output_scale   = dst.quantization_info().scale;
+
+    int         output_multiplier;
+    int         output_shift;
+    const float multiplier = input_scale * weights_scale / output_scale;
+    arm_compute::quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
+
+    // Compute reference
+    const int filter_width  = weights.shape().x();
+    const int filter_height = weights.shape().y();
+    const int filter_plane  = filter_width * filter_height;
+    const int input_width   = src.shape().x();
+    const int input_height  = src.shape().y();
+    const int input_depth   = src.shape().z();
+
+    const int filter_half_size = filter_width / 2;
+    const int pad_x            = std::min(filter_half_size, static_cast<int>(conv_info.pad().first));
+    const int pad_y            = std::min(filter_half_size, static_cast<int>(conv_info.pad().second));
+    const int minimum_x        = -pad_x + filter_half_size;
+    const int minimum_y        = -pad_y + filter_half_size;
+
+    int out_pos = 0;
+    for(int z = 0; z < input_depth; ++z)
+    {
+        const int32_t bias_val = *static_cast<const int32_t *>(biases(Coordinates(z))); // one bias value per depth slice
+        for(int y = minimum_y; y < input_height + pad_y - filter_half_size; y += conv_info.stride().second)
+        {
+            for(int x = minimum_x; x < input_width + pad_x - filter_half_size; x += conv_info.stride().first)
+            {
+                Coordinates coords(x, y, z);
+                int         filter_offset = filter_plane * z;
+
+                int32_t val = 0; // signed accumulator: offset-corrected products and the bias can be negative
+                for(int j = y - filter_half_size; j <= (y + filter_half_size); ++j)
+                {
+                    for(int i = x - filter_half_size; i <= (x + filter_half_size); ++i)
+                    {
+                        coords.set(0, i);
+                        coords.set(1, j);
+                        auto    in_val = tensor_elem_at<uint8_t>(src, coords, BorderMode::CONSTANT, 0);
+                        uint8_t w_val  = *(weights.data() + filter_offset);
+                        val += (in_val + input_offset) * (w_val + weights_offset);
+                        ++filter_offset;
+                    }
+                }
+                val += bias_val;
+                val = asymm_rounding_divide_by_pow2(asymm_int_mult(val, output_multiplier), output_shift);
+                val += output_offset;
+                val = std::max<int32_t>(val, 0);
+                val = std::min<int32_t>(val, 255);
+
+                // Store the result (already clamped to [0, 255], so the narrowing cast is lossless)
+                dst[out_pos++] = static_cast<uint8_t>(val);
+            }
+        }
+    }
+
+    return dst;
+}
+
 template SimpleTensor<float> depthwise_convolution(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &biases, const TensorShape &dst_shape,
                                                    const PadStrideInfo &conv_info);
 } // namespace reference
diff --git a/tests/validation/CPP/DepthwiseConvolution.h b/tests/validation/CPP/DepthwiseConvolution.h
index e8c55b1..df743a5 100644
--- a/tests/validation/CPP/DepthwiseConvolution.h
+++ b/tests/validation/CPP/DepthwiseConvolution.h
@@ -35,8 +35,8 @@
 {
 namespace reference
 {
-template <typename T>
-SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<T> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info);
+template <typename T, typename TB>
+SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/NEON/DepthwiseConvolution.cpp b/tests/validation/NEON/DepthwiseConvolution.cpp
index 6e8aa46..b6719b5 100644
--- a/tests/validation/NEON/DepthwiseConvolution.cpp
+++ b/tests/validation/NEON/DepthwiseConvolution.cpp
@@ -87,18 +87,22 @@
 template <typename T>
 using NEDepthwiseConvolutionFixture3x3 = DepthwiseConvolutionValidationFixture<Tensor, Accessor, NEDepthwiseConvolution3x3, T>;
 
+TEST_SUITE(Float)
 TEST_SUITE(F32)
 TEST_SUITE(W3x3)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionFixture3x3<float>, framework::DatasetMode::PRECOMMIT, datasets::SmallDepthwiseConvolutionDataset3x3())
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionFixture3x3<float>, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionDataset3x3(), framework::dataset::make("DataType",
+                                                                                                               DataType::F32)))
 {
     validate(Accessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionFixture3x3<float>, framework::DatasetMode::NIGHTLY, datasets::LargeDepthwiseConvolutionDataset3x3())
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionFixture3x3<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionDataset3x3(), framework::dataset::make("DataType",
+                                                                                                                   DataType::F32)))
 {
     validate(Accessor(_target), _reference, tolerance_f32);
 }
 TEST_SUITE_END()
 TEST_SUITE_END()
+TEST_SUITE_END()
 
 TEST_SUITE_END()
 TEST_SUITE_END()
diff --git a/tests/validation/fixtures/DepthwiseConvolutionFixture.h b/tests/validation/fixtures/DepthwiseConvolutionFixture.h
index f49e76c..b1d31d6 100644
--- a/tests/validation/fixtures/DepthwiseConvolutionFixture.h
+++ b/tests/validation/fixtures/DepthwiseConvolutionFixture.h
@@ -43,14 +43,22 @@
 namespace validation
 {
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class DepthwiseConvolutionValidationFixture : public framework::Fixture
+class DepthwiseConvolutionValidationGenericFixture : public framework::Fixture
 {
 public:
+    using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value, int32_t, T>::type;
+
+public:
     template <typename...>
-    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape out_shape, PadStrideInfo pad_stride_info)
+    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info)
     {
-        _target    = compute_target(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info);
-        _reference = compute_reference(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info);
+        _quantization_info = quantization_info;
+        _data_type         = data_type;
+
+        const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
+
+        _target    = compute_target(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info);
+        _reference = compute_reference(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info);
     }
 
 protected:
@@ -59,28 +67,46 @@
     {
         switch(tensor.data_type())
         {
+            case DataType::QASYMM8:
+            {
+                std::uniform_int_distribution<uint8_t> distribution(0, 10);
+                library->fill(tensor, distribution, i);
+                break;
+            }
             case DataType::F32:
             {
                 std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
                 library->fill(tensor, distribution, i);
                 break;
             }
+            case DataType::S32:
+            {
+                std::uniform_int_distribution<int32_t> distribution(-1000, 1000);
+                library->fill(tensor, distribution, i);
+                break;
+            }
             default:
                 library->fill_tensor_uniform(tensor, i);
         }
     }
 
-    TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &output_shape, PadStrideInfo &pad_stride_info)
+    TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &output_shape, PadStrideInfo &pad_stride_info,
+                              const DataType data_type, const DataType bias_data_type, const QuantizationInfo quantization_info)
     {
         // Create tensors
-        TensorType src     = create_tensor<TensorType>(input_shape, DataType::F32);
-        TensorType weights = create_tensor<TensorType>(weights_shape, DataType::F32);
-        TensorType biases  = create_tensor<TensorType>(biases_shape, DataType::F32);
-        TensorType dst     = create_tensor<TensorType>(output_shape, DataType::F32);
+        TensorType src     = create_tensor<TensorType>(input_shape, data_type, 1, 0, quantization_info);
+        TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1, 0, quantization_info);
+        TensorType biases  = create_tensor<TensorType>(biases_shape, bias_data_type, 1, 0, quantization_info);
+        TensorType dst     = create_tensor<TensorType>(output_shape, data_type, 1, 0, quantization_info);
 
         // Create Depthwise Convolution configure function
-        FunctionType depthwise_convolution;
-        depthwise_convolution.configure(&src, &weights, &biases, &dst, pad_stride_info);
+        FunctionType dwc;
+        dwc.configure(&src, &weights, &biases, &dst, pad_stride_info);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(biases.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
 
         // Allocate tensors
         src.allocator()->allocate();
@@ -99,16 +125,17 @@
         fill(AccessorType(biases), 2);
 
         // Compute function
-        depthwise_convolution.run();
+        dwc.run();
 
         return dst;
     }
 
-    SimpleTensor<T> compute_reference(const TensorShape &in_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &out_shape, const PadStrideInfo &pad_stride_info)
+    SimpleTensor<T> compute_reference(const TensorShape &in_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &out_shape, const PadStrideInfo &pad_stride_info,
+                                      const DataType data_type, const DataType bias_data_type, QuantizationInfo quantization_info)
     {
-        SimpleTensor<T> src(in_shape, DataType::F32);
-        SimpleTensor<T> weights(weights_shape, DataType::F32);
-        SimpleTensor<T> biases(biases_shape, DataType::F32);
+        SimpleTensor<T>     src{ in_shape, data_type, 1, 0, quantization_info };
+        SimpleTensor<T>     weights{ weights_shape, data_type, 1, 0, quantization_info };
+        SimpleTensor<TBias> biases{ biases_shape, bias_data_type, 1, 0, quantization_info }; // bias has its own data type, matching the target's bias tensor
 
         fill(src, 0);
         fill(weights, 1);
@@ -117,8 +144,34 @@
         return reference::depthwise_convolution(src, weights, biases, out_shape, pad_stride_info);
     }
 
-    TensorType      _target{};
-    SimpleTensor<T> _reference{};
+    TensorType       _target{};
+    SimpleTensor<T>  _reference{};
+    DataType         _data_type{};
+    QuantizationInfo _quantization_info{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DepthwiseConvolutionValidationFixture : public DepthwiseConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type)
+    {
+        using GenericFixture = DepthwiseConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>; // Forward to the generic fixture with a default-constructed QuantizationInfo
+        GenericFixture::setup(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, QuantizationInfo());
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DepthwiseConvolutionValidationQuantizedFixture : public DepthwiseConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info)
+    {
+        using GenericFixture = DepthwiseConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>; // Forward to the generic fixture, passing the caller's QuantizationInfo through unchanged
+        GenericFixture::setup(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, quantization_info);
+    }
+};
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/fixtures/DirectConvolutionLayerFixture.h b/tests/validation/fixtures/DirectConvolutionLayerFixture.h
index 279a489..1ec4d31 100644
--- a/tests/validation/fixtures/DirectConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DirectConvolutionLayerFixture.h
@@ -84,6 +84,12 @@
                 library->fill(tensor, distribution, i);
                 break;
             }
+            case DataType::S32:
+            {
+                std::uniform_int_distribution<int32_t> distribution(-1000, 1000);
+                library->fill(tensor, distribution, i);
+                break;
+            }
             default:
                 library->fill_tensor_uniform(tensor, i);
         }