COMPMID-2966 Add support for QASYMM8_SIGNED in NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel

Signed-off-by: Luca Foschiani <luca.foschiani@arm.com>
Change-Id: Ia8692f8fda16fa3b73f343e4b5b1b55e14403225
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2750
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/tests/validation/CL/GEMMLowp.cpp b/tests/validation/CL/GEMMLowp.cpp
index 8aa81d0..41a441c 100644
--- a/tests/validation/CL/GEMMLowp.cpp
+++ b/tests/validation/CL/GEMMLowp.cpp
@@ -147,6 +147,65 @@
 
 TEST_SUITE(OutputStage)
 
+TEST_SUITE(QuantizeDownInt32Scale)
+
+TEST_SUITE(QASYMM8)
+
+const auto quantize_down_int32_to_uint8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * framework::dataset::make("result_shift", 2,
+                                                      3)
+                                                      * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true });
+
+const auto quantize_down_int32_to_uint8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1,
+                                                           2)
+                                                           * framework::dataset::make("result_shift", 2, 3) * framework::dataset::make("min", 0, 2) * framework::dataset::make("max", 171, 173) * framework::dataset::make("addBias", { false, true });
+
+using CLGEMMLowpQuantizeDownInt32ScaleFixture = GEMMLowpQuantizeDownInt32ToUint8ScaleValidationFixture<CLTensor, CLAccessor, CLGEMMLowpOutputStage>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_cases))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+TEST_SUITE(BoundedReLu)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_relu_cases))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+TEST_SUITE_END() // BoundedReLu
+TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+
+const auto quantize_down_int32_to_int8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * framework::dataset::make("result_shift", 2,
+                                                     3)
+                                                     * framework::dataset::make("min", -128) * framework::dataset::make("max", 127) * framework::dataset::make("addBias", { false, true });
+
+const auto quantize_down_int32_to_int8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1,
+                                                          2)
+                                                          * framework::dataset::make("result_shift", 2, 3) * framework::dataset::make("min", -100, -98) * framework::dataset::make("max", 71, 73) * framework::dataset::make("addBias", { false, true });
+
+using CLGEMMLowpQuantizeDownInt32ScaleFixture = GEMMLowpQuantizeDownInt32ToInt8ScaleValidationFixture<CLTensor, CLAccessor, CLGEMMLowpOutputStage>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_cases))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+TEST_SUITE(BoundedReLu)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_relu_cases))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+TEST_SUITE_END() // BoundedReLu
+TEST_SUITE_END() // QASYMM8_SIGNED
+TEST_SUITE_END() // QuantizeDownInt32Scale
+
 TEST_SUITE(QuantizeDownInt32ToUint8ScaleByFixedPoint)
 const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
                                                                     2)
diff --git a/tests/validation/NEON/GEMMLowp.cpp b/tests/validation/NEON/GEMMLowp.cpp
index de30bd5..c3747dd 100644
--- a/tests/validation/NEON/GEMMLowp.cpp
+++ b/tests/validation/NEON/GEMMLowp.cpp
@@ -165,7 +165,9 @@
 
 TEST_SUITE(OutputStage)
 
-TEST_SUITE(QuantizeDownInt32ToUint8Scale)
+TEST_SUITE(QuantizeDownInt32Scale)
+
+TEST_SUITE(QASYMM8)
 
 const auto quantize_down_int32_to_uint8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * framework::dataset::make("result_shift", 2,
                                                       3)
@@ -175,7 +177,7 @@
                                                            2)
                                                            * framework::dataset::make("result_shift", 2, 3) * framework::dataset::make("min", 0, 2) * framework::dataset::make("max", 171, 174) * framework::dataset::make("addBias", { false, true });
 
-using NEGEMMLowpQuantizeDownInt32ToUint8ScaleFixture = GEMMLowpQuantizeDownInt32ToUint8ScaleValidationFixture<Tensor, Accessor, NEGEMMLowpQuantizeDownInt32ToUint8Scale>;
+using NEGEMMLowpQuantizeDownInt32ScaleFixture = GEMMLowpQuantizeDownInt32ToUint8ScaleValidationFixture<Tensor, Accessor, NEGEMMLowpOutputStage>;
 
 // *INDENT-OFF*
 // clang-format off
@@ -198,85 +200,112 @@
     framework::dataset::make("Expected", { true, false })),
     a_info, b_info, output_info, min, max, expected)
 {
+
+    GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo();
+    output_stage.type        = GEMMLowpOutputStageType::QUANTIZE_DOWN;
+    output_stage.gemmlowp_min_bound        = min;
+    output_stage.gemmlowp_max_bound        = max;
+    output_stage.output_data_type = DataType::QASYMM8;
+
     // Lock tensors
-    Status status =  NEGEMMLowpQuantizeDownInt32ToUint8Scale::validate(&a_info.clone()->set_is_resizable(false),
+    Status status =  NEGEMMLowpOutputStage::validate(&a_info.clone()->set_is_resizable(false),
                                                                      &b_info.clone()->set_is_resizable(false),
                                                                      &output_info.clone()->set_is_resizable(false),
-                                                                     min,
-                                                                     max);
+                                                                     output_stage);
     ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
 }
 // clang-format on
 // *INDENT-ON*
 
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_cases),
-               shape, result_offset, result_mult_int, result_shift, min, max, add_bias)
-{
-    TensorShape shape_bias(shape[0]);
-
-    // Create tensors
-    Tensor in   = create_tensor<Tensor>(shape, DataType::S32);
-    Tensor bias = create_tensor<Tensor>(shape_bias, DataType::S32);
-    Tensor out  = create_tensor<Tensor>(shape, DataType::QASYMM8);
-
-    ARM_COMPUTE_EXPECT(in.info()->is_resizable(), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(out.info()->is_resizable(), framework::LogLevel::ERRORS);
-
-    // Create and configure function
-    NEGEMMLowpQuantizeDownInt32ToUint8Scale output_stage;
-    output_stage.configure(&in, add_bias ? &bias : nullptr, &out, result_offset, result_mult_int, result_shift, min, max);
-
-    // Validate valid region input and output
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(in.info()->valid_region(), valid_region);
-    validate(out.info()->valid_region(), valid_region);
-
-    // Validate valid region bias
-    if(add_bias)
-    {
-        const ValidRegion valid_region_bias = shape_to_valid_region(shape_bias);
-        validate(bias.info()->valid_region(), valid_region_bias);
-    }
-
-    // Validate padding
-    const PaddingSize padding(0);
-    validate(in.info()->padding(), padding);
-    validate(out.info()->padding(), padding);
-
-    if(add_bias)
-    {
-        validate(bias.info()->padding(), padding);
-    }
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ToUint8ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_cases))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpQuantizeDownInt32ToUint8ScaleFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), quantize_down_int32_to_uint8_scale_cases))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_cases))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
 
 TEST_SUITE(BoundedReLu)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ToUint8ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_relu_cases))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_relu_cases))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpQuantizeDownInt32ToUint8ScaleFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), quantize_down_int32_to_uint8_scale_relu_cases))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
 TEST_SUITE_END() // BoundedReLu
 
-TEST_SUITE_END() // QuantizeDownInt32ToUint8Scale
+TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+
+const auto quantize_down_int32_to_int8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * framework::dataset::make("result_shift", 2,
+                                                     3)
+                                                     * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
+
+const auto quantize_down_int32_to_int8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1,
+                                                          2)
+                                                          * framework::dataset::make("result_shift", 2, 3) * framework::dataset::make("min", -100, -98) * framework::dataset::make("max", 71, 74) * framework::dataset::make("addBias", { false, true });
+
+using NEGEMMLowpQuantizeDownInt32ScaleFixture = GEMMLowpQuantizeDownInt32ToInt8ScaleValidationFixture<Tensor, Accessor, NEGEMMLowpOutputStage>;
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
+    framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Input not a multiple of 16
+                                             TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Invalid min and max
+                                             TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), // Wrong output data type
+                                          }),
+    framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32),
+                                            TensorInfo(TensorShape(21U), 1, DataType::S32),
+                                            TensorInfo(TensorShape(20U), 1, DataType::S32),
+                                          })),
+    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8_SIGNED),
+                                            TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8_SIGNED),
+                                            TensorInfo(TensorShape(20U, 13U), 1, DataType::S32),
+                                           })),
+    framework::dataset::make("Min",{        -10,
+                                            -200,
+                                            -113,
+                                           })),
+    framework::dataset::make("Max",{        105,
+                                            300,
+                                            -18,
+                                           })),
+    framework::dataset::make("Expected", { true, false, false })),
+    a_info, b_info, output_info, min, max, expected)
+{
+    GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo();
+    output_stage.type        = GEMMLowpOutputStageType::QUANTIZE_DOWN;
+    output_stage.gemmlowp_min_bound        = min;
+    output_stage.gemmlowp_max_bound        = max;
+    output_stage.output_data_type = DataType::QASYMM8_SIGNED;
+
+    // Lock tensors
+    Status status =  NEGEMMLowpOutputStage::validate(&a_info.clone()->set_is_resizable(false),
+                                                                     &b_info.clone()->set_is_resizable(false),
+                                                                     &output_info.clone()->set_is_resizable(false),
+                                                                     output_stage);
+    ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_cases))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+TEST_SUITE(BoundedReLu)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_relu_cases))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+TEST_SUITE_END() // BoundedReLu
+
+TEST_SUITE_END() // QASYMM8_SIGNED
+
+TEST_SUITE_END() // QuantizeDownInt32Scale
 
 TEST_SUITE(QuantizeDownInt32ToUint8ScaleByFixedPoint)
 
diff --git a/tests/validation/fixtures/GEMMLowpFixture.h b/tests/validation/fixtures/GEMMLowpFixture.h
index be9ce96..e3dc738 100644
--- a/tests/validation/fixtures/GEMMLowpFixture.h
+++ b/tests/validation/fixtures/GEMMLowpFixture.h
@@ -301,8 +301,16 @@
         TensorType c = create_tensor<TensorType>(shape, DataType::QASYMM8, 1);
 
         // Create and configure function
-        FunctionType output_stage;
-        output_stage.configure(&a, add_bias ? &b : nullptr, &c, result_offset, result_mult_int, result_shift, min, max);
+        FunctionType            output_stage;
+        GEMMLowpOutputStageInfo output_stage_info = GEMMLowpOutputStageInfo();
+        output_stage_info.type                    = GEMMLowpOutputStageType::QUANTIZE_DOWN;
+        output_stage_info.gemmlowp_offset         = result_offset;
+        output_stage_info.gemmlowp_multiplier     = result_mult_int;
+        output_stage_info.gemmlowp_shift          = result_shift;
+        output_stage_info.gemmlowp_min_bound      = min;
+        output_stage_info.gemmlowp_max_bound      = max;
+        output_stage_info.output_data_type        = DataType::QASYMM8;
+        output_stage.configure(&a, add_bias ? &b : nullptr, &c, output_stage_info);
 
         ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -367,6 +375,108 @@
 };
 
 template <typename TensorType, typename AccessorType, typename FunctionType>
+class GEMMLowpQuantizeDownInt32ToInt8ScaleValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max, bool add_bias)
+    {
+        _target    = compute_target(shape, result_offset, result_mult_int, result_shift, min, max, add_bias);
+        _reference = compute_reference(shape, result_offset, result_mult_int, result_shift, min, max, add_bias);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        std::uniform_int_distribution<> distribution(-6000, 6000);
+        library->fill(tensor, distribution, i);
+    }
+
+    TensorType compute_target(const TensorShape &shape, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max, bool add_bias)
+    {
+        TensorShape shape_bias(shape[0]);
+
+        // Create tensors
+        TensorType a = create_tensor<TensorType>(shape, DataType::S32, 1);
+        TensorType b = create_tensor<TensorType>(shape_bias, DataType::S32, 1);
+        TensorType c = create_tensor<TensorType>(shape, DataType::QASYMM8_SIGNED, 1);
+
+        // Create and configure function
+        FunctionType            output_stage;
+        GEMMLowpOutputStageInfo output_stage_info = GEMMLowpOutputStageInfo();
+        output_stage_info.type                    = GEMMLowpOutputStageType::QUANTIZE_DOWN;
+        output_stage_info.gemmlowp_offset         = result_offset;
+        output_stage_info.gemmlowp_multiplier     = result_mult_int;
+        output_stage_info.gemmlowp_shift          = result_shift;
+        output_stage_info.gemmlowp_min_bound      = min;
+        output_stage_info.gemmlowp_max_bound      = max;
+        output_stage_info.output_data_type        = DataType::QASYMM8_SIGNED;
+        output_stage.configure(&a, add_bias ? &b : nullptr, &c, output_stage_info);
+
+        ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        a.allocator()->allocate();
+        c.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!a.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!c.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensor
+        fill(AccessorType(a), 0);
+
+        if(add_bias)
+        {
+            ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+            // Allocate bias tensor
+            b.allocator()->allocate();
+
+            ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+            // Fill tensor
+            fill(AccessorType(b), 1);
+        }
+
+        // Compute GEMM function
+        output_stage.run();
+        return c;
+    }
+
+    SimpleTensor<int8_t> compute_reference(const TensorShape &shape, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max, bool add_bias)
+    {
+        // Create reference
+        TensorShape shape_bias(shape[0]);
+
+        SimpleTensor<int32_t> a{ shape, DataType::S32, 1 };
+        SimpleTensor<int32_t> b{ shape_bias, DataType::S32, 1 };
+
+        // Fill reference
+        fill(a, 0);
+
+        const std::vector<int32_t> result_mult_int_vec = { result_mult_int };
+        const std::vector<int32_t> result_shift_vec    = { result_shift };
+
+        if(add_bias)
+        {
+            // Fill bias
+            fill(b, 1);
+
+            return reference::gemmlowp_quantize_down_scale<int32_t, int8_t>(a, b, result_offset, result_mult_int_vec, result_shift_vec, min, max);
+        }
+        else
+        {
+            return reference::gemmlowp_quantize_down_scale<int32_t, int8_t>(a, result_offset, result_mult_int_vec, result_shift_vec, min, max);
+        }
+    }
+
+    TensorType           _target{};
+    SimpleTensor<int8_t> _reference{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType>
 class GEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointValidationFixture : public framework::Fixture
 {
 public: