COMPMID-3580 Add S32 support to NEArithmeticSubtraction

* Fix convert policy validate logics and add missing validate test
* Add S32 support to NEArithmeticSubtraction and NEArithmeticSubtractionKernel
* Add S32 validation tests

Change-Id: I1b6cb15b024613c202fe9f17747a83da43a5ddcf
Signed-off-by: SiCong Li <sicong.li@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3908
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
diff --git a/tests/validation/NEON/ArithmeticSubtraction.cpp b/tests/validation/NEON/ArithmeticSubtraction.cpp
index f468f6d..12fe64c 100644
--- a/tests/validation/NEON/ArithmeticSubtraction.cpp
+++ b/tests/validation/NEON/ArithmeticSubtraction.cpp
@@ -70,6 +70,10 @@
 const auto ArithmeticSubtractionS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }),
                                                              framework::dataset::make("DataType", DataType::S16)),
                                                      framework::dataset::make("DataType", DataType::S16));
+
+const auto ArithmeticSubtractionS32Dataset = combine(combine(framework::dataset::make("DataType", DataType::S32),
+                                                             framework::dataset::make("DataType", DataType::S32)),
+                                                     framework::dataset::make("DataType", DataType::S32));
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 const auto ArithmeticSubtractionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16),
                                                               framework::dataset::make("DataType", DataType::F16)),
@@ -120,12 +124,14 @@
                                                 TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                 TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
                                                 TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8),
+                                                TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8),
         })),
         framework::dataset::make("ConvertPolicy",{ ConvertPolicy::WRAP,
                                                 ConvertPolicy::SATURATE,
                                                 ConvertPolicy::SATURATE,
                                                 ConvertPolicy::WRAP,
                                                 ConvertPolicy::WRAP,
+                                                ConvertPolicy::WRAP,
         })),
         framework::dataset::make("Expected", { true, true, false, false, false, false})),
         input1_info, input2_info, output_info, policy, expected)
@@ -270,6 +276,24 @@
 }
 TEST_SUITE_END() // S16
 
+TEST_SUITE(S32)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<int32_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticSubtractionS32Dataset),
+                                                                                                                     framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                     OutOfPlaceDataSet))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionFixture<int32_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticSubtractionS32Dataset),
+                                                                                                                   framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                   OutOfPlaceDataSet))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // S32
+
 TEST_SUITE(Float)
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE(F16)