COMPMID-2418: CLDequantizationLayer support for QASYMM8_PER_CHANNEL

Add support for QASYMM8_PER_CHANNEL in CLDequantiazationLayer.
Added tests for NHWC and also updated NEON code to work with NHWC
data layout.
Cleaned up the reference implementation.

Change-Id: Ic1d51f16f7f625503fffdbbb66f6487aa588f08c
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1828
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
diff --git a/tests/validation/CL/DequantizationLayer.cpp b/tests/validation/CL/DequantizationLayer.cpp
index 2ef8c60..acc0022 100644
--- a/tests/validation/CL/DequantizationLayer.cpp
+++ b/tests/validation/CL/DequantizationLayer.cpp
@@ -41,6 +41,33 @@
 {
 namespace validation
 {
+namespace
+{
+const auto dataset_quant_f32 = combine(combine(combine(datasets::SmallShapes(), datasets::QuantizedTypes()),
+                                               framework::dataset::make("DataType", DataType::F32)),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW }));
+const auto dataset_quant_f16 = combine(combine(combine(datasets::SmallShapes(), datasets::QuantizedTypes()),
+                                               framework::dataset::make("DataType", DataType::F16)),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW }));
+const auto dataset_quant_per_channel_f32 = combine(combine(combine(datasets::SmallShapes(), datasets::QuantizedPerChannelTypes()),
+                                                           framework::dataset::make("DataType", DataType::F32)),
+                                                   framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }));
+const auto dataset_quant_per_channel_f16 = combine(combine(combine(datasets::SmallShapes(), datasets::QuantizedPerChannelTypes()),
+                                                           framework::dataset::make("DataType", DataType::F16)),
+                                                   framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }));
+const auto dataset_quant_nightly_f32 = combine(combine(combine(datasets::LargeShapes(), datasets::QuantizedTypes()),
+                                                       framework::dataset::make("DataType", DataType::F32)),
+                                               framework::dataset::make("DataLayout", { DataLayout::NCHW }));
+const auto dataset_quant_nightly_f16 = combine(combine(combine(datasets::LargeShapes(), datasets::QuantizedTypes()),
+                                                       framework::dataset::make("DataType", DataType::F16)),
+                                               framework::dataset::make("DataLayout", { DataLayout::NCHW }));
+const auto dataset_quant_per_channel_nightly_f32 = combine(combine(combine(datasets::LargeShapes(), datasets::QuantizedPerChannelTypes()),
+                                                                   framework::dataset::make("DataType", DataType::F32)),
+                                                           framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }));
+const auto dataset_quant_per_channel_nightly_f16 = combine(combine(combine(datasets::LargeShapes(), datasets::QuantizedPerChannelTypes()),
+                                                                   framework::dataset::make("DataType", DataType::F16)),
+                                                           framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }));
+} // namespace
 TEST_SUITE(CL)
 TEST_SUITE(DequantizationLayer)
 
@@ -97,14 +124,12 @@
 using CLDequantizationLayerFixture = DequantizationValidationFixture<CLTensor, CLAccessor, CLDequantizationLayer, T>;
 
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDequantizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), datasets::QuantizedTypes()),
-                                                                                                                framework::dataset::make("DataType", DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDequantizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, concat(dataset_quant_f16, dataset_quant_per_channel_f16))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDequantizationLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), datasets::QuantizedTypes()),
-                                                                                                              framework::dataset::make("DataType", DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDequantizationLayerFixture<half>, framework::DatasetMode::NIGHTLY, concat(dataset_quant_nightly_f16, dataset_quant_per_channel_nightly_f16))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -112,14 +137,12 @@
 TEST_SUITE_END() // FP16
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDequantizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), datasets::QuantizedTypes()),
-                                                                                                                 framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDequantizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, concat(dataset_quant_f32, dataset_quant_per_channel_f32))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDequantizationLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), datasets::QuantizedTypes()),
-                                                                                                               framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDequantizationLayerFixture<float>, framework::DatasetMode::NIGHTLY, concat(dataset_quant_nightly_f32, dataset_quant_per_channel_nightly_f32))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);