[COMPMID-1353] Add support for 4D Softmax layer on OpenCL

Change-Id: I4342d4240fe5b1aab234c015684a1216c3990a5f
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145631
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
diff --git a/tests/datasets/ShapeDatasets.h b/tests/datasets/ShapeDatasets.h
index 4d75a16..c7955bc 100644
--- a/tests/datasets/ShapeDatasets.h
+++ b/tests/datasets/ShapeDatasets.h
@@ -794,6 +794,24 @@
         TensorShape{ 1000U, 10U },
                      TensorShape{ 3989U, 10U },
                      TensorShape{ 7339U, 11U },
+
+    })
+    {
+    }
+};
+
+/** Data set containing small softmax layer shapes with more than two dimensions (3D and 4D). */
+class SoftmaxLayer4DShapes final : public ShapeDataset
+{
+public:
+    SoftmaxLayer4DShapes()
+        : ShapeDataset("Shape",
+    {
+        TensorShape{ 9U, 9U, 9U, 9U },
+                     TensorShape{ 256U, 10U, 1U, 9U },
+                     TensorShape{ 353U, 8U, 2U },
+                     TensorShape{ 781U, 5U, 2U, 2U },
+                     TensorShape{ 781U, 11U, 1U, 2U },
     })
     {
     }
diff --git a/tests/validation/CL/SoftmaxLayer.cpp b/tests/validation/CL/SoftmaxLayer.cpp
index 66ca0b8..7dab626 100644
--- a/tests/validation/CL/SoftmaxLayer.cpp
+++ b/tests/validation/CL/SoftmaxLayer.cpp
@@ -82,16 +82,20 @@
     validate(src.info()->valid_region(), valid_region);
     validate(dst.info()->valid_region(), valid_region);
 
-    // Get reduction kernel info
-    CLLogits1DMaxShiftExpSumKernel::ParallelReductionInfo reduction_info = CLLogits1DMaxShiftExpSumKernel::is_parallel_reduction(shape.x());
+    // CLLogits1DMaxShiftExpSumKernel configures the paddings only in the 2D case
+    if(shape.num_dimensions() <= 2)
+    {
+        // Get reduction kernel info
+        CLLogits1DMaxShiftExpSumKernel::ParallelReductionInfo reduction_info = CLLogits1DMaxShiftExpSumKernel::is_parallel_reduction(shape.x());
 
-    // Validate src padding
-    const PaddingSize padding_src = PaddingCalculator(shape.x(), std::get<1>(reduction_info)).required_padding();
-    validate(src.info()->padding(), padding_src);
+        // Validate src padding for 2D softmax
+        const PaddingSize padding_src = PaddingCalculator(shape.x(), std::get<1>(reduction_info)).required_padding();
+        validate(src.info()->padding(), padding_src);
 
-    // Validate dst padding
-    const PaddingSize padding_dst = PaddingCalculator(shape.x(), 16).required_padding();
-    validate(dst.info()->padding(), padding_dst);
+        // Validate dst padding for 2D softmax
+        const PaddingSize padding_dst = PaddingCalculator(shape.x(), 16).required_padding();
+        validate(dst.info()->padding(), padding_dst);
+    }
 }
 
 // *INDENT-OFF*
@@ -144,6 +148,13 @@
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f16);
 }
+FIXTURE_DATA_TEST_CASE(Run4D, CLSoftmaxLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayer4DShapes(),
+                                                                                                            framework::dataset::make("DataType", DataType::F16)),
+                                                                                                    framework::dataset::make("Beta", { 1.0f, 2.0f })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16);
+}
 TEST_SUITE_END()
 
 TEST_SUITE(FP32)
@@ -161,6 +172,13 @@
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
+FIXTURE_DATA_TEST_CASE(Run4D, CLSoftmaxLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayer4DShapes(),
+                                                                                                             framework::dataset::make("DataType", DataType::F32)),
+                                                                                                     framework::dataset::make("Beta", { 1.0f, 2.0f })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
 TEST_SUITE_END()
 TEST_SUITE_END()
 
@@ -185,6 +203,15 @@
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
+FIXTURE_DATA_TEST_CASE(Run4D, CLSoftmaxLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayer4DShapes(),
+                                                                                                                        framework::dataset::make("DataType", DataType::QASYMM8)),
+                                                                                                                combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }),
+                                                                                                                        framework::dataset::make("Beta", { 1.0f, 2.0f }))))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+
 TEST_SUITE_END()
 TEST_SUITE_END()
 
diff --git a/tests/validation/reference/SoftmaxLayer.cpp b/tests/validation/reference/SoftmaxLayer.cpp
index aa640ad..7f2c36e 100644
--- a/tests/validation/reference/SoftmaxLayer.cpp
+++ b/tests/validation/reference/SoftmaxLayer.cpp
@@ -39,21 +39,25 @@
     // Create reference
     SimpleTensor<T> dst{ src.shape(), src.data_type(), 1 };
 
-    // Compute reference
-    const int cols       = src.shape()[0];
-    const int upper_dims = src.num_elements() / cols;
+    const bool is_4D_input = (src.shape().num_dimensions() > 2);
+
+    // Compute reference. Lower dims are
+    // - the number of columns for the 2D case
+    // - the product of the first three dimensions (i.e., the flattened softmax window of each batch) for shapes with more than two dimensions
+    const int lower_dims = (is_4D_input ? src.shape()[2] * src.shape()[1] * src.shape()[0] : src.shape()[0]);
+    const int upper_dims = src.num_elements() / lower_dims;
 
     for(int r = 0; r < upper_dims; ++r)
     {
-        const T *src_row_ptr = src.data() + r * cols;
-        T       *dst_row_ptr = dst.data() + r * cols;
+        const T *src_row_ptr = src.data() + r * lower_dims;
+        T       *dst_row_ptr = dst.data() + r * lower_dims;
 
         // Find max
-        const T max = *std::max_element(src_row_ptr, src_row_ptr + cols);
+        const T max = *std::max_element(src_row_ptr, src_row_ptr + lower_dims);
 
         // Regularize
         T sum(0.f);
-        std::transform(src_row_ptr, src_row_ptr + cols, dst_row_ptr, [&sum, max, beta](T val)
+        std::transform(src_row_ptr, src_row_ptr + lower_dims, dst_row_ptr, [&sum, max, beta](T val)
         {
             const T res(std::exp((val - max) * beta));
             sum += res;
@@ -61,7 +65,7 @@
         });
 
         // Normalize
-        std::transform(dst_row_ptr, dst_row_ptr + cols, dst_row_ptr, [sum](T val)
+        std::transform(dst_row_ptr, dst_row_ptr + lower_dims, dst_row_ptr, [sum](T val)
         {
             return val / sum;
         });