COMPMID-1667: Add 4D tensor support to CLWidthConcatenateLayerKernel

Change-Id: Ibc0b1242804c2fdb183825406e3c78bd0d1d3564
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/154368
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Tested-by: bsgcomp <bsgcomp@arm.com>
diff --git a/tests/validation/CL/WidthConcatenateLayer.cpp b/tests/validation/CL/WidthConcatenateLayer.cpp
index 6af3c64..6ff1dfc 100644
--- a/tests/validation/CL/WidthConcatenateLayer.cpp
+++ b/tests/validation/CL/WidthConcatenateLayer.cpp
@@ -103,14 +103,16 @@
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWidthConcatenateLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
-                                                                                                                  DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWidthConcatenateLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()),
+                                                                                                                  framework::dataset::make("DataType",
+                                                                                                                          DataType::F16)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWidthConcatenateLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType",
-                                                                                                                DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLWidthConcatenateLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(concat(datasets::Large2DShapes(), datasets::Small4DShapes()),
+                                                                                                                framework::dataset::make("DataType",
+                                                                                                                        DataType::F16)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -118,8 +120,9 @@
 TEST_SUITE_END()
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWidthConcatenateLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
-                                                                                                                   DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWidthConcatenateLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()),
+                                                                                                                   framework::dataset::make("DataType",
+                                                                                                                           DataType::F32)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
diff --git a/tests/validation/reference/WidthConcatenateLayer.cpp b/tests/validation/reference/WidthConcatenateLayer.cpp
index 8662199..6be171b 100644
--- a/tests/validation/reference/WidthConcatenateLayer.cpp
+++ b/tests/validation/reference/WidthConcatenateLayer.cpp
@@ -59,20 +59,24 @@
     {
         ARM_COMPUTE_ERROR_ON(width_offset >= width_out);
 
-        const int width  = src.shape().x();
-        const int height = src.shape().y();
-        const int depth  = src.shape().z();
+        const int width      = src.shape().x();
+        const int height     = src.shape().y();
+        const int depth      = src.shape().z();
+        const int upper_dims = src.shape().total_size() / (width * height * depth);
 
         const T *src_ptr = src.data();
         T       *dst_ptr = dst.data();
 
-        for(int d = 0; d < depth; ++d)
+        for(int u = 0; u < upper_dims; ++u)
         {
-            for(int r = 0; r < height; ++r)
+            for(int d = 0; d < depth; ++d)
             {
-                int offset = d * height + r;
-                std::copy(src_ptr, src_ptr + width, dst_ptr + width_offset + offset * width_out);
-                src_ptr += width;
+                for(int r = 0; r < height; ++r)
+                {
+                    const int offset = u * height * depth + d * height + r;
+                    std::copy(src_ptr, src_ptr + width, dst_ptr + width_offset + offset * width_out);
+                    src_ptr += width;
+                }
             }
         }