Mixed data-layout testing on high-priority operators

Change data layouts after the configure step in validation tests for:
  - Scale
  - Pooling
  - FullyConnected
  - DepthwiseConvolution
  - DirectConvolution
  - FFTConvolution
  - WinogradConvolution
  - GEMMConvolution (Indirect GEMM included)
Extend the fixtures accordingly.

Fixes for the new mixed data-layout tests

Resolves: COMPMID-4162
Change-Id: I2f2eb2075f7e24ab3872249d88cadb57b82c5dde
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5326
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
diff --git a/src/core/cpu/kernels/pooling/neon/fp16.cpp b/src/core/cpu/kernels/pooling/neon/fp16.cpp
index 314be37..1ecceaf 100644
--- a/src/core/cpu/kernels/pooling/neon/fp16.cpp
+++ b/src/core/cpu/kernels/pooling/neon/fp16.cpp
@@ -93,7 +93,7 @@
             // Store result
             vst1q_f16(reinterpret_cast<float16_t *>(out.ptr()) + x_off, vres);
 
-            const uint32_t   offset_base    = offset_no_padding<float16_t>(in.offset(), id, *src->info(), pool_stride_x, pool_stride_y);
+            const uint32_t   offset_base    = offset_no_padding<float16_t>(in.offset(), id, *src->info(), pool_stride_x, pool_stride_y, DataLayout::NHWC);
             const uint32_t   offset_x0      = (uint32_t)offset_base / sizeof(float16_t) + x_off;
             const uint32_t   offset_x1      = (uint32_t)offset_x0 + in_stride_y / sizeof(float16_t) - pad_right;
             const uint32_t   offset_x2      = (uint32_t)offset_x0 + in_stride_z / sizeof(float16_t) - pad_right * src->info()->tensor_shape()[1];
@@ -132,7 +132,7 @@
             // Store result
             *(reinterpret_cast<float16_t *>(out.ptr()) + x_off) = res;
 
-            const uint32_t offset_base = offset_no_padding<float16_t>(in.offset(), id, *src->info(), pool_stride_x, pool_stride_y);
+            const uint32_t offset_base = offset_no_padding<float16_t>(in.offset(), id, *src->info(), pool_stride_x, pool_stride_y, DataLayout::NHWC);
             const uint32_t offset_x0   = (uint32_t)offset_base / sizeof(float16_t) + x_off;
             const uint32_t offset_x1   = (uint32_t)offset_x0 + in_stride_y / sizeof(float16_t) - pad_right;
             const uint32_t offset_x2   = (uint32_t)offset_x0 + in_stride_z / sizeof(float16_t) - pad_right * src->info()->tensor_shape()[1];