Optimize CpuScale NHWC F32/F16

- Rework CpuScaleKernel F32/F16 NHWC - bilinear
- Rework CpuScaleKernel F32/F16 NHWC - nearest
- Add test to validate the vector computation path

Resolves COMPMID-4801, COMPMID-4802

Change-Id: Ie6e4f262a8cce509edd7b8f564c940758625c58a
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6361
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com>
diff --git a/tests/datasets/ScaleValidationDataset.h b/tests/datasets/ScaleValidationDataset.h
index c0073f9..11e0343 100644
--- a/tests/datasets/ScaleValidationDataset.h
+++ b/tests/datasets/ScaleValidationDataset.h
@@ -147,7 +147,7 @@
  */
 #define SCALE_SHAPE_DATASET(element_per_iteration)                                    \
     concat(concat(concat(ScaleShapesBaseDataSet<1, 1, (element_per_iteration), 0>(),  \
-                        ScaleShapesBaseDataSet<1, 1, (element_per_iteration), 2>()),  \
+                         ScaleShapesBaseDataSet<1, 1, (element_per_iteration), 2>()), \
                   ScaleShapesBaseDataSet<3, 1, (element_per_iteration), 1>()),        \
            ScaleShapesBaseDataSet<3, 3, (element_per_iteration), 0>())
 
@@ -166,7 +166,7 @@
  */
 #define SCALE_NIGHTLY_SHAPE_DATASET(element_per_iteration)                            \
     concat(concat(concat(ScaleShapesBaseDataSet<1, 1, (element_per_iteration), 0>(),  \
-                        ScaleShapesBaseDataSet<1, 1, (element_per_iteration), 1>()),  \
+                         ScaleShapesBaseDataSet<1, 1, (element_per_iteration), 1>()), \
                   ScaleShapesBaseDataSet<3, 1, (element_per_iteration), 0>()),        \
            ScaleShapesBaseDataSet<3, 3, (element_per_iteration), 0>())
 
@@ -177,6 +177,12 @@
                     datasets::BorderModes()),                   \
             samping_policy_set)
 
+#define ASSEMBLE_NHWC_DATASET(shape, samping_policy_set)                                                      \
+    combine(combine(combine(combine((shape), framework::dataset::make("DataLayout", DataLayout::NHWC)),       \
+                            ScaleInterpolationPolicySet),                                                     \
+                    framework::dataset::make("BorderMode", { BorderMode::CONSTANT, BorderMode::REPLICATE })), \
+            samping_policy_set)
+
 /** Generating dataset for quantized data tyeps with the given shapes */
 #define ASSEMBLE_QUANTIZED_DATASET(shape, sampling_policy_set, quantization_info_set) \
     combine(combine(combine(combine(combine(shape,                                    \