Optimize Quantized/Integer Bilinear Scale for Neon™

This patch introduces several performance optimizations regarding the Bilinear Scale operator with REPLICATE Border mode. Changes apply only to NHWC.

This patch
   - Reduces the memory footprint by disabling precomputation of indices and weights when they're not used
   - Rewrites the kernels for QASYMM8/QASYMM8_SIGNED/U8(Uint8)
   - Adds S8(Int8) Bilinear Scale for Border mode REPLICATE
   - Removes Bilinear Scale SVE kernels for Quantized and Integer types and adjusts the heuristics to choose the Neon™ implementation
   - Adds new test cases where the input and output of the Bilinear Scale operator have different quantization scale and offset

Resolves: COMPMID-5453, COMPMID-5454

Change-Id: I3d251e76e0c6978fd5a0a1795ec62ab536bec93c
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8250
Reviewed-by: SiCong Li <sicong.li@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/tests/datasets/ScaleValidationDataset.h b/tests/datasets/ScaleValidationDataset.h
index 11e0343..c6987c0 100644
--- a/tests/datasets/ScaleValidationDataset.h
+++ b/tests/datasets/ScaleValidationDataset.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,12 +24,8 @@
 #ifndef ARM_COMPUTE_TEST_SCALE_VALIDATION_DATASET
 #define ARM_COMPUTE_TEST_SCALE_VALIDATION_DATASET
 
-#include "utils/TypePrinter.h"
-
-#include "arm_compute/core/TensorShape.h"
 #include "arm_compute/core/Types.h"
 #include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/InterpolationPolicyDataset.h"
 #include "tests/datasets/SamplingPolicyDataset.h"
 #include "tests/datasets/ShapeDatasets.h"
 
@@ -149,7 +145,7 @@
     concat(concat(concat(ScaleShapesBaseDataSet<1, 1, (element_per_iteration), 0>(),  \
                          ScaleShapesBaseDataSet<1, 1, (element_per_iteration), 2>()), \
                   ScaleShapesBaseDataSet<3, 1, (element_per_iteration), 1>()),        \
-           ScaleShapesBaseDataSet<3, 3, (element_per_iteration), 0>())
+           ScaleShapesBaseDataSet<40, 3, (element_per_iteration), 0>())
 
 // To prevent long precommit time for OpenCL, shape set for OpenCL is separated into below two parts.
 /** Generated shapes for precommits to achieve essential coverage. Used by CL precommit and nightly
@@ -170,13 +166,19 @@
                   ScaleShapesBaseDataSet<3, 1, (element_per_iteration), 0>()),        \
            ScaleShapesBaseDataSet<3, 3, (element_per_iteration), 0>())
 
-/** Generating dataset for non-quantized data tyeps with the given shapes */
+/** Generating dataset for non-quantized data types with the given shapes */
 #define ASSEMBLE_DATASET(shape, samping_policy_set)             \
     combine(combine(combine(combine((shape), ScaleDataLayouts), \
                             ScaleInterpolationPolicySet),       \
                     datasets::BorderModes()),                   \
             samping_policy_set)
 
+#define ASSEMBLE_S8_DATASET(shape, samping_policy_set)                                                           \
+    combine(combine(combine(combine((shape), framework::dataset::make("DataLayout", DataLayout::NHWC)),          \
+                            framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::BILINEAR })), \
+                    framework::dataset::make("BorderMode", { BorderMode::REPLICATE })),                          \
+            samping_policy_set)
+
 #define ASSEMBLE_NHWC_DATASET(shape, samping_policy_set)                                                      \
     combine(combine(combine(combine((shape), framework::dataset::make("DataLayout", DataLayout::NHWC)),       \
                             ScaleInterpolationPolicySet),                                                     \
@@ -192,6 +194,16 @@
                     datasets::BorderModes()),                                         \
             sampling_policy_set)
 
+/** Generating dataset for quantized data types with the given shapes, using separate input and output quantization info sets */
+#define ASSEMBLE_DIFFERENTLY_QUANTIZED_DATASET(shape, sampling_policy_set, input_quant_info_set, output_quant_info_set) \
+    combine(combine(combine(combine(combine(combine(shape,                                                              \
+                                                    input_quant_info_set),                                              \
+                                            output_quant_info_set),                                                     \
+                                    framework::dataset::make("DataLayout", { DataLayout::NHWC })),                      \
+                            framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::BILINEAR })),        \
+                    framework::dataset::make("BorderMode", { BorderMode::REPLICATE })),                                 \
+            sampling_policy_set)
+
 } // namespace datasets
 } // namespace test
 } // namespace arm_compute