Optimize FP32/16 Bilinear Scale Kernel for Neon™

This patch removes the index and weight pre-computations where they are not used and reduces the calculations performed inside the innermost loop of Scale.

Resolves: COMPMID-5452
Change-Id: Ie149b1b76a90a8cb659ada0f97aef78caf69932f
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8220
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/cpu/kernels/CpuScaleKernel.cpp b/src/cpu/kernels/CpuScaleKernel.cpp
index c9e858f..e7386a3 100644
--- a/src/cpu/kernels/CpuScaleKernel.cpp
+++ b/src/cpu/kernels/CpuScaleKernel.cpp
@@ -140,12 +140,12 @@
     ARM_COMPUTE_RETURN_ERROR_ON(output_width == 0);
     ARM_COMPUTE_RETURN_ERROR_ON(output_height == 0);
 
-    if(info.interpolation_policy == InterpolationPolicy::NEAREST_NEIGHBOR)
+    if(info.interpolation_policy == InterpolationPolicy::NEAREST_NEIGHBOR && offsets != nullptr)
     {
         ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(offsets, 1, DataType::S32);
     }
 
-    if(info.interpolation_policy == InterpolationPolicy::BILINEAR)
+    if(info.interpolation_policy == InterpolationPolicy::BILINEAR && offsets != nullptr)
     {
         ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(offsets, 1, DataType::S32);
         if(dx != nullptr && dy != nullptr)