COMPMID-661: Fix scale border issue (#38)

Change-Id: If1dcca724e5e5f5ab363ffc16b0ef8c943e0b657
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110105
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/src/core/CL/cl_kernels/scale.cl b/src/core/CL/cl_kernels/scale.cl
index d533d97..a2ae8c4 100644
--- a/src/core/CL/cl_kernels/scale.cl
+++ b/src/core/CL/cl_kernels/scale.cl
@@ -49,13 +49,23 @@
 inline const float8 transform_bilinear(const float2 coord, const float2 scale)
 {
     const float4 in_x_coords = (float4)(coord.s0, 1 + coord.s0, 2 + coord.s0, 3 + coord.s0);
-    const float4 new_x       = (in_x_coords + ((float4)(0.5f))) * (float4)(scale.s0) - (float4)(0.5f);
-    const float4 new_y       = (float4)((coord.s1 + 0.5f) * scale.s1 - 0.5f);
+#ifdef SAMPLING_POLICY_TOP_LEFT
+    const float4 new_x = in_x_coords * (float4)(scale.s0);
+    const float4 new_y = (float4)(coord.s1 * scale.s1);
     return (float8)(new_x.s0, new_y.s0, new_x.s1, new_y.s1, new_x.s2, new_y.s2, new_x.s3, new_y.s3);
+#elif SAMPLING_POLICY_CENTER
+    const float4 new_x = (in_x_coords + ((float4)(0.5f))) * (float4)(scale.s0) - (float4)(0.5f);
+    const float4 new_y = (float4)((coord.s1 + 0.5f) * scale.s1 - 0.5f);
+    return (float8)(new_x.s0, new_y.s0, new_x.s1, new_y.s1, new_x.s2, new_y.s2, new_x.s3, new_y.s3);
+#else /* SAMPLING_POLICY */
+#error("Unsupported sampling policy");
+#endif /* SAMPLING_POLICY */
 }
 
 /** Performs an affine transformation on an image interpolating with the NEAREAST NEIGHBOUR method. Input and output are single channel U8 or S16.
  *
+ * @note Sampling policy to used is passed as -DSAMPLING_POLICY_(TYPE) e.g. -DSAMPLING_POLICY_TOP_LEFT
+ *
  * @param[in]  in_ptr                            Pointer to the source image. Supported data types: U8, S16.
  * @param[in]  in_stride_x                       Stride of the source image in X dimension (in bytes)
  * @param[in]  in_step_x                         src_stride_x * number of elements along X processed per workitem(in bytes)
@@ -90,6 +100,8 @@
 
 /** Performs an affine transformation on an image interpolating with the BILINEAR method.
  *
+ * @note Sampling policy to used is passed as -DSAMPLING_POLICY_(TYPE) e.g. -DSAMPLING_POLICY_TOP_LEFT
+ *
  * @param[in]  in_ptr                            Pointer to the source image. Supported data types: U8, S16.
  * @param[in]  in_stride_x                       Stride of the source image in X dimension (in bytes)
  * @param[in]  in_step_x                         src_stride_x * number of elements along X processed per workitem(in bytes)
diff --git a/src/core/CL/kernels/CLScaleKernel.cpp b/src/core/CL/kernels/CLScaleKernel.cpp
index 6a5d24c..673304a 100644
--- a/src/core/CL/kernels/CLScaleKernel.cpp
+++ b/src/core/CL/kernels/CLScaleKernel.cpp
@@ -44,7 +44,7 @@
     return BorderSize(1);
 }
 
-void CLScaleKernel::configure(const ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, bool border_undefined)
+void CLScaleKernel::configure(const ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, bool border_undefined, SamplingPolicy sampling_policy)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
     ARM_COMPUTE_ERROR_ON_NULLPTR(output);
@@ -75,6 +75,7 @@
     CLBuildOptions build_opts;
     build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
     build_opts.add_option("-DBORDER_SIZE=" + support::cpp11::to_string(border.right));
+    build_opts.add_option_if_else(sampling_policy == SamplingPolicy::CENTER, "-DSAMPLING_POLICY_CENTER", "-DSAMPLING_POLICY_TOP_LEFT");
 
     std::string interpolation_name = string_from_interpolation_policy(policy);
     std::transform(interpolation_name.begin(), interpolation_name.end(), interpolation_name.begin(), ::tolower);
diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp
index b1ced7e..1918a77 100644
--- a/src/core/NEON/kernels/NEScaleKernel.cpp
+++ b/src/core/NEON/kernels/NEScaleKernel.cpp
@@ -48,12 +48,15 @@
     return BorderSize(1);
 }
 
-void NEScaleKernel::configure(const ITensor *input, const ITensor *dx, const ITensor *dy, const ITensor *offsets, ITensor *output, InterpolationPolicy policy, bool border_undefined)
+void NEScaleKernel::configure(const ITensor *input, const ITensor *dx, const ITensor *dy, const ITensor *offsets, ITensor *output, InterpolationPolicy policy, bool border_undefined,
+                              SamplingPolicy sampling_policy)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F32);
     ARM_COMPUTE_ERROR_ON_NULLPTR(output);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
     ARM_COMPUTE_ERROR_ON(output == input);
+    ARM_COMPUTE_ERROR_ON(sampling_policy != SamplingPolicy::CENTER);
+    ARM_COMPUTE_UNUSED(sampling_policy);
 
     if(policy == InterpolationPolicy::NEAREST_NEIGHBOR)
     {
diff --git a/src/runtime/CL/functions/CLGaussianPyramid.cpp b/src/runtime/CL/functions/CLGaussianPyramid.cpp
index 8436dce..4b32954 100644
--- a/src/runtime/CL/functions/CLGaussianPyramid.cpp
+++ b/src/runtime/CL/functions/CLGaussianPyramid.cpp
@@ -157,7 +157,7 @@
             _gauss5x5[i].configure(_pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i), border_mode, constant_border_value);
 
             /* Configure scale image kernel */
-            _scale_nearest[i].configure(_tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1), InterpolationPolicy::NEAREST_NEIGHBOR, border_mode == BorderMode::UNDEFINED);
+            _scale_nearest[i].configure(_tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1), InterpolationPolicy::NEAREST_NEIGHBOR, border_mode == BorderMode::UNDEFINED, SamplingPolicy::CENTER);
         }
 
         _tmp.allocate();
diff --git a/src/runtime/CL/functions/CLScale.cpp b/src/runtime/CL/functions/CLScale.cpp
index 49b0275..cb68481 100644
--- a/src/runtime/CL/functions/CLScale.cpp
+++ b/src/runtime/CL/functions/CLScale.cpp
@@ -31,10 +31,10 @@
 
 using namespace arm_compute;
 
-void CLScale::configure(ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value)
+void CLScale::configure(ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLScaleKernel>();
-    k->configure(input, output, policy, border_mode == BorderMode::UNDEFINED);
+    k->configure(input, output, policy, border_mode == BorderMode::UNDEFINED, sampling_policy);
     _kernel = std::move(k);
     _border_handler.configure(input, _kernel->border_size(), border_mode, constant_border_value);
 }
diff --git a/src/runtime/NEON/functions/NEScale.cpp b/src/runtime/NEON/functions/NEScale.cpp
index bbd3fac..bd565c9 100644
--- a/src/runtime/NEON/functions/NEScale.cpp
+++ b/src/runtime/NEON/functions/NEScale.cpp
@@ -42,9 +42,11 @@
 
 namespace
 {
-void precompute_dx_dy_offsets(ITensor *dx, ITensor *dy, ITensor *offsets, float wr, float hr, size_t input_element_size)
+void precompute_dx_dy_offsets(ITensor *dx, ITensor *dy, ITensor *offsets, float wr, float hr, size_t input_element_size, SamplingPolicy sampling_policy)
 {
     ARM_COMPUTE_ERROR_ON(nullptr == offsets);
+    ARM_COMPUTE_ERROR_ON(sampling_policy != SamplingPolicy::CENTER);
+    ARM_COMPUTE_UNUSED(sampling_policy);
 
     Window win;
     win.set(Window::DimX, Window::Dimension(0, offsets->info()->dimension(0), 1));
@@ -95,7 +97,7 @@
 {
 }
 
-void NEScale::configure(ITensor *input, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value)
+void NEScale::configure(ITensor *input, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy)
 {
     ARM_COMPUTE_ERROR_ON(nullptr == input);
     ARM_COMPUTE_ERROR_ON(nullptr == output);
@@ -131,13 +133,13 @@
             TensorInfo tensor_info_offsets(shape, Format::S32);
             _offsets.allocator()->init(tensor_info_offsets);
 
-            _scale_kernel.configure(input, nullptr, nullptr, &_offsets, output, policy, border_undefined);
+            _scale_kernel.configure(input, nullptr, nullptr, &_offsets, output, policy, border_undefined, sampling_policy);
 
             // Allocate once the configure methods have been called
             _offsets.allocator()->allocate();
 
             // Pre-compute offsets for nearest interpolation
-            precompute_dx_dy_offsets(nullptr, nullptr, &_offsets, wr, hr, input_element_size);
+            precompute_dx_dy_offsets(nullptr, nullptr, &_offsets, wr, hr, input_element_size, sampling_policy);
             break;
         }
         case InterpolationPolicy::BILINEAR:
@@ -149,7 +151,7 @@
             _dx.allocator()->init(tensor_info_dxdy);
             _dy.allocator()->init(tensor_info_dxdy);
 
-            _scale_kernel.configure(input, &_dx, &_dy, &_offsets, output, policy, border_undefined);
+            _scale_kernel.configure(input, &_dx, &_dy, &_offsets, output, policy, border_undefined, sampling_policy);
 
             // Allocate once the configure methods have been called
             _offsets.allocator()->allocate();
@@ -157,7 +159,7 @@
             _dy.allocator()->allocate();
 
             // Pre-compute dx, dy and offsets for bilinear interpolation
-            precompute_dx_dy_offsets(&_dx, &_dy, &_offsets, wr, hr, input_element_size);
+            precompute_dx_dy_offsets(&_dx, &_dy, &_offsets, wr, hr, input_element_size, sampling_policy);
             break;
         }
         case InterpolationPolicy::AREA: