COMPMID-577: Implement CL validation for GaussianPyramid

Change-Id: If879cbe15b14d97818c24d44b29fc69b6c8cb686
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/127601
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp b/src/core/CL/kernels/CLGaussianPyramidKernel.cpp
index 34a228c..a4fda36 100644
--- a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp
+++ b/src/core/CL/kernels/CLGaussianPyramidKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -32,20 +32,19 @@
 using namespace arm_compute;
 
 CLGaussianPyramidHorKernel::CLGaussianPyramidHorKernel()
-    : _border_size(0), _l2_load_offset(0)
+    : _l2_load_offset(0)
 {
 }
 
 BorderSize CLGaussianPyramidHorKernel::border_size() const
 {
-    return _border_size;
+    return BorderSize(0, 2);
 }
 
-void CLGaussianPyramidHorKernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined)
+void CLGaussianPyramidHorKernel::configure(const ICLTensor *input, ICLTensor *output)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U16);
-    ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != 2 * output->info()->dimension(0));
     ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != output->info()->dimension(1));
 
     for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i)
@@ -53,9 +52,8 @@
         ARM_COMPUTE_ERROR_ON(input->info()->dimension(i) != output->info()->dimension(i));
     }
 
-    _input       = input;
-    _output      = output;
-    _border_size = BorderSize(border_undefined ? 0 : 2, 2);
+    _input  = input;
+    _output = output;
 
     // Create kernel
     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("gaussian1x5_sub_x"));
@@ -64,9 +62,9 @@
     constexpr unsigned int num_elems_processed_per_iteration = 16;
     constexpr unsigned int num_elems_read_per_iteration      = 20;
     constexpr unsigned int num_elems_written_per_iteration   = 8;
-    constexpr float        scale_x                           = 0.5f;
+    const float            scale_x                           = static_cast<float>(output->info()->dimension(0)) / input->info()->dimension(0);
 
-    Window                 win = calculate_max_window_horizontal(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
+    Window                 win = calculate_max_window_horizontal(*input->info(), Steps(num_elems_processed_per_iteration));
     AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration, scale_x);
 
     // Sub sampling selects odd pixels (1, 3, 5, ...) for images with even
@@ -95,11 +93,7 @@
                               AccessWindowHorizontal(input->info(), _l2_load_offset, num_elems_read_per_iteration),
                               output_access);
 
-    ValidRegion valid_region = input->info()->valid_region();
-    valid_region.anchor.set(0, std::ceil((valid_region.anchor[0] + (border_undefined ? border_size().left : 0)) / 2.f));
-    valid_region.shape.set(0, (valid_region.shape[0] - (border_undefined ? border_size().right : 0)) / 2 - valid_region.anchor[0]);
-
-    output_access.set_valid_region(win, valid_region);
+    output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
 
     ICLKernel::configure(win);
 }
@@ -139,12 +133,11 @@
     return BorderSize(2, 0);
 }
 
-void CLGaussianPyramidVertKernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined)
+void CLGaussianPyramidVertKernel::configure(const ICLTensor *input, ICLTensor *output)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U16);
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
     ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != output->info()->dimension(0));
-    ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != 2 * output->info()->dimension(1));
 
     for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i)
     {
@@ -163,10 +156,10 @@
     constexpr unsigned int num_elems_written_per_iteration   = 8;
     constexpr unsigned int num_elems_read_per_iteration      = 8;
     constexpr unsigned int num_rows_per_iteration            = 5;
-    constexpr float        scale_y                           = 0.5f;
 
-    Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration, num_rows_processed_per_iteration),
-                                      border_undefined, border_size());
+    const float scale_y = static_cast<float>(output->info()->dimension(1)) / input->info()->dimension(1);
+
+    Window                win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration, num_rows_processed_per_iteration));
     AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_written_per_iteration, num_rows_per_iteration, 1.f, scale_y);
 
     // Determine whether we need to load even or odd rows. See above for a
@@ -182,11 +175,7 @@
                               AccessWindowRectangle(input->info(), 0, _t2_load_offset, num_elems_read_per_iteration, num_rows_per_iteration),
                               output_access);
 
-    ValidRegion valid_region = input->info()->valid_region();
-    valid_region.anchor.set(1, std::ceil((valid_region.anchor[1] + (border_undefined ? border_size().top : 0)) / 2.f));
-    valid_region.shape.set(1, (valid_region.shape[1] - (border_undefined ? border_size().bottom : 0)) / 2 - valid_region.anchor[1]);
-
-    output_access.set_valid_region(win, valid_region);
+    output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
 
     ICLKernel::configure(win);
 }
diff --git a/src/runtime/CL/functions/CLGaussianPyramid.cpp b/src/runtime/CL/functions/CLGaussianPyramid.cpp
index 4b32954..ddce5fb 100644
--- a/src/runtime/CL/functions/CLGaussianPyramid.cpp
+++ b/src/runtime/CL/functions/CLGaussianPyramid.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -49,7 +49,8 @@
 }
 
 CLGaussianPyramidHalf::CLGaussianPyramidHalf() // NOLINT
-    : _border_handler(),
+    : _horizontal_border_handler(),
+      _vertical_border_handler(),
       _horizontal_reduction(),
       _vertical_reduction()
 {
@@ -64,6 +65,9 @@
     ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != pyramid->info()->height());
     ARM_COMPUTE_ERROR_ON(SCALE_PYRAMID_HALF != pyramid->info()->scale());
 
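+    // Constant used to fill the vertical border in CONSTANT mode: constant_border_value * 16 (2 + 8 + 6), presumably the 1x5 weight sum already applied by the horizontal pass to the U16 intermediate - TODO confirm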
+    const uint16_t pixel_value_u16 = static_cast<uint16_t>(constant_border_value) * 2 + static_cast<uint16_t>(constant_border_value) * 8 + static_cast<uint16_t>(constant_border_value) * 6;
+
     /* Get number of pyramid levels */
     const size_t num_levels = pyramid->info()->num_levels();
 
@@ -72,28 +76,31 @@
 
     if(num_levels > 1)
     {
-        _border_handler       = arm_compute::support::cpp14::make_unique<CLFillBorderKernel[]>(num_levels - 1);
-        _horizontal_reduction = arm_compute::support::cpp14::make_unique<CLGaussianPyramidHorKernel[]>(num_levels - 1);
-        _vertical_reduction   = arm_compute::support::cpp14::make_unique<CLGaussianPyramidVertKernel[]>(num_levels - 1);
+        _horizontal_border_handler = arm_compute::support::cpp14::make_unique<CLFillBorderKernel[]>(num_levels - 1);
+        _vertical_border_handler   = arm_compute::support::cpp14::make_unique<CLFillBorderKernel[]>(num_levels - 1);
+        _horizontal_reduction      = arm_compute::support::cpp14::make_unique<CLGaussianPyramidHorKernel[]>(num_levels - 1);
+        _vertical_reduction        = arm_compute::support::cpp14::make_unique<CLGaussianPyramidVertKernel[]>(num_levels - 1);
 
         // Apply half scale to the X dimension of the tensor shape
         TensorShape tensor_shape = pyramid->info()->tensor_shape();
         tensor_shape.set(0, (pyramid->info()->width() + 1) * SCALE_PYRAMID_HALF);
 
         PyramidInfo pyramid_info(num_levels - 1, SCALE_PYRAMID_HALF, tensor_shape, Format::U16);
-
         _tmp.init(pyramid_info);
 
         for(size_t i = 0; i < num_levels - 1; ++i)
         {
             /* Configure horizontal kernel */
-            _horizontal_reduction[i].configure(_pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i), border_mode == BorderMode::UNDEFINED);
+            _horizontal_reduction[i].configure(_pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i));
 
             /* Configure vertical kernel */
-            _vertical_reduction[i].configure(_tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1), border_mode == BorderMode::UNDEFINED);
+            _vertical_reduction[i].configure(_tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1));
 
             /* Configure border */
-            _border_handler[i].configure(_pyramid->get_pyramid_level(i), _horizontal_reduction[i].border_size(), border_mode, PixelValue(constant_border_value));
+            _horizontal_border_handler[i].configure(_pyramid->get_pyramid_level(i), _horizontal_reduction[i].border_size(), border_mode, PixelValue(constant_border_value));
+
+            /* Configure vertical border (filled on the U16 intermediate level) */
+            _vertical_border_handler[i].configure(_tmp.get_pyramid_level(i), _vertical_reduction[i].border_size(), border_mode, PixelValue(pixel_value_u16));
         }
         _tmp.allocate();
     }
@@ -110,13 +117,15 @@
     _pyramid->get_pyramid_level(0)->map(CLScheduler::get().queue(), true /* blocking */);
     _input->map(CLScheduler::get().queue(), true /* blocking */);
     _pyramid->get_pyramid_level(0)->copy_from(*_input);
+
     _input->unmap(CLScheduler::get().queue());
     _pyramid->get_pyramid_level(0)->unmap(CLScheduler::get().queue());
 
     for(unsigned int i = 0; i < num_levels - 1; ++i)
     {
-        CLScheduler::get().enqueue(_border_handler[i], false);
+        CLScheduler::get().enqueue(_horizontal_border_handler[i], false);
         CLScheduler::get().enqueue(_horizontal_reduction[i], false);
+        CLScheduler::get().enqueue(_vertical_border_handler[i], false);
         CLScheduler::get().enqueue(_vertical_reduction[i], false);
     }
 }