COMPMID-2336 Extend validation for depthwise native and fix same pad calculator

Change-Id: I9f5cc95bc0cbd94869ac13064ffa0aa0f52a7a61
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1684
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index b0e2632..bc461e7 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -839,10 +839,12 @@
  * @param[in] conv_info     Convolution information (containing strides)
  * @param[in] data_layout   (Optional) Data layout of the input and weights tensor
  * @param[in] dilation      (Optional) Dilation factor used in the convolution.
+ * @param[in] rounding_type (Optional) Dimension rounding type when down-scaling. Defaults to DimensionRoundingType::FLOOR.
  *
  * @return PadStrideInfo for SAME padding
  */
-PadStrideInfo calculate_same_pad(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info, DataLayout data_layout = DataLayout::NCHW, const Size2D &dilation = Size2D(1u, 1u));
+PadStrideInfo calculate_same_pad(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info, DataLayout data_layout = DataLayout::NCHW, const Size2D &dilation = Size2D(1u, 1u),
+                                 const DimensionRoundingType &rounding_type = DimensionRoundingType::FLOOR);
 
 /** Returns expected width and height of the deconvolution's output tensor.
  *
diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp
index 5d32750..d0bffdf 100644
--- a/src/core/Utils.cpp
+++ b/src/core/Utils.cpp
@@ -331,37 +331,44 @@
     return res;
 }
 
-PadStrideInfo arm_compute::calculate_same_pad(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info, DataLayout data_layout, const Size2D &dilation)
+PadStrideInfo arm_compute::calculate_same_pad(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info, DataLayout data_layout, const Size2D &dilation,
+                                              const DimensionRoundingType &rounding_type)
 {
-    const unsigned int width_idx  = arm_compute::get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
-    const unsigned int height_idx = arm_compute::get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
-    const auto        &strides    = conv_info.stride();
+    const unsigned int width_idx     = arm_compute::get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+    const unsigned int height_idx    = arm_compute::get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+    const unsigned int in_width      = input_shape[width_idx];
+    const unsigned int in_height     = input_shape[height_idx];
+    const unsigned int kernel_width  = weights_shape[width_idx];
+    const unsigned int kernel_height = weights_shape[height_idx];
+    const auto        &strides       = conv_info.stride();
 
     // Calculate output dimensions
-    const int out_width  = (input_shape[width_idx] + strides.first - 1) / strides.first;
-    const int out_height = (input_shape[height_idx] + strides.second - 1) / strides.second;
+    const auto         is_ceil    = static_cast<unsigned int>(rounding_type == DimensionRoundingType::CEIL);
+    const unsigned int out_width  = ((in_width - is_ceil) + strides.first - 1) / strides.first + is_ceil;
+    const unsigned int out_height = ((in_height - is_ceil) + strides.second - 1) / strides.second + is_ceil;
 
     // Calculate effective weights sizes
-    const int real_weight_width  = (weights_shape[width_idx] - 1) * dilation.x() + 1;
-    const int real_weight_height = (weights_shape[height_idx] - 1) * dilation.y() + 1;
+    const int real_weight_width  = (kernel_width - 1) * dilation.x() + 1;
+    const int real_weight_height = (kernel_height - 1) * dilation.y() + 1;
 
     // Calculate total pad
-    const int pad_width  = (out_width - 1) * strides.first + real_weight_width - input_shape[width_idx];
-    const int pad_height = (out_height - 1) * strides.second + real_weight_height - input_shape[height_idx];
+    const int pad_width  = std::max(0, static_cast<int>((out_width - 1) * strides.first + real_weight_width - in_width));
+    const int pad_height = std::max(0, static_cast<int>((out_height - 1) * strides.second + real_weight_height - in_height));
 
     // Calculate individual paddings
-    const int same_pad_left   = pad_width / 2;
-    const int same_pad_top    = pad_height / 2;
-    const int same_pad_right  = pad_width - same_pad_left;
-    const int same_pad_bottom = pad_height - same_pad_top;
+    const unsigned int pad_left   = pad_width / 2;
+    const unsigned int pad_top    = pad_height / 2;
+    const unsigned int pad_right  = pad_width - pad_left;
+    const unsigned int pad_bottom = pad_height - pad_top;
 
-    return { static_cast<unsigned int>(strides.first),
-             static_cast<unsigned int>(strides.second),
-             static_cast<unsigned int>(same_pad_left),
-             static_cast<unsigned int>(same_pad_right),
-             static_cast<unsigned int>(same_pad_top),
-             static_cast<unsigned int>(same_pad_bottom),
-             DimensionRoundingType::CEIL };
+    PadStrideInfo same_info(strides.first, strides.second, pad_left, pad_right, pad_top, pad_bottom, rounding_type);
+
+    // Check that the predicted output shape matches the one scaled_dimensions() computes from the generated PadStrideInfo
+    const auto out_dims = scaled_dimensions(in_width, in_height, kernel_width, kernel_height, same_info, dilation);
+    ARM_COMPUTE_ERROR_ON(out_dims.first != out_width || out_dims.second != out_height);
+    ARM_COMPUTE_UNUSED(out_dims);
+
+    return same_info;
 }
 
 std::pair<unsigned int, unsigned int> arm_compute::deconvolution_output_dimensions(
diff --git a/tests/validation/NEON/DepthwiseConvolutionNativeLayer.cpp b/tests/validation/NEON/DepthwiseConvolutionNativeLayer.cpp
index a44c2dc..64f6a93 100644
--- a/tests/validation/NEON/DepthwiseConvolutionNativeLayer.cpp
+++ b/tests/validation/NEON/DepthwiseConvolutionNativeLayer.cpp
@@ -75,8 +75,11 @@
 /** Batch values to test - Nightly */
 const auto batch_values_nightly = framework::dataset::make("batch", { 1U, 3U });
 
-/** Kernel size values to test - All */
-const auto kernel_sz_values = framework::dataset::make("kernel_size", { Size2D(3U, 5U), Size2D(5U, 3U) });
+/** Kernel size values to test - Precommit */
+const auto kernel_sz_values_precommit = framework::dataset::make("kernel_size", { Size2D(1U, 1U), Size2D(1U, 3U) });
+
+/** Kernel size values to test - Nightly */
+const auto kernel_sz_values_nightly = framework::dataset::make("kernel_size", { Size2D(3U, 5U), Size2D(5U, 1U), Size2D(1U, 7U), Size2D(9U, 7U) });
 
 /** Depth multiplier values to test - All */
 const auto depth_multiplier_values = framework::dataset::make("depth_multiplier", { 1U, 3U });
@@ -154,7 +157,7 @@
                                                                                                                                            height_values_precommit),
                                                                                                                                            channel_values_precommit),
                                                                                                                                            batch_values_precommit),
-                                                                                                                                           kernel_sz_values),
+                                                                                                                                           kernel_sz_values_precommit),
                                                                                                                                            depth_multiplier_values),
                                                                                                                                            dilation_values),
                                                                                                                                            stride_values),
@@ -171,7 +174,7 @@
                                                                                                 height_values_precommit),
                                                                                                 channel_values_precommit),
                                                                                                 batch_values_precommit),
-                                                                                                kernel_sz_values),
+                                                                                                kernel_sz_values_precommit),
                                                                                                 depth_multiplier_values),
                                                                                                 dilation_values),
                                                                                                 stride_values),
@@ -188,7 +191,7 @@
                                                                                                 height_values_nightly),
                                                                                                 channel_values_nightly),
                                                                                                 batch_values_nightly),
-                                                                                                kernel_sz_values),
+                                                                                                kernel_sz_values_nightly),
                                                                                                 depth_multiplier_values),
                                                                                                 dilation_values),
                                                                                                 stride_values),