COMPMID-884: Fix invalid read reported by Valgrind in NEDirectConvolutionLayerKernel

Change-Id: I258f03b61446e8333645efe80f2857e8c725b9de
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/118943
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
index c7534c5..ba4dbee 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
@@ -1051,11 +1051,10 @@
                                                         unsigned int &num_elems_read_per_iteration, unsigned int &num_elems_written_per_iteration, BorderSize &border_size)
 {
     // Calculate right and bottom border
-    unsigned int       kernel_size   = weights->dimension(0);
-    const unsigned int conv_stride_x = std::get<0>(conv_info.stride());
-    const unsigned int conv_stride_y = std::get<1>(conv_info.stride());
-    const int          input_width   = input->dimension(0);
-    const int          input_height  = input->dimension(1);
+    unsigned int kernel_size   = weights->dimension(0);
+    const int    conv_stride_x = std::get<0>(conv_info.stride());
+    const int    input_width   = input->dimension(0);
+    const int    input_height  = input->dimension(1);
 
     switch(kernel_size)
     {
@@ -1120,19 +1119,21 @@
         }
     }
 
+    // Calculate right pad
+    int start_x       = kernel_size / 2 - static_cast<int>(conv_info.pad_left());
+    int end_x         = ceil_to_multiple(static_cast<int>(output->dimension(0)), num_elems_written_per_iteration) * conv_stride_x;
+    int upper_bound_w = ceil_to_multiple(start_x + end_x, num_elems_read_per_iteration) - input_width;
+
     // Calculate border
-    int upper_bound_w = ceil_to_multiple(((output->dimension(0) - 1) * conv_stride_x + kernel_size), num_elems_read_per_iteration) - conv_info.pad_left() - conv_info.pad_right() - input_width;
-    int upper_bound_h = ((output->dimension(1) - 1) * conv_stride_y - conv_info.pad_top() - conv_info.pad_bottom() + kernel_size) - input_height;
+    const unsigned int conv_pad_left   = conv_info.pad_left();
+    const unsigned int conv_pad_top    = conv_info.pad_top();
+    const unsigned int conv_pad_right  = std::max(upper_bound_w, 0);
+    const unsigned int conv_pad_bottom = conv_info.pad_bottom();
 
-    const unsigned int conv_pad_left   = std::max(upper_bound_w - static_cast<int>(conv_info.pad_right()), static_cast<int>(kernel_size) / 2);
-    const unsigned int conv_pad_top    = std::max(upper_bound_h - static_cast<int>(conv_info.pad_bottom()), static_cast<int>(kernel_size) / 2);
-    const unsigned int conv_pad_right  = std::max(upper_bound_w - static_cast<int>(conv_info.pad_left()), static_cast<int>(kernel_size) / 2);
-    const unsigned int conv_pad_bottom = std::max(upper_bound_h - static_cast<int>(conv_info.pad_top()), static_cast<int>(kernel_size) / 2);
-
-    border_size.right  = conv_pad_right;
-    border_size.bottom = conv_pad_bottom;
     border_size.left   = conv_pad_left;
     border_size.top    = conv_pad_top;
+    border_size.right  = conv_pad_right;
+    border_size.bottom = conv_pad_bottom;
 
     Window                 win = calculate_max_window(*output, Steps(num_elems_written_per_iteration));
     AccessWindowStatic     input_access(input, -conv_pad_left, -conv_pad_top, input_width + conv_pad_right, input_height + conv_pad_bottom);