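Fix strict overflow warnings

Use uint32_t instead of int for the lengths, step constants and loop
counter in NELogicalKernel, NEReductionOperationKernel and ClCropKernel,
and drop the `len >= 0` asserts from the NEON logical helpers, whose
length argument is now unsigned. In CpuDepthwiseConv2dNativeKernel and
the qasymm8 / qasymm8_signed scale kernels, move the `+ 1` / `- 1` term
in the index expressions to the other operand.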

Resolves COMPMID-2138

Change-Id: I5587fe53ec99d164413cde1809d1791bf909b8df
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6171
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/NEON/kernels/NELogicalKernel.cpp b/src/core/NEON/kernels/NELogicalKernel.cpp
index e1c24da..6939e08 100644
--- a/src/core/NEON/kernels/NELogicalKernel.cpp
+++ b/src/core/NEON/kernels/NELogicalKernel.cpp
@@ -41,15 +41,14 @@
 static const uint8x16_t c0_x16    = vdupq_n_u8(0);
 static const uint8x8_t  c1_x8     = vdup_n_u8(1);
 static const uint8x16_t c1_x16    = vdupq_n_u8(1);
-static const int        step      = 16;
-static const int        half_step = step / 2;
+static const uint32_t   step      = 16;
+static const uint32_t   half_step = step / 2;
 
-void neon_logical_and(const uint8_t *src0, const uint8_t *src1, uint8_t *dst, int len)
+void neon_logical_and(const uint8_t *src0, const uint8_t *src1, uint8_t *dst, uint32_t len)
 {
     ARM_COMPUTE_ASSERT_NOT_NULLPTR(src0);
     ARM_COMPUTE_ASSERT_NOT_NULLPTR(src1);
     ARM_COMPUTE_ASSERT_NOT_NULLPTR(dst);
-    ARM_COMPUTE_ASSERT(len >= 0);
 
     for(; len >= step; len -= step)
     {
@@ -76,11 +75,10 @@
     }
 }
 
-void neon_logical_and_broadcast(const uint8_t *src, uint8_t broadcast_val, uint8_t *dst, int len)
+void neon_logical_and_broadcast(const uint8_t *src, uint8_t broadcast_val, uint8_t *dst, uint32_t len)
 {
     ARM_COMPUTE_ASSERT_NOT_NULLPTR(src);
     ARM_COMPUTE_ASSERT_NOT_NULLPTR(dst);
-    ARM_COMPUTE_ASSERT(len >= 0);
 
     const auto broadcast_val_clamped_s   = std::min<uint8_t>(broadcast_val, 1);
     const auto broadcast_val_clamped_x16 = vdupq_n_u8(broadcast_val_clamped_s);
@@ -108,12 +106,11 @@
     }
 }
 
-void neon_logical_or(const uint8_t *src0, const uint8_t *src1, uint8_t *dst, int len)
+void neon_logical_or(const uint8_t *src0, const uint8_t *src1, uint8_t *dst, uint32_t len)
 {
     ARM_COMPUTE_ASSERT_NOT_NULLPTR(src0);
     ARM_COMPUTE_ASSERT_NOT_NULLPTR(src1);
     ARM_COMPUTE_ASSERT_NOT_NULLPTR(dst);
-    ARM_COMPUTE_ASSERT(len >= 0);
 
     for(; len >= step; len -= step)
     {
@@ -140,11 +137,10 @@
     }
 }
 
-void neon_logical_or_broadcast(const uint8_t *src, uint8_t broadcast_val, uint8_t *dst, int len)
+void neon_logical_or_broadcast(const uint8_t *src, uint8_t broadcast_val, uint8_t *dst, uint32_t len)
 {
     ARM_COMPUTE_ASSERT_NOT_NULLPTR(src);
     ARM_COMPUTE_ASSERT_NOT_NULLPTR(dst);
-    ARM_COMPUTE_ASSERT(len >= 0);
 
     const auto broadcast_val_clamped_s   = std::min<uint8_t>(broadcast_val, 1);
     const auto broadcast_val_clamped_x16 = vdupq_n_u8(broadcast_val_clamped_s);
@@ -172,11 +168,10 @@
     }
 }
 
-void neon_logical_not(const uint8_t *src, uint8_t *dst, int len)
+void neon_logical_not(const uint8_t *src, uint8_t *dst, uint32_t len)
 {
     ARM_COMPUTE_ASSERT_NOT_NULLPTR(src);
     ARM_COMPUTE_ASSERT_NOT_NULLPTR(dst);
-    ARM_COMPUTE_ASSERT(len >= 0);
 
     for(; len >= step; len -= step)
     {
@@ -204,7 +199,7 @@
 {
     Window win{ window };
     win.set(Window::DimX, Window::Dimension(0, 1, 1));
-    const auto len = static_cast<int>(window.x().end()) - static_cast<int>(window.x().start());
+    const auto len = window.x().end() - window.x().start();
 
     Iterator in(src, win);
     Iterator out(dst, win);
@@ -225,11 +220,11 @@
     win.set(Window::DimX, Window::Dimension(0, 1, 1));
 
     const bool is_broadcast_across_x = src0->info()->tensor_shape().x() != src1->info()->tensor_shape().x();
-    const auto len                   = static_cast<int>(window.x().end()) - static_cast<int>(window.x().start());
+    const auto len                   = window.x().end() - window.x().start();
 
     if(is_broadcast_across_x)
     {
-        using LogicalBroadcastUKernelPtr        = std::add_pointer<void(const uint8_t *, uint8_t, uint8_t *, int)>::type;
+        using LogicalBroadcastUKernelPtr        = std::add_pointer<void(const uint8_t *, uint8_t, uint8_t *, uint32_t)>::type;
         LogicalBroadcastUKernelPtr logical_func = op == LogicalOperation::Or ? &neon_logical_or_broadcast : &neon_logical_and_broadcast;
 
         const bool     is_broadcast_input_1 = src1_win.x().step() == 0;
@@ -253,7 +248,7 @@
     }
     else
     {
-        using LogicalUKernelPtr        = std::add_pointer<void(const uint8_t *, const uint8_t *, uint8_t *, int)>::type;
+        using LogicalUKernelPtr        = std::add_pointer<void(const uint8_t *, const uint8_t *, uint8_t *, uint32_t)>::type;
         LogicalUKernelPtr logical_func = op == LogicalOperation::Or ? &neon_logical_or : &neon_logical_and;
 
         src0_win.set(Window::DimX, Window::Dimension(0, 1, 1));
diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
index 2d6db76..bfecccf 100644
--- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
@@ -321,7 +321,7 @@
     res_idx_mask.val[1] = wrapper::vadd(res_idx_mask.val[1], mask_ones);
 
     uint32_t res  = 0xFFFFFFFF;
-    int      iter = 0;
+    uint32_t iter = 0;
     do
     {
         auto pmin = wrapper::vpmin(wrapper::vgethigh(res_idx_mask.val[iter]), wrapper::vgetlow(res_idx_mask.val[iter]));
diff --git a/src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp b/src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp
index d79fe87..d09cc1d 100644
--- a/src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp
+++ b/src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp
@@ -78,7 +78,7 @@
           x_start(w.x().start()),
           x_end(w.x().end()),
           x_step(static_cast<uint32_t>(num_read_elements_per_iteration * depth_multiplier)),
-          x_leftover_start(std::max(static_cast<int32_t>(w.x().end()) - static_cast<int32_t>(x_step) + 1, int32_t(0))),
+          x_leftover_start(std::max(static_cast<int32_t>(w.x().end() + 1) - static_cast<int32_t>(x_step), int32_t(0))),
           input_stride_y(input.strides_in_bytes().y()),
           input_stride_z(input.strides_in_bytes().z()),
           input_max_offset(input.strides_in_bytes().z() * input.dimension(height_idx) - (input.padding().bottom + input.padding().top) * input.strides_in_bytes().y()),
diff --git a/src/cpu/kernels/scale/neon/qasymm8.cpp b/src/cpu/kernels/scale/neon/qasymm8.cpp
index fb52752..daa157e 100644
--- a/src/cpu/kernels/scale/neon/qasymm8.cpp
+++ b/src/cpu/kernels/scale/neon/qasymm8.cpp
@@ -74,7 +74,7 @@
             const auto a00 = (0 <= index_w && index_w < in_dim_w && 0 <= index_h && index_h < in_dim_h) ?
                              (*(pixel_row_ptr + index_w * stride_w + index_h * stride_h)) :
                              const_border_value;
-            const auto a01 = (-1 <= index_w && index_w < in_dim_w - 1 && 0 <= index_h && index_h < in_dim_h) ?
+            const auto a01 = (-1 <= index_w && index_w + 1 < in_dim_w && 0 <= index_h && index_h < in_dim_h) ?
                              (*(pixel_row_ptr + (index_w + 1) * stride_w + index_h * stride_h)) :
                              const_border_value;
             const auto a10 = (0 <= index_w && index_w < in_dim_w && -1 <= index_h && index_h < in_dim_h - 1) ?
diff --git a/src/cpu/kernels/scale/neon/qasymm8_signed.cpp b/src/cpu/kernels/scale/neon/qasymm8_signed.cpp
index 706bcee..8331263 100644
--- a/src/cpu/kernels/scale/neon/qasymm8_signed.cpp
+++ b/src/cpu/kernels/scale/neon/qasymm8_signed.cpp
@@ -74,7 +74,7 @@
             const auto a00 = (0 <= index_w && index_w < in_dim_w && 0 <= index_h && index_h < in_dim_h) ?
                              (*(pixel_row_ptr + index_w * stride_w + index_h * stride_h)) :
                              const_border_value;
-            const auto a01 = (-1 <= index_w && index_w < in_dim_w - 1 && 0 <= index_h && index_h < in_dim_h) ?
+            const auto a01 = (-1 <= index_w && index_w + 1 < in_dim_w && 0 <= index_h && index_h < in_dim_h) ?
                              (*(pixel_row_ptr + (index_w + 1) * stride_w + index_h * stride_h)) :
                              const_border_value;
             const auto a10 = (0 <= index_w && index_w < in_dim_w && -1 <= index_h && index_h < in_dim_h - 1) ?
diff --git a/src/gpu/cl/kernels/ClCropKernel.cpp b/src/gpu/cl/kernels/ClCropKernel.cpp
index c7e5537..87ad6b4 100644
--- a/src/gpu/cl/kernels/ClCropKernel.cpp
+++ b/src/gpu/cl/kernels/ClCropKernel.cpp
@@ -56,7 +56,7 @@
     _batch_index         = batch_index;
     _extrapolation_value = extrapolation_value;
 
-    const int vec_size_x = 4;
+    const uint32_t vec_size_x = 4;
     // Create and update the window (if needed)
     Window win = calculate_max_window(*dst);
 
@@ -66,9 +66,9 @@
         win = *dst_window;
     }
 
-    const int  dst_width_x    = win.num_iterations(0);
-    const bool multi_access_x = dst_width_x >= vec_size_x;
-    const bool remainder_x    = dst_width_x % vec_size_x > 0;
+    const uint32_t dst_width_x    = win.num_iterations(0);
+    const bool     multi_access_x = dst_width_x >= vec_size_x;
+    const bool     remainder_x    = dst_width_x % vec_size_x > 0;
 
     if(multi_access_x)
     {