COMPMID-3478: Allow SubTensors with XY indexing

Remove limitations on sub-tensor creation and allow any possible
indexing as long as it honors the parent tensor shape.

In case of padding expansion on a sub-tensor that does not extend its
parent, an error is raised if the sub-tensor shape differs from the
parent shape along the padded X or Y dimension.

Change-Id: Ibb5183a6cb7421f55068b47c06b43ebde0f6e9a5
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3427
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp
index aba2af1..ada84db 100644
--- a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp
+++ b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp
@@ -51,8 +51,8 @@
     // The window needs to be based on the output
     Window             win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
     AccessWindowStatic input1_access(input1, 0, 0, ceil_to_multiple(input1->dimension(0), num_elems_processed_per_iteration), input1->dimension(1));
-    const unsigned int input2_right_padding = (output->dimension(0) / num_elems_processed_per_iteration) * num_elems_processed_per_iteration - input1->dimension(
-                                                  0) + num_elems_processed_per_iteration - input2->dimension(0);
+    const unsigned int input2_right_padding = ((output->dimension(0) / num_elems_processed_per_iteration) * num_elems_processed_per_iteration - input1->dimension(0) - input2->dimension(
+                                                   0)) % num_elems_processed_per_iteration;
     AccessWindowStatic input2_access(input2, -(input1->dimension(0) % num_elems_processed_per_iteration),
                                      0, input2->dimension(0) + input2_right_padding, input2->dimension(1));
     AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
diff --git a/src/core/SubTensorInfo.cpp b/src/core/SubTensorInfo.cpp
index be8560f..a50f584 100644
--- a/src/core/SubTensorInfo.cpp
+++ b/src/core/SubTensorInfo.cpp
@@ -41,12 +41,6 @@
  */
 TensorShape extend_parent_shape(TensorShape parent_shape, TensorShape shape, Coordinates coords)
 {
-    // Subtensor should not index in x, y dimensions.
-    ARM_COMPUTE_ERROR_ON((coords.x() != 0) || (coords.y() != 0));
-
-    // Cannot extend on x, y ?
-    ARM_COMPUTE_ERROR_ON((parent_shape.total_size() != 0) && (parent_shape.x() != shape.x()) && (parent_shape.y() != shape.y()));
-
     // Extend shape
     for(unsigned int i = 0; i < TensorShape::num_max_dimensions; ++i)
     {
@@ -70,6 +64,7 @@
     : _parent(parent), _tensor_shape(tensor_shape), _coords(coords), _valid_region{ Coordinates(), _tensor_shape }, _extend_parent(extend_parent)
 {
     ARM_COMPUTE_ERROR_ON(parent == nullptr);
+
     // Check if subtensor is valid if parent is configured
     if(parent->tensor_shape().total_size() != 0 && !_extend_parent)
     {
@@ -118,6 +113,17 @@
     ARM_COMPUTE_ERROR_ON(!_parent->is_resizable());
     ARM_COMPUTE_ERROR_ON(_parent->total_size() == 0);
 
+    // Check that you do not extend padding on sub-tensors unless XY shape matches parent tensor
+    // TODO(COMPMID-3558): Remove _extend_parent check
+    if(!_extend_parent && (padding.left || padding.right))
+    {
+        ARM_COMPUTE_ERROR_ON(_parent->tensor_shape().x() != tensor_shape().x());
+    }
+    if(!_extend_parent && (padding.top || padding.bottom))
+    {
+        ARM_COMPUTE_ERROR_ON(_parent->tensor_shape().y() != tensor_shape().y());
+    }
+
     // Extend parent padding if required
     return _parent->extend_padding(padding);
 }
diff --git a/src/core/Validate.cpp b/src/core/Validate.cpp
index f9bd6d6..cc80611 100644
--- a/src/core/Validate.cpp
+++ b/src/core/Validate.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -176,16 +176,12 @@
 arm_compute::Status arm_compute::error_on_invalid_subtensor(const char *function, const char *file, const int line,
                                                             const TensorShape &parent_shape, const Coordinates &coords, const TensorShape &shape)
 {
-    // Subtensor should not index in x, y dimensions.
-    ARM_COMPUTE_RETURN_ERROR_ON_LOC(((coords.x() != 0) || (coords.y() != 0)), function, file, line);
-    // Subtensor shape should match parent tensor in x, y dimensions.
-    ARM_COMPUTE_RETURN_ERROR_ON_LOC(((parent_shape.x() != shape.x()) || (parent_shape.y() != shape.y())), function, file, line);
-
     // Check dimensions
     for(unsigned int i = 0; i < TensorShape::num_max_dimensions; ++i)
     {
-        ARM_COMPUTE_RETURN_ERROR_ON_LOC(((coords[i] >= static_cast<int>(parent_shape[i])) || (coords[i] + static_cast<int>(shape[i]) > static_cast<int>(parent_shape[i]))),
-                                        function, file, line);
+        const bool invalid_idx        = coords[i] >= static_cast<int>(parent_shape[i]);
+        const bool out_of_bounds_size = coords[i] + static_cast<int>(shape[i]) > static_cast<int>(parent_shape[i]);
+        ARM_COMPUTE_RETURN_ERROR_ON_LOC(invalid_idx || out_of_bounds_size, function, file, line);
     }
     return arm_compute::Status{};
 }