MLBEDSW-4350 Use padding instead of skirt for merged SplitSlice

When the operations are merged, some later passes are confused by the
start and end coordinates for the convolution not being along the edges
of the IFM, and so omit padding. But we need the zero padding to keep
the output the same as before the transformation.
Also fixes a bug where Vela could crash if a convolution had an explicit
start coordinate.

Signed-off-by: Henrik G Olsson <henrik.olsson@arm.com>
Change-Id: I8449d237350d528f83738b2f09124f1ed79c07ca
diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py
index 7c60368..573b7eb 100644
--- a/ethosu/vela/graph_optimiser.py
+++ b/ethosu/vela/graph_optimiser.py
@@ -196,6 +196,10 @@
                 cons_op.set_input_tensor(op.ifm, cons_op.type.info.indices.ifms[1])
                 cons_op.ifm_shapes[1] = op.ifm_shapes[0]
 
+            if "skirt" in cons_op.attrs:
+                assert cons_op.attrs["explicit_padding"] == cons_op.attrs["skirt"]
+                cons_op.attrs["skirt"] = None
+                cons_op.attrs["force_padding"] = True
             op.ofm.consumer_list.remove(cons_op)
             op.ofm.ops = []
             op.ifm.consumer_list.remove(op)
diff --git a/ethosu/vela/high_level_command_stream.py b/ethosu/vela/high_level_command_stream.py
index 075574e..53c5e01 100644
--- a/ethosu/vela/high_level_command_stream.py
+++ b/ethosu/vela/high_level_command_stream.py
@@ -62,7 +62,7 @@
             new_start_coord[-1] = 0
             new_end_coord[-1] = ifm_shape.depth
 
-        if npu_block_type == NpuBlockType.ElementWise and len(new_end_coord) >= 1:
+        if npu_block_type in (NpuBlockType.ElementWise, NpuBlockType.ConvolutionMxN) and len(new_end_coord) >= 1:
             new_end_coord[-1] = min(new_end_coord[-1], ifm_shape.depth)
         if len(new_end_coord) >= 2:
             new_end_coord[-2] = min(new_end_coord[-2], ifm_shape.width * upscaling_factor)
diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py
index c56eb04..ad9e266 100644
--- a/ethosu/vela/high_level_command_to_npu_op.py
+++ b/ethosu/vela/high_level_command_to_npu_op.py
@@ -151,10 +151,11 @@
 
     # Indexing from end since a 1x1 Avgpool might have been added with non 4-dimensional input/output,
     # because of activation function needed to be fused.
-    if len(cmd.ifm_box.start_coord) >= 2 and cmd.ifm_box.start_coord[-2] > 0:
-        left = 0
-    if len(cmd.ifm_box.end_coord) >= 2 and cmd.ifm_box.end_coord[-2] < cmd.ps.ifm_shapes[0].width:
-        right = 0
+    if not primary_op.attrs.get("force_padding"):
+        if len(cmd.ifm_box.start_coord) >= 2 and cmd.ifm_box.start_coord[-2] > 0:
+            left = 0
+        if len(cmd.ifm_box.end_coord) >= 2 and cmd.ifm_box.end_coord[-2] < cmd.ps.ifm_shapes[0].width:
+            right = 0
     return NpuPadding(top=top, left=left, bottom=bottom, right=right)