MLBEDSW-2420: Improved support for dilated convolution

- Dilation added to SET_KERNEL_STRIDE instruction
- Kernel height/width adjusted for dilation
- Updated padding calculation
- Updated weight compression

Change-Id: I0c8190223e223b039a305aba0f37896ae1de2b80
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
diff --git a/ethosu/vela/tensor.py b/ethosu/vela/tensor.py
index 2f91f61..426a710 100644
--- a/ethosu/vela/tensor.py
+++ b/ethosu/vela/tensor.py
@@ -521,7 +521,7 @@
             strides[4] = stride
             strides[3] = 16 * stride  # STRIDE_X
             strides[1] = strides[3] * augmented_shape[2]  # STRIDE_C
-            strides[2] = augmented_shape[2] * augmented_shape[3] * stride # STRIDE_Y
+            strides[2] = augmented_shape[2] * augmented_shape[3] * stride  # STRIDE_Y
             strides[0] = strides[2] * augmented_shape[1]  # STRIDE_N
 
         return strides, augmented_coord
@@ -539,6 +539,15 @@
         # Note: for DMA ops, Pass.weight_tensor is referring to the SRAM weight tensor
         return self.ops[0].inputs[0] if self.needs_dma() else None
 
+    def find_npu_op(self):
+        # Returns the first NPU operator that consumes this tensor, following DMA operators; None if not found.
+        for op in self.consumers():
+            if op.type == "DMA":
+                return op.outputs[0].find_npu_op()
+            if "npu_block_type" in op.attrs:
+                return op
+        return None
+
     def compressed_stream_index_from_coord(self, coord):
         assert self.format == TensorFormat.WeightsCompressed
         assert len(self.compressed_values) > 0