MLBEDSW-2420: Improved support for dilated convolution
- Dilation added to SET_KERNEL_STRIDE instruction
- Kernel height/width adjusted for dilation
- Updated padding calculation
- Updated weight compression
Change-Id: I0c8190223e223b039a305aba0f37896ae1de2b80
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
diff --git a/ethosu/vela/tensor.py b/ethosu/vela/tensor.py
index 2f91f61..426a710 100644
--- a/ethosu/vela/tensor.py
+++ b/ethosu/vela/tensor.py
@@ -521,7 +521,7 @@
strides[4] = stride
strides[3] = 16 * stride # STRIDE_X
strides[1] = strides[3] * augmented_shape[2] # STRIDE_C
- strides[2] = augmented_shape[2] * augmented_shape[3] * stride # STRIDE_Y
+ strides[2] = augmented_shape[2] * augmented_shape[3] * stride # STRIDE_Y
strides[0] = strides[2] * augmented_shape[1] # STRIDE_N
return strides, augmented_coord
@@ -539,6 +539,15 @@
# Note: for DMA ops, Pass.weight_tensor is referring to the SRAM weight tensor
return self.ops[0].inputs[0] if self.needs_dma() else None
+ def find_npu_op(self):
+ # Returns an NPU operator (an op whose attrs contain "npu_block_type") that consumes this tensor, or None if the search finds none. DMA consumers are never returned themselves; the search continues from the DMA op's first output tensor instead.
+ for op in self.consumers():
+ if op.type == "DMA":  # a DMA op is not an NPU op in its own right: look at what consumes the tensor it produces
+ return op.outputs[0].find_npu_op()  # NOTE(review): this result is returned even when it is None, so consumers listed after a DMA op are never examined - confirm this is intended
+ if "npu_block_type" in op.attrs:  # presence of this attribute is what identifies an NPU operator here
+ return op
+ return None  # no qualifying consumer (presumably placed after the loop; indentation is not visible in this patch view - confirm)
+
def compressed_stream_index_from_coord(self, coord):
assert self.format == TensorFormat.WeightsCompressed
assert len(self.compressed_values) > 0