MLBEDSW-1493: Optimise strided conv
- Reshape/rearrange IFM and weight tensor for better HW utilization
- Update the performance estimator (npu_performance.py) to cover this case
Change-Id: I4be70a69fa600a1951bf1c247f9973e6cc9b03f4
Signed-off-by: Diqing Zhong <diqing.zhong@arm.com>
diff --git a/ethosu/vela/npu_performance.py b/ethosu/vela/npu_performance.py
index 4ca4683..c2418d7 100644
--- a/ethosu/vela/npu_performance.py
+++ b/ethosu/vela/npu_performance.py
@@ -422,6 +422,9 @@
ifm_tensor, _, weight_tensor, ofm_tensor = ps.get_primary_op_ifm_ifm2_weights_ofm()
ifm_tensor_shape = ps.primary_op.ifm_shapes[0].clone()
ofm_tensor_shape = ps.primary_op.ofm_shapes[0].clone()
+ ofm_block.width = min(ofm_block.width, ofm_tensor_shape.width)
+ ofm_block.height = min(ofm_block.height, ofm_tensor_shape.height)
+ ofm_block.depth = min(ofm_block.depth, ofm_tensor_shape.depth)
if npu_block_type == NpuBlockType.ReduceSum:
block_traversal = TensorBlockTraversal.DepthFirst
@@ -439,6 +442,8 @@
ifm_block = arch.get_ifm_block_size(
ifm_block_depth, ofm_block, primary_op.kernel, ifm_resampling_mode=ifm_tensor.resampling_mode
)
+ ifm_block.width = min(ifm_block.width, ifm_tensor_shape.width)
+ ifm_block.height = min(ifm_block.height, ifm_tensor_shape.height)
if npu_block_type in (
NpuBlockType.ConvolutionMxN,