MLBEDSW-2570 Avoid usage of NHCWB16 for some cases
Avoid using NHCWB16 when a Stack/Pack/Concat is performed along axis 3
and the "concat start" of each slice to be combined is not a multiple
of 16.
Signed-off-by: Patrik Gustavsson <patrik.gustavsson@arm.com>
Change-Id: If3f7b4a3424be3c86fc2dc48e8649ce4c4f49485
diff --git a/ethosu/vela/tensor.py b/ethosu/vela/tensor.py
index 3574970..ecca0e0 100644
--- a/ethosu/vela/tensor.py
+++ b/ethosu/vela/tensor.py
@@ -300,6 +300,7 @@
"npu_tensor",
"equivalence_id",
"resampling_mode",
+ "avoid_NHCWB16",
)
AllocationQuantum = 16
@@ -346,6 +347,8 @@
self.block_traversal = TensorBlockTraversal.Default
self.resampling_mode = resampling_mode.NONE
+ self.avoid_NHCWB16 = False
+
def element_size(self):
if self.element_size_bytes == 0:
return self.dtype.size_in_bits() / 8
@@ -380,6 +383,7 @@
res.resampling_mode = self.resampling_mode
res.copy_compressed_weight_info(self)
+ res.avoid_NHCWB16 = self.avoid_NHCWB16
return res
def clone_into_fast_storage(self, arch):