MLBEDSW-5582: MLCE: memory corruption with zero concat

 - This bug was due to an interaction between multiple Ethos-U custom operators and the concatenation of constant tensors
 - It resulted in different parts of the concatenation being placed in different custom operators
 - The fix places all parts of the concatenation into the same custom operator by switching to a breadth-first search in pass packing

Signed-off-by: Johan Alfven <johan.alfven@arm.com>
Signed-off-by: Tim Hall <tim.hall@arm.com>
Change-Id: Ic47613cfd7bf675b4674dc91d6f9765849ba3130

commit: 849ff81f82c10a68898e5101930b92372bec5565 [log] [tgz]
author: Tim Hall <tim.hall@arm.com> Thu Dec 23 15:40:34 2021 +0000
committer: tim.hall <tim.hall@arm.com> Mon Jan 24 17:05:53 2022 +0000
tree: 9ec10a484ac99bf9ef4ad65baad4f6e69ccc8a6a
parent: 2de898a85d800f0e812cc13eff6363e27a377cf5 [diff]
diff --git a/ethosu/vela/pass_packing.py b/ethosu/vela/pass_packing.py
index 1fefdf4..b84e455 100644
--- a/ethosu/vela/pass_packing.py
+++ b/ethosu/vela/pass_packing.py

@@ -179,7 +179,7 @@
 
 
 def pack_into_passes(nng, arch, verbose_packing=False):
-    def visit_op(op, ignored):
+    def visit_op(op, multiple_ops=None):
         visit_op_refcount[op] += 1
 
         if visit_op_refcount[op] == 1:  # First-time visit, go and fix up unused output tensors
@@ -187,7 +187,6 @@
                 if len(tens.consumers()) == 0:
                     visit_op_refcount[op] += 1
 
-        assert visit_op_refcount[op] <= len(op.outputs)
         if visit_op_refcount[op] == len(op.outputs):
 
             if op.type in startup_init_ops:
@@ -198,9 +197,9 @@
                     ofm_tensor = op.outputs[0]
                 ofm_shape = op.ofm_shapes[0] if op.run_on_npu else None
 
-                build_pass((op,), ofm_tensor, ofm_shape)
+                build_pass((op,), ofm_tensor, ofm_shape, multiple_ops)
 
-    def build_pass(start_ops_to_process, ofm_tensor=None, ofm_shape=None):
+    def build_pass(start_ops_to_process, ofm_tensor=None, ofm_shape=None, multiple_ops=None):
         reverse_ops_list = []
         curr_flags = PassFlags.Empty
         npu_block_type = NpuBlockType.Default
@@ -373,6 +372,10 @@
 
         reverse_pass_list.append(ps)
 
+        if multiple_ops:
+            multiple_op_next = multiple_ops.pop(0)
+            visit_op(multiple_op_next, multiple_ops)
+
         for inp, refcount in input_refcounts.items():
             for _ in range(refcount):
                 visit_tensor(inp)
@@ -383,8 +386,10 @@
         visit_tensor_refcount[tens] += 1
         assert visit_tensor_refcount[tens] <= len(tens.consumers())
         if visit_tensor_refcount[tens] == len(tens.consumers()):
-            for op in reversed(tens.ops):
-                visit_op(op, tens)
+            if tens.ops:
+                op = tens.ops[0]
+                multiple_ops = [o for o in tens.ops if o != op]
+                visit_op(op, multiple_ops)
 
     def create_primary_op(op_list):
         if any(op.type in (npu_post_ops | npu_post_fuse_limited_ops) and op.run_on_npu for op in op_list):
commit	849ff81f82c10a68898e5101930b92372bec5565	[log] [tgz]
author	Tim Hall <tim.hall@arm.com>	Thu Dec 23 15:40:34 2021 +0000
committer	tim.hall <tim.hall@arm.com>	Mon Jan 24 17:05:53 2022 +0000
tree	9ec10a484ac99bf9ef4ad65baad4f6e69ccc8a6a
parent	2de898a85d800f0e812cc13eff6363e27a377cf5 [diff]