MLBEDSW-5582: MLCE: memory corruption with zero concat

Fixed problem when ofm is produced by different NPU nodes by
making sure that output is always in NHWC format.

Signed-off-by: Johan Alfven <johan.alfven@arm.com>
Change-Id: I00e55c989d5860499fbaf4f4318661b17b4bda7e

commit: 211165a378ccb43696c562fe53e1e1937c75a144 [log] [tgz]
author: Johan Alfvén <johan.alfven@arm.com> Sun Feb 06 15:30:07 2022 +0100
committer: Dwight Lidman <dwight.lidman@arm.com> Tue Feb 08 11:08:30 2022 +0000
tree: e9921e7f391cf0418cc72a164c945aef37d2470b
parent: de6cb64b156108cf48e34c80241bd70f0307a588 [diff]
diff --git a/ethosu/vela/extract_npu_subgraphs.py b/ethosu/vela/extract_npu_subgraphs.py
index ac24e43..015634c 100644
--- a/ethosu/vela/extract_npu_subgraphs.py
+++ b/ethosu/vela/extract_npu_subgraphs.py

@@ -106,9 +106,6 @@
 
     # Deal with output tensors for the NPU graph. These are special.
     npu_subgraph.output_tensors = [new_tens if tens == orig_tens else tens for tens in npu_subgraph.output_tensors]
-    for tens in npu_subgraph.output_tensors:
-        # Enforce output tensor from NPU graph to use normal NHWC output
-        tens.needs_linear_format = True
 
 
 def rewrite_tensor_npu_producer_cpu_consumers(
@@ -241,6 +238,11 @@
                         tens, call_pass[curr_sg], startup_init_passes[curr_sg], curr_sg, orig_sg, subgraph_for_pass
                     )
 
+        for tens in curr_sg.output_tensors:
+            # ofm can depend on multiple ops. These ops can be divided into different NPU
+            # nodes due to CPU nodes. If that is the case the ofm must be NHWC.
+            tens.needs_linear_format = True
+
     return new_subgraphs
commit	211165a378ccb43696c562fe53e1e1937c75a144	[log] [tgz]
author	Johan Alfvén <johan.alfven@arm.com>	Sun Feb 06 15:30:07 2022 +0100
committer	Dwight Lidman <dwight.lidman@arm.com>	Tue Feb 08 11:08:30 2022 +0000
tree	e9921e7f391cf0418cc72a164c945aef37d2470b
parent	de6cb64b156108cf48e34c80241bd70f0307a588 [diff]