MLBEDSW-5410 Clarify "Subgraph IO Summary" output

* Make small aesthetic changes to the summary
* Move the "_cpu" suffix from the cloned tensor to the original tensor so that the suffix is no longer externally visible

Signed-off-by: James Ward <james.ward@arm.com>
Change-Id: I97427561bd9acb04765ae9de6278760511278118
diff --git a/ethosu/vela/extract_npu_subgraphs.py b/ethosu/vela/extract_npu_subgraphs.py
index b536cfb..d802b51 100644
--- a/ethosu/vela/extract_npu_subgraphs.py
+++ b/ethosu/vela/extract_npu_subgraphs.py
@@ -104,7 +104,8 @@
     orig_tens, call_ps, startup_init_ps, npu_subgraph, cpu_subgraph, subgraph_for_pass
 ):
 
-    new_tens = orig_tens.clone("_cpu")
+    new_tens = orig_tens.clone("")
+    orig_tens.name = orig_tens.name + "_cpu"
     npu_subgraph.output_tensors.append(orig_tens)
 
     call_ps.outputs.append(new_tens)
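
The hunk above moves the "_cpu" suffix: the clone handed to the CPU subgraph now keeps the original, externally visible name, while the NPU-internal original tensor takes the suffix. A minimal sketch of the effect, using a hypothetical stand-in for Vela's Tensor class (only the naming behaviour is modelled; "conv2d_out" is an illustrative name):

    class Tensor:
        def __init__(self, name):
            self.name = name

        def clone(self, suffix):
            # Per the diff, Tensor.clone(suffix) appends the suffix to the
            # name; this stub models just that behaviour.
            return Tensor(self.name + suffix)

    orig_tens = Tensor("conv2d_out")

    # Before this change: the clone carried the visible "_cpu" suffix.
    old_new_tens = orig_tens.clone("_cpu")
    assert old_new_tens.name == "conv2d_out_cpu"

    # After this change: the clone keeps the original name, and the suffix
    # moves to the NPU-internal original tensor instead.
    new_tens = orig_tens.clone("")
    orig_tens.name = orig_tens.name + "_cpu"
    assert new_tens.name == "conv2d_out"
    assert orig_tens.name == "conv2d_out_cpu"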
diff --git a/ethosu/vela/vela.py b/ethosu/vela/vela.py
index 8a80827..63cccc5 100644
--- a/ethosu/vela/vela.py
+++ b/ethosu/vela/vela.py
@@ -37,7 +37,6 @@
 from .errors import InputFileError
 from .errors import VelaError
 from .nn_graph import NetworkType
-from .nn_graph import PassPlacement
 from .nn_graph import TensorAllocator
 from .tensor import MemArea
 from .tensor import Tensor
@@ -140,30 +139,34 @@
 
     print("Subgraph IO Summary")
     print("-------------------")
-    print("NNG: {0}".format(nng.name))
+    print(f"NNG: {nng.name}")
     max_sg_size = 0
     for sg in reversed(nng.subgraphs):
-        print("   Subgraph: {0} = {1}".format(sg.name, sg.placement))
+        print(f"  NNG Subgraph: {sg.name} = {sg.placement}")
         sg_size = 0
 
-        if sg.placement == PassPlacement.Npu:
-            for tens in sg.input_tensors + [sg.scratch_tensor] + sg.output_tensors:
-                if tens in sg.input_tensors:
-                    tens_dir = "In"
-                elif tens in sg.output_tensors:
-                    tens_dir = "Out"
-                else:
-                    tens_dir = "In/Out"
+        if hasattr(sg, "scratch_tensor") and sg.scratch_tensor is not None:
+            sg_tensors = sg.input_tensors + [sg.scratch_tensor] + sg.output_tensors
+        else:
+            sg_tensors = sg.input_tensors + sg.output_tensors
 
-                size = tens.elements() * tens.element_size() / 1024.0
-                sg_size = sg_size + size
-                print("         Tensor [{0}]: {1} = {2} KiB".format(tens_dir, tens.name, size))
+        for tens in sg_tensors:
+            if tens in sg.input_tensors:
+                tens_dir = "In"
+            elif tens in sg.output_tensors:
+                tens_dir = "Out"
+            else:
+                tens_dir = "In/Out"
 
-        print("      Total Size = {0} KiB".format(sg_size))
-        print("      SRAM Memory Used = {0} KiB".format(sg.memory_used.get(MemArea.Sram, 0) / 1024.0))
+            size = tens.elements() * tens.element_size() / 1024.0
+            sg_size = sg_size + size
+            print(f"         Tensor [{tens_dir}]: {tens.name} = {size} KiB")
+
+        print(f"      Total Size = {sg_size} KiB")
+        print(f"      SRAM Memory Used = {sg.memory_used.get(MemArea.Sram, 0) / 1024.0} KiB")
         max_sg_size = max(sg_size, max_sg_size)
 
-    print("   Maximum Subgraph Size = {0} KiB".format(max_sg_size))
+    print(f"   Maximum NNG Subgraph Size = {max_sg_size} KiB")
 
 
 def generate_supported_ops():
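
The summary hunk extends the per-tensor listing to every subgraph (the old PassPlacement.Npu guard is dropped, hence the removed import) and probes for the scratch tensor with hasattr() so the same loop handles subgraphs with and without one. A runnable sketch of the new collection logic, using hypothetical stub classes in place of Vela's subgraph and tensor types (all names and sizes are illustrative):

    class StubTensor:
        def __init__(self, name, elems, elem_size=1):
            self.name = name
            self._elems = elems
            self._elem_size = elem_size

        def elements(self):
            return self._elems

        def element_size(self):
            return self._elem_size

    class StubSubgraph:
        def __init__(self, input_tensors, output_tensors, scratch_tensor=None):
            self.input_tensors = input_tensors
            self.output_tensors = output_tensors
            if scratch_tensor is not None:
                # Assumption: only some subgraphs (e.g. NPU ones) carry this
                # attribute, which is why the summary probes with hasattr().
                self.scratch_tensor = scratch_tensor

    def list_tensors(sg):
        # Mirrors the new logic: scratch tensor included only when present.
        if hasattr(sg, "scratch_tensor") and sg.scratch_tensor is not None:
            sg_tensors = sg.input_tensors + [sg.scratch_tensor] + sg.output_tensors
        else:
            sg_tensors = sg.input_tensors + sg.output_tensors
        for tens in sg_tensors:
            if tens in sg.input_tensors:
                tens_dir = "In"
            elif tens in sg.output_tensors:
                tens_dir = "Out"
            else:
                tens_dir = "In/Out"  # the scratch tensor
            size = tens.elements() * tens.element_size() / 1024.0
            print(f"         Tensor [{tens_dir}]: {tens.name} = {size} KiB")

    sg = StubSubgraph(
        [StubTensor("input", 1024)],
        [StubTensor("output", 2048)],
        scratch_tensor=StubTensor("scratch", 4096),
    )
    list_tensors(sg)
    # Prints:
    #         Tensor [In]: input = 1.0 KiB
    #         Tensor [In/Out]: scratch = 4.0 KiB
    #         Tensor [Out]: output = 2.0 KiB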