MLBEDSW-3249: Vela config file examples

 - Added sample vela.ini config file
 - Changed vela config format: it is now split into system config and memory mode
 - Removed unused CPU cycle performance estimation
 - Added new CLI options for --memory-mode and --verbose-config
 - Changed CLI option --config to take multiple files
 - Removed CLI option --global-memory-clock-scales
 - Changed error helper functions to raise a VelaError exception
 - Refactored to create a new is_spilling_enabled function

Signed-off-by: Tim Hall <tim.hall@arm.com>
Change-Id: I27c41577e37a3859edb9524cd99784be10ef0a0d
diff --git a/ethosu/vela/stats_writer.py b/ethosu/vela/stats_writer.py
index 3cd769f..e4b8156 100644
--- a/ethosu/vela/stats_writer.py
+++ b/ethosu/vela/stats_writer.py
@@ -46,7 +46,7 @@
         ]
 
         labels += (
-            ["accelerator_configuration", "system_config", "npu_clock", "sram_size"]
+            ["accelerator_configuration", "system_config", "memory_mode", "core_clock", "sram_size"]
             + [area.identifier_name() + "_bandwidth" for area in mem_areas]
             + ["weights_storage_area", "feature_map_storage_area"]
         )
@@ -83,7 +83,13 @@
 
         if arch:
             data_items += (
-                [arch.accelerator_config, arch.system_config, arch.npu_clock, arch.sram_size / 1024]
+                [
+                    arch.accelerator_config.name,
+                    arch.system_config,
+                    arch.memory_mode,
+                    arch.core_clock,
+                    arch.sram_size / 1024,
+                ]
                 + [arch.memory_bandwidths_per_second[mem_area] / 1000.0 / 1000 / 1000 for mem_area in mem_areas]
                 + [
                     arch.tensor_storage_mem_area[TensorPurpose.Weights].display_name(),
@@ -91,7 +97,7 @@
                 ]
             )
 
-        midpoint_inference_time = nng.cycles[PassCycles.Total] / arch.npu_clock
+        midpoint_inference_time = nng.cycles[PassCycles.Total] / arch.core_clock
         if midpoint_inference_time > 0:
             midpoint_fps = 1 / midpoint_inference_time
         else:
@@ -162,7 +168,6 @@
         all_cycles = (
             PassCycles.Total,
             PassCycles.Npu,
-            PassCycles.Cpu,
             PassCycles.SramAccess,
             PassCycles.DramAccess,
             PassCycles.OnChipFlashAccess,
@@ -239,7 +244,7 @@
 
     orig_mem_areas_labels = [(v, v.display_name()) for v in mem_areas_to_report()]
 
-    midpoint_inference_time = cycles[PassCycles.Total] / arch.npu_clock
+    midpoint_inference_time = cycles[PassCycles.Total] / arch.core_clock
     if midpoint_inference_time > 0:
         midpoint_fps = 1 / midpoint_inference_time
     else:
@@ -252,9 +257,10 @@
     if name:
         print("", file=f)
         print("Network summary for", name, file=f)
-    print("Accelerator configuration        {:20}".format(arch.accelerator_config), file=f)
-    print("System configuration             {:20}".format(arch.system_config), file=f)
-    print("Accelerator clock                        {:12d} MHz".format(int(arch.npu_clock / 1e6)), file=f)
+    print("Accelerator configuration        {:>20}".format(arch.accelerator_config.name), file=f)
+    print("System configuration             {:>20}".format(arch.system_config), file=f)
+    print("Memory mode                      {:>20}".format(arch.memory_mode), file=f)
+    print("Accelerator clock                        {:12d} MHz".format(int(arch.core_clock / 1e6)), file=f)
     for mem_area, label in mem_area_labels:
         print(
             "Design peak {:25}    {:12.2f} GB/s".format(