MLBEDSW-3249: Vela config file examples
- Added sample vela.ini config file
- Changed vela config format, split into system config and memory mode
- Removed unused CPU cycle performance estimation
- Added new CLI options for --memory-mode and --verbose-config
- Changed CLI option --config to take multiple files
- Removed CLI option --global-memory-clock-scales
- Changed error helper functions to raise a VelaError exception
- Refactored to create a new is_spilling_enabled function
Signed-off-by: Tim Hall <tim.hall@arm.com>
Change-Id: I27c41577e37a3859edb9524cd99784be10ef0a0d
diff --git a/ethosu/vela/stats_writer.py b/ethosu/vela/stats_writer.py
index 3cd769f..e4b8156 100644
--- a/ethosu/vela/stats_writer.py
+++ b/ethosu/vela/stats_writer.py
@@ -46,7 +46,7 @@
]
labels += (
- ["accelerator_configuration", "system_config", "npu_clock", "sram_size"]
+ ["accelerator_configuration", "system_config", "memory_mode", "core_clock", "sram_size"]
+ [area.identifier_name() + "_bandwidth" for area in mem_areas]
+ ["weights_storage_area", "feature_map_storage_area"]
)
@@ -83,7 +83,13 @@
if arch:
data_items += (
- [arch.accelerator_config, arch.system_config, arch.npu_clock, arch.sram_size / 1024]
+ [
+ arch.accelerator_config.name,
+ arch.system_config,
+ arch.memory_mode,
+ arch.core_clock,
+ arch.sram_size / 1024,
+ ]
+ [arch.memory_bandwidths_per_second[mem_area] / 1000.0 / 1000 / 1000 for mem_area in mem_areas]
+ [
arch.tensor_storage_mem_area[TensorPurpose.Weights].display_name(),
@@ -91,7 +97,7 @@
]
)
- midpoint_inference_time = nng.cycles[PassCycles.Total] / arch.npu_clock
+ midpoint_inference_time = nng.cycles[PassCycles.Total] / arch.core_clock
if midpoint_inference_time > 0:
midpoint_fps = 1 / midpoint_inference_time
else:
@@ -162,7 +168,6 @@
all_cycles = (
PassCycles.Total,
PassCycles.Npu,
- PassCycles.Cpu,
PassCycles.SramAccess,
PassCycles.DramAccess,
PassCycles.OnChipFlashAccess,
@@ -239,7 +244,7 @@
orig_mem_areas_labels = [(v, v.display_name()) for v in mem_areas_to_report()]
- midpoint_inference_time = cycles[PassCycles.Total] / arch.npu_clock
+ midpoint_inference_time = cycles[PassCycles.Total] / arch.core_clock
if midpoint_inference_time > 0:
midpoint_fps = 1 / midpoint_inference_time
else:
@@ -252,9 +257,10 @@
if name:
print("", file=f)
print("Network summary for", name, file=f)
- print("Accelerator configuration {:20}".format(arch.accelerator_config), file=f)
- print("System configuration {:20}".format(arch.system_config), file=f)
- print("Accelerator clock {:12d} MHz".format(int(arch.npu_clock / 1e6)), file=f)
+ print("Accelerator configuration {:>20}".format(arch.accelerator_config.name), file=f)
+ print("System configuration {:>20}".format(arch.system_config), file=f)
+ print("Memory mode {:>20}".format(arch.memory_mode), file=f)
+ print("Accelerator clock {:12d} MHz".format(int(arch.core_clock / 1e6)), file=f)
for mem_area, label in mem_area_labels:
print(
"Design peak {:25} {:12.2f} GB/s".format(