erik.andersson@arm.com | 460c689 | 2021-02-24 14:38:09 +0100 | [diff] [blame] | 1 | # Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved. |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 2 | # |
| 3 | # SPDX-License-Identifier: Apache-2.0 |
| 4 | # |
| 5 | # Licensed under the Apache License, Version 2.0 (the License); you may |
| 6 | # not use this file except in compliance with the License. |
| 7 | # You may obtain a copy of the License at |
| 8 | # |
| 9 | # www.apache.org/licenses/LICENSE-2.0 |
| 10 | # |
| 11 | # Unless required by applicable law or agreed to in writing, software |
| 12 | # distributed under the License is distributed on an AS IS BASIS, WITHOUT |
| 13 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | # See the License for the specific language governing permissions and |
| 15 | # limitations under the License. |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 16 | # Description: |
| 17 | # Writes out per-pass and summary performance statistics to CSV files. |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 18 | import csv |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 19 | import sys |
| 20 | |
Diego Russo | ea6111a | 2020-04-14 18:41:58 +0100 | [diff] [blame] | 21 | import numpy as np |
| 22 | |
Diego Russo | ea6111a | 2020-04-14 18:41:58 +0100 | [diff] [blame] | 23 | from .nn_graph import PassPlacement |
Diego Russo | e8a1045 | 2020-04-21 17:39:10 +0100 | [diff] [blame] | 24 | from .npu_performance import BandwidthDirection |
Diego Russo | e8a1045 | 2020-04-21 17:39:10 +0100 | [diff] [blame] | 25 | from .npu_performance import PassCycles |
Diego Russo | ea6111a | 2020-04-14 18:41:58 +0100 | [diff] [blame] | 26 | from .numeric_util import round_up_to_int |
Louis Verhaard | aee5d75 | 2020-09-30 09:01:52 +0200 | [diff] [blame] | 27 | from .operation import Op |
Diego Russo | e8a1045 | 2020-04-21 17:39:10 +0100 | [diff] [blame] | 28 | from .tensor import MemArea |
| 29 | from .tensor import TensorPurpose |
Diego Russo | ea6111a | 2020-04-14 18:41:58 +0100 | [diff] [blame] | 30 | |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 31 | |
def mem_areas_to_report():
    """Return the memory areas that the statistics reports cover.

    Every area yielded by ``MemArea.all()`` is kept, in order, except
    ``MemArea.Shram``.
    """
    reported = []
    for candidate in MemArea.all():
        # SHRAM is left out: its performance numbers only cover LUT usage
        if candidate != MemArea.Shram:
            reported.append(candidate)
    return reported
| 35 | |
| 36 | |
def write_summary_metrics_csv(nng, summary_filename, arch):
    """Write a two-row CSV (header + one data row) summarising the network.

    The header and the data row are built section by section in the same
    order: identification, architecture configuration, timing and pass counts,
    memory usage, weight totals, per-memory-area byte counts, MACs/TOPS and
    finally one cycle count per ``PassCycles`` kind.

    Args:
        nng: Graph object; its ``name``, ``cycles``, ``macs``, ``bandwidths``,
            ``batch_size``, ``memory_used``, ``subgraphs``,
            ``total_original_weights`` and ``total_npu_encoded_weights`` are read.
        summary_filename: Path of the CSV file to create (or overwrite).
        arch: Architecture description providing the configuration columns and
            the ``core_clock`` used to convert total cycles into a time.
    """
    # The csv module emits its own line terminators, so the file must be opened
    # with newline="" or platforms that translate "\n" would write "\r\r\n".
    with open(summary_filename, "w", newline="") as f:
        writer = csv.writer(f)
        mem_areas = mem_areas_to_report()

        # ---- header row ----
        labels = ["experiment", "network"]
        labels += ["accelerator_configuration", "system_config", "memory_mode", "core_clock", "arena_cache_size"]
        labels += [area.identifier_name() + "_bandwidth" for area in mem_areas]
        labels += ["weights_storage_area", "feature_map_storage_area"]
        labels += [
            "inferences_per_second",
            "batch_size",
            "inference_time",
            "passes_before_fusing",
            "passes_after_fusing",
        ]
        labels += [area.identifier_name() + "_memory_used" for area in mem_areas]
        labels += ["total_original_weights", "total_npu_encoded_weights"]

        for mem_area in mem_areas:
            area_name = mem_area.identifier_name()
            labels += [
                area_name + "_feature_map_read_bytes",
                area_name + "_feature_map_write_bytes",
                area_name + "_weight_read_bytes",
                area_name + "_weight_write_bytes",
                area_name + "_total_bytes",
            ]

        labels += ["nn_macs", "nn_tops"]
        labels += ["cycles_" + kind.identifier_name() for kind in PassCycles.all()]

        writer.writerow(labels)

        # ---- data row ----
        data_items = ["default", nng.name]

        # NOTE(review): the header always contains the architecture columns and
        # arch.core_clock is read unconditionally below, so a falsy arch is not
        # actually supported end-to-end - confirm whether this guard is needed.
        if arch:
            data_items += [
                arch.accelerator_config.name,
                arch.system_config,
                arch.memory_mode,
                arch.core_clock,
                arch.arena_cache_size / 1024,
            ]
            # Per-second bandwidth figures are scaled down by 10^9
            data_items += [
                arch.memory_bandwidths_per_second[mem_area] / 1000.0 / 1000 / 1000 for mem_area in mem_areas
            ]
            data_items += [
                arch.tensor_storage_mem_area[TensorPurpose.Weights].display_name(),
                arch.tensor_storage_mem_area[TensorPurpose.FeatureMap].display_name(),
            ]

        midpoint_inference_time = nng.cycles[PassCycles.Total] / arch.core_clock
        if midpoint_inference_time > 0:
            midpoint_fps = 1 / midpoint_inference_time
        else:
            # No meaningful rate for a non-positive time; also avoids dividing by zero
            midpoint_fps = np.nan

        n_passes = sum(len(sg.passes) for sg in nng.subgraphs)
        n_cascaded_passes = sum(len(sg.cascaded_passes) for sg in nng.subgraphs)

        data_items += [midpoint_fps, nng.batch_size, midpoint_inference_time, n_passes, n_cascaded_passes]
        # Areas without an entry in memory_used are reported as 0
        data_items += [nng.memory_used.get(mem_area, 0) / 1024.0 for mem_area in mem_areas]
        data_items += [nng.total_original_weights, nng.total_npu_encoded_weights]

        for mem_area in mem_areas:
            bws = nng.bandwidths[mem_area]
            total_bw = np.sum(bws)
            weight_bws = bws[TensorPurpose.Weights]
            fm_bws = bws[TensorPurpose.FeatureMap]
            data_items += [
                fm_bws[BandwidthDirection.Read],
                fm_bws[BandwidthDirection.Write],
                weight_bws[BandwidthDirection.Read],
                weight_bws[BandwidthDirection.Write],
                total_bw,
            ]

        data_items += [
            nng.macs,
            # Each MAC is counted as two operations
            nng.macs * 2 * midpoint_fps / 1e12,
        ]

        data_items += [nng.cycles[kind] for kind in PassCycles.all()]

        writer.writerow(data_items)
| 135 | |
| 136 | |
def write_pass_metrics_csv(nng, pass_filename):
    """Write a CSV file with one row of performance statistics per pass.

    Starting from the root subgraph, every pass of every cascaded pass gets a
    row. Cascaded passes placed as ``PassPlacement.StartupInit`` are skipped,
    and a pass consisting of a single ``Op.CustomNpuOp`` is replaced by the
    rows of the subgraph stored in that op's ``attrs["subgraph"]``.

    Args:
        nng: Graph object providing ``get_root_subgraph()``.
        pass_filename: Path of the CSV file to create (or overwrite).
    """
    # The csv module emits its own line terminators, so the file must be opened
    # with newline="" or platforms that translate "\n" would write "\r\r\n".
    with open(pass_filename, "w", newline="") as f:
        writer = csv.writer(f)

        # Each entry pairs a column-name fragment with the indices summed for it
        purpose_list = (
            ("total", (TensorPurpose.Weights, TensorPurpose.FeatureMap)),
            ("weights", (TensorPurpose.Weights,)),
            ("feature_map", (TensorPurpose.FeatureMap,)),
        )

        direction_list = (
            ("total", (BandwidthDirection.Read, BandwidthDirection.Write)),
            ("read", (BandwidthDirection.Read,)),
            ("write", (BandwidthDirection.Write,)),
        )

        # Parallel lists: the column label and the index sets that feed it
        bandwidth_names = []
        bandwidth_indices = []
        for mem_area in mem_areas_to_report():
            for purpose, purpose_candidates in purpose_list:
                for direction, direction_candidates in direction_list:
                    label = "bytes_{}_{}_{}".format(mem_area.identifier_name(), purpose, direction)
                    bandwidth_names.append(label)
                    bandwidth_indices.append((mem_area, purpose_candidates, direction_candidates))

        all_cycles = (
            PassCycles.Total,
            PassCycles.Npu,
            PassCycles.SramAccess,
            PassCycles.DramAccess,
            PassCycles.OnChipFlashAccess,
            PassCycles.OffChipFlashAccess,
        )
        writer.writerow(
            [
                "name",
                "operators",
                "placement",
                "streaming_strategy",
                "block_config_height",
                "block_config_width",
                "block_config_input_channels",
                "block_config_output_channels",
            ]
            + ["cycles_" + v.identifier_name() for v in all_cycles]
            + ["nn_macs"]
            + bandwidth_names
            + ["sram_used"]
        )

        def write_subgraph(sg):
            """Emit the rows for ``sg``, recursing into custom NPU op subgraphs."""
            for cps in sg.cascaded_passes:
                if cps.placement == PassPlacement.StartupInit:
                    continue  # skip the dummy init pass

                for ps in cps.passes:
                    if len(ps.ops) == 1 and ps.ops[0].type == Op.CustomNpuOp:
                        # just treat this as a call, unroll it
                        write_subgraph(ps.ops[0].attrs["subgraph"])
                        continue

                    stats = [ps.name, " ".join(op.type.name for op in ps.ops)]
                    stats.append(ps.placement.name)
                    stats.append(cps.strategy.name)
                    # presumably four values matching the block_config_* header
                    # columns - TODO confirm
                    stats += list(ps.block_config)
                    stats += [round_up_to_int(ps.cycles[v]) for v in all_cycles]
                    stats.append(round_up_to_int(ps.macs))

                    # Each entry is rounded up individually before being summed
                    for mem_area, purposes, directions in bandwidth_indices:
                        stats.append(
                            sum(
                                round_up_to_int(ps.bandwidths[mem_area, purpose, direction])
                                for purpose in purposes
                                for direction in directions
                            )
                        )

                    # Passes without an sram_used attribute are reported as 0
                    stats.append(getattr(ps, "sram_used", 0))

                    writer.writerow(stats)

        write_subgraph(nng.get_root_subgraph())
| 218 | |
| 219 | |
def print_performance_metrics_for_strat(
    arch,
    name,
    cycles,
    macs,
    bandwidths,
    batch_size,
    memory_used,
    cpu_operations=None,
    npu_operations=None,
    show_cpu_operations=False,
    weights_data=None,
    f=sys.stdout,
):
    """Print a human-readable performance report to ``f``.

    Only memory areas whose summed bandwidth is greater than zero appear in the
    per-area sections of the report.

    Args:
        arch: Architecture description; configuration names, ``core_clock`` and
            ``memory_bandwidths_per_second`` are read from it.
        name: Network name; the "Network summary" heading is printed only when
            this is truthy.
        cycles: Cycle counts indexable by ``PassCycles`` kind.
        macs: Number of MACs per batch.
        bandwidths: Byte counts indexed by memory area, then tensor purpose,
            then bandwidth direction.
        batch_size: Batch size used for the per-inference figures.
        memory_used: Container keyed by memory area; areas that are absent are
            skipped in the memory usage section.
        cpu_operations: Operations counted as CPU operators (``None`` means none).
        npu_operations: Operations counted as NPU operators (``None`` means none).
        show_cpu_operations: When true, every operation in both lists is listed
            individually with the shapes of its inputs and outputs.
        weights_data: Optional mapping with ``"original"`` and ``"npu_encoded"``
            sizes; when truthy the weight size section is printed.
        f: Text stream that receives all of the output.
    """
    orig_mem_areas_labels = [(v, v.display_name()) for v in mem_areas_to_report()]

    midpoint_inference_time = cycles[PassCycles.Total] / arch.core_clock
    if midpoint_inference_time > 0:
        midpoint_fps = 1 / midpoint_inference_time
    else:
        # No meaningful rate for a non-positive time; also avoids dividing by zero
        midpoint_fps = np.nan

    # Drop memory areas that saw no traffic at all
    mem_area_labels = [
        (mem_area, label) for mem_area, label in orig_mem_areas_labels if np.sum(bandwidths[mem_area]) > 0
    ]

    if name:
        print("", file=f)
        print(f"Network summary for {name}", file=f)
    print(f"Accelerator configuration {arch.accelerator_config.name:>20}", file=f)
    print(f"System configuration {arch.system_config:>20}", file=f)
    print(f"Memory mode {arch.memory_mode:>20}", file=f)
    print(f"Accelerator clock {int(arch.core_clock / 1e6):12d} MHz", file=f)
    for mem_area, label in mem_area_labels:
        label += " bandwidth"
        bandwidth = arch.memory_bandwidths_per_second[mem_area] / 1000.0 / 1000 / 1000
        print(
            f"Design peak {label:25} {bandwidth:12.2f} GB/s",
            file=f,
        )
    print(file=f)
    for mem_area, label in mem_area_labels:
        if mem_area not in memory_used:
            continue

        aug_label = label + " used"

        print(f"Total {aug_label:25} {memory_used[mem_area] / 1024.0:12.2f} KiB", file=f)

    print(file=f)

    if cpu_operations is None:
        cpu_operations = []
    if npu_operations is None:
        npu_operations = []

    n_cpu_operations = len(cpu_operations)
    n_npu_operations = len(npu_operations)
    n_total_operations = max(n_cpu_operations + n_npu_operations, 1)  # avoid potential divide by zero

    def format_tens_list(lst):
        """Render the shape of every tensor in ``lst`` as a space-separated string."""
        return " ".join(str(list(tens.shape)) for tens in lst)

    for str_ops_type, n_ops, ops in (
        ("CPU", n_cpu_operations, cpu_operations),
        ("NPU", n_npu_operations, npu_operations),
    ):
        print(f"{str_ops_type} operators = {n_ops:d} ({n_ops / n_total_operations:4.1%})", file=f)
        if show_cpu_operations:
            for op in ops:
                # Send the per-operator detail to the same stream as the rest of
                # the report instead of always writing it to stdout
                print(
                    f" {str_ops_type}: {op.type} = {op.name}"
                    f" (inputs {format_tens_list(op.inputs)}, outputs {format_tens_list(op.outputs)})",
                    file=f,
                )

    print("", file=f)

    for mem_area, label in mem_area_labels:
        bws = bandwidths[mem_area]
        total_bw = np.sum(bws)
        weight_bws = bws[TensorPurpose.Weights]
        fm_bws = bws[TensorPurpose.FeatureMap]
        aug_label = label + " bandwidth"
        print(
            f"Average {aug_label:25} {total_bw * midpoint_fps / 1000.0 / 1000.0 / 1000.0:12.2f} GB/s",
            file=f,
        )
        print(
            f"Input {aug_label:25} {np.sum(fm_bws[BandwidthDirection.Read]) / 1000.0 / 1000.0:12.2f} MB/batch",
            file=f,
        )
        print(f"Weight {aug_label:25} {np.sum(weight_bws) / 1000.0 / 1000.0:12.2f} MB/batch", file=f)
        print(
            f"Output {aug_label:25} "
            f"{np.sum(fm_bws[BandwidthDirection.Write]) / 1000.0 / 1000.0:12.2f} MB/batch",
            file=f,
        )
        print(f"Total {aug_label:25} {total_bw / 1000.0 / 1000.0:12.2f} MB/batch", file=f)
        print(
            f"Total {aug_label:25} per input "
            f"{total_bw / 1000.0 / 1000.0 / batch_size:9.2f} MB/inference (batch size {batch_size:d})",
            file=f,
        )
        print(file=f)

    if weights_data:
        print(f"Original Weights Size {weights_data['original'] / 1024.0:12.2f} KiB", file=f)
        print(f"NPU Encoded Weights Size {weights_data['npu_encoded'] / 1024.0:12.2f} KiB", file=f)
        print(file=f)

    print(
        f"Neural network macs {int(macs):12d} MACs/batch",
        file=f,
    )
    print(
        # Each MAC is counted as two operations
        f"Network Tops/s {macs * 2 * midpoint_fps / 1e12:12.2f} Tops/s",
        file=f,
    )
    print(file=f)

    for kind in PassCycles.all():
        aug_label = kind.display_name() + " cycles"
        cyc = cycles[kind]
        print(f"{aug_label:30} {int(cyc):12d} cycles/batch", file=f)
    print(file=f)

    print(
        f"Batch Inference time {midpoint_inference_time * 1000:7.2f} ms,"
        f" {midpoint_fps:7.2f} inferences/s (batch size {batch_size:d})",
        file=f,
    )
    print(file=f)
| 353 | |
| 354 | |
def print_performance_metrics(nng, arch, show_cpu_operations=False, verbose_weights=False, f=sys.stdout):
    """Collect the operator lists of ``nng`` and print its performance report.

    Operations are gathered from every subgraph placed on the CPU or the NPU;
    subgraphs with any other placement are ignored. The report itself is
    produced by ``print_performance_metrics_for_strat``, whose result is
    returned.

    Args:
        nng: Graph object whose subgraphs and statistics are reported.
        arch: Architecture description forwarded to the report printer.
        show_cpu_operations: Forwarded unchanged to the report printer.
        verbose_weights: When true, the original and NPU-encoded weight totals
            of ``nng`` are included in the report.
        f: Text stream forwarded to the report printer.
    """
    # Operator types that are excluded from the CPU/NPU operator lists
    ir_only_ops = (
        Op.Const,
        Op.Placeholder,
        Op.CustomNpuOp,
        Op.SubgraphInput,
    )

    cpu_operations = []
    npu_operations = []
    for sg in nng.subgraphs:
        if sg.placement == PassPlacement.Cpu:
            destination = cpu_operations
        elif sg.placement == PassPlacement.Npu:
            destination = npu_operations
        else:
            continue
        destination.extend(op for op in sg.get_all_ops() if op.type not in ir_only_ops)

    if verbose_weights:
        weights_data = {
            "original": nng.total_original_weights,
            "npu_encoded": nng.total_npu_encoded_weights,
        }
    else:
        weights_data = None

    return print_performance_metrics_for_strat(
        arch,
        nng.name,
        nng.cycles,
        nng.macs,
        nng.bandwidths,
        nng.batch_size,
        nng.memory_used,
        cpu_operations=cpu_operations,
        npu_operations=npu_operations,
        show_cpu_operations=show_cpu_operations,
        weights_data=weights_data,
        f=f,
    )