erik.andersson@arm.com | 460c689 | 2021-02-24 14:38:09 +0100 | [diff] [blame] | 1 | # Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved. |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 2 | # |
| 3 | # SPDX-License-Identifier: Apache-2.0 |
| 4 | # |
| 5 | # Licensed under the Apache License, Version 2.0 (the License); you may |
| 6 | # not use this file except in compliance with the License. |
| 7 | # You may obtain a copy of the License at |
| 8 | # |
| 9 | # www.apache.org/licenses/LICENSE-2.0 |
| 10 | # |
| 11 | # Unless required by applicable law or agreed to in writing, software |
| 12 | # distributed under the License is distributed on an AS IS BASIS, WITHOUT |
| 13 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | # See the License for the specific language governing permissions and |
| 15 | # limitations under the License. |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 16 | # Description: |
| 17 | # Writes out per-pass and summary performance statistics to CSV files. |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 18 | import csv |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 19 | import sys |
| 20 | |
Diego Russo | ea6111a | 2020-04-14 18:41:58 +0100 | [diff] [blame] | 21 | import numpy as np |
| 22 | |
Diego Russo | ea6111a | 2020-04-14 18:41:58 +0100 | [diff] [blame] | 23 | from .nn_graph import PassPlacement |
Diego Russo | e8a1045 | 2020-04-21 17:39:10 +0100 | [diff] [blame] | 24 | from .npu_performance import BandwidthDirection |
Diego Russo | e8a1045 | 2020-04-21 17:39:10 +0100 | [diff] [blame] | 25 | from .npu_performance import PassCycles |
Diego Russo | ea6111a | 2020-04-14 18:41:58 +0100 | [diff] [blame] | 26 | from .numeric_util import round_up_to_int |
Louis Verhaard | aee5d75 | 2020-09-30 09:01:52 +0200 | [diff] [blame] | 27 | from .operation import Op |
Diego Russo | e8a1045 | 2020-04-21 17:39:10 +0100 | [diff] [blame] | 28 | from .tensor import MemArea |
| 29 | from .tensor import TensorPurpose |
Diego Russo | ea6111a | 2020-04-14 18:41:58 +0100 | [diff] [blame] | 30 | |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 31 | |
def mem_areas_to_report():
    """Return the list of memory areas that the performance reports cover.

    Every area yielded by ``MemArea.all()`` is kept except ``MemArea.Shram``.
    """
    reported = []
    for candidate in MemArea.all():
        # SHRAM is excluded, as the SHRAM performance numbers only cover LUT usage
        if candidate != MemArea.Shram:
            reported.append(candidate)
    return reported
| 35 | |
| 36 | |
def write_summary_metrics_csv(nng, summary_filename, arch):
    """Write a header row and a single data row of summary metrics to a CSV file.

    Args:
        nng: Network graph object. This function reads its ``name``, ``cycles``,
            ``subgraphs``, ``batch_size``, ``memory_used``, ``bandwidths``,
            ``macs``, ``total_original_weights`` and
            ``total_npu_encoded_weights`` attributes.
        summary_filename: Path of the CSV file to create (or overwrite).
        arch: Architecture description. Its configuration columns are only
            emitted when it is truthy, but ``arch.core_clock`` is read
            unconditionally afterwards, so a falsy ``arch`` is not supported
            in practice.
    """
    # The csv module requires files to be opened with newline="" so that the
    # writer's own "\r\n" terminator is not translated a second time.
    with open(summary_filename, "w", newline="") as f:
        writer = csv.writer(f)
        mem_areas = mem_areas_to_report()

        # ---- header row ----
        labels = ["experiment", "network"]
        labels += ["accelerator_configuration", "system_config", "memory_mode", "core_clock", "arena_cache_size"]
        labels += [area.identifier_name() + "_bandwidth" for area in mem_areas]
        labels += ["weights_storage_area", "feature_map_storage_area"]
        labels += [
            "inferences_per_second",
            "batch_size",
            "inference_time",
            "passes_before_fusing",
            "passes_after_fusing",
        ]
        labels += [area.identifier_name() + "_memory_used" for area in mem_areas]
        # Exactly one label per weight value written to the data row below;
        # an extra label here would shift every following column by one.
        labels += ["total_original_weights", "total_npu_encoded_weights"]

        byte_suffixes = (
            "_feature_map_read_bytes",
            "_feature_map_write_bytes",
            "_weight_read_bytes",
            "_weight_write_bytes",
            "_total_bytes",
        )
        labels += [area.identifier_name() + suffix for area in mem_areas for suffix in byte_suffixes]

        labels += ["nn_macs", "nn_tops"]
        labels += ["cycles_" + kind.identifier_name() for kind in PassCycles.all()]

        writer.writerow(labels)

        # ---- data row (must stay in the same order as the labels above) ----
        data_items = ["default", nng.name]

        if arch:
            data_items += [
                arch.accelerator_config.name,
                arch.system_config,
                arch.memory_mode,
                arch.core_clock,
                arch.arena_cache_size / 1024,
            ]
            # bandwidth per second scaled down by 1000^3
            data_items += [arch.memory_bandwidths_per_second[area] / 1000.0 / 1000 / 1000 for area in mem_areas]
            data_items += [
                arch.tensor_storage_mem_area[TensorPurpose.Weights].display_name(),
                arch.tensor_storage_mem_area[TensorPurpose.FeatureMap].display_name(),
            ]

        # NOTE: arch is dereferenced here regardless of the guard above.
        midpoint_inference_time = nng.cycles[PassCycles.Total] / arch.core_clock
        if midpoint_inference_time > 0:
            midpoint_fps = 1 / midpoint_inference_time
        else:
            # no meaningful rate when the inference time is zero
            midpoint_fps = np.nan

        n_passes = sum(len(sg.passes) for sg in nng.subgraphs)
        n_cascaded_passes = sum(len(sg.cascaded_passes) for sg in nng.subgraphs)

        data_items += [midpoint_fps, nng.batch_size, midpoint_inference_time, n_passes, n_cascaded_passes]
        # areas missing from memory_used are reported as 0
        data_items += [nng.memory_used.get(area, 0) / 1024.0 for area in mem_areas]
        data_items += [nng.total_original_weights, nng.total_npu_encoded_weights]

        for area in mem_areas:
            bws = nng.bandwidths[area]
            weight_bws = bws[TensorPurpose.Weights]
            fm_bws = bws[TensorPurpose.FeatureMap]
            data_items += [
                fm_bws[BandwidthDirection.Read],
                fm_bws[BandwidthDirection.Write],
                weight_bws[BandwidthDirection.Read],
                weight_bws[BandwidthDirection.Write],
                np.sum(bws),
            ]

        # nn_tops: two operations per MAC, scaled by 1e12
        data_items += [nng.macs, nng.macs * 2 * midpoint_fps / 1e12]
        data_items += [nng.cycles[kind] for kind in PassCycles.all()]

        writer.writerow(data_items)
| 136 | |
| 137 | |
def write_pass_metrics_csv(nng, pass_filename):
    """Write one CSV row of performance metrics for every pass in the network.

    The root subgraph of ``nng`` is walked; a pass consisting of a single
    ``Op.CustomNpuOp`` is treated as a call and the subgraph stored in its
    ``attrs["subgraph"]`` is written out in its place.

    Args:
        nng: Network graph object providing ``get_root_subgraph()``.
        pass_filename: Path of the CSV file to create (or overwrite).
    """
    # The csv module requires files to be opened with newline="" so that the
    # writer's own "\r\n" terminator is not translated a second time.
    with open(pass_filename, "w", newline="") as f:
        writer = csv.writer(f)

        # (column label fragment, enum members summed for that column)
        purpose_list = (
            ("total", (TensorPurpose.Weights, TensorPurpose.FeatureMap)),
            ("weights", (TensorPurpose.Weights,)),
            ("feature_map", (TensorPurpose.FeatureMap,)),
        )
        direction_list = (
            ("total", (BandwidthDirection.Read, BandwidthDirection.Write)),
            ("read", (BandwidthDirection.Read,)),
            ("write", (BandwidthDirection.Write,)),
        )

        # Parallel lists: the column name and the indices that feed it.
        bandwidth_names = []
        bandwidth_indices = []
        for mem_area in mem_areas_to_report():
            for purpose, purpose_candidates in purpose_list:
                for direction, direction_candidates in direction_list:
                    label = "bytes_{}_{}_{}".format(mem_area.identifier_name(), purpose, direction)
                    bandwidth_names.append(label)
                    bandwidth_indices.append((mem_area, purpose_candidates, direction_candidates))

        all_cycles = (
            PassCycles.Total,
            PassCycles.Npu,
            PassCycles.SramAccess,
            PassCycles.DramAccess,
            PassCycles.OnChipFlashAccess,
            PassCycles.OffChipFlashAccess,
        )

        header = [
            "name",
            "operators",
            "placement",
            "streaming_strategy",
            "block_config_height",
            "block_config_width",
            "block_config_input_channels",
            "block_config_output_channels",
        ]
        header += ["cycles_" + v.identifier_name() for v in all_cycles]
        header += ["nn_macs"]
        header += bandwidth_names
        header += ["sram_used"]
        writer.writerow(header)

        def write_subgraph(sg):
            for cps in sg.cascaded_passes:
                if cps.placement == PassPlacement.StartupInit:
                    continue  # skip the dummy init pass

                for ps in cps.passes:
                    if len(ps.ops) == 1 and ps.ops[0].type == Op.CustomNpuOp:
                        # just treat this as a call, unroll it
                        write_subgraph(ps.ops[0].attrs["subgraph"])
                        continue

                    stats = [ps.name, " ".join(op.type.name for op in ps.ops)]
                    stats += [ps.placement.name, cps.strategy.name]
                    # fills the four block_config_* columns
                    # (assumes block_config has four entries - TODO confirm)
                    stats += list(ps.block_config)
                    stats += [round_up_to_int(ps.cycles[v]) for v in all_cycles]
                    stats += [round_up_to_int(ps.macs)]
                    for mem_area, purposes, directions in bandwidth_indices:
                        # each entry is rounded up individually before summing
                        stats.append(
                            sum(
                                round_up_to_int(ps.bandwidths[mem_area, purpose, direction])
                                for purpose in purposes
                                for direction in directions
                            )
                        )
                    # passes without an sram_used attribute report 0
                    stats += [getattr(ps, "sram_used", 0)]

                    writer.writerow(stats)

        write_subgraph(nng.get_root_subgraph())
| 219 | |
| 220 | |
def print_performance_metrics_for_strat(
    arch,
    name,
    cycles,
    macs,
    bandwidths,
    batch_size,
    memory_used,
    cpu_operations=None,
    npu_operations=None,
    show_cpu_operations=False,
    weights_data=None,
    f=sys.stdout,
):
    """Print a human-readable performance summary to ``f``.

    Args:
        arch: Architecture description; ``accelerator_config``, ``system_config``,
            ``memory_mode``, ``core_clock`` and ``memory_bandwidths_per_second``
            are read.
        name: Network name. When falsy, the "Network summary" heading is skipped.
        cycles: Cycle counts indexed by ``PassCycles`` members.
        macs: MAC count for the network.
        bandwidths: Byte counts indexed by memory area, then ``TensorPurpose``,
            then ``BandwidthDirection``.
        batch_size: Batch size; used as a divisor for the per-inference figure.
        memory_used: Mapping from memory area to the amount used; areas that
            are absent are not listed.
        cpu_operations: Operators that run on the CPU (``None`` means none).
        npu_operations: Operators that run on the NPU (``None`` means none).
        show_cpu_operations: When true, every operator in both lists is
            listed individually beneath its count.
        weights_data: Optional mapping with ``"original"`` and
            ``"npu_encoded"`` entries; when truthy the weight sizes are printed.
        f: File-like object that receives all of the output.
    """

    orig_mem_areas_labels = [(v, v.display_name()) for v in mem_areas_to_report()]

    midpoint_inference_time = cycles[PassCycles.Total] / arch.core_clock
    if midpoint_inference_time > 0:
        midpoint_fps = 1 / midpoint_inference_time
    else:
        # no meaningful rate when the inference time is zero
        midpoint_fps = np.nan

    # Only memory areas that actually carry traffic are reported.
    mem_area_labels = [
        (mem_area, label) for mem_area, label in orig_mem_areas_labels if np.sum(bandwidths[mem_area]) > 0
    ]

    if name:
        print("", file=f)
        print(f"Network summary for {name}", file=f)
    print(f"Accelerator configuration {arch.accelerator_config.name:>20}", file=f)
    print(f"System configuration {arch.system_config:>20}", file=f)
    print(f"Memory mode {arch.memory_mode:>20}", file=f)
    print(f"Accelerator clock {int(arch.core_clock / 1e6):12d} MHz", file=f)
    for mem_area, label in mem_area_labels:
        label += " bandwidth"
        bandwidth = arch.memory_bandwidths_per_second[mem_area] / 1000.0 / 1000 / 1000
        print(
            f"Design peak {label:25} {bandwidth:12.2f} GB/s",
            file=f,
        )
    print(file=f)
    for mem_area, label in mem_area_labels:
        if mem_area not in memory_used:
            continue

        aug_label = label + " used"

        print(f"Total {aug_label:25} {memory_used[mem_area] / 1024.0:12.2f} KiB", file=f)

    print(file=f)

    if cpu_operations is None:
        cpu_operations = []
    if npu_operations is None:
        npu_operations = []

    n_cpu_operations = len(cpu_operations)
    n_npu_operations = len(npu_operations)
    n_total_operations = max(n_cpu_operations + n_npu_operations, 1)  # avoid potential divide by zero

    def format_tens_list(lst):
        # space-separated list of each tensor's shape
        return " ".join(str(list(tens.shape)) for tens in lst)

    for str_ops_type, n_ops, ops in (
        ("CPU", n_cpu_operations, cpu_operations),
        ("NPU", n_npu_operations, npu_operations),
    ):
        print(f"{str_ops_type} operators = {n_ops:d} ({n_ops / n_total_operations:4.1%})", file=f)
        if show_cpu_operations:
            for op in ops:
                # The operator details must go to the same stream as the rest
                # of the report, not unconditionally to stdout.
                print(
                    f" {str_ops_type}: {op.type} = {op.name}"
                    f" (inputs {format_tens_list(op.inputs)}, outputs {format_tens_list(op.outputs)})",
                    file=f,
                )

    print("", file=f)

    for mem_area, label in mem_area_labels:
        bws = bandwidths[mem_area]
        total_bw = np.sum(bws)
        weight_bws = bws[TensorPurpose.Weights]
        fm_bws = bws[TensorPurpose.FeatureMap]
        aug_label = label + " bandwidth"
        print(
            f"Average {aug_label:25} {total_bw * midpoint_fps / 1000.0 / 1000.0 / 1000.0:12.2f} GB/s",
            file=f,
        )
        print(
            f"Input {aug_label:25} {np.sum(fm_bws[BandwidthDirection.Read]) / 1000.0 / 1000.0:12.2f} MB/batch",
            file=f,
        )
        print(f"Weight {aug_label:25} {np.sum(weight_bws) / 1000.0 / 1000.0:12.2f} MB/batch", file=f)
        print(
            f"Output {aug_label:25} "
            f"{np.sum(fm_bws[BandwidthDirection.Write]) / 1000.0 / 1000.0:12.2f} MB/batch",
            file=f,
        )
        print(f"Total {aug_label:25} {total_bw / 1000.0 / 1000.0:12.2f} MB/batch", file=f)
        print(
            f"Total {aug_label:25} per input "
            f"{total_bw / 1000.0 / 1000.0 / batch_size:9.2f} MB/inference (batch size {batch_size:d})",
            file=f,
        )
        print(file=f)

    if weights_data:
        print(f"Original Weights Size {weights_data['original'] / 1024.0:12.2f} KiB", file=f)
        print(f"NPU Encoded Weights Size {weights_data['npu_encoded'] / 1024.0:12.2f} KiB", file=f)
        print(file=f)

    print(
        f"Neural network macs {int(macs):12d} MACs/batch",
        file=f,
    )
    print(
        # two operations per MAC, scaled by 1e12
        f"Network Tops/s {macs * 2 * midpoint_fps / 1e12:12.2f} Tops/s",
        file=f,
    )
    print(file=f)

    for kind in PassCycles.all():
        aug_label = kind.display_name() + " cycles"
        cyc = cycles[kind]
        print(f"{aug_label:30} {int(cyc):12d} cycles/batch", file=f)
    print(file=f)

    print(
        f"Batch Inference time {midpoint_inference_time * 1000:7.2f} ms,"
        f" {midpoint_fps:7.2f} inferences/s (batch size {batch_size:d})",
        file=f,
    )
    print(file=f)
| 354 | |
| 355 | |
def print_performance_metrics(nng, arch, show_cpu_operations=False, verbose_weights=False, f=sys.stdout):
    """Collect the CPU and NPU operators of ``nng`` and print its performance summary.

    Operators are gathered from every subgraph placed on the CPU or the NPU,
    leaving out the IR-only operator types listed below. The printing itself
    is delegated to ``print_performance_metrics_for_strat``, whose return
    value is passed back to the caller.

    Args:
        nng: Network graph object to report on.
        arch: Architecture description forwarded to the printer.
        show_cpu_operations: Forwarded; requests a per-operator listing.
        verbose_weights: When true, the original and NPU-encoded weight totals
            of ``nng`` are forwarded so that they are printed as well.
        f: File-like object that receives the output.
    """
    # Operator types that are skipped when counting operators.
    ir_only_ops = (
        Op.Const,
        Op.Placeholder,
        Op.CustomNpuOp,
        Op.SubgraphInput,
    )

    cpu_operations = []
    npu_operations = []
    for sg in nng.subgraphs:
        if sg.placement == PassPlacement.Cpu:
            bucket = cpu_operations
        elif sg.placement == PassPlacement.Npu:
            bucket = npu_operations
        else:
            # subgraphs with any other placement contribute no operators
            continue
        bucket.extend(op for op in sg.get_all_ops() if op.type not in ir_only_ops)

    weights_data = None
    if verbose_weights:
        weights_data = {
            "original": nng.total_original_weights,
            "npu_encoded": nng.total_npu_encoded_weights,
        }

    return print_performance_metrics_for_strat(
        arch,
        nng.name,
        nng.cycles,
        nng.macs,
        nng.bandwidths,
        nng.batch_size,
        nng.memory_used,
        cpu_operations=cpu_operations,
        npu_operations=npu_operations,
        show_cpu_operations=show_cpu_operations,
        weights_data=weights_data,
        f=f,
    )