Johan Alfven | 014bc28 | 2024-01-25 12:32:13 +0100 | [diff] [blame] | 1 | # SPDX-FileCopyrightText: Copyright 2020-2022, 2024 Arm Limited and/or its affiliates <open-source-office@arm.com> |
Simon Hollis | f19fd2f | 2024-02-27 19:51:16 +0000 | [diff] [blame] | 2 | # SPDX-FileCopyrightText: (c) Meta Platforms, Inc. and affiliates. (http://www.meta.com) |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 3 | # |
| 4 | # SPDX-License-Identifier: Apache-2.0 |
| 5 | # |
| 6 | # Licensed under the Apache License, Version 2.0 (the License); you may |
| 7 | # not use this file except in compliance with the License. |
| 8 | # You may obtain a copy of the License at |
| 9 | # |
| 10 | # www.apache.org/licenses/LICENSE-2.0 |
| 11 | # |
| 12 | # Unless required by applicable law or agreed to in writing, software |
| 13 | # distributed under the License is distributed on an AS IS BASIS, WITHOUT |
| 14 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | # See the License for the specific language governing permissions and |
| 16 | # limitations under the License. |
Rickard Bolin | bc6ee58 | 2022-11-04 08:24:29 +0000 | [diff] [blame] | 17 | # |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 18 | # Description: |
| 19 | # Writes out per-pass and summary performance statistics to CSV files. |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 20 | import csv |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 21 | import sys |
| 22 | |
Diego Russo | ea6111a | 2020-04-14 18:41:58 +0100 | [diff] [blame] | 23 | import numpy as np |
| 24 | |
Diego Russo | ea6111a | 2020-04-14 18:41:58 +0100 | [diff] [blame] | 25 | from .nn_graph import PassPlacement |
Diego Russo | e8a1045 | 2020-04-21 17:39:10 +0100 | [diff] [blame] | 26 | from .npu_performance import BandwidthDirection |
Diego Russo | e8a1045 | 2020-04-21 17:39:10 +0100 | [diff] [blame] | 27 | from .npu_performance import PassCycles |
Diego Russo | ea6111a | 2020-04-14 18:41:58 +0100 | [diff] [blame] | 28 | from .numeric_util import round_up_to_int |
Louis Verhaard | aee5d75 | 2020-09-30 09:01:52 +0200 | [diff] [blame] | 29 | from .operation import Op |
Diego Russo | e8a1045 | 2020-04-21 17:39:10 +0100 | [diff] [blame] | 30 | from .tensor import MemArea |
| 31 | from .tensor import TensorPurpose |
Diego Russo | ea6111a | 2020-04-14 18:41:58 +0100 | [diff] [blame] | 32 | |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 33 | |
def mem_areas_to_report():
    """Return the list of memory areas that the performance reports cover.

    Every area yielded by MemArea.all() is kept except MemArea.Shram, because
    the SHRAM performance numbers only cover LUT usage.
    """
    reported_areas = []
    for candidate in MemArea.all():
        if candidate != MemArea.Shram:
            reported_areas.append(candidate)
    return reported_areas
| 37 | |
| 38 | |
def write_summary_metrics_csv(nng, summary_filename, arch):
    """Write the network-level performance summary to a CSV file.

    The file holds exactly two rows: a header row of column labels followed by
    one row of values for the whole network. The labels and the values are
    built in the same order, so any column added to one must be added to the
    other at the same position.

    Args:
        nng: Network graph object. This function reads its name, cycles,
            subgraphs, batch_size, memory_used, total_original_weights,
            total_npu_encoded_weights, bandwidths and macs attributes.
        summary_filename: Path of the CSV file to create; an existing file is
            overwritten.
        arch: Architecture description supplying the accelerator and system
            configuration, core clock, arena cache size, per-area memory
            bandwidths and tensor storage areas.
    """
    # The csv module requires files to be opened with newline="" so that the
    # writer controls the line endings itself; without it, platforms that
    # translate "\n" emit "\r\r\n" and rows appear separated by blank lines.
    with open(summary_filename, "w", newline="") as f:
        writer = csv.writer(f)
        mem_areas = mem_areas_to_report()

        # ---- Header row ----
        labels = [
            "experiment",
            "network",
        ]

        labels += (
            ["accelerator_configuration", "system_config", "memory_mode", "core_clock", "arena_cache_size"]
            + [area.identifier_name() + "_bandwidth" for area in mem_areas]
            + ["weights_storage_area", "feature_map_storage_area"]
        )

        labels += [
            "inferences_per_second",
            "batch_size",
            "inference_time",
            "passes_before_fusing",
            "passes_after_fusing",
        ]
        labels += [area.identifier_name() + "_memory_used" for area in mem_areas]
        labels += ["total_original_weights"]
        labels += ["total_npu_encoded_weights"]

        for mem_area in mem_areas:
            labels += [
                mem_area.identifier_name() + "_feature_map_read_bytes",
                mem_area.identifier_name() + "_feature_map_write_bytes",
                mem_area.identifier_name() + "_weight_read_bytes",
                mem_area.identifier_name() + "_weight_write_bytes",
                mem_area.identifier_name() + "_total_bytes",
            ]

        labels += ["nn_macs", "nn_tops"]

        labels += ["cycles_" + kind.identifier_name() for kind in PassCycles.all()]

        writer.writerow(labels)

        # ---- Data row (same column order as the header) ----
        data_items = [
            "default",
            nng.name,
        ]

        # NOTE(review): arch.core_clock is read unconditionally a few lines
        # below, so a falsy arch does not look supported in practice (and it
        # would leave this row shorter than the header) - confirm the intent
        # of this guard.
        if arch:
            data_items += (
                [
                    arch.accelerator_config.name,
                    arch.system_config,
                    arch.memory_mode,
                    arch.core_clock,
                    arch.arena_cache_size / 1024,
                ]
                # Per-second bandwidth scaled down by 10^9
                + [arch.memory_bandwidths_per_second[mem_area] / 1000.0 / 1000 / 1000 for mem_area in mem_areas]
                + [
                    arch.tensor_storage_mem_area[TensorPurpose.Weights].display_name(),
                    arch.tensor_storage_mem_area[TensorPurpose.FeatureMap].display_name(),
                ]
            )

        midpoint_inference_time = nng.cycles[PassCycles.Total] / arch.core_clock
        if midpoint_inference_time > 0:
            midpoint_fps = 1 / midpoint_inference_time
        else:
            # No cycles recorded: report NaN rather than dividing by zero
            midpoint_fps = np.nan

        n_passes = sum(len(sg.passes) for sg in nng.subgraphs)
        n_cascaded_passes = sum(len(sg.cascaded_passes) for sg in nng.subgraphs)

        data_items += [midpoint_fps, nng.batch_size, midpoint_inference_time, n_passes, n_cascaded_passes]
        # Areas without an entry in memory_used are reported as 0
        data_items += [nng.memory_used.get(mem_area, 0) / 1024.0 for mem_area in mem_areas]
        data_items += [nng.total_original_weights]
        data_items += [nng.total_npu_encoded_weights]

        for mem_area in mem_areas:
            bws = nng.bandwidths[mem_area]
            total_bw = np.sum(bws)
            weight_bws = bws[TensorPurpose.Weights]
            fm_bws = bws[TensorPurpose.FeatureMap]
            data_items += [
                fm_bws[BandwidthDirection.Read],
                fm_bws[BandwidthDirection.Write],
                weight_bws[BandwidthDirection.Read],
                weight_bws[BandwidthDirection.Write],
                total_bw,
            ]

        data_items += [
            nng.macs,
            # Two operations per MAC, scaled to tera-operations per second
            nng.macs * 2 * midpoint_fps / 1e12,
        ]

        data_items += [nng.cycles[kind] for kind in PassCycles.all()]

        writer.writerow(data_items)
| 137 | |
| 138 | |
def write_pass_metrics_csv(nng, pass_filename):
    """Write one CSV row of performance statistics per pass.

    The header row is followed by one row for every pass reachable from the
    root subgraph. Cascaded passes placed as PassPlacement.StartupInit are
    skipped, and a pass consisting of a single Op.CustomNpuOp is not reported
    itself; instead the subgraph stored in that op's "subgraph" attribute is
    walked in its place.

    Args:
        nng: Network graph object; get_root_subgraph() supplies the starting
            subgraph.
        pass_filename: Path of the CSV file to create; an existing file is
            overwritten.
    """
    # The csv module requires files to be opened with newline="" so that the
    # writer controls the line endings itself; without it, platforms that
    # translate "\n" emit "\r\r\n" and rows appear separated by blank lines.
    with open(pass_filename, "w", newline="") as f:
        writer = csv.writer(f)

        # Each entry is (column-name fragment, members summed into that column)
        purpose_list = (
            ("total", (TensorPurpose.Weights, TensorPurpose.FeatureMap)),
            ("weights", (TensorPurpose.Weights,)),
            ("feature_map", (TensorPurpose.FeatureMap,)),
        )

        direction_list = (
            ("total", (BandwidthDirection.Read, BandwidthDirection.Write)),
            ("read", (BandwidthDirection.Read,)),
            ("write", (BandwidthDirection.Write,)),
        )

        # bandwidth_names[n] is the header of the column whose value is the sum
        # of the bandwidth entries selected by bandwidth_indices[n]
        bandwidth_names = []
        bandwidth_indices = []
        for mem_area in mem_areas_to_report():
            for purpose, purpose_candidates in purpose_list:
                for direction, direction_candidates in direction_list:
                    label = "bytes_{}_{}_{}".format(mem_area.identifier_name(), purpose, direction)
                    bandwidth_names.append(label)
                    bandwidth_indices.append((mem_area, purpose_candidates, direction_candidates))

        all_cycles = (
            PassCycles.Total,
            PassCycles.Npu,
            PassCycles.SramAccess,
            PassCycles.DramAccess,
            PassCycles.OnChipFlashAccess,
            PassCycles.OffChipFlashAccess,
        )
        writer.writerow(
            [
                "name",
                "operators",
                "placement",
                "streaming_strategy",
                "block_config_height",
                "block_config_width",
                "block_config_input_channels",
                "block_config_output_channels",
            ]
            + ["cycles_" + v.identifier_name() for v in all_cycles]
            + ["nn_macs"]
            + bandwidth_names
            + ["sram_used"]
        )

        def write_subgraph(sg):
            """Write the rows for every pass of sg, recursing into NPU subgraphs."""
            for cps in sg.cascaded_passes:
                if cps.placement == PassPlacement.StartupInit:
                    continue  # skip the dummy init pass

                for ps in cps.passes:
                    if len(ps.ops) == 1 and ps.ops[0].type == Op.CustomNpuOp:
                        # just treat this as a call, unroll it
                        write_subgraph(ps.ops[0].attrs["subgraph"])
                        continue
                    stats = [ps.name, " ".join(op.type.name for op in ps.ops)]
                    stats += [ps.placement.name]
                    stats += [cps.strategy.name]
                    # presumably four entries, matching the four block_config_*
                    # header columns - TODO confirm
                    stats += list(ps.block_config)
                    stats += [round_up_to_int(ps.cycles[v]) for v in all_cycles]
                    stats += [round_up_to_int(ps.macs)]
                    for mem_area, purposes, directions in bandwidth_indices:
                        # Each component is rounded up before being summed
                        stats.append(
                            sum(
                                round_up_to_int(ps.bandwidths[mem_area, purpose, direction])
                                for purpose in purposes
                                for direction in directions
                            )
                        )
                    # Passes without an sram_used attribute are reported as 0
                    stats += [getattr(ps, "sram_used", 0)]

                    writer.writerow(stats)

        write_subgraph(nng.get_root_subgraph())
| 220 | |
| 221 | |
def print_performance_metrics_for_strat(
    arch,
    name,
    cycles,
    macs,
    bandwidths,
    batch_size,
    memory_used,
    cpu_operations=None,
    npu_operations=None,
    show_cpu_operations=False,
    weights_data=None,
    f=sys.stdout,
):
    """Print a human-readable performance summary to the stream f.

    The report covers the accelerator configuration, design peak bandwidths,
    memory used, CPU/NPU operator counts, per-area bandwidth figures, optional
    weight sizes, MAC count, cycle counts and the batch inference time. Only
    memory areas whose summed bandwidth is greater than zero are listed.

    Args:
        arch: Architecture description (accelerator/system configuration, core
            clock and per-area memory bandwidths are read from it).
        name: Network name; when falsy the "Network summary" heading is omitted.
        cycles: Cycle counts indexed by PassCycles.
        macs: Number of MACs per batch.
        bandwidths: Bandwidth data indexed by memory area, then by
            TensorPurpose and BandwidthDirection.
        batch_size: Batch size used to derive the per-inference figure.
        memory_used: Mapping from memory area to the amount of memory used;
            areas missing from it are not listed in the "used" section.
        cpu_operations: Operations placed on the CPU (None means none).
        npu_operations: Operations placed on the NPU (None means none).
        show_cpu_operations: When true, every counted operation is listed with
            its input and output tensor shapes.
        weights_data: Optional mapping with "original" and "npu_encoded" sizes;
            when truthy the weight sizes section is printed.
        f: Text stream that receives the whole report.
    """
    # NOTE(review): the padding inside the string literals below is reproduced
    # exactly as it appears in the reviewed text - confirm the column alignment
    # against the repository copy.

    orig_mem_areas_labels = [(v, v.display_name()) for v in mem_areas_to_report()]

    midpoint_inference_time = cycles[PassCycles.Total] / arch.core_clock
    if midpoint_inference_time > 0:
        midpoint_fps = 1 / midpoint_inference_time
    else:
        # No cycles recorded: report NaN rather than dividing by zero
        midpoint_fps = np.nan

    # Only report memory areas that actually carried traffic
    mem_area_labels = [
        (mem_area, label) for mem_area, label in orig_mem_areas_labels if np.sum(bandwidths[mem_area]) > 0
    ]

    if name:
        print("", file=f)
        print(f"Network summary for {name}", file=f)
    print(f"Accelerator configuration {arch.accelerator_config.name:>20}", file=f)
    print(f"System configuration {arch.system_config:>20}", file=f)
    print(f"Memory mode {arch.memory_mode:>20}", file=f)
    print(f"Accelerator clock {int(arch.core_clock / 1e6):12d} MHz", file=f)
    for mem_area, label in mem_area_labels:
        label += " bandwidth"
        bandwidth = arch.memory_bandwidths_per_second[mem_area] / 1000.0 / 1000 / 1000
        print(
            f"Design peak {label:25} {bandwidth:12.2f} GB/s",
            file=f,
        )
    print(file=f)
    for mem_area, label in mem_area_labels:
        if mem_area not in memory_used:
            continue

        aug_label = label + " used"

        print(f"Total {aug_label:25} {memory_used[mem_area] / 1024.0:12.2f} KiB", file=f)

    print(file=f)

    if cpu_operations is None:
        cpu_operations = []
    if npu_operations is None:
        npu_operations = []

    n_cpu_operations = len(cpu_operations)
    n_npu_operations = len(npu_operations)
    n_total_operations = max(n_cpu_operations + n_npu_operations, 1)  # avoid potential divide by zero

    def format_tens_list(lst):
        """Return the space-separated shapes of the non-None tensors in lst."""
        return " ".join(str(list(tens.shape)) for tens in lst if tens is not None)

    for str_ops_type, n_ops, ops in (
        ("CPU", n_cpu_operations, cpu_operations),
        ("NPU", n_npu_operations, npu_operations),
    ):
        print(f"{str_ops_type} operators = {n_ops:d} ({n_ops / n_total_operations:4.1%})", file=f)
        if show_cpu_operations:
            for op in ops:
                # Send the per-operator detail to f like the rest of the
                # report; previously it went to stdout regardless of f.
                print(
                    f" {str_ops_type}: {op.type} = {op.name}"
                    f" (inputs {format_tens_list(op.inputs)}, outputs {format_tens_list(op.outputs)})",
                    file=f,
                )

    print("", file=f)

    for mem_area, label in mem_area_labels:
        bws = bandwidths[mem_area]
        total_bw = np.sum(bws)
        weight_bws = bws[TensorPurpose.Weights]
        fm_bws = bws[TensorPurpose.FeatureMap]
        aug_label = label + " bandwidth"
        print(
            f"Average {aug_label:25} {total_bw * midpoint_fps / 1000.0 / 1000.0 / 1000.0:12.2f} GB/s",
            file=f,
        )
        print(
            f"Input {aug_label:25} {np.sum(fm_bws[BandwidthDirection.Read]) / 1000.0 / 1000.0:12.2f} MB/batch",
            file=f,
        )
        print(f"Weight {aug_label:25} {np.sum(weight_bws) / 1000.0 / 1000.0:12.2f} MB/batch", file=f)
        print(
            f"Output {aug_label:25} "
            f"{np.sum(fm_bws[BandwidthDirection.Write]) / 1000.0 / 1000.0:12.2f} MB/batch",
            file=f,
        )
        print(f"Total {aug_label:25} {total_bw / 1000.0 / 1000.0:12.2f} MB/batch", file=f)
        print(
            f"Total {aug_label:25} per input "
            f"{total_bw / 1000.0 / 1000.0 / batch_size:9.2f} MB/inference (batch size {batch_size:d})",
            file=f,
        )
        print(file=f)

    if weights_data:
        print(f"Original Weights Size {weights_data['original'] / 1024.0:12.2f} KiB", file=f)
        print(f"NPU Encoded Weights Size {weights_data['npu_encoded'] / 1024.0:12.2f} KiB", file=f)
        print(file=f)

    print(
        f"Neural network macs {int(macs):12d} MACs/batch",
        file=f,
    )
    print(
        # Two operations per MAC, scaled to tera-operations per second
        f"Network Tops/s {macs * 2 * midpoint_fps / 1e12:12.2f} Tops/s",
        file=f,
    )
    print(file=f)

    for kind in PassCycles.all():
        aug_label = kind.display_name() + " cycles"
        cyc = cycles[kind]
        print(f"{aug_label:30} {int(cyc):12d} cycles/batch", file=f)
    print(file=f)

    print(
        f"Batch Inference time {midpoint_inference_time * 1000:7.2f} ms,"
        f" {midpoint_fps:7.2f} inferences/s (batch size {batch_size:d})",
        file=f,
    )
    print(file=f)
| 355 | |
| 356 | |
def print_performance_metrics(nng, arch, show_cpu_operations=False, verbose_weights=False, f=sys.stdout):
    """Collect the operators of a network and print its performance summary.

    Operators are gathered from the passes of every CPU-placed and NPU-placed
    subgraph, leaving out the IR-only operator types (Const, Placeholder,
    CustomNpuOp and SubgraphInput). Subgraphs with any other placement are
    ignored. The collected lists and the network-level statistics of nng are
    then handed to print_performance_metrics_for_strat, whose result is
    returned.

    Args:
        nng: Network graph object holding the subgraphs and statistics.
        arch: Architecture description forwarded to the printer.
        show_cpu_operations: Forwarded to the printer unchanged.
        verbose_weights: When true, the original and NPU-encoded weight totals
            are forwarded so that they are included in the report.
        f: Text stream forwarded to the printer.
    """
    ir_only_ops = (
        Op.Const,
        Op.Placeholder,
        Op.CustomNpuOp,
        Op.SubgraphInput,
    )
    cpu_operations = []
    npu_operations = []

    for subgraph in nng.subgraphs:
        # Select the list this subgraph's operators belong to
        if subgraph.placement == PassPlacement.Cpu:
            bucket = cpu_operations
        elif subgraph.placement == PassPlacement.Npu:
            bucket = npu_operations
        else:
            continue
        bucket.extend(
            candidate for candidate in subgraph.get_all_ops_from_passes() if candidate.type not in ir_only_ops
        )

    weights_data = None
    if verbose_weights:
        weights_data = {"original": nng.total_original_weights, "npu_encoded": nng.total_npu_encoded_weights}

    return print_performance_metrics_for_strat(
        arch,
        nng.name,
        nng.cycles,
        nng.macs,
        nng.bandwidths,
        nng.batch_size,
        nng.memory_used,
        cpu_operations=cpu_operations,
        npu_operations=npu_operations,
        show_cpu_operations=show_cpu_operations,
        weights_data=weights_data,
        f=f,
    )