Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 1 | # Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved. |
| 2 | # |
| 3 | # SPDX-License-Identifier: Apache-2.0 |
| 4 | # |
| 5 | # Licensed under the Apache License, Version 2.0 (the License); you may |
| 6 | # not use this file except in compliance with the License. |
| 7 | # You may obtain a copy of the License at |
| 8 | # |
| 9 | # www.apache.org/licenses/LICENSE-2.0 |
| 10 | # |
| 11 | # Unless required by applicable law or agreed to in writing, software |
| 12 | # distributed under the License is distributed on an AS IS BASIS, WITHOUT |
| 13 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | # See the License for the specific language governing permissions and |
| 15 | # limitations under the License. |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 16 | # Description: |
| 17 | # Main entry point for the Vela compiler. |
| 18 | # |
| 19 | # Provides the command line interface, options parsing, and network loading before calling the compiler driver. |
Diego Russo | e8a1045 | 2020-04-21 17:39:10 +0100 | [diff] [blame] | 20 | import argparse |
| 21 | import ast |
| 22 | import configparser |
Diego Russo | ea6111a | 2020-04-14 18:41:58 +0100 | [diff] [blame] | 23 | import os.path |
| 24 | import sys |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 25 | import time |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 26 | |
| 27 | from . import architecture_features |
Diego Russo | e8a1045 | 2020-04-21 17:39:10 +0100 | [diff] [blame] | 28 | from . import compiler_driver |
| 29 | from . import model_reader |
| 30 | from . import scheduler |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 31 | from . import stats_writer |
| 32 | from . import tflite_writer |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 33 | from ._version import __version__ |
Louis Verhaard | 7db7896 | 2020-05-25 15:05:26 +0200 | [diff] [blame] | 34 | from .errors import InputFileError |
Diego Russo | e8a1045 | 2020-04-21 17:39:10 +0100 | [diff] [blame] | 35 | from .nn_graph import PassPlacement |
| 36 | from .nn_graph import TensorAllocator |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 37 | from .scheduler import ParetoMetric |
Diego Russo | ea6111a | 2020-04-14 18:41:58 +0100 | [diff] [blame] | 38 | from .tensor import MemArea |
Jacob Bohlin | 0628a8c | 2020-08-28 13:25:14 +0200 | [diff] [blame] | 39 | from .tensor import Tensor |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 40 | |
| 41 | |
def process(fname, arch, model_reader_options, compiler_options, scheduler_options):
    """Read the network stored in fname, compile it and write the results.

    The network is loaded with model_reader.read_model and handed to
    compiler_driver.compiler_driver. Afterwards the pass breakdown and summary
    CSV files are written into compiler_options.output_dir, the performance
    metrics are printed and, when fname ends with ".tflite", the compiled
    network is written next to the CSV files as "<name>_vela.tflite".

    When compiler_options.timing is set, the wall-clock duration of the model
    reading phase and of the remaining work is printed.

    Returns the graph object produced by the model reader.
    Raises InputFileError if the model reader returns a falsy result.
    """
    if compiler_options.timing:
        phase_began = time.time()

    nng = model_reader.read_model(fname, model_reader_options)
    if not nng:
        raise InputFileError(fname, "input file could not be read")

    if compiler_options.verbose_operators:
        nng.print_operators()

    if compiler_options.timing:
        phase_ended = time.time()
        print("Model reading took %f s" % (phase_ended - phase_began))
        # Restart the clock so the second report covers only the work below.
        phase_began = time.time()

    compiler_driver.compiler_driver(nng, arch, compiler_options, scheduler_options)

    # Per-pass metrics.
    stats_writer.write_pass_metrics_csv(
        nng, "%s/%s_pass-breakdown_%s.csv" % (compiler_options.output_dir, nng.name, arch.system_config)
    )

    # Whole-network summary metrics.
    stats_writer.write_summary_metrics_csv(
        nng, "%s/%s_summary_%s.csv" % (compiler_options.output_dir, nng.name, arch.system_config), arch
    )

    stats_writer.print_performance_metrics(nng, show_cpu_operations=compiler_options.show_cpu_operations, arch=arch)

    # Only networks that were read from a .tflite file are written back out.
    if fname.endswith(".tflite"):
        tflite_writer.write_tflite(nng, "%s/%s_vela.tflite" % (compiler_options.output_dir, nng.name))

    if compiler_options.timing:
        phase_ended = time.time()
        print("Compiler driver took %f s" % (phase_ended - phase_began))

    return nng
| 77 | |
| 78 | |
def print_subgraph_io_summary(nng):
    """Print the input/output tensor sizes of every subgraph in nng.

    Subgraphs are visited in reverse order. For subgraphs placed on the NPU
    each input tensor, the scratch tensor and each output tensor is listed
    with its size in KiB. For every subgraph the accumulated tensor size and
    the SRAM memory used are printed, and the largest accumulated size over
    all subgraphs is printed at the end.
    """
    print("Subgraph IO Summary")
    print("-------------------")
    print("NNG: {0}".format(nng.name))

    largest_total = 0
    for subgraph in reversed(nng.subgraphs):
        print(" Subgraph: {0} = {1}".format(subgraph.name, subgraph.placement))
        subgraph_total = 0

        if subgraph.placement == PassPlacement.Npu:
            io_tensors = subgraph.input_tensors + [subgraph.scratch_tensor] + subgraph.output_tensors
            for tensor in io_tensors:
                # Inputs take priority; anything that is neither an input nor
                # an output (i.e. the scratch tensor) is reported as "In/Out".
                if tensor in subgraph.input_tensors:
                    direction = "In"
                else:
                    direction = "Out" if tensor in subgraph.output_tensors else "In/Out"

                tensor_kib = tensor.elements() * tensor.element_size() / 1024.0
                subgraph_total += tensor_kib
                print(" Tensor [{0}]: {1} = {2} KiB".format(direction, tensor.name, tensor_kib))

        print(" Total Size = {0} KiB".format(subgraph_total))
        sram_kib = subgraph.memory_used.get(MemArea.Sram, 0) / 1024.0
        print(" SRAM Memory Used = {0} KiB".format(sram_kib))
        largest_total = max(subgraph_total, largest_total)

    print(" Maximum Subgraph Size = {0} KiB".format(largest_total))
| 110 | |
| 111 | |
def _enum_member_parser(enum_cls):
    """Return an argparse ``type`` callable that looks up a member of enum_cls by name.

    A plain ``enum_cls[name]`` raises KeyError for unknown names, which argparse
    does not translate into a usage error. The returned callable raises
    argparse.ArgumentTypeError instead so the user gets a normal error message.
    """

    def parse(name):
        try:
            return enum_cls[name]
        except KeyError:
            raise argparse.ArgumentTypeError("%r is not a valid %s" % (name, enum_cls.__name__)) from None

    return parse


def _parse_literal(text):
    """argparse ``type`` callable that evaluates text as a Python literal.

    Behaves like ast.literal_eval for every input that evaluates successfully,
    but reports malformed input (ValueError or SyntaxError) as
    argparse.ArgumentTypeError so that argparse prints a usage error instead of
    a traceback.
    """
    try:
        return ast.literal_eval(text)
    except (ValueError, SyntaxError):
        raise argparse.ArgumentTypeError("invalid literal: %r" % (text,)) from None


def main(args=None):
    """Command line entry point of the Vela compiler.

    Parses the command line, optionally reads a Vela configuration file, builds
    the architecture, compiler, scheduler and model reader options, creates the
    output directory and runs process() on the given network.

    args: list of command line arguments without the program name; when None,
        sys.argv[1:] is used.

    Returns 0 on success. Invalid arguments are reported through the argument
    parser's error handling.
    """
    if args is None:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Ethos-U55")

    parser.add_argument(
        "network", metavar="NETWORK", type=str, default=None, nargs=None, help="Filename of network to process"
    )

    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument(
        "--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"
    )
    parser.add_argument("--config", type=str, help="Location of vela configuration file")

    parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter")
    parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization")
    parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing")
    parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose")
    parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format")
    parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule")
    parser.add_argument(
        "--verbose-pareto-frontier-schedules",
        action="store_true",
        help="Show all schedules along the pareto frontier of optimisation criteria",
    )
    parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation")
    parser.add_argument(
        "--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream"
    )
    parser.add_argument(
        "--verbose-register-command-stream", action="store_true", help="Verbose register command stream"
    )
    parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")

    parser.add_argument(
        "--show-minimum-possible-allocation", action="store_true", help="Show the minimum possible allocation"
    )
    parser.add_argument(
        "--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"
    )
    parser.add_argument(
        "--keep-scale-placement", action="store_true", help="Keep scale tensors memory placement during scheduling"
    )
    parser.add_argument(
        "--cascading",
        type=_parse_literal,
        default=True,
        choices=[True, False],
        help="Controls the packing of multiple passes into a cascade (default: %(default)s)",
    )
    parser.add_argument("--force-block-config", type=str, default="", help="Force a specific block configuration HxWxC")
    parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")
    parser.add_argument(
        "--accelerator-config",
        type=str,
        default="ethos-u55-256",
        choices=list(architecture_features.Accelerator.member_list()),
        help="Accelerator configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--system-config",
        type=str,
        default="internal-default",
        help="System configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--tensor-allocator",
        default=TensorAllocator.Greedy,
        type=_enum_member_parser(TensorAllocator),
        choices=list(TensorAllocator),
        help="Tensor Allocator algorithm (default: %(default)s)",
    )
    parser.add_argument(
        "--show-subgraph-io-summary",
        action="store_true",
        help="Shows a summary of all the subgraphs and their inputs and outputs",
    )
    parser.add_argument(
        "--ifm-streaming",
        type=_parse_literal,
        default=True,
        choices=[True, False],
        help="Controls scheduler IFM streaming search (default: %(default)s)",
    )
    parser.add_argument(
        "--block-config-limit",
        type=int,
        default=16,
        help="Limit block config search space, use zero for unlimited (default: %(default)s)",
    )
    parser.add_argument(
        "--global-memory-clock-scale",
        type=float,
        default=1.0,
        help=(
            "Performs an additional scaling of the individual memory clock scales specified by the system config "
            "(default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--pareto-metric",
        default=ParetoMetric.BwCycMem,
        type=_enum_member_parser(ParetoMetric),
        choices=list(ParetoMetric),
        help="Controls the calculation of the pareto metric (default: %(default)s)",
    )
    parser.add_argument(
        "--recursion-limit",
        type=int,
        default=10000,
        help="Set the recursion depth limit, may result in RecursionError if too low (default: %(default)s)",
    )
    parser.add_argument(
        "--max-block-dependency",
        type=int,
        default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP,
        choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1),
        help=(
            "Set the maximum value that can be used for the block dependency between npu kernel operations "
            "(default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--nhcwb16-between-cascaded-passes",
        type=_parse_literal,
        default=True,
        choices=[True, False],
        help="Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)",
    )
    parser.add_argument(
        "--weight-estimation-scaling",
        type=float,
        default=1.0,
        help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"),
    )
    parser.add_argument(
        "--allocation-alignment",
        type=int,
        default=Tensor.AllocationQuantum,
        help=("Controls the allocation byte alignment of cpu tensors (default: %(default)s)"),
    )
    args = parser.parse_args(args=args)

    # Read configuration file
    config_file = args.config
    config = None
    if config_file is not None:
        with open(config_file) as f:
            config = configparser.ConfigParser()
            config.read_file(f)

    # Defensive check: the positional NETWORK argument is declared without a
    # default-enabling nargs, so the parser normally rejects its absence itself.
    if args.network is None:
        parser.error("the following argument is required: NETWORK")

    sys.setrecursionlimit(args.recursion_limit)

    if args.force_block_config:
        force_block_config = architecture_features.Block.from_string(args.force_block_config)
    else:
        force_block_config = None

    alignment = args.allocation_alignment
    if alignment < 16:
        parser.error("the following argument needs to be greater or equal to 16: ALLOCATION_ALIGNMENT")
    # A power of two has a single bit set, so clearing its lowest set bit yields zero.
    if alignment & (alignment - 1) != 0:
        parser.error("the following argument needs to be a power of 2: ALLOCATION_ALIGNMENT")

    arch = architecture_features.ArchitectureFeatures(
        vela_config=config,
        system_config=args.system_config,
        accelerator_config=args.accelerator_config,
        override_block_config=force_block_config,
        block_config_limit=args.block_config_limit,
        global_memory_clock_scale=args.global_memory_clock_scale,
        max_blockdep=args.max_block_dependency,
        weight_estimation_scaling=args.weight_estimation_scaling,
    )

    compiler_options = compiler_driver.CompilerOptions(
        verbose_graph=args.verbose_graph,
        verbose_quantization=args.verbose_quantization,
        verbose_packing=args.verbose_packing,
        verbose_tensor_purpose=args.verbose_tensor_purpose,
        verbose_tensor_format=args.verbose_tensor_format,
        verbose_allocation=args.verbose_allocation,
        verbose_high_level_command_stream=args.verbose_high_level_command_stream,
        verbose_register_command_stream=args.verbose_register_command_stream,
        verbose_operators=args.verbose_operators,
        show_minimum_possible_allocation=args.show_minimum_possible_allocation,
        show_cpu_operations=args.show_cpu_operations,
        tensor_allocator=args.tensor_allocator,
        timing=args.timing,
        output_dir=args.output_dir,
        allocation_alignment=alignment,
    )

    scheduler_options = scheduler.SchedulerOptions(
        use_cascading=args.cascading,
        verbose_schedule=args.verbose_schedule,
        verbose_pareto_frontier_schedules=args.verbose_pareto_frontier_schedules,
        use_ifm_streaming=args.ifm_streaming,
        pareto_metric=args.pareto_metric,
        use_nhcwb16_between_cascaded_passes=args.nhcwb16_between_cascaded_passes,
        keep_scale_placement=args.keep_scale_placement,
    )

    model_reader_options = model_reader.ModelReaderOptions()

    # process() writes its result files into the output directory, so make sure it exists.
    os.makedirs(args.output_dir, exist_ok=True)

    nng = process(args.network, arch, model_reader_options, compiler_options, scheduler_options)

    if args.show_subgraph_io_summary:
        print_subgraph_io_summary(nng)

    return 0