blob: bfc76ec9d85e0ffb94948f882ca80f59d4439f59 [file] [log] [blame]
Tim Hall79d07d22020-04-27 18:20:16 +01001# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
4#
5# Licensed under the Apache License, Version 2.0 (the License); you may
6# not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an AS IS BASIS, WITHOUT
13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
# Description:
# Main entry point for the Vela compiler.
#
# Provides the command line interface, options parsing, and network loading before calling the compiler driver.
Diego Russoe8a10452020-04-21 17:39:10 +010020import argparse
21import ast
Tim Hall1bd531d2020-11-01 20:59:36 +000022import os
Diego Russoea6111a2020-04-14 18:41:58 +010023import sys
Tim Hall79d07d22020-04-27 18:20:16 +010024import time
Tim Hall79d07d22020-04-27 18:20:16 +010025
26from . import architecture_features
Diego Russoe8a10452020-04-21 17:39:10 +010027from . import compiler_driver
28from . import model_reader
29from . import scheduler
Tim Hall79d07d22020-04-27 18:20:16 +010030from . import stats_writer
31from . import tflite_writer
Tim Hall79d07d22020-04-27 18:20:16 +010032from ._version import __version__
Louis Verhaard11831ce2020-11-18 18:53:24 +010033from .api import API_VERSION
Tim Halle6ccd872020-11-09 16:46:37 +000034from .debug_database import DebugDatabase
Louis Verhaard7db78962020-05-25 15:05:26 +020035from .errors import InputFileError
Diego Russoe8a10452020-04-21 17:39:10 +010036from .nn_graph import PassPlacement
37from .nn_graph import TensorAllocator
Tim Hall79d07d22020-04-27 18:20:16 +010038from .scheduler import ParetoMetric
Michael McGeagh837dc1b2020-11-10 12:38:25 +000039from .supported_operators import SupportedOperators
Diego Russoea6111a2020-04-14 18:41:58 +010040from .tensor import MemArea
Jacob Bohlin0628a8c2020-08-28 13:25:14 +020041from .tensor import Tensor
Michael McGeagh837dc1b2020-11-10 12:38:25 +000042from .tflite_mapping import builtin_operator_map
43from .tflite_mapping import builtin_type_name
Louis Verhaard52078302020-11-18 13:35:06 +010044from ethosu.vela.architecture_features import ArchitectureFeatures
Tim Hall79d07d22020-04-27 18:20:16 +010045
46
def process(input_name, enable_debug_db, arch, model_reader_options, compiler_options, scheduler_options):
    """Read a network, run the compiler driver on it and write the results.

    The output directory named by ``compiler_options.output_dir`` is created if
    needed. Every generated file name starts with that directory joined with
    the input file's base name (extension removed).

    Files written:
      * ``<base>_pass-breakdown_<system_config>.csv``
      * ``<base>_summary_<system_config>.csv``
      * ``<base>_vela.tflite`` (only when ``input_name`` ends with ".tflite")
      * ``<base>_debug.xml`` (only when ``enable_debug_db`` is truthy)

    Raises InputFileError when the model reader returns a falsy graph.
    Returns the graph object produced by the model reader.
    """
    if compiler_options.timing:
        start = time.time()

    out_dir = compiler_options.output_dir
    os.makedirs(out_dir, exist_ok=True)
    network_stem, _ = os.path.splitext(os.path.basename(input_name))
    output_basename = os.path.join(out_dir, network_stem)
    DebugDatabase.show_warnings = enable_debug_db

    nng = model_reader.read_model(input_name, model_reader_options)
    if not nng:
        raise InputFileError(input_name, "Input file could not be read")

    if compiler_options.verbose_operators:
        nng.print_operators()

    if compiler_options.timing:
        # Report the reading phase, then restart the clock for the compile phase
        stop = time.time()
        print("Model reading took %f s" % (stop - start))
        start = time.time()

    compiler_driver.compiler_driver(nng, arch, compiler_options, scheduler_options)

    # Statistics: per-pass breakdown, summary, and a console performance report
    stats_writer.write_pass_metrics_csv(
        nng, "{0}_pass-breakdown_{1}.csv".format(output_basename, arch.system_config)
    )
    stats_writer.write_summary_metrics_csv(
        nng, "{0}_summary_{1}.csv".format(output_basename, arch.system_config), arch
    )
    stats_writer.print_performance_metrics(nng, show_cpu_operations=compiler_options.show_cpu_operations, arch=arch)

    # The name is always computed because the debug database records it,
    # even when no .tflite file is actually written
    output_filename = output_basename + "_vela.tflite"
    if input_name.endswith(".tflite"):
        tflite_writer.write_tflite(nng, output_filename)

    if enable_debug_db:
        DebugDatabase.write(output_basename + "_debug.xml", input_name, output_filename)

    if compiler_options.timing:
        stop = time.time()
        print("Compiler driver took %f s" % (stop - start))

    return nng
91
92
def print_subgraph_io_summary(nng):
    """Print the size of every input/output tensor of each subgraph in nng.

    Subgraphs are visited in reverse order. Tensors are only listed for
    subgraphs placed on the NPU; for every subgraph the accumulated tensor
    size and the SRAM memory used are printed, followed at the end by the
    largest accumulated size seen over all subgraphs. Sizes are in KiB.
    """

    print("Subgraph IO Summary")
    print("-------------------")
    print("NNG: {0}".format(nng.name))

    largest_total = 0
    for subgraph in reversed(nng.subgraphs):
        print(" Subgraph: {0} = {1}".format(subgraph.name, subgraph.placement))
        subgraph_total = 0

        if subgraph.placement == PassPlacement.Npu:
            io_tensors = subgraph.input_tensors + [subgraph.scratch_tensor] + subgraph.output_tensors
            for tensor in io_tensors:
                # Anything that is neither a declared input nor a declared output
                # (i.e. the scratch tensor) is reported as bidirectional
                if tensor in subgraph.input_tensors:
                    direction = "In"
                elif tensor in subgraph.output_tensors:
                    direction = "Out"
                else:
                    direction = "In/Out"

                kib = tensor.elements() * tensor.element_size() / 1024.0
                subgraph_total += kib
                print(" Tensor [{0}]: {1} = {2} KiB".format(direction, tensor.name, kib))

        sram_kib = subgraph.memory_used.get(MemArea.Sram, 0) / 1024.0
        print(" Total Size = {0} KiB".format(subgraph_total))
        print(" SRAM Memory Used = {0} KiB".format(sram_kib))
        largest_total = max(subgraph_total, largest_total)

    print(" Maximum Subgraph Size = {0} KiB".format(largest_total))
124
125
# Markdown only renders a hard line break when the line ends with two spaces.
# Kept as a named constant so the trailing whitespace cannot be silently lost.
_MD_HARD_BREAK = "  "


def _constraint_bullet(constraint):
    """Return the Markdown bullet line describing a single constraint.

    The text comes from the constraint's docstring. If there is no docstring
    (for example when running under ``python -OO``) the constraint's name is
    used instead, so that report generation does not fail.
    """
    reason = constraint.__doc__ or getattr(constraint, "__name__", repr(constraint))
    # Turn every newline of a multi-line docstring into a Markdown hard line break
    reason = reason.replace("\n", _MD_HARD_BREAK + "\n")
    return f"- {reason}"


def generate_supported_ops():
    """Generate the SUPPORTED_OPS.md report in the current working directory.

    The report contains a summary table of every operator in
    builtin_operator_map whose internal operator is listed in
    SupportedOperators.supported_operators, followed by the generic
    constraints and then one section per operator that has specific
    constraints. The path of the written file is printed on completion.
    """
    lines = [
        "# Supported Ops",
        "",
        "This file was automatically generated by Vela using the `--supported-ops-report` parameter." + _MD_HARD_BREAK,
        f"Vela version: `{__version__}`",
        "",
        "This file complies with",
        "[**Gitiles Markdown syntax**](https://github.com/google/gitiles/blob/master/Documentation/markdown.md)",
        "",
        "## Summary Table",
        "",
        "The table below contains TFLite operators that can be placed on the Ethos-U NPU." + _MD_HARD_BREAK,
        "If the constraints are not met, then that operator will be scheduled on the CPU instead." + _MD_HARD_BREAK,
        "For any other TFLite operator not listed, will be left untouched and scheduled on the CPU." + _MD_HARD_BREAK,
        "Please check the supported operator list for your chosen runtime for further information.",
        "",
        "| Operator | Constraints |",
        "| --- | --- |",
    ]
    supported = SupportedOperators()
    # (internal op, display name) pairs for operators that need their own constraints section
    op_constraint_links = []
    # Table rows are ordered alphabetically by the operator's display name
    op_list = sorted(((op, builtin_type_name(op)) for op in builtin_operator_map), key=lambda x: x[1])
    for op, name in op_list:
        internal_op = builtin_operator_map[op][0]
        if internal_op in SupportedOperators.supported_operators:
            links = "[Generic](#generic-constraints)"
            if internal_op in supported.specific_constraints:
                links += f", [Specific](#{name.lower()}-constraints)"
                op_constraint_links.append((internal_op, name))
            lines.append(f"| {name} | {links} |")
    lines += [
        "",
        "## Generic Constraints",
        "",
        "This is a list of constraints that all NPU operators must satisfy in order to be scheduled on the NPU.",
        "",
    ]
    lines.extend(_constraint_bullet(constraint) for constraint in supported.generic_constraints)
    for op, name in op_constraint_links:
        lines += [
            "",
            f"## {name} Constraints",
            "",
            f"This is a list of constraints that the {name} operator must satisfy in order to be scheduled on the NPU.",
            "",
        ]
        lines.extend(_constraint_bullet(constraint) for constraint in supported.specific_constraints[op])

    # Note. this will generate the file in the CWD
    filepath = os.path.join(os.getcwd(), "SUPPORTED_OPS.md")
    with open(filepath, "wt") as md:
        md.writelines(line + "\n" for line in lines)
        print(f"Report file: {filepath}")
186
187
def _enum_by_name(enum_cls):
    """Return an argparse ``type`` callable that looks up a member of enum_cls by name.

    A plain ``enum_cls[name]`` raises KeyError for an unknown name, which
    argparse does not intercept, so the user would get a traceback. The
    returned callable raises argparse.ArgumentTypeError instead so that
    argparse prints its normal usage error.
    """

    def convert(name):
        try:
            return enum_cls[name]
        except KeyError:
            raise argparse.ArgumentTypeError(f"invalid {enum_cls.__name__} name: {name!r}") from None

    return convert


def _python_literal(text):
    """Evaluate text as a Python literal for use as an argparse ``type`` callable.

    ast.literal_eval raises SyntaxError on malformed input, which argparse
    does not intercept; it is converted to argparse.ArgumentTypeError here.
    """
    try:
        return ast.literal_eval(text)
    except (SyntaxError, ValueError):
        raise argparse.ArgumentTypeError(f"invalid Python literal: {text!r}") from None


def _build_arg_parser():
    """Create the argparse parser that defines the Vela command line interface."""
    parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Arm Ethos-U NPUs")
    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument(
        "--api-version", action="version", version=API_VERSION, help="Displays the version of the external API."
    )
    parser.add_argument(
        "--supported-ops-report",
        action="store_true",
        help="Generate the SUPPORTED_OPS.md file in the current working directory and exit",
    )

    # set network nargs to be optional to allow the --supported-ops-report CLI option to be used standalone
    parser.add_argument(
        "network",
        metavar="NETWORK",
        type=str,
        default=None,
        nargs="?",
        help="Filename of the input TensorFlow Lite for Microcontrollers network",
    )
    parser.add_argument(
        "--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"
    )
    parser.add_argument(
        "--enable-debug-db",
        action="store_true",
        default=None,
        help="Enables the calculation and writing of a network debug database to output directory",
    )
    parser.add_argument(
        "--config", type=str, action="append", help="Vela configuration file(s) in Python ConfigParser .ini file format"
    )
    parser.add_argument("--verbose-all", action="store_true", help="Enable all verbose options")
    parser.add_argument("--verbose-config", action="store_true", help="Verbose system configuration and memory mode")
    parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter")
    parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization")
    parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing")
    parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose")
    parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format")
    parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule")
    parser.add_argument(
        "--verbose-pareto-frontier-schedules",
        action="store_true",
        help="Show all schedules along the pareto frontier of optimisation criteria",
    )
    parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation")
    parser.add_argument(
        "--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream"
    )
    parser.add_argument(
        "--verbose-register-command-stream", action="store_true", help="Verbose register command stream"
    )
    parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")
    parser.add_argument(
        "--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"
    )
    parser.add_argument(
        "--cache-bias-scale-tensor",
        type=_python_literal,
        default=True,
        choices=[True, False],
        help="Controls the caching of the bias & scale tensors in SRAM (default: %(default)s)",
    )
    parser.add_argument(
        "--cascading",
        type=_python_literal,
        default=True,
        choices=[True, False],
        help="Controls the packing of multiple passes into a cascade (default: %(default)s)",
    )
    parser.add_argument("--force-block-config", type=str, default="", help="Force a specific block configuration HxWxC")
    parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")
    parser.add_argument(
        "--accelerator-config",
        type=str,
        default="ethos-u55-256",
        choices=list(architecture_features.Accelerator.member_list()),
        help="Accelerator configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--system-config",
        type=str,
        default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG,
        help="System configuration to select from the Vela configuration file (default: %(default)s)",
    )
    parser.add_argument(
        "--memory-mode",
        type=str,
        default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG,
        help="Memory mode to select from the Vela configuration file (default: %(default)s)",
    )
    parser.add_argument(
        "--tensor-allocator",
        default=TensorAllocator.Search,
        type=_enum_by_name(TensorAllocator),
        choices=list(TensorAllocator),
        help="Tensor Allocator algorithm (default: %(default)s)",
    )
    parser.add_argument(
        "--show-subgraph-io-summary",
        action="store_true",
        help="Shows a summary of all the subgraphs and their inputs and outputs",
    )
    parser.add_argument(
        "--ifm-streaming",
        type=_python_literal,
        default=True,
        choices=[True, False],
        help="Controls scheduler IFM streaming search (default: %(default)s)",
    )
    parser.add_argument(
        "--block-config-limit",
        type=int,
        default=16,
        help="Limit block config search space, use zero for unlimited (default: %(default)s)",
    )
    parser.add_argument(
        "--pareto-metric",
        default=ParetoMetric.BwCycMem,
        type=_enum_by_name(ParetoMetric),
        choices=list(ParetoMetric),
        help="Controls the calculation of the pareto metric (default: %(default)s)",
    )
    parser.add_argument(
        "--recursion-limit",
        type=int,
        default=10000,
        help="Set the recursion depth limit, may result in RecursionError if too low (default: %(default)s)",
    )
    parser.add_argument(
        "--max-block-dependency",
        type=int,
        default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP,
        choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1),
        help=(
            "Set the maximum value that can be used for the block dependency between npu kernel operations"
            " (default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--nhcwb16-between-cascaded-passes",
        type=_python_literal,
        default=True,
        choices=[True, False],
        help="Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)",
    )
    parser.add_argument(
        "--weight-estimation-scaling",
        type=float,
        default=1.0,
        help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"),
    )
    parser.add_argument(
        "--cpu-tensor-alignment",
        type=int,
        default=Tensor.AllocationQuantum,
        help=(
            "Controls the allocation byte alignment of cpu tensors including Ethos-U Custom operator inputs and outputs"
            " (default: %(default)s)"
        ),
    )
    return parser


def main(args=None):
    """Command line entry point of the Vela compiler.

    Parses the command line, validates the options, builds the architecture,
    compiler and scheduler option objects and then compiles the network.

    args: list of command line argument strings; when None, sys.argv[1:] is used.

    Returns 0 on completion. Invalid command line usage is reported through
    argparse, which prints a usage message and exits. Raises InputFileError
    when a given --config file is missing or unreadable.
    """
    if args is None:
        args = sys.argv[1:]

    parser = _build_arg_parser()
    args = parser.parse_args(args=args)

    # Generate the supported ops report and exit
    if args.supported_ops_report:
        generate_supported_ops()
        return 0

    if args.network is None:
        parser.error("the following argument is required: NETWORK")

    # check all config files exist because they will be read as a group
    if args.config is not None:
        for filename in args.config:
            if not os.access(filename, os.R_OK):
                raise InputFileError(filename, "File not found or is not readable")

    sys.setrecursionlimit(args.recursion_limit)

    if args.force_block_config:
        force_block_config = architecture_features.Block.from_string(args.force_block_config)
    else:
        force_block_config = None

    # x & (x - 1) clears the lowest set bit, so it is zero only for a power of two
    if args.cpu_tensor_alignment < 16 or (args.cpu_tensor_alignment & (args.cpu_tensor_alignment - 1)) != 0:
        parser.error(
            "Invalid argument to --cpu-tensor-alignment = {} (must be greater than or equal to 16 and a power of 2)"
            "".format(args.cpu_tensor_alignment)
        )

    if args.system_config == ArchitectureFeatures.DEFAULT_CONFIG:
        print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for system configuration")

    if args.memory_mode == ArchitectureFeatures.DEFAULT_CONFIG:
        print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for memory mode")

    # --verbose-all switches on every individual verbose_* option
    if args.verbose_all:
        for v in vars(args):
            if v.startswith("verbose") and v != "verbose_all":
                setattr(args, v, True)

    arch = architecture_features.ArchitectureFeatures(
        vela_config_files=args.config,
        system_config=args.system_config,
        memory_mode=args.memory_mode,
        accelerator_config=args.accelerator_config,
        override_block_config=force_block_config,
        block_config_limit=args.block_config_limit,
        max_blockdep=args.max_block_dependency,
        weight_estimation_scaling=args.weight_estimation_scaling,
        verbose_config=args.verbose_config,
    )

    compiler_options = compiler_driver.CompilerOptions(
        verbose_graph=args.verbose_graph,
        verbose_quantization=args.verbose_quantization,
        verbose_packing=args.verbose_packing,
        verbose_tensor_purpose=args.verbose_tensor_purpose,
        verbose_tensor_format=args.verbose_tensor_format,
        verbose_allocation=args.verbose_allocation,
        verbose_high_level_command_stream=args.verbose_high_level_command_stream,
        verbose_register_command_stream=args.verbose_register_command_stream,
        verbose_operators=args.verbose_operators,
        show_cpu_operations=args.show_cpu_operations,
        tensor_allocator=args.tensor_allocator,
        timing=args.timing,
        output_dir=args.output_dir,
        cpu_tensor_alignment=args.cpu_tensor_alignment,
    )

    scheduler_options = scheduler.SchedulerOptions(
        use_cascading=args.cascading,
        verbose_schedule=args.verbose_schedule,
        verbose_pareto_frontier_schedules=args.verbose_pareto_frontier_schedules,
        use_ifm_streaming=args.ifm_streaming,
        pareto_metric=args.pareto_metric,
        use_nhcwb16_between_cascaded_passes=args.nhcwb16_between_cascaded_passes,
        cache_bias_scale_tensor=args.cache_bias_scale_tensor,
    )

    model_reader_options = model_reader.ModelReaderOptions()

    nng = process(args.network, args.enable_debug_db, arch, model_reader_options, compiler_options, scheduler_options)

    if args.show_subgraph_io_summary:
        print_subgraph_io_summary(nng)

    return 0