blob: 6fc5c47fe13c4967e26e0f4d8d7d8cde6e3ca5e1 [file] [log] [blame]
Tim Hall79d07d22020-04-27 18:20:16 +01001# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
4#
5# Licensed under the Apache License, Version 2.0 (the License); you may
6# not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an AS IS BASIS, WITHOUT
13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
Tim Hall79d07d22020-04-27 18:20:16 +010016# Description:
17# Main entry point for the Vela compiler.
18#
# Provides the command line interface, options parsing, and network loading before calling the compiler driver.
Diego Russoe8a10452020-04-21 17:39:10 +010020import argparse
21import ast
Tim Hall1bd531d2020-11-01 20:59:36 +000022import os
Diego Russoea6111a2020-04-14 18:41:58 +010023import sys
Tim Hall79d07d22020-04-27 18:20:16 +010024import time
Tim Hall79d07d22020-04-27 18:20:16 +010025
26from . import architecture_features
Diego Russoe8a10452020-04-21 17:39:10 +010027from . import compiler_driver
28from . import model_reader
29from . import scheduler
Tim Hall79d07d22020-04-27 18:20:16 +010030from . import stats_writer
31from . import tflite_writer
Tim Hall79d07d22020-04-27 18:20:16 +010032from ._version import __version__
Patrik Gustavssonc8a22f12020-11-18 17:05:50 +010033from .api import api_version
Tim Halle6ccd872020-11-09 16:46:37 +000034from .debug_database import DebugDatabase
Louis Verhaard7db78962020-05-25 15:05:26 +020035from .errors import InputFileError
Diego Russoe8a10452020-04-21 17:39:10 +010036from .nn_graph import PassPlacement
37from .nn_graph import TensorAllocator
Tim Hall79d07d22020-04-27 18:20:16 +010038from .scheduler import ParetoMetric
Michael McGeagh837dc1b2020-11-10 12:38:25 +000039from .supported_operators import SupportedOperators
Diego Russoea6111a2020-04-14 18:41:58 +010040from .tensor import MemArea
Jacob Bohlin0628a8c2020-08-28 13:25:14 +020041from .tensor import Tensor
Michael McGeagh837dc1b2020-11-10 12:38:25 +000042from .tflite_mapping import builtin_operator_map
43from .tflite_mapping import builtin_type_name
Tim Hall79d07d22020-04-27 18:20:16 +010044
45
def process(input_name, enable_debug_db, arch, model_reader_options, compiler_options, scheduler_options):
    """Read a network, run the compiler driver on it and write the results to the output directory.

    The output directory is created if needed. Every output file name starts with the input file's base name
    (extension removed). Two CSV reports (pass breakdown and summary) are always written, the compiled network
    is written only when the input name ends in ".tflite", and the debug database is written only when
    enable_debug_db is set. When compiler_options.timing is set, the time taken by model reading and by the
    compiler driver stage is printed.

    Raises InputFileError if the model reader returns nothing for input_name.
    Returns the graph produced by the model reader after it has been through the compiler driver.
    """
    if compiler_options.timing:
        phase_began = time.time()

    out_dir = compiler_options.output_dir
    os.makedirs(out_dir, exist_ok=True)
    network_stem, _ = os.path.splitext(os.path.basename(input_name))
    out_prefix = os.path.join(out_dir, network_stem)
    DebugDatabase.show_warnings = enable_debug_db

    graph = model_reader.read_model(input_name, model_reader_options)
    if not graph:
        raise InputFileError(input_name, "input file could not be read")

    if compiler_options.verbose_operators:
        graph.print_operators()

    if compiler_options.timing:
        phase_ended = time.time()
        print("Model reading took %f s" % (phase_ended - phase_began))
        # restart the clock so that the second report covers only the compilation stage
        phase_began = time.time()

    compiler_driver.compiler_driver(graph, arch, compiler_options, scheduler_options)

    # the CSV report names carry the system configuration as a suffix
    stats_writer.write_pass_metrics_csv(graph, "{0}_pass-breakdown_{1}.csv".format(out_prefix, arch.system_config))
    stats_writer.write_summary_metrics_csv(graph, "{0}_summary_{1}.csv".format(out_prefix, arch.system_config), arch)

    stats_writer.print_performance_metrics(
        graph, show_cpu_operations=compiler_options.show_cpu_operations, arch=arch
    )

    compiled_name = out_prefix + "_vela.tflite"
    if input_name.endswith(".tflite"):
        tflite_writer.write_tflite(graph, compiled_name)

    if enable_debug_db:
        # the debug database records both the input and the compiled output file names
        DebugDatabase.write(out_prefix + "_debug.xml", input_name, compiled_name)

    if compiler_options.timing:
        phase_ended = time.time()
        print("Compiler driver took %f s" % (phase_ended - phase_began))

    return graph
90
91
def print_subgraph_io_summary(nng):
    """Print the sizes of the input, scratch and output tensors of every NPU subgraph.

    Subgraphs are visited in reverse order. For each one the sum of the listed tensor sizes and the SRAM memory
    used are printed, and the largest per-subgraph sum is printed at the end. All sizes are shown in KiB.
    """
    print("Subgraph IO Summary")
    print("-------------------")
    print("NNG: {0}".format(nng.name))

    largest_total = 0
    for subgraph in reversed(nng.subgraphs):
        print(" Subgraph: {0} = {1}".format(subgraph.name, subgraph.placement))
        subgraph_total = 0

        # tensors are only listed for subgraphs that are placed on the NPU
        listed_tensors = []
        if subgraph.placement == PassPlacement.Npu:
            listed_tensors = subgraph.input_tensors + [subgraph.scratch_tensor] + subgraph.output_tensors

        for tensor in listed_tensors:
            # a tensor that is neither an input nor an output is reported as both directions
            direction = "In/Out"
            if tensor in subgraph.input_tensors:
                direction = "In"
            elif tensor in subgraph.output_tensors:
                direction = "Out"

            kib = tensor.elements() * tensor.element_size() / 1024.0
            subgraph_total = subgraph_total + kib
            print(" Tensor [{0}]: {1} = {2} KiB".format(direction, tensor.name, kib))

        sram_kib = subgraph.memory_used.get(MemArea.Sram, 0) / 1024.0
        print(" Total Size = {0} KiB".format(subgraph_total))
        print(" SRAM Memory Used = {0} KiB".format(sram_kib))
        largest_total = max(subgraph_total, largest_total)

    print(" Maximum Subgraph Size = {0} KiB".format(largest_total))
123
124
def generate_supported_ops():
    """Write the SUPPORTED_OPS.md report into the current working directory.

    The report is CommonMark text made up of a summary table with one row per supported TFLite operator, the list
    of generic constraints, and one section for each operator that has specific constraints. The text of every
    constraint is taken from the constraint's docstring. The path of the written file is printed at the end.
    """
    # CommonMark only renders a hard line break when the line ends with two or more spaces
    hard_break = "  "

    def constraint_bullets(constraints):
        # one bullet per constraint; the line breaks inside a multi-line docstring are kept as hard line breaks
        return ["- " + constraint.__doc__.replace("\n", hard_break + "\n") for constraint in constraints]

    lines = [
        "# Supported Ops",
        "",
        "This file was automatically generated by Vela using the `--supported-ops-report` parameter." + hard_break,
        f"Vela version: `{__version__}`",
        "",
        "This file complies with [**CommonMark.**](https://commonmark.org)",
        "",
        "## Summary Table",
        "",
        "The table below contains TFLite operators that can be placed on the Ethos-U NPU." + hard_break,
        "If the constraints are not met, then that operator will be scheduled on the CPU instead." + hard_break,
        "For any other TFLite operator not listed, will be left untouched and scheduled on the CPU." + hard_break,
        "Please check the supported operator list for your chosen runtime for further information.",
        "",
        "| Operator | Constraints |",
        "| - | - |",
    ]
    supported = SupportedOperators()
    op_constraint_links = []
    # the table rows are ordered by the TFLite operator name
    op_list = sorted(((op, builtin_type_name(op)) for op in builtin_operator_map), key=lambda x: x[1])
    for op, name in op_list:
        internal_op = builtin_operator_map[op][0]
        if internal_op in SupportedOperators.supported_operators:
            links = "[Generic](#generic-constraints)"
            if internal_op in supported.specific_constraints:
                # the link targets the "## <name> Constraints" heading that is generated further down
                links += f", [Specific](#{name.lower()}-constraints)"
                op_constraint_links.append((internal_op, name))
            lines.append(f"| {name} | {links} |")
    lines += [
        "",
        "## Generic Constraints",
        "",
        "This is a list of constraints that all NPU operators must satisfy in order to be scheduled on the NPU.",
        "",
    ]
    lines += constraint_bullets(supported.generic_constraints)
    for op, name in op_constraint_links:
        lines += [
            "",
            f"## {name} Constraints",
            "",
            f"This is a list of constraints that the {name} operator must satisfy in order to be scheduled on the NPU.",
            "",
        ]
        lines += constraint_bullets(supported.specific_constraints[op])

    # Note: this will generate the file in the CWD
    filepath = os.path.join(os.getcwd(), "SUPPORTED_OPS.md")
    # a fixed encoding keeps the report independent of the platform's default locale encoding
    with open(filepath, "wt", encoding="utf-8") as md:
        md.writelines(line + "\n" for line in lines)
    print(f"Report file: {filepath}")
184
185
def _enum_member_by_name(enum_cls):
    """Build an argparse type converter that maps a member name onto the matching member of enum_cls.

    A plain enum_cls[name] lookup raises KeyError for an unknown name. argparse does not treat KeyError as a
    conversion failure, so the user would be shown a traceback. The converter raises ArgumentTypeError instead,
    which argparse reports as a normal command line error.
    """

    def convert(name):
        try:
            return enum_cls[name]
        except KeyError:
            raise argparse.ArgumentTypeError("invalid {0} name: '{1}'".format(enum_cls.__name__, name)) from None

    return convert


def main(args=None):
    """Command line entry point of the Vela compiler.

    Parses the command line and then either generates the supported operators report, or builds the architecture,
    compiler and scheduler options and compiles the given network.

    args is the list of command line argument strings; sys.argv[1:] is used when it is None.

    Returns 0 on success. A missing NETWORK argument or an invalid --cpu-tensor-alignment value is reported with
    parser.error. InputFileError is raised when one of the --config files cannot be read.
    """
    if args is None:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Arm Ethos-U NPUs")
    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument(
        "--api-version", action="version", version=api_version, help="Displays the version of the external API."
    )
    parser.add_argument(
        "--supported-ops-report",
        action="store_true",
        help="Generate the SUPPORTED_OPS.md file in the current working directory and exit",
    )

    # set network nargs to be optional to allow the supported-ops-report CLI option to be used standalone
    parser.add_argument(
        "network",
        metavar="NETWORK",
        type=str,
        default=None,
        nargs="?",
        help="Filename of the input TensorFlow Lite for Microcontrollers network",
    )
    parser.add_argument(
        "--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"
    )
    parser.add_argument(
        "--enable-debug-db",
        action="store_true",
        default=None,
        help="Enables the calculation and writing of a network debug database to output directory",
    )
    parser.add_argument(
        "--config", type=str, action="append", help="Vela configuration file(s) in Python ConfigParser .ini file format"
    )
    parser.add_argument("--verbose-config", action="store_true", help="Verbose system configuration and memory mode")
    parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter")
    parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization")
    parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing")
    parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose")
    parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format")
    parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule")
    parser.add_argument(
        "--verbose-pareto-frontier-schedules",
        action="store_true",
        help="Show all schedules along the pareto frontier of optimisation criteria",
    )
    parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation")
    parser.add_argument(
        "--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream"
    )
    parser.add_argument(
        "--verbose-register-command-stream", action="store_true", help="Verbose register command stream"
    )
    parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")
    parser.add_argument(
        "--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"
    )
    parser.add_argument(
        "--keep-scale-placement", action="store_true", help="Keep scale tensors memory placement during scheduling"
    )
    parser.add_argument(
        "--cascading",
        type=ast.literal_eval,
        default=True,
        choices=[True, False],
        help="Controls the packing of multiple passes into a cascade (default: %(default)s)",
    )
    parser.add_argument("--force-block-config", type=str, default="", help="Force a specific block configuration HxWxC")
    parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")
    parser.add_argument(
        "--accelerator-config",
        type=str,
        default="ethos-u55-256",
        choices=list(architecture_features.Accelerator.member_list()),
        help="Accelerator configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--system-config",
        type=str,
        default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG,
        help="System configuration to select from the Vela configuration file (default: %(default)s)",
    )
    parser.add_argument(
        "--memory-mode",
        type=str,
        default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG,
        help="Memory mode to select from the Vela configuration file (default: %(default)s)",
    )
    parser.add_argument(
        "--tensor-allocator",
        default=TensorAllocator.Greedy,
        type=_enum_member_by_name(TensorAllocator),
        choices=list(TensorAllocator),
        help="Tensor Allocator algorithm (default: %(default)s)",
    )
    parser.add_argument(
        "--show-subgraph-io-summary",
        action="store_true",
        help="Shows a summary of all the subgraphs and their inputs and outputs",
    )
    parser.add_argument(
        "--ifm-streaming",
        type=ast.literal_eval,
        default=True,
        choices=[True, False],
        help="Controls scheduler IFM streaming search (default: %(default)s)",
    )
    parser.add_argument(
        "--block-config-limit",
        type=int,
        default=16,
        help="Limit block config search space, use zero for unlimited (default: %(default)s)",
    )
    parser.add_argument(
        "--pareto-metric",
        default=ParetoMetric.BwCycMem,
        type=_enum_member_by_name(ParetoMetric),
        choices=list(ParetoMetric),
        help="Controls the calculation of the pareto metric (default: %(default)s)",
    )
    parser.add_argument(
        "--recursion-limit",
        type=int,
        default=10000,
        help="Set the recursion depth limit, may result in RecursionError if too low (default: %(default)s)",
    )
    parser.add_argument(
        "--max-block-dependency",
        type=int,
        default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP,
        choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1),
        help=(
            "Set the maximum value that can be used for the block dependency between npu kernel operations"
            " (default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--nhcwb16-between-cascaded-passes",
        type=ast.literal_eval,
        default=True,
        choices=[True, False],
        help="Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)",
    )
    parser.add_argument(
        "--weight-estimation-scaling",
        type=float,
        default=1.0,
        help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"),
    )
    parser.add_argument(
        "--cpu-tensor-alignment",
        type=int,
        default=Tensor.AllocationQuantum,
        help=(
            "Controls the allocation byte alignment of cpu tensors including Ethos-U Custom operator inputs and outputs"
            " (default: %(default)s)"
        ),
    )
    args = parser.parse_args(args=args)

    # Generate the supported ops report and exit
    if args.supported_ops_report:
        generate_supported_ops()
        return 0

    # NETWORK is declared optional above, so its presence has to be enforced here
    if args.network is None:
        parser.error("the following argument is required: NETWORK")

    # check all config files exist because they will be read as a group
    if args.config is not None:
        for filename in args.config:
            if not os.access(filename, os.R_OK):
                raise InputFileError(filename, "File not found or is not readable.")

    sys.setrecursionlimit(args.recursion_limit)

    if args.force_block_config:
        force_block_config = architecture_features.Block.from_string(args.force_block_config)
    else:
        force_block_config = None

    # x & (x - 1) is zero only when x has a single bit set, i.e. when it is a power of 2
    if args.cpu_tensor_alignment < 16 or args.cpu_tensor_alignment & (args.cpu_tensor_alignment - 1) != 0:
        parser.error(
            "Invalid argument to --cpu-tensor-alignment = {} (must be greater than or equal to 16 and a power of 2)"
            "".format(args.cpu_tensor_alignment)
        )

    arch = architecture_features.ArchitectureFeatures(
        vela_config_files=args.config,
        system_config=args.system_config,
        memory_mode=args.memory_mode,
        accelerator_config=args.accelerator_config,
        override_block_config=force_block_config,
        block_config_limit=args.block_config_limit,
        max_blockdep=args.max_block_dependency,
        weight_estimation_scaling=args.weight_estimation_scaling,
        verbose_config=args.verbose_config,
    )

    compiler_options = compiler_driver.CompilerOptions(
        verbose_graph=args.verbose_graph,
        verbose_quantization=args.verbose_quantization,
        verbose_packing=args.verbose_packing,
        verbose_tensor_purpose=args.verbose_tensor_purpose,
        verbose_tensor_format=args.verbose_tensor_format,
        verbose_allocation=args.verbose_allocation,
        verbose_high_level_command_stream=args.verbose_high_level_command_stream,
        verbose_register_command_stream=args.verbose_register_command_stream,
        verbose_operators=args.verbose_operators,
        show_cpu_operations=args.show_cpu_operations,
        tensor_allocator=args.tensor_allocator,
        timing=args.timing,
        output_dir=args.output_dir,
        cpu_tensor_alignment=args.cpu_tensor_alignment,
    )

    scheduler_options = scheduler.SchedulerOptions(
        use_cascading=args.cascading,
        verbose_schedule=args.verbose_schedule,
        verbose_pareto_frontier_schedules=args.verbose_pareto_frontier_schedules,
        use_ifm_streaming=args.ifm_streaming,
        pareto_metric=args.pareto_metric,
        use_nhcwb16_between_cascaded_passes=args.nhcwb16_between_cascaded_passes,
        keep_scale_placement=args.keep_scale_placement,
    )

    model_reader_options = model_reader.ModelReaderOptions()

    nng = process(args.network, args.enable_debug_db, arch, model_reader_options, compiler_options, scheduler_options)

    if args.show_subgraph_io_summary:
        print_subgraph_io_summary(nng)

    return 0