blob: aa74ecf3624ba25d81601913441872bc94c05c9e [file] [log] [blame]
erik.andersson@arm.comad45f792021-02-03 10:20:16 +01001# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
Tim Hall79d07d22020-04-27 18:20:16 +01002#
3# SPDX-License-Identifier: Apache-2.0
4#
5# Licensed under the Apache License, Version 2.0 (the License); you may
6# not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an AS IS BASIS, WITHOUT
13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
Tim Hall79d07d22020-04-27 18:20:16 +010016# Description:
17# Main entry point for the Vela compiler.
18#
# Provides the command line interface, options parsing, and network loading, before calling the compiler driver.
Diego Russoe8a10452020-04-21 17:39:10 +010020import argparse
21import ast
Tim Hall1bd531d2020-11-01 20:59:36 +000022import os
Diego Russoea6111a2020-04-14 18:41:58 +010023import sys
Tim Hall79d07d22020-04-27 18:20:16 +010024import time
Tim Hall79d07d22020-04-27 18:20:16 +010025
erik.andersson@arm.comad45f792021-02-03 10:20:16 +010026import flatbuffers
27
Tim Hall79d07d22020-04-27 18:20:16 +010028from . import architecture_features
Diego Russoe8a10452020-04-21 17:39:10 +010029from . import compiler_driver
30from . import model_reader
31from . import scheduler
Tim Hall79d07d22020-04-27 18:20:16 +010032from . import stats_writer
33from . import tflite_writer
Tim Hall79d07d22020-04-27 18:20:16 +010034from ._version import __version__
Louis Verhaard11831ce2020-11-18 18:53:24 +010035from .api import API_VERSION
Tim Halle6ccd872020-11-09 16:46:37 +000036from .debug_database import DebugDatabase
Louis Verhaard7db78962020-05-25 15:05:26 +020037from .errors import InputFileError
Henrik G Olssonea9b23c2021-03-23 17:34:49 +010038from .errors import VelaError
Diego Russoe8a10452020-04-21 17:39:10 +010039from .nn_graph import PassPlacement
40from .nn_graph import TensorAllocator
Tim Hall79d07d22020-04-27 18:20:16 +010041from .scheduler import ParetoMetric
Michael McGeagh837dc1b2020-11-10 12:38:25 +000042from .supported_operators import SupportedOperators
Diego Russoea6111a2020-04-14 18:41:58 +010043from .tensor import MemArea
Jacob Bohlin0628a8c2020-08-28 13:25:14 +020044from .tensor import Tensor
erik.andersson@arm.comad45f792021-02-03 10:20:16 +010045from .tflite.Model import Model
Michael McGeagh837dc1b2020-11-10 12:38:25 +000046from .tflite_mapping import builtin_operator_map
47from .tflite_mapping import builtin_type_name
Louis Verhaard52078302020-11-18 13:35:06 +010048from ethosu.vela.architecture_features import ArchitectureFeatures
Tim Hall79d07d22020-04-27 18:20:16 +010049
50
def process(input_name, enable_debug_db, arch, model_reader_options, compiler_options, scheduler_options):
    """Compile one network file and write the resulting artefacts to the output directory.

    The network is read with the model reader, passed through the compiler driver, and
    then the pass breakdown CSV, summary CSV and performance metrics are produced.
    For inputs whose name ends in ``.tflite`` the optimised network is written as
    ``<basename>_vela.tflite``. When the debug database is enabled, the stream offsets
    of the written file are recorded and ``<basename>_debug.xml`` is written as well.

    :param input_name: filename of the network to compile
    :param enable_debug_db: truthy to record and write the debug database
    :param arch: architecture object handed to the compiler driver and the stats writers
    :param model_reader_options: options forwarded to ``model_reader.read_model``
    :param compiler_options: compiler options; ``timing``, ``output_dir``,
        ``verbose_operators``, ``show_cpu_operations`` and ``verbose_weights`` are read here
    :param scheduler_options: options forwarded to the compiler driver
    :return: the network graph returned by the model reader, after compilation
    :raises InputFileError: if the model reader returns a falsy graph
    """
    if compiler_options.timing:
        start = time.time()

    # All outputs share "<output_dir>/<input file name without extension>" as their prefix
    os.makedirs(compiler_options.output_dir, exist_ok=True)
    network_stem = os.path.splitext(os.path.basename(input_name))[0]
    output_basename = os.path.join(compiler_options.output_dir, network_stem)
    DebugDatabase.show_warnings = enable_debug_db

    nng = model_reader.read_model(input_name, model_reader_options)
    if not nng:
        raise InputFileError(input_name, "Input file could not be read")

    if compiler_options.verbose_operators:
        nng.print_operators()

    if compiler_options.timing:
        stop = time.time()
        print("Model reading took %f s" % (stop - start))
        # Restart the clock so the second report only covers the compiler driver stage
        start = time.time()

    compiler_driver.compiler_driver(nng, arch, compiler_options, scheduler_options)

    passes_csv_file = "{0}_pass-breakdown_{1}.csv".format(output_basename, arch.system_config)
    stats_writer.write_pass_metrics_csv(nng, passes_csv_file)

    summary_csv_file = "{0}_summary_{1}.csv".format(output_basename, arch.system_config)
    stats_writer.write_summary_metrics_csv(nng, summary_csv_file, arch)

    stats_writer.print_performance_metrics(
        nng,
        show_cpu_operations=compiler_options.show_cpu_operations,
        verbose_weights=compiler_options.verbose_weights,
        arch=arch,
    )

    output_filename = output_basename + "_vela.tflite"
    if input_name.endswith(".tflite"):
        tflite_writer.write_tflite(nng, output_filename)

    if enable_debug_db:
        # The n-th smallest offset in the written file is paired with the subgraph
        # whose generated_stream_id is n
        ordered_offsets = sorted(calculate_operator_file_offsets(output_filename))
        for stream_id, offset in enumerate(ordered_offsets):
            sg = find_subgraph_with_command_stream_order(nng, stream_id)
            if sg is None:
                continue
            DebugDatabase.set_stream_offset(sg, offset)
        debug_filename = output_basename + "_debug.xml"
        DebugDatabase.write(debug_filename, input_name, output_filename)

    if compiler_options.timing:
        stop = time.time()
        print("Compiler driver took %f s" % (stop - start))

    return nng
105
106
def find_subgraph_with_command_stream_order(nng, idx):
    """Return the first subgraph of ``nng`` whose ``generated_stream_id`` equals ``idx``.

    :param nng: object exposing an iterable ``subgraphs`` attribute
    :param idx: stream id to look for
    :return: the matching subgraph, or None when no subgraph has that id
    """
    matches = (candidate for candidate in nng.subgraphs if candidate.generated_stream_id == idx)
    return next(matches, None)
112
113
def calculate_operator_file_offsets(name: str):
    """Return one file offset per custom operator found in a Vela optimised TFLite file.

    For every operator whose operator code has a custom code, the buffer behind the
    operator's first input tensor is looked up and the position of that buffer's
    vector within the file is appended to the result.

    :param name: filename of the TFLite file to inspect
    :return: list of offsets, in the order the custom operators are encountered
    """
    # Read the vela optimized tflite file
    with open(name, "rb") as tflite_file:
        buf = bytearray(tflite_file.read())
    model = Model.GetRootAsModel(buf, 0)

    # Calculate the file offsets for each custom operator
    file_offsets = []
    for sg_idx in range(model.SubgraphsLength()):  # However only one subgraph is supported as of now
        sg = model.Subgraphs(sg_idx)
        for op_idx in range(sg.OperatorsLength()):
            operator = sg.Operators(op_idx)
            opcode = model.OperatorCodes(operator.OpcodeIndex())
            if opcode.CustomCode() is None:
                # Not a custom operator, nothing to record
                continue
            tensor = sg.Tensors(operator.Inputs(0))
            buffer = model.Buffers(tensor.Buffer())
            # NOTE(review): 4 is presumably the vtable slot of the buffer's data field - confirm
            # against the generated TFLite flatbuffers bindings
            offset = flatbuffers.number_types.UOffsetTFlags.py_type(buffer._tab.Offset(4))
            file_offsets.append(buffer._tab.Vector(offset))
    return file_offsets
132
133
def print_subgraph_io_summary(nng):
    """Print the input/output tensor sizes of every subgraph, visiting subgraphs in reverse order.

    Tensors are only listed for subgraphs placed on the NPU. For each subgraph the
    summed tensor size and the SRAM memory used are printed in KiB, followed at the
    end by the largest summed size seen across all subgraphs.

    :param nng: network graph exposing ``name`` and ``subgraphs``
    """
    print("Subgraph IO Summary")
    print("-------------------")
    print("NNG: {0}".format(nng.name))

    max_sg_size = 0
    for sg in reversed(nng.subgraphs):
        print(" Subgraph: {0} = {1}".format(sg.name, sg.placement))
        sg_size = 0

        if sg.placement == PassPlacement.Npu:
            io_tensors = sg.input_tensors + [sg.scratch_tensor] + sg.output_tensors
            for tens in io_tensors:
                # Anything that is neither an input nor an output (the scratch tensor) is both
                tens_dir = "In" if tens in sg.input_tensors else "Out" if tens in sg.output_tensors else "In/Out"
                size = tens.elements() * tens.element_size() / 1024.0
                sg_size += size
                print(" Tensor [{0}]: {1} = {2} KiB".format(tens_dir, tens.name, size))

        sram_used = sg.memory_used.get(MemArea.Sram, 0) / 1024.0
        print(" Total Size = {0} KiB".format(sg_size))
        print(" SRAM Memory Used = {0} KiB".format(sram_used))
        max_sg_size = max(sg_size, max_sg_size)

    print(" Maximum Subgraph Size = {0} KiB".format(max_sg_size))
165
166
def _constraint_bullets(constraints):
    """Return one Markdown bullet line per constraint, using each constraint's docstring as the text."""
    # Markdown needs two spaces at the end of a line to render it as a separate line
    return ["- " + constraint.__doc__.replace("\n", "  \n") for constraint in constraints]


def generate_supported_ops():
    """Write the SUPPORTED_OPS.md report to the current working directory.

    The report holds a summary table of the TFLite operators that can be placed on
    the NPU, the generic constraints that apply to every NPU operator, and one
    section per operator that has specific constraints. Constraint text is taken
    from the docstring of each constraint. The path of the written file is printed.
    """
    # Lines that must render as a separate line end with two spaces (Markdown hard
    # line break); a single trailing space would be folded into the paragraph
    lines = [
        "# Supported Ops",
        "",
        "This file was automatically generated by Vela using the `--supported-ops-report` parameter.  ",
        f"Vela version: `{__version__}`",
        "",
        "This file complies with",
        "[**Gitiles Markdown syntax**](https://github.com/google/gitiles/blob/master/Documentation/markdown.md)",
        "",
        "## Summary Table",
        "",
        "The table below contains TFLite operators that can be placed on the Ethos-U NPU.  ",
        "If the constraints are not met, then that operator will be scheduled on the CPU instead.  ",
        "For any other TFLite operator not listed, will be left untouched and scheduled on the CPU.  ",
        "Please check the supported operator list for your chosen runtime for further information.",
        "",
        "| Operator | Constraints |",
        "| --- | --- |",
    ]
    supported = SupportedOperators()
    # (internal op, display name) for every operator that gets its own constraints section
    op_constraint_links = []
    op_list = sorted(((op, builtin_type_name(op)) for op in builtin_operator_map), key=lambda x: x[1])
    for op, name in op_list:
        internal_op = builtin_operator_map[op][0]
        if internal_op in SupportedOperators.supported_operators:
            links = "[Generic](#generic-constraints)"
            if internal_op in supported.specific_constraints:
                links += f", [Specific](#{name.lower()}-constraints)"
                op_constraint_links.append((internal_op, name))
            lines.append(f"| {name} | {links} |")
    lines += [
        "",
        "## Generic Constraints",
        "",
        "This is a list of constraints that all NPU operators must satisfy in order to be scheduled on the NPU.",
        "",
    ]
    lines += _constraint_bullets(supported.generic_constraints)
    for op, name in op_constraint_links:
        lines += [
            "",
            f"## {name} Constraints",
            "",
            f"This is a list of constraints that the {name} operator must satisfy in order to be scheduled on the NPU.",
            "",
        ]
        lines += _constraint_bullets(supported.specific_constraints[op])

    # Note. this will generate the file in the CWD
    filepath = os.path.join(os.getcwd(), "SUPPORTED_OPS.md")
    with open(filepath, "wt") as md:
        md.writelines(line + "\n" for line in lines)
        print(f"Report file: {filepath}")
227
228
def _build_argument_parser():
    """Create the ``vela`` argument parser with every command line option registered."""
    parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Arm Ethos-U NPUs")

    def add_flag(name, help_text, **extra):
        # Switch that is set when it is present on the command line
        parser.add_argument(name, action="store_true", help=help_text, **extra)

    def add_bool_option(name, help_text):
        # Option that takes an explicit True/False value and is on by default
        parser.add_argument(name, type=ast.literal_eval, default=True, choices=[True, False], help=help_text)

    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument(
        "--api-version", action="version", version=API_VERSION, help="Displays the version of the external API."
    )
    add_flag("--supported-ops-report", "Generate the SUPPORTED_OPS.md file in the current working directory and exit")

    # set network nargs to be optional to allow the support-ops-report CLI option to be used standalone
    parser.add_argument(
        "network",
        metavar="NETWORK",
        type=str,
        default=None,
        nargs="?",
        help="Filename of the input TensorFlow Lite for Microcontrollers network",
    )
    parser.add_argument(
        "--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"
    )
    add_flag(
        "--enable-debug-db",
        "Enables the calculation and writing of a network debug database to output directory",
        default=None,
    )
    parser.add_argument(
        "--config",
        type=str,
        action="append",
        help="Vela configuration file(s) in Python ConfigParser .ini file format",
    )

    reporting_flags = (
        ("--verbose-all", "Enable all verbose options"),
        ("--verbose-config", "Verbose system configuration and memory mode"),
        ("--verbose-graph", "Verbose graph rewriter"),
        ("--verbose-quantization", "Verbose quantization"),
        ("--verbose-packing", "Verbose pass packing"),
        ("--verbose-tensor-purpose", "Verbose tensor purpose"),
        ("--verbose-tensor-format", "Verbose tensor format"),
        ("--verbose-schedule", "Verbose schedule"),
        (
            "--verbose-pareto-frontier-schedules",
            "Show all schedules along the pareto frontier of optimisation criteria",
        ),
        ("--verbose-allocation", "Verbose tensor allocation"),
        ("--verbose-high-level-command-stream", "Verbose high level command stream"),
        ("--verbose-register-command-stream", "Verbose register command stream"),
        ("--verbose-operators", "Verbose operator list"),
        ("--verbose-weights", "Verbose weights information"),
        ("--show-cpu-operations", "Show the operations that fall back to the CPU"),
    )
    for flag_name, flag_help in reporting_flags:
        add_flag(flag_name, flag_help)

    add_bool_option(
        "--cache-bias-scale-tensor",
        "Controls the caching of the bias & scale tensors in SRAM (default: %(default)s)",
    )
    add_bool_option("--cascading", "Controls the packing of multiple passes into a cascade (default: %(default)s)")
    parser.add_argument(
        "--force-block-config", type=str, default="", help="Force a specific block configuration WxHxC"
    )
    add_flag("--timing", "Time the compiler doing operations")
    parser.add_argument(
        "--accelerator-config",
        type=str,
        default="ethos-u55-256",
        choices=list(architecture_features.Accelerator.member_list()),
        help="Accelerator configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--system-config",
        type=str,
        default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG,
        help="System configuration to select from the Vela configuration file (default: %(default)s)",
    )
    parser.add_argument(
        "--memory-mode",
        type=str,
        default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG,
        help="Memory mode to select from the Vela configuration file (default: %(default)s)",
    )
    parser.add_argument(
        "--tensor-allocator",
        default=TensorAllocator.HillClimb,
        type=lambda s: TensorAllocator[s],
        choices=list(TensorAllocator),
        help="Tensor Allocator algorithm (default: %(default)s)",
    )
    add_flag("--show-subgraph-io-summary", "Shows a summary of all the subgraphs and their inputs and outputs")
    add_bool_option("--ifm-streaming", "Controls scheduler IFM streaming search (default: %(default)s)")
    parser.add_argument(
        "--block-config-limit",
        type=int,
        default=16,
        help="Limit block config search space, use zero for unlimited (default: %(default)s)",
    )
    parser.add_argument(
        "--pareto-metric",
        default=ParetoMetric.BwCycMem,
        type=lambda s: ParetoMetric[s],
        choices=list(ParetoMetric),
        help="Controls the calculation of the pareto metric (default: %(default)s)",
    )
    parser.add_argument(
        "--recursion-limit",
        type=int,
        default=10000,
        help="Set the recursion depth limit, may result in RecursionError if too low (default: %(default)s)",
    )
    parser.add_argument(
        "--max-block-dependency",
        type=int,
        default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP,
        choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1),
        help=(
            "Set the maximum value that can be used for the block dependency between npu kernel operations"
            " (default: %(default)s)"
        ),
    )
    add_bool_option(
        "--nhcwb16-between-cascaded-passes",
        "Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)",
    )
    parser.add_argument(
        "--weight-estimation-scaling",
        type=float,
        default=1.0,
        help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"),
    )
    parser.add_argument(
        "--cpu-tensor-alignment",
        type=int,
        default=Tensor.AllocationQuantum,
        help=(
            "Controls the allocation byte alignment of cpu tensors including Ethos-U Custom"
            " operator inputs and outputs (default: %(default)s)"
        ),
    )
    return parser


def main(args=None):
    """Command line entry point of the Vela compiler.

    Parses the command line, validates the options, builds the architecture,
    compiler and scheduler option objects and compiles the given network. With
    ``--supported-ops-report`` only the supported operators report is generated.

    :param args: list of command line arguments; ``sys.argv[1:]`` is used when None
    :return: 0 on success, 1 when a VelaError was raised (its ``data`` is printed)
    """
    try:
        if args is None:
            args = sys.argv[1:]

        parser = _build_argument_parser()
        options = parser.parse_args(args=args)

        # Generate the supported ops report and exit
        if options.supported_ops_report:
            generate_supported_ops()
            return 0

        if options.network is None:
            parser.error("the following argument is required: NETWORK")

        # check all config files exist because they will be read as a group
        if options.config is not None:
            for filename in options.config:
                if not os.access(filename, os.R_OK):
                    raise InputFileError(filename, "File not found or is not readable")

        sys.setrecursionlimit(options.recursion_limit)

        force_block_config = (
            architecture_features.Block.from_string(options.force_block_config)
            if options.force_block_config
            else None
        )

        alignment = options.cpu_tensor_alignment
        # A power of two has a single bit set, so clearing its lowest set bit leaves zero
        is_power_of_two = alignment & (alignment - 1) == 0
        if alignment < 16 or not is_power_of_two:
            parser.error(
                "Invalid argument to --cpu-tensor-alignment = {} (must be greater than or equal to 16 and a power of 2)"
                "".format(options.cpu_tensor_alignment)
            )

        if options.system_config == ArchitectureFeatures.DEFAULT_CONFIG:
            print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for system configuration")

        if options.memory_mode == ArchitectureFeatures.DEFAULT_CONFIG:
            print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for memory mode")

        if options.verbose_all:
            # Switch on every other parsed option whose name starts with "verbose"
            for option_name in vars(options):
                if option_name != "verbose_all" and option_name.startswith("verbose"):
                    setattr(options, option_name, True)

        arch = architecture_features.ArchitectureFeatures(
            vela_config_files=options.config,
            system_config=options.system_config,
            memory_mode=options.memory_mode,
            accelerator_config=options.accelerator_config,
            override_block_config=force_block_config,
            block_config_limit=options.block_config_limit,
            max_blockdep=options.max_block_dependency,
            weight_estimation_scaling=options.weight_estimation_scaling,
            verbose_config=options.verbose_config,
        )

        compiler_options = compiler_driver.CompilerOptions(
            verbose_graph=options.verbose_graph,
            verbose_quantization=options.verbose_quantization,
            verbose_packing=options.verbose_packing,
            verbose_tensor_purpose=options.verbose_tensor_purpose,
            verbose_tensor_format=options.verbose_tensor_format,
            verbose_allocation=options.verbose_allocation,
            verbose_high_level_command_stream=options.verbose_high_level_command_stream,
            verbose_register_command_stream=options.verbose_register_command_stream,
            verbose_operators=options.verbose_operators,
            verbose_weights=options.verbose_weights,
            show_cpu_operations=options.show_cpu_operations,
            tensor_allocator=options.tensor_allocator,
            timing=options.timing,
            output_dir=options.output_dir,
            cpu_tensor_alignment=options.cpu_tensor_alignment,
        )

        scheduler_options = scheduler.SchedulerOptions(
            use_cascading=options.cascading,
            verbose_schedule=options.verbose_schedule,
            verbose_pareto_frontier_schedules=options.verbose_pareto_frontier_schedules,
            use_ifm_streaming=options.ifm_streaming,
            pareto_metric=options.pareto_metric,
            use_nhcwb16_between_cascaded_passes=options.nhcwb16_between_cascaded_passes,
            cache_bias_scale_tensor=options.cache_bias_scale_tensor,
        )

        model_reader_options = model_reader.ModelReaderOptions()

        nng = process(
            options.network, options.enable_debug_db, arch, model_reader_options, compiler_options, scheduler_options
        )

        if options.show_subgraph_io_summary:
            print_subgraph_io_summary(nng)

        return 0
    except VelaError as e:
        print(e.data)
        return 1