blob: c95518619baf663b6687fa1089f734869603b55f [file] [log] [blame]
# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Description:
# Main entry point for the Vela compiler.
#
# Provides the command line interface, options parsing, and network loading, before calling the compiler driver.
Diego Russoe8a10452020-04-21 17:39:10 +010020import argparse
21import ast
Tim Hall1bd531d2020-11-01 20:59:36 +000022import os
Diego Russoea6111a2020-04-14 18:41:58 +010023import sys
Tim Hall79d07d22020-04-27 18:20:16 +010024import time
Tim Hall79d07d22020-04-27 18:20:16 +010025
erik.andersson@arm.comad45f792021-02-03 10:20:16 +010026import flatbuffers
27
Tim Hall79d07d22020-04-27 18:20:16 +010028from . import architecture_features
Diego Russoe8a10452020-04-21 17:39:10 +010029from . import compiler_driver
30from . import model_reader
31from . import scheduler
Tim Hall79d07d22020-04-27 18:20:16 +010032from . import stats_writer
33from . import tflite_writer
Tim Hall79d07d22020-04-27 18:20:16 +010034from ._version import __version__
Louis Verhaard11831ce2020-11-18 18:53:24 +010035from .api import API_VERSION
Tim Halle6ccd872020-11-09 16:46:37 +000036from .debug_database import DebugDatabase
Louis Verhaard7db78962020-05-25 15:05:26 +020037from .errors import InputFileError
Henrik G Olssonea9b23c2021-03-23 17:34:49 +010038from .errors import VelaError
Diego Russoe8a10452020-04-21 17:39:10 +010039from .nn_graph import PassPlacement
40from .nn_graph import TensorAllocator
Tim Hall79d07d22020-04-27 18:20:16 +010041from .scheduler import ParetoMetric
Michael McGeagh837dc1b2020-11-10 12:38:25 +000042from .supported_operators import SupportedOperators
Diego Russoea6111a2020-04-14 18:41:58 +010043from .tensor import MemArea
Jacob Bohlin0628a8c2020-08-28 13:25:14 +020044from .tensor import Tensor
erik.andersson@arm.comad45f792021-02-03 10:20:16 +010045from .tflite.Model import Model
Michael McGeagh837dc1b2020-11-10 12:38:25 +000046from .tflite_mapping import builtin_operator_map
47from .tflite_mapping import builtin_type_name
Louis Verhaard52078302020-11-18 13:35:06 +010048from ethosu.vela.architecture_features import ArchitectureFeatures
Tim Hall79d07d22020-04-27 18:20:16 +010049
50
def process(input_name, enable_debug_db, arch, model_reader_options, compiler_options, scheduler_options):
    """Read a network, compile it and write the resulting files to the output directory.

    The steps, in order, are:

    1. create ``compiler_options.output_dir`` if it does not exist;
    2. read the network with ``model_reader.read_model``;
    3. run ``compiler_driver.compiler_driver`` on it;
    4. write the pass-breakdown and summary CSV files and print the performance metrics;
    5. if ``input_name`` ends with ``.tflite``, write ``<basename>_vela.tflite``;
    6. if ``enable_debug_db`` is set, record the command stream file offsets and write
       ``<basename>_debug.xml``.

    Args:
        input_name: Path of the input network file.
        enable_debug_db: Truthy to write the debug database; also assigned to
            ``DebugDatabase.show_warnings``.
        arch: Architecture description; ``arch.system_config`` is used in the CSV file names.
        model_reader_options: Passed through to ``model_reader.read_model``.
        compiler_options: Supplies ``timing``, ``output_dir``, ``verbose_operators`` and
            ``show_cpu_operations``, and is passed through to the compiler driver.
        scheduler_options: Passed through to the compiler driver.

    Returns:
        The network graph returned by ``model_reader.read_model`` after compilation.

    Raises:
        InputFileError: If ``model_reader.read_model`` returns a falsy value.
    """
    if compiler_options.timing:
        start = time.time()

    os.makedirs(compiler_options.output_dir, exist_ok=True)
    # All output files share this prefix: <output_dir>/<input file name without extension>
    output_basename = os.path.join(compiler_options.output_dir, os.path.splitext(os.path.basename(input_name))[0])
    DebugDatabase.show_warnings = enable_debug_db

    nng = model_reader.read_model(input_name, model_reader_options)

    if not nng:
        raise InputFileError(input_name, "Input file could not be read")

    if compiler_options.verbose_operators:
        nng.print_operators()

    if compiler_options.timing:
        stop = time.time()
        print("Model reading took %f s" % (stop - start))
        # Restart the clock so that the second report only covers the compiler driver and file writing
        start = time.time()

    compiler_driver.compiler_driver(nng, arch, compiler_options, scheduler_options)

    passes_csv_file = "{0}_pass-breakdown_{1}.csv".format(output_basename, arch.system_config)
    stats_writer.write_pass_metrics_csv(nng, passes_csv_file)

    summary_csv_file = "{0}_summary_{1}.csv".format(output_basename, arch.system_config)
    stats_writer.write_summary_metrics_csv(nng, summary_csv_file, arch)

    stats_writer.print_performance_metrics(nng, show_cpu_operations=compiler_options.show_cpu_operations, arch=arch)

    output_filename = output_basename + "_vela.tflite"
    if input_name.endswith(".tflite"):
        tflite_writer.write_tflite(nng, output_filename)

    if enable_debug_db:
        # NOTE(review): this reads back output_filename, which is only written above when the input
        # name ends with ".tflite" - confirm that no other input type can reach this point.
        file_offsets = calculate_operator_file_offsets(output_filename)
        # The n-th smallest file offset is paired with the subgraph whose generated_stream_id is n
        for idx, offset in enumerate(sorted(file_offsets)):
            sg = find_subgraph_with_command_stream_order(nng, idx)
            if sg is not None:
                DebugDatabase.set_stream_offset(sg, offset)
        debug_filename = output_basename + "_debug.xml"
        DebugDatabase.write(debug_filename, input_name, output_filename)

    if compiler_options.timing:
        stop = time.time()
        print("Compiler driver took %f s" % (stop - start))

    return nng
100
101
def find_subgraph_with_command_stream_order(nng, idx):
    """Return the first subgraph of ``nng`` whose ``generated_stream_id`` equals ``idx``.

    Args:
        nng: Object with a ``subgraphs`` iterable.
        idx: Stream id to look for.

    Returns:
        The first matching subgraph, or ``None`` if no subgraph matches.
    """
    return next((sg for sg in nng.subgraphs if sg.generated_stream_id == idx), None)
107
108
def calculate_operator_file_offsets(name: str):
    """Return a file offset for each custom operator in a Vela optimised tflite file.

    For every operator whose operator code has a custom code, the buffer backing the
    operator's first input tensor is looked up and the start of that buffer's vector
    within the file is recorded.

    Args:
        name: Path of the tflite file to read.

    Returns:
        A list of offsets, in the order the operators are encountered.
    """
    # Read the vela optimized tflite file
    with open(name, "rb") as f:
        buf = bytearray(f.read())
    # Calculate the file offsets for each custom operator
    file_offsets = []
    model = Model.GetRootAsModel(buf, 0)
    for sg_idx in range(model.SubgraphsLength()):  # However only one subgraph is supported as of now
        sg = model.Subgraphs(sg_idx)
        for op_idx in range(sg.OperatorsLength()):
            operator = sg.Operators(op_idx)
            if model.OperatorCodes(operator.OpcodeIndex()).CustomCode() is not None:
                tensor_idx = operator.Inputs(0)
                tensor = sg.Tensors(tensor_idx)
                buffer = model.Buffers(tensor.Buffer())
                # Vtable slot 4 is presumably the Buffer table's data field - TODO confirm against the schema
                offset = flatbuffers.number_types.UOffsetTFlags.py_type(buffer._tab.Offset(4))
                file_offsets.append(buffer._tab.Vector(offset))
    return file_offsets
127
128
def print_subgraph_io_summary(nng):
    """Print the input, scratch and output tensor sizes of the subgraphs of ``nng``.

    Subgraphs are visited in reverse order. Every subgraph gets a header line; for those
    placed on the NPU, each tensor is listed with its size in KiB, followed by the
    subgraph's total tensor size and its SRAM memory usage. The largest total seen is
    printed last.

    Args:
        nng: Network graph providing ``name`` and ``subgraphs``.
    """

    print("Subgraph IO Summary")
    print("-------------------")
    print("NNG: {0}".format(nng.name))
    max_sg_size = 0
    for sg in reversed(nng.subgraphs):
        print(" Subgraph: {0} = {1}".format(sg.name, sg.placement))
        sg_size = 0

        if sg.placement == PassPlacement.Npu:
            for tens in sg.input_tensors + [sg.scratch_tensor] + sg.output_tensors:
                # A tensor that is neither an input nor an output is the scratch tensor
                if tens in sg.input_tensors:
                    tens_dir = "In"
                elif tens in sg.output_tensors:
                    tens_dir = "Out"
                else:
                    tens_dir = "In/Out"

                size = tens.elements() * tens.element_size() / 1024.0
                sg_size = sg_size + size
                print(" Tensor [{0}]: {1} = {2} KiB".format(tens_dir, tens.name, size))

            print(" Total Size = {0} KiB".format(sg_size))
            print(" SRAM Memory Used = {0} KiB".format(sg.memory_used.get(MemArea.Sram, 0) / 1024.0))
            max_sg_size = max(sg_size, max_sg_size)

    print(" Maximum Subgraph Size = {0} KiB".format(max_sg_size))
160
161
def generate_supported_ops():
    """Write the SUPPORTED_OPS.md report to the current working directory.

    The report contains a summary table of the operators found in
    ``SupportedOperators.supported_operators``, the generic constraints, and one section
    per operator that has specific constraints. The text of each constraint is taken
    from the constraint's docstring. The path of the written file is printed.
    """

    def constraint_bullet(constraint):
        # Markdown needs two spaces at the end of a line to render it as a separate line
        reason = constraint.__doc__.replace("\n", "  \n")
        return f"- {reason}"

    # Lines ending in two spaces are Markdown hard line breaks
    lines = [
        "# Supported Ops",
        "",
        "This file was automatically generated by Vela using the `--supported-ops-report` parameter.  ",
        f"Vela version: `{__version__}`",
        "",
        "This file complies with",
        "[**Gitiles Markdown syntax**](https://github.com/google/gitiles/blob/master/Documentation/markdown.md)",
        "",
        "## Summary Table",
        "",
        "The table below contains TFLite operators that can be placed on the Ethos-U NPU.  ",
        "If the constraints are not met, then that operator will be scheduled on the CPU instead.  ",
        "For any other TFLite operator not listed, will be left untouched and scheduled on the CPU.  ",
        "Please check the supported operator list for your chosen runtime for further information.",
        "",
        "| Operator | Constraints |",
        "| --- | --- |",
    ]
    supported = SupportedOperators()
    # (internal op, display name) for every operator that needs its own constraints section
    op_constraint_links = []
    op_list = sorted(((op, builtin_type_name(op)) for op in builtin_operator_map), key=lambda x: x[1])
    for op, name in op_list:
        internal_op = builtin_operator_map[op][0]
        if internal_op in SupportedOperators.supported_operators:
            links = "[Generic](#generic-constraints)"
            if internal_op in supported.specific_constraints:
                links += f", [Specific](#{name.lower()}-constraints)"
                op_constraint_links.append((internal_op, name))
            lines.append(f"| {name} | {links} |")
    lines += [
        "",
        "## Generic Constraints",
        "",
        "This is a list of constraints that all NPU operators must satisfy in order to be scheduled on the NPU.",
        "",
    ]
    lines.extend(constraint_bullet(constraint) for constraint in supported.generic_constraints)
    for op, name in op_constraint_links:
        lines += [
            "",
            f"## {name} Constraints",
            "",
            f"This is a list of constraints that the {name} operator must satisfy in order to be scheduled on the NPU.",
            "",
        ]
        lines.extend(constraint_bullet(constraint) for constraint in supported.specific_constraints[op])

    # Note. this will generate the file in the CWD
    filepath = os.path.join(os.getcwd(), "SUPPORTED_OPS.md")
    with open(filepath, "wt") as md:
        md.writelines(line + "\n" for line in lines)
        print(f"Report file: {filepath}")
222
223
def main(args=None):
    """Command line entry point of the Vela compiler.

    Parses the command line, validates the options, builds the architecture, compiler,
    scheduler and model reader option objects, and runs ``process`` on the network.
    With ``--supported-ops-report`` it only generates the supported operators report.

    Args:
        args: List of command line argument strings. Defaults to ``sys.argv[1:]``.

    Returns:
        0 on success, or 1 if a ``VelaError`` was raised (its ``data`` is printed).

    Note:
        Argument errors are reported through ``parser.error``, which exits the program
        (see the argparse documentation).
    """
    try:
        if args is None:
            args = sys.argv[1:]

        parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Arm Ethos-U NPUs")
        parser.add_argument("--version", action="version", version=__version__)
        parser.add_argument(
            "--api-version", action="version", version=API_VERSION, help="Displays the version of the external API."
        )
        parser.add_argument(
            "--supported-ops-report",
            action="store_true",
            help="Generate the SUPPORTED_OPS.md file in the current working directory and exit",
        )

        # set network nargs to be optional to allow the support-ops-report CLI option to be used standalone
        parser.add_argument(
            "network",
            metavar="NETWORK",
            type=str,
            default=None,
            nargs="?",
            help="Filename of the input TensorFlow Lite for Microcontrollers network",
        )
        parser.add_argument(
            "--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"
        )
        parser.add_argument(
            "--enable-debug-db",
            action="store_true",
            default=None,
            help="Enables the calculation and writing of a network debug database to output directory",
        )
        parser.add_argument(
            "--config",
            type=str,
            action="append",
            help="Vela configuration file(s) in Python ConfigParser .ini file format",
        )
        parser.add_argument("--verbose-all", action="store_true", help="Enable all verbose options")
        parser.add_argument(
            "--verbose-config", action="store_true", help="Verbose system configuration and memory mode"
        )
        parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter")
        parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization")
        parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing")
        parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose")
        parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format")
        parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule")
        parser.add_argument(
            "--verbose-pareto-frontier-schedules",
            action="store_true",
            help="Show all schedules along the pareto frontier of optimisation criteria",
        )
        parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation")
        parser.add_argument(
            "--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream"
        )
        parser.add_argument(
            "--verbose-register-command-stream", action="store_true", help="Verbose register command stream"
        )
        parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")
        parser.add_argument(
            "--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"
        )
        parser.add_argument(
            "--cache-bias-scale-tensor",
            type=ast.literal_eval,
            default=True,
            choices=[True, False],
            help="Controls the caching of the bias & scale tensors in SRAM (default: %(default)s)",
        )
        parser.add_argument(
            "--cascading",
            type=ast.literal_eval,
            default=True,
            choices=[True, False],
            help="Controls the packing of multiple passes into a cascade (default: %(default)s)",
        )
        parser.add_argument(
            "--force-block-config", type=str, default="", help="Force a specific block configuration WxHxC"
        )
        parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")
        parser.add_argument(
            "--accelerator-config",
            type=str,
            default="ethos-u55-256",
            choices=list(architecture_features.Accelerator.member_list()),
            help="Accelerator configuration to use (default: %(default)s)",
        )
        parser.add_argument(
            "--system-config",
            type=str,
            default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG,
            help="System configuration to select from the Vela configuration file (default: %(default)s)",
        )
        parser.add_argument(
            "--memory-mode",
            type=str,
            default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG,
            help="Memory mode to select from the Vela configuration file (default: %(default)s)",
        )
        parser.add_argument(
            "--tensor-allocator",
            default=TensorAllocator.HillClimb,
            type=lambda s: TensorAllocator[s],
            choices=list(TensorAllocator),
            help="Tensor Allocator algorithm (default: %(default)s)",
        )
        parser.add_argument(
            "--show-subgraph-io-summary",
            action="store_true",
            help="Shows a summary of all the subgraphs and their inputs and outputs",
        )
        parser.add_argument(
            "--ifm-streaming",
            type=ast.literal_eval,
            default=True,
            choices=[True, False],
            help="Controls scheduler IFM streaming search (default: %(default)s)",
        )
        parser.add_argument(
            "--block-config-limit",
            type=int,
            default=16,
            help="Limit block config search space, use zero for unlimited (default: %(default)s)",
        )
        parser.add_argument(
            "--pareto-metric",
            default=ParetoMetric.BwCycMem,
            type=lambda s: ParetoMetric[s],
            choices=list(ParetoMetric),
            help="Controls the calculation of the pareto metric (default: %(default)s)",
        )
        parser.add_argument(
            "--recursion-limit",
            type=int,
            default=10000,
            help="Set the recursion depth limit, may result in RecursionError if too low (default: %(default)s)",
        )
        parser.add_argument(
            "--max-block-dependency",
            type=int,
            default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP,
            choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1),
            help=(
                "Set the maximum value that can be used for the block dependency between npu kernel operations"
                " (default: %(default)s)"
            ),
        )
        parser.add_argument(
            "--nhcwb16-between-cascaded-passes",
            type=ast.literal_eval,
            default=True,
            choices=[True, False],
            help="Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)",
        )
        parser.add_argument(
            "--weight-estimation-scaling",
            type=float,
            default=1.0,
            help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"),
        )
        parser.add_argument(
            "--cpu-tensor-alignment",
            type=int,
            default=Tensor.AllocationQuantum,
            help=(
                "Controls the allocation byte alignment of cpu tensors including Ethos-U Custom"
                " operator inputs and outputs (default: %(default)s)"
            ),
        )
        args = parser.parse_args(args=args)

        # Generate the supported ops report and exit
        if args.supported_ops_report:
            generate_supported_ops()
            return 0

        # NETWORK is declared optional above, so its presence has to be enforced here
        if args.network is None:
            parser.error("the following argument is required: NETWORK")

        # check all config files exist because they will be read as a group
        if args.config is not None:
            for filename in args.config:
                if not os.access(filename, os.R_OK):
                    raise InputFileError(filename, "File not found or is not readable")

        sys.setrecursionlimit(args.recursion_limit)

        if args.force_block_config:
            force_block_config = architecture_features.Block.from_string(args.force_block_config)
        else:
            force_block_config = None

        # x & (x - 1) is zero only when x has a single bit set, i.e. is a power of 2
        if args.cpu_tensor_alignment < 16 or args.cpu_tensor_alignment & (args.cpu_tensor_alignment - 1) != 0:
            parser.error(
                "Invalid argument to --cpu-tensor-alignment = {} (must be greater than or equal to 16 and a power of 2)"
                "".format(args.cpu_tensor_alignment)
            )

        if args.system_config == ArchitectureFeatures.DEFAULT_CONFIG:
            print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for system configuration")

        if args.memory_mode == ArchitectureFeatures.DEFAULT_CONFIG:
            print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for memory mode")

        # --verbose-all switches on every other --verbose-* option
        if args.verbose_all:
            for v in vars(args):
                if v.startswith("verbose") and v != "verbose_all":
                    setattr(args, v, True)

        arch = architecture_features.ArchitectureFeatures(
            vela_config_files=args.config,
            system_config=args.system_config,
            memory_mode=args.memory_mode,
            accelerator_config=args.accelerator_config,
            override_block_config=force_block_config,
            block_config_limit=args.block_config_limit,
            max_blockdep=args.max_block_dependency,
            weight_estimation_scaling=args.weight_estimation_scaling,
            verbose_config=args.verbose_config,
        )

        compiler_options = compiler_driver.CompilerOptions(
            verbose_graph=args.verbose_graph,
            verbose_quantization=args.verbose_quantization,
            verbose_packing=args.verbose_packing,
            verbose_tensor_purpose=args.verbose_tensor_purpose,
            verbose_tensor_format=args.verbose_tensor_format,
            verbose_allocation=args.verbose_allocation,
            verbose_high_level_command_stream=args.verbose_high_level_command_stream,
            verbose_register_command_stream=args.verbose_register_command_stream,
            verbose_operators=args.verbose_operators,
            show_cpu_operations=args.show_cpu_operations,
            tensor_allocator=args.tensor_allocator,
            timing=args.timing,
            output_dir=args.output_dir,
            cpu_tensor_alignment=args.cpu_tensor_alignment,
        )

        scheduler_options = scheduler.SchedulerOptions(
            use_cascading=args.cascading,
            verbose_schedule=args.verbose_schedule,
            verbose_pareto_frontier_schedules=args.verbose_pareto_frontier_schedules,
            use_ifm_streaming=args.ifm_streaming,
            pareto_metric=args.pareto_metric,
            use_nhcwb16_between_cascaded_passes=args.nhcwb16_between_cascaded_passes,
            cache_bias_scale_tensor=args.cache_bias_scale_tensor,
        )

        model_reader_options = model_reader.ModelReaderOptions()

        nng = process(
            args.network, args.enable_debug_db, arch, model_reader_options, compiler_options, scheduler_options
        )

        if args.show_subgraph_io_summary:
            print_subgraph_io_summary(nng)

        return 0
    except VelaError as e:
        print(e.data)
        return 1