blob: f552b21ecef952ec8eb4edf7ba813dec963da3b7 [file] [log] [blame]
# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Description:
# Main entry point for the Vela compiler.
#
# Provides the command line interface, options parsing, and network loading, before calling the compiler driver.
Diego Russoe8a10452020-04-21 17:39:10 +010020import argparse
Tim Hall1bd531d2020-11-01 20:59:36 +000021import os
Diego Russoea6111a2020-04-14 18:41:58 +010022import sys
Tim Hall79d07d22020-04-27 18:20:16 +010023import time
Tim Hall79d07d22020-04-27 18:20:16 +010024
erik.andersson@arm.comad45f792021-02-03 10:20:16 +010025import flatbuffers
26
Tim Hall79d07d22020-04-27 18:20:16 +010027from . import architecture_features
Diego Russoe8a10452020-04-21 17:39:10 +010028from . import compiler_driver
29from . import model_reader
30from . import scheduler
Tim Hall79d07d22020-04-27 18:20:16 +010031from . import stats_writer
32from . import tflite_writer
Tim Hall79d07d22020-04-27 18:20:16 +010033from ._version import __version__
Louis Verhaard11831ce2020-11-18 18:53:24 +010034from .api import API_VERSION
Tim Halle6ccd872020-11-09 16:46:37 +000035from .debug_database import DebugDatabase
Louis Verhaard7db78962020-05-25 15:05:26 +020036from .errors import InputFileError
Henrik G Olssonea9b23c2021-03-23 17:34:49 +010037from .errors import VelaError
Diego Russoe8a10452020-04-21 17:39:10 +010038from .nn_graph import PassPlacement
39from .nn_graph import TensorAllocator
Michael McGeagh837dc1b2020-11-10 12:38:25 +000040from .supported_operators import SupportedOperators
Diego Russoea6111a2020-04-14 18:41:58 +010041from .tensor import MemArea
Jacob Bohlin0628a8c2020-08-28 13:25:14 +020042from .tensor import Tensor
erik.andersson@arm.comad45f792021-02-03 10:20:16 +010043from .tflite.Model import Model
Michael McGeagh837dc1b2020-11-10 12:38:25 +000044from .tflite_mapping import builtin_operator_map
45from .tflite_mapping import builtin_type_name
Louis Verhaard52078302020-11-18 13:35:06 +010046from ethosu.vela.architecture_features import ArchitectureFeatures
Tim Hall79d07d22020-04-27 18:20:16 +010047
48
def process(input_name, enable_debug_db, arch, model_reader_options, compiler_options, scheduler_options):
    """Read a network, run the compiler driver on it and write the output files.

    The output directory is created if it does not exist. All output file names
    are derived from the base name of ``input_name`` (directory and extension
    stripped) placed inside ``compiler_options.output_dir``.

    Args:
        input_name: Path of the network file to compile.
        enable_debug_db: When truthy, stream offsets are recorded and a
            ``*_debug.xml`` debug database is written next to the output.
        arch: Architecture description passed on to the compiler driver and
            the statistics writers; ``arch.system_config`` is used in the
            summary CSV file name.
        model_reader_options: Options forwarded to ``model_reader.read_model``.
        compiler_options: Compiler options; ``timing``, ``output_dir``,
            ``verbose_operators``, ``show_cpu_operations`` and
            ``verbose_weights`` are read here.
        scheduler_options: Options forwarded to the compiler driver.

    Returns:
        The compiled network graph returned by the model reader.

    Raises:
        InputFileError: If the model reader returns a falsy graph.
    """
    if compiler_options.timing:
        start = time.time()

    os.makedirs(compiler_options.output_dir, exist_ok=True)
    output_basename = os.path.join(compiler_options.output_dir, os.path.splitext(os.path.basename(input_name))[0])
    DebugDatabase.show_warnings = enable_debug_db

    nng = model_reader.read_model(input_name, model_reader_options)

    if not nng:
        raise InputFileError(input_name, "Input file could not be read")

    if compiler_options.verbose_operators:
        nng.print_operators()

    if compiler_options.timing:
        stop = time.time()
        print("Model reading took %f s" % (stop - start))
        # Restart the clock so the second report covers everything after model reading
        start = time.time()

    compiler_driver.compiler_driver(nng, arch, compiler_options, scheduler_options)

    summary_csv_file = "{0}_summary_{1}.csv".format(output_basename, arch.system_config)
    stats_writer.write_summary_metrics_csv(nng, summary_csv_file, arch)

    stats_writer.print_performance_metrics(
        nng,
        show_cpu_operations=compiler_options.show_cpu_operations,
        verbose_weights=compiler_options.verbose_weights,
        arch=arch,
    )

    output_filename = output_basename + "_vela.tflite"
    if input_name.endswith(".tflite"):
        tflite_writer.write_tflite(nng, output_filename)

    if enable_debug_db:
        # The offsets are read back from the file that was just written, so they
        # refer to positions in the final optimised output file.
        file_offsets = calculate_operator_file_offsets(output_filename)
        # The n-th smallest offset is assigned to the subgraph whose
        # generated_stream_id is n.
        for idx, offset in enumerate(sorted(file_offsets)):
            sg = find_subgraph_with_command_stream_order(nng, idx)
            if sg is not None:
                DebugDatabase.set_stream_offset(sg, offset)
        debug_filename = output_basename + "_debug.xml"
        DebugDatabase.write(debug_filename, input_name, output_filename)

    if compiler_options.timing:
        stop = time.time()
        print("Compiler driver took %f s" % (stop - start))

    return nng
100
101
def find_subgraph_with_command_stream_order(nng, idx):
    """Return the first subgraph whose ``generated_stream_id`` equals ``idx``.

    Args:
        nng: Object exposing an iterable ``subgraphs`` attribute.
        idx: Stream id to look for.

    Returns:
        The matching subgraph, or ``None`` when no subgraph has that id.
    """
    return next((sg for sg in nng.subgraphs if sg.generated_stream_id == idx), None)
107
108
def calculate_operator_file_offsets(name: str):
    """Collect a file offset for every custom operator in a written tflite file.

    For each operator whose operator code has a custom code, the buffer backing
    the operator's first input tensor is looked up and the start position of
    that buffer's vector inside the file is recorded.

    Args:
        name: Path of the Vela optimised tflite file to inspect.

    Returns:
        A list of offsets, in the order the custom operators were encountered.
    """
    # Read the vela optimized tflite file
    with open(name, "rb") as f:
        buf = bytearray(f.read())
    # Calculate the file offsets for each custom operator
    file_offsets = []
    model = Model.GetRootAsModel(buf, 0)
    for sg_idx in range(model.SubgraphsLength()):  # However only one subgraph is supported as of now
        sg = model.Subgraphs(sg_idx)
        # Separate index name so the outer subgraph index is not overwritten
        for op_idx in range(sg.OperatorsLength()):
            operator = sg.Operators(op_idx)
            if model.OperatorCodes(operator.OpcodeIndex()).CustomCode() is not None:
                tensor_idx = operator.Inputs(0)
                tensor = sg.Tensors(tensor_idx)
                buffer = model.Buffers(tensor.Buffer())
                # NOTE(review): vtable slot 4 is presumably the Buffer table's data field - confirm against schema
                offset = flatbuffers.number_types.UOffsetTFlags.py_type(buffer._tab.Offset(4))
                file_offsets.append(buffer._tab.Vector(offset))
    return file_offsets
127
128
def print_subgraph_io_summary(nng):
    """Print a summary of all the input and output tensor sizes for all subgraphs.

    Subgraphs are visited in reverse order. For subgraphs placed on the NPU each
    input, scratch and output tensor is listed with its size in KiB. For every
    subgraph the total size and the SRAM memory used are printed, and finally
    the largest total over all subgraphs.

    Args:
        nng: Network graph exposing ``name`` and ``subgraphs``.
    """

    print("Subgraph IO Summary")
    print("-------------------")
    print("NNG: {0}".format(nng.name))
    max_sg_size = 0
    for sg in reversed(nng.subgraphs):
        print(" Subgraph: {0} = {1}".format(sg.name, sg.placement))
        sg_size = 0

        if sg.placement == PassPlacement.Npu:
            for tens in sg.input_tensors + [sg.scratch_tensor] + sg.output_tensors:
                # Input membership is checked first, so a tensor in both lists is labelled "In"
                if tens in sg.input_tensors:
                    tens_dir = "In"
                elif tens in sg.output_tensors:
                    tens_dir = "Out"
                else:
                    tens_dir = "In/Out"

                size = tens.elements() * tens.element_size() / 1024.0
                sg_size = sg_size + size
                print(" Tensor [{0}]: {1} = {2} KiB".format(tens_dir, tens.name, size))

        print(" Total Size = {0} KiB".format(sg_size))
        print(" SRAM Memory Used = {0} KiB".format(sg.memory_used.get(MemArea.Sram, 0) / 1024.0))
        max_sg_size = max(sg_size, max_sg_size)

    print(" Maximum Subgraph Size = {0} KiB".format(max_sg_size))
160
161
def generate_supported_ops():
    """Write the SUPPORTED_OPS.md report into the current working directory.

    The report holds a summary table of operators, sorted by name, that can be
    placed on the NPU, followed by the generic constraints and then one section
    per operator that has specific constraints. The constraint text is taken
    from the docstring of each constraint function. The path of the written
    file is printed when done.
    """

    def constraint_bullet(constraint):
        # Markdown needs two spaces at the end of a line to render it as a separate line
        reason = constraint.__doc__.replace("\n", "  \n")
        return f"- {reason}"

    # Lines ending in two spaces are deliberate Markdown hard line breaks
    lines = [
        "# Supported Ops",
        "",
        "This file was automatically generated by Vela using the `--supported-ops-report` parameter.  ",
        f"Vela version: `{__version__}`",
        "",
        "This file complies with",
        "[**Gitiles Markdown syntax**](https://github.com/google/gitiles/blob/master/Documentation/markdown.md)",
        "",
        "## Summary Table",
        "",
        "The table below contains TFLite operators that can be placed on the Ethos-U NPU.  ",
        "If the constraints are not met, then that operator will be scheduled on the CPU instead.  ",
        "For any other TFLite operator not listed, will be left untouched and scheduled on the CPU.  ",
        "Please check the supported operator list for your chosen runtime for further information.",
        "",
        "| Operator | Constraints |",
        "| --- | --- |",
    ]
    supported = SupportedOperators()
    op_constraint_links = []
    op_list = sorted(((op, builtin_type_name(op)) for op in builtin_operator_map), key=lambda x: x[1])
    for op, name in op_list:
        internal_op = builtin_operator_map[op][0]
        if internal_op in SupportedOperators.supported_operators:
            links = "[Generic](#generic-constraints)"
            if internal_op in supported.specific_constraints:
                links += f", [Specific](#{name.lower()}-constraints)"
                # Remember the operator so its own constraints section is emitted below
                op_constraint_links.append((internal_op, name))
            lines.append(f"| {name} | {links} |")
    lines += [
        "",
        "## Generic Constraints",
        "",
        "This is a list of constraints that all NPU operators must satisfy in order to be scheduled on the NPU.",
        "",
    ]
    lines.extend(constraint_bullet(constraint) for constraint in supported.generic_constraints)
    for op, name in op_constraint_links:
        lines += [
            "",
            f"## {name} Constraints",
            "",
            f"This is a list of constraints that the {name} operator must satisfy in order to be scheduled on the NPU.",
            "",
        ]
        lines.extend(constraint_bullet(constraint) for constraint in supported.specific_constraints[op])

    # Note. this will generate the file in the CWD
    filepath = os.path.join(os.getcwd(), "SUPPORTED_OPS.md")
    with open(filepath, "wt") as md:
        md.writelines(line + "\n" for line in lines)
    print(f"Report file: {filepath}")
222
223
def _enum_member_by_name(enum_cls):
    """Build an argparse ``type`` callable that looks up a member of ``enum_cls`` by name."""

    def convert(name):
        try:
            return enum_cls[name]
        except KeyError:
            # argparse only turns ArgumentTypeError/TypeError/ValueError into a usage
            # error; a bare KeyError would escape as a traceback.
            raise argparse.ArgumentTypeError(f"invalid choice: {name!r}") from None

    return convert


def main(args=None):
    """Command line entry point of the Vela compiler.

    Parses the command line, validates the options, builds the architecture,
    compiler and scheduler option objects and compiles the given network.

    Args:
        args: List of command line arguments, without the program name.
            Defaults to ``sys.argv[1:]`` when ``None``.

    Returns:
        0 on success, or 1 when a ``VelaError`` was raised (its ``data`` is
        printed). Invalid command line usage exits through argparse instead.
    """
    try:
        if args is None:
            args = sys.argv[1:]

        parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Arm Ethos-U NPUs")
        parser.add_argument("--version", action="version", version=__version__)
        parser.add_argument(
            "--api-version", action="version", version=API_VERSION, help="Displays the version of the external API."
        )
        parser.add_argument(
            "--supported-ops-report",
            action="store_true",
            help="Generate the SUPPORTED_OPS.md file in the current working directory and exit",
        )

        # set network nargs to be optional to allow the supported-ops-report CLI option to be used standalone
        parser.add_argument(
            "network",
            metavar="NETWORK",
            type=str,
            default=None,
            nargs="?",
            help="Filename of the input TensorFlow Lite for Microcontrollers network",
        )
        parser.add_argument(
            "--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"
        )
        parser.add_argument(
            "--enable-debug-db",
            action="store_true",
            default=None,
            help="Enables the calculation and writing of a network debug database to output directory",
        )
        parser.add_argument(
            "--config",
            type=str,
            action="append",
            help="Vela configuration file(s) in Python ConfigParser .ini file format",
        )
        parser.add_argument("--verbose-all", action="store_true", help="Enable all verbose options")
        parser.add_argument(
            "--verbose-config", action="store_true", help="Verbose system configuration and memory mode"
        )
        parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter")
        parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization")
        parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing")
        parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose")
        parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format")
        parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule")
        parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation")
        parser.add_argument(
            "--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream"
        )
        parser.add_argument(
            "--verbose-register-command-stream", action="store_true", help="Verbose register command stream"
        )
        parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")
        parser.add_argument("--verbose-weights", action="store_true", help="Verbose weights information")
        parser.add_argument(
            "--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"
        )
        parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")
        parser.add_argument(
            "--accelerator-config",
            type=str,
            default="ethos-u55-256",
            choices=list(architecture_features.Accelerator.member_list()),
            help="Accelerator configuration to use (default: %(default)s)",
        )
        parser.add_argument(
            "--system-config",
            type=str,
            default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG,
            help="System configuration to select from the Vela configuration file (default: %(default)s)",
        )
        parser.add_argument(
            "--memory-mode",
            type=str,
            default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG,
            help="Memory mode to select from the Vela configuration file (default: %(default)s)",
        )
        parser.add_argument(
            "--tensor-allocator",
            default=TensorAllocator.HillClimb,
            type=_enum_member_by_name(TensorAllocator),
            choices=list(TensorAllocator),
            help="Tensor Allocator algorithm (default: %(default)s)",
        )
        parser.add_argument(
            "--show-subgraph-io-summary",
            action="store_true",
            help="Shows a summary of all the subgraphs and their inputs and outputs",
        )
        parser.add_argument(
            "--max-block-dependency",
            type=int,
            default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP,
            choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1),
            help=(
                "Set the maximum value that can be used for the block dependency between npu kernel operations"
                " (default: %(default)s)"
            ),
        )
        parser.add_argument(
            "--optimise",
            type=_enum_member_by_name(scheduler.OptimizationStrategy),
            default=scheduler.OptimizationStrategy.Performance,
            choices=list(scheduler.OptimizationStrategy),
            help=(
                "Set the optimisation strategy. The Size strategy results in minimal SRAM usage (does not use"
                " arena-cache-size). The Performance strategy results in maximal performance (uses the arena-cache-size"
                " if specified) (default: %(default)s)"
            ),
        )
        parser.add_argument(
            "--arena-cache-size",
            type=int,
            help=(
                "Set the size of the arena cache memory area, in bytes. If specified, this option overrides the memory"
                " mode attribute with the same name in a Vela configuration file"
            ),
        )
        parser.add_argument(
            "--cpu-tensor-alignment",
            type=int,
            default=Tensor.AllocationQuantum,
            help=(
                "Controls the allocation byte alignment of cpu tensors including Ethos-U Custom"
                " operator inputs and outputs (default: %(default)s)"
            ),
        )
        args = parser.parse_args(args=args)

        # Generate the supported ops report and exit
        if args.supported_ops_report:
            generate_supported_ops()
            return 0

        if args.network is None:
            parser.error("the following argument is required: NETWORK")

        # check all config files exist because they will be read as a group
        if args.config is not None:
            for filename in args.config:
                if not os.access(filename, os.R_OK):
                    raise InputFileError(filename, "File not found or is not readable")

        # x & (x - 1) is zero only for powers of two (and zero, which the < 16 test rejects)
        if args.cpu_tensor_alignment < 16 or args.cpu_tensor_alignment & (args.cpu_tensor_alignment - 1) != 0:
            parser.error(
                "Invalid argument to --cpu-tensor-alignment = {} (must be greater than or equal to 16 and a power of 2)"
                "".format(args.cpu_tensor_alignment)
            )

        if args.system_config == ArchitectureFeatures.DEFAULT_CONFIG:
            print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for system configuration")

        if args.memory_mode == ArchitectureFeatures.DEFAULT_CONFIG:
            print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for memory mode")

        if args.verbose_all:
            # Switch on every individual --verbose-* option
            for v in vars(args):
                if v.startswith("verbose") and v != "verbose_all":
                    setattr(args, v, True)

        arch = architecture_features.ArchitectureFeatures(
            vela_config_files=args.config,
            system_config=args.system_config,
            memory_mode=args.memory_mode,
            accelerator_config=args.accelerator_config,
            max_blockdep=args.max_block_dependency,
            verbose_config=args.verbose_config,
            arena_cache_size=args.arena_cache_size,
        )

        compiler_options = compiler_driver.CompilerOptions(
            verbose_graph=args.verbose_graph,
            verbose_quantization=args.verbose_quantization,
            verbose_packing=args.verbose_packing,
            verbose_tensor_purpose=args.verbose_tensor_purpose,
            verbose_tensor_format=args.verbose_tensor_format,
            verbose_allocation=args.verbose_allocation,
            verbose_high_level_command_stream=args.verbose_high_level_command_stream,
            verbose_register_command_stream=args.verbose_register_command_stream,
            verbose_operators=args.verbose_operators,
            verbose_weights=args.verbose_weights,
            show_cpu_operations=args.show_cpu_operations,
            tensor_allocator=args.tensor_allocator,
            timing=args.timing,
            output_dir=args.output_dir,
            cpu_tensor_alignment=args.cpu_tensor_alignment,
        )

        scheduler_options = scheduler.SchedulerOptions(
            optimization_strategy=args.optimise,
            sram_target=arch.arena_cache_size,
            verbose_schedule=args.verbose_schedule,
        )

        model_reader_options = model_reader.ModelReaderOptions()

        nng = process(
            args.network, args.enable_debug_db, arch, model_reader_options, compiler_options, scheduler_options
        )

        if args.show_subgraph_io_summary:
            print_subgraph_io_summary(nng)

        return 0
    except VelaError as e:
        print(e.data)
        return 1