blob: 6835607ae634fbfebc9287e28ecd71e1a2734e74 [file] [log] [blame]
Tim Hall79d07d22020-04-27 18:20:16 +01001# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
4#
5# Licensed under the Apache License, Version 2.0 (the License); you may
6# not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an AS IS BASIS, WITHOUT
13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
# Description:
# Main entry point for the Vela compiler.
#
# Provides the command line interface, options parsing, and network loading before calling the compiler driver.
Diego Russoe8a10452020-04-21 17:39:10 +010020import argparse
21import ast
22import configparser
Diego Russoea6111a2020-04-14 18:41:58 +010023import os.path
24import sys
Tim Hall79d07d22020-04-27 18:20:16 +010025import time
Tim Hall79d07d22020-04-27 18:20:16 +010026
27from . import architecture_features
Diego Russoe8a10452020-04-21 17:39:10 +010028from . import compiler_driver
29from . import model_reader
30from . import scheduler
Tim Hall79d07d22020-04-27 18:20:16 +010031from . import stats_writer
32from . import tflite_writer
Tim Hall79d07d22020-04-27 18:20:16 +010033from ._version import __version__
Patrik Gustavssonc8a22f12020-11-18 17:05:50 +010034from .api import api_version
Tim Halle6ccd872020-11-09 16:46:37 +000035from .debug_database import DebugDatabase
Louis Verhaard7db78962020-05-25 15:05:26 +020036from .errors import InputFileError
Diego Russoe8a10452020-04-21 17:39:10 +010037from .nn_graph import PassPlacement
38from .nn_graph import TensorAllocator
Tim Hall79d07d22020-04-27 18:20:16 +010039from .scheduler import ParetoMetric
Michael McGeagh837dc1b2020-11-10 12:38:25 +000040from .supported_operators import SupportedOperators
Diego Russoea6111a2020-04-14 18:41:58 +010041from .tensor import MemArea
Jacob Bohlin0628a8c2020-08-28 13:25:14 +020042from .tensor import Tensor
Michael McGeagh837dc1b2020-11-10 12:38:25 +000043from .tflite_mapping import builtin_operator_map
44from .tflite_mapping import builtin_type_name
Tim Hall79d07d22020-04-27 18:20:16 +010045
46
def process(input_name, enable_debug_db, arch, model_reader_options, compiler_options, scheduler_options):
    """Read a network file, run the compiler driver on it and write the result files.

    The output directory (``compiler_options.output_dir``) is created if needed, and every output file name is
    built from that directory plus the input file's base name without its extension.

    Args:
        input_name: Path of the network file to read.
        enable_debug_db: Stored in ``DebugDatabase.show_warnings``; when truthy the debug database is also
            written to ``<basename>_debug.xml``.
        arch: Architecture object passed to the compiler driver and the stats writers; its ``system_config``
            is used in the CSV file names.
        model_reader_options: Passed through to ``model_reader.read_model``.
        compiler_options: Supplies ``timing``, ``output_dir``, ``verbose_operators`` and
            ``show_cpu_operations``, and is passed to the compiler driver.
        scheduler_options: Passed through to the compiler driver.

    Returns:
        The graph object returned by ``model_reader.read_model`` after compilation.

    Raises:
        InputFileError: If ``model_reader.read_model`` returns a falsy value.
    """
    timing_enabled = compiler_options.timing
    if timing_enabled:
        phase_start = time.time()

    out_dir = compiler_options.output_dir
    os.makedirs(out_dir, exist_ok=True)
    input_stem, _ext = os.path.splitext(os.path.basename(input_name))
    output_basename = os.path.join(out_dir, input_stem)
    DebugDatabase.show_warnings = enable_debug_db

    nng = model_reader.read_model(input_name, model_reader_options)
    if not nng:
        raise InputFileError(input_name, "input file could not be read")

    if compiler_options.verbose_operators:
        nng.print_operators()

    if timing_enabled:
        print("Model reading took %f s" % (time.time() - phase_start))
        # Restart the clock so the second report covers only the compile and write-out phase
        phase_start = time.time()

    compiler_driver.compiler_driver(nng, arch, compiler_options, scheduler_options)

    # Per-pass and summary statistics, one CSV file each
    stats_writer.write_pass_metrics_csv(
        nng, "{0}_pass-breakdown_{1}.csv".format(output_basename, arch.system_config)
    )
    stats_writer.write_summary_metrics_csv(
        nng, "{0}_summary_{1}.csv".format(output_basename, arch.system_config), arch
    )
    stats_writer.print_performance_metrics(nng, show_cpu_operations=compiler_options.show_cpu_operations, arch=arch)

    # The output network is only written when the input name ends in ".tflite"; the name itself is still
    # handed to the debug database below either way.
    output_filename = output_basename + "_vela.tflite"
    if input_name.endswith(".tflite"):
        tflite_writer.write_tflite(nng, output_filename)

    if enable_debug_db:
        DebugDatabase.write(output_basename + "_debug.xml", input_name, output_filename)

    if timing_enabled:
        print("Compiler driver took %f s" % (time.time() - phase_start))

    return nng
91
92
def print_subgraph_io_summary(nng):
    """Print the input/output tensor sizes of every subgraph, visiting the subgraphs in reverse order.

    Tensors are only listed for subgraphs placed on the NPU. For every subgraph the total listed tensor
    size and the SRAM entry of ``memory_used`` are printed in KiB, followed at the end by the largest
    per-subgraph total.
    """
    for header_line in ("Subgraph IO Summary", "-------------------", "NNG: {0}".format(nng.name)):
        print(header_line)

    largest_kib = 0
    for sg in reversed(nng.subgraphs):
        print(" Subgraph: {0} = {1}".format(sg.name, sg.placement))
        total_kib = 0

        if sg.placement == PassPlacement.Npu:
            listed_tensors = sg.input_tensors + [sg.scratch_tensor] + sg.output_tensors
            for tens in listed_tensors:
                # Direction is decided by list membership, inputs taking priority over outputs;
                # anything in neither list (the scratch tensor) is reported as bidirectional.
                if tens in sg.input_tensors:
                    direction = "In"
                elif tens in sg.output_tensors:
                    direction = "Out"
                else:
                    direction = "In/Out"

                kib = tens.elements() * tens.element_size() / 1024.0
                total_kib += kib
                print(" Tensor [{0}]: {1} = {2} KiB".format(direction, tens.name, kib))

        sram_kib = sg.memory_used.get(MemArea.Sram, 0) / 1024.0
        print(" Total Size = {0} KiB".format(total_kib))
        print(" SRAM Memory Used = {0} KiB".format(sram_kib))
        # Argument order matters for ties: max() returns its first maximal argument
        largest_kib = max(total_kib, largest_kib)

    print(" Maximum Subgraph Size = {0} KiB".format(largest_kib))
124
125
def _constraint_bullets(constraints):
    """Return one Markdown bullet line per constraint, using each constraint's docstring as the text."""
    bullets = []
    for constraint in constraints:
        # An undocumented constraint has __doc__ set to None; fall back to its name so it still shows up
        # in the report instead of raising AttributeError.
        text = constraint.__doc__ or getattr(constraint, "__name__", repr(constraint))
        # Markdown needs two spaces at the end of a line to render it as a separate line
        bullets.append("- " + text.replace("\n", "  \n"))
    return bullets


def generate_supported_ops():
    """Write the SUPPORTED_OPS.md report into the current working directory.

    The report holds a summary table of the operators from ``builtin_operator_map`` whose internal
    operator is in ``SupportedOperators.supported_operators``, sorted by TFLite type name. It is followed
    by the generic constraints and, for each operator that has them, its specific constraints. Constraint
    text is taken from the docstring of each constraint.

    The path of the written file is printed to stdout. Nothing is returned.
    """
    # Lines that end in two spaces are deliberate: CommonMark renders them as hard line breaks
    lines = [
        "# Supported Ops",
        "",
        "This file was automatically generated by Vela using the `--supported-ops-report` parameter.  ",
        f"Vela version: `{__version__}`",
        "",
        "This file complies with [**CommonMark.**](https://commonmark.org)",
        "",
        "## Summary Table",
        "",
        "The table below contains TFLite operators that can be placed on the Ethos-U NPU.  ",
        "If the constraints are not met, then that operator will be scheduled on the CPU instead.  ",
        "For any other TFLite operator not listed, will be left untouched and scheduled on the CPU.  ",
        "Please check the supported operator list for your chosen runtime for further information.",
        "",
        "| Operator | Constraints |",
        "| - | - |",
    ]
    supported = SupportedOperators()
    # (internal op, TFLite name) pairs that get their own "<name> Constraints" section further down
    op_constraint_links = []
    op_list = sorted(((op, builtin_type_name(op)) for op in builtin_operator_map), key=lambda x: x[1])
    for op, name in op_list:
        internal_op = builtin_operator_map[op][0]
        if internal_op in SupportedOperators.supported_operators:
            links = "[Generic](#generic-constraints)"
            if internal_op in supported.specific_constraints:
                links += f", [Specific](#{name.lower()}-constraints)"
                op_constraint_links.append((internal_op, name))
            lines.append(f"| {name} | {links} |")
    lines += [
        "",
        "## Generic Constraints",
        "",
        "This is a list of constraints that all NPU operators must satisfy in order to be scheduled on the NPU.",
        "",
    ]
    lines += _constraint_bullets(supported.generic_constraints)
    for op, name in op_constraint_links:
        lines += [
            "",
            f"## {name} Constraints",
            "",
            f"This is a list of constraints that the {name} operator must satisfy in order to be scheduled on the NPU.",
            "",
        ]
        lines += _constraint_bullets(supported.specific_constraints[op])

    # Note. this will generate the file in the CWD
    filepath = os.path.join(os.getcwd(), "SUPPORTED_OPS.md")
    # Explicit encoding so the report does not depend on the platform's default text encoding
    with open(filepath, "wt", encoding="utf-8") as md:
        md.writelines(line + "\n" for line in lines)
    print(f"Report file: {filepath}")
185
186
def _enum_name_type(enum_cls):
    """Return an argparse ``type`` callable that looks up a member of ``enum_cls`` by its name.

    argparse only converts ArgumentTypeError, TypeError and ValueError raised by a ``type`` callable into
    a usage error. A bare ``enum_cls[name]`` lookup raises KeyError for an unknown name (presumably these
    classes are ``enum.Enum`` subclasses), which would otherwise reach the user as a traceback.
    """

    def convert(name):
        try:
            return enum_cls[name]
        except KeyError:
            raise argparse.ArgumentTypeError(f"unknown {enum_cls.__name__} name: {name!r}") from None

    return convert


def main(args=None):
    """Command line entry point: parse the options, then compile the requested network.

    Args:
        args: List of command line argument strings. Defaults to ``sys.argv[1:]`` when None.

    Returns:
        0 after the network has been processed, or after the supported operators report has been
        generated (``--supported-ops-report``).

    Invalid command lines are reported through ``parser.error``, which exits the process. This covers a
    missing NETWORK argument and an allocation alignment that is below 16 or not a power of two.
    """
    if args is None:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Ethos-U")
    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument(
        "--api-version", action="version", version=api_version, help="Displays the version of the external API."
    )
    parser.add_argument(
        "--supported-ops-report",
        action="store_true",
        help="Generate the SUPPORTED_OPS.md file in the current working directory and exits.",
    )

    # NETWORK is optional at the parser level so that --supported-ops-report can be used without it;
    # its presence is checked by hand further down.
    parser.add_argument(
        "network", metavar="NETWORK", type=str, default=None, nargs="?", help="Filename of network to process"
    )

    parser.add_argument(
        "--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"
    )
    parser.add_argument(
        "--enable-debug-db",
        action="store_true",
        default=None,
        help="Enables the calculation and writing of a network debug database to output directory",
    )

    parser.add_argument("--config", type=str, help="Location of vela configuration file")

    parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter")
    parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization")
    parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing")
    parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose")
    parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format")
    parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule")
    parser.add_argument(
        "--verbose-pareto-frontier-schedules",
        action="store_true",
        help="Show all schedules along the pareto frontier of optimisation criteria",
    )
    parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation")
    parser.add_argument(
        "--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream"
    )
    parser.add_argument(
        "--verbose-register-command-stream", action="store_true", help="Verbose register command stream"
    )
    parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")

    parser.add_argument(
        "--show-minimum-possible-allocation", action="store_true", help="Show the minimum possible allocation"
    )
    parser.add_argument(
        "--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"
    )
    parser.add_argument(
        "--keep-scale-placement", action="store_true", help="Keep scale tensors memory placement during scheduling"
    )
    parser.add_argument(
        "--cascading",
        type=ast.literal_eval,
        default=True,
        choices=[True, False],
        help="Controls the packing of multiple passes into a cascade (default: %(default)s)",
    )
    parser.add_argument("--force-block-config", type=str, default="", help="Force a specific block configuration HxWxC")
    parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")
    parser.add_argument(
        "--accelerator-config",
        type=str,
        default="ethos-u55-256",
        choices=list(architecture_features.Accelerator.member_list()),
        help="Accelerator configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--system-config",
        type=str,
        default="internal-default",
        help="System configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--tensor-allocator",
        default=TensorAllocator.Greedy,
        # Name lookup that reports unknown names as a usage error rather than a KeyError traceback
        type=_enum_name_type(TensorAllocator),
        choices=list(TensorAllocator),
        help="Tensor Allocator algorithm (default: %(default)s)",
    )
    parser.add_argument(
        "--show-subgraph-io-summary",
        action="store_true",
        help="Shows a summary of all the subgraphs and their inputs and outputs",
    )
    parser.add_argument(
        "--ifm-streaming",
        type=ast.literal_eval,
        default=True,
        choices=[True, False],
        help="Controls scheduler IFM streaming search (default: %(default)s)",
    )
    parser.add_argument(
        "--block-config-limit",
        type=int,
        default=16,
        help="Limit block config search space, use zero for unlimited (default: %(default)s)",
    )
    parser.add_argument(
        "--global-memory-clock-scale",
        type=float,
        default=1.0,
        help=(
            "Performs an additional scaling of the individual memory clock scales specified by the system config "
            "(default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--pareto-metric",
        default=ParetoMetric.BwCycMem,
        # Same name lookup as --tensor-allocator
        type=_enum_name_type(ParetoMetric),
        choices=list(ParetoMetric),
        help="Controls the calculation of the pareto metric (default: %(default)s)",
    )
    parser.add_argument(
        "--recursion-limit",
        type=int,
        default=10000,
        help="Set the recursion depth limit, may result in RecursionError if too low (default: %(default)s)",
    )
    parser.add_argument(
        "--max-block-dependency",
        type=int,
        default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP,
        choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1),
        help=(
            "Set the maximum value that can be used for the block dependency between npu kernel operations "
            "(default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--nhcwb16-between-cascaded-passes",
        type=ast.literal_eval,
        default=True,
        choices=[True, False],
        help="Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)",
    )
    parser.add_argument(
        "--weight-estimation-scaling",
        type=float,
        default=1.0,
        help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"),
    )
    parser.add_argument(
        "--allocation-alignment",
        type=int,
        default=Tensor.AllocationQuantum,
        help=("Controls the allocation byte alignment of cpu tensors (default: %(default)s)"),
    )
    args = parser.parse_args(args=args)

    # Read configuration file
    config_file = args.config
    config = None
    if config_file is not None:
        with open(config_file) as f:
            config = configparser.ConfigParser()
            config.read_file(f)

    # Generate the supported ops report and exit
    if args.supported_ops_report:
        generate_supported_ops()
        return 0

    if args.network is None:
        parser.error("the following argument is required: NETWORK")

    sys.setrecursionlimit(args.recursion_limit)

    if args.force_block_config:
        force_block_config = architecture_features.Block.from_string(args.force_block_config)
    else:
        force_block_config = None

    alignment = args.allocation_alignment
    if alignment < 16:
        parser.error("the following argument needs to be greater or equal to 16: ALLOCATION_ALIGNMENT")
    # A power of two has a single bit set, so clearing its lowest set bit must leave zero
    if alignment & (alignment - 1) != 0:
        parser.error("the following argument needs to be a power of 2: ALLOCATION_ALIGNMENT")

    arch = architecture_features.ArchitectureFeatures(
        vela_config=config,
        system_config=args.system_config,
        accelerator_config=args.accelerator_config,
        override_block_config=force_block_config,
        block_config_limit=args.block_config_limit,
        global_memory_clock_scale=args.global_memory_clock_scale,
        max_blockdep=args.max_block_dependency,
        weight_estimation_scaling=args.weight_estimation_scaling,
    )

    compiler_options = compiler_driver.CompilerOptions(
        verbose_graph=args.verbose_graph,
        verbose_quantization=args.verbose_quantization,
        verbose_packing=args.verbose_packing,
        verbose_tensor_purpose=args.verbose_tensor_purpose,
        verbose_tensor_format=args.verbose_tensor_format,
        verbose_allocation=args.verbose_allocation,
        verbose_high_level_command_stream=args.verbose_high_level_command_stream,
        verbose_register_command_stream=args.verbose_register_command_stream,
        verbose_operators=args.verbose_operators,
        show_minimum_possible_allocation=args.show_minimum_possible_allocation,
        show_cpu_operations=args.show_cpu_operations,
        tensor_allocator=args.tensor_allocator,
        timing=args.timing,
        output_dir=args.output_dir,
        allocation_alignment=alignment,
    )

    scheduler_options = scheduler.SchedulerOptions(
        use_cascading=args.cascading,
        verbose_schedule=args.verbose_schedule,
        verbose_pareto_frontier_schedules=args.verbose_pareto_frontier_schedules,
        use_ifm_streaming=args.ifm_streaming,
        pareto_metric=args.pareto_metric,
        use_nhcwb16_between_cascaded_passes=args.nhcwb16_between_cascaded_passes,
        keep_scale_placement=args.keep_scale_placement,
    )

    model_reader_options = model_reader.ModelReaderOptions()

    nng = process(args.network, args.enable_debug_db, arch, model_reader_options, compiler_options, scheduler_options)

    if args.show_subgraph_io_summary:
        print_subgraph_io_summary(nng)

    return 0