Blame - ethosu/vela/vela.py - ml/ethos-u/ethos-u-vela

blob: 1908092636b2d69303753ba3a1427e67181b89fd [file] [log] [blame]

Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	1	# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
				2	#
				3	# SPDX-License-Identifier: Apache-2.0
				4	#
				5	# Licensed under the Apache License, Version 2.0 (the License); you may
				6	# not use this file except in compliance with the License.
				7	# You may obtain a copy of the License at
				8	#
				9	# www.apache.org/licenses/LICENSE-2.0
				10	#
				11	# Unless required by applicable law or agreed to in writing, software
				12	# distributed under the License is distributed on an AS IS BASIS, WITHOUT
				13	# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	# See the License for the specific language governing permissions and
				15	# limitations under the License.
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	16	# Description:
				17	# Main entry point for the Vela compiler.
				18	#
				19	# Provides the command line interface, options parsing, and network loading before calling the compiler driver.
Diego Russo	e8a1045	2020-04-21 17:39:10 +0100	[diff] [blame]	20	import argparse
				21	import ast
				22	import configparser
Diego Russo	ea6111a	2020-04-14 18:41:58 +0100	[diff] [blame]	23	import os.path
				24	import sys
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	25	import time
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	26
				27	from . import architecture_features
Diego Russo	e8a1045	2020-04-21 17:39:10 +0100	[diff] [blame]	28	from . import compiler_driver
				29	from . import model_reader
				30	from . import scheduler
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	31	from . import stats_writer
				32	from . import tflite_writer
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	33	from ._version import __version__
Louis Verhaard	7db7896	2020-05-25 15:05:26 +0200	[diff] [blame]	34	from .errors import InputFileError
Diego Russo	e8a1045	2020-04-21 17:39:10 +0100	[diff] [blame]	35	from .nn_graph import PassPlacement
				36	from .nn_graph import TensorAllocator
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	37	from .scheduler import ParetoMetric
Diego Russo	ea6111a	2020-04-14 18:41:58 +0100	[diff] [blame]	38	from .tensor import MemArea
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	39
				40
				41	def process(fname, arch, model_reader_options, compiler_options, scheduler_options):
				42	if compiler_options.timing:
				43	start = time.time()
				44
				45	nng = model_reader.read_model(fname, model_reader_options)
				46
				47	if not nng:
Louis Verhaard	7db7896	2020-05-25 15:05:26 +0200	[diff] [blame]	48	raise InputFileError(fname, "input file could not be read")
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	49
				50	if compiler_options.verbose_operators:
				51	nng.print_operators()
				52
				53	if compiler_options.timing:
				54	stop = time.time()
				55	print("Model reading took %f s" % (stop - start))
				56	start = time.time()
				57
				58	compiler_driver.compiler_driver(nng, arch, compiler_options, scheduler_options)
				59
				60	passes_csv_file = "%s/%s_pass-breakdown_%s.csv" % (compiler_options.output_dir, nng.name, arch.system_config)
				61	stats_writer.write_pass_metrics_csv(nng, passes_csv_file)
				62
				63	summary_csv_file = "%s/%s_summary_%s.csv" % (compiler_options.output_dir, nng.name, arch.system_config)
				64	stats_writer.write_summary_metrics_csv(nng, summary_csv_file, arch)
				65
				66	stats_writer.print_performance_metrics(nng, show_cpu_operations=compiler_options.show_cpu_operations, arch=arch)
				67
				68	if fname.endswith(".tflite"):
				69	tflite_writer.write_tflite(nng, "%s/%s_vela.tflite" % (compiler_options.output_dir, nng.name))
				70
				71	if compiler_options.timing:
				72	stop = time.time()
				73	print("Compiler driver took %f s" % (stop - start))
				74
				75	return nng
				76
				77
				78	def print_subgraph_io_summary(nng):
				79	"""Print a summary of all the input and output tensor sizes for all subgraphs.
				80	Also displays the total tensor size and the memory used area for sram.
				81	"""
				82
				83	print("Subgraph IO Summary")
				84	print("-------------------")
				85	print("NNG: {0}".format(nng.name))
				86	max_sg_size = 0
				87	for sg in reversed(nng.subgraphs):
				88	print(" Subgraph: {0} = {1}".format(sg.name, sg.placement))
				89	sg_size = 0
				90
				91	if sg.placement == PassPlacement.Npu:
				92	for tens in sg.input_tensors + [sg.scratch_tensor] + sg.output_tensors:
				93	if tens in sg.input_tensors:
				94	tens_dir = "In"
				95	elif tens in sg.output_tensors:
				96	tens_dir = "Out"
				97	else:
				98	tens_dir = "In/Out"
				99
				100	size = tens.elements() * tens.element_size() / 1024.0
				101	sg_size = sg_size + size
				102	print(" Tensor [{0}]: {1} = {2} KiB".format(tens_dir, tens.name, size))
				103
				104	print(" Total Size = {0} KiB".format(sg_size))
				105	print(" SRAM Memory Used = {0} KiB".format(sg.memory_used.get(MemArea.Sram, 0) / 1024.0))
				106	max_sg_size = max(sg_size, max_sg_size)
				107
				108	print(" Maximum Subgraph Size = {0} KiB".format(max_sg_size))
				109
				110
				111	def main(args=None):
				112	if args is None:
				113	args = sys.argv[1:]
				114
				115	parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Ethos-U55")
				116
				117	parser.add_argument(
				118	"network", metavar="NETWORK", type=str, default=None, nargs=None, help="Filename of network to process"
				119	)
				120
				121	parser.add_argument("--version", action="version", version=__version__)
				122	parser.add_argument(
				123	"--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"
				124	)
				125	parser.add_argument("--config", type=str, help="Location of vela configuration file")
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	126
				127	parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter")
				128	parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization")
				129	parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing")
				130	parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose")
				131	parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format")
				132	parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule")
				133	parser.add_argument(
				134	"--verbose-pareto-frontier-schedules",
				135	action="store_true",
				136	help="Show all schedules along the pareto frontier of optimisation criteria",
				137	)
				138	parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation")
				139	parser.add_argument(
				140	"--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream"
				141	)
				142	parser.add_argument(
				143	"--verbose-register-command-stream", action="store_true", help="Verbose register command stream"
				144	)
				145	parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")
				146
				147	parser.add_argument(
				148	"--show-minimum-possible-allocation", action="store_true", help="Show the minimum possible allocation"
				149	)
				150	parser.add_argument(
				151	"--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"
				152	)
				153	parser.add_argument(
				154	"--cascading",
				155	type=ast.literal_eval,
				156	default=True,
				157	choices=[True, False],
				158	help="Controls the packing of multiple passes into a cascade (default: %(default)s)",
				159	)
				160	parser.add_argument(
				161	"--ifm-ofm-overlap",
				162	type=ast.literal_eval,
				163	default=True,
				164	choices=[True, False],
				165	help="Controls the overlapping of IFM and OFM buffers (default: %(default)s)",
				166	)
				167	parser.add_argument("--force-block-config", type=str, default="", help="Force a specific block configuration HxWxC")
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	168	parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")
				169	parser.add_argument(
				170	"--accelerator-config",
				171	type=str,
				172	default="ethos-u55-256",
Manupa Karunaratne	d83d2e1	2020-07-20 12:05:32 +0100	[diff] [blame]	173	choices=list(architecture_features.Accelerator.member_list()),
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	174	help="Accelerator configuration to use (default: %(default)s)",
				175	)
				176	parser.add_argument(
				177	"--system-config",
				178	type=str,
				179	default="internal-default",
				180	help="System configuration to use (default: %(default)s)",
				181	)
				182	parser.add_argument(
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	183	"--permanent-storage",
				184	default=MemArea.OffChipFlash,
				185	type=lambda s: MemArea[s],
Patrik Gustavsson	6c87807	2020-08-19 12:13:30 +0200	[diff] [blame]	186	choices=list(MemArea)[3:5],
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	187	help=(
Patrik Gustavsson	6c87807	2020-08-19 12:13:30 +0200	[diff] [blame]	188	"Memory area for permanent storage, only valid for Ethos-U55. "
				189	"To store the weights and other constant data in SRAM, select 'OnChipFlash'. (default: %(default)s)"
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	190	),
				191	)
				192	parser.add_argument(
				193	"--tensor-allocator",
				194	default=TensorAllocator.Greedy,
				195	type=lambda s: TensorAllocator[s],
				196	choices=list(TensorAllocator),
				197	help="Tensor Allocator algorithm (default: %(default)s)",
				198	)
				199	parser.add_argument(
				200	"--show-subgraph-io-summary",
				201	action="store_true",
				202	help="Shows a summary of all the subgraphs and their inputs and outputs",
				203	)
				204	parser.add_argument(
				205	"--ifm-streaming",
				206	type=ast.literal_eval,
				207	default=True,
				208	choices=[True, False],
				209	help="Controls scheduler IFM streaming search (default: %(default)s)",
				210	)
				211	parser.add_argument(
				212	"--block-config-limit",
				213	type=int,
				214	default=16,
				215	help="Limit block config search space, use zero for unlimited (default: %(default)s)",
				216	)
				217	parser.add_argument(
				218	"--global-memory-clock-scale",
				219	type=float,
				220	default=1.0,
				221	help=(
				222	"Performs an additional scaling of the individual memory clock scales specified by the system config "
				223	"(default: %(default)s)"
				224	),
				225	)
				226	parser.add_argument(
				227	"--pareto-metric",
				228	default=ParetoMetric.BwCycMem,
				229	type=lambda s: ParetoMetric[s],
				230	choices=list(ParetoMetric),
				231	help="Controls the calculation of the pareto metric (default: %(default)s)",
				232	)
				233	parser.add_argument(
				234	"--recursion-limit",
				235	type=int,
				236	default=10000,
				237	help="Set the recursion depth limit, may result in RecursionError if too low (default: %(default)s)",
				238	)
				239	parser.add_argument(
				240	"--max-block-dependency",
				241	type=int,
				242	default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP,
				243	choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1),
				244	help=(
				245	"Set the maximum value that can be used for the block dependency between npu kernel operations "
				246	"(default: %(default)s)"
				247	),
				248	)
Charles Xu	7b8823f	2020-05-29 13:53:10 +0200	[diff] [blame]	249	parser.add_argument(
				250	"--nhcwb16-between-cascaded-passes",
				251	type=ast.literal_eval,
				252	default=True,
				253	choices=[True, False],
				254	help="Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)",
				255	)
Fredrik Svedberg	a0c3624	2020-06-03 15:43:31 +0200	[diff] [blame]	256	parser.add_argument(
				257	"--softmax-support",
				258	type=ast.literal_eval,
				259	default=False,
				260	choices=[True, False],
				261	help="Control if Softmax should be transformed into a set of npu operations (default: %(default)s)",
				262	)
Patrik Gustavsson	90831bc	2020-08-24 16:26:11 +0200	[diff] [blame^]	263	parser.add_argument(
				264	"--weight-estimation-scaling",
				265	type=float,
				266	default=1.0,
				267	help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"),
				268	)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	269	args = parser.parse_args(args=args)
				270
				271	# Read configuration file
				272	config_file = args.config
				273	config = None
				274	if config_file is not None:
				275	with open(config_file) as f:
				276	config = configparser.ConfigParser()
				277	config.read_file(f)
				278
				279	if args.network is None:
				280	parser.error("the following argument is required: NETWORK")
				281
				282	sys.setrecursionlimit(args.recursion_limit)
				283
				284	if args.force_block_config:
				285	force_block_config = architecture_features.Block.from_string(args.force_block_config)
				286	else:
				287	force_block_config = None
				288
				289	arch = architecture_features.ArchitectureFeatures(
				290	vela_config=config,
				291	system_config=args.system_config,
				292	accelerator_config=args.accelerator_config,
				293	permanent_storage=args.permanent_storage,
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	294	override_block_config=force_block_config,
				295	block_config_limit=args.block_config_limit,
				296	global_memory_clock_scale=args.global_memory_clock_scale,
				297	max_blockdep=args.max_block_dependency,
Fredrik Svedberg	a0c3624	2020-06-03 15:43:31 +0200	[diff] [blame]	298	softmax_support=args.softmax_support,
Patrik Gustavsson	90831bc	2020-08-24 16:26:11 +0200	[diff] [blame^]	299	weight_estimation_scaling=args.weight_estimation_scaling,
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	300	)
				301
				302	compiler_options = compiler_driver.CompilerOptions(
				303	verbose_graph=args.verbose_graph,
				304	verbose_quantization=args.verbose_quantization,
				305	verbose_packing=args.verbose_packing,
				306	verbose_tensor_purpose=args.verbose_tensor_purpose,
				307	verbose_tensor_format=args.verbose_tensor_format,
				308	verbose_allocation=args.verbose_allocation,
				309	verbose_high_level_command_stream=args.verbose_high_level_command_stream,
				310	verbose_register_command_stream=args.verbose_register_command_stream,
				311	verbose_operators=args.verbose_operators,
				312	show_minimum_possible_allocation=args.show_minimum_possible_allocation,
				313	show_cpu_operations=args.show_cpu_operations,
				314	tensor_allocator=args.tensor_allocator,
				315	timing=args.timing,
				316	output_dir=args.output_dir,
				317	)
				318
				319	scheduler_options = scheduler.SchedulerOptions(
				320	use_cascading=args.cascading,
				321	use_ifm_ofm_overlap=args.ifm_ofm_overlap,
				322	verbose_schedule=args.verbose_schedule,
				323	verbose_pareto_frontier_schedules=args.verbose_pareto_frontier_schedules,
				324	use_ifm_streaming=args.ifm_streaming,
				325	pareto_metric=args.pareto_metric,
Charles Xu	7b8823f	2020-05-29 13:53:10 +0200	[diff] [blame]	326	use_nhcwb16_between_cascaded_passes=args.nhcwb16_between_cascaded_passes,
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	327	)
				328
Tim Hall	284223e	2020-06-09 13:17:21 +0100	[diff] [blame]	329	model_reader_options = model_reader.ModelReaderOptions()
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	330
				331	os.makedirs(args.output_dir, exist_ok=True)
				332
				333	nng = process(args.network, arch, model_reader_options, compiler_options, scheduler_options)
				334
				335	if args.show_subgraph_io_summary:
				336	print_subgraph_io_summary(nng)
				337
				338	return 0