Blame - ethosu/vela/vela.py - ml/ethos-u/ethos-u-vela

blob: 923d8ec8537d5e02ec9c19bf56d6e690c459a0dc [file] [log] [blame]

Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	1	# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
				2	#
				3	# SPDX-License-Identifier: Apache-2.0
				4	#
				5	# Licensed under the Apache License, Version 2.0 (the License); you may
				6	# not use this file except in compliance with the License.
				7	# You may obtain a copy of the License at
				8	#
				9	# www.apache.org/licenses/LICENSE-2.0
				10	#
				11	# Unless required by applicable law or agreed to in writing, software
				12	# distributed under the License is distributed on an AS IS BASIS, WITHOUT
				13	# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	# See the License for the specific language governing permissions and
				15	# limitations under the License.
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	16	# Description:
				17	# Main entry point for the Vela compiler.
				18	#
				19	# Provides the command line interface, options parsing, and network loading, before calling the compiler driver.
Diego Russo	e8a1045	2020-04-21 17:39:10 +0100	[diff] [blame]	20	import argparse
				21	import ast
				22	import configparser
Diego Russo	ea6111a	2020-04-14 18:41:58 +0100	[diff] [blame]	23	import os.path
				24	import sys
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	25	import time
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	26
				27	from . import architecture_features
Diego Russo	e8a1045	2020-04-21 17:39:10 +0100	[diff] [blame]	28	from . import compiler_driver
				29	from . import model_reader
				30	from . import scheduler
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	31	from . import stats_writer
				32	from . import tflite_writer
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	33	from ._version import __version__
Louis Verhaard	7db7896	2020-05-25 15:05:26 +0200	[diff] [blame]	34	from .errors import InputFileError
Diego Russo	e8a1045	2020-04-21 17:39:10 +0100	[diff] [blame]	35	from .nn_graph import PassPlacement
				36	from .nn_graph import TensorAllocator
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	37	from .scheduler import ParetoMetric
Diego Russo	ea6111a	2020-04-14 18:41:58 +0100	[diff] [blame]	38	from .tensor import MemArea
Jacob Bohlin	0628a8c	2020-08-28 13:25:14 +0200	[diff] [blame^]	39	from .tensor import Tensor
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	40
				41
				42	def process(fname, arch, model_reader_options, compiler_options, scheduler_options):
				43	if compiler_options.timing:
				44	start = time.time()
				45
				46	nng = model_reader.read_model(fname, model_reader_options)
				47
				48	if not nng:
Louis Verhaard	7db7896	2020-05-25 15:05:26 +0200	[diff] [blame]	49	raise InputFileError(fname, "input file could not be read")
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	50
				51	if compiler_options.verbose_operators:
				52	nng.print_operators()
				53
				54	if compiler_options.timing:
				55	stop = time.time()
				56	print("Model reading took %f s" % (stop - start))
				57	start = time.time()
				58
				59	compiler_driver.compiler_driver(nng, arch, compiler_options, scheduler_options)
				60
				61	passes_csv_file = "%s/%s_pass-breakdown_%s.csv" % (compiler_options.output_dir, nng.name, arch.system_config)
				62	stats_writer.write_pass_metrics_csv(nng, passes_csv_file)
				63
				64	summary_csv_file = "%s/%s_summary_%s.csv" % (compiler_options.output_dir, nng.name, arch.system_config)
				65	stats_writer.write_summary_metrics_csv(nng, summary_csv_file, arch)
				66
				67	stats_writer.print_performance_metrics(nng, show_cpu_operations=compiler_options.show_cpu_operations, arch=arch)
				68
				69	if fname.endswith(".tflite"):
				70	tflite_writer.write_tflite(nng, "%s/%s_vela.tflite" % (compiler_options.output_dir, nng.name))
				71
				72	if compiler_options.timing:
				73	stop = time.time()
				74	print("Compiler driver took %f s" % (stop - start))
				75
				76	return nng
				77
				78
				79	def print_subgraph_io_summary(nng):
				80	"""Print a summary of all the input and output tensor sizes for all subgraphs.
				81	Also displays the total tensor size and the memory used area for sram.
				82	"""
				83
				84	print("Subgraph IO Summary")
				85	print("-------------------")
				86	print("NNG: {0}".format(nng.name))
				87	max_sg_size = 0
				88	for sg in reversed(nng.subgraphs):
				89	print(" Subgraph: {0} = {1}".format(sg.name, sg.placement))
				90	sg_size = 0
				91
				92	if sg.placement == PassPlacement.Npu:
				93	for tens in sg.input_tensors + [sg.scratch_tensor] + sg.output_tensors:
				94	if tens in sg.input_tensors:
				95	tens_dir = "In"
				96	elif tens in sg.output_tensors:
				97	tens_dir = "Out"
				98	else:
				99	tens_dir = "In/Out"
				100
				101	size = tens.elements() * tens.element_size() / 1024.0
				102	sg_size = sg_size + size
				103	print(" Tensor [{0}]: {1} = {2} KiB".format(tens_dir, tens.name, size))
				104
				105	print(" Total Size = {0} KiB".format(sg_size))
				106	print(" SRAM Memory Used = {0} KiB".format(sg.memory_used.get(MemArea.Sram, 0) / 1024.0))
				107	max_sg_size = max(sg_size, max_sg_size)
				108
				109	print(" Maximum Subgraph Size = {0} KiB".format(max_sg_size))
				110
				111
				112	def main(args=None):
				113	if args is None:
				114	args = sys.argv[1:]
				115
				116	parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Ethos-U55")
				117
				118	parser.add_argument(
				119	"network", metavar="NETWORK", type=str, default=None, nargs=None, help="Filename of network to process"
				120	)
				121
				122	parser.add_argument("--version", action="version", version=__version__)
				123	parser.add_argument(
				124	"--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"
				125	)
				126	parser.add_argument("--config", type=str, help="Location of vela configuration file")
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	127
				128	parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter")
				129	parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization")
				130	parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing")
				131	parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose")
				132	parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format")
				133	parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule")
				134	parser.add_argument(
				135	"--verbose-pareto-frontier-schedules",
				136	action="store_true",
				137	help="Show all schedules along the pareto frontier of optimisation criteria",
				138	)
				139	parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation")
				140	parser.add_argument(
				141	"--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream"
				142	)
				143	parser.add_argument(
				144	"--verbose-register-command-stream", action="store_true", help="Verbose register command stream"
				145	)
				146	parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")
				147
				148	parser.add_argument(
				149	"--show-minimum-possible-allocation", action="store_true", help="Show the minimum possible allocation"
				150	)
				151	parser.add_argument(
				152	"--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"
				153	)
				154	parser.add_argument(
				155	"--cascading",
				156	type=ast.literal_eval,
				157	default=True,
				158	choices=[True, False],
				159	help="Controls the packing of multiple passes into a cascade (default: %(default)s)",
				160	)
				161	parser.add_argument(
				162	"--ifm-ofm-overlap",
				163	type=ast.literal_eval,
				164	default=True,
				165	choices=[True, False],
				166	help="Controls the overlapping of IFM and OFM buffers (default: %(default)s)",
				167	)
				168	parser.add_argument("--force-block-config", type=str, default="", help="Force a specific block configuration HxWxC")
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	169	parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")
				170	parser.add_argument(
				171	"--accelerator-config",
				172	type=str,
				173	default="ethos-u55-256",
Manupa Karunaratne	d83d2e1	2020-07-20 12:05:32 +0100	[diff] [blame]	174	choices=list(architecture_features.Accelerator.member_list()),
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	175	help="Accelerator configuration to use (default: %(default)s)",
				176	)
				177	parser.add_argument(
				178	"--system-config",
				179	type=str,
				180	default="internal-default",
				181	help="System configuration to use (default: %(default)s)",
				182	)
				183	parser.add_argument(
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	184	"--permanent-storage",
				185	default=MemArea.OffChipFlash,
				186	type=lambda s: MemArea[s],
Patrik Gustavsson	6c87807	2020-08-19 12:13:30 +0200	[diff] [blame]	187	choices=list(MemArea)[3:5],
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	188	help=(
Patrik Gustavsson	6c87807	2020-08-19 12:13:30 +0200	[diff] [blame]	189	"Memory area for permanent storage, only valid for Ethos-U55. "
				190	"To store the weights and other constant data in SRAM, select 'OnChipFlash'. (default: %(default)s)"
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	191	),
				192	)
				193	parser.add_argument(
				194	"--tensor-allocator",
				195	default=TensorAllocator.Greedy,
				196	type=lambda s: TensorAllocator[s],
				197	choices=list(TensorAllocator),
				198	help="Tensor Allocator algorithm (default: %(default)s)",
				199	)
				200	parser.add_argument(
				201	"--show-subgraph-io-summary",
				202	action="store_true",
				203	help="Shows a summary of all the subgraphs and their inputs and outputs",
				204	)
				205	parser.add_argument(
				206	"--ifm-streaming",
				207	type=ast.literal_eval,
				208	default=True,
				209	choices=[True, False],
				210	help="Controls scheduler IFM streaming search (default: %(default)s)",
				211	)
				212	parser.add_argument(
				213	"--block-config-limit",
				214	type=int,
				215	default=16,
				216	help="Limit block config search space, use zero for unlimited (default: %(default)s)",
				217	)
				218	parser.add_argument(
				219	"--global-memory-clock-scale",
				220	type=float,
				221	default=1.0,
				222	help=(
				223	"Performs an additional scaling of the individual memory clock scales specified by the system config "
				224	"(default: %(default)s)"
				225	),
				226	)
				227	parser.add_argument(
				228	"--pareto-metric",
				229	default=ParetoMetric.BwCycMem,
				230	type=lambda s: ParetoMetric[s],
				231	choices=list(ParetoMetric),
				232	help="Controls the calculation of the pareto metric (default: %(default)s)",
				233	)
				234	parser.add_argument(
				235	"--recursion-limit",
				236	type=int,
				237	default=10000,
				238	help="Set the recursion depth limit, may result in RecursionError if too low (default: %(default)s)",
				239	)
				240	parser.add_argument(
				241	"--max-block-dependency",
				242	type=int,
				243	default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP,
				244	choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1),
				245	help=(
				246	"Set the maximum value that can be used for the block dependency between npu kernel operations "
				247	"(default: %(default)s)"
				248	),
				249	)
Charles Xu	7b8823f	2020-05-29 13:53:10 +0200	[diff] [blame]	250	parser.add_argument(
				251	"--nhcwb16-between-cascaded-passes",
				252	type=ast.literal_eval,
				253	default=True,
				254	choices=[True, False],
				255	help="Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)",
				256	)
Fredrik Svedberg	a0c3624	2020-06-03 15:43:31 +0200	[diff] [blame]	257	parser.add_argument(
Patrik Gustavsson	90831bc	2020-08-24 16:26:11 +0200	[diff] [blame]	258	"--weight-estimation-scaling",
				259	type=float,
				260	default=1.0,
				261	help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"),
				262	)
Jacob Bohlin	0628a8c	2020-08-28 13:25:14 +0200	[diff] [blame^]	263	parser.add_argument(
				264	"--allocation-alignment",
				265	type=int,
				266	default=Tensor.AllocationQuantum,
				267	help=("Controls the allocation byte alignment of cpu tensors (default: %(default)s)"),
				268	)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	269	args = parser.parse_args(args=args)
				270
				271	# Read configuration file
				272	config_file = args.config
				273	config = None
				274	if config_file is not None:
				275	with open(config_file) as f:
				276	config = configparser.ConfigParser()
				277	config.read_file(f)
				278
				279	if args.network is None:
				280	parser.error("the following argument is required: NETWORK")
				281
				282	sys.setrecursionlimit(args.recursion_limit)
				283
				284	if args.force_block_config:
				285	force_block_config = architecture_features.Block.from_string(args.force_block_config)
				286	else:
				287	force_block_config = None
				288
Jacob Bohlin	0628a8c	2020-08-28 13:25:14 +0200	[diff] [blame^]	289	alignment = args.allocation_alignment
				290	if alignment < 16:
				291	parser.error("the following argument needs to be greater or equal to 16: ALLOCATION_ALIGNMENT")
				292	if alignment & (alignment - 1) != 0:
				293	parser.error("the following argument needs to be a power of 2: ALLOCATION_ALIGNMENT")
				294
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	295	arch = architecture_features.ArchitectureFeatures(
				296	vela_config=config,
				297	system_config=args.system_config,
				298	accelerator_config=args.accelerator_config,
				299	permanent_storage=args.permanent_storage,
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	300	override_block_config=force_block_config,
				301	block_config_limit=args.block_config_limit,
				302	global_memory_clock_scale=args.global_memory_clock_scale,
				303	max_blockdep=args.max_block_dependency,
Patrik Gustavsson	90831bc	2020-08-24 16:26:11 +0200	[diff] [blame]	304	weight_estimation_scaling=args.weight_estimation_scaling,
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	305	)
				306
				307	compiler_options = compiler_driver.CompilerOptions(
				308	verbose_graph=args.verbose_graph,
				309	verbose_quantization=args.verbose_quantization,
				310	verbose_packing=args.verbose_packing,
				311	verbose_tensor_purpose=args.verbose_tensor_purpose,
				312	verbose_tensor_format=args.verbose_tensor_format,
				313	verbose_allocation=args.verbose_allocation,
				314	verbose_high_level_command_stream=args.verbose_high_level_command_stream,
				315	verbose_register_command_stream=args.verbose_register_command_stream,
				316	verbose_operators=args.verbose_operators,
				317	show_minimum_possible_allocation=args.show_minimum_possible_allocation,
				318	show_cpu_operations=args.show_cpu_operations,
				319	tensor_allocator=args.tensor_allocator,
				320	timing=args.timing,
				321	output_dir=args.output_dir,
Jacob Bohlin	0628a8c	2020-08-28 13:25:14 +0200	[diff] [blame^]	322	allocation_alignment=alignment,
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	323	)
				324
				325	scheduler_options = scheduler.SchedulerOptions(
				326	use_cascading=args.cascading,
				327	use_ifm_ofm_overlap=args.ifm_ofm_overlap,
				328	verbose_schedule=args.verbose_schedule,
				329	verbose_pareto_frontier_schedules=args.verbose_pareto_frontier_schedules,
				330	use_ifm_streaming=args.ifm_streaming,
				331	pareto_metric=args.pareto_metric,
Charles Xu	7b8823f	2020-05-29 13:53:10 +0200	[diff] [blame]	332	use_nhcwb16_between_cascaded_passes=args.nhcwb16_between_cascaded_passes,
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	333	)
				334
Tim Hall	284223e	2020-06-09 13:17:21 +0100	[diff] [blame]	335	model_reader_options = model_reader.ModelReaderOptions()
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	336
				337	os.makedirs(args.output_dir, exist_ok=True)
				338
				339	nng = process(args.network, arch, model_reader_options, compiler_options, scheduler_options)
				340
				341	if args.show_subgraph_io_summary:
				342	print_subgraph_io_summary(nng)
				343
				344	return 0