Blame - ethosu/vela/nn_graph.py - ml/ethos-u/ethos-u-vela

blob: b495828e648a904c4a1991dc2c86308b2e016cf2 [file] [log] [blame]

Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	1	# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
				2	#
				3	# SPDX-License-Identifier: Apache-2.0
				4	#
				5	# Licensed under the Apache License, Version 2.0 (the License); you may
				6	# not use this file except in compliance with the License.
				7	# You may obtain a copy of the License at
				8	#
				9	# www.apache.org/licenses/LICENSE-2.0
				10	#
				11	# Unless required by applicable law or agreed to in writing, software
				12	# distributed under the License is distributed on an AS IS BASIS, WITHOUT
				13	# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	# See the License for the specific language governing permissions and
				15	# limitations under the License.
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	16	# Description:
				17	# Neural network graph classes and enums.
				18	# Pass - A packed pass containing one or more Operations.
				19	# CascadedPass - A scheduled pass containing one or more Passes, as well as a scheduling strategy and block
				20	# configurations.
				21	# Subgraph - Holds a neural network subgraph, pointing at Tensors, Operations, Passes, and CascadedPasses.
				22	# Graph - A full neural network graph with one or more Subgraphs.
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	23	import enum
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	24
				25
				26	class PassPlacement(enum.Enum):
				27	Unknown = 0
				28	Cpu = 1
				29	Npu = 2
				30	MemoryOnly = 3
				31	StartupInit = 4
				32
				33
				34	class TensorAllocator(enum.Enum):
				35	LinearAlloc = 1
				36	Greedy = 2
				37
				38	def __str__(self):
				39	return self.name
				40
				41
				42	class Pass:
				43	def __init__(self, name, placement, is_element_wise, npu_block_type):
				44	self.inputs = []
				45	self.intermediates = []
				46	self.outputs = []
				47	self.ops = []
				48	self.primary_op = None
				49	self.ifm_tensor = None
				50	self.ifm2_tensor = None
				51	self.ofm_tensor = None
				52	self.weight_tensor = None
				53	self.scale_tensor = None
Fredrik Svedberg	a0c3624	2020-06-03 15:43:31 +0200	[diff] [blame]	54	self.lut_tensor = None
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	55	self.name = name
				56	self.cascade = None
				57	self.placement = placement
				58
				59	# TODO: rename is_element_wise because it is not the same as an ElementWise operator. It is used by the tensor
				60	# allocation and requires that the OFM and IFM has the exact same address. Essentially complete overlap.
				61	self.is_element_wise = is_element_wise
				62	self.npu_block_type = npu_block_type
				63	self.block_config = None # will be filled in by scheduler
				64	self.shared_buffer = None # will be filled in by scheduler
				65
				66	self.predecessors = []
				67	self.successors = []
				68
				69	def __str__(self):
				70	return "<nng.Pass '%s', %s, ops=%s>" % (self.name, self.placement, [op.type for op in self.ops])
				71
				72	__repr__ = __str__
				73
				74	def get_primary_op_ifm_weights(self):
				75	if not self.primary_op:
				76	return None, None
				77	return self.primary_op.get_ifm_ifm2_weights_ofm()[::2]
				78
				79	def get_primary_op_ifm_ifm2_weights_ofm(self):
				80	if not self.primary_op:
				81	return None, None, None, None
				82	return self.primary_op.get_ifm_ifm2_weights_ofm()
				83
				84	def get_primary_op_ifm_weights_biases_ofm(self):
				85	if not self.primary_op:
				86	return None, None, None, None
				87	return self.primary_op.get_ifm_weights_biases_ofm()
				88
Fredrik Svedberg	a0c3624	2020-06-03 15:43:31 +0200	[diff] [blame]	89	def get_primary_op_lut(self):
				90	if not self.primary_op:
				91	return None
				92	return self.primary_op.activation_lut
				93
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	94
				95	class SchedulingStrategy(enum.Enum):
				96	Unknown = -1
				97	IfmStream = 0
				98	WeightStream = 1
				99
				100
				101	class SchedulerRewrite(enum.Enum):
				102	Nop = 0
				103	ChangeTensorSubPurpose = 1
				104
				105
				106	class CascadedPass:
				107	def __init__(self, name, strat, inputs, intermediates, outputs, passes, placement, is_element_wise):
				108	self.name = name
				109	self.strategy = strat
				110	self.inputs = inputs
				111	self.intermediates = intermediates
				112	self.outputs = outputs
				113	self.passes = passes
				114	self.placement = placement
				115	self.is_element_wise = is_element_wise
				116
				117	self.predecessors = []
				118	self.successors = []
				119
				120	def __str__(self):
				121	return "<nng.CascadedPass strategy=%s x %s '%s', passes=%s, block_configs=%s>" % (
				122	self.strategy,
				123	len(self.passes),
				124	self.name,
				125	[ps.name for ps in self.passes],
				126	[ps.block_config for ps in self.passes],
				127	)
				128
				129	__repr__ = __str__
				130
				131
				132	class Subgraph:
				133	def __init__(self, name="<unnamed>", placement=PassPlacement.Cpu):
				134	self.output_tensors = []
				135	self.input_tensors = []
				136	self.original_inputs = [] # Preserve the original input order
				137	self.passes = []
				138	self.cascaded_passes = []
				139	self.name = name
				140	self.high_level_command_stream = []
				141	self.placement = placement
				142	self.command_stream_tensor = None
				143	self.flash_tensor = None
				144
				145	self.memory_used = {}
Patrik Gustavsson	eca2e95	2020-05-27 09:15:11 +0200	[diff] [blame]	146	self.memory_used_per_type = {}
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	147
				148	def __str__(self):
				149	return "<nng.Subgraph '%s', n_passes=%d, n_cascaded_passes=%d>" % (
				150	self.name,
				151	len(self.passes),
				152	len(self.cascaded_passes),
				153	)
				154
				155	__repr__ = __str__
				156
				157	def update_consumers(self):
				158	visit_op_set = set()
				159	visit_tensor_set = set()
				160	self.input_tensors = []
				161
				162	print_visit = False
				163
				164	def visit_op(op):
				165	if op in visit_op_set:
				166	return
				167
				168	visit_op_set.add(op)
				169	for inp in op.inputs:
Jacob Bohlin	67e0d8f	2020-08-20 10:53:02 +0200	[diff] [blame^]	170	if not inp:
				171	continue
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	172	if print_visit:
				173	print(inp, "adding consumer", op)
				174	visit_tensor(inp)
				175	inp.consumer_list.append(op)
				176
				177	if op.type in set(("Placeholder", "SubgraphInput")):
				178	assert len(op.outputs) == 1
				179	self.input_tensors.append(op.outputs[0])
				180
				181	for out in op.outputs:
				182	if out not in visit_tensor_set:
				183	out.consumer_list = [] # reset unvisited output, just in case
				184
				185	def visit_tensor(tens):
				186	if tens in visit_tensor_set:
				187	return
				188	visit_tensor_set.add(tens)
				189	tens.consumer_list = []
				190	for op in tens.ops:
				191	visit_op(op)
				192
				193	for ps in self.passes:
				194	for tens in ps.outputs + ps.inputs:
Jacob Bohlin	67e0d8f	2020-08-20 10:53:02 +0200	[diff] [blame^]	195	if not tens:
				196	continue
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	197	tens.consumer_list = [] # reset unvisited tensors to start with
				198
				199	for tens in self.output_tensors:
				200	visit_tensor(tens)
				201	tens.consumer_list.append(None) # special op to indicate that the graph consumes the result
				202
				203	print_visit = True
				204	for ps in self.passes:
				205	for op in ps.ops:
				206	visit_op(op)
				207	for tens in ps.inputs:
				208	visit_tensor(tens)
				209
				210	def build_pass_links(self):
				211	for idx, ps in enumerate(self.passes):
				212	ps.time = 2 * idx
				213	ps.predecessors = []
				214	ps.successors = []
				215
				216	for ps in self.passes:
				217	for tens in ps.inputs:
				218	for op in tens.ops:
				219	pred_pass = op.scheduled_pass
				220	assert pred_pass.time < ps.time
				221	if ps not in pred_pass.successors:
				222	pred_pass.successors.append(ps)
				223
				224	if pred_pass not in ps.predecessors:
				225	ps.predecessors.append(pred_pass)
				226
				227	assert tens in pred_pass.outputs
				228
				229	def build_pass_dag_predecessors(self):
				230	for ps in self.passes:
				231	ps.dag_predecessors = []
				232
				233	class State(enum.Enum):
				234	NotVisited = 0
				235	BeingVisited = 1
				236	Visited = 2
				237
				238	pass_visit_dict = {}
				239
				240	def visit_pass(ps):
				241	state = pass_visit_dict.get(ps, State.NotVisited)
				242	if state == State.Visited:
				243	return True
				244	elif state == State.BeingVisited:
				245	return False # this is a loop, need to remove this link
				246	elif state == State.NotVisited:
				247	pass_visit_dict[ps] = State.BeingVisited
				248
				249	ps.dag_predecessors = []
				250	for pred in ps.predecessors:
				251	if visit_pass(pred):
				252	ps.dag_predecessors.append(pred)
				253
				254	pass_visit_dict[ps] = State.Visited
				255	return True
				256
				257	for ps in self.passes:
				258	if not ps.successors:
				259	visit_pass(ps)
				260
				261	def build_cascaded_pass_links(self):
				262	for cps in self.cascaded_passes:
				263	cps.predecessors = []
				264	cps.successors = []
				265
				266	for cps in self.cascaded_passes:
				267	for tens in cps.inputs:
				268	for op in tens.ops:
				269	pred_cpass = op.scheduled_pass.cascade
				270	if cps not in pred_cpass.successors:
				271	pred_cpass.successors.append(cps)
				272
				273	if pred_cpass not in cps.predecessors:
				274	cps.predecessors.append(pred_cpass)
				275
				276	assert tens in pred_cpass.outputs
				277
				278	def refresh_after_modification(self):
				279	self.update_consumers()
				280
				281	def prune_startup_init_pass(self):
				282	assert len(self.passes) >= 1
				283	ps = self.passes[0]
				284	assert ps.placement == PassPlacement.StartupInit
				285
				286	ps.outputs = [out_tens for out_tens in ps.outputs if len(out_tens.consumers()) > 0]
				287	ps.ops = [op for op in ps.ops if op.outputs[0] in ps.outputs]
				288
				289	def get_all_ops(self):
				290	all_ops = []
				291	visit_op_set = set()
				292	visit_tensor_set = set()
				293
				294	def visit_op(op):
				295	if op in visit_op_set:
				296	return
				297	visit_op_set.add(op)
				298	for inp in op.inputs:
				299	visit_tensor(inp)
				300
				301	all_ops.append(op)
				302
				303	def visit_tensor(tens):
				304	if tens in visit_tensor_set:
				305	return
				306	visit_tensor_set.add(tens)
				307	for op in tens.ops:
				308	visit_op(op)
				309
				310	for tens in self.output_tensors:
				311	visit_tensor(tens)
				312
				313	return all_ops
				314
				315	def print_operators(self):
Michael McGeagh	775e396	2020-07-28 11:44:22 +0100	[diff] [blame]	316	print("print_operators()", self.name)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	317	all_ops = self.get_all_ops()
				318	unique_ops = []
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	319	for op in all_ops:
				320	if op.type in set(("Const", "Identity", "Placeholder")):
				321	continue
				322
				323	attrs = op.attrs
				324	if (
				325	op.type == "Conv2D"
				326	or op.type == "DepthwiseConv2dNative"
				327	or op.type == "Conv2DBiasAct"
				328	or op.type == "DepthwiseConv2dBiasAct"
				329	):
				330	kshape = op.inputs[1].shape
				331	attrs["kshape"] = [kshape[0], kshape[1]]
				332	attrs["type"] = op.type
				333	attrs.pop("use_cudnn_on_gpu", None)
				334	if attrs not in unique_ops:
				335	unique_ops.append(attrs)
				336	# print attributes in human readable format
				337	a = attrs.copy()
				338	s = a.pop("type")
				339	data_format = a.pop("data_format", None)
				340	if data_format and data_format != b"NHWC":
				341	s += " " + str(data_format)
				342	t = a.pop("T", None)
				343	if t:
				344	s += " " + str(t)[9:-2]
				345	srct = a.pop("SrcT", None)
				346	if srct:
				347	s += " " + str(srct)[9:-2]
				348	dstt = a.pop("DstT", None)
				349	if dstt:
				350	s += "->" + str(dstt)[9:-2]
				351	print(s + " " + str(a))
				352
				353	def print_graph(self):
Michael McGeagh	775e396	2020-07-28 11:44:22 +0100	[diff] [blame]	354	print("print_graph()", self.name)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	355	all_ops = self.get_all_ops()
				356	for idx, op in enumerate(all_ops):
				357	print(idx, op.type, op.name)
				358
				359	def print_graph_with_tensors(self):
Michael McGeagh	775e396	2020-07-28 11:44:22 +0100	[diff] [blame]	360	print("print_graph_with_tensors()", self.name)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	361	all_ops = self.get_all_ops()
				362	for idx, op in enumerate(all_ops):
				363	print(idx, op.type, op.name)
				364	for idx, tens in enumerate(op.inputs):
Patrik Gustavsson	eca2e95	2020-05-27 09:15:11 +0200	[diff] [blame]	365	print(
				366	" Input %02d %20s %20s %20s %s"
				367	% (idx, tens.purpose.name, tens.mem_area.name, tens.mem_type.name, tens)
				368	)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	369	for idx, tens in enumerate(op.outputs):
Patrik Gustavsson	eca2e95	2020-05-27 09:15:11 +0200	[diff] [blame]	370	print(
				371	" Output %02d %20s %20s %20s %s"
				372	% (idx, tens.purpose.name, tens.mem_area.name, tens.mem_type.name, tens)
				373	)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	374	print()
				375
				376	def print_graph_with_tensor_quantization(self):
Michael McGeagh	775e396	2020-07-28 11:44:22 +0100	[diff] [blame]	377	print("print_graph_with_tensor_quantization()", self.name)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	378	all_ops = self.get_all_ops()
				379	for idx, op in enumerate(all_ops):
				380	print(idx, op.type, op.name)
				381	for idx, tens in enumerate(op.inputs):
				382	q = tens.quantization
				383	if q is None:
				384	print(" Input %02d %10s NO QUANTIZATION INFO %s" % (idx, tens.dtype, tens.name))
				385	else:
				386	print(
				387	" Input %02d %10s min=%s max=%s scale=%s zero_point=%s %s"
				388	% (idx, tens.dtype, q.min, q.max, q.scale_f32, q.zero_point, tens.name)
				389	)
				390	for idx, tens in enumerate(op.outputs):
				391	q = tens.quantization
				392	if q is None:
				393	print(" Output %02d %10s NO QUANTIZATION INFO %s" % (idx, tens.dtype, tens.name))
				394	else:
				395	print(
				396	" Output %02d %10s min=%s max=%s scale=%s zero_point=%s %s"
				397	% (idx, tens.dtype, q.min, q.max, q.scale_f32, q.zero_point, tens.name)
				398	)
				399	print()
				400
				401	def print_passes(self):
Michael McGeagh	775e396	2020-07-28 11:44:22 +0100	[diff] [blame]	402	print("print_passes()", self.name)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	403	for idx, ps in enumerate(self.passes):
				404	print("%03d %s" % (idx * 2, ps))
				405
				406	def print_passes_with_tensors(self):
Michael McGeagh	775e396	2020-07-28 11:44:22 +0100	[diff] [blame]	407	print("print_passes_with_tensors()", self.name)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	408	for idx, ps in enumerate(self.passes):
				409	print("%3d %s" % (idx * 2, ps))
				410	for idx, tens in enumerate(ps.inputs):
				411	print(
				412	" Input %2d %-15s %-15s %-15s %s"
				413	% (idx, tens.purpose.name, tens.mem_area.name, tens.format.name, tens.name)
				414	)
				415	for idx, tens in enumerate(ps.intermediates):
				416	print(
				417	" Intermediate %2d %-15s %-15s %-15s %s"
				418	% (idx, tens.purpose.name, tens.mem_area.name, tens.format.name, tens.name)
				419	)
				420	for idx, tens in enumerate(ps.outputs):
				421	print(
				422	" Output %2d %-15s %-15s %-15s %s"
				423	% (idx, tens.purpose.name, tens.mem_area.name, tens.format.name, tens.name)
				424	)
				425	print()
				426
				427	def print_cascaded_passes(self):
Michael McGeagh	775e396	2020-07-28 11:44:22 +0100	[diff] [blame]	428	print("print_cascaded_passes()", self.name)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	429	for idx, ps in enumerate(self.cascaded_passes):
				430	print("%3d %s SRAM used %.1f KB" % (idx * 2, ps, ps.sram_used / 1024))
				431
				432	def print_cascaded_passes_with_tensors(self):
Michael McGeagh	775e396	2020-07-28 11:44:22 +0100	[diff] [blame]	433	print("print_cascaded_passes_with_tensors()", self.name)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	434	for idx, ps in enumerate(self.cascaded_passes):
				435	print("%3d %s SRAM used %.1f KB" % (idx * 2, ps, ps.sram_used / 1024))
				436	for idx, tens in enumerate(ps.inputs):
				437	print(
				438	" Input %2d %-15s %-15s %-15s %s"
				439	% (idx, tens.purpose.name, tens.mem_area.name, tens.format.name, tens.name)
				440	)
				441	for idx, tens in enumerate(ps.intermediates):
				442	print(
				443	" Intermediate %2d %-15s %-15s %-15s %s"
				444	% (idx, tens.purpose.name, tens.mem_area.name, tens.format.name, tens.name)
				445	)
				446	for idx, tens in enumerate(ps.outputs):
				447	print(
				448	" Output %2d %-15s %-15s %-15s %s"
				449	% (idx, tens.purpose.name, tens.mem_area.name, tens.format.name, tens.name)
				450	)
				451	print()
				452
				453	def print_cascaded_passes_with_tensor_sizes(self):
Michael McGeagh	775e396	2020-07-28 11:44:22 +0100	[diff] [blame]	454	print("print_cascaded_passes_with_tensor_sizes()", self.name)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	455	for idx, ps in enumerate(self.cascaded_passes):
				456	print("%3d %s SRAM used %.1f KB" % (idx * 2, ps, ps.sram_used / 1024))
				457	for idx, tens in enumerate(ps.inputs):
				458	print(
				459	" Input %2d %7.1f KB %-24s %-15s %-15s %-20s %s"
				460	% (
				461	idx,
				462	tens.storage_size() / 1024,
				463	tens.storage_shape,
				464	tens.mem_area.name,
				465	tens.purpose.name,
				466	tens.format.name,
				467	tens.name,
				468	)
				469	)
				470	for idx, tens in enumerate(ps.intermediates):
				471	print(
				472	" Intermediate %2d %7.1f KB %-24s %-15s %-15s %-20s %s"
				473	% (
				474	idx,
				475	tens.storage_size() / 1024,
				476	tens.storage_shape,
				477	tens.mem_area.name,
				478	tens.purpose.name,
				479	tens.format.name,
				480	tens.name,
				481	)
				482	)
				483	for idx, tens in enumerate(ps.outputs):
				484	print(
				485	" Output %2d %7.1f KB %-24s %-15s %-15s %-20s %s"
				486	% (
				487	idx,
				488	tens.storage_size() / 1024,
				489	tens.storage_shape,
				490	tens.mem_area.name,
				491	tens.purpose.name,
				492	tens.format.name,
				493	tens.name,
				494	)
				495	)
				496	print()
				497
				498	def print_high_level_command_stream(self):
Michael McGeagh	775e396	2020-07-28 11:44:22 +0100	[diff] [blame]	499	print("print_high_level_command_stream()", self.name)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	500	for idx, cmd in enumerate(self.high_level_command_stream):
				501	print("%3d %s" % (idx, cmd))
				502
				503
				504	class Graph:
				505	def __init__(self, name="<unnamed>", batch_size=1):
				506	self.name = name
				507	self.batch_size = batch_size
				508	self.subgraphs = []
Michael McGeagh	22f74e1	2020-08-07 16:21:03 +0100	[diff] [blame]	509	self.metadata = []
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	510	self.memory_used = {}
				511	self.bits_per_element = {}
				512	self.total_size = {}
				513	self.total_elements = {}
Louis Verhaard	3c07c97	2020-05-07 08:12:58 +0200	[diff] [blame]	514	self.weight_cache = None # See CompressedWeightCache
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	515
				516	def get_root_subgraph(self):
				517	return self.subgraphs[0]
				518
				519	def prune_startup_init_pass(self):
				520	for sg in self.subgraphs:
				521	sg.prune_startup_init_pass()
				522
				523	def update_consumers(self):
				524	for sg in self.subgraphs:
				525	sg.update_consumers()
				526
				527	def refresh_after_modification(self):
				528	for sg in self.subgraphs:
				529	sg.refresh_after_modification()
				530
				531	def print_operators(self):
				532	for sg in self.subgraphs:
				533	sg.print_operators()
				534
				535	def print_graph(self):
				536	for sg in self.subgraphs:
				537	sg.print_graph()
				538
				539	def print_graph_with_tensors(self):
				540	for sg in self.subgraphs:
				541	sg.print_graph_with_tensors()
				542
				543	def print_graph_with_tensor_quantization(self):
				544	for sg in self.subgraphs:
				545	sg.print_graph_with_tensor_quantization()
				546
				547	def print_passes(self):
				548	for sg in self.subgraphs:
				549	sg.print_passes()
				550
				551	def print_passes_with_tensors(self):
				552	for sg in self.subgraphs:
				553	sg.print_passes_with_tensors()
				554
				555	def print_cascaded_passes(self):
				556	for sg in self.subgraphs:
				557	sg.print_cascaded_passes()
				558
				559	def print_cascaded_passes_with_tensors(self):
				560	for sg in self.subgraphs:
				561	sg.print_cascaded_passes_with_tensors()
				562
				563	def print_cascaded_passes_with_tensor_sizes(self):
				564	for sg in self.subgraphs:
				565	sg.print_cascaded_passes_with_tensor_sizes()
				566
				567	def print_high_level_command_stream(self):
				568	for sg in self.subgraphs:
				569	sg.print_high_level_command_stream()