Blame - ethosu/vela/nn_graph.py - ml/ethos-u/ethos-u-vela

blob: 4a2855b2631ddcd64c661533b6f75d131406a3ff [file] [log] [blame]

Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	1	# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
				2	#
				3	# SPDX-License-Identifier: Apache-2.0
				4	#
				5	# Licensed under the Apache License, Version 2.0 (the License); you may
				6	# not use this file except in compliance with the License.
				7	# You may obtain a copy of the License at
				8	#
				9	# www.apache.org/licenses/LICENSE-2.0
				10	#
				11	# Unless required by applicable law or agreed to in writing, software
				12	# distributed under the License is distributed on an AS IS BASIS, WITHOUT
				13	# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	# See the License for the specific language governing permissions and
				15	# limitations under the License.
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	16	# Description:
				17	# Neural network graph classes and enums.
				18	# Pass - A packed pass containing one or more Operations.
				19	# CascadedPass - A scheduled pass containing one or more Passes, as well as a scheduling strategy and block
				20	# configurations.
				21	# Subgraph - Holds a neural network subgraph, pointing at Tensors, Operations, Passes, and CascadedPasses.
				22	# Graph - A full neural network graph with one or more Subgraphs.
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	23	import enum
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	24
				25
				26	class PassPlacement(enum.Enum):
				27	Unknown = 0
				28	Cpu = 1
				29	Npu = 2
				30	MemoryOnly = 3
				31	StartupInit = 4
				32
				33
				34	class TensorAllocator(enum.Enum):
				35	LinearAlloc = 1
				36	Greedy = 2
				37
				38	def __str__(self):
				39	return self.name
				40
				41
				42	class Pass:
				43	def __init__(self, name, placement, is_element_wise, npu_block_type):
				44	self.inputs = []
				45	self.intermediates = []
				46	self.outputs = []
				47	self.ops = []
				48	self.primary_op = None
				49	self.ifm_tensor = None
				50	self.ifm2_tensor = None
				51	self.ofm_tensor = None
				52	self.weight_tensor = None
				53	self.scale_tensor = None
Fredrik Svedberg	a0c3624	2020-06-03 15:43:31 +0200	[diff] [blame^]	54	self.lut_tensor = None
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	55	self.name = name
				56	self.cascade = None
				57	self.placement = placement
				58
				59	# TODO: rename is_element_wise because it is not the same as an ElementWise operator. It is used by the tensor
				60	# allocation and requires that the OFM and IFM has the exact same address. Essentially complete overlap.
				61	self.is_element_wise = is_element_wise
				62	self.npu_block_type = npu_block_type
				63	self.block_config = None # will be filled in by scheduler
				64	self.shared_buffer = None # will be filled in by scheduler
				65
				66	self.predecessors = []
				67	self.successors = []
				68
				69	def __str__(self):
				70	return "<nng.Pass '%s', %s, ops=%s>" % (self.name, self.placement, [op.type for op in self.ops])
				71
				72	__repr__ = __str__
				73
				74	def get_primary_op_ifm_weights(self):
				75	if not self.primary_op:
				76	return None, None
				77	return self.primary_op.get_ifm_ifm2_weights_ofm()[::2]
				78
				79	def get_primary_op_ifm_ifm2_weights_ofm(self):
				80	if not self.primary_op:
				81	return None, None, None, None
				82	return self.primary_op.get_ifm_ifm2_weights_ofm()
				83
				84	def get_primary_op_ifm_weights_biases_ofm(self):
				85	if not self.primary_op:
				86	return None, None, None, None
				87	return self.primary_op.get_ifm_weights_biases_ofm()
				88
Fredrik Svedberg	a0c3624	2020-06-03 15:43:31 +0200	[diff] [blame^]	89	def get_primary_op_lut(self):
				90	if not self.primary_op:
				91	return None
				92	return self.primary_op.activation_lut
				93
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	94
				95	class SchedulingStrategy(enum.Enum):
				96	Unknown = -1
				97	IfmStream = 0
				98	WeightStream = 1
				99
				100
				101	class SchedulerRewrite(enum.Enum):
				102	Nop = 0
				103	ChangeTensorSubPurpose = 1
				104
				105
				106	class CascadedPass:
				107	def __init__(self, name, strat, inputs, intermediates, outputs, passes, placement, is_element_wise):
				108	self.name = name
				109	self.strategy = strat
				110	self.inputs = inputs
				111	self.intermediates = intermediates
				112	self.outputs = outputs
				113	self.passes = passes
				114	self.placement = placement
				115	self.is_element_wise = is_element_wise
				116
				117	self.predecessors = []
				118	self.successors = []
				119
				120	def __str__(self):
				121	return "<nng.CascadedPass strategy=%s x %s '%s', passes=%s, block_configs=%s>" % (
				122	self.strategy,
				123	len(self.passes),
				124	self.name,
				125	[ps.name for ps in self.passes],
				126	[ps.block_config for ps in self.passes],
				127	)
				128
				129	__repr__ = __str__
				130
				131
				132	class Subgraph:
				133	def __init__(self, name="<unnamed>", placement=PassPlacement.Cpu):
				134	self.output_tensors = []
				135	self.input_tensors = []
				136	self.original_inputs = [] # Preserve the original input order
				137	self.passes = []
				138	self.cascaded_passes = []
				139	self.name = name
				140	self.high_level_command_stream = []
				141	self.placement = placement
				142	self.command_stream_tensor = None
				143	self.flash_tensor = None
				144
				145	self.memory_used = {}
Patrik Gustavsson	eca2e95	2020-05-27 09:15:11 +0200	[diff] [blame]	146	self.memory_used_per_type = {}
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	147
				148	def __str__(self):
				149	return "<nng.Subgraph '%s', n_passes=%d, n_cascaded_passes=%d>" % (
				150	self.name,
				151	len(self.passes),
				152	len(self.cascaded_passes),
				153	)
				154
				155	__repr__ = __str__
				156
				157	def update_consumers(self):
				158	visit_op_set = set()
				159	visit_tensor_set = set()
				160	self.input_tensors = []
				161
				162	print_visit = False
				163
				164	def visit_op(op):
				165	if op in visit_op_set:
				166	return
				167
				168	visit_op_set.add(op)
				169	for inp in op.inputs:
				170	if print_visit:
				171	print(inp, "adding consumer", op)
				172	visit_tensor(inp)
				173	inp.consumer_list.append(op)
				174
				175	if op.type in set(("Placeholder", "SubgraphInput")):
				176	assert len(op.outputs) == 1
				177	self.input_tensors.append(op.outputs[0])
				178
				179	for out in op.outputs:
				180	if out not in visit_tensor_set:
				181	out.consumer_list = [] # reset unvisited output, just in case
				182
				183	def visit_tensor(tens):
				184	if tens in visit_tensor_set:
				185	return
				186	visit_tensor_set.add(tens)
				187	tens.consumer_list = []
				188	for op in tens.ops:
				189	visit_op(op)
				190
				191	for ps in self.passes:
				192	for tens in ps.outputs + ps.inputs:
				193	tens.consumer_list = [] # reset unvisited tensors to start with
				194
				195	for tens in self.output_tensors:
				196	visit_tensor(tens)
				197	tens.consumer_list.append(None) # special op to indicate that the graph consumes the result
				198
				199	print_visit = True
				200	for ps in self.passes:
				201	for op in ps.ops:
				202	visit_op(op)
				203	for tens in ps.inputs:
				204	visit_tensor(tens)
				205
				206	def build_pass_links(self):
				207	for idx, ps in enumerate(self.passes):
				208	ps.time = 2 * idx
				209	ps.predecessors = []
				210	ps.successors = []
				211
				212	for ps in self.passes:
				213	for tens in ps.inputs:
				214	for op in tens.ops:
				215	pred_pass = op.scheduled_pass
				216	assert pred_pass.time < ps.time
				217	if ps not in pred_pass.successors:
				218	pred_pass.successors.append(ps)
				219
				220	if pred_pass not in ps.predecessors:
				221	ps.predecessors.append(pred_pass)
				222
				223	assert tens in pred_pass.outputs
				224
				225	def build_pass_dag_predecessors(self):
				226	for ps in self.passes:
				227	ps.dag_predecessors = []
				228
				229	class State(enum.Enum):
				230	NotVisited = 0
				231	BeingVisited = 1
				232	Visited = 2
				233
				234	pass_visit_dict = {}
				235
				236	def visit_pass(ps):
				237	state = pass_visit_dict.get(ps, State.NotVisited)
				238	if state == State.Visited:
				239	return True
				240	elif state == State.BeingVisited:
				241	return False # this is a loop, need to remove this link
				242	elif state == State.NotVisited:
				243	pass_visit_dict[ps] = State.BeingVisited
				244
				245	ps.dag_predecessors = []
				246	for pred in ps.predecessors:
				247	if visit_pass(pred):
				248	ps.dag_predecessors.append(pred)
				249
				250	pass_visit_dict[ps] = State.Visited
				251	return True
				252
				253	for ps in self.passes:
				254	if not ps.successors:
				255	visit_pass(ps)
				256
				257	def build_cascaded_pass_links(self):
				258	for cps in self.cascaded_passes:
				259	cps.predecessors = []
				260	cps.successors = []
				261
				262	for cps in self.cascaded_passes:
				263	for tens in cps.inputs:
				264	for op in tens.ops:
				265	pred_cpass = op.scheduled_pass.cascade
				266	if cps not in pred_cpass.successors:
				267	pred_cpass.successors.append(cps)
				268
				269	if pred_cpass not in cps.predecessors:
				270	cps.predecessors.append(pred_cpass)
				271
				272	assert tens in pred_cpass.outputs
				273
				274	def refresh_after_modification(self):
				275	self.update_consumers()
				276
				277	def prune_startup_init_pass(self):
				278	assert len(self.passes) >= 1
				279	ps = self.passes[0]
				280	assert ps.placement == PassPlacement.StartupInit
				281
				282	ps.outputs = [out_tens for out_tens in ps.outputs if len(out_tens.consumers()) > 0]
				283	ps.ops = [op for op in ps.ops if op.outputs[0] in ps.outputs]
				284
				285	def get_all_ops(self):
				286	all_ops = []
				287	visit_op_set = set()
				288	visit_tensor_set = set()
				289
				290	def visit_op(op):
				291	if op in visit_op_set:
				292	return
				293	visit_op_set.add(op)
				294	for inp in op.inputs:
				295	visit_tensor(inp)
				296
				297	all_ops.append(op)
				298
				299	def visit_tensor(tens):
				300	if tens in visit_tensor_set:
				301	return
				302	visit_tensor_set.add(tens)
				303	for op in tens.ops:
				304	visit_op(op)
				305
				306	for tens in self.output_tensors:
				307	visit_tensor(tens)
				308
				309	return all_ops
				310
				311	def print_operators(self):
Michael McGeagh	775e396	2020-07-28 11:44:22 +0100	[diff] [blame]	312	print("print_operators()", self.name)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	313	all_ops = self.get_all_ops()
				314	unique_ops = []
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	315	for op in all_ops:
				316	if op.type in set(("Const", "Identity", "Placeholder")):
				317	continue
				318
				319	attrs = op.attrs
				320	if (
				321	op.type == "Conv2D"
				322	or op.type == "DepthwiseConv2dNative"
				323	or op.type == "Conv2DBiasAct"
				324	or op.type == "DepthwiseConv2dBiasAct"
				325	):
				326	kshape = op.inputs[1].shape
				327	attrs["kshape"] = [kshape[0], kshape[1]]
				328	attrs["type"] = op.type
				329	attrs.pop("use_cudnn_on_gpu", None)
				330	if attrs not in unique_ops:
				331	unique_ops.append(attrs)
				332	# print attributes in human readable format
				333	a = attrs.copy()
				334	s = a.pop("type")
				335	data_format = a.pop("data_format", None)
				336	if data_format and data_format != b"NHWC":
				337	s += " " + str(data_format)
				338	t = a.pop("T", None)
				339	if t:
				340	s += " " + str(t)[9:-2]
				341	srct = a.pop("SrcT", None)
				342	if srct:
				343	s += " " + str(srct)[9:-2]
				344	dstt = a.pop("DstT", None)
				345	if dstt:
				346	s += "->" + str(dstt)[9:-2]
				347	print(s + " " + str(a))
				348
				349	def print_graph(self):
Michael McGeagh	775e396	2020-07-28 11:44:22 +0100	[diff] [blame]	350	print("print_graph()", self.name)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	351	all_ops = self.get_all_ops()
				352	for idx, op in enumerate(all_ops):
				353	print(idx, op.type, op.name)
				354
				355	def print_graph_with_tensors(self):
Michael McGeagh	775e396	2020-07-28 11:44:22 +0100	[diff] [blame]	356	print("print_graph_with_tensors()", self.name)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	357	all_ops = self.get_all_ops()
				358	for idx, op in enumerate(all_ops):
				359	print(idx, op.type, op.name)
				360	for idx, tens in enumerate(op.inputs):
Patrik Gustavsson	eca2e95	2020-05-27 09:15:11 +0200	[diff] [blame]	361	print(
				362	" Input %02d %20s %20s %20s %s"
				363	% (idx, tens.purpose.name, tens.mem_area.name, tens.mem_type.name, tens)
				364	)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	365	for idx, tens in enumerate(op.outputs):
Patrik Gustavsson	eca2e95	2020-05-27 09:15:11 +0200	[diff] [blame]	366	print(
				367	" Output %02d %20s %20s %20s %s"
				368	% (idx, tens.purpose.name, tens.mem_area.name, tens.mem_type.name, tens)
				369	)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	370	print()
				371
				372	def print_graph_with_tensor_quantization(self):
Michael McGeagh	775e396	2020-07-28 11:44:22 +0100	[diff] [blame]	373	print("print_graph_with_tensor_quantization()", self.name)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	374	all_ops = self.get_all_ops()
				375	for idx, op in enumerate(all_ops):
				376	print(idx, op.type, op.name)
				377	for idx, tens in enumerate(op.inputs):
				378	q = tens.quantization
				379	if q is None:
				380	print(" Input %02d %10s NO QUANTIZATION INFO %s" % (idx, tens.dtype, tens.name))
				381	else:
				382	print(
				383	" Input %02d %10s min=%s max=%s scale=%s zero_point=%s %s"
				384	% (idx, tens.dtype, q.min, q.max, q.scale_f32, q.zero_point, tens.name)
				385	)
				386	for idx, tens in enumerate(op.outputs):
				387	q = tens.quantization
				388	if q is None:
				389	print(" Output %02d %10s NO QUANTIZATION INFO %s" % (idx, tens.dtype, tens.name))
				390	else:
				391	print(
				392	" Output %02d %10s min=%s max=%s scale=%s zero_point=%s %s"
				393	% (idx, tens.dtype, q.min, q.max, q.scale_f32, q.zero_point, tens.name)
				394	)
				395	print()
				396
				397	def print_passes(self):
Michael McGeagh	775e396	2020-07-28 11:44:22 +0100	[diff] [blame]	398	print("print_passes()", self.name)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	399	for idx, ps in enumerate(self.passes):
				400	print("%03d %s" % (idx * 2, ps))
				401
				402	def print_passes_with_tensors(self):
Michael McGeagh	775e396	2020-07-28 11:44:22 +0100	[diff] [blame]	403	print("print_passes_with_tensors()", self.name)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	404	for idx, ps in enumerate(self.passes):
				405	print("%3d %s" % (idx * 2, ps))
				406	for idx, tens in enumerate(ps.inputs):
				407	print(
				408	" Input %2d %-15s %-15s %-15s %s"
				409	% (idx, tens.purpose.name, tens.mem_area.name, tens.format.name, tens.name)
				410	)
				411	for idx, tens in enumerate(ps.intermediates):
				412	print(
				413	" Intermediate %2d %-15s %-15s %-15s %s"
				414	% (idx, tens.purpose.name, tens.mem_area.name, tens.format.name, tens.name)
				415	)
				416	for idx, tens in enumerate(ps.outputs):
				417	print(
				418	" Output %2d %-15s %-15s %-15s %s"
				419	% (idx, tens.purpose.name, tens.mem_area.name, tens.format.name, tens.name)
				420	)
				421	print()
				422
				423	def print_cascaded_passes(self):
Michael McGeagh	775e396	2020-07-28 11:44:22 +0100	[diff] [blame]	424	print("print_cascaded_passes()", self.name)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	425	for idx, ps in enumerate(self.cascaded_passes):
				426	print("%3d %s SRAM used %.1f KB" % (idx * 2, ps, ps.sram_used / 1024))
				427
				428	def print_cascaded_passes_with_tensors(self):
Michael McGeagh	775e396	2020-07-28 11:44:22 +0100	[diff] [blame]	429	print("print_cascaded_passes_with_tensors()", self.name)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	430	for idx, ps in enumerate(self.cascaded_passes):
				431	print("%3d %s SRAM used %.1f KB" % (idx * 2, ps, ps.sram_used / 1024))
				432	for idx, tens in enumerate(ps.inputs):
				433	print(
				434	" Input %2d %-15s %-15s %-15s %s"
				435	% (idx, tens.purpose.name, tens.mem_area.name, tens.format.name, tens.name)
				436	)
				437	for idx, tens in enumerate(ps.intermediates):
				438	print(
				439	" Intermediate %2d %-15s %-15s %-15s %s"
				440	% (idx, tens.purpose.name, tens.mem_area.name, tens.format.name, tens.name)
				441	)
				442	for idx, tens in enumerate(ps.outputs):
				443	print(
				444	" Output %2d %-15s %-15s %-15s %s"
				445	% (idx, tens.purpose.name, tens.mem_area.name, tens.format.name, tens.name)
				446	)
				447	print()
				448
				449	def print_cascaded_passes_with_tensor_sizes(self):
Michael McGeagh	775e396	2020-07-28 11:44:22 +0100	[diff] [blame]	450	print("print_cascaded_passes_with_tensor_sizes()", self.name)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	451	for idx, ps in enumerate(self.cascaded_passes):
				452	print("%3d %s SRAM used %.1f KB" % (idx * 2, ps, ps.sram_used / 1024))
				453	for idx, tens in enumerate(ps.inputs):
				454	print(
				455	" Input %2d %7.1f KB %-24s %-15s %-15s %-20s %s"
				456	% (
				457	idx,
				458	tens.storage_size() / 1024,
				459	tens.storage_shape,
				460	tens.mem_area.name,
				461	tens.purpose.name,
				462	tens.format.name,
				463	tens.name,
				464	)
				465	)
				466	for idx, tens in enumerate(ps.intermediates):
				467	print(
				468	" Intermediate %2d %7.1f KB %-24s %-15s %-15s %-20s %s"
				469	% (
				470	idx,
				471	tens.storage_size() / 1024,
				472	tens.storage_shape,
				473	tens.mem_area.name,
				474	tens.purpose.name,
				475	tens.format.name,
				476	tens.name,
				477	)
				478	)
				479	for idx, tens in enumerate(ps.outputs):
				480	print(
				481	" Output %2d %7.1f KB %-24s %-15s %-15s %-20s %s"
				482	% (
				483	idx,
				484	tens.storage_size() / 1024,
				485	tens.storage_shape,
				486	tens.mem_area.name,
				487	tens.purpose.name,
				488	tens.format.name,
				489	tens.name,
				490	)
				491	)
				492	print()
				493
				494	def print_high_level_command_stream(self):
Michael McGeagh	775e396	2020-07-28 11:44:22 +0100	[diff] [blame]	495	print("print_high_level_command_stream()", self.name)
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	496	for idx, cmd in enumerate(self.high_level_command_stream):
				497	print("%3d %s" % (idx, cmd))
				498
				499
				500	class Graph:
				501	def __init__(self, name="<unnamed>", batch_size=1):
				502	self.name = name
				503	self.batch_size = batch_size
				504	self.subgraphs = []
				505
				506	self.memory_used = {}
				507	self.bits_per_element = {}
				508	self.total_size = {}
				509	self.total_elements = {}
Louis Verhaard	3c07c97	2020-05-07 08:12:58 +0200	[diff] [blame]	510	self.weight_cache = None # See CompressedWeightCache
Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame]	511
				512	def get_root_subgraph(self):
				513	return self.subgraphs[0]
				514
				515	def prune_startup_init_pass(self):
				516	for sg in self.subgraphs:
				517	sg.prune_startup_init_pass()
				518
				519	def update_consumers(self):
				520	for sg in self.subgraphs:
				521	sg.update_consumers()
				522
				523	def refresh_after_modification(self):
				524	for sg in self.subgraphs:
				525	sg.refresh_after_modification()
				526
				527	def print_operators(self):
				528	for sg in self.subgraphs:
				529	sg.print_operators()
				530
				531	def print_graph(self):
				532	for sg in self.subgraphs:
				533	sg.print_graph()
				534
				535	def print_graph_with_tensors(self):
				536	for sg in self.subgraphs:
				537	sg.print_graph_with_tensors()
				538
				539	def print_graph_with_tensor_quantization(self):
				540	for sg in self.subgraphs:
				541	sg.print_graph_with_tensor_quantization()
				542
				543	def print_passes(self):
				544	for sg in self.subgraphs:
				545	sg.print_passes()
				546
				547	def print_passes_with_tensors(self):
				548	for sg in self.subgraphs:
				549	sg.print_passes_with_tensors()
				550
				551	def print_cascaded_passes(self):
				552	for sg in self.subgraphs:
				553	sg.print_cascaded_passes()
				554
				555	def print_cascaded_passes_with_tensors(self):
				556	for sg in self.subgraphs:
				557	sg.print_cascaded_passes_with_tensors()
				558
				559	def print_cascaded_passes_with_tensor_sizes(self):
				560	for sg in self.subgraphs:
				561	sg.print_cascaded_passes_with_tensor_sizes()
				562
				563	def print_high_level_command_stream(self):
				564	for sg in self.subgraphs:
				565	sg.print_high_level_command_stream()