# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Description:
# Mark purpose and select formats for Tensors. Also compresses the weights.
from . import rewrite_graph
from . import weight_compressor
from .architecture_features import Block
from .operation import NpuBlockType
from .tensor import TensorFormat
from .tensor import TensorPurpose


def purpose_from_list(lst):
    def purpose(op, idx):
        return lst[idx]

    return purpose
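
# Illustrative usage (a sketch, not part of the pass; names are hypothetical):
# the returned closure maps an input's index straight to its purpose, e.g. for
# a Conv2D-style op with inputs (ifm, weights, bias):
#
#   conv_purpose = purpose_from_list(
#       [TensorPurpose.FeatureMap, TensorPurpose.Weights, TensorPurpose.FeatureMap]
#   )
#   conv_purpose(op, 1)  # -> TensorPurpose.Weights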


def all_fm(op, idx):
    return TensorPurpose.FeatureMap


def all_parameter(op, idx):
    return TensorPurpose.FeatureMap


def input0_from_output_rest_parameter(op, idx):
    if idx == 0:
        res = op.outputs[0].purpose
        if res == TensorPurpose.Unknown:
            print("Warning: Propagating unknown tensor purpose", op)
        return res
    return TensorPurpose.FeatureMap


def inputs_from_output(op, idx):
    res = op.outputs[0].purpose
    if res == TensorPurpose.Unknown:
        print("Warning: Propagating unknown tensor purpose", op)
    return res
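

# Lookup table mapping op types to an input-purpose function. Each entry pairs
# a set of op types with a callable that maps an input index to that input's
# TensorPurpose; rewrite_mark_tensor_purpose() below scans the table top to
# bottom and stops at the first matching entry, so the final (None, all_fm)
# entry acts as a catch-all fallback.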
tensor_purposes = [  # ops, input_purpose
    (
        set(
            (
                "Relu",
                "Relu6",
                "Mul",
                "Add",
                "Sub",
                "Rsqrt",
                "Abs",
                "Cast",
                "Exp",
                "Floor",
                "FloorDiv",
                "FloorMod",
                "SquaredDifference",
                "AddN",
                "BiasAdd",
                "RealDiv",
                "Maximum",
                "Minimum",
                "Sigmoid",
                "Tanh",
                "FusedBatchNorm",
                "AvgPool",
                "MaxPool",
                "Squeeze",
                "Softmax",
                "LRN",
                "Assign",
                "BatchMatMul",
                "ZerosLike",
                "ExtractImagePatches",
                "MulAct",
                "AddAct",
                "SubAct",
                "DivAct",
                "AvgPoolAct",
                "MaxPoolAct",
                "LeakyRelu",
            )
        ),
        all_fm,
    ),
    (
        set(
            (
                "Conv2D",
                "DepthwiseConv2dNative",
                "MatMul",
                "Conv2DBiasAct",
                "DepthwiseConv2dBiasAct",
                "FullyConnectedAct",
            )
        ),
        purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.Weights, TensorPurpose.FeatureMap]),
    ),
    (
        set(("Conv2DBackpropInputSwitched",)),
        purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.Weights, TensorPurpose.FeatureMap]),
    ),
    (
        set(("QuantizedConv2D", "QuantizedMatMul")),
        purpose_from_list(
            [
                TensorPurpose.FeatureMap,
                TensorPurpose.Weights,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
            ]
        ),
    ),
    (
        set(
            (
                "Reshape",
                "Min",
                "Max",
                "Mean",
                "Pad",
                "MirrorPad",
                "ArgMax",
                "ArgMin",
                "ExpandDims",
                "ResizeNearestNeighbor",
                "ResizeBilinear",
                "Tile",
                "Transpose",
                "Mfcc",
            )
        ),
        purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.FeatureMap]),
    ),
    (
        set(("QuantizedReshape", "QuantizedResizeBilinear")),
        purpose_from_list(
            [TensorPurpose.FeatureMap, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap]
        ),
    ),
    (
        set(("QuantizedBiasAdd", "QuantizedAdd", "QuantizedMul")),
        purpose_from_list(
            [
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
            ]
        ),
    ),
    (
        set(
            (
                "Dequantize",
                "Quantize",
                "QuantizeV2",
                "QuantizedRelu",
                "QuantizedRelu1",
                "QuantizedRelu6",
                "QuantizedAvgPool",
                "QuantizedMaxPool",
                "Slice",
                "SplitV",
            )
        ),
        purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap]),
    ),
    (
        set(("BatchToSpaceND", "SpaceToBatchND", "DepthToSpaceND", "SpaceToDepthND")),
        purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap]),
    ),
    (
        set(("BlockLSTM",)),
        purpose_from_list(
            [
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.Weights,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
            ]
        ),
    ),
    (set(("SplitSliceRead",)), purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.FeatureMap])),
    (set(("Shape", "ConcatSliceWrite", "AudioSpectrogram")), purpose_from_list([TensorPurpose.FeatureMap])),
    (
        set(("StridedSlice",)),
        purpose_from_list(
            [TensorPurpose.FeatureMap, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap]
        ),
    ),
    (set(("Fill", "Pack", "Range")), all_parameter),
    (
        set(("Requantize",)),
        purpose_from_list(
            [
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
            ]
        ),
    ),
    (set(("Placeholder", "SubgraphInput", "Const", "VariableV2")), purpose_from_list([])),
    (set(("FakeQuantWithMinMaxArgs", "FakeQuantWithMinMaxVars")), input0_from_output_rest_parameter),
    (
        set(("Square", "Sqrt", "Log", "Less", "Enter", "Exit", "Identity", "StopGradient", "Merge", "Switch")),
        inputs_from_output,
    ),
    (None, all_fm),
]


for ops, input_purpose in tensor_purposes:
    if ops is None:
        continue
    for op in ops:
        assert len(op) > 1, "string literal has been decomposed"
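
# The assertion above guards against a missing trailing comma: in Python,
# set(("Conv2D")) iterates the bare string and yields single characters,
# while set(("Conv2D",)) builds a one-element set, so any length-1 "op"
# indicates a decomposed string literal.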


def mark_tensor_purpose(nng, arch, verbose_tensor_purpose=False):
    def mark_tensor_helper(tens, purpose):

        if tens.purpose == TensorPurpose.Unknown or tens.purpose == purpose:
            tens.purpose = purpose
        else:
            assert 0, "Cannot resolve tensor purpose %s and %s for tensor %s" % (tens.purpose, purpose, tens)
        tens.mem_area = arch.tensor_storage_mem_area[tens.purpose]

        if len(tens.ops) == 1 and tens.ops[0].type == "Const":
            tens.mem_area = (
                arch.permanent_storage_mem_area
            )  # special case constants, as they must be in permanent storage

    def rewrite_mark_tensor_purpose(op, arch):
        # find disconnected outputs and mark them as feature maps
        for tens in op.outputs:
            if not tens.consumers():
                mark_tensor_helper(tens, TensorPurpose.FeatureMap)

        for ops, input_purpose in tensor_purposes:
            if ops is None or op.type in ops:
                if ops is None:
                    print(
                        "warning: don't know how to mark up purpose for",
                        op.type,
                        op.inputs,
                        "triggering all feature map fallback",
                    )
                for idx, tens in enumerate(op.inputs):
                    purpose = input_purpose(op, idx)
                    mark_tensor_helper(tens, purpose)
                break
        return op

    for sg in nng.subgraphs:
        sg = rewrite_graph.rewrite_graph_pre_order(sg, arch, [], [rewrite_mark_tensor_purpose])
        for tens in sg.output_tensors:
            mark_tensor_helper(tens, TensorPurpose.FeatureMap)

    if verbose_tensor_purpose:
        nng.print_graph_with_tensors()

    return nng


reshape_operations = set(
    (
        "Reshape",
        "QuantizedReshape",
        "ExpandDims",
        "Squeeze",
        "BatchToSpaceND",
        "SpaceToBatchND",
        "DepthToSpaceND",
        "SpaceToDepthND",
        "Placeholder",
    )
)
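# Ops in this set only rearrange or relabel data rather than compute new
# values; the set is not referenced in this module, so it is presumably
# consumed by other compiler passes (an assumption, not verified here).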


def mark_tensor_format(nng, arch, verbose_tensor_format=False):
    formats_for_tensor = {}

    def init_tens(tens):
        if tens.purpose == TensorPurpose.FeatureMap:
            fmt = arch.default_feature_map_format
        elif tens.purpose == TensorPurpose.Weights:
            fmt = arch.default_weight_format
        else:
            assert 0, "unknown tensor purpose %s" % (tens.purpose,)
        return fmt
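
    # A weight tensor's real consumer can sit behind a DMA copy; the helper
    # below follows DMA outputs recursively until it finds an op that carries
    # an npu_block_type attribute.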
    def find_npu_usage_of_tensor(tens):
        for op in tens.consumers():
            if op.type == "DMA":
                return find_npu_usage_of_tensor(op.outputs[0])
            if "npu_block_type" in op.attrs:
                return op.attrs["npu_block_type"]
        return NpuBlockType.Default

    def visit_tens(tens, ps):
        if tens not in formats_for_tensor:
            fmt = init_tens(tens)
        else:
            fmt = formats_for_tensor[tens]

        formats_for_tensor[tens] = fmt

    for sg in nng.subgraphs:
        for ps in sg.passes:
            for tens in ps.outputs:
                visit_tens(tens, ps)
            for tens in ps.intermediates:
                visit_tens(tens, ps)
            for tens in ps.inputs:
                visit_tens(tens, ps)

    for tens, fmt in formats_for_tensor.items():
        tens.set_format(fmt, arch)
        if fmt == TensorFormat.WeightsCompressed and tens.values is not None:
            npu_block_type = find_npu_usage_of_tensor(tens)
            if len(tens.ops) == 1 and tens.ops[0].type == "DMA":
                weight_compressor.compress_weights(tens, arch, npu_block_type, Block(32, 32, 32), 32)
                # Alias compressed weights back into source tensor
                src_tens = tens.ops[0].inputs[0]
                src_tens.compressed_values = tens.compressed_values
                src_tens.storage_shape = tens.storage_shape
                src_tens.brick_size = tens.brick_size
                src_tens.weight_compression_scales = tens.weight_compression_scales
                src_tens.weight_compressed_offsets = tens.weight_compressed_offsets
                src_tens.compression_scale_for_worst_weight_stream = tens.compression_scale_for_worst_weight_stream
                src_tens.storage_compression_scale = tens.storage_compression_scale

    if verbose_tensor_format:
        nng.print_passes_with_tensors()
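

# Illustrative call order (a sketch; the real call site lives in the compiler
# driver, outside this file): purposes must be marked before formats, because
# init_tens() above asserts on any tensor whose purpose is still Unknown.
#
#   nng = mark_tensor_purpose(nng, arch)
#   nng = mark_tensor_format(nng, arch)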