Blame - ethosu/vela/operation.py - ml/ethos-u/ethos-u-vela

blob: d2f2806a814984ef86d42462dc3cd2686fbf85e0 [file] [log] [blame]

Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame^]	1	# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
				2	#
				3	# SPDX-License-Identifier: Apache-2.0
				4	#
				5	# Licensed under the Apache License, Version 2.0 (the License); you may
				6	# not use this file except in compliance with the License.
				7	# You may obtain a copy of the License at
				8	#
				9	# www.apache.org/licenses/LICENSE-2.0
				10	#
				11	# Unless required by applicable law or agreed to in writing, software
				12	# distributed under the License is distributed on an AS IS BASIS, WITHOUT
				13	# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	# See the License for the specific language governing permissions and
				15	# limitations under the License.
				16
				17
				18	# Description:
				19	# Internal representation of a Neural Network Operation.
				20
				21	import enum
				22
				23
				24	class NpuBlockType(enum.Enum):
				25	Default = 0
				26	ConvolutionMxN = 1
				27	VectorProduct = 2
				28	Pooling = 3
				29	ConvolutionDepthWise = 4
				30	ElementWise = 5
				31
				32
				33	class Operation:
				34	"""Class representing a Neural Network operation. Has a name, a type,
				35	input and output tensors, as well as an attribute dictionary."""
				36
				37	__slots__ = "type", "name", "attrs", "inputs", "outputs", "flops", "scheduled_pass", "run_on_npu"
				38
				39	def __init__(self, op_type, name):
				40	self.type = op_type
				41	self.name = name
				42	self.attrs = {}
				43	self.inputs = []
				44	self.outputs = []
				45	self.flops = 0
				46	self.run_on_npu = True
				47	self.scheduled_pass = None
				48
				49	def clone(self, suffix="_clone"):
				50	res = Operation(self.type, self.name + suffix)
				51
				52	res.attrs = dict(self.attrs)
				53	res.inputs = list(self.inputs)
				54	res.outputs = list(self.outputs)
				55	res.flops = self.flops
				56	res.scheduled_pass = self.scheduled_pass
				57
				58	return res
				59
				60	def __str__(self):
				61	return "<nng.Operation '%s' type=%s>" % (self.name, self.type)
				62
				63	__repr__ = __str__
				64
				65	def get_ifm_ifm2_weight_bias_ofm_indices(self):
				66	ifm_idx = -1
				67	ifm2_idx = -1
				68	weight_idx = -1
				69	bias_idx = -1
				70	ofm_idx = -1
				71	npu_block_type = self.attrs.get("npu_block_type", NpuBlockType.Default)
				72	if npu_block_type in set((NpuBlockType.ConvolutionMxN, NpuBlockType.ConvolutionDepthWise)):
				73	ifm_idx = 0
				74	weight_idx = 1
				75	ofm_idx = 0
				76
				77	if self.type in set(("Conv2DBiasAct", "DepthwiseConv2dBiasAct", "TransposeConvAct")):
				78	if len(self.inputs) >= 3:
				79	bias_idx = 2
				80
				81	elif npu_block_type == NpuBlockType.Pooling:
				82	ifm_idx = 0
				83	ofm_idx = 0
				84	elif npu_block_type == NpuBlockType.VectorProduct:
				85	ifm_idx = 0
				86	weight_idx = 1
				87	ofm_idx = 0
				88
				89	if self.type in set(("FullyConnectedAct",)):
				90	if len(self.inputs) >= 3:
				91	bias_idx = 2
				92
				93	if self.type == "BlockLSTM":
				94	ifm_idx = 3
				95	weight_idx = 4
				96	ofm_idx = 6
				97
				98	elif npu_block_type == NpuBlockType.ElementWise:
				99	ifm_idx = 0
				100	ifm2_idx = 1
				101	ofm_idx = 0
				102
				103	# LeakyRelu and Abs have a single IFM
				104	if self.type in set(("LeakyRelu", "Abs")):
				105	ifm2_idx = -1
				106
				107	elif self.type == "Conv2DBackpropInput":
				108	ifm_idx = 2
				109	weight_idx = 1
				110	ofm_idx = 0
				111
				112	elif self.type in set(("Squeeze", "Reshape", "QuantizedReshape", "ExpandDims")):
				113	ifm_idx = 0
				114	ofm_idx = 0
				115
				116	elif self.is_split_op():
				117	ifm_idx = 0
				118	ofm_idx = 0
				119	if self.type == "Split":
				120	ifm_idx = 1
				121
				122	elif self.is_concat_op():
				123	ifms, _ = self.get_concat_inputs_axis()
				124	ifm_idx = self.inputs.index(ifms[0])
				125	if len(ifms) > 1:
				126	ifm2_idx = self.inputs.index(ifms[1])
				127	ofm_idx = 0
				128
				129	return ifm_idx, ifm2_idx, weight_idx, bias_idx, ofm_idx
				130
				131	def get_ifm_ifm2_weights_ofm(self):
				132	ifm_tensor = None
				133	ifm2_tensor = None
				134	weight_tensor = None
				135	ofm_tensor = None
				136
				137	ifm_idx, ifm2_idx, weight_idx, bias_idx, ofm_idx = self.get_ifm_ifm2_weight_bias_ofm_indices()
				138	if ifm_idx != -1:
				139	ifm_tensor = self.inputs[ifm_idx]
				140	if ifm2_idx != -1:
				141	ifm2_tensor = self.inputs[ifm2_idx]
				142	if weight_idx != -1:
				143	weight_tensor = self.inputs[weight_idx]
				144	if ofm_idx != -1:
				145	ofm_tensor = self.outputs[ofm_idx]
				146
				147	return ifm_tensor, ifm2_tensor, weight_tensor, ofm_tensor
				148
				149	def get_ifm_weights_biases_ofm(self):
				150	ifm_tensor = None
				151	weight_tensor = None
				152	bias_tensor = None
				153	ofm_tensor = None
				154
				155	ifm_idx, _, weight_idx, bias_idx, ofm_idx = self.get_ifm_ifm2_weight_bias_ofm_indices()
				156	if ifm_idx != -1:
				157	ifm_tensor = self.inputs[ifm_idx]
				158	if weight_idx != -1:
				159	weight_tensor = self.inputs[weight_idx]
				160	if bias_idx != -1:
				161	bias_tensor = self.inputs[bias_idx]
				162	if ofm_idx != -1:
				163	ofm_tensor = self.outputs[ofm_idx]
				164
				165	return ifm_tensor, weight_tensor, bias_tensor, ofm_tensor
				166
				167	concat_ops = set(("Concat", "ConcatV2", "QuantizedConcat", "ConcatTFLite", "PackReshaped"))
				168
				169	def is_concat_op(self):
				170	return self.type in Operation.concat_ops
				171
				172	def get_concat_inputs_axis(self):
				173	assert self.is_concat_op()
				174
				175	if self.type == "ConcatV2":
				176	axis_tensor = self.inputs[-1]
				177	inputs = self.inputs[:-1]
				178	elif self.type == "Concat":
				179	axis_tensor = self.inputs[0]
				180	inputs = self.inputs[1:]
				181	elif self.type == "QuantizedConcat":
				182	axis_tensor = self.inputs[0]
				183	inputs = self.inputs[1:]
				184	inputs = inputs[: len(inputs) // 3] # Skip min/max
				185
				186	if self.type == "ConcatTFLite":
				187	inputs = self.inputs
				188	axis = self.attrs["axis"]
				189	elif self.type == "PackReshaped":
				190	# Requires fixup_pack_input to be called before this point
				191	inputs = self.inputs
				192	axis = self.attrs["axis"]
				193	assert len(self.inputs) == self.attrs["values_count"]
				194	else:
				195	assert len(axis_tensor.ops) == 1 and axis_tensor.ops[0].type == "Const"
				196	axis = int(axis_tensor.values)
				197
				198	return inputs, axis
				199
				200	split_ops = set(("Split", "StridedSlice", "Slice", "UnpackReshaped"))
				201
				202	def is_split_op(self):
				203	return self.type in Operation.split_ops
				204
				205	def get_split_inputs_axis(self):
				206	assert self.is_split_op()
				207
				208	offset_start = None
				209	offset_end = None
				210	axis = None
				211	if self.type == "Split":
				212	# TODO: Extend split capabilities
				213	# If num_or_size_splits is an integer, then value is split along dimension axis into num_split smaller
				214	# tensors. This requires that num_split evenly divides value.shape[axis].
				215	# If num_or_size_splits is a 1-D Tensor (or list), we call it size_splits and value is split into
				216	# len(size_splits) elements. The shape of the i-th element has the same size as the value except along
				217	# dimension axis where the size is size_splits[i].
				218	num_splits = self.attrs.get("num_splits")
				219	axis_tens = self.inputs[0]
				220	assert len(axis_tens.ops) == 1 and axis_tens.ops[0].type == "Const"
				221	axis = int(axis_tens.values)
				222	input_tens = self.inputs[1]
				223	outputs = self.outputs
				224	assert num_splits == len(outputs)
				225
				226	elif self.type == "Slice":
				227	input_tens, begin_tens, size_tens = self.inputs
				228	outputs = self.outputs
				229	offset_start = [0] * len(input_tens.shape)
				230	offset_end = [0] * len(input_tens.shape)
				231
				232	for idx in range(len(begin_tens.values)):
				233	# Check if the op should slice in dimension idx
				234	if size_tens.values[idx] != input_tens.shape[idx]:
				235	offset_start[idx] = begin_tens.values[idx]
				236	offset_end[idx] = size_tens.values[idx] + offset_start[idx]
				237
				238	elif self.type == "StridedSlice":
				239	input_tens, begin_tens, end_tens, strides_tens = self.inputs
				240	outputs = self.outputs
				241	out_tens = outputs[0]
				242	offset_start = [0] * len(outputs[0].shape)
				243	offset_end = [0] * len(outputs[0].shape)
				244
				245	# Extract masks
				246	begin_mask = self.attrs["begin_mask"]
				247	ellipsis_mask = self.attrs["ellipsis_mask"]
				248	end_mask = self.attrs["end_mask"]
				249	new_axis_mask = self.attrs["new_axis_mask"]
				250	shrink_axis_mask = self.attrs["shrink_axis_mask"]
				251	# TODO: Either extend this to support these different masks or check
				252	# for this at an earlier stage and place the op on Cpu if needed
				253	assert begin_mask == end_mask
				254	assert new_axis_mask == ellipsis_mask == 0
				255	# shrink_axis_mask is not supported by the Operation class but the operation
				256	# may have the attribute modified and handled in the graph optimization phase.
				257	assert shrink_axis_mask == 0
				258	assert len(input_tens.shape) == len(out_tens.shape)
				259
				260	for idx in range(len(input_tens.shape)):
				261	# If the i:th bit in begin_mask is set then the value on begin[i] should be ignored
				262	if (begin_mask & (1 << idx)) == 0:
				263	# Check if the op should slice in dimension idx
				264	if end_tens.values[idx] != input_tens.shape[idx] or (
				265	end_tens.values[idx] == input_tens.shape[idx] and begin_tens.values[idx] != 0
				266	):
				267	offset_start[idx] = begin_tens.values[idx]
				268	offset_end[idx] = end_tens.values[idx]
				269
				270	else:
				271	# Don't slice in this axis, instead use fullest possible range
				272	continue
				273
				274	elif self.type == "UnpackReshaped":
				275	# Requires fixup_unpack_output to be called before this point
				276	input_tens = self.inputs[0]
				277	outputs = self.outputs
				278	axis = self.attrs["axis"]
				279	num_splits = self.attrs["num"]
				280	# Number of outputs have to equal the value of the dimension to unpack
				281	assert num_splits == len(outputs) == input_tens.shape[axis]
				282	else:
				283	assert False
				284
				285	return input_tens, outputs, axis, offset_start, offset_end