Blame - ethosu/vela/high_level_command_to_npu_op.py - ml/ethos-u/ethos-u-vela

blob: b5e7b4b9e171aee62dc0075497e9ac333f5ce70d [file] [log] [blame]

erik.andersson@arm.com	ad45f79	2021-02-03 10:20:16 +0100	[diff] [blame^]	1	# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	2	#
				3	# SPDX-License-Identifier: Apache-2.0
				4	#
				5	# Licensed under the Apache License, Version 2.0 (the License); you may
				6	# not use this file except in compliance with the License.
				7	# You may obtain a copy of the License at
				8	#
				9	# www.apache.org/licenses/LICENSE-2.0
				10	#
				11	# Unless required by applicable law or agreed to in writing, software
				12	# distributed under the License is distributed on an AS IS BASIS, WITHOUT
				13	# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	# See the License for the specific language governing permissions and
				15	# limitations under the License.
				16	#
				17	# Description:
				18	# Conversion from high level command to NpuOperation
				19	from enum import IntEnum
				20	from typing import List
				21	from typing import Optional
				22
				23	from .api import NpuActivation
				24	from .api import NpuActivationOp
				25	from .api import NpuAddressRange
				26	from .api import NpuBlockOperation
				27	from .api import NpuBlockTraversal
				28	from .api import NpuConv2DOperation
				29	from .api import NpuConvDepthWiseOperation
				30	from .api import NpuDataType
				31	from .api import NpuDmaOperation
				32	from .api import NpuElementWiseOp
				33	from .api import NpuElementWiseOperation
				34	from .api import NpuFeatureMap
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	35	from .api import NpuLayout
				36	from .api import NpuOperation
				37	from .api import NpuPadding
				38	from .api import NpuPoolingOp
				39	from .api import NpuPoolingOperation
				40	from .api import NpuQuantization
				41	from .api import NpuResamplingMode
				42	from .api import NpuRoundingMode
				43	from .api import NpuShape3D
				44	from .api import NpuTileBox
				45	from .architecture_features import ArchitectureFeatures
				46	from .data_type import DataType
Louis Verhaard	1e17018	2020-11-26 11:42:04 +0100	[diff] [blame]	47	from .debug_database import DebugDatabase
Michael McGeagh	7a6f843	2020-12-02 15:29:22 +0000	[diff] [blame]	48	from .errors import UnsupportedFeatureError
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	49	from .high_level_command_stream import Box
				50	from .high_level_command_stream import Command
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	51	from .high_level_command_stream import DMA
				52	from .high_level_command_stream import NpuStripe
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	53	from .operation import NpuBlockType
				54	from .operation import Op
				55	from .operation import Operation
Louis Verhaard	1e17018	2020-11-26 11:42:04 +0100	[diff] [blame]	56	from .register_command_stream_generator import generate_command_stream
				57	from .register_command_stream_util import BASE_PTR_INDEX_MEM2MEM
Louis Verhaard	1e17018	2020-11-26 11:42:04 +0100	[diff] [blame]	58	from .register_command_stream_util import to_npu_kernel
				59	from .register_command_stream_util import UNARY_ELEMWISE_OPS
patrik.gustavsson	eeb8515	2020-12-21 17:10:40 +0000	[diff] [blame]	60	from .shape4d import Shape4D
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	61	from .tensor import MemType
				62	from .tensor import Tensor
				63	from .tensor import TensorBlockTraversal
				64	from .tensor import TensorFormat
				65	from .tensor import TensorPurpose
				66
				67
class BasePointerIndex(IntEnum):
    """Indices of the base pointers that tensor addresses are expressed relative to"""

    # Base address index for the Weight tensor
    WeightTensor = 0
    # Base address index for the Scratch_tensor in the TensorArena
    ScratchTensor = 1
    # Base address for the Scratch_fast_tensor
    ScratchFastTensor = 2
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	72
				73
				74	dtype_map = {
				75	DataType.uint8: NpuDataType.UINT8,
				76	DataType.int8: NpuDataType.INT8,
				77	DataType.uint16: NpuDataType.UINT16,
				78	DataType.int16: NpuDataType.INT16,
				79	DataType.int32: NpuDataType.INT32,
				80	}
				81
				82
				83	block_traversal_map = {
				84	TensorBlockTraversal.DepthFirst: NpuBlockTraversal.DEPTH_FIRST,
				85	TensorBlockTraversal.PartKernelFirst: NpuBlockTraversal.PART_KERNEL_FIRST,
				86	}
				87
				88
				89	# Maps an elementwise op type to an elementwise_mode enum value used by NPU_OP_ELEMENTWISE
				90	elementwise_op_map = {
				91	Op.Mul: NpuElementWiseOp.MUL,
				92	Op.Add: NpuElementWiseOp.ADD,
Fredrik Svedberg	e82be7c	2021-01-18 15:21:03 +0100	[diff] [blame]	93	Op.RescaleAdd: NpuElementWiseOp.ADD,
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	94	Op.Sub: NpuElementWiseOp.SUB,
				95	Op.Minimum: NpuElementWiseOp.MIN,
				96	Op.Maximum: NpuElementWiseOp.MAX,
				97	Op.LeakyRelu: NpuElementWiseOp.LRELU,
				98	Op.Abs: NpuElementWiseOp.ABS,
				99	Op.CLZ: NpuElementWiseOp.CLZ,
				100	Op.SHR: NpuElementWiseOp.SHR,
				101	Op.SHL: NpuElementWiseOp.SHL,
				102	}
				103
				104
def ifm_ifm2_correct_order(ifm_shape: List[int], ifm2_shape: List[int]) -> bool:
    """Checks if the operands of a binary elementwise operation are in the order required by the NPU

    Returns False if the operands must be swapped, i.e. if ifm_shape is a scalar ([]) or
    has a dimension of size 1 where ifm2_shape has a different size (broadcast).
    """
    if ifm_shape == []:
        # Scalar needs to be in IFM2
        return False
    if ifm2_shape == []:
        return True

    # Broadcasted FM needs to be in IFM2
    ifm_is_broadcasted = any(dim == 1 and dim != dim2 for dim, dim2 in zip(ifm_shape, ifm2_shape))
    return not ifm_is_broadcasted
				117
				118
def get_rounding_mode(op: Operation, fused_quantize: bool) -> NpuRoundingMode:
    """Specifies type of rounding to be used

    A "rounding_mode" entry in op.attrs, if present, takes precedence over the
    rounding mode derived from the operation type.
    """
    op_type = op.type
    if op_type == Op.ResizeBilinear:
        derived_mode = NpuRoundingMode.TRUNCATE
    elif (
        op_type.npu_block_type in (NpuBlockType.ConvolutionMxN, NpuBlockType.ConvolutionDepthWise)
        and op.ifm.dtype == DataType.int16
    ):
        # 16-bit convolutions
        derived_mode = NpuRoundingMode.NATURAL
    elif (
        not fused_quantize
        and op_type.is_avgpool_op()
        and op.memory_function == Op.ConcatSliceWrite
        and op.kernel.elements_wh() == 1
    ):
        # 1x1 average pool without fused quantize that writes into a concat slice
        derived_mode = NpuRoundingMode.NATURAL
    else:
        derived_mode = NpuRoundingMode.TFL
    return op.attrs.get("rounding_mode", derived_mode)
				138
				139
				140	def create_padding(cmd: NpuStripe, primary_op: Operation) -> NpuPadding:
				141	if primary_op.type.npu_block_type == NpuBlockType.VectorProduct:
				142	return NpuPadding(top=0, left=0, bottom=0, right=0)
Louis Verhaard	69b3176	2020-11-17 09:45:20 +0100	[diff] [blame]	143	top, left, bottom, right = primary_op.attrs["explicit_padding"]
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	144
				145	# Check if this is for horizontal ifm streaming
				146	if not (cmd.is_first_h_stripe and cmd.is_last_h_stripe):
Louis Verhaard	69b3176	2020-11-17 09:45:20 +0100	[diff] [blame]	147	top = cmd.pad_top
				148	bottom = cmd.pad_bottom
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	149
				150	# Indexing from end since a 1x1 Avgpool might have been added with non 4-dimensional input/output,
				151	# because of activation function needed to be fused.
Andreas Nevalainen	083f103	2020-11-18 10:45:50 +0100	[diff] [blame]	152	if len(cmd.ifm_box.start_coord) >= 2 and cmd.ifm_box.start_coord[-2] > 0:
Louis Verhaard	69b3176	2020-11-17 09:45:20 +0100	[diff] [blame]	153	left = 0
Patrik Gustavsson	3a26920	2021-01-21 08:28:55 +0100	[diff] [blame]	154	if len(cmd.ifm_box.end_coord) >= 2 and cmd.ifm_box.end_coord[-2] < cmd.ps.ifm_shapes[0].width:
Louis Verhaard	69b3176	2020-11-17 09:45:20 +0100	[diff] [blame]	155	right = 0
				156	return NpuPadding(top=top, left=left, bottom=bottom, right=right)
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	157
				158
				159	def get_region(tens: Tensor, arch: ArchitectureFeatures) -> int:
Tim Hall	1bd531d	2020-11-01 20:59:36 +0000	[diff] [blame]	160	base_ptr_idx_map = {
				161	MemType.Permanent_NPU: BasePointerIndex.WeightTensor,
				162	MemType.Permanent_CPU: BasePointerIndex.WeightTensor,
				163	MemType.Scratch: BasePointerIndex.ScratchTensor,
				164	}
				165
				166	if arch.is_spilling_enabled():
				167	base_ptr_idx_map[MemType.Scratch_fast] = BasePointerIndex.ScratchFastTensor
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	168	else:
Tim Hall	1bd531d	2020-11-01 20:59:36 +0000	[diff] [blame]	169	base_ptr_idx_map[MemType.Scratch_fast] = BasePointerIndex.ScratchTensor
				170
Dwight Lidman	9b43f84	2020-12-08 17:56:44 +0100	[diff] [blame]	171	return base_ptr_idx_map[tens.mem_type].value
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	172
				173
				174	def get_upscale(op: Operation) -> NpuResamplingMode:
				175	upscale = NpuResamplingMode.NONE
				176	if op.type == Op.ResizeBilinear:
				177	# perform nearest neighbor upscale
				178	upscale = NpuResamplingMode.NEAREST
				179	elif op.type == Op.Conv2DBackpropInputSwitchedBias:
				180	# perform insert zero upscale
				181	upscale = NpuResamplingMode.TRANSPOSE
				182	return upscale
				183
				184
				185	def get_ifm_depth(npu_block_type: NpuBlockType, ifm_box: Box, ofm_box: Box) -> int:
				186	if npu_block_type in (NpuBlockType.ConvolutionMxN, NpuBlockType.VectorProduct, NpuBlockType.ReduceSum):
Louis Verhaard	69b3176	2020-11-17 09:45:20 +0100	[diff] [blame]	187	block = ifm_box.get_block()
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	188	else:
Louis Verhaard	69b3176	2020-11-17 09:45:20 +0100	[diff] [blame]	189	block = ofm_box.get_block()
				190	return block.depth
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	191
				192
				193	def use_zero_point_0(ps, tens: Tensor, is_ifm_tensor: bool) -> bool:
				194	"""Checks if quantization should use 0 as zero point"""
				195	if tens.dtype == DataType.int32 and is_ifm_tensor:
				196	return True
				197	if ps.primary_op.type not in (Op.AvgPool, Op.ResizeBilinear, Op.CLZ, Op.SHL):
				198	return False
				199	fused_quantize = any(op.type == Op.Quantize for op in ps.ops)
				200	forced_ofm_quantization = ps.primary_op.forced_output_quantization
				201	use_0 = (
				202	(ps.primary_op.activation is None or forced_ofm_quantization is not None)
				203	and (ps.primary_op.memory_function != Op.ConcatSliceWrite)
				204	and not fused_quantize
				205	)
				206	return use_0
				207
				208
				209	def get_ifm_or_ifm2_quantization(ps, tens: Tensor) -> Optional[NpuQuantization]:
				210	"""Gets quantization for IFM/IFM2"""
				211	if tens.quantization is None:
				212	return None
				213	if use_zero_point_0(ps, tens, True):
				214	zero_point = 0
				215	else:
				216	zero_point = int(tens.quantization.zero_point)
				217	return NpuQuantization(scale_f32=tens.quantization.scale_f32, zero_point=zero_point)
				218
				219
				220	def get_ofm_quantization(ps, tens: Tensor) -> Optional[NpuQuantization]:
				221	"""Gets quantization for OFM"""
				222	op = ps.primary_op
				223	# Check if operation's output quantization is should be used instead of the output tensor's quantization
				224	# (used in LUTs)
				225	ofm_quant = op.forced_output_quantization if op.forced_output_quantization is not None else tens.quantization
				226	if ofm_quant is None:
				227	return None
				228	if use_zero_point_0(ps, tens, False):
				229	zero_point = 0
				230	else:
				231	zero_point = int(ofm_quant.zero_point)
				232	return NpuQuantization(scale_f32=ofm_quant.scale_f32, zero_point=zero_point)
				233
				234
def create_feature_map(tens: Tensor, box: Box, arch: ArchitectureFeatures, op_shape4D: Shape4D) -> NpuFeatureMap:
    """Creates feature map with common fields populated

    Sets region, data type, layout, tiles and strides; shape and quantization are not set here.
    """
    fm = NpuFeatureMap()
    fm.region = get_region(tens, arch)
    fm.data_type = dtype_map[tens.dtype]

    tensor_format = tens.format
    if tensor_format == TensorFormat.NHWC:
        fm.layout = NpuLayout.NHWC
    elif tensor_format == TensorFormat.NHCWB16:
        fm.layout = NpuLayout.NHCWB16
    else:
        assert 0, "Incorrect tensor format"

    rolling_buffer = tens.addresses_for_rolling_buffer(box.start_coord, box.end_coord, op_shape4D)
    height_0, height_1, width_0, addresses = rolling_buffer
    # Missing tile addresses are replaced by 0, directly in the returned list
    for tile_idx, tile_addr in enumerate(addresses):
        if tile_addr is None:
            addresses[tile_idx] = 0
    tile_addresses = [int(tile_addr) for tile_addr in addresses]
    fm.tiles = NpuTileBox(height_0=height_0, height_1=height_1, width_0=width_0, addresses=tile_addresses)

    strides = tens.get_strides(shape4D=op_shape4D)
    fm.strides = NpuShape3D(height=int(strides[2]), width=int(strides[3]), depth=int(strides[1]))
    return fm
				258
				259
				260	def create_weights(weight_tensor: Tensor, weight_box: Box, arch: ArchitectureFeatures) -> List[NpuAddressRange]:
				261	"""Returns address ranges for weights"""
				262	weights = []
				263	stream_index = weight_tensor.compressed_stream_index_from_coord(weight_box.start_coord)
				264	weight_substream_offsets = weight_tensor.compressed_values_substream_offsets[stream_index]
				265	substreams = len(weight_substream_offsets) - 1 # Offset list must terminate with full stream length
				266
				267	# Extract weight substream offsets and calculate their lengths
				268	assert len(weight_substream_offsets) > 1 and (weight_substream_offsets[0] == 0)
				269	weight_addr = weight_tensor.address_for_coordinate(weight_box.start_coord)
				270	region = get_region(weight_tensor, arch)
				271	for core in range(substreams):
				272	address = weight_addr + weight_substream_offsets[core]
				273	length = weight_substream_offsets[core + 1] - weight_substream_offsets[core]
				274	addr_range = NpuAddressRange(region, int(address), int(length))
				275	weights.append(addr_range)
				276	return weights
				277
				278
				279	def create_biases(
				280	weight_tensor: Tensor, scale_tensor: Tensor, weight_box: Box, arch: ArchitectureFeatures
				281	) -> List[NpuAddressRange]:
				282	"""Returns address ranges for biases"""
				283	biases = []
				284	stream_index = weight_tensor.compressed_stream_index_from_coord(weight_box.start_coord)
				285	scale_substream_offsets = scale_tensor.compressed_values_substream_offsets[stream_index]
				286	substreams = len(scale_substream_offsets) - 1 # Offset list must terminate with full stream length
				287
				288	# Extract scale substream offsets and calculate their lengths
				289	assert len(scale_substream_offsets) > 1 and (scale_substream_offsets[0] == 0)
				290	scale_addr = scale_tensor.address_for_coordinate(weight_box.start_coord[-1:])
				291
				292	region = get_region(scale_tensor, arch)
				293	for core in range(substreams):
				294	address = scale_addr + scale_substream_offsets[core]
				295	length = scale_substream_offsets[core + 1] - scale_substream_offsets[core]
				296	addr_range = NpuAddressRange(region, int(address), int(length))
				297	biases.append(addr_range)
				298	return biases
				299
				300
				301	def create_npu_activation(op: Operation) -> NpuActivation:
				302	"""Creates fused activation function"""
				303	if op.activation is None:
				304	return NpuActivation(NpuActivationOp.NONE_OR_RELU)
				305	faf = op.activation.op_type
				306	act_op = NpuActivationOp.NONE_OR_RELU
				307	if faf == Op.Tanh:
				308	act_op = NpuActivationOp.TANH
				309	elif faf == Op.Sigmoid:
				310	act_op = NpuActivationOp.SIGMOID
				311	elif faf == Op.LUT:
				312	act_op = NpuActivationOp.TABLE_LOOKUP
				313	elif not faf.is_relu_op():
Michael McGeagh	7a6f843	2020-12-02 15:29:22 +0000	[diff] [blame]	314	raise UnsupportedFeatureError(f"Unsupported fused_activation_function: {faf.name}")
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	315
				316	act = NpuActivation(act_op)
				317	act.min = op.activation.min
				318	act.max = op.activation.max
				319	act.lookup_table_index = op.activation.lut_index
				320	return act
				321
				322
				323	def set_common_op_fields(npu_op: NpuBlockOperation, cmd: NpuStripe, arch: ArchitectureFeatures):
				324	"""Sets common fields of the given operation"""
				325	ps = cmd.ps
				326	op = ps.primary_op
Louis Verhaard	69b3176	2020-11-17 09:45:20 +0100	[diff] [blame]	327
				328	ifm_height = cmd.ifm_box.get_block().height
Patrik Gustavsson	3a26920	2021-01-21 08:28:55 +0100	[diff] [blame]	329	ifm_width = cmd.ps.ifm_shapes[0].width
Louis Verhaard	69b3176	2020-11-17 09:45:20 +0100	[diff] [blame]	330	ifm_depth = get_ifm_depth(op.type.npu_block_type, cmd.ifm_box, cmd.ofm_box)
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	331
Patrik Gustavsson	2349d42	2020-12-01 16:02:29 +0100	[diff] [blame]	332	npu_op.ifm = create_feature_map(cmd.ifm_tensor, cmd.ifm_box, arch, ps.ifm_shapes[0])
Louis Verhaard	69b3176	2020-11-17 09:45:20 +0100	[diff] [blame]	333	npu_op.ifm.shape = NpuShape3D(height=ifm_height, width=ifm_width, depth=ifm_depth)
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	334	npu_op.ifm.quantization = get_ifm_or_ifm2_quantization(ps, cmd.ifm_tensor)
Louis Verhaard	69b3176	2020-11-17 09:45:20 +0100	[diff] [blame]	335
				336	out_block = cmd.ofm_box.get_block()
Patrik Gustavsson	2349d42	2020-12-01 16:02:29 +0100	[diff] [blame]	337	npu_op.ofm = create_feature_map(cmd.ofm_tensor, cmd.ofm_box, arch, ps.ofm_shapes[0])
Louis Verhaard	69b3176	2020-11-17 09:45:20 +0100	[diff] [blame]	338	npu_op.ofm.shape = NpuShape3D(height=out_block.height, width=out_block.width, depth=out_block.depth)
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	339	npu_op.ofm.quantization = get_ofm_quantization(ps, cmd.ofm_tensor)
				340
				341	if cmd.weight_tensor is not None:
				342	npu_op.weights = create_weights(cmd.weight_tensor, cmd.weight_box, arch)
				343	if cmd.scale_tensor is not None:
				344	npu_op.biases = create_biases(cmd.weight_tensor, cmd.scale_tensor, cmd.weight_box, arch)
				345	npu_op.activation = create_npu_activation(op)
Patrik Gustavsson	b0ca274	2020-11-18 07:59:09 +0100	[diff] [blame]	346	npu_op.fused_quantize = any(op.type == Op.Quantize for op in ps.ops)
				347	npu_op.rounding_mode = get_rounding_mode(op, npu_op.fused_quantize)
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	348	npu_op.block_config = NpuShape3D(height=ps.block_config[0], width=ps.block_config[1], depth=ps.block_config[3])
				349
				350	if not op.type.is_elementwise_op():
				351	npu_op.padding = create_padding(cmd, op)
				352	npu_op.kernel = to_npu_kernel(op.kernel)
				353	npu_op.ifm_upscale = get_upscale(op)
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	354	return npu_op
				355
				356
				357	def create_npu_conv2d_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> NpuConv2DOperation:
				358	"""Converts the command to NpuConv2DOperation"""
				359	npu_op = NpuConv2DOperation()
				360	set_common_op_fields(npu_op, cmd, arch)
				361	if cmd.ps.primary_op.type.npu_block_type == NpuBlockType.VectorProduct:
				362	npu_op.block_traversal = NpuBlockTraversal.DEPTH_FIRST
				363	else:
				364	npu_op.block_traversal = block_traversal_map[cmd.weight_tensor.block_traversal]
				365	return npu_op
				366
				367
				368	def create_npu_conv_depthwise_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> NpuConvDepthWiseOperation:
				369	"""Converts the command to NpuConvDepthWiseOperation"""
				370	npu_op = NpuConvDepthWiseOperation()
				371	set_common_op_fields(npu_op, cmd, arch)
				372	return npu_op
				373
				374
				375	def create_npu_pool_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> NpuPoolingOperation:
				376	"""Converts the command to NpuPoolingOperation"""
				377	ps = cmd.ps
				378	op = ps.primary_op
				379	pool_op = NpuPoolingOp.AVERAGE
				380	if op.type.is_maxpool_op():
				381	pool_op = NpuPoolingOp.MAX
				382	elif op.type.is_avgpool_op() or op.type == Op.ResizeBilinear:
				383	pool_op = NpuPoolingOp.AVERAGE
				384	elif op.type == Op.ReduceSum:
				385	pool_op = NpuPoolingOp.REDUCE_SUM
				386	else:
				387	assert 0, f"Unknown pool type {op.type}"
				388	npu_op = NpuPoolingOperation(pool_op)
				389	set_common_op_fields(npu_op, cmd, arch)
				390	# Pooling specific info
Fredrik Svedberg	e82be7c	2021-01-18 15:21:03 +0100	[diff] [blame]	391	if op.type == Op.ResizeBilinear:
				392	npu_op.rescale = op.rescale
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	393	return npu_op
				394
				395
				396	def create_npu_elementwise_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> NpuElementWiseOperation:
				397	"""Converts the command to NpuElementWiseOperation"""
				398	ps = cmd.ps
				399	op = ps.primary_op
				400	assert op.type in elementwise_op_map, f"Unknown elementwise type {op.type}"
				401	elemwise_op = elementwise_op_map[op.type]
				402	npu_op = NpuElementWiseOperation(elemwise_op)
Patrik Gustavsson	2349d42	2020-12-01 16:02:29 +0100	[diff] [blame]	403
Louis Verhaard	1e17018	2020-11-26 11:42:04 +0100	[diff] [blame]	404	if elemwise_op not in UNARY_ELEMWISE_OPS:
Patrik Gustavsson	3a26920	2021-01-21 08:28:55 +0100	[diff] [blame]	405	ifm_shape = [] if cmd.ifm_tensor.shape == [] else ps.ifm_shapes[0].as_list()
				406	ifm2_shape = [] if cmd.ifm2_tensor.shape == [] else ps.ifm_shapes[1].as_list()
				407	if not ifm_ifm2_correct_order(ifm_shape, ifm2_shape):
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	408	# The scalar/broadcasted feature map has to be the ifm2 tensor so switch the ifms
				409	cmd.ifm_tensor, cmd.ifm2_tensor = cmd.ifm2_tensor, cmd.ifm_tensor
				410	cmd.ifm_box, cmd.ifm2_box = cmd.ifm2_box, cmd.ifm_box
Patrik Gustavsson	2349d42	2020-12-01 16:02:29 +0100	[diff] [blame]	411	ps.ifm_shapes[0], ps.ifm_shapes[1] = ps.ifm_shapes[1], ps.ifm_shapes[0]
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	412	npu_op.reversed_operands = True
Patrik Gustavsson	2349d42	2020-12-01 16:02:29 +0100	[diff] [blame]	413	npu_op.ifm2 = create_feature_map(cmd.ifm2_tensor, cmd.ifm2_box, arch, ps.ifm_shapes[1])
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	414	npu_op.ifm2.quantization = get_ifm_or_ifm2_quantization(ps, cmd.ifm2_tensor)
				415	if cmd.ifm2_tensor.shape == []:
				416	# scalar
				417	assert cmd.ifm2_tensor.quant_values.size == 1
				418	npu_op.ifm2_scalar = cmd.ifm2_tensor.values.item(0)
				419	npu_op.ifm2.shape = NpuShape3D(height=0, width=0, depth=0)
				420	else:
Louis Verhaard	69b3176	2020-11-17 09:45:20 +0100	[diff] [blame]	421	ifm2_blk = cmd.ifm2_box.get_block()
Patrik Gustavsson	3a26920	2021-01-21 08:28:55 +0100	[diff] [blame]	422	ifm2_width = ps.ifm_shapes[1].width
Louis Verhaard	69b3176	2020-11-17 09:45:20 +0100	[diff] [blame]	423	npu_op.ifm2.shape = NpuShape3D(height=ifm2_blk.height, width=ifm2_width, depth=ifm2_blk.depth)
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	424	set_common_op_fields(npu_op, cmd, arch)
				425	# Check if output scale needs to be overridden
				426	output_scale = None
				427	if op.type == Op.Add and "resizebilinear" in op.attrs:
				428	# Force output scale same as the input scale for
				429	# resizebilinear 1x1 that is converted to add
				430	output_scale = npu_op.ifm2.quantization.scale_f32
Fredrik Svedberg	f2afd7f	2021-02-01 21:42:12 +0100	[diff] [blame]	431	if op.type == Op.Abs:
				432	output_scale = npu_op.ifm.quantization.scale_f32 / npu_op.ofm.quantization.scale_f32
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	433	if op.type == Op.LeakyRelu:
				434	output_scale = op.attrs["alpha"]
Fredrik Svedberg	e82be7c	2021-01-18 15:21:03 +0100	[diff] [blame]	435	if op.type == Op.RescaleAdd:
				436	assert op.rescale is not None, f"{op.type} must have rescale"
				437	npu_op.rescale = op.rescale
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	438	if op.type in (Op.Add, Op.Mul, Op.Sub):
				439	if op.activation is not None and op.activation.op_type in (Op.Sigmoid, Op.Tanh):
				440	output_scale = 1 / 0x3000
				441	if output_scale is not None:
				442	npu_op.ofm.quantization = NpuQuantization(scale_f32=output_scale, zero_point=npu_op.ofm.quantization.zero_point)
				443	return npu_op
				444
				445
				446	def create_dma_op(cmd: DMA, arch: ArchitectureFeatures) -> NpuDmaOperation:
				447	"""Converts the command to NpuDmaOperation"""
				448	src_region = get_region(cmd.in_tensor, arch)
				449	if cmd.out_tensor.purpose == TensorPurpose.LUT:
Louis Verhaard	1e17018	2020-11-26 11:42:04 +0100	[diff] [blame]	450	dest_region = BASE_PTR_INDEX_MEM2MEM
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	451	else:
				452	dest_region = get_region(cmd.out_tensor, arch)
				453
				454	start_coord = cmd.box.start_coord
				455	src_addr = cmd.in_tensor.address_for_coordinate(start_coord)
				456	dest_addr = cmd.out_tensor.address_for_coordinate(start_coord)
				457
				458	if cmd.in_tensor.compressed_values is not None:
				459	if cmd.out_tensor.purpose == TensorPurpose.FSBias:
				460	sz = cmd.in_tensor.storage_size()
				461	else:
				462	stream_index = cmd.in_tensor.compressed_stream_index_from_coord(start_coord)
				463	sz = cmd.in_tensor.size_of_compressed_stream(stream_index)
				464	else:
				465	sz = cmd.in_tensor.address_for_coordinate(cmd.box.end_coord, is_top_box=True) - src_addr
				466	src = NpuAddressRange(src_region, int(src_addr), int(sz))
				467	dest = NpuAddressRange(dest_region, int(dest_addr), int(sz))
				468	return NpuDmaOperation(src, dest)
				469
				470
				471	def convert_command_to_npu_op(cmd: Command, arch: ArchitectureFeatures) -> NpuOperation:
				472	"""Converts the high level command to NpuOperation"""
Dwight Lidman	9b43f84	2020-12-08 17:56:44 +0100	[diff] [blame]	473	npu_op: NpuOperation
				474	if isinstance(cmd, DMA):
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	475	npu_op = create_dma_op(cmd, arch)
Dwight Lidman	9b43f84	2020-12-08 17:56:44 +0100	[diff] [blame]	476	elif isinstance(cmd, NpuStripe):
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	477	npu_block_type = cmd.ps.primary_op.type.npu_block_type
				478	if npu_block_type in (NpuBlockType.ConvolutionMxN, NpuBlockType.VectorProduct):
				479	npu_op = create_npu_conv2d_op(cmd, arch)
				480	elif npu_block_type == NpuBlockType.ConvolutionDepthWise:
				481	npu_op = create_npu_conv_depthwise_op(cmd, arch)
				482	elif npu_block_type in (NpuBlockType.Pooling, NpuBlockType.ReduceSum):
				483	npu_op = create_npu_pool_op(cmd, arch)
				484	elif npu_block_type == NpuBlockType.ElementWise:
				485	npu_op = create_npu_elementwise_op(cmd, arch)
				486	else:
				487	assert 0, f"Unknown command type {npu_block_type}"
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	488	return npu_op
Louis Verhaard	1e17018	2020-11-26 11:42:04 +0100	[diff] [blame]	489
				490
				491	def generate_register_command_stream_for_sg(nng, sg, arch, verbose=False):
				492	"""Generates command stream for the subgraph, adds it to sg.register_command_stream"""
				493	# Convert high level command stream to list of NpuOperation
				494	npu_op_list = []
				495	npu_op_to_cmd = dict() # map from npu op to high level command
				496	for cmd in sg.high_level_command_stream:
Dwight Lidman	9b43f84	2020-12-08 17:56:44 +0100	[diff] [blame]	497	if isinstance(cmd, NpuStripe) and cmd.ps.npu_block_type == NpuBlockType.Default:
Louis Verhaard	1e17018	2020-11-26 11:42:04 +0100	[diff] [blame]	498	print("Warning: Skipping register command stream generation for", cmd.ps)
				499	else:
				500	npu_op = convert_command_to_npu_op(cmd, arch)
				501	npu_op_list.append(npu_op)
				502	npu_op_to_cmd[npu_op] = cmd
				503	# Generate register commands
erik.andersson@arm.com	ad45f79	2021-02-03 10:20:16 +0100	[diff] [blame^]	504	if len(sg.high_level_command_stream) > 0:
				505	stream_id = DebugDatabase.add_stream(sg)
				506	sg.generated_stream_id = stream_id
Louis Verhaard	1e17018	2020-11-26 11:42:04 +0100	[diff] [blame]	507
erik.andersson@arm.com	ad45f79	2021-02-03 10:20:16 +0100	[diff] [blame^]	508	def add_to_debug_db(npu_op: NpuOperation, offset: int):
				509	"""Adds info to the debug database"""
				510	if not isinstance(npu_op, NpuDmaOperation):
				511	cmd = npu_op_to_cmd[npu_op]
				512	DebugDatabase.add_command(stream_id, offset, cmd.ps.primary_op)
Louis Verhaard	1e17018	2020-11-26 11:42:04 +0100	[diff] [blame]	513
erik.andersson@arm.com	ad45f79	2021-02-03 10:20:16 +0100	[diff] [blame^]	514	sg.register_command_stream = generate_command_stream(npu_op_list, arch, verbose, add_to_debug_db, npu_op_to_cmd)