# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Description:
# Conversion from high level command to NpuOperation
from enum import IntEnum
from typing import cast
from typing import Dict
from typing import List
from typing import Optional
from typing import Tuple

from .api import NpuActivation
from .api import NpuActivationOp
from .api import NpuAddressRange
from .api import NpuBlockOperation
from .api import NpuBlockTraversal
from .api import NpuConv2DOperation
from .api import NpuConvDepthWiseOperation
from .api import NpuDataType
from .api import NpuDmaOperation
from .api import NpuElementWiseOp
from .api import NpuElementWiseOperation
from .api import NpuFeatureMap
from .api import NpuLayout
from .api import NpuOperation
from .api import NpuOperationType
from .api import NpuPadding
from .api import NpuPoolingOp
from .api import NpuPoolingOperation
from .api import NpuQuantization
from .api import NpuResamplingMode
from .api import NpuRoundingMode
from .api import NpuShape3D
from .api import NpuTileBox
from .architecture_features import ArchitectureFeatures
from .data_type import DataType
from .debug_database import DebugDatabase
from .errors import UnsupportedFeatureError
from .ethos_u55_regs.ethos_u55_regs import resampling_mode
from .high_level_command_stream import Box
from .high_level_command_stream import Command
from .high_level_command_stream import DMA
from .high_level_command_stream import NpuStripe
from .numeric_util import quantise_float32
from .numeric_util import round_up
from .operation import NpuBlockType
from .operation import Op
from .operation import Operation
from .operation import Padding
from .register_command_stream_generator import generate_command_stream
from .register_command_stream_util import BASE_PTR_INDEX_MEM2MEM
from .register_command_stream_util import to_npu_kernel
from .register_command_stream_util import UNARY_ELEMWISE_OPS
from .shape4d import Shape4D
from .tensor import MemType
from .tensor import Tensor
from .tensor import TensorFormat
from .tensor import TensorPurpose
from .weight_compressor import NpuWeightTensor
from .weight_compressor import WeightKey


class BasePointerIndex(IntEnum):
    WeightTensor = 0  # base address index for the Weight tensor
    ScratchTensor = 1  # base address index for the Scratch_tensor in the TensorArena
    ScratchFastTensor = 2  # base address for the Scratch_fast_tensor


dtype_map = {
    DataType.uint8: NpuDataType.UINT8,
    DataType.int8: NpuDataType.INT8,
    DataType.uint16: NpuDataType.UINT16,
    DataType.int16: NpuDataType.INT16,
    DataType.int32: NpuDataType.INT32,
}


# Maps an elementwise op type to an elementwise_mode enum value used by NPU_OP_ELEMENTWISE
elementwise_op_map = {
    Op.Mul: NpuElementWiseOp.MUL,
    Op.RescaleMul: NpuElementWiseOp.MUL,
    Op.Add: NpuElementWiseOp.ADD,
    Op.RescaleAdd: NpuElementWiseOp.ADD,
    Op.Sub: NpuElementWiseOp.SUB,
    Op.Minimum: NpuElementWiseOp.MIN,
    Op.Maximum: NpuElementWiseOp.MAX,
    Op.LeakyRelu: NpuElementWiseOp.LRELU,
    Op.Abs: NpuElementWiseOp.ABS,
    Op.CLZ: NpuElementWiseOp.CLZ,
    Op.SHR: NpuElementWiseOp.SHR,
    Op.SHL: NpuElementWiseOp.SHL,
}


# inverse of the resampling_mode_map in the register command stream generator
resampling_mode_inv_map = {
    resampling_mode.NONE: NpuResamplingMode.NONE,
    resampling_mode.NEAREST: NpuResamplingMode.NEAREST,
    resampling_mode.TRANSPOSE: NpuResamplingMode.TRANSPOSE,
}


def ifm_ifm2_correct_order(ifm_shape: List[int], ifm2_shape: List[int]) -> bool:
    if ifm_shape == []:
        # Scalar needs to be in IFM2
        return False
    if ifm2_shape == []:
        return True

    for ifm, ifm2 in zip(ifm_shape, ifm2_shape):
        if ifm != ifm2 and ifm == 1:
            # Broadcasted FM needs to be in IFM2
            return False
    return True


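# Informal examples (not exercised in this module) of how ifm_ifm2_correct_order resolves
# the operand order; shapes are plain Python lists:
#   ifm_ifm2_correct_order([1, 8, 8, 16], [1, 8, 8, 16]) -> True   (shapes match, keep order)
#   ifm_ifm2_correct_order([1, 1, 8, 16], [1, 8, 8, 16]) -> False  (broadcast FM must go in IFM2)
#   ifm_ifm2_correct_order([], [1, 8, 8, 16])            -> False  (scalar must go in IFM2)

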
def get_rounding_mode(op: Operation, fused_quantize: bool) -> NpuRoundingMode:
    """Specifies type of rounding to be used"""
    rounding_mode = NpuRoundingMode.TFL
    if op.type.is_resize_op():
        rounding_mode = NpuRoundingMode.NATURAL
    elif (
        op.type.npu_block_type in (NpuBlockType.ConvolutionMxN, NpuBlockType.ConvolutionDepthWise)
        and op.ifm.dtype == DataType.int16
    ):
        rounding_mode = NpuRoundingMode.NATURAL
    elif (
        not fused_quantize
        and op.type.is_avgpool_op()
        and op.memory_function == Op.ConcatSliceWrite
        and op.kernel.elements_wh() == 1
    ):
        rounding_mode = NpuRoundingMode.NATURAL
    if op.rounding_mode is not None:
        rounding_mode = op.rounding_mode
    return rounding_mode


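# Summary of the selection above: resize ops, int16 ConvolutionMxN/ConvolutionDepthWise and
# unfused 1x1 AvgPool concat-slice writes get NATURAL rounding, everything else gets TFL,
# and an explicitly set op.rounding_mode always takes precedence.

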
def create_padding(cmd: NpuStripe, primary_op: Operation, npu_op: NpuBlockOperation) -> NpuPadding:
    if primary_op.type.npu_block_type == NpuBlockType.VectorProduct:
        return NpuPadding(top=0, left=0, bottom=0, right=0)
    top, left, bottom, right = primary_op.attrs["explicit_padding"]

    # Check if this is for horizontal ifm streaming
    if not (cmd.is_first_h_stripe and cmd.is_last_h_stripe):
        top = cmd.pad_top
        bottom = cmd.pad_bottom

    # The ifm box coordinate range depends upon whether the primary op was combined with a split slice read
    ifm_read_offset = primary_op.read_offsets[0]
    ifm_read_shape = primary_op.read_shapes[0]
    if ifm_read_offset is None or len(ifm_read_offset) < 2:
        box_start_coord_min = 0
        box_end_coord_max = cmd.ps.ifm_shapes[0].width
    else:
        box_start_coord_min = ifm_read_offset[-2]
        box_end_coord_max = ifm_read_shape[-2]

    # Index from the end, since a 1x1 AvgPool might have been added with non 4-dimensional input/output
    # because an activation function needed to be fused.
    if len(cmd.ifm_box.start_coord) >= 2 and cmd.ifm_box.start_coord[-2] > box_start_coord_min:
        left = 0
    if len(cmd.ifm_box.end_coord) >= 2 and cmd.ifm_box.end_coord[-2] < box_end_coord_max:
        right = 0

    # If tile padding is selected, modify the tile base addresses and set NpuPadding to zero.
    if primary_op.attrs.get("padding", None) == Padding.TILE:
        assert cmd.ifm_tensor.format == TensorFormat.NHCWB16, "Tensor format NHCWB16 required to perform tile padding"
        assert npu_op.op_type == NpuOperationType.ConvDepthWise, "Tile padding only supported for depthwise convolution"
        assert npu_op.ifm is not None, "Feature map must be initialized to modify the tile addresses"
        npu_op.ifm.tiles = modify_tile_addresses_for_padding(
            npu_op.ifm.tiles,
            primary_op.attrs.get("explicit_padding", None),
            channels=cmd.ps.ifm_shapes[0].depth,
            dtype=cmd.ifm_tensor.dtype,
        )
        top, left, bottom, right = 0, 0, 0, 0
    return NpuPadding(top=top, left=left, bottom=bottom, right=right)


def modify_tile_addresses_for_padding(
    tile_box: NpuTileBox, padding_direction: List[int], channels: int, dtype: DataType
) -> NpuTileBox:
    # Addresses are 16-bytes aligned when using the NHCWB16 format, which is required to utilize tiling
    # Calculate the offset to top right, bottom left and bottom right element in the IFM (top left offset is 0)
    """
    Example: 4x4x1 IFM
    | a b c d |  <-- Offset to TR ('d') is (w0-1) = 3
    | e f g h |
    | i j k l |
    | m n o p |  <-- Offset to BL ('m') is (w0*(h0-1)) = 12 and to BR ('p') ((w0*h0)-1) = 15
    """
    h0, h1, w0, addresses = tile_box
    elem_size = 2 if dtype == DataType.int16 else 1
    tr_offset = (w0 - 1) * 16 * elem_size
    bl_offset = w0 * (h0 - 1) * 16 * (round_up(channels, 16) // 16) * elem_size
    br_offset = tr_offset + bl_offset

    # Explicit padding order: (Top, Left, Bottom, Right)
    if padding_direction == (1, 1, 0, 0):
        # Pad top left corner
        """
                   | a a b |
        | a b | -> | a a b |
        | c d |    | c c d |
        """
        addresses = [addresses[0]] * 4
        h0, h1, w0 = 1, 1, 1

    elif padding_direction == (1, 0, 0, 1):
        # Pad top right corner
        """
                   | a b b |
        | a b | -> | a b b |
        | c d |    | c d d |
        """
        addresses = [addresses[0], addresses[0] + tr_offset, addresses[0], addresses[0] + tr_offset]
        h0, h1, w0 = 1, 1, w0

    elif padding_direction == (0, 1, 1, 0):
        # Pad bottom left corner
        """
        | a b |    | a a b |
        | c d | -> | c c d |
                   | c c d |
        """
        addresses = [addresses[0], addresses[0], addresses[0] + bl_offset, addresses[0] + bl_offset]
        h0, h1, w0 = h0, h1, 1

    elif padding_direction == (0, 0, 1, 1):
        # Pad bottom right corner
        """
        | a b |    | a b b |
        | c d | -> | c d d |
                   | c d d |
        """
        addresses = [
            addresses[0],
            addresses[0] + tr_offset,
            addresses[0] + bl_offset,
            addresses[0] + br_offset,
        ]
        # h0, h1, w0 = h0, h1, w0
    else:
        assert 0, "Invalid padding direction for tile padding"

    return NpuTileBox(height_0=h0, height_1=h1, width_0=w0, addresses=[int(addr) for addr in addresses])


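# Worked example of the offsets above for the 4x4x1 IFM in the docstring, assuming int8 data
# (elem_size = 1) and a single 16-channel brick (round_up(1, 16) // 16 = 1):
#   tr_offset = (4 - 1) * 16 * 1     = 48 bytes
#   bl_offset = 4 * (4 - 1) * 16 * 1 = 192 bytes
#   br_offset = 48 + 192             = 240 bytes

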
def get_region(mem_type: MemType, arch: ArchitectureFeatures) -> int:
    base_ptr_idx_map = {
        MemType.Permanent_NPU: BasePointerIndex.WeightTensor,
        MemType.Permanent_CPU: BasePointerIndex.WeightTensor,
        MemType.Scratch: BasePointerIndex.ScratchTensor,
    }

    if arch.is_spilling_enabled():
        base_ptr_idx_map[MemType.Scratch_fast] = BasePointerIndex.ScratchFastTensor
    else:
        base_ptr_idx_map[MemType.Scratch_fast] = BasePointerIndex.ScratchTensor

    return base_ptr_idx_map[mem_type].value


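# The returned region number is simply the BasePointerIndex value: Permanent_NPU/Permanent_CPU -> 0
# (WeightTensor), Scratch -> 1 (ScratchTensor), and Scratch_fast -> 2 (ScratchFastTensor) when
# spilling is enabled, otherwise 1.

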
def get_mem_limits_for_regions(arch: ArchitectureFeatures) -> Dict[int, int]:
    """Returns map region -> max size of the region in bytes"""
    mem_limits = dict()
    for mem_type in MemType.all():
        mem_limits[get_region(mem_type, arch)] = arch.mem_type_size(mem_type)
    mem_limits[BASE_PTR_INDEX_MEM2MEM] = arch.shram_size_bytes
    return mem_limits


def get_ifm_depth(npu_block_type: NpuBlockType, ifm_box: Box, ofm_box: Box) -> int:
    if npu_block_type in (NpuBlockType.ConvolutionMxN, NpuBlockType.VectorProduct, NpuBlockType.ReduceSum):
        block = ifm_box.get_block()
    else:
        block = ofm_box.get_block()
    return block.depth


def use_zero_point_0(ps, tens: Tensor, is_ifm_tensor: bool) -> bool:
    """Checks if quantization should use 0 as zero point"""
    if tens.dtype == DataType.int32 and is_ifm_tensor:
        return True
    if ps.primary_op.type not in (Op.AvgPool, Op.CLZ, Op.SHL) and not ps.primary_op.type.is_resize_op():
        return False
    if ps.primary_op.type == Op.AvgPool and ps.primary_op.explicit_scaling:
        return False
    fused_quantize = any(op.type == Op.Quantize for op in ps.ops)
    forced_ofm_quantization = ps.primary_op.forced_output_quantization
    use_0 = (
        (
            ps.primary_op.activation is None
            or forced_ofm_quantization is not None
            or (
                ps.primary_op.type.is_avgpool_op()
                and ps.primary_op.activation.op_type.is_relu_op()
                and not ps.primary_op.rescale
            )
        )
        and (ps.primary_op.memory_function != Op.ConcatSliceWrite)
        and not fused_quantize
    )
    return use_0


def get_ifm_or_ifm2_quantization(ps, tens: Tensor) -> Optional[NpuQuantization]:
    """Gets quantization for IFM/IFM2"""
    op = ps.primary_op
    ifm_quant = op.forced_input_quantization if op.forced_input_quantization is not None else tens.quantization
    if ifm_quant is None:
        return None
    if use_zero_point_0(ps, tens, True):
        zero_point = 0
    else:
        zero_point = int(ifm_quant.zero_point)
    return NpuQuantization(scale_f32=ifm_quant.scale_f32, zero_point=zero_point)


def get_ofm_quantization(ps, tens: Tensor) -> Optional[NpuQuantization]:
    """Gets quantization for OFM"""
    op = ps.primary_op
    # Check if the operation's output quantization should be used instead of the output tensor's quantization
    # (used in LUTs)
    ofm_quant = op.forced_output_quantization if op.forced_output_quantization is not None else tens.quantization
    if ofm_quant is None:
        return None
    if use_zero_point_0(ps, tens, False):
        zero_point = 0
    else:
        zero_point = int(ofm_quant.zero_point)
    return NpuQuantization(scale_f32=ofm_quant.scale_f32, zero_point=zero_point)


def create_feature_map(
    tens: Tensor,
    box: Box,
    arch: ArchitectureFeatures,
    op_shape4D: Shape4D,
    stride_multiplier: Optional[List[int]] = None,
) -> NpuFeatureMap:
    """Creates feature map with common fields populated"""
    fm = NpuFeatureMap()
    fm.region = get_region(tens.mem_type, arch)
    fm.data_type = dtype_map[tens.dtype]
    if tens.format == TensorFormat.NHWC:
        fm.layout = NpuLayout.NHWC
    elif tens.format == TensorFormat.NHCWB16:
        fm.layout = NpuLayout.NHCWB16
    else:
        assert 0, "Incorrect tensor format"

    strides = tens.get_strides(op_shape4D)
    assert strides is not None

    if stride_multiplier and stride_multiplier != [1, 1, 1]:
        assert (
            tens.format == TensorFormat.NHWC
        ), "Only default stride multiplier ([1, 1, 1]) supported for NHCWB16 format"
        # Multiply strides for C/H/W (in that order) with corresponding stride factor
        for i, stride_factor in enumerate(stride_multiplier, start=1):
            strides[i] *= stride_factor

    height_0, height_1, width_0, addresses = tens.addresses_for_rolling_buffer(
        box.start_coord, box.end_coord, strides, op_shape4D
    )

    fm.tiles = NpuTileBox(
        height_0=height_0, height_1=height_1, width_0=width_0, addresses=[int(addr) for addr in addresses]
    )
    fm.strides = NpuShape3D(height=int(strides[2]), width=int(strides[3]), depth=int(strides[1]))
    fm.name = tens.name
    return fm


def create_weights(
    weight_tensor: NpuWeightTensor, weight_box: Box, scale_tensor: NpuWeightTensor, arch: ArchitectureFeatures
) -> Tuple[List[NpuAddressRange], List[NpuAddressRange]]:
    """Returns address ranges for weights and scales"""
    weights = []
    biases = []
    shared_region = get_region(weight_tensor.mem_type, arch)
    scale_region = get_region(scale_tensor.mem_type, arch) if scale_tensor else 0

    w_tensor_src = weight_tensor
    if weight_tensor.src_tensor:
        w_tensor_src = cast(NpuWeightTensor, weight_tensor.src_tensor)

    core_offset = 0
    for core in range(0, arch.ncores):
        # Get weight range per core
        key = WeightKey(core, weight_box.start_coord[-1])
        if key in w_tensor_src.encoded_ranges:
            weight_range = w_tensor_src.encoded_ranges[key]
            if weight_tensor == w_tensor_src:
                # Straight from source tensor
                address = weight_tensor.address + weight_range.offset
            else:
                # Weight buffered tensor
                address = weight_tensor.address + core_offset
                core_offset += round_up(weight_range.total_bytes, 16)

            # Location of weights in tensor
            addr_range = NpuAddressRange(
                shared_region, int(address + weight_range.weight_offset), round_up(int(weight_range.weight_bytes), 16)
            )
            weights.append(addr_range)

            # Location of standalone scales or combined weights tensor scales
            if scale_tensor:
                assert scale_tensor.src_tensor is None  # Must be standalone
                scale_range = scale_tensor.encoded_ranges[key]
                address = scale_tensor.address + scale_range.offset
                addr_range = NpuAddressRange(scale_region, int(address), round_up(int(scale_range.scale_bytes), 16))
            else:
                addr_range = NpuAddressRange(shared_region, int(address), round_up(int(weight_range.scale_bytes), 16))

            biases.append(addr_range)

    return weights, biases


def create_npu_activation(op: Operation) -> NpuActivation:
    """Creates fused activation function"""
    if op.activation is None:
        return NpuActivation(NpuActivationOp.NONE_OR_RELU)
    faf = op.activation.op_type
    act_op = NpuActivationOp.NONE_OR_RELU
    if faf == Op.Tanh:
        act_op = NpuActivationOp.TANH
    elif faf == Op.Sigmoid:
        act_op = NpuActivationOp.SIGMOID
    elif faf == Op.LUT:
        act_op = NpuActivationOp.TABLE_LOOKUP
    elif not faf.is_relu_op():
        raise UnsupportedFeatureError(f"Unsupported fused_activation_function: {faf.name}")

    act = NpuActivation(act_op)
    act.min = op.activation.min
    act.max = op.activation.max
    if act_op is NpuActivationOp.NONE_OR_RELU and op.type.is_avgpool_op() and not op.rescale:
        quant = op.ofm.quantization
        if quant and quant.zero_point:  # Zero point is not 0
            scale_f32 = 1 if quant.scale_f32 is None else quant.scale_f32
            zero_point = quant.zero_point
            if act.min is not None:
                act.min = scale_f32 * quantise_float32(act.min, scale_f32, zero_point)
            if act.max is not None:
                act.max = scale_f32 * quantise_float32(act.max, scale_f32, zero_point)
    act.lookup_table_index = op.activation.lut_index
    return act


def set_common_op_fields(npu_op: NpuBlockOperation, cmd: NpuStripe, arch: ArchitectureFeatures):
    """Sets common fields of the given operation"""
    ps = cmd.ps
    op = ps.primary_op

    ifm_height = cmd.ifm_box.get_block().height
    ifm_width = cmd.ps.ifm_shapes[0].width
    ifm_depth = get_ifm_depth(op.type.npu_block_type, cmd.ifm_box, cmd.ofm_box)

    npu_op.ifm = create_feature_map(cmd.ifm_tensor, cmd.ifm_box, arch, ps.ifm_shapes[0])
    npu_op.ifm.shape = NpuShape3D(height=ifm_height, width=ifm_width, depth=ifm_depth)
    npu_op.ifm.quantization = get_ifm_or_ifm2_quantization(ps, cmd.ifm_tensor)

    out_block = cmd.ofm_box.get_block()
    npu_op.ofm = create_feature_map(cmd.ofm_tensor, cmd.ofm_box, arch, ps.ofm_shapes[0], op.ofm_stride_multiplier)
    npu_op.ofm.shape = NpuShape3D(height=out_block.height, width=out_block.width, depth=out_block.depth)
    npu_op.ofm.quantization = get_ofm_quantization(ps, cmd.ofm_tensor)

    if cmd.weight_tensor is not None:
        npu_op.weights, npu_op.biases = create_weights(cmd.weight_tensor, cmd.weight_box, cmd.scale_tensor, arch)
    npu_op.activation = create_npu_activation(op)
    npu_op.fused_quantize = any(op.type == Op.Quantize for op in ps.ops)
    npu_op.rounding_mode = get_rounding_mode(op, npu_op.fused_quantize)
    npu_op.block_config = NpuShape3D(height=ps.block_config[0], width=ps.block_config[1], depth=ps.block_config[3])

    if not op.type.is_elementwise_op():
        npu_op.padding = create_padding(cmd, op, npu_op)
        npu_op.kernel = to_npu_kernel(op.kernel)
    npu_op.ifm_upscale = resampling_mode_inv_map[op.ifm_resampling_mode]
    return npu_op


def create_npu_conv2d_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> NpuConv2DOperation:
    """Converts the command to NpuConv2DOperation"""
    npu_op = NpuConv2DOperation()
    set_common_op_fields(npu_op, cmd, arch)
    if cmd.ps.primary_op.type.npu_block_type == NpuBlockType.VectorProduct:
        npu_op.block_traversal = NpuBlockTraversal.DEPTH_FIRST
    else:
        if cmd.weight_tensor.src_tensor:
            npu_op.block_traversal = cmd.weight_tensor.src_tensor.hw_traversal
        else:
            npu_op.block_traversal = cmd.weight_tensor.hw_traversal
    return npu_op


def create_npu_conv_depthwise_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> NpuConvDepthWiseOperation:
    """Converts the command to NpuConvDepthWiseOperation"""
    npu_op = NpuConvDepthWiseOperation()
    set_common_op_fields(npu_op, cmd, arch)
    return npu_op


def create_npu_pool_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> NpuPoolingOperation:
    """Converts the command to NpuPoolingOperation"""
    ps = cmd.ps
    op = ps.primary_op
    if op.type.is_maxpool_op():
        pool_op = NpuPoolingOp.MAX
    elif op.type.is_avgpool_op() or op.type.is_resize_op():
        pool_op = NpuPoolingOp.AVERAGE
    elif op.type == Op.ReduceSum:
        pool_op = NpuPoolingOp.REDUCE_SUM
    else:
        assert 0, f"Unknown pool type {op.type}"
    npu_op = NpuPoolingOperation(pool_op)
    set_common_op_fields(npu_op, cmd, arch)
    # Pooling specific info
    npu_op.rescale = op.rescale
    if op.explicit_scaling:
        # Note: reuse of rescale for explicit scaling to not expose this in the external API
        assert npu_op.rescale is None
        npu_op.rescale = op.explicit_scaling
    return npu_op


def create_npu_elementwise_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> NpuElementWiseOperation:
    """Converts the command to NpuElementWiseOperation"""
    ps = cmd.ps
    op = ps.primary_op
    assert op.type in elementwise_op_map, f"Unknown elementwise type {op.type}"
    elemwise_op = elementwise_op_map[op.type]
    npu_op = NpuElementWiseOperation(elemwise_op)

    if elemwise_op not in UNARY_ELEMWISE_OPS:
        ifm_shape = [] if cmd.ifm_tensor.shape == [] else ps.ifm_shapes[0].as_list()
        ifm2_shape = [] if cmd.ifm2_tensor.shape == [] else ps.ifm_shapes[1].as_list()
        if not ifm_ifm2_correct_order(ifm_shape, ifm2_shape):
            # The scalar/broadcasted feature map has to be the ifm2 tensor so switch the ifms
            cmd.ifm_tensor, cmd.ifm2_tensor = cmd.ifm2_tensor, cmd.ifm_tensor
            cmd.ifm_box, cmd.ifm2_box = cmd.ifm2_box, cmd.ifm_box
            ps.ifm_shapes[0], ps.ifm_shapes[1] = ps.ifm_shapes[1], ps.ifm_shapes[0]
            npu_op.reversed_operands = True
        npu_op.ifm2 = create_feature_map(cmd.ifm2_tensor, cmd.ifm2_box, arch, ps.ifm_shapes[1])
        npu_op.ifm2.quantization = get_ifm_or_ifm2_quantization(ps, cmd.ifm2_tensor)
        if cmd.ifm2_tensor.shape == []:
            # scalar
            npu_op.ifm2_scalar = cmd.ifm2_tensor.get_scalar()
            npu_op.ifm2.shape = NpuShape3D(height=0, width=0, depth=0)
        else:
            ifm2_blk = cmd.ifm2_box.get_block()
            ifm2_width = ps.ifm_shapes[1].width
            npu_op.ifm2.shape = NpuShape3D(height=ifm2_blk.height, width=ifm2_width, depth=ifm2_blk.depth)
    set_common_op_fields(npu_op, cmd, arch)
    # Check if output scale needs to be overridden
    output_scale = None
    if op.type == Op.Add and op.original_type.is_resize_op():
        # Force output scale same as the input scale for
        # resizebilinear/nearestneighbor 1x1 that is converted to add
        output_scale = npu_op.ifm2.quantization.scale_f32
    elif op.type == Op.Abs:
        output_scale = npu_op.ifm.quantization.scale_f32 / npu_op.ofm.quantization.scale_f32
    elif op.type == Op.LeakyRelu:
        output_scale = op.attrs["alpha"]
    elif op.type in (Op.RescaleAdd, Op.RescaleMul):
        assert op.rescale is not None, f"{op.type} must have rescale"
        npu_op.rescale = op.rescale
    elif op.type in (Op.Add, Op.Mul, Op.Sub):
        if op.activation is not None and op.activation.op_type in (Op.Sigmoid, Op.Tanh):
            output_scale = 1 / 0x3000
    if output_scale is not None:
        npu_op.ofm.quantization = NpuQuantization(scale_f32=output_scale, zero_point=npu_op.ofm.quantization.zero_point)
    return npu_op


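# Note on the Sigmoid/Tanh case above: a Sigmoid or Tanh fused into Add/Mul/Sub forces the OFM
# scale to 1 / 0x3000 (= 1 / 12288, roughly 8.14e-05) while the OFM zero point is left unchanged.

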
def create_dma_op(cmd: DMA, arch: ArchitectureFeatures) -> NpuDmaOperation:
    """Converts the command to NpuDmaOperation"""
    src_region = get_region(cmd.in_tensor.mem_type, arch)
    if cmd.out_tensor.purpose == TensorPurpose.LUT:
        dest_region = BASE_PTR_INDEX_MEM2MEM
    else:
        dest_region = get_region(cmd.out_tensor.mem_type, arch)

    if cmd.in_tensor.purpose == TensorPurpose.Weights:
        # Get weight range per core
        sz = 0
        for core in range(0, arch.ncores):
            key = WeightKey(core, cmd.box.start_coord[-1])
            if key in cmd.in_tensor.encoded_ranges:
                weight_range = cmd.in_tensor.encoded_ranges[key]
                sz += round_up(weight_range.total_bytes, 16)

                if core == 0:
                    weight_range = cmd.in_tensor.encoded_ranges[key]
                    src_addr = cmd.in_tensor.address + weight_range.offset
        dest_addr = cmd.out_tensor.address
    else:
        src_addr = cmd.in_tensor.address_for_coordinate(cmd.box.start_coord)
        dest_addr = cmd.out_tensor.address_for_coordinate(cmd.box.start_coord)
        sz = cmd.in_tensor.address_for_coordinate(cmd.box.end_coord, is_top_box=True) - src_addr
    src = NpuAddressRange(src_region, int(src_addr), int(sz))
    dest = NpuAddressRange(dest_region, int(dest_addr), int(sz))
    return NpuDmaOperation(src, dest)


def convert_command_to_npu_op(cmd: Command, arch: ArchitectureFeatures) -> NpuOperation:
    """Converts the high level command to NpuOperation"""
    npu_op: NpuOperation
    if isinstance(cmd, DMA):
        npu_op = create_dma_op(cmd, arch)
        npu_op.name = cmd.out_tensor.name
    elif isinstance(cmd, NpuStripe):
        npu_block_type = cmd.ps.primary_op.type.npu_block_type
        if npu_block_type in (NpuBlockType.ConvolutionMxN, NpuBlockType.VectorProduct):
            npu_op = create_npu_conv2d_op(cmd, arch)
        elif npu_block_type == NpuBlockType.ConvolutionDepthWise:
            npu_op = create_npu_conv_depthwise_op(cmd, arch)
        elif npu_block_type in (NpuBlockType.Pooling, NpuBlockType.ReduceSum):
            npu_op = create_npu_pool_op(cmd, arch)
        elif npu_block_type == NpuBlockType.ElementWise:
            npu_op = create_npu_elementwise_op(cmd, arch)
        else:
            assert 0, f"Unknown command type {npu_block_type}"
        npu_op.name = cmd.ps.primary_op.name
    return npu_op


def generate_register_command_stream_for_sg(nng, sg, arch, verbose=False):
    """Generates command stream for the subgraph, adds it to sg.register_command_stream"""
    # Convert high level command stream to list of NpuOperation
    npu_op_list = []
    npu_op_to_cmd = dict()  # map from npu op to high level command
    for cmd in sg.high_level_command_stream:
        if isinstance(cmd, NpuStripe) and cmd.ps.npu_block_type == NpuBlockType.Default:
            print("Warning: Skipping register command stream generation for", cmd.ps)
        else:
            npu_op = convert_command_to_npu_op(cmd, arch)
            npu_op_list.append(npu_op)
            npu_op_to_cmd[npu_op] = cmd
    mem_limits = get_mem_limits_for_regions(arch)
    # Generate register commands
    if len(sg.high_level_command_stream) > 0:
        stream_id = DebugDatabase.add_stream(sg)
        sg.generated_stream_id = stream_id

        def add_to_debug_db(npu_op: NpuOperation, offset: int):
            """Adds info to the debug database"""
            if not isinstance(npu_op, NpuDmaOperation):
                cmd = npu_op_to_cmd[npu_op]
                DebugDatabase.add_command(stream_id, offset, cmd.ps.primary_op)

        sg.register_command_stream = generate_command_stream(
            npu_op_list, arch, verbose, mem_limits, add_to_debug_db, npu_op_to_cmd
        )
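

# Minimal usage sketch (hypothetical driver code, not part of this module): once the scheduler
# has populated sg.high_level_command_stream, the register command stream for each NPU subgraph
# could be produced along these lines:
#
#   for sg in nng.subgraphs:
#       generate_register_command_stream_for_sg(nng, sg, arch, verbose=False)
#       # sg.register_command_stream now holds the generated stream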