Blame - ethosu/vela/high_level_command_to_npu_op.py - ml/ethos-u/ethos-u-vela

blob: 7923e3717446831e769e3c37a05fef4b2d2c868c [file] [log] [blame]

Rickard Bolin	fea1516	2022-07-04 16:19:16 +0000	[diff] [blame^]	1	# Copyright (C) 2020-2022 Arm Limited or its affiliates. All rights reserved.
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	2	#
				3	# SPDX-License-Identifier: Apache-2.0
				4	#
				5	# Licensed under the Apache License, Version 2.0 (the License); you may
				6	# not use this file except in compliance with the License.
				7	# You may obtain a copy of the License at
				8	#
				9	# www.apache.org/licenses/LICENSE-2.0
				10	#
				11	# Unless required by applicable law or agreed to in writing, software
				12	# distributed under the License is distributed on an AS IS BASIS, WITHOUT
				13	# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	# See the License for the specific language governing permissions and
				15	# limitations under the License.
				16	#
				17	# Description:
				18	# Conversion from high level command to NpuOperation
				19	from enum import IntEnum
Jonas Ohlsson	845e232	2022-03-01 12:39:55 +0100	[diff] [blame]	20	from typing import cast
Louis Verhaard	024c355	2021-03-17 14:26:34 +0100	[diff] [blame]	21	from typing import Dict
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	22	from typing import List
				23	from typing import Optional
Jonas Ohlsson	845e232	2022-03-01 12:39:55 +0100	[diff] [blame]	24	from typing import Tuple
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	25
				26	from .api import NpuActivation
				27	from .api import NpuActivationOp
				28	from .api import NpuAddressRange
				29	from .api import NpuBlockOperation
				30	from .api import NpuBlockTraversal
				31	from .api import NpuConv2DOperation
				32	from .api import NpuConvDepthWiseOperation
				33	from .api import NpuDataType
				34	from .api import NpuDmaOperation
				35	from .api import NpuElementWiseOp
				36	from .api import NpuElementWiseOperation
				37	from .api import NpuFeatureMap
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	38	from .api import NpuLayout
				39	from .api import NpuOperation
Rickard Bolin	9ae3455	2022-06-09 13:07:17 +0000	[diff] [blame]	40	from .api import NpuOperationType
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	41	from .api import NpuPadding
				42	from .api import NpuPoolingOp
				43	from .api import NpuPoolingOperation
				44	from .api import NpuQuantization
				45	from .api import NpuResamplingMode
				46	from .api import NpuRoundingMode
				47	from .api import NpuShape3D
				48	from .api import NpuTileBox
				49	from .architecture_features import ArchitectureFeatures
				50	from .data_type import DataType
Louis Verhaard	1e17018	2020-11-26 11:42:04 +0100	[diff] [blame]	51	from .debug_database import DebugDatabase
Michael McGeagh	7a6f843	2020-12-02 15:29:22 +0000	[diff] [blame]	52	from .errors import UnsupportedFeatureError
Tim Hall	3c5cfe9	2022-03-16 16:31:57 +0000	[diff] [blame]	53	from .ethos_u55_regs.ethos_u55_regs import resampling_mode
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	54	from .high_level_command_stream import Box
				55	from .high_level_command_stream import Command
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	56	from .high_level_command_stream import DMA
				57	from .high_level_command_stream import NpuStripe
Fredrik Svedberg	838df0a	2021-09-17 16:29:22 +0200	[diff] [blame]	58	from .numeric_util import quantise_float32
Tim Hall	d8339a7	2021-05-27 18:49:40 +0100	[diff] [blame]	59	from .numeric_util import round_up
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	60	from .operation import NpuBlockType
				61	from .operation import Op
				62	from .operation import Operation
Rickard Bolin	9ae3455	2022-06-09 13:07:17 +0000	[diff] [blame]	63	from .operation import Padding
Louis Verhaard	1e17018	2020-11-26 11:42:04 +0100	[diff] [blame]	64	from .register_command_stream_generator import generate_command_stream
				65	from .register_command_stream_util import BASE_PTR_INDEX_MEM2MEM
Louis Verhaard	1e17018	2020-11-26 11:42:04 +0100	[diff] [blame]	66	from .register_command_stream_util import to_npu_kernel
				67	from .register_command_stream_util import UNARY_ELEMWISE_OPS
patrik.gustavsson	eeb8515	2020-12-21 17:10:40 +0000	[diff] [blame]	68	from .shape4d import Shape4D
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	69	from .tensor import MemType
				70	from .tensor import Tensor
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	71	from .tensor import TensorFormat
				72	from .tensor import TensorPurpose
Jonas Ohlsson	845e232	2022-03-01 12:39:55 +0100	[diff] [blame]	73	from .weight_compressor import NpuWeightTensor
Tim Hall	d8339a7	2021-05-27 18:49:40 +0100	[diff] [blame]	74	from .weight_compressor import WeightKey
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	75
				76
class BasePointerIndex(IntEnum):
    """Indices of the base pointers that tensor addresses are relative to.

    get_region() returns the integer value of one of these members as the
    region of a tensor, depending on the tensor's memory type.
    """

    WeightTensor = 0  # base address index for the Weight tensor
    ScratchTensor = 1  # base address index for the Scratch_tensor in the TensorArena
    ScratchFastTensor = 2  # base address for the Scratch_fast_tensor
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	81
				82
# Maps a Vela DataType to the NpuDataType used in the NPU operation API.
# create_feature_map() indexes this map directly, so a tensor whose dtype is not
# listed here results in a KeyError.
dtype_map = {
    DataType.uint8: NpuDataType.UINT8,
    DataType.int8: NpuDataType.INT8,
    DataType.uint16: NpuDataType.UINT16,
    DataType.int16: NpuDataType.INT16,
    DataType.int32: NpuDataType.INT32,
}
				90
				91
# Maps an elementwise op type to an elementwise_mode enum value used by NPU_OP_ELEMENTWISE.
# Note that RescaleMul and RescaleAdd share the MUL and ADD modes of the plain Mul and Add ops.
elementwise_op_map = {
    Op.Mul: NpuElementWiseOp.MUL,
    Op.RescaleMul: NpuElementWiseOp.MUL,
    Op.Add: NpuElementWiseOp.ADD,
    Op.RescaleAdd: NpuElementWiseOp.ADD,
    Op.Sub: NpuElementWiseOp.SUB,
    Op.Minimum: NpuElementWiseOp.MIN,
    Op.Maximum: NpuElementWiseOp.MAX,
    Op.LeakyRelu: NpuElementWiseOp.LRELU,
    Op.Abs: NpuElementWiseOp.ABS,
    Op.CLZ: NpuElementWiseOp.CLZ,
    Op.SHR: NpuElementWiseOp.SHR,
    Op.SHL: NpuElementWiseOp.SHL,
}
				107
				108
# inverse of the resampling_mode_map in the register command stream generator;
# set_common_op_fields() uses it to translate an operation's ifm_resampling_mode
# (register-level enum) into the NpuResamplingMode stored in NpuBlockOperation.ifm_upscale
resampling_mode_inv_map = {
    resampling_mode.NONE: NpuResamplingMode.NONE,
    resampling_mode.NEAREST: NpuResamplingMode.NEAREST,
    resampling_mode.TRANSPOSE: NpuResamplingMode.TRANSPOSE,
}
				115
				116
def ifm_ifm2_correct_order(ifm_shape: List[int], ifm2_shape: List[int]) -> bool:
    """Tells whether the two inputs of a binary elementwise op are already in the right order.

    A scalar or broadcast operand must be supplied as IFM2. The function returns
    False when the first operand (IFM) is the scalar/broadcast one, i.e. when the
    caller needs to swap the two inputs, and True otherwise.

    :param ifm_shape: shape of the first input; [] denotes a scalar
    :param ifm2_shape: shape of the second input; [] denotes a scalar
    """
    # Scalar needs to be in IFM2, so a scalar IFM is always the wrong way round
    if ifm_shape == []:
        return False
    if ifm2_shape == []:
        return True

    # Broadcasted FM needs to be in IFM2: IFM must not have a dimension of size 1
    # where IFM2 has a different size
    ifm_is_broadcast = any(dim != other and dim == 1 for dim, other in zip(ifm_shape, ifm2_shape))
    return not ifm_is_broadcast
				129
				130
def get_rounding_mode(op: Operation, fused_quantize: bool) -> NpuRoundingMode:
    """Selects the rounding mode for the given operation.

    The default is TFL rounding. NATURAL rounding is selected for resize ops, for
    int16 convolutions (MxN or depthwise) and for a 1x1 average pool that acts as
    a concat slice write without a fused quantize. An explicit rounding mode set on
    the operation overrides all of the above.

    :param op: the operation to inspect
    :param fused_quantize: True if a Quantize op has been fused with the operation
    """
    # The sub-conditions are evaluated lazily, in the same order as the rules listed above
    wants_natural_rounding = (
        op.type.is_resize_op()
        or (
            op.type.npu_block_type in (NpuBlockType.ConvolutionMxN, NpuBlockType.ConvolutionDepthWise)
            and op.ifm.dtype == DataType.int16
        )
        or (
            not fused_quantize
            and op.type.is_avgpool_op()
            and op.memory_function == Op.ConcatSliceWrite
            and op.kernel.elements_wh() == 1
        )
    )
    derived_mode = NpuRoundingMode.NATURAL if wants_natural_rounding else NpuRoundingMode.TFL
    # A rounding mode explicitly set on the operation always wins
    return derived_mode if op.rounding_mode is None else op.rounding_mode
				151
				152
def create_padding(cmd: NpuStripe, primary_op: Operation, npu_op: NpuBlockOperation) -> NpuPadding:
    """Creates the padding for one stripe of the given operation.

    Starts from the operation's "explicit_padding" attribute (top, left, bottom, right)
    and adjusts it for the part of the IFM that this stripe actually covers.

    Side effect: when the operation uses tile padding (Padding.TILE), the tile
    addresses of npu_op.ifm are rewritten and the returned padding is all zero.

    :param cmd: the stripe command being converted
    :param primary_op: primary operation of the stripe's pass
    :param npu_op: the NPU operation under construction; its ifm must already be set
                   when tile padding is used
    """
    if primary_op.type.npu_block_type == NpuBlockType.VectorProduct:
        return NpuPadding(top=0, left=0, bottom=0, right=0)

    pad_top, pad_left, pad_bottom, pad_right = primary_op.attrs["explicit_padding"]

    # When the IFM is streamed horizontally the stripe carries its own top/bottom padding
    covers_full_height = cmd.is_first_h_stripe and cmd.is_last_h_stripe
    if not covers_full_height:
        pad_top, pad_bottom = cmd.pad_top, cmd.pad_bottom

    # the ifm box coordinate range depends upon whether the primary op was combined with a split slice read
    read_offset = primary_op.read_offsets[0]
    read_shape = primary_op.read_shapes[0]
    if read_offset is not None and len(read_offset) >= 2:
        lowest_start = read_offset[-2]
        highest_end = read_shape[-2]
    else:
        lowest_start = 0
        highest_end = cmd.ps.ifm_shapes[0].width

    # Indexing from end since a 1x1 Avgpool might have been added with non 4-dimensional input/output,
    # because of activation function needed to be fused.
    box_start = cmd.ifm_box.start_coord
    box_end = cmd.ifm_box.end_coord
    if len(box_start) >= 2 and box_start[-2] > lowest_start:
        # The box does not start at the left edge, so there is nothing to pad on the left
        pad_left = 0
    if len(box_end) >= 2 and box_end[-2] < highest_end:
        # The box does not reach the right edge, so there is nothing to pad on the right
        pad_right = 0

    # If tile padding is selected, modify the tile base addresses and set NpuPadding to zero.
    if primary_op.attrs.get("padding", None) == Padding.TILE:
        assert cmd.ifm_tensor.format == TensorFormat.NHCWB16, "Tensor format NHCWB16 required to perform tile padding"
        assert npu_op.op_type == NpuOperationType.ConvDepthWise, "Tile padding only supported for depthwise convolution"
        assert npu_op.ifm is not None, "Feature map must be initialized to modify the tile addresses"
        npu_op.ifm.tiles = modify_tile_addresses_for_padding(
            npu_op.ifm.tiles,
            primary_op.attrs.get("explicit_padding", None),
            channels=cmd.ps.ifm_shapes[0].depth,
            dtype=cmd.ifm_tensor.dtype,
        )
        return NpuPadding(top=0, left=0, bottom=0, right=0)

    return NpuPadding(top=pad_top, left=pad_left, bottom=pad_bottom, right=pad_right)
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	194
				195
def modify_tile_addresses_for_padding(
    tile_box: NpuTileBox, padding_direction: List[int], channels: int, dtype: DataType
) -> NpuTileBox:
    """Rewrites the tile box so that edge elements of the IFM are replicated as padding.

    Addresses are 16-bytes aligned when using the NHCWB16 format, which is required to
    utilize tiling. The offsets to the top right (TR), bottom left (BL) and bottom right
    (BR) element of the IFM are calculated relative to the top left element (offset 0):

        Example: 4x4x1 IFM
        | a b c d | <-- Offset to TR ('d') is (w0-1) = 3
        | e f g h |
        | i j k l |
        | m n o p | <-- Offset to BL ('m') is (w0*(h0-1)) = 12 and to BR ('p') ((w0*h0)-1) = 15

    :param tile_box: tile box of the IFM, unpacked as (height_0, height_1, width_0, addresses)
    :param padding_direction: explicit padding in the order (Top, Left, Bottom, Right); any
                              sequence is accepted (list or tuple). Only the four corner
                              combinations handled below are valid
    :param channels: depth of the IFM
    :param dtype: data type of the IFM; int16 elements take 2 bytes, all others are treated as 1 byte
    :return: new tile box with modified heights/width and four tile base addresses
    :raises AssertionError: if padding_direction is not one of the supported corner paddings
    """
    h0, h1, w0, addresses = tile_box
    elem_size = 2 if dtype == DataType.int16 else 1
    tr_offset = (w0 - 1) * 16 * elem_size
    bl_offset = w0 * (h0 - 1) * 16 * (round_up(channels, 16) // 16) * elem_size
    br_offset = tr_offset + bl_offset

    # The annotation allows a list, but a list never compares equal to the tuple literals
    # below; normalise to a tuple so that both lists and tuples are matched.
    # None is kept as is and ends up in the invalid-direction branch.
    direction = None if padding_direction is None else tuple(padding_direction)
    base = addresses[0]

    # Explicit padding order: (Top, Left, Bottom, Right)
    if direction == (1, 1, 0, 0):
        # Pad top left corner
        #              | a a b |
        #  | a b | ->  | a a b |
        #  | c d |     | c c d |
        addresses = [base] * 4
        h0, h1, w0 = 1, 1, 1

    elif direction == (1, 0, 0, 1):
        # Pad top right corner
        #              | a b b |
        #  | a b | ->  | a b b |
        #  | c d |     | c d d |
        addresses = [base, base + tr_offset, base, base + tr_offset]
        h0, h1 = 1, 1  # w0 is kept

    elif direction == (0, 1, 1, 0):
        # Pad bottom left corner
        #  | a b |     | a a b |
        #  | c d | ->  | c c d |
        #              | c c d |
        addresses = [base, base, base + bl_offset, base + bl_offset]
        w0 = 1  # h0 and h1 are kept

    elif direction == (0, 0, 1, 1):
        # Pad bottom right corner
        #  | a b |     | a b b |
        #  | c d | ->  | c d d |
        #              | c d d |
        addresses = [base, base + tr_offset, base + bl_offset, base + br_offset]
        # h0, h1 and w0 are kept

    else:
        # Raised explicitly (rather than "assert 0") so that the check also is active when
        # running with -O, instead of silently returning the unmodified tile box
        raise AssertionError("Invalid padding direction for tile padding")

    return NpuTileBox(height_0=h0, height_1=h1, width_0=w0, addresses=[int(addr) for addr in addresses])
				263
				264
def get_region(mem_type: MemType, arch: ArchitectureFeatures) -> int:
    """Returns the region (base pointer index) that tensors of the given memory type belong to.

    Scratch_fast only has a region of its own when spilling is enabled; otherwise it
    shares the region of the Scratch memory type.

    :raises KeyError: if mem_type is not one of the memory types handled here
    """
    if arch.is_spilling_enabled():
        scratch_fast_index = BasePointerIndex.ScratchFastTensor
    else:
        scratch_fast_index = BasePointerIndex.ScratchTensor

    index_for_mem_type = {
        MemType.Permanent_NPU: BasePointerIndex.WeightTensor,
        MemType.Permanent_CPU: BasePointerIndex.WeightTensor,
        MemType.Scratch: BasePointerIndex.ScratchTensor,
        MemType.Scratch_fast: scratch_fast_index,
    }
    return index_for_mem_type[mem_type].value
				278
				279
				280	def get_mem_limits_for_regions(arch: ArchitectureFeatures) -> Dict[int, int]:
				281	"""Returns map region -> max size of the region in bytes"""
				282	mem_limits = dict()
				283	for mem_type in MemType.all():
				284	mem_limits[get_region(mem_type, arch)] = arch.mem_type_size(mem_type)
				285	mem_limits[BASE_PTR_INDEX_MEM2MEM] = arch.shram_size_bytes
				286	return mem_limits
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	287
				288
def get_ifm_depth(npu_block_type: NpuBlockType, ifm_box: Box, ofm_box: Box) -> int:
    """Returns the IFM depth to use for the given block type.

    For MxN convolution, vector product and reduce sum the depth is taken from the
    IFM box; for all other block types it is taken from the OFM box.
    """
    depth_from_ifm = npu_block_type in (
        NpuBlockType.ConvolutionMxN,
        NpuBlockType.VectorProduct,
        NpuBlockType.ReduceSum,
    )
    source_box = ifm_box if depth_from_ifm else ofm_box
    return source_box.get_block().depth
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	295
				296
				297	def use_zero_point_0(ps, tens: Tensor, is_ifm_tensor: bool) -> bool:
				298	"""Checks if quantization should use 0 as zero point"""
				299	if tens.dtype == DataType.int32 and is_ifm_tensor:
				300	return True
Rickard Bolin	fea1516	2022-07-04 16:19:16 +0000	[diff] [blame^]	301	# Force zero point to 0 for ResizeBilinear when converting to a DepthwiseConv since the reference kernel
				302	# will ignore the zero point.
				303	if ps.primary_op.original_type == Op.ResizeBilinear and ps.primary_op.type == Op.DepthwiseConv2DBias:
				304	return True
Tim Hall	885033b	2022-07-21 11:46:03 +0100	[diff] [blame]	305	if ps.primary_op.type not in (Op.AvgPool, Op.CLZ, Op.SHL) and not ps.primary_op.type.is_resize_op():
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	306	return False
Patrik Gustavsson	c74682c	2021-08-17 14:26:38 +0200	[diff] [blame]	307	if ps.primary_op.type == Op.AvgPool and ps.primary_op.explicit_scaling:
				308	return False
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	309	fused_quantize = any(op.type == Op.Quantize for op in ps.ops)
				310	forced_ofm_quantization = ps.primary_op.forced_output_quantization
				311	use_0 = (
Fredrik Svedberg	838df0a	2021-09-17 16:29:22 +0200	[diff] [blame]	312	(
				313	ps.primary_op.activation is None
				314	or forced_ofm_quantization is not None
Fredrik Svedberg	6f87be4	2021-10-07 10:54:20 +0200	[diff] [blame]	315	or (
				316	ps.primary_op.type.is_avgpool_op()
				317	and ps.primary_op.activation.op_type.is_relu_op()
				318	and not ps.primary_op.rescale
				319	)
Fredrik Svedberg	838df0a	2021-09-17 16:29:22 +0200	[diff] [blame]	320	)
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	321	and (ps.primary_op.memory_function != Op.ConcatSliceWrite)
				322	and not fused_quantize
				323	)
				324	return use_0
				325
				326
				327	def get_ifm_or_ifm2_quantization(ps, tens: Tensor) -> Optional[NpuQuantization]:
				328	"""Gets quantization for IFM/IFM2"""
Dwight Lidman	4f728c0	2020-12-17 15:14:45 +0100	[diff] [blame]	329	op = ps.primary_op
				330	ifm_quant = op.forced_input_quantization if op.forced_input_quantization is not None else tens.quantization
				331	if ifm_quant is None:
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	332	return None
				333	if use_zero_point_0(ps, tens, True):
				334	zero_point = 0
				335	else:
Dwight Lidman	4f728c0	2020-12-17 15:14:45 +0100	[diff] [blame]	336	zero_point = int(ifm_quant.zero_point)
				337	return NpuQuantization(scale_f32=ifm_quant.scale_f32, zero_point=zero_point)
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	338
				339
				340	def get_ofm_quantization(ps, tens: Tensor) -> Optional[NpuQuantization]:
				341	"""Gets quantization for OFM"""
				342	op = ps.primary_op
				343	# Check if operation's output quantization is should be used instead of the output tensor's quantization
				344	# (used in LUTs)
				345	ofm_quant = op.forced_output_quantization if op.forced_output_quantization is not None else tens.quantization
				346	if ofm_quant is None:
				347	return None
				348	if use_zero_point_0(ps, tens, False):
				349	zero_point = 0
				350	else:
				351	zero_point = int(ofm_quant.zero_point)
				352	return NpuQuantization(scale_f32=ofm_quant.scale_f32, zero_point=zero_point)
				353
				354
def create_feature_map(
    tens: Tensor,
    box: Box,
    arch: ArchitectureFeatures,
    op_shape4D: Shape4D,
    tile_base_offsets: List[int],
    stride_multiplier: Optional[List[int]] = None,
) -> NpuFeatureMap:
    """Creates feature map with common fields populated

    Region, data type, layout, tiles, strides and name are set; shape and
    quantization are left for the caller to fill in.

    :param tens: tensor that the feature map refers to
    :param box: the part of the tensor that is accessed
    :param arch: architecture features, used to look up the region of the tensor
    :param op_shape4D: 4D shape used to calculate the strides of the tensor
    :param tile_base_offsets: offsets that are added to the tile base addresses, by tile index
    :param stride_multiplier: optional factors for the C/H/W strides (in that order);
                              anything other than [1, 1, 1] requires NHWC format
    :raises KeyError: if the data type of the tensor is not in dtype_map
    :raises AssertionError: if the tensor format is neither NHWC nor NHCWB16
    """
    fm = NpuFeatureMap()
    fm.region = get_region(tens.mem_type, arch)
    fm.data_type = dtype_map[tens.dtype]
    if tens.format == TensorFormat.NHWC:
        fm.layout = NpuLayout.NHWC
    elif tens.format == TensorFormat.NHCWB16:
        fm.layout = NpuLayout.NHCWB16
    else:
        # Raised explicitly (rather than "assert 0") so that the check also is active when
        # running with -O, instead of silently leaving the layout unset
        raise AssertionError("Incorrect tensor format")

    strides = tens.get_strides(op_shape4D)
    assert strides is not None

    if stride_multiplier and stride_multiplier != [1, 1, 1]:
        assert (
            tens.format == TensorFormat.NHWC
        ), "Only default stride multiplier ([1, 1, 1]) supported for NHCWB16 format"
        # Multiply strides for C/H/W (in that order) with corresponding stride factor;
        # these are at index 1, 2 and 3 of the strides
        for i, stride_factor in enumerate(stride_multiplier, start=1):
            strides[i] *= stride_factor

    height_0, height_1, width_0, addresses = tens.addresses_for_rolling_buffer(
        box.start_coord, box.end_coord, strides, op_shape4D
    )

    # Shift the base address of each tile by its (optional) offset
    for idx, offset in enumerate(tile_base_offsets):
        addresses[idx] += offset
    fm.tiles = NpuTileBox(
        height_0=height_0, height_1=height_1, width_0=width_0, addresses=[int(addr) for addr in addresses]
    )
    fm.strides = NpuShape3D(height=int(strides[2]), width=int(strides[3]), depth=int(strides[1]))
    fm.name = tens.name
    return fm
				397
				398
def create_weights(
    weight_tensor: NpuWeightTensor, weight_box: Box, scale_tensor: NpuWeightTensor, arch: ArchitectureFeatures
) -> Tuple[List[NpuAddressRange], List[NpuAddressRange]]:
    """Returns address ranges for weights and scales

    One weight range and one scale range are produced for every core that has an
    encoded range for the depth at which the weight box starts.

    :param weight_tensor: the weight tensor; may be a buffered copy that refers to
                          its source through src_tensor
    :param weight_box: the part of the weights that is used
    :param scale_tensor: optional standalone scale tensor; when absent the scales are
                         taken from the weight tensor's ranges
    :param arch: architecture features (number of cores, regions)
    """
    weight_ranges: List[NpuAddressRange] = []
    scale_ranges: List[NpuAddressRange] = []
    weight_region = get_region(weight_tensor.mem_type, arch)
    scale_region = get_region(scale_tensor.mem_type, arch) if scale_tensor else 0

    # The encoded ranges always live on the source tensor
    if weight_tensor.src_tensor:
        source = cast(NpuWeightTensor, weight_tensor.src_tensor)
    else:
        source = weight_tensor

    buffered_offset = 0
    for core in range(0, arch.ncores):
        # Get weight range per core
        key = WeightKey(core, weight_box.start_coord[-1])
        if key not in source.encoded_ranges:
            continue
        weight_range = source.encoded_ranges[key]

        if weight_tensor == source:
            # Straight from source tensor
            address = weight_tensor.address + weight_range.offset
        else:
            # Weight buffered tensor: the ranges of the cores are packed back to back,
            # each one rounded up to 16 bytes
            address = weight_tensor.address + buffered_offset
            buffered_offset += round_up(weight_range.total_bytes, 16)

        # Location of weights in tensor
        weight_ranges.append(
            NpuAddressRange(
                weight_region, int(address + weight_range.weight_offset), round_up(int(weight_range.weight_bytes), 16)
            )
        )

        # Location of standalone scales or combined weights tensor scales
        if scale_tensor:
            assert scale_tensor.src_tensor is None  # Must be standalone
            scale_range = scale_tensor.encoded_ranges[key]
            scale_address = scale_tensor.address + scale_range.offset
            scale_ranges.append(
                NpuAddressRange(scale_region, int(scale_address), round_up(int(scale_range.scale_bytes), 16))
            )
        else:
            scale_ranges.append(
                NpuAddressRange(weight_region, int(address), round_up(int(weight_range.scale_bytes), 16))
            )

    return weight_ranges, scale_ranges
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	444
				445
				446	def create_npu_activation(op: Operation) -> NpuActivation:
				447	"""Creates fused activation function"""
				448	if op.activation is None:
				449	return NpuActivation(NpuActivationOp.NONE_OR_RELU)
				450	faf = op.activation.op_type
				451	act_op = NpuActivationOp.NONE_OR_RELU
				452	if faf == Op.Tanh:
				453	act_op = NpuActivationOp.TANH
				454	elif faf == Op.Sigmoid:
				455	act_op = NpuActivationOp.SIGMOID
				456	elif faf == Op.LUT:
				457	act_op = NpuActivationOp.TABLE_LOOKUP
				458	elif not faf.is_relu_op():
Michael McGeagh	7a6f843	2020-12-02 15:29:22 +0000	[diff] [blame]	459	raise UnsupportedFeatureError(f"Unsupported fused_activation_function: {faf.name}")
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	460
				461	act = NpuActivation(act_op)
				462	act.min = op.activation.min
				463	act.max = op.activation.max
Fredrik Svedberg	6f87be4	2021-10-07 10:54:20 +0200	[diff] [blame]	464	if act_op is NpuActivationOp.NONE_OR_RELU and op.type.is_avgpool_op() and not op.rescale:
Fredrik Svedberg	838df0a	2021-09-17 16:29:22 +0200	[diff] [blame]	465	quant = op.ofm.quantization
				466	if quant and quant.zero_point: # Zero point is not 0
				467	scale_f32 = 1 if quant.scale_f32 is None else quant.scale_f32
				468	zero_point = quant.zero_point
				469	if act.min is not None:
				470	act.min = scale_f32 * quantise_float32(act.min, scale_f32, zero_point)
				471	if act.max is not None:
				472	act.max = scale_f32 * quantise_float32(act.max, scale_f32, zero_point)
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	473	act.lookup_table_index = op.activation.lut_index
				474	return act
				475
				476
				477	def set_common_op_fields(npu_op: NpuBlockOperation, cmd: NpuStripe, arch: ArchitectureFeatures):
				478	"""Sets common fields of the given operation"""
				479	ps = cmd.ps
				480	op = ps.primary_op
Louis Verhaard	69b3176	2020-11-17 09:45:20 +0100	[diff] [blame]	481
				482	ifm_height = cmd.ifm_box.get_block().height
Patrik Gustavsson	3a26920	2021-01-21 08:28:55 +0100	[diff] [blame]	483	ifm_width = cmd.ps.ifm_shapes[0].width
Louis Verhaard	69b3176	2020-11-17 09:45:20 +0100	[diff] [blame]	484	ifm_depth = get_ifm_depth(op.type.npu_block_type, cmd.ifm_box, cmd.ofm_box)
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	485
Rickard Bolin	fea1516	2022-07-04 16:19:16 +0000	[diff] [blame^]	486	npu_op.ifm = create_feature_map(cmd.ifm_tensor, cmd.ifm_box, arch, ps.ifm_shapes[0], op.tile_base_offsets_ifm[0])
Louis Verhaard	69b3176	2020-11-17 09:45:20 +0100	[diff] [blame]	487	npu_op.ifm.shape = NpuShape3D(height=ifm_height, width=ifm_width, depth=ifm_depth)
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	488	npu_op.ifm.quantization = get_ifm_or_ifm2_quantization(ps, cmd.ifm_tensor)
Louis Verhaard	69b3176	2020-11-17 09:45:20 +0100	[diff] [blame]	489
				490	out_block = cmd.ofm_box.get_block()
Rickard Bolin	fea1516	2022-07-04 16:19:16 +0000	[diff] [blame^]	491	npu_op.ofm = create_feature_map(
				492	cmd.ofm_tensor, cmd.ofm_box, arch, ps.ofm_shapes[0], op.tile_base_offsets_ofm, op.ofm_stride_multiplier
				493	)
Louis Verhaard	69b3176	2020-11-17 09:45:20 +0100	[diff] [blame]	494	npu_op.ofm.shape = NpuShape3D(height=out_block.height, width=out_block.width, depth=out_block.depth)
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	495	npu_op.ofm.quantization = get_ofm_quantization(ps, cmd.ofm_tensor)
				496
				497	if cmd.weight_tensor is not None:
Tim Hall	d784af7	2021-06-08 21:25:57 +0100	[diff] [blame]	498	npu_op.weights, npu_op.biases = create_weights(cmd.weight_tensor, cmd.weight_box, cmd.scale_tensor, arch)
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	499	npu_op.activation = create_npu_activation(op)
Patrik Gustavsson	b0ca274	2020-11-18 07:59:09 +0100	[diff] [blame]	500	npu_op.fused_quantize = any(op.type == Op.Quantize for op in ps.ops)
				501	npu_op.rounding_mode = get_rounding_mode(op, npu_op.fused_quantize)
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	502	npu_op.block_config = NpuShape3D(height=ps.block_config[0], width=ps.block_config[1], depth=ps.block_config[3])
				503
				504	if not op.type.is_elementwise_op():
Rickard Bolin	9ae3455	2022-06-09 13:07:17 +0000	[diff] [blame]	505	npu_op.padding = create_padding(cmd, op, npu_op)
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	506	npu_op.kernel = to_npu_kernel(op.kernel)
Tim Hall	3c5cfe9	2022-03-16 16:31:57 +0000	[diff] [blame]	507	npu_op.ifm_upscale = resampling_mode_inv_map[op.ifm_resampling_mode]
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	508	return npu_op
				509
				510
				511	def create_npu_conv2d_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> NpuConv2DOperation:
				512	"""Converts the command to NpuConv2DOperation"""
				513	npu_op = NpuConv2DOperation()
				514	set_common_op_fields(npu_op, cmd, arch)
				515	if cmd.ps.primary_op.type.npu_block_type == NpuBlockType.VectorProduct:
				516	npu_op.block_traversal = NpuBlockTraversal.DEPTH_FIRST
				517	else:
Tim Hall	d8339a7	2021-05-27 18:49:40 +0100	[diff] [blame]	518	if cmd.weight_tensor.src_tensor:
				519	npu_op.block_traversal = cmd.weight_tensor.src_tensor.hw_traversal
				520	else:
				521	npu_op.block_traversal = cmd.weight_tensor.hw_traversal
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	522	return npu_op
				523
				524
				525	def create_npu_conv_depthwise_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> NpuConvDepthWiseOperation:
				526	"""Converts the command to NpuConvDepthWiseOperation"""
				527	npu_op = NpuConvDepthWiseOperation()
				528	set_common_op_fields(npu_op, cmd, arch)
				529	return npu_op
				530
				531
				532	def create_npu_pool_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> NpuPoolingOperation:
				533	"""Converts the command to NpuPoolingOperation"""
				534	ps = cmd.ps
				535	op = ps.primary_op
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	536	if op.type.is_maxpool_op():
				537	pool_op = NpuPoolingOp.MAX
Tim Hall	885033b	2022-07-21 11:46:03 +0100	[diff] [blame]	538	elif op.type.is_avgpool_op() or op.type.is_resize_op():
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	539	pool_op = NpuPoolingOp.AVERAGE
				540	elif op.type == Op.ReduceSum:
				541	pool_op = NpuPoolingOp.REDUCE_SUM
				542	else:
				543	assert 0, f"Unknown pool type {op.type}"
				544	npu_op = NpuPoolingOperation(pool_op)
				545	set_common_op_fields(npu_op, cmd, arch)
				546	# Pooling specific info
Dwight Lidman	4f728c0	2020-12-17 15:14:45 +0100	[diff] [blame]	547	npu_op.rescale = op.rescale
Patrik Gustavsson	c74682c	2021-08-17 14:26:38 +0200	[diff] [blame]	548	if op.explicit_scaling:
				549	# Note: reuse of rescale for explicit scaling to not expose this in the external API
				550	assert npu_op.rescale is None
				551	npu_op.rescale = op.explicit_scaling
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	552	return npu_op
				553
				554
				555	def create_npu_elementwise_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> NpuElementWiseOperation:
				556	"""Converts the command to NpuElementWiseOperation"""
				557	ps = cmd.ps
				558	op = ps.primary_op
				559	assert op.type in elementwise_op_map, f"Unknown elementwise type {op.type}"
				560	elemwise_op = elementwise_op_map[op.type]
				561	npu_op = NpuElementWiseOperation(elemwise_op)
Patrik Gustavsson	2349d42	2020-12-01 16:02:29 +0100	[diff] [blame]	562
Louis Verhaard	1e17018	2020-11-26 11:42:04 +0100	[diff] [blame]	563	if elemwise_op not in UNARY_ELEMWISE_OPS:
Patrik Gustavsson	3a26920	2021-01-21 08:28:55 +0100	[diff] [blame]	564	ifm_shape = [] if cmd.ifm_tensor.shape == [] else ps.ifm_shapes[0].as_list()
				565	ifm2_shape = [] if cmd.ifm2_tensor.shape == [] else ps.ifm_shapes[1].as_list()
				566	if not ifm_ifm2_correct_order(ifm_shape, ifm2_shape):
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	567	# The scalar/broadcasted feature map has to be the ifm2 tensor so switch the ifms
				568	cmd.ifm_tensor, cmd.ifm2_tensor = cmd.ifm2_tensor, cmd.ifm_tensor
				569	cmd.ifm_box, cmd.ifm2_box = cmd.ifm2_box, cmd.ifm_box
Patrik Gustavsson	2349d42	2020-12-01 16:02:29 +0100	[diff] [blame]	570	ps.ifm_shapes[0], ps.ifm_shapes[1] = ps.ifm_shapes[1], ps.ifm_shapes[0]
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	571	npu_op.reversed_operands = True
Rickard Bolin	fea1516	2022-07-04 16:19:16 +0000	[diff] [blame^]	572	npu_op.ifm2 = create_feature_map(
				573	cmd.ifm2_tensor,
				574	cmd.ifm2_box,
				575	arch,
				576	ps.ifm_shapes[1],
				577	op.tile_base_offsets_ifm[1],
				578	)
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	579	npu_op.ifm2.quantization = get_ifm_or_ifm2_quantization(ps, cmd.ifm2_tensor)
				580	if cmd.ifm2_tensor.shape == []:
				581	# scalar
James Peet	7519d50	2021-07-19 16:47:58 +0100	[diff] [blame]	582	npu_op.ifm2_scalar = cmd.ifm2_tensor.get_scalar()
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	583	npu_op.ifm2.shape = NpuShape3D(height=0, width=0, depth=0)
				584	else:
Louis Verhaard	69b3176	2020-11-17 09:45:20 +0100	[diff] [blame]	585	ifm2_blk = cmd.ifm2_box.get_block()
Patrik Gustavsson	3a26920	2021-01-21 08:28:55 +0100	[diff] [blame]	586	ifm2_width = ps.ifm_shapes[1].width
Louis Verhaard	69b3176	2020-11-17 09:45:20 +0100	[diff] [blame]	587	npu_op.ifm2.shape = NpuShape3D(height=ifm2_blk.height, width=ifm2_width, depth=ifm2_blk.depth)
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	588	set_common_op_fields(npu_op, cmd, arch)
				589	# Check if output scale needs to be overridden
				590	output_scale = None
Tim Hall	885033b	2022-07-21 11:46:03 +0100	[diff] [blame]	591	if op.type == Op.Add and op.original_type.is_resize_op():
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	592	# Force output scale same as the input scale for
Tim Hall	885033b	2022-07-21 11:46:03 +0100	[diff] [blame]	593	# resizebilinear/nearestneighbor 1x1 that is converted to add
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	594	output_scale = npu_op.ifm2.quantization.scale_f32
Tim Hall	885033b	2022-07-21 11:46:03 +0100	[diff] [blame]	595	elif op.type == Op.Abs:
Fredrik Svedberg	f2afd7f	2021-02-01 21:42:12 +0100	[diff] [blame]	596	output_scale = npu_op.ifm.quantization.scale_f32 / npu_op.ofm.quantization.scale_f32
Tim Hall	885033b	2022-07-21 11:46:03 +0100	[diff] [blame]	597	elif op.type == Op.LeakyRelu:
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	598	output_scale = op.attrs["alpha"]
Tim Hall	885033b	2022-07-21 11:46:03 +0100	[diff] [blame]	599	elif op.type in (Op.RescaleAdd, Op.RescaleMul):
Fredrik Svedberg	e82be7c	2021-01-18 15:21:03 +0100	[diff] [blame]	600	assert op.rescale is not None, f"{op.type} must have rescale"
				601	npu_op.rescale = op.rescale
Tim Hall	885033b	2022-07-21 11:46:03 +0100	[diff] [blame]	602	elif op.type in (Op.Add, Op.Mul, Op.Sub):
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	603	if op.activation is not None and op.activation.op_type in (Op.Sigmoid, Op.Tanh):
				604	output_scale = 1 / 0x3000
				605	if output_scale is not None:
				606	npu_op.ofm.quantization = NpuQuantization(scale_f32=output_scale, zero_point=npu_op.ofm.quantization.zero_point)
				607	return npu_op
				608
				609
				610	def create_dma_op(cmd: DMA, arch: ArchitectureFeatures) -> NpuDmaOperation:
				611	"""Converts the command to NpuDmaOperation"""
Louis Verhaard	024c355	2021-03-17 14:26:34 +0100	[diff] [blame]	612	src_region = get_region(cmd.in_tensor.mem_type, arch)
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	613	if cmd.out_tensor.purpose == TensorPurpose.LUT:
Louis Verhaard	1e17018	2020-11-26 11:42:04 +0100	[diff] [blame]	614	dest_region = BASE_PTR_INDEX_MEM2MEM
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	615	else:
Louis Verhaard	024c355	2021-03-17 14:26:34 +0100	[diff] [blame]	616	dest_region = get_region(cmd.out_tensor.mem_type, arch)
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	617
Tim Hall	d8339a7	2021-05-27 18:49:40 +0100	[diff] [blame]	618	if cmd.in_tensor.purpose == TensorPurpose.Weights:
				619	# Get weight range per core
				620	sz = 0
				621	for core in range(0, arch.ncores):
				622	key = WeightKey(core, cmd.box.start_coord[-1])
				623	if key in cmd.in_tensor.encoded_ranges:
				624	weight_range = cmd.in_tensor.encoded_ranges[key]
				625	sz += round_up(weight_range.total_bytes, 16)
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	626
Tim Hall	d8339a7	2021-05-27 18:49:40 +0100	[diff] [blame]	627	if core == 0:
				628	weight_range = cmd.in_tensor.encoded_ranges[key]
				629	src_addr = cmd.in_tensor.address + weight_range.offset
Rickard Bolin	fd8b500	2022-05-16 09:11:06 +0000	[diff] [blame]	630	dest_addr = cmd.out_tensor.address
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	631	else:
Rickard Bolin	17e53b5	2022-09-06 16:09:01 +0000	[diff] [blame]	632	src_addr = cmd.in_tensor.address_for_coordinate(cmd.box.start_coord)
				633	dest_addr = cmd.out_tensor.address_for_coordinate(cmd.box.start_coord)
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	634	sz = cmd.in_tensor.address_for_coordinate(cmd.box.end_coord, is_top_box=True) - src_addr
				635	src = NpuAddressRange(src_region, int(src_addr), int(sz))
				636	dest = NpuAddressRange(dest_region, int(dest_addr), int(sz))
				637	return NpuDmaOperation(src, dest)
				638
				639
				640	def convert_command_to_npu_op(cmd: Command, arch: ArchitectureFeatures) -> NpuOperation:
				641	"""Converts the high level command to NpuOperation"""
Dwight Lidman	9b43f84	2020-12-08 17:56:44 +0100	[diff] [blame]	642	npu_op: NpuOperation
				643	if isinstance(cmd, DMA):
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	644	npu_op = create_dma_op(cmd, arch)
Tim Hall	68df8a1	2022-03-16 16:51:16 +0000	[diff] [blame]	645	npu_op.name = cmd.out_tensor.name
Dwight Lidman	9b43f84	2020-12-08 17:56:44 +0100	[diff] [blame]	646	elif isinstance(cmd, NpuStripe):
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	647	npu_block_type = cmd.ps.primary_op.type.npu_block_type
				648	if npu_block_type in (NpuBlockType.ConvolutionMxN, NpuBlockType.VectorProduct):
				649	npu_op = create_npu_conv2d_op(cmd, arch)
				650	elif npu_block_type == NpuBlockType.ConvolutionDepthWise:
				651	npu_op = create_npu_conv_depthwise_op(cmd, arch)
				652	elif npu_block_type in (NpuBlockType.Pooling, NpuBlockType.ReduceSum):
				653	npu_op = create_npu_pool_op(cmd, arch)
				654	elif npu_block_type == NpuBlockType.ElementWise:
				655	npu_op = create_npu_elementwise_op(cmd, arch)
				656	else:
				657	assert 0, f"Unknown command type {npu_block_type}"
Tim Hall	68df8a1	2022-03-16 16:51:16 +0000	[diff] [blame]	658	npu_op.name = cmd.ps.primary_op.name
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	659	return npu_op
Louis Verhaard	1e17018	2020-11-26 11:42:04 +0100	[diff] [blame]	660
				661
				662	def generate_register_command_stream_for_sg(nng, sg, arch, verbose=False):
				663	"""Generates command stream for the subgraph, adds it to sg.register_command_stream"""
				664	# Convert high level command stream to list of NpuOperation
				665	npu_op_list = []
				666	npu_op_to_cmd = dict() # map from npu op to high level command
				667	for cmd in sg.high_level_command_stream:
Dwight Lidman	9b43f84	2020-12-08 17:56:44 +0100	[diff] [blame]	668	if isinstance(cmd, NpuStripe) and cmd.ps.npu_block_type == NpuBlockType.Default:
Louis Verhaard	1e17018	2020-11-26 11:42:04 +0100	[diff] [blame]	669	print("Warning: Skipping register command stream generation for", cmd.ps)
				670	else:
				671	npu_op = convert_command_to_npu_op(cmd, arch)
				672	npu_op_list.append(npu_op)
				673	npu_op_to_cmd[npu_op] = cmd
Louis Verhaard	024c355	2021-03-17 14:26:34 +0100	[diff] [blame]	674	mem_limits = get_mem_limits_for_regions(arch)
Louis Verhaard	1e17018	2020-11-26 11:42:04 +0100	[diff] [blame]	675	# Generate register commands
erik.andersson@arm.com	ad45f79	2021-02-03 10:20:16 +0100	[diff] [blame]	676	if len(sg.high_level_command_stream) > 0:
				677	stream_id = DebugDatabase.add_stream(sg)
				678	sg.generated_stream_id = stream_id
Louis Verhaard	1e17018	2020-11-26 11:42:04 +0100	[diff] [blame]	679
erik.andersson@arm.com	ad45f79	2021-02-03 10:20:16 +0100	[diff] [blame]	680	def add_to_debug_db(npu_op: NpuOperation, offset: int):
				681	"""Adds info to the debug database"""
				682	if not isinstance(npu_op, NpuDmaOperation):
				683	cmd = npu_op_to_cmd[npu_op]
				684	DebugDatabase.add_command(stream_id, offset, cmd.ps.primary_op)
Louis Verhaard	1e17018	2020-11-26 11:42:04 +0100	[diff] [blame]	685
Louis Verhaard	024c355	2021-03-17 14:26:34 +0100	[diff] [blame]	686	sg.register_command_stream = generate_command_stream(
				687	npu_op_list, arch, verbose, mem_limits, add_to_debug_db, npu_op_to_cmd
				688	)