# SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Description:
# Conversion from high level command to NpuOperation
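# The top level entry point is generate_register_command_stream_for_sg(), which converts each high level
# command of a subgraph into an NpuOperation and then generates the register command stream for it,
# e.g. (sketch): generate_register_command_stream_for_sg(nng, sg, arch)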
from enum import IntEnum
from typing import cast
from typing import Dict
from typing import List
from typing import Optional
from typing import Tuple

from .api import NpuActivation
from .api import NpuActivationOp
from .api import NpuAddressRange
from .api import NpuBlockOperation
from .api import NpuBlockTraversal
from .api import NpuConv2DOperation
from .api import NpuConvDepthWiseOperation
from .api import NpuDataType
from .api import NpuDmaOperation
from .api import NpuElementWiseOp
from .api import NpuElementWiseOperation
from .api import NpuFeatureMap
from .api import NpuLayout
from .api import NpuOperation
from .api import NpuOperationType
from .api import NpuPadding
from .api import NpuPoolingOp
from .api import NpuPoolingOperation
from .api import NpuQuantization
from .api import NpuResamplingMode
from .api import NpuRoundingMode
from .api import NpuShape3D
from .api import NpuTileBox
from .architecture_features import ArchitectureFeatures
from .data_type import DataType
from .debug_database import DebugDatabase
from .errors import UnsupportedFeatureError
from .ethos_u55_regs.ethos_u55_regs import resampling_mode
from .high_level_command_stream import Box
from .high_level_command_stream import Command
from .high_level_command_stream import DMA
from .high_level_command_stream import NOP
from .high_level_command_stream import NpuStripe
from .numeric_util import quantise_float32
from .numeric_util import round_up
from .operation import NpuBlockType
from .operation import Op
from .operation import Operation
from .operation import Padding
from .register_command_stream_generator import generate_command_stream
from .register_command_stream_util import BASE_PTR_INDEX_MEM2MEM
from .register_command_stream_util import to_npu_kernel
from .register_command_stream_util import UNARY_ELEMWISE_OPS
from .shape4d import Shape4D
from .tensor import MemType
from .tensor import Tensor
from .tensor import TensorFormat
from .tensor import TensorPurpose
from .weight_compressor import NpuWeightTensor
from .weight_compressor import WeightKey


class BasePointerIndex(IntEnum):
    WeightTensor = 0  # base address index for the Weight tensor
    ScratchTensor = 1  # base address index for the Scratch_tensor in the TensorArena
    ScratchFastTensor = 2  # base address for the Scratch_fast_tensor


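# Maps a Vela DataType to the corresponding NpuDataType used in the external NPU API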
dtype_map = {
    DataType.uint8: NpuDataType.UINT8,
    DataType.int8: NpuDataType.INT8,
    DataType.uint16: NpuDataType.UINT16,
    DataType.int16: NpuDataType.INT16,
    DataType.int32: NpuDataType.INT32,
}


# Maps an elementwise op type to an elementwise_mode enum value used by NPU_OP_ELEMENTWISE
elementwise_op_map = {
    Op.Mul: NpuElementWiseOp.MUL,
    Op.Add: NpuElementWiseOp.ADD,
    Op.Sub: NpuElementWiseOp.SUB,
    Op.Minimum: NpuElementWiseOp.MIN,
    Op.Maximum: NpuElementWiseOp.MAX,
    Op.LeakyRelu: NpuElementWiseOp.LRELU,
    Op.Abs: NpuElementWiseOp.ABS,
    Op.CLZ: NpuElementWiseOp.CLZ,
    Op.SHR: NpuElementWiseOp.SHR,
    Op.SHL: NpuElementWiseOp.SHL,
}


# inverse of the resampling_mode_map in the register command stream generator
resampling_mode_inv_map = {
    resampling_mode.NONE: NpuResamplingMode.NONE,
    resampling_mode.NEAREST: NpuResamplingMode.NEAREST,
    resampling_mode.TRANSPOSE: NpuResamplingMode.TRANSPOSE,
}


def ifm_ifm2_correct_order(ifm_shape: Shape4D, ifm2_shape: Shape4D) -> bool:

    if ifm_shape is None:
        # Scalar needs to be in IFM2
        return False
    if ifm2_shape is None:
        return True

    for ifm, ifm2 in zip(ifm_shape.as_list(), ifm2_shape.as_list()):
        if ifm != ifm2 and ifm == 1:
            # Broadcasted FM needs to be in IFM2
            return False
    return True


def get_rounding_mode(op: Operation, fused_quantize: bool) -> NpuRoundingMode:
    """Specifies type of rounding to be used"""
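    # Default to TFLite rounding; natural rounding is used for resize ops, for int16 convolutions
    # (standard and depthwise) and for 1x1 average pools doing a concat slice write without a fused
    # quantize. An explicitly set op.rounding_mode overrides the selection below.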
    rounding_mode = NpuRoundingMode.TFL
    if op.type.is_resize_op():
        rounding_mode = NpuRoundingMode.NATURAL
    elif (
        op.type.npu_block_type in (NpuBlockType.ConvolutionMxN, NpuBlockType.ConvolutionDepthWise)
        and op.ifm.dtype == DataType.int16
    ):
        rounding_mode = NpuRoundingMode.NATURAL
    elif (
        not fused_quantize
        and op.type.is_avgpool_op()
        and op.memory_function == Op.ConcatSliceWrite
        and op.kernel.elements_wh() == 1
    ):
        rounding_mode = NpuRoundingMode.NATURAL
    if op.rounding_mode is not None:
        rounding_mode = op.rounding_mode
    return rounding_mode


def create_padding(cmd: NpuStripe, primary_op: Operation, npu_op: NpuBlockOperation) -> NpuPadding:
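    # Vector product (fully connected) operations never use IFM padding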
    if primary_op.type.npu_block_type == NpuBlockType.VectorProduct:
        return NpuPadding(top=0, left=0, bottom=0, right=0)
    top, left, bottom, right = primary_op.attrs["explicit_padding"]

    # Check if this is for horizontal ifm streaming
    if not (cmd.is_first_h_stripe and cmd.is_last_h_stripe):
        top = cmd.pad_top
        bottom = cmd.pad_bottom

    # the ifm box coordinate range depends upon whether the primary op was combined with a split slice read
    ifm_read_offset = primary_op.read_offsets[0]
    ifm_read_shape = primary_op.read_shapes[0]
    if ifm_read_offset is None or len(ifm_read_offset) < 2:
        box_start_coord_min = 0
        box_end_coord_max = cmd.ps.ifm_shapes[0].width
    else:
        box_start_coord_min = ifm_read_offset[-2]
        box_end_coord_max = ifm_read_shape[-2]

    # Index from the end, since a 1x1 AvgPool might have been added with non 4-dimensional input/output
    # because an activation function needed to be fused.
    if len(cmd.ifm_box.start_coord) >= 2 and cmd.ifm_box.start_coord[-2] > box_start_coord_min:
        left = 0
    if len(cmd.ifm_box.end_coord) >= 2 and cmd.ifm_box.end_coord[-2] < box_end_coord_max:
        right = 0

    # If tile padding is selected, modify the tile base addresses and set NpuPadding to zero.
    if primary_op.attrs.get("padding", None) == Padding.TILE:
        assert cmd.ifm_tensor.format == TensorFormat.NHCWB16, "Tensor format NHCWB16 required to perform tile padding"
        assert npu_op.op_type == NpuOperationType.ConvDepthWise, "Tile padding only supported for depthwise convolution"
        assert npu_op.ifm is not None, "Feature map must be initialized to modify the tile addresses"
        npu_op.ifm.tiles = modify_tile_addresses_for_padding(
            npu_op.ifm.tiles,
            primary_op.attrs.get("explicit_padding", None),
            channels=cmd.ps.ifm_shapes[0].depth,
            dtype=cmd.ifm_tensor.dtype,
        )
        top, left, bottom, right = 0, 0, 0, 0

    return NpuPadding(top=top, left=left, bottom=bottom, right=right)


def modify_tile_addresses_for_padding(
    tile_box: NpuTileBox, padding_direction: List[int], channels: int, dtype: DataType
) -> NpuTileBox:
    # Addresses are 16-byte aligned when using the NHCWB16 format, which is required to utilize tiling
    # Calculate the offset to top right, bottom left and bottom right element in the IFM (top left offset is 0)
    """
    Example: 4x4x1 IFM
    | a b c d |  <-- Offset to TR ('d') is (w0-1) = 3
    | e f g h |
    | i j k l |
    | m n o p |  <-- Offset to BL ('m') is (w0*(h0-1)) = 12 and to BR ('p') ((w0*h0)-1) = 15
    """
    h0, h1, w0, addresses = tile_box
    elem_size = 2 if dtype == DataType.int16 else 1
    tr_offset = (w0 - 1) * 16 * elem_size
    bl_offset = w0 * (h0 - 1) * 16 * (round_up(channels, 16) // 16) * elem_size
    br_offset = tr_offset + bl_offset
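    # For example, with the 4x4x1 int8 IFM in the docstring above (w0=4, h0=4, channels=1):
    # tr_offset = 3 * 16 = 48 bytes, bl_offset = 4 * 3 * 16 = 192 bytes and br_offset = 240 bytes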

    # Explicit padding order: (Top, Left, Bottom, Right)
    if padding_direction == (1, 1, 0, 0):
        # Pad top left corner
        """
                     | a a b |
        | a b |  ->  | a a b |
        | c d |      | c c d |
        """
        addresses = [addresses[0]] * 4
        h0, h1, w0 = 1, 1, 1

    elif padding_direction == (1, 0, 0, 1):
        # Pad top right corner
        """
                     | a b b |
        | a b |  ->  | a b b |
        | c d |      | c d d |
        """
        addresses = [addresses[0], addresses[0] + tr_offset, addresses[0], addresses[0] + tr_offset]
        h0, h1, w0 = 1, 1, w0

    elif padding_direction == (0, 1, 1, 0):
        # Pad bottom left corner
        """
        | a b |      | a a b |
        | c d |  ->  | c c d |
                     | c c d |
        """
        addresses = [addresses[0], addresses[0], addresses[0] + bl_offset, addresses[0] + bl_offset]
        h0, h1, w0 = h0, h1, 1

    elif padding_direction == (0, 0, 1, 1):
        # Pad bottom right corner
        """
        | a b |      | a b b |
        | c d |  ->  | c d d |
                     | c d d |
        """
        addresses = [
            addresses[0],
            addresses[0] + tr_offset,
            addresses[0] + bl_offset,
            addresses[0] + br_offset,
        ]
        # h0, h1, w0 = h0, h1, w0
    else:
        assert 0, "Invalid padding direction for tile padding"

    return NpuTileBox(height_0=h0, height_1=h1, width_0=w0, addresses=[int(addr) for addr in addresses])


def get_region(mem_type: MemType, arch: ArchitectureFeatures) -> int:
    base_ptr_idx_map = {
        MemType.Permanent_NPU: BasePointerIndex.WeightTensor,
        MemType.Permanent_CPU: BasePointerIndex.WeightTensor,
        MemType.Scratch: BasePointerIndex.ScratchTensor,
    }

    if arch.is_spilling_enabled():
        base_ptr_idx_map[MemType.Scratch_fast] = BasePointerIndex.ScratchFastTensor
    else:
        base_ptr_idx_map[MemType.Scratch_fast] = BasePointerIndex.ScratchTensor

    return base_ptr_idx_map[mem_type].value


def get_mem_limits_for_regions(arch: ArchitectureFeatures) -> Dict[int, int]:
    """Returns map region -> max size of the region in bytes"""
    mem_limits = dict()
    for mem_type in MemType.all():
        mem_limits[get_region(mem_type, arch)] = arch.mem_type_size(mem_type)
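    # The mem2mem region is used as the DMA destination for LUTs and is limited by the SHRAM size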
    mem_limits[BASE_PTR_INDEX_MEM2MEM] = arch.shram_size_bytes
    return mem_limits


def get_ifm_depth(npu_block_type: NpuBlockType, ifm_box: Box, ofm_box: Box) -> int:
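    # MxN convolutions, vector products and reduce sum consume the full IFM depth, so take it from the
    # IFM box; for the other block types the depth comes from the OFM box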
    if npu_block_type in (NpuBlockType.ConvolutionMxN, NpuBlockType.VectorProduct, NpuBlockType.ReduceSum):
        block = ifm_box.get_block()
    else:
        block = ofm_box.get_block()
    return block.depth


def use_zero_point_0(ps, tens: Tensor, is_ifm_tensor: bool) -> bool:
    """Checks if quantization should use 0 as zero point"""
    if tens.dtype == DataType.int32 and is_ifm_tensor:
        return True
    # Force zero point to 0 for ResizeBilinear when converting to a DepthwiseConv since the reference kernel
    # will ignore the zero point.
    if ps.primary_op.original_type == Op.ResizeBilinear and ps.primary_op.type == Op.DepthwiseConv2DBias:
        return True
    if ps.primary_op.type not in (Op.AvgPool, Op.CLZ, Op.SHL) and not ps.primary_op.type.is_resize_op():
        return False
    if ps.primary_op.type == Op.AvgPool and ps.primary_op.explicit_scaling:
        return False
    fused_quantize = any(op.type == Op.Quantize for op in ps.ops)
    forced_ofm_quantization = ps.primary_op.forced_output_quantization
    use_0 = (
        (
            ps.primary_op.activation is None
            or forced_ofm_quantization is not None
            or (ps.primary_op.type.is_avgpool_op() and ps.primary_op.activation.op_type.is_relu_op())
        )
        and (ps.primary_op.memory_function != Op.ConcatSliceWrite)
        and not fused_quantize
    )
    return use_0


def get_ifm_or_ifm2_quantization(ps, tens: Tensor) -> Optional[NpuQuantization]:
    """Gets quantization for IFM/IFM2"""
    op = ps.primary_op
    ifm_quant = op.forced_input_quantization if op.forced_input_quantization is not None else tens.quantization
    if ifm_quant is None:
        return None
    if use_zero_point_0(ps, tens, True):
        zero_point = 0
    else:
        zero_point = int(ifm_quant.zero_point)
    return NpuQuantization(scale_f32=ifm_quant.scale_f32, zero_point=zero_point)


def get_ofm_quantization(ps, tens: Tensor) -> Optional[NpuQuantization]:
    """Gets quantization for OFM"""
    op = ps.primary_op
    # Check if the operation's forced output quantization should be used instead of the output tensor's
    # quantization (used in LUTs)
    ofm_quant = op.forced_output_quantization if op.forced_output_quantization is not None else tens.quantization
    if ofm_quant is None:
        return None
    if use_zero_point_0(ps, tens, False):
        zero_point = 0
    else:
        zero_point = int(ofm_quant.zero_point)
    return NpuQuantization(scale_f32=ofm_quant.scale_f32, zero_point=zero_point)


def create_feature_map(
    tens: Tensor,
    box: Box,
    arch: ArchitectureFeatures,
    op_shape4D: Shape4D,
    tile_base_offsets: List[int],
    stride_multiplier: Optional[List[int]] = None,
) -> NpuFeatureMap:
    """Creates feature map with common fields populated"""
    fm = NpuFeatureMap()
    fm.region = get_region(tens.mem_type, arch)
    fm.data_type = dtype_map[tens.dtype]
    if tens.format == TensorFormat.NHWC:
        fm.layout = NpuLayout.NHWC
    elif tens.format == TensorFormat.NHCWB16:
        fm.layout = NpuLayout.NHCWB16
    else:
        assert 0, "Incorrect tensor format"

    strides = tens.get_strides(op_shape4D)
    assert strides is not None

    if stride_multiplier and stride_multiplier != [1, 1, 1]:
        assert (
            tens.format == TensorFormat.NHWC
        ), "Only default stride multiplier ([1, 1, 1]) supported for NHCWB16 format"
        # Multiply strides for C/H/W (in that order) with corresponding stride factor
        for i, stride_factor in enumerate(stride_multiplier, start=1):
            strides[i] *= stride_factor

    height_0, height_1, width_0, addresses = tens.addresses_for_rolling_buffer(
        box.start_coord, box.end_coord, strides, op_shape4D
    )

    for idx, offset in enumerate(tile_base_offsets):
        addresses[idx] += offset
    fm.tiles = NpuTileBox(
        height_0=height_0, height_1=height_1, width_0=width_0, addresses=[int(addr) for addr in addresses]
    )
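    # strides[1:] are ordered C, H, W (see the stride_multiplier handling above); repack them into the
    # height/width/depth fields of NpuShape3D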
    fm.strides = NpuShape3D(height=int(strides[2]), width=int(strides[3]), depth=int(strides[1]))
    fm.name = tens.name
    return fm


def create_weights(
    weight_tensor: NpuWeightTensor, weight_box: Box, scale_tensor: NpuWeightTensor, arch: ArchitectureFeatures
) -> Tuple[List[NpuAddressRange], List[NpuAddressRange]]:
    """Returns address ranges for weights and scales"""
    weights = []
    biases = []
    shared_region = get_region(weight_tensor.mem_type, arch)
    scale_region = get_region(scale_tensor.mem_type, arch) if scale_tensor else 0

    w_tensor_src = weight_tensor
    if weight_tensor.src_tensor:
        w_tensor_src = cast(NpuWeightTensor, weight_tensor.src_tensor)

    core_offset = 0
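    # Weights and scales are encoded separately per NPU core, so collect one address range per core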
    for core in range(0, arch.ncores):
        # Get weight range per core
        key = WeightKey(core, weight_box.start_coord[-1])
        if key in w_tensor_src.encoded_ranges:
            weight_range = w_tensor_src.encoded_ranges[key]
            if weight_tensor == w_tensor_src:
                # Straight from source tensor
                address = weight_tensor.address + weight_range.offset
            else:
                # Weight buffered tensor
                address = weight_tensor.address + core_offset
                core_offset += round_up(weight_range.total_bytes, 16)

            # Location of weights in tensor
            addr_range = NpuAddressRange(
                shared_region, int(address + weight_range.weight_offset), round_up(int(weight_range.weight_bytes), 16)
            )
            weights.append(addr_range)

            # Location of standalone scales or combined weights tensor scales
            if scale_tensor:
                assert scale_tensor.src_tensor is None  # Must be standalone
                scale_range = scale_tensor.encoded_ranges[key]
                address = scale_tensor.address + scale_range.offset
                addr_range = NpuAddressRange(scale_region, int(address), round_up(int(scale_range.scale_bytes), 16))
            else:
                addr_range = NpuAddressRange(shared_region, int(address), round_up(int(weight_range.scale_bytes), 16))

            biases.append(addr_range)

    return weights, biases


def create_npu_activation(op: Operation) -> NpuActivation:
    """Creates fused activation function"""
    if op.activation is None:
        return NpuActivation(NpuActivationOp.NONE_OR_RELU)
    faf = op.activation.op_type
    act_op = NpuActivationOp.NONE_OR_RELU
    if faf == Op.Tanh:
        act_op = NpuActivationOp.TANH
    elif faf == Op.Sigmoid:
        act_op = NpuActivationOp.SIGMOID
    elif faf == Op.LUT:
        act_op = NpuActivationOp.TABLE_LOOKUP
    elif not faf.is_relu_op():
        raise UnsupportedFeatureError(f"Unsupported fused_activation_function: {faf.name}")

    act = NpuActivation(act_op)
    act.min = op.activation.min
    act.max = op.activation.max
    if act_op is NpuActivationOp.NONE_OR_RELU and op.type.is_avgpool_op() and not op.explicit_scaling:
        quant = op.ofm.quantization
        if quant and quant.zero_point:  # Zero point is not 0
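            # Adjust the fused activation clip range so that it accounts for the non-zero OFM zero point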
            scale_f32 = 1 if quant.scale_f32 is None else quant.scale_f32
            zero_point = quant.zero_point
            if act.min is not None:
                act.min = scale_f32 * quantise_float32(act.min, scale_f32, zero_point)
            if act.max is not None:
                act.max = scale_f32 * quantise_float32(act.max, scale_f32, zero_point)
    act.lookup_table_index = op.activation.lut_index
    return act


def set_common_op_fields(npu_op: NpuBlockOperation, cmd: NpuStripe, arch: ArchitectureFeatures):
    """Sets common fields of the given operation"""
    ps = cmd.ps
    op = ps.primary_op

    ifm_height = cmd.ifm_box.get_block().height
    ifm_width = cmd.ifm_box.get_block().width
    ifm_depth = get_ifm_depth(op.type.npu_block_type, cmd.ifm_box, cmd.ofm_box)

    npu_op.ifm = create_feature_map(cmd.ifm_tensor, cmd.ifm_box, arch, ps.ifm_shapes[0], op.tile_base_offsets_ifm[0])
    npu_op.ifm.shape = NpuShape3D(height=ifm_height, width=ifm_width, depth=ifm_depth)
    npu_op.ifm.quantization = get_ifm_or_ifm2_quantization(ps, cmd.ifm_tensor)

    out_block = cmd.ofm_box.get_block()
    npu_op.ofm = create_feature_map(
        cmd.ofm_tensor, cmd.ofm_box, arch, ps.ofm_shapes[0], op.tile_base_offsets_ofm, op.ofm_stride_multiplier
    )
    npu_op.ofm.shape = NpuShape3D(height=out_block.height, width=out_block.width, depth=out_block.depth)
    npu_op.ofm.quantization = get_ofm_quantization(ps, cmd.ofm_tensor)

    if cmd.weight_tensor is not None:
        npu_op.weights, npu_op.biases = create_weights(cmd.weight_tensor, cmd.weight_box, cmd.scale_tensor, arch)
    npu_op.activation = create_npu_activation(op)
    npu_op.fused_quantize = any(op.type == Op.Quantize for op in ps.ops)
    npu_op.rounding_mode = get_rounding_mode(op, npu_op.fused_quantize)
    npu_op.block_config = NpuShape3D(height=ps.block_config[0], width=ps.block_config[1], depth=ps.block_config[3])

    if not op.type.is_elementwise_op():
        npu_op.padding = create_padding(cmd, op, npu_op)
        npu_op.kernel = to_npu_kernel(op.kernel)
    npu_op.ifm_upscale = resampling_mode_inv_map[op.ifm_resampling_mode]
    return npu_op


def create_npu_conv2d_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> NpuConv2DOperation:
    """Converts the command to NpuConv2DOperation"""
    npu_op = NpuConv2DOperation()
    set_common_op_fields(npu_op, cmd, arch)
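    # Vector product (fully connected) operations always use depth-first block traversal; other
    # convolutions use the traversal chosen when the weights were encoded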
    if cmd.ps.primary_op.type.npu_block_type == NpuBlockType.VectorProduct:
        npu_op.block_traversal = NpuBlockTraversal.DEPTH_FIRST
    else:
        if cmd.weight_tensor.src_tensor:
            npu_op.block_traversal = cmd.weight_tensor.src_tensor.hw_traversal
        else:
            npu_op.block_traversal = cmd.weight_tensor.hw_traversal
    return npu_op


def create_npu_conv_depthwise_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> NpuConvDepthWiseOperation:
    """Converts the command to NpuConvDepthWiseOperation"""
    npu_op = NpuConvDepthWiseOperation()
    set_common_op_fields(npu_op, cmd, arch)
    return npu_op


def create_npu_pool_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> NpuPoolingOperation:
    """Converts the command to NpuPoolingOperation"""
    ps = cmd.ps
    op = ps.primary_op
    if op.type.is_maxpool_op():
        pool_op = NpuPoolingOp.MAX
    elif op.type.is_avgpool_op() or op.type.is_resize_op():
        pool_op = NpuPoolingOp.AVERAGE
    elif op.type == Op.ReduceSum:
        pool_op = NpuPoolingOp.REDUCE_SUM
    else:
        assert 0, f"Unknown pool type {op.type}"
    npu_op = NpuPoolingOperation(pool_op)
    set_common_op_fields(npu_op, cmd, arch)
    # Pooling specific info
    if op.explicit_scaling:
        # Note: the rescale field is reused for explicit scaling, so that explicit scaling is not exposed
        # in the external API
        npu_op.rescale = op.explicit_scaling
    return npu_op


def create_npu_elementwise_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> NpuElementWiseOperation:
    """Converts the command to NpuElementWiseOperation"""
    ps = cmd.ps
    op = ps.primary_op
    assert op.type in elementwise_op_map, f"Unknown elementwise type {op.type}"
    elemwise_op = elementwise_op_map[op.type]
    npu_op = NpuElementWiseOperation(elemwise_op)

    if elemwise_op not in UNARY_ELEMWISE_OPS:
        ifm_shape = None if cmd.ifm_tensor.shape == [] else ps.ifm_shapes[0]
        ifm2_shape = None if cmd.ifm2_tensor.shape == [] else ps.ifm_shapes[1]
        if cmd.reversed_operands:
            assert ifm_ifm2_correct_order(ifm_shape, ifm2_shape)
            npu_op.reversed_operands = True
        elif not ifm_ifm2_correct_order(ifm_shape, ifm2_shape):
            # The scalar/broadcasted feature map has to be the ifm2 tensor so switch the ifms
            cmd.ifm_tensor, cmd.ifm2_tensor = cmd.ifm2_tensor, cmd.ifm_tensor
            cmd.ifm_box, cmd.ifm2_box = cmd.ifm2_box, cmd.ifm_box
            ps.ifm_shapes[0], ps.ifm_shapes[1] = ps.ifm_shapes[1], ps.ifm_shapes[0]
            npu_op.reversed_operands = True
        npu_op.ifm2 = create_feature_map(
            cmd.ifm2_tensor,
            cmd.ifm2_box,
            arch,
            ps.ifm_shapes[1],
            op.tile_base_offsets_ifm[1],
        )
        npu_op.ifm2.quantization = get_ifm_or_ifm2_quantization(ps, cmd.ifm2_tensor)
        if cmd.ifm2_tensor.shape == []:
            # scalar
            npu_op.ifm2_scalar = cmd.ifm2_tensor.get_scalar()
            npu_op.ifm2.shape = NpuShape3D(height=0, width=0, depth=0)
        else:
            ifm2_blk = cmd.ifm2_box.get_block()
            npu_op.ifm2.shape = NpuShape3D(height=ifm2_blk.height, width=ifm2_blk.width, depth=ifm2_blk.depth)
    set_common_op_fields(npu_op, cmd, arch)
    # Check if output scale needs to be overridden
    output_scale = None
    if op.explicit_scaling is not None:
        assert not op.explicit_scaling.per_channel
        assert op.type in (Op.Add, Op.Mul, Op.Sub)
        npu_op.rescale = (op.explicit_scaling.multiplier[0], op.explicit_scaling.shift[0])
    elif op.type == Op.Add and op.original_type.is_resize_op():
        # Force the output scale to be the same as the input scale for
        # resizebilinear/nearestneighbor 1x1 that is converted to add
        output_scale = npu_op.ifm2.quantization.scale_f32
    elif op.type == Op.Abs:
        output_scale = npu_op.ifm.quantization.scale_f32 / npu_op.ofm.quantization.scale_f32
    elif op.type == Op.LeakyRelu:
        output_scale = op.attrs["alpha"]
    elif op.type in (Op.Add, Op.Mul, Op.Sub):
        if op.activation is not None and op.activation.op_type in (Op.Sigmoid, Op.Tanh):
            output_scale = 1 / 0x3000
    if output_scale is not None:
        npu_op.ofm.quantization = NpuQuantization(scale_f32=output_scale, zero_point=npu_op.ofm.quantization.zero_point)
    return npu_op


def create_dma_op(cmd: DMA, arch: ArchitectureFeatures) -> NpuDmaOperation:
    """Converts the command to NpuDmaOperation"""
    src_region = get_region(cmd.in_tensor.mem_type, arch)
    if cmd.out_tensor.purpose == TensorPurpose.LUT:
        dest_region = BASE_PTR_INDEX_MEM2MEM
    else:
        dest_region = get_region(cmd.out_tensor.mem_type, arch)

    if cmd.in_tensor.purpose == TensorPurpose.Weights:
        # Get weight range per core
        sz = 0
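        # The DMA copies the encoded weights for all cores in one transfer, so the size is the sum of the
        # per-core ranges (each rounded up to 16 bytes), while the source address is taken from core 0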
        for core in range(0, arch.ncores):
            key = WeightKey(core, cmd.box.start_coord[-1])
            if key in cmd.in_tensor.encoded_ranges:
                weight_range = cmd.in_tensor.encoded_ranges[key]
                sz += round_up(weight_range.total_bytes, 16)

                if core == 0:
                    weight_range = cmd.in_tensor.encoded_ranges[key]
                    src_addr = cmd.in_tensor.address + weight_range.offset
                    dest_addr = cmd.out_tensor.address
    else:
        src_addr = cmd.in_tensor.address_for_coordinate(cmd.box.start_coord)
        dest_addr = cmd.out_tensor.address_for_coordinate(cmd.box.start_coord)
        # DMA must use 16-byte alignment (tensors are always aligned, but the sz calculation uses the actual size)
        sz = round_up(cmd.in_tensor.address_for_coordinate(cmd.box.end_coord, is_top_box=True) - src_addr, 16)
    src = NpuAddressRange(src_region, int(src_addr), int(sz))
    dest = NpuAddressRange(dest_region, int(dest_addr), int(sz))
    return NpuDmaOperation(src, dest)


def convert_command_to_npu_op(cmd: Command, arch: ArchitectureFeatures) -> NpuOperation:
    """Converts the high level command to NpuOperation"""
    npu_op: NpuOperation
    if isinstance(cmd, DMA):
        npu_op = create_dma_op(cmd, arch)
        npu_op.name = cmd.out_tensor.name
    elif isinstance(cmd, NpuStripe):
        npu_block_type = cmd.ps.primary_op.type.npu_block_type
        if npu_block_type in (NpuBlockType.ConvolutionMxN, NpuBlockType.VectorProduct):
            npu_op = create_npu_conv2d_op(cmd, arch)
        elif npu_block_type == NpuBlockType.ConvolutionDepthWise:
            npu_op = create_npu_conv_depthwise_op(cmd, arch)
        elif npu_block_type in (NpuBlockType.Pooling, NpuBlockType.ReduceSum):
            npu_op = create_npu_pool_op(cmd, arch)
        elif npu_block_type == NpuBlockType.ElementWise:
            npu_op = create_npu_elementwise_op(cmd, arch)
        else:
            assert 0, f"Unknown command type {npu_block_type}"
        npu_op.name = cmd.ps.primary_op.name
    return npu_op


def generate_register_command_stream_for_sg(nng, sg, arch, verbose=False):
    """Generates command stream for the subgraph, adds it to sg.register_command_stream"""
    # Convert high level command stream to list of NpuOperation
    npu_op_list = []
    npu_op_to_cmd = dict()  # map from npu op to high level command
    for cmd in sg.high_level_command_stream:
        if isinstance(cmd, NpuStripe) and cmd.ps.npu_block_type == NpuBlockType.Default:
            print("Warning: Skipping register command stream generation for", cmd.ps)
        elif isinstance(cmd, NOP):
            # NOP should not generate anything
            continue
        else:
            npu_op = convert_command_to_npu_op(cmd, arch)
            npu_op_list.append(npu_op)
            npu_op_to_cmd[npu_op] = cmd
    mem_limits = get_mem_limits_for_regions(arch)
    # Generate register commands
    if len(sg.high_level_command_stream) > 0:
        stream_id = DebugDatabase.add_stream(sg)
        sg.generated_stream_id = stream_id

        def add_to_debug_db(npu_op: NpuOperation, offset: int):
            """Adds info to the debug database"""
            if not isinstance(npu_op, NpuDmaOperation):
                cmd = npu_op_to_cmd[npu_op]
                DebugDatabase.add_command(stream_id, offset, cmd.ps.primary_op)

        sg.register_command_stream = generate_command_stream(
            npu_op_list, arch, verbose, mem_limits, add_to_debug_db, npu_op_to_cmd
        )