blob: 7125e889d809ddc9df48a4905547372dd73d1e8a [file] [log] [blame]
# SPDX-FileCopyrightText: Copyright 2020-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Description:
# Contains external APIs
Louis Verhaarde8a5a782020-11-02 18:04:27 +010019from enum import auto
20from enum import Enum
21from typing import List
22from typing import NamedTuple
23from typing import Optional
24from typing import Tuple
25
Louis Verhaardaeae5672020-11-02 18:04:27 +010026import numpy
27
Patrik Gustavssonc74682c2021-08-17 14:26:38 +020028
# Version of this external API; bump on any change to the public interface.
# The combined value is also exposed packed into one int by npu_get_api_version().
API_VERSION_MAJOR = 1
API_VERSION_MINOR = 5
API_VERSION = f"{API_VERSION_MAJOR}.{API_VERSION_MINOR}"
Patrik Gustavssonc8a22f12020-11-18 17:05:50 +010032
Louis Verhaarde8a5a782020-11-02 18:04:27 +010033
class NpuAccelerator(Enum):
    """
    Supported accelerators
    """

    # NOTE(review): the numeric suffix presumably denotes the accelerator's MAC
    # configuration — confirm against the Arm Ethos-U55/U65 documentation
    Ethos_U55_32 = auto()
    Ethos_U55_64 = auto()
    Ethos_U55_128 = auto()
    Ethos_U55_256 = auto()
    Ethos_U65_256 = auto()
    Ethos_U65_512 = auto()
45
46
class NpuElementWiseOp(Enum):
    """
    Elementwise operation, used as the sub-operation type of NpuElementWiseOperation
    """

    ADD = auto()  # Addition
    SUB = auto()  # Subtraction
    MUL = auto()  # Multiplication
    ABS = auto()  # Absolute value
    MIN = auto()  # Elementwise minimum
    MAX = auto()  # Elementwise maximum
    LRELU = auto()  # Leaky relu
    CLZ = auto()  # Number leading zeros
    SHR = auto()  # Rounded right-shift
    SHL = auto()  # Bitwise shift-left
62
63
class NpuPoolingOp(Enum):
    """
    Pooling operation, used as the sub-operation type of NpuPoolingOperation
    """

    MAX = auto()  # Max pooling
    AVERAGE = auto()  # Average pooling
    REDUCE_SUM = auto()  # Sum reduction
72
73
class NpuActivationOp(Enum):
    """
    Activation function (see NpuActivation, where the operation is configured)
    """

    NONE_OR_RELU = auto()  # Clamps output using min/max
    TANH = auto()  # Hyperbolic tangent
    SIGMOID = auto()  # Sigmoid
    TABLE_LOOKUP = auto()  # Performs table look-up, using the provided table lookup index
83
84
class NpuRoundingMode(Enum):
    """
    Available rounding modes.
    The default for block operations is TFL (see NpuBlockOperation.rounding_mode)
    """

    TFL = auto()  # TensorFlow Lite rounding
    TRUNCATE = auto()  # Truncate towards zero
    NATURAL = auto()  # Round to nearest with x.5 rounded up, towards +infinity
93
94
class NpuLayout(Enum):
    """
    Tensor layout of feature maps
    """

    NHWC = auto()  # Linear layout with axes in N, H, W, C order (as the name spells out)
    # NOTE(review): presumably a 16-channel "brick" layout — confirm against NPU documentation
    NHCWB16 = auto()

    def __str__(self):
        # Render as the bare member name, e.g. "NHWC"
        return self.name
105
106
class NpuResamplingMode(Enum):
    """
    Resampling mode, used for IFM upscaling (see NpuBlockOperation.ifm_upscale)
    """

    NONE = auto()  # No resampling is performed
    NEAREST = auto()  # 2x2 insert nearest
    TRANSPOSE = auto()  # 2x2 transpose
115
116
class NpuBlockTraversal(Enum):
    """
    Block-traversal of weights.
    Must be consistent with the block_traversal passed to npu_encode_weights()
    (see NpuConv2DOperation.block_traversal)
    """

    DEPTH_FIRST = auto()
    PART_KERNEL_FIRST = auto()
124
125
class NpuDataType(Enum):
    """
    Supported data types in feature maps.
    Each member's value is a (bit-width, signedness, unique tag) triple; the tag
    keeps equal-width types (e.g. UINT8/INT8) distinct enum members.
    """

    UINT8 = 8, False, auto()
    INT8 = 8, True, auto()
    UINT16 = 16, False, auto()
    INT16 = 16, True, auto()
    INT32 = 32, True, auto()

    def is_signed(self) -> bool:
        """Checks if this data type is signed or unsigned"""
        _, signed, _ = self.value
        return signed

    def size_in_bits(self) -> int:
        """Size of the data type in bits"""
        bits, _, _ = self.value
        return bits

    def size_in_bytes(self) -> int:
        """Size of the data type in bytes"""
        return self.size_in_bits() // 8

    def min_value(self) -> int:
        """Minimum representable value of this type"""
        return -(1 << (self.size_in_bits() - 1)) if self.is_signed() else 0

    def max_value(self) -> int:
        """Maximum representable value of this type"""
        unsigned_max = (1 << self.size_in_bits()) - 1
        # For signed types the sign bit halves the positive range
        return unsigned_max >> 1 if self.is_signed() else unsigned_max

    def __str__(self):
        return self.name

    __repr__ = __str__
167
168
class NpuAddressRange(NamedTuple):
    """
    A contiguous range of NPU-addressable memory
    """

    region: int  # Memory region, a value between 0 and 7
    address: int  # Start address, as an offset from the region's base address
    length: int  # Length of the range, in bytes

    def __str__(self):
        # Human-readable form with the address in hex, e.g. "(region=1, address=0x40, length=16)"
        return "(region={}, address={}, length={})".format(self.region, hex(self.address), self.length)
180
181
class NpuTileBox(NamedTuple):
    """
    Specifies the addresses and dimensions of the tiles of a feature map.
    A feature map can use 1 to 4 tiles
    """

    height_0: int  # The height of tile 0
    height_1: int  # The height of tile 1, 0 if unused
    width_0: int  # The width of tile 0, and tile 2 (if used)
    addresses: List[int]  # A list of 4 addresses, set unused addresses to 0
192
193
class NpuShape3D(NamedTuple):
    """
    Shape of (part of) a feature map.
    Also used to express block configs (see NpuBlockOperation.block_config
    and npu_find_block_configs)
    """

    height: int
    width: int
    depth: int
202
203
class NpuQuantization(NamedTuple):
    """
    Quantization parameters
    """

    scale_f32: Optional[float]  # Quantization scale, as a 32-bit float; may be None
    zero_point: int  # Quantization zero point
211
212
class NpuPadding(NamedTuple):
    """
    Padding to be applied to a convolution operation
    (see NpuBlockOperation.padding)
    """

    top: int  # Padding at the top edge
    left: int  # Padding at the left edge
    bottom: int  # Padding at the bottom edge
    right: int  # Padding at the right edge
222
223
class NpuActivation:
    """
    Activation function, fused with NPU operations
    (see NpuBlockOperation.activation)
    """

    def __init__(self, op_type: NpuActivationOp):
        self.op_type = op_type  # The activation operation to be performed
        # min/max are optional clamp bounds
        self.min: Optional[float] = None  # E.g. set to 0.0 for RELU
        self.max: Optional[float] = None  # E.g. set to 6.0 for RELU6
        # Table lookup index, only applicable for TABLE_LOOKUP activation, 0-7
        self.lookup_table_index: int = 0
236
237
class NpuFeatureMap:
    """
    Basic information about IFM, IFM2, OFM
    """

    def __init__(self):
        # Data type of the feature map elements
        self.data_type: NpuDataType = NpuDataType.UINT8
        # The memory region, a value 0-7
        self.region: int = 0
        # Shape of the feature map
        self.shape: NpuShape3D = NpuShape3D(height=0, width=0, depth=0)
        # The tiles that comprise the feature map. In the normal case when only 1 tile is used,
        # height_0 == self.shape.height, height_1 is 0, width_0 == self.shape.width, addresses[1:] are set to 0
        self.tiles: NpuTileBox = NpuTileBox(height_0=0, height_1=0, width_0=0, addresses=[0, 0, 0, 0])
        # Quantization of the feature map.
        # NOTE(review): annotation only — no value is assigned, so reading this attribute
        # before the caller sets it raises AttributeError; confirm whether `= None` was intended.
        self.quantization: Optional[NpuQuantization]
        # Memory layout of the feature map
        self.layout: NpuLayout = NpuLayout.NHWC
        # x/y/c strides used by the NPU when traversing the feature map, if None, vela will use default strides
        self.strides: Optional[NpuShape3D] = None
        # Used for debug
        self.name: Optional[str] = None
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100258
259
class NpuKernel:
    """
    Kernel information for NPU operations
    """

    def __init__(self, w: int, h: int, stride_x: int = 1, stride_y: int = 1, dilation_x: int = 1, dilation_y: int = 1):
        """
        :param w: kernel width
        :param h: kernel height
        :param stride_x: horizontal stride, must be positive
        :param stride_y: vertical stride, must be positive
        :param dilation_x: horizontal dilation, must be positive
        :param dilation_y: vertical dilation, must be positive
        """
        # Non-positive strides/dilations are programming errors; reject up front
        assert stride_x > 0 and stride_y > 0
        assert dilation_x > 0 and dilation_y > 0
        self.width, self.height = w, h
        self.stride_x, self.stride_y = stride_x, stride_y
        self.dilation_x, self.dilation_y = dilation_x, dilation_y
274
275
class NpuAccumulatorType(Enum):
    """
    Accumulator dtype of NPU operation
    (see NpuBlockOperation.accumulator_type)
    """

    Default = auto()  # Use the default accumulator type for the operation
    Int32 = auto()  # 32-bit integer accumulator
    Int40 = auto()  # 40-bit integer accumulator
285
class NpuOperationType(Enum):
    """
    Type of NPU operation; discriminator stored in NpuOperation.op_type
    """

    Dma = auto()  # See NpuDmaOperation
    Conv2D = auto()  # See NpuConv2DOperation
    ConvDepthWise = auto()  # See NpuConvDepthWiseOperation
    Pooling = auto()  # See NpuPoolingOperation
    ElementWise = auto()  # See NpuElementWiseOperation
297
class NpuOperation:
    """
    Base class for all NPU operations
    """

    def __init__(self, op_type: NpuOperationType):
        # Discriminator for the concrete operation type; set by subclass constructors
        self.op_type = op_type
        # Used for debug
        self.name: Optional[str] = None
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100307
308
class NpuDmaOperation(NpuOperation):
    """
    DMA operation, transferring data from a source to a destination address range
    """

    def __init__(self, src: NpuAddressRange, dest: NpuAddressRange):
        super().__init__(NpuOperationType.Dma)
        # Source address range of the transfer
        self.src = src
        # Destination address range of the transfer
        self.dest = dest
        # DMA channel, usually 0 (user channel)
        self.channel: int = 0
        # Channel mode, 0 = external, 1 = internal (should usually be 0)
        self.mode: int = 0
322
323
class NpuBlockOperation(NpuOperation):
    """
    Base class for operations which produce an OFM
    """

    def __init__(self, op_type: NpuOperationType):
        super().__init__(op_type)
        # Input feature map
        self.ifm: Optional[NpuFeatureMap] = None
        # Second input feature map, for binary elementwise operations
        self.ifm2: Optional[NpuFeatureMap] = None
        # The non-quantized scalar value in a binary elementwise operation. Only set if IFM2 is scalar
        self.ifm2_scalar: Optional[float] = None
        # Output feature map
        self.ofm: Optional[NpuFeatureMap] = None
        # Kernel parameters (size, stride, dilation)
        self.kernel: Optional[NpuKernel] = None
        # Weights, one element for each NPU core, empty if no weights are used.
        # Must have been compressed using npu_encode_weights()
        self.weights: List[NpuAddressRange] = []
        # Biases, one element for each NPU core, empty if no bias is used.
        # Must have been encoded using npu_encode_bias()
        self.biases: List[NpuAddressRange] = []
        # Padding to apply; may be None
        self.padding: Optional[NpuPadding] = None
        # Optional activation function to be applied
        self.activation: Optional[NpuActivation] = None
        # The block config to be used, which must be valid for the given operation.
        # See also npu_find_block_configs.
        # If the operation has weights, the depth of the block config must be the same as
        # the ofm depth used in the call to npu_encode_weights()
        # NOTE(review): annotation only — no default is assigned, so reading block_config
        # before the caller sets it raises AttributeError.
        self.block_config: NpuShape3D
        # Rounding mode used when scaling the OFM; TFL rounding by default
        self.rounding_mode: NpuRoundingMode = NpuRoundingMode.TFL
        # Set to True if the operation is fused with a Quantize operation (affects scaling)
        self.fused_quantize: bool = False
        # IFM upscaling to be applied
        self.ifm_upscale: NpuResamplingMode = NpuResamplingMode.NONE
        # Accumulator dtype to use (see NpuAccumulatorType)
        self.accumulator_type: NpuAccumulatorType = NpuAccumulatorType.Default
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100357
358
class NpuConv2DOperation(NpuBlockOperation):
    """
    NPU_OP_CONV operation
    """

    def __init__(self):
        super().__init__(NpuOperationType.Conv2D)
        # Block traversal must be consistent with the block_traversal parameter specified in
        # weight_compressor.encode_weights(), i.e. the value passed to npu_encode_weights()
        self.block_traversal: NpuBlockTraversal = NpuBlockTraversal.PART_KERNEL_FIRST
369
370
class NpuConvDepthWiseOperation(NpuBlockOperation):
    """
    NPU_OP_DEPTHWISE operation.
    All parameters (kernel, weights, biases, ...) are provided via the
    NpuBlockOperation attributes
    """

    def __init__(self):
        super().__init__(NpuOperationType.ConvDepthWise)
378
379
class NpuPoolingOperation(NpuBlockOperation):
    """
    NPU_OP_POOL operation
    """

    def __init__(self, pooling_op_type: NpuPoolingOp):
        super().__init__(NpuOperationType.Pooling)
        # The specific pooling operation to perform
        self.sub_op_type: NpuPoolingOp = pooling_op_type
        # Set to a float value for ResizeBilinear/NearestNeighbor operations (affects scaling), else to None
        self.rescale: Optional[float] = None
390
391
class NpuElementWiseOperation(NpuBlockOperation):
    """
    NPU_OP_ELEMENTWISE operation
    """

    def __init__(self, elementwise_op_type: NpuElementWiseOp):
        super().__init__(NpuOperationType.ElementWise)
        # The specific elementwise operation to perform
        self.sub_op_type: NpuElementWiseOp = elementwise_op_type
        # Set to True for binary operators where IFM2 should be used as first operand
        self.reversed_operands: bool = False
        # Set to a tuple (scale, shift) for explicit rescale, else to None
        self.rescale: Optional[Tuple] = None
Patrik Gustavssonc8a22f12020-11-18 17:05:50 +0100404
405
def npu_get_api_version():
    """
    Public facing API to get the API version.

    :return: int; the 16 most significant bits hold the major version,
        the 16 least significant bits hold the minor version
    """
    # Pack major/minor into one word: major in the high half-word,
    # minor (masked to 16 bits) in the low half-word
    return (API_VERSION_MAJOR << 16) | (API_VERSION_MINOR & 0xFFFF)
Louis Verhaardaeae5672020-11-02 18:04:27 +0100414
415
def npu_encode_weights(
    accelerator: NpuAccelerator,
    weights_volume: numpy.ndarray,
    dilation_xy: Tuple[int, int],
    ifm_bitdepth: int,
    ofm_block_depth: int,
    is_depthwise: bool,
    block_traversal: NpuBlockTraversal,
):
    """
    Public facing API to use the Ethos-U weight encoding.

    :param accelerator: NpuAccelerator enum to pick the correct accelerator
    :param weights_volume: numpy.ndarray in OHWI layout with a shape of four
    :param dilation_xy: a two element tuple of dilation attributes in x,y dimension
    :param ifm_bitdepth: the bitdepth of input feature map
    :param ofm_block_depth: the depth of blocks for processing
    :param is_depthwise: a boolean indicating these weights are used for a depthwise traversal
    :param block_traversal: indicates how these weights are traversed on sub-kernel basis
    :return: a bytearray of encoded weights
    """
    # Imports deferred to call time — presumably to keep importing this API module
    # cheap and to avoid import cycles; pattern shared by all public APIs in this file
    from .architecture_features import Accelerator
    from . import weight_compressor

    # Translate the public accelerator enum to the internal representation
    acc = Accelerator.from_npu_accelerator(accelerator)
    # encode_weights returns a pair; only the encoded stream is part of the public
    # API, the second element is intentionally discarded
    encoded_weights, _ = weight_compressor.encode_weights(
        acc, weights_volume, dilation_xy, ifm_bitdepth, ofm_block_depth, is_depthwise, block_traversal
    )
    return encoded_weights
Louis Verhaardaeae5672020-11-02 18:04:27 +0100445
446
def npu_encode_bias(bias: numpy.int64, scale: int, shift: int):
    """
    Public facing API to pack bias and scale values as required by the hardware

    :param bias: 64-bit signed number that includes 40-bit signed bias
    :param scale: 32-bit scale value
    :param shift: 6-bit shift value
    :return: packed 80-bit [0(2-bits),shift(6-bits),scale(32-bits),bias(40-bits)]
    """
    # Import deferred to call time; see npu_encode_weights for rationale
    from . import weight_compressor

    # Thin wrapper; the actual bit-packing is implemented in the compiler internals
    return weight_compressor.encode_bias(bias, scale, shift)
458
459
def npu_find_block_configs(npu_op: NpuOperation, accelerator: NpuAccelerator) -> List[NpuShape3D]:
    """
    Public facing API that returns a list of block configs that are valid for the given operation.
    This function can be used to find a valid value for npu_op.block_config.
    The block config is the unit of work in which the NPU generates the OFM.

    :param npu_op: one of the NpuBlockOperation subclasses, with ifm/ofm (and
        ifm2/kernel/activation where applicable) already filled in
    :param accelerator: NpuAccelerator enum to pick the correct accelerator
    :raises AssertionError: if npu_op is not a supported operation type, or if
        no valid block config exists
    """
    # Imports deferred to call time; see npu_encode_weights for rationale
    from .architecture_features import Accelerator
    from .architecture_features import ArchitectureFeatures
    from .architecture_features import Block
    from .architecture_features import create_default_arch
    from .architecture_allocator import try_block_config
    from .register_command_stream_generator import resampling_mode_map
    from .register_command_stream_util import to_kernel
    from .operation import NpuBlockType

    # Map the public operation class to the internal block type
    is_partkernel = False
    if isinstance(npu_op, NpuConv2DOperation):
        block_type = NpuBlockType.ConvolutionMxN
        is_partkernel = npu_op.block_traversal == NpuBlockTraversal.PART_KERNEL_FIRST
    elif isinstance(npu_op, NpuConvDepthWiseOperation):
        block_type = NpuBlockType.ConvolutionDepthWise
    elif isinstance(npu_op, NpuPoolingOperation):
        block_type = NpuBlockType.ReduceSum if npu_op.sub_op_type == NpuPoolingOp.REDUCE_SUM else NpuBlockType.Pooling
    elif isinstance(npu_op, NpuElementWiseOperation):
        block_type = NpuBlockType.ElementWise
    else:
        assert 0, "Unsupported operation"

    # Convert the public NpuShape3D shapes to internal Block objects (note: width, height, depth order)
    ifm_shape = Block(npu_op.ifm.shape.width, npu_op.ifm.shape.height, npu_op.ifm.shape.depth)
    ifm2_shape = None
    if npu_op.ifm2:
        ifm2_shape = Block(npu_op.ifm2.shape.width, npu_op.ifm2.shape.height, npu_op.ifm2.shape.depth)
    ofm_shape = Block(npu_op.ofm.shape.width, npu_op.ofm.shape.height, npu_op.ofm.shape.depth)

    ifm_resampling_mode = resampling_mode_map[npu_op.ifm_upscale]
    ifm_bits = npu_op.ifm.data_type.size_in_bits()
    kernel = to_kernel(npu_op.kernel)
    # A table-lookup activation occupies 2 LUT banks
    lut_banks = 0
    if npu_op.activation:
        lut_banks = 2 if npu_op.activation.op_type == NpuActivationOp.TABLE_LOOKUP else 0

    # Scaling is only possible when every present feature map carries quantization info
    has_scaling = True
    for tensor in [npu_op.ifm, npu_op.ifm2, npu_op.ofm]:
        if tensor and tensor.quantization is None:
            has_scaling = False
            break

    arch = create_default_arch(Accelerator.from_npu_accelerator(accelerator))

    # The search space is bounded by the OFM shape and the architecture's maximum block size
    max_block_width = min(arch.ofm_block_max.width, ofm_shape.width)
    max_block_height = min(arch.ofm_block_max.height, ofm_shape.height)
    max_block_depth = min(arch.ofm_block_max.depth, ofm_shape.depth)

    # Block W/H must be at least one microblock, and at least 2 when the IFM is upscaled.
    # NOTE(review): ifm_resampling_mode is the value mapped through resampling_mode_map,
    # yet it is compared against NpuResamplingMode.NONE — if the map translates to a
    # different enum type, this comparison is always True; confirm the map's value type.
    min_block_height = max(arch.ofm_ublock.height, 2 if ifm_resampling_mode != NpuResamplingMode.NONE else 1)
    min_block_width = max(arch.ofm_ublock.width, 2 if ifm_resampling_mode != NpuResamplingMode.NONE else 1)

    # Exhaustively try every candidate block (in microblock steps) and keep the valid ones
    valid_block_configs = []
    for w in range(min_block_width, max_block_width + min_block_width, min_block_width):
        for h in range(min_block_height, max_block_height + min_block_height, min_block_height):
            # Try valid OFM block depths
            for c in range(arch.ofm_ublock.depth, max_block_depth + arch.ofm_ublock.depth, arch.ofm_ublock.depth):
                # OFM block depth has the constraint that if it causes the OFM to be
                # split, it must be a multiple of the OFM split size
                if (c >= max_block_depth) or (c < max_block_depth and (c % ArchitectureFeatures.OFMSplitDepth) == 0):
                    block = Block(w, h, c)
                    config = try_block_config(
                        block,
                        arch,
                        block_type,
                        ofm_shape,
                        ifm_shape,
                        ifm2_shape,
                        npu_op.ifm2_scalar is not None,
                        ifm_bits,
                        is_partkernel,
                        kernel,
                        lut_banks,
                        has_scaling,
                        ifm_resampling_mode,
                    )

                    if config:
                        ofm_block = config.ofm_block
                        # Convert back to the public NpuShape3D (height, width, depth order)
                        valid_block_configs.append(NpuShape3D(ofm_block.height, ofm_block.width, ofm_block.depth))

    assert len(valid_block_configs) > 0
    return valid_block_configs
Louis Verhaard933f55e2020-11-25 14:10:30 +0100547
548
def npu_generate_register_command_stream(npu_op_list: List[NpuOperation], accelerator: NpuAccelerator) -> List[int]:
    """
    Public facing API for generating an Ethos-U register command stream.
    Calculates dependencies between commands and inserts wait operations if needed.

    :param npu_op_list: List[NpuOperation] list of high level NPU operations
    :param accelerator: NpuAccelerator enum to pick the correct accelerator
    :return: register commands, as a list of 32-bit integers
    """
    # Import deferred to call time; see npu_encode_weights for rationale
    from . import register_command_stream_generator

    return register_command_stream_generator.generate_register_command_stream(npu_op_list, accelerator)
Louis Verhaard52078302020-11-18 13:35:06 +0100561
562
def npu_create_driver_payload(register_command_stream: List[int], accelerator: NpuAccelerator) -> bytes:
    """
    Public facing API for generating driver payload, containing a driver header
    and the given Ethos-U register command stream.
    Returns the payload, in little endian format, which must be placed in memory on a 16-byte aligned
    address.

    :param register_command_stream: List[int] register commands, as a list of 32-bit integers
        (e.g. as produced by npu_generate_register_command_stream)
    :param accelerator: NpuAccelerator enum to pick the correct accelerator
    :return: driver payload, as a byte array
    """
    # Import deferred to call time; see npu_encode_weights for rationale
    from . import driver_actions

    return driver_actions.npu_create_driver_payload(register_command_stream, accelerator)