Blame - ethosu/vela/api.py - ml/ethos-u/ethos-u-vela

blob: 399fd46d7072cabe56191dbf6bcaf04ed54d7f4b [file] [log] [blame]

Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	1	# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
				2	#
				3	# SPDX-License-Identifier: Apache-2.0
				4	#
				5	# Licensed under the Apache License, Version 2.0 (the License); you may
				6	# not use this file except in compliance with the License.
				7	# You may obtain a copy of the License at
				8	#
				9	# www.apache.org/licenses/LICENSE-2.0
				10	#
				11	# Unless required by applicable law or agreed to in writing, software
				12	# distributed under the License is distributed on an AS IS BASIS, WITHOUT
				13	# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	# See the License for the specific language governing permissions and
				15	# limitations under the License.
				16	#
				17	# Description:
Louis Verhaard	aeae567	2020-11-02 18:04:27 +0100	[diff] [blame]	18	# Contains external APIs
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	19	from enum import auto
				20	from enum import Enum
				21	from typing import List
				22	from typing import NamedTuple
				23	from typing import Optional
				24	from typing import Tuple
				25
Louis Verhaard	aeae567	2020-11-02 18:04:27 +0100	[diff] [blame]	26	import numpy
				27
Patrik Gustavsson	c74682c	2021-08-17 14:26:38 +0200	[diff] [blame]	28
# Version of the external API, kept as separate major and minor components
API_VERSION_MAJOR = 1
API_VERSION_MINOR = 3
# The same version as a "<major>.<minor>" string
API_VERSION = f"{API_VERSION_MAJOR}.{API_VERSION_MINOR}"
Patrik Gustavsson	c8a22f1	2020-11-18 17:05:50 +0100	[diff] [blame]	32
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	33
class NpuAccelerator(Enum):
    """
    Accelerator configurations that can be selected through the external API
    """

    # Ethos-U55 configurations
    Ethos_U55_32 = auto()
    Ethos_U55_64 = auto()
    Ethos_U55_128 = auto()
    Ethos_U55_256 = auto()
    # Ethos-U65 configurations
    Ethos_U65_256 = auto()
    Ethos_U65_512 = auto()
				45
				46
class NpuElementWiseOp(Enum):
    """
    The kind of elementwise operation to perform
    """

    ADD = auto()
    SUB = auto()
    MUL = auto()
    ABS = auto()
    MIN = auto()
    MAX = auto()
    # Leaky relu
    LRELU = auto()
    # Number of leading zeros
    CLZ = auto()
    # Rounded right-shift
    SHR = auto()
    # Bitwise shift-left
    SHL = auto()
				62
				63
				64	class NpuPoolingOp(Enum):
				65	"""
				66	Pooling operation
				67	"""
				68
				69	MAX = auto()
				70	AVERAGE = auto()
				71	REDUCE_SUM = auto()
				72
				73
				74	class NpuActivationOp(Enum):
				75	"""
				76	Activation function
				77	"""
				78
				79	NONE_OR_RELU = auto() # Clamps output using min/max
				80	TANH = auto()
				81	SIGMOID = auto()
				82	TABLE_LOOKUP = auto() # Performs table look-up, using the provided table lookup index
				83
				84
				85	class NpuRoundingMode(Enum):
				86	"""
				87	Available rounding modes
				88	"""
				89
				90	TFL = auto() # TensorFlow Lite rounding
				91	TRUNCATE = auto() # Truncate towards zero
				92	NATURAL = auto() # Round to nearest with x.5 rounded up, towards +infinity
				93
				94
				95	class NpuLayout(Enum):
				96	"""
				97	Tensor layout of feature maps
				98	"""
				99
				100	NHWC = auto()
				101	NHCWB16 = auto()
				102
				103	def __str__(self):
				104	return self.name
				105
				106
				107	class NpuResamplingMode(Enum):
				108	"""
				109	Resampling mode
				110	"""
				111
				112	NONE = auto() # No resampling is performed
				113	NEAREST = auto() # 2x2 insert nearest
				114	TRANSPOSE = auto() # 2x2 transpose
				115
				116
				117	class NpuBlockTraversal(Enum):
				118	"""
				119	Block-traversal of weights
				120	"""
				121
				122	DEPTH_FIRST = auto()
				123	PART_KERNEL_FIRST = auto()
				124
				125
				126	class NpuDataType(Enum):
				127	"""
				128	Supported data types in feature maps
				129	"""
				130
				131	UINT8 = 8, False, auto()
				132	INT8 = 8, True, auto()
				133	UINT16 = 16, False, auto()
				134	INT16 = 16, True, auto()
				135	INT32 = 32, True, auto()
				136
				137	def is_signed(self) -> bool:
				138	"""Checks if this data type is signed or unsigned"""
				139	return self.value[1]
				140
				141	def size_in_bits(self) -> int:
Jonas Ohlsson	d857507	2022-03-30 10:30:25 +0200	[diff] [blame]	142	"""Size of the data type in bits"""
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	143	return self.value[0]
				144
				145	def size_in_bytes(self) -> int:
Jonas Ohlsson	d857507	2022-03-30 10:30:25 +0200	[diff] [blame]	146	"""Size of the data type in bytes"""
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	147	return self.value[0] // 8
				148
				149	def min_value(self) -> int:
				150	"""Minimum value of this type"""
				151	if self.is_signed():
				152	return -(1 << (self.size_in_bits() - 1))
				153	else:
				154	return 0
				155
				156	def max_value(self) -> int:
				157	"""Maximum value of this type"""
				158	if self.is_signed():
				159	return (1 << (self.size_in_bits() - 1)) - 1
				160	else:
				161	return (1 << self.size_in_bits()) - 1
				162
				163	def __str__(self):
				164	return self.name
				165
				166	__repr__ = __str__
				167
				168
				169	class NpuAddressRange(NamedTuple):
				170	"""
				171	Address range
				172	"""
				173
				174	region: int # Memory region, a value between 0 and 7
				175	address: int # Address, offset from the region's base address
				176	length: int # The length of the range, in bytes
				177
				178	def __str__(self):
				179	return f"(region={self.region}, address={hex(self.address)}, length={self.length})"
				180
				181
				182	class NpuTileBox(NamedTuple):
				183	"""
				184	Specifies the addresses and dimensions of the tiles of a feature map.
				185	A feature map can use 1 to 4 tiles
				186	"""
				187
				188	height_0: int # The height of tile 0
				189	height_1: int # The height of tile 1, 0 if unused
				190	width_0: int # the width of tile 0, and tile 2 (if used)
				191	addresses: List[int] # A list of 4 addresses, set unused addresses to 0
				192
				193
				194	class NpuShape3D(NamedTuple):
				195	"""
				196	Shape of (part of) a feature map
				197	"""
				198
				199	height: int
				200	width: int
				201	depth: int
				202
				203
				204	class NpuQuantization(NamedTuple):
				205	"""
				206	Quantization parameters
				207	"""
				208
				209	scale_f32: Optional[float]
				210	zero_point: int
				211
				212
				213	class NpuPadding(NamedTuple):
				214	"""
				215	Padding to be applied to a convolution operation
				216	"""
				217
				218	top: int
				219	left: int
				220	bottom: int
				221	right: int
				222
				223
				224	class NpuActivation:
				225	"""
				226	Activation function, fused with NPU operations
				227	"""
				228
				229	def __init__(self, op_type: NpuActivationOp):
				230	self.op_type = op_type # The activation operation to be performed
				231	# min/max are optional
				232	self.min: Optional[float] = None # E.g. set to 0.0 for RELU
				233	self.max: Optional[float] = None # E.g. set to 6.0 for RELU6
				234	# Table lookup index, only applicable for TABLE_LOOKUP activation, 0-7
				235	self.lookup_table_index: int = 0
				236
				237
				238	class NpuFeatureMap:
				239	"""
				240	Basic information about IFM, IFM2, OFM
				241	"""
				242
				243	def __init__(self):
				244	self.data_type: NpuDataType = NpuDataType.UINT8
				245	# The memory region, a value 0-7
				246	self.region: int = 0
				247	# Shape of the feature map
				248	self.shape: NpuShape3D = NpuShape3D(height=0, width=0, depth=0)
				249	# The tiles that comprise the feature map. In the normal case when only 1 tile is used,
				250	# height_0 == self.shape.height, height_1 is 0, width_0 == self.shape.width, addresses[1:] are set to 0
				251	self.tiles: NpuTileBox = NpuTileBox(height_0=0, height_1=0, width_0=0, addresses=[0, 0, 0, 0])
				252	self.quantization: Optional[NpuQuantization]
				253	self.layout: NpuLayout = NpuLayout.NHWC
				254	# x/y/c strides used by the NPU when traversing the feature map, if None, vela will use default strides
				255	self.strides: Optional[NpuShape3D] = None
Tim Hall	68df8a1	2022-03-16 16:51:16 +0000	[diff] [blame]	256	# Used for debug
				257	self.name: Optional[str] = None
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	258
				259
				260	class NpuKernel:
				261	"""
				262	Kernel information for NPU operations
				263	"""
				264
				265	def __init__(self, w: int, h: int, stride_x: int = 1, stride_y: int = 1, dilation_x: int = 1, dilation_y: int = 1):
				266	assert stride_x > 0 and stride_y > 0
				267	assert dilation_x > 0 and dilation_y > 0
				268	self.width = w
				269	self.height = h
				270	self.stride_x = stride_x
				271	self.stride_y = stride_y
				272	self.dilation_x = dilation_x
				273	self.dilation_y = dilation_y
				274
				275
				276	class NpuOperationType(Enum):
				277	"""
				278	Type of NPU operation
				279	"""
				280
				281	Dma = auto()
				282	Conv2D = auto()
				283	ConvDepthWise = auto()
				284	Pooling = auto()
				285	ElementWise = auto()
				286
				287
				288	class NpuOperation:
				289	"""
				290	Base class for all NPU operations
				291	"""
				292
				293	def __init__(self, op_type: NpuOperationType):
				294	self.op_type = op_type
Tim Hall	68df8a1	2022-03-16 16:51:16 +0000	[diff] [blame]	295	# Used for debug
				296	self.name: Optional[str] = None
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	297
				298
				299	class NpuDmaOperation(NpuOperation):
				300	"""
				301	DMA operation
				302	"""
				303
				304	def __init__(self, src: NpuAddressRange, dest: NpuAddressRange):
				305	super().__init__(NpuOperationType.Dma)
				306	self.src = src
				307	self.dest = dest
				308	# DMA channel, usually 0 (user channel)
				309	self.channel: int = 0
				310	# Channel mode, 0 = external, 1 = internal (should usually be 0)
				311	self.mode: int = 0
				312
				313
				314	class NpuBlockOperation(NpuOperation):
				315	"""
				316	Base class for operations which produce an OFM
				317	"""
				318
				319	def __init__(self, op_type: NpuOperationType):
				320	super().__init__(op_type)
				321	self.ifm: Optional[NpuFeatureMap] = None
				322	self.ifm2: Optional[NpuFeatureMap] = None
				323	# The non-quantized scalar value in a binary elementwise operation. Only set if IFM2 is scalar
				324	self.ifm2_scalar: Optional[float] = None
				325	self.ofm: Optional[NpuFeatureMap] = None
				326	self.kernel: Optional[NpuKernel] = None
				327	# Weights, one element for each NPU core, empty if no weights are used.
Louis Verhaard	933f55e	2020-11-25 14:10:30 +0100	[diff] [blame]	328	# Must have been compressed using npu_encode_weights()
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	329	self.weights: List[NpuAddressRange] = []
				330	# Biases, one element for each NPU core, empty if no bias is used.
Louis Verhaard	933f55e	2020-11-25 14:10:30 +0100	[diff] [blame]	331	# Must have been encoded using npu_encode_bias()
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	332	self.biases: List[NpuAddressRange] = []
				333	self.padding: Optional[NpuPadding] = None
				334	# Optional activation function to be applied
				335	self.activation: Optional[NpuActivation] = None
Louis Verhaard	933f55e	2020-11-25 14:10:30 +0100	[diff] [blame]	336	# The block config to be used, which must be valid for the given operation.
				337	# See also npu_find_block_configs.
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	338	# If the operation has weights, the depth of the block config must be the same as
Louis Verhaard	933f55e	2020-11-25 14:10:30 +0100	[diff] [blame]	339	# the ofm depth used in the call to npu_encode_weights()
				340	self.block_config: NpuShape3D
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	341	self.rounding_mode: NpuRoundingMode = NpuRoundingMode.TFL
				342	# Set to True if the operations is fused with a Quantize operation (affects scaling)
				343	self.fused_quantize: bool = False
				344	# IFM upscaling to be applied
				345	self.ifm_upscale: NpuResamplingMode = NpuResamplingMode.NONE
				346
				347
				348	class NpuConv2DOperation(NpuBlockOperation):
				349	"""
				350	NPU_OP_CONV operation
				351	"""
				352
				353	def __init__(self):
				354	super().__init__(NpuOperationType.Conv2D)
				355	# Block traversal must be consistent with the block_traversal parameter specified in
				356	# weight_compressor.encode_weights()
				357	self.block_traversal: NpuBlockTraversal = NpuBlockTraversal.PART_KERNEL_FIRST
				358
				359
				360	class NpuConvDepthWiseOperation(NpuBlockOperation):
				361	"""
				362	NPU_OP_DEPTHWISE operation
				363	"""
				364
				365	def __init__(self):
				366	super().__init__(NpuOperationType.ConvDepthWise)
				367
				368
				369	class NpuPoolingOperation(NpuBlockOperation):
				370	"""
				371	NPU_OP_POOL operation
				372	"""
				373
				374	def __init__(self, pooling_op_type: NpuPoolingOp):
				375	super().__init__(NpuOperationType.Pooling)
				376	self.sub_op_type: NpuPoolingOp = pooling_op_type
				377	# Set to a float value for ResizeBilinear operations (affects scaling), else to None
				378	self.rescale: Optional[float] = None
				379
				380
				381	class NpuElementWiseOperation(NpuBlockOperation):
				382	"""
				383	NPU_OP_ELEMENTWISE operation
				384	"""
				385
				386	def __init__(self, elementwise_op_type: NpuElementWiseOp):
				387	super().__init__(NpuOperationType.ElementWise)
				388	self.sub_op_type: NpuElementWiseOp = elementwise_op_type
				389	# Set to True for binary operators where IFM2 should be used as first operand
				390	self.reversed_operands: bool = False
				391	# Set to a tuple (scale, shift) for explicit rescale, else to None
				392	self.rescale: Optional[Tuple] = None
Patrik Gustavsson	c8a22f1	2020-11-18 17:05:50 +0100	[diff] [blame]	393
				394
def npu_get_api_version():
    """
    Public facing API to get the API version
    :return: int, with the major version in the 16 most significant bits
             and the minor version in the 16 least significant bits
    """
    major_part = API_VERSION_MAJOR << 16
    minor_part = API_VERSION_MINOR & 0xFFFF
    return major_part | minor_part
Louis Verhaard	aeae567	2020-11-02 18:04:27 +0100	[diff] [blame]	403
				404
				405	def npu_encode_weights(
				406	accelerator: NpuAccelerator,
				407	weights_volume: numpy.ndarray,
				408	dilation_xy: Tuple[int, int],
				409	ifm_bitdepth: int,
				410	ofm_block_depth: int,
				411	is_depthwise: bool,
				412	block_traversal: NpuBlockTraversal,
				413	):
				414	"""
				415	Public facing API to use the Ethos-U weight encoding.
				416
				417	:param accelerator: NpuAccelerator enum to pick the correct accelerator
				418	:param weights_volume: numpy.ndarray in OHWI layout with a shape of four
				419	:param dilation_xy: a two element tuple of dilation attributes in x,y dimension
				420	:param ifm_bitdepth: the bitdepth of input feature map
				421	:param ofm_block_depth: the depth of blocks for processing
				422	:param is_depthwise: a boolean indicating these weights are used for a depthwise traversal
				423	:param block_traversal: indicates how these weights are traversed on sub-kernel basis
Fredrik Svedberg	f5c07c4	2021-04-23 14:36:42 +0200	[diff] [blame]	424	:return: a bytearray of encoded weights
Louis Verhaard	aeae567	2020-11-02 18:04:27 +0100	[diff] [blame]	425	"""
				426	from .architecture_features import Accelerator
				427	from . import weight_compressor
				428
				429	acc = Accelerator.from_npu_accelerator(accelerator)
Fredrik Svedberg	f5c07c4	2021-04-23 14:36:42 +0200	[diff] [blame]	430	encoded_weights, _ = weight_compressor.encode_weights(
Louis Verhaard	aeae567	2020-11-02 18:04:27 +0100	[diff] [blame]	431	acc, weights_volume, dilation_xy, ifm_bitdepth, ofm_block_depth, is_depthwise, block_traversal
				432	)
Fredrik Svedberg	f5c07c4	2021-04-23 14:36:42 +0200	[diff] [blame]	433	return encoded_weights
Louis Verhaard	aeae567	2020-11-02 18:04:27 +0100	[diff] [blame]	434
				435
				436	def npu_encode_bias(bias: numpy.int64, scale: int, shift: int):
				437	"""
				438	Public facing API to pack bias and scale values as required by the hardware
				439	:param bias: 64-bit signed number that includes 40-bit signed bias
				440	:param scale: 32-bit scale value
				441	:param shift: 6-bit shift value
				442	:return: packed 80-bit [0(2-bits),shift(6-bits),scale(32-bits),bias(40-bits)]
				443	"""
				444	from . import weight_compressor
				445
				446	return weight_compressor.encode_bias(bias, scale, shift)
				447
				448
def npu_find_block_configs(npu_op: NpuOperation, accelerator: NpuAccelerator) -> List[NpuShape3D]:
    """
    Public facing API that returns a list of block configs that are valid for the given operation.
    This function can be used to find a valid value for npu_op.block_config.
    The block config is the unit of work in which the NPU generates the OFM.

    :param npu_op: the operation to find block configs for; must be a convolution, depthwise
                   convolution, pooling or elementwise operation which has its ifm and ofm set
    :param accelerator: NpuAccelerator enum to pick the correct accelerator
    :return: list of valid block configs, as NpuShape3D(height, width, depth)
    :raises AssertionError: if the type of npu_op is not supported, or (when assertions are
                            enabled) if no valid block config was found
    """
    from .architecture_features import Accelerator
    from .architecture_features import ArchitectureFeatures
    from .architecture_features import Block
    from .architecture_features import create_default_arch
    from .architecture_allocator import try_block_config
    from .register_command_stream_generator import resampling_mode_map
    from .register_command_stream_util import to_kernel
    from .operation import NpuBlockType

    is_partkernel = False
    if isinstance(npu_op, NpuConv2DOperation):
        block_type = NpuBlockType.ConvolutionMxN
        is_partkernel = npu_op.block_traversal == NpuBlockTraversal.PART_KERNEL_FIRST
    elif isinstance(npu_op, NpuConvDepthWiseOperation):
        block_type = NpuBlockType.ConvolutionDepthWise
    elif isinstance(npu_op, NpuPoolingOperation):
        block_type = NpuBlockType.ReduceSum if npu_op.sub_op_type == NpuPoolingOp.REDUCE_SUM else NpuBlockType.Pooling
    elif isinstance(npu_op, NpuElementWiseOperation):
        block_type = NpuBlockType.ElementWise
    else:
        # Raised explicitly instead of using "assert 0": an assert statement is removed when Python
        # runs with -O, which would let execution continue with block_type unbound
        raise AssertionError("Unsupported operation")

    # Note that Block takes width before height, whereas NpuShape3D has height first
    ifm_shape = Block(npu_op.ifm.shape.width, npu_op.ifm.shape.height, npu_op.ifm.shape.depth)
    ifm2_shape = None
    if npu_op.ifm2:
        ifm2_shape = Block(npu_op.ifm2.shape.width, npu_op.ifm2.shape.height, npu_op.ifm2.shape.depth)
    ofm_shape = Block(npu_op.ofm.shape.width, npu_op.ofm.shape.height, npu_op.ofm.shape.depth)

    ifm_resampling_mode = resampling_mode_map[npu_op.ifm_upscale]
    ifm_bits = npu_op.ifm.data_type.size_in_bits()
    kernel = to_kernel(npu_op.kernel)
    lut_banks = 0
    if npu_op.activation:
        lut_banks = 2 if npu_op.activation.op_type == NpuActivationOp.TABLE_LOOKUP else 0

    # has_scaling is True only if every feature map that is present has quantization parameters.
    # getattr is used because the quantization attribute of a feature map may never have been
    # assigned, which is treated the same as None
    has_scaling = all(
        getattr(feature_map, "quantization", None) is not None
        for feature_map in (npu_op.ifm, npu_op.ifm2, npu_op.ofm)
        if feature_map
    )
    has_ifm2_scalar = npu_op.ifm2_scalar is not None

    arch = create_default_arch(Accelerator.from_npu_accelerator(accelerator))

    max_block_width = min(arch.ofm_block_max.width, ofm_shape.width)
    max_block_height = min(arch.ofm_block_max.height, ofm_shape.height)
    max_block_depth = min(arch.ofm_block_max.depth, ofm_shape.depth)

    # The upscaling check is done on the API level value (npu_op.ifm_upscale). ifm_resampling_mode is
    # the value looked up in resampling_mode_map; if that is not an NpuResamplingMode it never equals
    # NpuResamplingMode.NONE, which would force the minimum block size to 2 even without upscaling
    min_upscale_size = 2 if npu_op.ifm_upscale != NpuResamplingMode.NONE else 1
    min_block_height = max(arch.ofm_ublock.height, min_upscale_size)
    min_block_width = max(arch.ofm_ublock.width, min_upscale_size)
    ublock_depth = arch.ofm_ublock.depth

    valid_block_configs = []
    for w in range(min_block_width, max_block_width + min_block_width, min_block_width):
        for h in range(min_block_height, max_block_height + min_block_height, min_block_height):
            # Try valid OFM block depths
            for c in range(ublock_depth, max_block_depth + ublock_depth, ublock_depth):
                # OFM block depth has the constraint that if it causes the OFM to be
                # split, it must be a multiple of the OFM split size
                if c < max_block_depth and c % ArchitectureFeatures.OFMSplitDepth != 0:
                    continue
                config = try_block_config(
                    Block(w, h, c),
                    arch,
                    block_type,
                    ofm_shape,
                    ifm_shape,
                    ifm2_shape,
                    has_ifm2_scalar,
                    ifm_bits,
                    is_partkernel,
                    kernel,
                    lut_banks,
                    has_scaling,
                    ifm_resampling_mode,
                )

                if config:
                    ofm_block = config.ofm_block
                    valid_block_configs.append(NpuShape3D(ofm_block.height, ofm_block.width, ofm_block.depth))

    assert len(valid_block_configs) > 0, "No valid block config found"
    return valid_block_configs
Louis Verhaard	933f55e	2020-11-25 14:10:30 +0100	[diff] [blame]	536
				537
def npu_generate_register_command_stream(npu_op_list: List[NpuOperation], accelerator: NpuAccelerator) -> List[int]:
    """
    Public facing API for generating an Ethos-U register command stream.
    Calculates dependencies between commands and inserts wait operations if needed.

    :param npu_op_list: List[NpuOperation] list of high level NPU operations
    :param accelerator: NpuAccelerator enum to pick the correct accelerator
    :return: register commands, as a list of 32-bit integers
    """
    from . import register_command_stream_generator

    # The generation is delegated to the register command stream generator module
    command_stream = register_command_stream_generator.generate_register_command_stream(npu_op_list, accelerator)
    return command_stream
Louis Verhaard	5207830	2020-11-18 13:35:06 +0100	[diff] [blame]	550
				551
				552	def npu_create_driver_payload(register_command_stream: List[int], accelerator: NpuAccelerator) -> bytes:
				553	"""
				554	Public facing API for generating driver payload, containing a driver header
				555	and the given Ethos-U register command stream.
				556	Returns the payload, in little endian format, which must be placed in memory on a 16-byte aligned
				557	address.
				558
				559	:param register_command_stream: List[int] register commands, as a list of 32-bit integers
				560	:param accelerator: NpuAccelerator enum to pick the correct accelerator
				561	:return driver payload, as a byte array
				562	"""
				563	from . import driver_actions
				564
				565	return driver_actions.npu_create_driver_payload(register_command_stream, accelerator)