Blame - ethosu/vela/api.py - ml/ethos-u/ethos-u-vela

blob: e91c0bdb0043eec801642ef450dd3fbecc5aa2c3 [file] [log] [blame]

Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	1	# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
				2	#
				3	# SPDX-License-Identifier: Apache-2.0
				4	#
				5	# Licensed under the Apache License, Version 2.0 (the License); you may
				6	# not use this file except in compliance with the License.
				7	# You may obtain a copy of the License at
				8	#
				9	# www.apache.org/licenses/LICENSE-2.0
				10	#
				11	# Unless required by applicable law or agreed to in writing, software
				12	# distributed under the License is distributed on an AS IS BASIS, WITHOUT
				13	# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	# See the License for the specific language governing permissions and
				15	# limitations under the License.
				16	#
				17	# Description:
Louis Verhaard	aeae567	2020-11-02 18:04:27 +0100	[diff] [blame]	18	# Contains external APIs
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	19	from enum import auto
				20	from enum import Enum
				21	from typing import List
				22	from typing import NamedTuple
				23	from typing import Optional
				24	from typing import Tuple
				25
Louis Verhaard	aeae567	2020-11-02 18:04:27 +0100	[diff] [blame]	26	import numpy
				27
# Version of the external API: the two integer components, plus a
# "<major>.<minor>" string built from them.
API_VERSION_MAJOR = 1
API_VERSION_MINOR = 0
API_VERSION = f"{API_VERSION_MAJOR}.{API_VERSION_MINOR}"
Patrik Gustavsson	c8a22f1	2020-11-18 17:05:50 +0100	[diff] [blame]	31
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	32
class NpuAccelerator(Enum):
    """
    Supported accelerators

    Passed to the public API functions in this module to pick the accelerator
    that the generated data should target.
    """

    Ethos_U55_32 = auto()
    Ethos_U55_64 = auto()
    Ethos_U55_128 = auto()
    Ethos_U55_256 = auto()
    Ethos_U65_256 = auto()
    Ethos_U65_512 = auto()
				44
				45
class NpuElementWiseOp(Enum):
    """
    Elementwise operation

    Used as the sub operation type of an elementwise NPU operation.
    """

    ADD = auto()
    SUB = auto()
    MUL = auto()
    ABS = auto()
    MIN = auto()
    MAX = auto()
    LRELU = auto()  # Leaky relu
    CLZ = auto()  # Number leading zeros
    SHR = auto()  # Rounded right-shift
    SHL = auto()  # Bitwise shift-left
				61
				62
				63	class NpuPoolingOp(Enum):
				64	"""
				65	Pooling operation
				66	"""
				67
				68	MAX = auto()
				69	AVERAGE = auto()
				70	REDUCE_SUM = auto()
				71
				72
				73	class NpuActivationOp(Enum):
				74	"""
				75	Activation function
				76	"""
				77
				78	NONE_OR_RELU = auto() # Clamps output using min/max
				79	TANH = auto()
				80	SIGMOID = auto()
				81	TABLE_LOOKUP = auto() # Performs table look-up, using the provided table lookup index
				82
				83
				84	class NpuRoundingMode(Enum):
				85	"""
				86	Available rounding modes
				87	"""
				88
				89	TFL = auto() # TensorFlow Lite rounding
				90	TRUNCATE = auto() # Truncate towards zero
				91	NATURAL = auto() # Round to nearest with x.5 rounded up, towards +infinity
				92
				93
				94	class NpuLayout(Enum):
				95	"""
				96	Tensor layout of feature maps
				97	"""
				98
				99	NHWC = auto()
				100	NHCWB16 = auto()
				101
				102	def __str__(self):
				103	return self.name
				104
				105
				106	class NpuResamplingMode(Enum):
				107	"""
				108	Resampling mode
				109	"""
				110
				111	NONE = auto() # No resampling is performed
				112	NEAREST = auto() # 2x2 insert nearest
				113	TRANSPOSE = auto() # 2x2 transpose
				114
				115
				116	class NpuBlockTraversal(Enum):
				117	"""
				118	Block-traversal of weights
				119	"""
				120
				121	DEPTH_FIRST = auto()
				122	PART_KERNEL_FIRST = auto()
				123
				124
				125	class NpuDataType(Enum):
				126	"""
				127	Supported data types in feature maps
				128	"""
				129
				130	UINT8 = 8, False, auto()
				131	INT8 = 8, True, auto()
				132	UINT16 = 16, False, auto()
				133	INT16 = 16, True, auto()
				134	INT32 = 32, True, auto()
				135
				136	def is_signed(self) -> bool:
				137	"""Checks if this data type is signed or unsigned"""
				138	return self.value[1]
				139
				140	def size_in_bits(self) -> int:
				141	""" Size of the data type in bits"""
				142	return self.value[0]
				143
				144	def size_in_bytes(self) -> int:
				145	""" Size of the data type in bytes"""
				146	return self.value[0] // 8
				147
				148	def min_value(self) -> int:
				149	"""Minimum value of this type"""
				150	if self.is_signed():
				151	return -(1 << (self.size_in_bits() - 1))
				152	else:
				153	return 0
				154
				155	def max_value(self) -> int:
				156	"""Maximum value of this type"""
				157	if self.is_signed():
				158	return (1 << (self.size_in_bits() - 1)) - 1
				159	else:
				160	return (1 << self.size_in_bits()) - 1
				161
				162	def __str__(self):
				163	return self.name
				164
				165	__repr__ = __str__
				166
				167
				168	class NpuAddressRange(NamedTuple):
				169	"""
				170	Address range
				171	"""
				172
				173	region: int # Memory region, a value between 0 and 7
				174	address: int # Address, offset from the region's base address
				175	length: int # The length of the range, in bytes
				176
				177	def __str__(self):
				178	return f"(region={self.region}, address={hex(self.address)}, length={self.length})"
				179
				180
				181	class NpuTileBox(NamedTuple):
				182	"""
				183	Specifies the addresses and dimensions of the tiles of a feature map.
				184	A feature map can use 1 to 4 tiles
				185	"""
				186
				187	height_0: int # The height of tile 0
				188	height_1: int # The height of tile 1, 0 if unused
				189	width_0: int # the width of tile 0, and tile 2 (if used)
				190	addresses: List[int] # A list of 4 addresses, set unused addresses to 0
				191
				192
				193	class NpuShape3D(NamedTuple):
				194	"""
				195	Shape of (part of) a feature map
				196	"""
				197
				198	height: int
				199	width: int
				200	depth: int
				201
				202
				203	class NpuQuantization(NamedTuple):
				204	"""
				205	Quantization parameters
				206	"""
				207
				208	scale_f32: Optional[float]
				209	zero_point: int
				210
				211
				212	class NpuPadding(NamedTuple):
				213	"""
				214	Padding to be applied to a convolution operation
				215	"""
				216
				217	top: int
				218	left: int
				219	bottom: int
				220	right: int
				221
				222
				223	class NpuActivation:
				224	"""
				225	Activation function, fused with NPU operations
				226	"""
				227
				228	def __init__(self, op_type: NpuActivationOp):
				229	self.op_type = op_type # The activation operation to be performed
				230	# min/max are optional
				231	self.min: Optional[float] = None # E.g. set to 0.0 for RELU
				232	self.max: Optional[float] = None # E.g. set to 6.0 for RELU6
				233	# Table lookup index, only applicable for TABLE_LOOKUP activation, 0-7
				234	self.lookup_table_index: int = 0
				235
				236
				237	class NpuFeatureMap:
				238	"""
				239	Basic information about IFM, IFM2, OFM
				240	"""
				241
				242	def __init__(self):
				243	self.data_type: NpuDataType = NpuDataType.UINT8
				244	# The memory region, a value 0-7
				245	self.region: int = 0
				246	# Shape of the feature map
				247	self.shape: NpuShape3D = NpuShape3D(height=0, width=0, depth=0)
				248	# The tiles that comprise the feature map. In the normal case when only 1 tile is used,
				249	# height_0 == self.shape.height, height_1 is 0, width_0 == self.shape.width, addresses[1:] are set to 0
				250	self.tiles: NpuTileBox = NpuTileBox(height_0=0, height_1=0, width_0=0, addresses=[0, 0, 0, 0])
				251	self.quantization: Optional[NpuQuantization]
				252	self.layout: NpuLayout = NpuLayout.NHWC
				253	# x/y/c strides used by the NPU when traversing the feature map, if None, vela will use default strides
				254	self.strides: Optional[NpuShape3D] = None
				255
				256
				257	class NpuKernel:
				258	"""
				259	Kernel information for NPU operations
				260	"""
				261
				262	def __init__(self, w: int, h: int, stride_x: int = 1, stride_y: int = 1, dilation_x: int = 1, dilation_y: int = 1):
				263	assert stride_x > 0 and stride_y > 0
				264	assert dilation_x > 0 and dilation_y > 0
				265	self.width = w
				266	self.height = h
				267	self.stride_x = stride_x
				268	self.stride_y = stride_y
				269	self.dilation_x = dilation_x
				270	self.dilation_y = dilation_y
				271
				272
				273	class NpuOperationType(Enum):
				274	"""
				275	Type of NPU operation
				276	"""
				277
				278	Dma = auto()
				279	Conv2D = auto()
				280	ConvDepthWise = auto()
				281	Pooling = auto()
				282	ElementWise = auto()
				283
				284
				285	class NpuOperation:
				286	"""
				287	Base class for all NPU operations
				288	"""
				289
				290	def __init__(self, op_type: NpuOperationType):
				291	self.op_type = op_type
				292
				293
				294	class NpuDmaOperation(NpuOperation):
				295	"""
				296	DMA operation
				297	"""
				298
				299	def __init__(self, src: NpuAddressRange, dest: NpuAddressRange):
				300	super().__init__(NpuOperationType.Dma)
				301	self.src = src
				302	self.dest = dest
				303	# DMA channel, usually 0 (user channel)
				304	self.channel: int = 0
				305	# Channel mode, 0 = external, 1 = internal (should usually be 0)
				306	self.mode: int = 0
				307
				308
				309	class NpuBlockOperation(NpuOperation):
				310	"""
				311	Base class for operations which produce an OFM
				312	"""
				313
				314	def __init__(self, op_type: NpuOperationType):
				315	super().__init__(op_type)
				316	self.ifm: Optional[NpuFeatureMap] = None
				317	self.ifm2: Optional[NpuFeatureMap] = None
				318	# The non-quantized scalar value in a binary elementwise operation. Only set if IFM2 is scalar
				319	self.ifm2_scalar: Optional[float] = None
				320	self.ofm: Optional[NpuFeatureMap] = None
				321	self.kernel: Optional[NpuKernel] = None
				322	# Weights, one element for each NPU core, empty if no weights are used.
Louis Verhaard	933f55e	2020-11-25 14:10:30 +0100	[diff] [blame]	323	# Must have been compressed using npu_encode_weights()
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	324	self.weights: List[NpuAddressRange] = []
				325	# Biases, one element for each NPU core, empty if no bias is used.
Louis Verhaard	933f55e	2020-11-25 14:10:30 +0100	[diff] [blame]	326	# Must have been encoded using npu_encode_bias()
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	327	self.biases: List[NpuAddressRange] = []
				328	self.padding: Optional[NpuPadding] = None
				329	# Optional activation function to be applied
				330	self.activation: Optional[NpuActivation] = None
Louis Verhaard	933f55e	2020-11-25 14:10:30 +0100	[diff] [blame]	331	# The block config to be used, which must be valid for the given operation.
				332	# See also npu_find_block_configs.
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	333	# If the operation has weights, the depth of the block config must be the same as
Louis Verhaard	933f55e	2020-11-25 14:10:30 +0100	[diff] [blame]	334	# the ofm depth used in the call to npu_encode_weights()
				335	self.block_config: NpuShape3D
Louis Verhaard	e8a5a78	2020-11-02 18:04:27 +0100	[diff] [blame]	336	self.rounding_mode: NpuRoundingMode = NpuRoundingMode.TFL
				337	# Set to True if the operations is fused with a Quantize operation (affects scaling)
				338	self.fused_quantize: bool = False
				339	# IFM upscaling to be applied
				340	self.ifm_upscale: NpuResamplingMode = NpuResamplingMode.NONE
				341
				342
				343	class NpuConv2DOperation(NpuBlockOperation):
				344	"""
				345	NPU_OP_CONV operation
				346	"""
				347
				348	def __init__(self):
				349	super().__init__(NpuOperationType.Conv2D)
				350	# Block traversal must be consistent with the block_traversal parameter specified in
				351	# weight_compressor.encode_weights()
				352	self.block_traversal: NpuBlockTraversal = NpuBlockTraversal.PART_KERNEL_FIRST
				353
				354
				355	class NpuConvDepthWiseOperation(NpuBlockOperation):
				356	"""
				357	NPU_OP_DEPTHWISE operation
				358	"""
				359
				360	def __init__(self):
				361	super().__init__(NpuOperationType.ConvDepthWise)
				362
				363
				364	class NpuPoolingOperation(NpuBlockOperation):
				365	"""
				366	NPU_OP_POOL operation
				367	"""
				368
				369	def __init__(self, pooling_op_type: NpuPoolingOp):
				370	super().__init__(NpuOperationType.Pooling)
				371	self.sub_op_type: NpuPoolingOp = pooling_op_type
				372	# Set to a float value for ResizeBilinear operations (affects scaling), else to None
				373	self.rescale: Optional[float] = None
				374
				375
				376	class NpuElementWiseOperation(NpuBlockOperation):
				377	"""
				378	NPU_OP_ELEMENTWISE operation
				379	"""
				380
				381	def __init__(self, elementwise_op_type: NpuElementWiseOp):
				382	super().__init__(NpuOperationType.ElementWise)
				383	self.sub_op_type: NpuElementWiseOp = elementwise_op_type
				384	# Set to True for binary operators where IFM2 should be used as first operand
				385	self.reversed_operands: bool = False
				386	# Set to a tuple (scale, shift) for explicit rescale, else to None
				387	self.rescale: Optional[Tuple] = None
Patrik Gustavsson	c8a22f1	2020-11-18 17:05:50 +0100	[diff] [blame]	388
				389
def npu_get_api_version() -> int:
    """
    Public facing API to get the API version
    :return: int, the 16 most significant bits, corresponding to major version
             the 16 least significant bits, corresponding to minor version
    """
    # Major version goes in the upper half; the minor version is masked to 16 bits
    # so that it cannot spill into the major field
    version = (API_VERSION_MAJOR << 16) | (API_VERSION_MINOR & 0xFFFF)
    return version
Louis Verhaard	aeae567	2020-11-02 18:04:27 +0100	[diff] [blame]	398
				399
				400	def npu_encode_weights(
				401	accelerator: NpuAccelerator,
				402	weights_volume: numpy.ndarray,
				403	dilation_xy: Tuple[int, int],
				404	ifm_bitdepth: int,
				405	ofm_block_depth: int,
				406	is_depthwise: bool,
				407	block_traversal: NpuBlockTraversal,
				408	):
				409	"""
				410	Public facing API to use the Ethos-U weight encoding.
				411
				412	:param accelerator: NpuAccelerator enum to pick the correct accelerator
				413	:param weights_volume: numpy.ndarray in OHWI layout with a shape of four
				414	:param dilation_xy: a two element tuple of dilation attributes in x,y dimension
				415	:param ifm_bitdepth: the bitdepth of input feature map
				416	:param ofm_block_depth: the depth of blocks for processing
				417	:param is_depthwise: a boolean indicating these weights are used for a depthwise traversal
				418	:param block_traversal: indicates how these weights are traversed on sub-kernel basis
Fredrik Svedberg	f5c07c4	2021-04-23 14:36:42 +0200	[diff] [blame^]	419	:return: a bytearray of encoded weights
Louis Verhaard	aeae567	2020-11-02 18:04:27 +0100	[diff] [blame]	420	"""
				421	from .architecture_features import Accelerator
				422	from . import weight_compressor
				423
				424	acc = Accelerator.from_npu_accelerator(accelerator)
Fredrik Svedberg	f5c07c4	2021-04-23 14:36:42 +0200	[diff] [blame^]	425	encoded_weights, _ = weight_compressor.encode_weights(
Louis Verhaard	aeae567	2020-11-02 18:04:27 +0100	[diff] [blame]	426	acc, weights_volume, dilation_xy, ifm_bitdepth, ofm_block_depth, is_depthwise, block_traversal
				427	)
Fredrik Svedberg	f5c07c4	2021-04-23 14:36:42 +0200	[diff] [blame^]	428	return encoded_weights
Louis Verhaard	aeae567	2020-11-02 18:04:27 +0100	[diff] [blame]	429
				430
				431	def npu_encode_bias(bias: numpy.int64, scale: int, shift: int):
				432	"""
				433	Public facing API to pack bias and scale values as required by the hardware
				434	:param bias: 64-bit signed number that includes 40-bit signed bias
				435	:param scale: 32-bit scale value
				436	:param shift: 6-bit shift value
				437	:return: packed 80-bit [0(2-bits),shift(6-bits),scale(32-bits),bias(40-bits)]
				438	"""
				439	from . import weight_compressor
				440
				441	return weight_compressor.encode_bias(bias, scale, shift)
				442
				443
def npu_find_block_configs(npu_op: NpuOperation, accelerator: NpuAccelerator) -> List[NpuShape3D]:
    """
    Public facing API that returns a list of block configs that are valid for the given operation.
    This function can be used to find a valid value for npu_op.block_config.
    The block config is the unit of work in which the NPU generates the OFM.

    :param npu_op: the NPU operation to find block configs for
    :param accelerator: NpuAccelerator enum to pick the correct accelerator
    :return: list of block configs, each an NpuShape3D
    """
    # Imported inside the function rather than at module level
    from . import register_command_stream_generator

    return register_command_stream_generator.find_block_configs(npu_op, accelerator)
				453
				454
def npu_generate_register_command_stream(npu_op_list: List[NpuOperation], accelerator: NpuAccelerator) -> List[int]:
    """
    Public facing API for generating an Ethos-U register command stream.
    Calculates dependencies between commands and inserts wait operations if needed.

    :param npu_op_list: List[NpuOperation] list of high level NPU operations
    :param accelerator: NpuAccelerator enum to pick the correct accelerator
    :return: register commands, as a list of 32-bit integers
    """
    # Imported inside the function rather than at module level
    from . import register_command_stream_generator

    return register_command_stream_generator.generate_register_command_stream(npu_op_list, accelerator)
Louis Verhaard	5207830	2020-11-18 13:35:06 +0100	[diff] [blame]	467
				468
				469	def npu_create_driver_payload(register_command_stream: List[int], accelerator: NpuAccelerator) -> bytes:
				470	"""
				471	Public facing API for generating driver payload, containing a driver header
				472	and the given Ethos-U register command stream.
				473	Returns the payload, in little endian format, which must be placed in memory on a 16-byte aligned
				474	address.
				475
				476	:param register_command_stream: List[int] register commands, as a list of 32-bit integers
				477	:param accelerator: NpuAccelerator enum to pick the correct accelerator
				478	:return driver payload, as a byte array
				479	"""
				480	from . import driver_actions
				481
				482	return driver_actions.npu_create_driver_payload(register_command_stream, accelerator)