blob: e91c0bdb0043eec801642ef450dd3fbecc5aa2c3 [file] [log] [blame]
# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Description:
# Contains external APIs
Louis Verhaarde8a5a782020-11-02 18:04:27 +010019from enum import auto
20from enum import Enum
21from typing import List
22from typing import NamedTuple
23from typing import Optional
24from typing import Tuple
25
Louis Verhaardaeae5672020-11-02 18:04:27 +010026import numpy
27
# Major and minor components of the external API version
API_VERSION_MAJOR = 1
API_VERSION_MINOR = 0
# The version as a dotted "major.minor" string
API_VERSION = f"{API_VERSION_MAJOR}.{API_VERSION_MINOR}"
Patrik Gustavssonc8a22f12020-11-18 17:05:50 +010031
Louis Verhaarde8a5a782020-11-02 18:04:27 +010032
class NpuAccelerator(Enum):
    """
    The accelerator configurations that can be targeted
    """

    Ethos_U55_32 = auto()
    Ethos_U55_64 = auto()
    Ethos_U55_128 = auto()
    Ethos_U55_256 = auto()
    Ethos_U65_256 = auto()
    Ethos_U65_512 = auto()
44
45
class NpuElementWiseOp(Enum):
    """
    The kinds of elementwise operation
    """

    ADD = auto()
    SUB = auto()
    MUL = auto()
    ABS = auto()
    MIN = auto()
    MAX = auto()
    # Leaky relu
    LRELU = auto()
    # Number of leading zeros
    CLZ = auto()
    # Rounded right-shift
    SHR = auto()
    # Bitwise shift-left
    SHL = auto()
61
62
class NpuPoolingOp(Enum):
    """
    The kinds of pooling operation
    """

    MAX = auto()
    AVERAGE = auto()
    REDUCE_SUM = auto()
71
72
class NpuActivationOp(Enum):
    """
    The kinds of activation function
    """

    # Output is clamped using min/max
    NONE_OR_RELU = auto()
    TANH = auto()
    SIGMOID = auto()
    # Table look-up is performed, using the provided table lookup index
    TABLE_LOOKUP = auto()
82
83
class NpuRoundingMode(Enum):
    """
    The rounding modes that can be selected
    """

    # TensorFlow Lite rounding
    TFL = auto()
    # Truncate towards zero
    TRUNCATE = auto()
    # Round to nearest with x.5 rounded up, towards +infinity
    NATURAL = auto()
92
93
class NpuLayout(Enum):
    """
    The tensor layouts a feature map can have
    """

    NHWC = auto()
    NHCWB16 = auto()

    def __str__(self):
        # Print just the member name, without the class name prefix
        member_name = self.name
        return member_name
104
105
class NpuResamplingMode(Enum):
    """
    The resampling modes that can be selected
    """

    # No resampling is performed
    NONE = auto()
    # 2x2 insert nearest
    NEAREST = auto()
    # 2x2 transpose
    TRANSPOSE = auto()
114
115
class NpuBlockTraversal(Enum):
    """
    The ways in which weights can be block-traversed
    """

    DEPTH_FIRST = auto()
    PART_KERNEL_FIRST = auto()
123
124
class NpuDataType(Enum):
    """
    The data types that feature maps can use.

    Every member value is a tuple: element 0 is the size in bits, element 1 tells
    whether the type is signed, element 2 is produced by auto().
    """

    UINT8 = 8, False, auto()
    INT8 = 8, True, auto()
    UINT16 = 16, False, auto()
    INT16 = 16, True, auto()
    INT32 = 32, True, auto()

    def is_signed(self) -> bool:
        """Tells whether this data type is signed (True) or unsigned (False)"""
        signed = self.value[1]
        return signed

    def size_in_bits(self) -> int:
        """Number of bits occupied by one value of this data type"""
        bits = self.value[0]
        return bits

    def size_in_bytes(self) -> int:
        """Number of bytes occupied by one value of this data type"""
        bits = self.value[0]
        return bits // 8

    def min_value(self) -> int:
        """Smallest value representable in this data type"""
        if not self.is_signed():
            return 0
        return -(1 << (self.size_in_bits() - 1))

    def max_value(self) -> int:
        """Largest value representable in this data type"""
        magnitude_bits = self.size_in_bits()
        if self.is_signed():
            # One of the bits is taken by the sign
            magnitude_bits -= 1
        return (1 << magnitude_bits) - 1

    def __str__(self):
        return self.name

    # repr() gives the bare member name as well
    __repr__ = __str__
166
167
class NpuAddressRange(NamedTuple):
    """
    A range of addresses inside a memory region
    """

    # Memory region, a value between 0 and 7
    region: int
    # Address, offset from the region's base address
    address: int
    # The length of the range, in bytes
    length: int

    def __str__(self):
        # The address is shown in hexadecimal, region and length in decimal
        hex_address = hex(self.address)
        return f"(region={self.region}, address={hex_address}, length={self.length})"
179
180
class NpuTileBox(NamedTuple):
    """
    Addresses and dimensions of the tiles that make up a feature map.
    Between 1 and 4 tiles can be used for a feature map
    """

    # The height of tile 0
    height_0: int
    # The height of tile 1, 0 if unused
    height_1: int
    # The width of tile 0, and tile 2 (if used)
    width_0: int
    # A list of 4 addresses, set unused addresses to 0
    addresses: List[int]
191
192
class NpuShape3D(NamedTuple):
    """
    Height/width/depth shape of a feature map, or of a part of one
    """

    height: int
    width: int
    depth: int
201
202
class NpuQuantization(NamedTuple):
    """
    The parameters describing a quantization
    """

    # Scale as a float; the annotation allows None
    scale_f32: Optional[float]
    zero_point: int
210
211
class NpuPadding(NamedTuple):
    """
    The padding that a convolution operation should apply
    """

    top: int
    left: int
    bottom: int
    right: int
221
222
class NpuActivation:
    """
    An activation function that is fused with an NPU operation
    """

    def __init__(self, op_type: NpuActivationOp):
        # Which activation operation to perform
        self.op_type = op_type
        # Optional lower clamp, e.g. set to 0.0 for RELU
        self.min: Optional[float] = None
        # Optional upper clamp, e.g. set to 6.0 for RELU6
        self.max: Optional[float] = None
        # Index (0-7) of the lookup table; only applicable for TABLE_LOOKUP activation
        self.lookup_table_index: int = 0
235
236
class NpuFeatureMap:
    """
    Basic information about IFM, IFM2, OFM
    """

    def __init__(self):
        self.data_type: NpuDataType = NpuDataType.UINT8
        # The memory region, a value 0-7
        self.region: int = 0
        # Shape of the feature map
        self.shape: NpuShape3D = NpuShape3D(height=0, width=0, depth=0)
        # The tiles that comprise the feature map. In the normal case when only 1 tile is used,
        # height_0 == self.shape.height, height_1 is 0, width_0 == self.shape.width, addresses[1:] are set to 0
        self.tiles: NpuTileBox = NpuTileBox(height_0=0, height_1=0, width_0=0, addresses=[0, 0, 0, 0])
        # Quantization parameters. Explicitly initialised to None so that the attribute always exists;
        # a bare annotation without a value would not create it, and reading it would raise AttributeError
        self.quantization: Optional[NpuQuantization] = None
        self.layout: NpuLayout = NpuLayout.NHWC
        # x/y/c strides used by the NPU when traversing the feature map, if None, vela will use default strides
        self.strides: Optional[NpuShape3D] = None
255
256
class NpuKernel:
    """
    Kernel size, stride and dilation used by an NPU operation
    """

    def __init__(self, w: int, h: int, stride_x: int = 1, stride_y: int = 1, dilation_x: int = 1, dilation_y: int = 1):
        # Strides and dilations have to be strictly positive
        assert stride_x > 0
        assert stride_y > 0
        assert dilation_x > 0
        assert dilation_y > 0
        # Kernel dimensions
        self.width = w
        self.height = h
        # Stride per axis
        self.stride_x = stride_x
        self.stride_y = stride_y
        # Dilation per axis
        self.dilation_x = dilation_x
        self.dilation_y = dilation_y
271
272
class NpuOperationType(Enum):
    """
    The kinds of NPU operation
    """

    Dma = auto()
    Conv2D = auto()
    ConvDepthWise = auto()
    Pooling = auto()
    ElementWise = auto()
283
284
class NpuOperation:
    """
    Common base class of every NPU operation
    """

    def __init__(self, op_type: NpuOperationType):
        # The kind of operation this object represents
        self.op_type = op_type
292
293
class NpuDmaOperation(NpuOperation):
    """
    An operation that performs a DMA transfer
    """

    def __init__(self, src: NpuAddressRange, dest: NpuAddressRange):
        super().__init__(NpuOperationType.Dma)
        # Source and destination address ranges
        self.src = src
        self.dest = dest
        # DMA channel, usually 0 (user channel)
        self.channel: int = 0
        # Channel mode, 0 = external, 1 = internal (should usually be 0)
        self.mode: int = 0
307
308
class NpuBlockOperation(NpuOperation):
    """
    Common base class of the operations that produce an OFM
    """

    def __init__(self, op_type: NpuOperationType):
        super().__init__(op_type)
        # Input feature maps; both start out unset
        self.ifm: Optional[NpuFeatureMap] = None
        self.ifm2: Optional[NpuFeatureMap] = None
        # Non-quantized scalar value of a binary elementwise operation. Only set if IFM2 is scalar
        self.ifm2_scalar: Optional[float] = None
        # Output feature map; starts out unset
        self.ofm: Optional[NpuFeatureMap] = None
        self.kernel: Optional[NpuKernel] = None
        # Weights: one element per NPU core, left empty if the operation uses no weights.
        # They must have been compressed using npu_encode_weights()
        self.weights: List[NpuAddressRange] = []
        # Biases: one element per NPU core, left empty if the operation uses no bias.
        # They must have been encoded using npu_encode_bias()
        self.biases: List[NpuAddressRange] = []
        self.padding: Optional[NpuPadding] = None
        # Activation function to apply, if any
        self.activation: Optional[NpuActivation] = None
        # The block config to use; it must be valid for the given operation.
        # See also npu_find_block_configs.
        # If the operation has weights, the depth of the block config must be the same as
        # the ofm depth used in the call to npu_encode_weights()
        # NOTE: this line only declares the attribute, no value is assigned here,
        # so block_config has to be set before it is read
        self.block_config: NpuShape3D
        self.rounding_mode: NpuRoundingMode = NpuRoundingMode.TFL
        # True if the operation is fused with a Quantize operation (affects scaling)
        self.fused_quantize: bool = False
        # Upscaling to apply to the IFM
        self.ifm_upscale: NpuResamplingMode = NpuResamplingMode.NONE
341
342
class NpuConv2DOperation(NpuBlockOperation):
    """
    An NPU_OP_CONV operation
    """

    def __init__(self):
        super().__init__(NpuOperationType.Conv2D)
        # The block traversal has to agree with the block_traversal parameter that was given to
        # weight_compressor.encode_weights()
        self.block_traversal: NpuBlockTraversal = NpuBlockTraversal.PART_KERNEL_FIRST
353
354
class NpuConvDepthWiseOperation(NpuBlockOperation):
    """
    An NPU_OP_DEPTHWISE operation
    """

    def __init__(self):
        # Only tags the operation type; no attributes beyond those of the base class
        super().__init__(NpuOperationType.ConvDepthWise)
362
363
class NpuPoolingOperation(NpuBlockOperation):
    """
    An NPU_OP_POOL operation
    """

    def __init__(self, pooling_op_type: NpuPoolingOp):
        super().__init__(NpuOperationType.Pooling)
        # Which pooling variant to perform
        self.sub_op_type: NpuPoolingOp = pooling_op_type
        # A float value for ResizeBilinear operations (affects scaling), otherwise None
        self.rescale: Optional[float] = None
374
375
class NpuElementWiseOperation(NpuBlockOperation):
    """
    An NPU_OP_ELEMENTWISE operation
    """

    def __init__(self, elementwise_op_type: NpuElementWiseOp):
        super().__init__(NpuOperationType.ElementWise)
        # Which elementwise variant to perform
        self.sub_op_type: NpuElementWiseOp = elementwise_op_type
        # True for binary operators where IFM2 should be used as first operand
        self.reversed_operands: bool = False
        # A tuple (scale, shift) for explicit rescale, otherwise None
        self.rescale: Optional[Tuple] = None
Patrik Gustavssonc8a22f12020-11-18 17:05:50 +0100388
389
def npu_get_api_version():
    """
    Public facing API to get the API version
    :return: int, with the major version in the 16 most significant bits and
             the minor version in the 16 least significant bits
    """
    major_field = API_VERSION_MAJOR << 16
    # The minor version is masked to its 16-bit field
    minor_field = API_VERSION_MINOR & 0xFFFF
    return major_field | minor_field
Louis Verhaardaeae5672020-11-02 18:04:27 +0100398
399
def npu_encode_weights(
    accelerator: NpuAccelerator,
    weights_volume: numpy.ndarray,
    dilation_xy: Tuple[int, int],
    ifm_bitdepth: int,
    ofm_block_depth: int,
    is_depthwise: bool,
    block_traversal: NpuBlockTraversal,
):
    """
    Public facing API to use the Ethos-U weight encoding.

    :param accelerator: NpuAccelerator enum to pick the correct accelerator
    :param weights_volume: numpy.ndarray in OHWI layout with a shape of four
    :param dilation_xy: a two element tuple of dilation attributes in x,y dimension
    :param ifm_bitdepth: the bitdepth of input feature map
    :param ofm_block_depth: the depth of blocks for processing
    :param is_depthwise: a boolean indicating these weights are used for a depthwise traversal
    :param block_traversal: indicates how these weights are traversed on sub-kernel basis
    :return: a bytearray of encoded weights
    """
    from .architecture_features import Accelerator
    from . import weight_compressor

    # Translate the public enum to the Accelerator type that the weight compressor is called with
    internal_accelerator = Accelerator.from_npu_accelerator(accelerator)
    encoder_result = weight_compressor.encode_weights(
        internal_accelerator,
        weights_volume,
        dilation_xy,
        ifm_bitdepth,
        ofm_block_depth,
        is_depthwise,
        block_traversal,
    )
    # The encoder yields a pair; only its first element is handed back
    encoded_weights, _ = encoder_result
    return encoded_weights
Louis Verhaardaeae5672020-11-02 18:04:27 +0100429
430
def npu_encode_bias(bias: numpy.int64, scale: int, shift: int):
    """
    Public facing API to pack bias and scale values as required by the hardware

    :param bias: 64-bit signed number that includes 40-bit signed bias
    :param scale: 32-bit scale value
    :param shift: 6-bit shift value
    :return: packed 80-bit [0(2-bits),shift(6-bits),scale(32-bits),bias(40-bits)]
    """
    from . import weight_compressor

    # The packing itself is done by the weight compressor module
    packed_bias = weight_compressor.encode_bias(bias, scale, shift)
    return packed_bias
442
443
def npu_find_block_configs(npu_op: NpuOperation, accelerator: NpuAccelerator) -> List[NpuShape3D]:
    """
    Public facing API that returns the block configs that are valid for the given operation.
    Use it to pick a valid value for npu_op.block_config.
    The block config is the unit of work in which the NPU generates the OFM.

    :param npu_op: the operation to find block configs for
    :param accelerator: NpuAccelerator enum to pick the correct accelerator
    :return: list of valid block configs
    """
    from . import register_command_stream_generator

    # The search is delegated to the register command stream generator
    valid_configs = register_command_stream_generator.find_block_configs(npu_op, accelerator)
    return valid_configs
453
454
def npu_generate_register_command_stream(npu_op_list: List[NpuOperation], accelerator: NpuAccelerator) -> List[int]:
    """
    Public facing API for generating an Ethos-U register command stream.
    Dependencies between commands are calculated, and wait operations are inserted if needed.

    :param npu_op_list: List[NpuOperation] list of high level NPU operations
    :param accelerator: NpuAccelerator enum to pick the correct accelerator
    :return: register commands, as a list of 32-bit integers
    """
    from . import register_command_stream_generator

    # The generation is delegated to the register command stream generator
    command_stream = register_command_stream_generator.generate_register_command_stream(npu_op_list, accelerator)
    return command_stream
Louis Verhaard52078302020-11-18 13:35:06 +0100467
468
def npu_create_driver_payload(register_command_stream: List[int], accelerator: NpuAccelerator) -> bytes:
    """
    Public facing API for generating driver payload, consisting of a driver header
    followed by the given Ethos-U register command stream.
    The returned payload is in little endian format and must be placed in memory
    on a 16-byte aligned address.

    :param register_command_stream: List[int] register commands, as a list of 32-bit integers
    :param accelerator: NpuAccelerator enum to pick the correct accelerator
    :return: driver payload, as a byte array
    """
    from . import driver_actions

    # The payload is built by the driver actions module
    payload = driver_actions.npu_create_driver_payload(register_command_stream, accelerator)
    return payload