# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Description:
# Contains data types used in the external API for code generation
| 19 | from enum import auto |
| 20 | from enum import Enum |
| 21 | from typing import List |
| 22 | from typing import NamedTuple |
| 23 | from typing import Optional |
| 24 | from typing import Tuple |
| 25 | |
| 26 | |
class NpuElementWiseOp(Enum):
    """Elementwise operation performed by the NPU."""

    ADD = 1
    SUB = 2
    MUL = 3
    ABS = 4
    MIN = 5
    MAX = 6
    LRELU = 7  # Leaky relu
    CLZ = 8  # Count leading zeros
    SHR = 9  # Rounded right-shift
    SHL = 10  # Bitwise shift-left
| 42 | |
| 43 | |
class NpuPoolingOp(Enum):
    """Pooling operation performed by the NPU."""

    MAX = 1
    AVERAGE = 2
    REDUCE_SUM = 3
| 52 | |
| 53 | |
class NpuActivationOp(Enum):
    """Activation function that can be fused with an NPU operation."""

    NONE_OR_RELU = 1  # Clamps output using min/max
    TANH = 2
    SIGMOID = 3
    TABLE_LOOKUP = 4  # Performs table look-up, using the provided table lookup index
| 63 | |
| 64 | |
class NpuRoundingMode(Enum):
    """Rounding modes available for NPU operations."""

    TFL = 1  # TensorFlow Lite rounding
    TRUNCATE = 2  # Truncate towards zero
    NATURAL = 3  # Round to nearest with x.5 rounded up, towards +infinity
| 73 | |
| 74 | |
class NpuLayout(Enum):
    """Memory layout of a feature map."""

    NHWC = 1
    NHCWB16 = 2

    def __str__(self):
        # Print as the bare layout name, e.g. "NHWC"
        return self.name
| 85 | |
| 86 | |
class NpuResamplingMode(Enum):
    """IFM resampling mode."""

    NONE = 1  # No resampling is performed
    NEAREST = 2  # 2x2 insert nearest
    TRANSPOSE = 3  # 2x2 transpose
| 95 | |
| 96 | |
class NpuBlockTraversal(Enum):
    """Order in which weight blocks are traversed."""

    DEPTH_FIRST = 1
    PART_KERNEL_FIRST = 2
| 104 | |
| 105 | |
class NpuDataType(Enum):
    """Data types supported in feature maps.

    Each member's value is a (bit width, is-signed, tag) tuple; the tag
    keeps the members formally distinct.
    """

    UINT8 = 8, False, auto()
    INT8 = 8, True, auto()
    UINT16 = 16, False, auto()
    INT16 = 16, True, auto()
    INT32 = 32, True, auto()

    def is_signed(self) -> bool:
        """Return True for signed types, False for unsigned ones."""
        _, signed, _ = self.value
        return signed

    def size_in_bits(self) -> int:
        """Return the width of the data type, in bits."""
        bits, _, _ = self.value
        return bits

    def size_in_bytes(self) -> int:
        """Return the width of the data type, in whole bytes."""
        return self.size_in_bits() // 8

    def min_value(self) -> int:
        """Return the smallest value representable by this type."""
        return -(1 << (self.size_in_bits() - 1)) if self.is_signed() else 0

    def max_value(self) -> int:
        """Return the largest value representable by this type."""
        bits = self.size_in_bits()
        return (1 << (bits - 1)) - 1 if self.is_signed() else (1 << bits) - 1

    def __str__(self):
        return self.name

    __repr__ = __str__
| 147 | |
| 148 | |
class NpuAddressRange(NamedTuple):
    """A contiguous range of NPU-addressable memory."""

    region: int  # Memory region, a value between 0 and 7
    address: int  # Byte offset from the region's base address
    length: int  # Length of the range, in bytes

    def __str__(self):
        # Address is shown in hex, region and length in decimal
        return "(region={}, address={}, length={})".format(self.region, hex(self.address), self.length)
| 160 | |
| 161 | |
class NpuTileBox(NamedTuple):
    """
    Specifies the addresses and dimensions of the tiles of a feature map.
    A feature map can use 1 to 4 tiles
    """

    height_0: int  # The height of tile 0
    height_1: int  # The height of tile 1, 0 if unused
    width_0: int  # the width of tile 0, and tile 2 (if used)
    addresses: List[int]  # A list of 4 addresses, set unused addresses to 0
| 172 | |
| 173 | |
class NpuShape3D(NamedTuple):
    """
    Shape of (part of) a feature map
    """

    height: int  # Size in the H dimension
    width: int  # Size in the W dimension
    depth: int  # Size in the C (channel) dimension
| 182 | |
| 183 | |
class NpuQuantization(NamedTuple):
    """
    Quantization parameters
    """

    scale_f32: Optional[float]  # Quantization scale; None if no scale applies
    zero_point: int  # Quantization zero point
| 191 | |
| 192 | |
class NpuPadding(NamedTuple):
    """
    Padding to be applied to a convolution operation
    """

    top: int  # Padding at the top edge
    left: int  # Padding at the left edge
    bottom: int  # Padding at the bottom edge
    right: int  # Padding at the right edge
| 202 | |
| 203 | |
class NpuActivation:
    """Activation function, fused with NPU operations."""

    def __init__(self, op_type: NpuActivationOp):
        # Which activation operation to perform
        self.op_type = op_type
        # Optional output clamp, e.g. min=0.0 for RELU, max=6.0 for RELU6
        self.min: Optional[float] = None
        self.max: Optional[float] = None
        # Index of the lookup table to use, 0-7; only relevant for TABLE_LOOKUP
        self.lookup_table_index: int = 0
| 216 | |
| 217 | |
class NpuFeatureMap:
    """
    Basic information about IFM, IFM2, OFM
    """

    def __init__(self):
        # Element data type of the feature map
        self.data_type: NpuDataType = NpuDataType.UINT8
        # The memory region, a value 0-7
        self.region: int = 0
        # Shape of the feature map
        self.shape: NpuShape3D = NpuShape3D(height=0, width=0, depth=0)
        # The tiles that comprise the feature map. In the normal case when only 1 tile is used,
        # height_0 == self.shape.height, height_1 is 0, width_0 == self.shape.width, addresses[1:] are set to 0
        self.tiles: NpuTileBox = NpuTileBox(height_0=0, height_1=0, width_0=0, addresses=[0, 0, 0, 0])
        # Quantization parameters; None if the feature map is not quantized.
        # Fix: the original only annotated this attribute without assigning it, so
        # reading `quantization` before the caller set it raised AttributeError.
        self.quantization: Optional[NpuQuantization] = None
        # Memory layout of the feature map
        self.layout: NpuLayout = NpuLayout.NHWC
        # x/y/c strides used by the NPU when traversing the feature map, if None, vela will use default strides
        self.strides: Optional[NpuShape3D] = None
| 236 | |
| 237 | |
class NpuKernel:
    """
    Kernel information for NPU operations
    """

    def __init__(self, w: int, h: int, stride_x: int = 1, stride_y: int = 1, dilation_x: int = 1, dilation_y: int = 1):
        """Create a w-by-h kernel with the given strides and dilations.

        All arguments must be positive; a zero-sized kernel footprint is invalid.
        """
        # Fix: the original validated strides and dilations but not the kernel
        # size itself; add the missing positivity check in the same style.
        assert w > 0 and h > 0
        assert stride_x > 0 and stride_y > 0
        assert dilation_x > 0 and dilation_y > 0
        self.width = w
        self.height = h
        self.stride_x = stride_x
        self.stride_y = stride_y
        self.dilation_x = dilation_x
        self.dilation_y = dilation_y
| 252 | |
| 253 | |
class NpuOperationType(Enum):
    """Discriminator for the concrete type of an NPU operation."""

    Dma = 1
    Conv2D = 2
    ConvDepthWise = 3
    Pooling = 4
    ElementWise = 5
| 264 | |
| 265 | |
class NpuOperation:
    """Common base class of every NPU operation."""

    def __init__(self, op_type: NpuOperationType):
        # Identifies which concrete operation this is
        self.op_type = op_type
| 273 | |
| 274 | |
class NpuDmaOperation(NpuOperation):
    """DMA operation: copies `src` to `dest`."""

    def __init__(self, src: NpuAddressRange, dest: NpuAddressRange):
        super().__init__(NpuOperationType.Dma)
        self.src = src  # source address range
        self.dest = dest  # destination address range
        # DMA channel; 0 (the user channel) is the usual choice
        self.channel: int = 0
        # Channel mode, 0 = external, 1 = internal (should usually be 0)
        self.mode: int = 0
| 288 | |
| 289 | |
class NpuBlockOperation(NpuOperation):
    """Common base for all NPU operations that produce an OFM."""

    def __init__(self, op_type: NpuOperationType):
        super().__init__(op_type)
        # Input/output feature maps; IFM2 is only used by binary elementwise ops
        self.ifm: Optional[NpuFeatureMap] = None
        self.ifm2: Optional[NpuFeatureMap] = None
        # Non-quantized scalar operand of a binary elementwise operation;
        # only set when IFM2 is a scalar
        self.ifm2_scalar: Optional[float] = None
        self.ofm: Optional[NpuFeatureMap] = None
        self.kernel: Optional[NpuKernel] = None
        # One weight range per NPU core; empty when no weights are used.
        # Ranges must have been compressed using weight_compressor.encode_weights()
        self.weights: List[NpuAddressRange] = []
        # One bias range per NPU core; empty when no bias is used.
        # Ranges must have been encoded using weight_compressor.encode_bias()
        self.biases: List[NpuAddressRange] = []
        self.padding: Optional[NpuPadding] = None
        # Activation function fused with this operation, if any
        self.activation: Optional[NpuActivation] = None
        # OFM_BLK parameters: the unit of work in which the NPU generates the OFM.
        # When the operation has weights, the block depth must equal the ofm depth
        # that was passed to weight_compressor.encode_weights().
        # None lets vela choose a block size (only allowed when there are no weights);
        # a width/height of -1 lets vela choose a suitable width/height.
        self.block_config: Optional[NpuShape3D] = None
        self.rounding_mode: NpuRoundingMode = NpuRoundingMode.TFL
        # True when the operation is fused with a Quantize operation (affects scaling)
        self.fused_quantize: bool = False
        # Up-scaling applied to the IFM
        self.ifm_upscale: NpuResamplingMode = NpuResamplingMode.NONE
| 323 | |
| 324 | |
class NpuConv2DOperation(NpuBlockOperation):
    """NPU_OP_CONV operation (2D convolution)."""

    def __init__(self):
        super().__init__(NpuOperationType.Conv2D)
        # Weight traversal order; must agree with the block_traversal argument
        # that was given to weight_compressor.encode_weights()
        self.block_traversal: NpuBlockTraversal = NpuBlockTraversal.PART_KERNEL_FIRST
| 335 | |
| 336 | |
class NpuConvDepthWiseOperation(NpuBlockOperation):
    """NPU_OP_DEPTHWISE operation (depthwise convolution)."""

    def __init__(self):
        super().__init__(NpuOperationType.ConvDepthWise)
| 344 | |
| 345 | |
class NpuPoolingOperation(NpuBlockOperation):
    """NPU_OP_POOL operation."""

    def __init__(self, pooling_op_type: NpuPoolingOp):
        super().__init__(NpuOperationType.Pooling)
        # Which pooling variant to run (max / average / reduce-sum)
        self.sub_op_type: NpuPoolingOp = pooling_op_type
        # Scaling for ResizeBilinear operations (affects scaling); None otherwise
        self.rescale: Optional[float] = None
| 356 | |
| 357 | |
class NpuElementWiseOperation(NpuBlockOperation):
    """NPU_OP_ELEMENTWISE operation."""

    def __init__(self, elementwise_op_type: NpuElementWiseOp):
        super().__init__(NpuOperationType.ElementWise)
        # Which elementwise variant to run
        self.sub_op_type: NpuElementWiseOp = elementwise_op_type
        # True for binary operators that should take IFM2 as their first operand
        self.reversed_operands: bool = False
        # Explicit (scale, shift) rescale; None to use default scaling
        self.rescale: Optional[Tuple] = None