MLBEDSW-839: Code generation using external API

Added external API to generate register command streams.

Existing code generation has been refactored to make
use of this API.

Change-Id: Ibb4c2b167809869f16470b14da24f08a65c82b7b
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
diff --git a/ethosu/vela/api.py b/ethosu/vela/api.py
new file mode 100644
index 0000000..06de0d9
--- /dev/null
+++ b/ethosu/vela/api.py
@@ -0,0 +1,369 @@
+# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Description:
+# Contains data types used in the external API for code generation
+from enum import auto
+from enum import Enum
+from typing import List
+from typing import NamedTuple
+from typing import Optional
+from typing import Tuple
+
+
+class NpuElementWiseOp(Enum):
+    """
+    Elementwise operation
+    """
+
+    ADD = auto()
+    SUB = auto()
+    MUL = auto()
+    ABS = auto()
+    MIN = auto()
+    MAX = auto()
+    LRELU = auto()  # Leaky ReLU
+    CLZ = auto()  # Number of leading zeros
+    SHR = auto()  # Rounded right-shift
+    SHL = auto()  # Bitwise shift-left
+
+
+class NpuPoolingOp(Enum):
+    """
+    Pooling operation
+    """
+
+    MAX = auto()
+    AVERAGE = auto()
+    REDUCE_SUM = auto()
+
+
+class NpuActivationOp(Enum):
+    """
+    Activation function
+    """
+
+    NONE_OR_RELU = auto()  # Clamps output using min/max
+    TANH = auto()
+    SIGMOID = auto()
+    TABLE_LOOKUP = auto()  # Performs table lookup, using the provided table lookup index
+
+
+class NpuRoundingMode(Enum):
+    """
+    Available rounding modes
+    """
+
+    TFL = auto()  # TensorFlow Lite rounding
+    TRUNCATE = auto()  # Truncate towards zero
+    NATURAL = auto()  # Round to nearest with x.5 rounded up, towards +infinity
+
+
+class NpuLayout(Enum):
+    """
+    Tensor layout of feature maps
+    """
+
+    NHWC = auto()
+    NHCWB16 = auto()
+
+    def __str__(self):
+        return self.name
+
+
+class NpuResamplingMode(Enum):
+    """
+    Resampling mode
+    """
+
+    NONE = auto()  # No resampling is performed
+    NEAREST = auto()  # 2x2 insert nearest
+    TRANSPOSE = auto()  # 2x2 transpose
+
+
+class NpuBlockTraversal(Enum):
+    """
+    Block-traversal of weights
+    """
+
+    DEPTH_FIRST = auto()
+    PART_KERNEL_FIRST = auto()
+
+
+class NpuDataType(Enum):
+    """
+    Supported data types in feature maps
+    """
+
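+    # Each member's value is a tuple: (size in bits, is signed, discriminator)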
+    UINT8 = 8, False, auto()
+    INT8 = 8, True, auto()
+    UINT16 = 16, False, auto()
+    INT16 = 16, True, auto()
+    INT32 = 32, True, auto()
+
+    def is_signed(self) -> bool:
+        """Checks if this data type is signed or unsigned"""
+        return self.value[1]
+
+    def size_in_bits(self) -> int:
+        """ Size of the data type in bits"""
+        return self.value[0]
+
+    def size_in_bytes(self) -> int:
+        """ Size of the data type in bytes"""
+        return self.value[0] // 8
+
+    def min_value(self) -> int:
+        """Minimum value of this type"""
+        if self.is_signed():
+            return -(1 << (self.size_in_bits() - 1))
+        else:
+            return 0
+
+    def max_value(self) -> int:
+        """Maximum value of this type"""
+        if self.is_signed():
+            return (1 << (self.size_in_bits() - 1)) - 1
+        else:
+            return (1 << self.size_in_bits()) - 1
+
+    def __str__(self):
+        return self.name
+
+    __repr__ = __str__
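+
+    # Example (illustrative): INT8 occupies one byte and spans [-128, 127]:
+    #   NpuDataType.INT8.size_in_bytes()  # -> 1
+    #   NpuDataType.INT8.min_value()      # -> -128
+    #   NpuDataType.INT8.max_value()      # -> 127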
+
+
+class NpuAddressRange(NamedTuple):
+    """
+    Address range
+    """
+
+    region: int  # Memory region, a value between 0 and 7
+    address: int  # Address, offset from the region's base address
+    length: int  # The length of the range, in bytes
+
+    def __str__(self):
+        return f"(region={self.region}, address={hex(self.address)}, length={self.length})"
+
+
+class NpuTileBox(NamedTuple):
+    """
+    Specifies the addresses and dimensions of the tiles of a feature map.
+    A feature map can use 1 to 4 tiles
+    """
+
+    height_0: int  # The height of tile 0
+    height_1: int  # The height of tile 1, 0 if unused
+    width_0: int  # The width of tile 0 and tile 2 (if used)
+    addresses: List[int]  # A list of 4 addresses, set unused addresses to 0
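+
+    # Example (illustrative): a feature map stored as a single tile at
+    # address 0x100 uses tile 0 only (fm_height/fm_width stand for the
+    # feature map's dimensions):
+    #   NpuTileBox(height_0=fm_height, height_1=0, width_0=fm_width,
+    #              addresses=[0x100, 0, 0, 0])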
+
+
+class NpuShape3D(NamedTuple):
+    """
+    Shape of (part of) a feature map
+    """
+
+    height: int
+    width: int
+    depth: int
+
+
+class NpuQuantization(NamedTuple):
+    """
+    Quantization parameters
+    """
+
+    scale_f32: Optional[float]
+    zero_point: int
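+
+    # Under the usual TensorFlow Lite affine quantization scheme, a quantized
+    # value q represents the real value scale_f32 * (q - zero_point).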
+
+
+class NpuPadding(NamedTuple):
+    """
+    Padding to be applied to a convolution operation
+    """
+
+    top: int
+    left: int
+    bottom: int
+    right: int
+
+
+class NpuActivation:
+    """
+    Activation function, fused with NPU operations
+    """
+
+    def __init__(self, op_type: NpuActivationOp):
+        self.op_type = op_type  # The activation operation to be performed
+        # min/max are optional
+        self.min: Optional[float] = None  # E.g. set to 0.0 for RELU
+        self.max: Optional[float] = None  # E.g. set to 6.0 for RELU6
+        # Table lookup index in the range 0-7, only applicable for TABLE_LOOKUP activation
+        self.lookup_table_index: int = 0
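+
+    # Example (illustrative): a fused RELU6, clamping the output to [0.0, 6.0]:
+    #   act = NpuActivation(NpuActivationOp.NONE_OR_RELU)
+    #   act.min = 0.0
+    #   act.max = 6.0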
+
+
+class NpuFeatureMap:
+    """
+    Basic information about IFM, IFM2, OFM
+    """
+
+    def __init__(self):
+        self.data_type: NpuDataType = NpuDataType.UINT8
+        # The memory region, a value 0-7
+        self.region: int = 0
+        # Shape of the feature map
+        self.shape: NpuShape3D = NpuShape3D(height=0, width=0, depth=0)
+        # The tiles that comprise the feature map. In the normal case when only 1 tile is used,
+        # height_0 == self.shape.height, height_1 is 0, width_0 == self.shape.width, addresses[1:] are set to 0
+        self.tiles: NpuTileBox = NpuTileBox(height_0=0, height_1=0, width_0=0, addresses=[0, 0, 0, 0])
+        self.quantization: Optional[NpuQuantization] = None
+        self.layout: NpuLayout = NpuLayout.NHWC
+        # x/y/c strides used by the NPU when traversing the feature map; if None, vela will use default strides
+        self.strides: Optional[NpuShape3D] = None
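+
+    # Example (illustrative): an 8x8x16 NHWC UINT8 feature map in a single
+    # tile at offset 0x40 of region 0 (values chosen for illustration only):
+    #   fm = NpuFeatureMap()
+    #   fm.shape = NpuShape3D(height=8, width=8, depth=16)
+    #   fm.tiles = NpuTileBox(height_0=8, height_1=0, width_0=8,
+    #                         addresses=[0x40, 0, 0, 0])
+    #   fm.quantization = NpuQuantization(scale_f32=0.1, zero_point=128)
+    #   fm.region = 0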
+
+
+class NpuKernel:
+    """
+    Kernel information for NPU operations
+    """
+
+    def __init__(self, w: int, h: int, stride_x: int = 1, stride_y: int = 1, dilation_x: int = 1, dilation_y: int = 1):
+        assert stride_x > 0 and stride_y > 0
+        assert dilation_x > 0 and dilation_y > 0
+        self.width = w
+        self.height = h
+        self.stride_x = stride_x
+        self.stride_y = stride_y
+        self.dilation_x = dilation_x
+        self.dilation_y = dilation_y
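+
+    # Example (illustrative): a 3x3 kernel with stride 2 in both directions:
+    #   NpuKernel(w=3, h=3, stride_x=2, stride_y=2)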
+
+
+class NpuOperationType(Enum):
+    """
+    Type of NPU operation
+    """
+
+    Dma = auto()
+    Conv2D = auto()
+    ConvDepthWise = auto()
+    Pooling = auto()
+    ElementWise = auto()
+
+
+class NpuOperation:
+    """
+    Base class for all NPU operations
+    """
+
+    def __init__(self, op_type: NpuOperationType):
+        self.op_type = op_type
+
+
+class NpuDmaOperation(NpuOperation):
+    """
+    DMA operation
+    """
+
+    def __init__(self, src: NpuAddressRange, dest: NpuAddressRange):
+        super().__init__(NpuOperationType.Dma)
+        self.src = src
+        self.dest = dest
+        # DMA channel, usually 0 (user channel)
+        self.channel: int = 0
+        # Channel mode, 0 = external, 1 = internal (should usually be 0)
+        self.mode: int = 0
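+
+    # Example (illustrative): copy 1024 bytes from region 0 to region 1 on
+    # the default user channel (addresses chosen for illustration only):
+    #   src = NpuAddressRange(region=0, address=0x0, length=1024)
+    #   dest = NpuAddressRange(region=1, address=0x800, length=1024)
+    #   dma = NpuDmaOperation(src, dest)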
+
+
+class NpuBlockOperation(NpuOperation):
+    """
+    Base class for operations which produce an OFM
+    """
+
+    def __init__(self, op_type: NpuOperationType):
+        super().__init__(op_type)
+        self.ifm: Optional[NpuFeatureMap] = None
+        self.ifm2: Optional[NpuFeatureMap] = None
+        # The non-quantized scalar value in a binary elementwise operation. Only set if IFM2 is scalar
+        self.ifm2_scalar: Optional[float] = None
+        self.ofm: Optional[NpuFeatureMap] = None
+        self.kernel: Optional[NpuKernel] = None
+        # Weights, one element for each NPU core, empty if no weights are used.
+        # Must have been compressed using weight_compressor.encode_weights()
+        self.weights: List[NpuAddressRange] = []
+        # Biases, one element for each NPU core, empty if no bias is used.
+        # Must have been encoded using weight_compressor.encode_bias()
+        self.biases: List[NpuAddressRange] = []
+        self.padding: Optional[NpuPadding] = None
+        # Optional activation function to be applied
+        self.activation: Optional[NpuActivation] = None
+        # The block config is the unit of work in which the NPU generates the OFM.
+        # If the operation has weights, the depth of the block config must be the same as
+        # the ofm depth used in the call to weight_compressor.encode_weights()
+        # If set to None, vela will determine a suitable block size (can only be used if there are no weights)
+        # If block_config.width and height are set to -1, vela will determine suitable width/height
+        self.block_config: Optional[NpuShape3D] = None  # OFM_BLK parameters
+        self.rounding_mode: NpuRoundingMode = NpuRoundingMode.TFL
+        # Set to True if the operation is fused with a Quantize operation (affects scaling)
+        self.fused_quantize: bool = False
+        # IFM upscaling to be applied
+        self.ifm_upscale: NpuResamplingMode = NpuResamplingMode.NONE
+
+
+class NpuConv2DOperation(NpuBlockOperation):
+    """
+    NPU_OP_CONV operation
+    """
+
+    def __init__(self):
+        super().__init__(NpuOperationType.Conv2D)
+        # Block traversal must be consistent with the block_traversal parameter specified in
+        # weight_compressor.encode_weights()
+        self.block_traversal: NpuBlockTraversal = NpuBlockTraversal.PART_KERNEL_FIRST
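+
+    # Example (illustrative): skeleton of a 3x3 convolution; constructing the
+    # feature maps and encoding weights/biases via weight_compressor are
+    # omitted here:
+    #   op = NpuConv2DOperation()
+    #   op.ifm = ifm    # an NpuFeatureMap, as described above
+    #   op.ofm = ofm
+    #   op.kernel = NpuKernel(w=3, h=3)
+    #   op.padding = NpuPadding(top=1, left=1, bottom=1, right=1)
+    #   op.block_traversal = NpuBlockTraversal.DEPTH_FIRST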
+
+
+class NpuConvDepthWiseOperation(NpuBlockOperation):
+    """
+    NPU_OP_DEPTHWISE operation
+    """
+
+    def __init__(self):
+        super().__init__(NpuOperationType.ConvDepthWise)
+
+
+class NpuPoolingOperation(NpuBlockOperation):
+    """
+    NPU_OP_POOL operation
+    """
+
+    def __init__(self, pooling_op_type: NpuPoolingOp):
+        super().__init__(NpuOperationType.Pooling)
+        self.sub_op_type: NpuPoolingOp = pooling_op_type
+        # Set to a float value for ResizeBilinear operations (affects scaling), else to None
+        self.rescale: Optional[float] = None
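+
+    # Example (illustrative): a 2x2 max pooling with stride 2:
+    #   op = NpuPoolingOperation(NpuPoolingOp.MAX)
+    #   op.kernel = NpuKernel(w=2, h=2, stride_x=2, stride_y=2)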
+
+
+class NpuElementWiseOperation(NpuBlockOperation):
+    """
+    NPU_OP_ELEMENTWISE operation
+    """
+
+    def __init__(self, elementwise_op_type: NpuElementWiseOp):
+        super().__init__(NpuOperationType.ElementWise)
+        self.sub_op_type: NpuElementWiseOp = elementwise_op_type
+        # Set to True for binary operators where IFM2 should be used as first operand
+        self.reversed_operands: bool = False
+        # Set to a tuple (scale, shift) for explicit rescale, else to None
+        self.rescale: Optional[Tuple] = None
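+
+    # Example (illustrative): multiply IFM by a scalar constant held in IFM2
+    # (IFM/IFM2 feature map setup omitted):
+    #   op = NpuElementWiseOperation(NpuElementWiseOp.MUL)
+    #   op.ifm2_scalar = 0.5  # IFM2 holds the scalar operand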