MLBEDSW-6686: Resize bilinear HPC with tile padding

- Added support for Resize Bilinear with half-pixel centers (HPC) for
int8 and uint8.

- Utilizes the new "TILE" padding mode.

- Utilizes OFM stride multipliers and modified tile base offsets to
write OFMs interleaved.

Signed-off-by: Rickard Bolin <rickard.bolin@arm.com>
Change-Id: I37fa77c022a368f05fda0ead75d8696c9205f833
diff --git a/ethosu/vela/operation.py b/ethosu/vela/operation.py
index e162204..af2205c 100644
--- a/ethosu/vela/operation.py
+++ b/ethosu/vela/operation.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
+# Copyright (C) 2020-2022 Arm Limited or its affiliates. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -474,7 +474,7 @@
 
     __slots__ = (
         "type",
-        "original_type",
+        "_original_type",
         "name",
         "op_index",
         "attrs",
@@ -501,12 +501,14 @@
         "write_offset",
         "write_shape",
         "ifm_resampling_mode",
+        "tile_base_offsets_ifm",
+        "tile_base_offsets_ofm",
         "ofm_stride_multiplier",
     )
 
     def __init__(self, op_type: Op, name: str):
         self.type = op_type
-        self.original_type = op_type
+        self._original_type = op_type  # the original type of the operation. once set this shouldn't be changed
         self.name = name
         self.attrs: Dict[str, Any] = {}
         self.inputs: List[Optional[Tensor]] = []
@@ -546,6 +548,10 @@
         # write_offset 0,9,0,0, write_shape 1,1,8,1
         self.write_shape: Optional[Shape4D] = None
         self.ifm_resampling_mode: resampling_mode = resampling_mode.NONE
+        # ifm (nhwc), ifm2 (nhwc)
+        self.tile_base_offsets_ifm: List[List[int]] = [[0, 0, 0, 0], [0, 0, 0, 0]]
+        # ofm (nhwc)
+        self.tile_base_offsets_ofm: List[int] = [0, 0, 0, 0]
         # For interleaved/sparse outputs - stride is multiplied with the stride factor of the corresponding axis
         # Order is [C, H, W] - default is no multiplication
         self.ofm_stride_multiplier: List[int] = [1, 1, 1]
@@ -553,6 +559,9 @@
     def clone(self, suffix="_clone"):
         res = Operation(self.type, self.name + suffix)
 
+        # maintain the original type, in cases where the type was changed to something different
+        res._original_type = self._original_type
+
         res.attrs = dict(self.attrs)
         res.inputs = list(self.inputs)
         res.outputs = list(self.outputs)
@@ -567,11 +576,15 @@
         res.op_index = None  # not relevant as not part of input network
         res.read_offsets = list(self.read_offsets)
         res.read_shapes = list(self.read_shapes)
+        res.write_offset = Shape4D(*self.write_offset) if self.write_offset else None
+        res.write_shape = Shape4D(*self.write_shape) if self.write_shape else None
         res.rounding_mode = self.rounding_mode
         res.explicit_scaling = self.explicit_scaling
         res.low_precision_scaling = self.low_precision_scaling
         res.rescale = self.rescale
         res.ifm_resampling_mode = self.ifm_resampling_mode
+        res.tile_base_offsets_ifm = [_ifm.copy() for _ifm in self.tile_base_offsets_ifm]
+        res.tile_base_offsets_ofm = self.tile_base_offsets_ofm.copy()
         res.ofm_stride_multiplier = self.ofm_stride_multiplier.copy()
 
         return res
@@ -581,6 +594,10 @@
 
     __repr__ = __str__
 
+    @property
+    def original_type(self):
+        return self._original_type
+
     def get_kernel_size(self):
         weights = self.weights
         if weights and self.type.npu_block_type in (NpuBlockType.ConvolutionDepthWise, NpuBlockType.ConvolutionMxN):