MLBEDSW-4022: support PAD followed by pool operator

A PAD operator followed by a max/average pool is run on the NPU if
NPU padding can be used. The average pool is converted to a depthwise
convolution.

Change-Id: Icc3652e6d9ecff5ac3dc7d92080313d90c245404
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
diff --git a/ethosu/vela/operation.py b/ethosu/vela/operation.py
index e4d11be..967d30b 100644
--- a/ethosu/vela/operation.py
+++ b/ethosu/vela/operation.py
@@ -25,6 +25,7 @@
 from typing import Tuple
 from typing import TYPE_CHECKING
 
+from .api import NpuRoundingMode
 from .errors import VelaError
 from .numeric_util import full_shape
 from .shape4d import Shape4D
@@ -420,6 +421,7 @@
         "ofm_shapes",
         "rescale",
         "read_offsets",
+        "rounding_mode",
     )
 
     def __init__(self, op_type: Op, name: str):
@@ -448,6 +450,7 @@
         # (which overrides the ofm tensor's scale)
         self.rescale = None
         self.read_offsets: List[Shape4D] = [None, None]  # offset for [ifm, ifm2]
+        self.rounding_mode: Optional[NpuRoundingMode] = None
 
     def clone(self, suffix="_clone"):
         res = Operation(self.type, self.name + suffix)
@@ -464,6 +467,7 @@
         res.scheduled_pass = self.scheduled_pass
         res.op_index = None  # not relevant as not part of input network
         res.read_offsets = list(self.read_offsets)
+        res.rounding_mode = self.rounding_mode
 
         return res