TOSA: Support for AVGPOOL, MAXPOOL and CONV2D

Added support for
-AVGPOOL and CONV2D with TFLite correspondence
-MAXPOOL
-additional support for replacing RESCALE ops with avgpool.

There is no support for breaking down tensors that exceed
the size supported by the NPU.

Signed-off-by: Patrik Gustavsson <patrik.gustavsson@arm.com>
Change-Id: I1d2aa50ac30a26283b3e6f1fe88cba1544b7c189
diff --git a/ethosu/vela/graph_optimiser_util.py b/ethosu/vela/graph_optimiser_util.py
index 5e676f1..570c724 100644
--- a/ethosu/vela/graph_optimiser_util.py
+++ b/ethosu/vela/graph_optimiser_util.py
@@ -15,6 +15,8 @@
 # limitations under the License.
 # Description:
 # Common functions and definitions used during the graph optimization.
+from typing import Tuple
+
 from .data_type import DataType
 from .debug_database import DebugDatabase
 from .errors import VelaError
@@ -132,6 +134,21 @@
     tens.needs_linear_format = False
 
 
+def calc_explicit_padding(input_size, stride, filter_size, pad_before, pad_after) -> Tuple[int, int]:
+    """
+    Translates the explicit padding given by a PAD operation into the (before, after) hardware padding
+    that provides equivalent results; pad_before is returned as-is, pad_after may be lowered.
+    """
+    # Remainder (modulo stride) that the bottom/right padding has to match
+    wanted_remainder = (needed_total_padding(input_size, stride, filter_size) - pad_before) % stride
+
+    # Step the bottom/right padding down until the remainders agree; it is never lowered below zero
+    hw_pad_after = pad_after
+    while hw_pad_after > 0 and hw_pad_after % stride != wanted_remainder:
+        hw_pad_after -= 1
+    return pad_before, hw_pad_after
+
+
 def needed_total_padding(input_size, stride, filter_size):
     out_size = (input_size + stride - 1) // stride
     needed_input = (out_size - 1) * stride + filter_size