MLBEDSW-4022: support PAD followed by pool operator

A PAD operator followed by a max or average pool is run on the NPU if
NPU padding can be used. The average pool is converted to a depthwise
convolution.
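
For reference, a minimal sketch of the rewrite that the new tests
exercise; the helper below is hypothetical, not Vela's implementation.
Only the padding bookkeeping mirrors the tests: the PAD constant
[[0, 0], [top, bottom], [left, right], [0, 0]] is folded into the
consumer as explicit_padding (top, left, bottom, right), and the
AvgPool becomes a DepthwiseConv2DBias.

    def fold_pad_into_pool(pad_values, attrs):
        """Fold a PAD op's NHWC padding constant into explicit NPU padding."""
        (top, bottom), (left, right) = pad_values[1], pad_values[2]
        attrs = dict(attrs)
        attrs["padding"] = "EXPLICIT"  # stands in for Padding.EXPLICIT
        attrs["explicit_padding"] = (top, left, bottom, right)
        return attrs

    # Example, using the PAD constant from test_optimise_pad_followed_by_avg_pool:
    # fold_pad_into_pool([[0, 0], [2, 1], [1, 1], [0, 0]], {})["explicit_padding"]
    # -> (2, 1, 1, 1)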

Change-Id: Icc3652e6d9ecff5ac3dc7d92080313d90c245404
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
diff --git a/ethosu/vela/test/test_graph_optimiser.py b/ethosu/vela/test/test_graph_optimiser.py
index 40b8cd5..285b3ac 100644
--- a/ethosu/vela/test/test_graph_optimiser.py
+++ b/ethosu/vela/test/test_graph_optimiser.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
+# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -157,6 +157,54 @@
     assert pad_op not in op.ifm.ops
 
 
+def test_optimise_pad_followed_by_avg_pool():
+    """
+    Tests that the PAD operator is bypassed when followed by an average pool
+    operator, and that the average pool is converted to a depthwise convolution
+    """
+    # Create Pad operation followed by AvgPool
+    quant = testutil.default_quant_params()
+    in_tens = Tensor([1, 76, 75, 64], DataType.uint8, "input")
+    in_tens.quantization = quant
+    pad_input = create_const_tensor("pad_input", [4, 2], DataType.int32, [[0, 0], [2, 1], [1, 1], [0, 0]])
+    temp_tens = Tensor([1, 79, 77, 64], DataType.uint8, "pad_out")
+    temp_tens.quantization = quant.clone()
+    out_tens = Tensor([1, 76, 75, 64], DataType.uint8, "output")
+    out_tens.quantization = quant.clone()
+
+    pad_op = testutil.create_op(Op.Pad, [in_tens, pad_input], temp_tens)
+    attrs = {
+        "padding": Padding.VALID,
+        "ksize": [1, 5, 3, 1],
+        "stride_w": 2,
+        "stride_h": 2,
+        "dilation_w_factor": 1,
+        "dilation_h_factor": 1,
+    }
+    attrs["strides"] = (1, attrs["stride_h"], attrs["stride_w"], 1)
+    pad_op.run_on_npu = True
+    pool_op = testutil.create_op(Op.AvgPool, [temp_tens], out_tens, attrs)
+    pool_op.run_on_npu = True
+    nng = Graph()
+    sg = testutil.create_subgraph([pad_op, pool_op])
+    nng.subgraphs.append(sg)
+    arch = testutil.create_arch()
+
+    optimise_pad(pool_op, nng, arch)
+
+    op = sg.output_tensors[0].ops[0]
+    assert op.type == Op.DepthwiseConv2DBias
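+    # NPU padding is now explicit; the tuple order is (top, left, bottom, right)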
+    assert op.attrs["padding"] == Padding.EXPLICIT
+    assert op.attrs["explicit_padding"] == (2, 1, 1, 1)
+    assert op.ifm.shape == [1, 76, 75, 64]
+    assert pad_op not in op.ifm.ops
+    # Check that bias and weight tensors have been added
+    assert op.bias.shape == [64]
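+    # The depthwise weights created by the conversion are [kernel_h, kernel_w, 1, depth]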
+    assert op.weights.shape == [5, 3, 1, 64]
+
+
 def test_remove_reshape():
     """
     Tests that the expected reshape are removed in graph_optimisation
diff --git a/ethosu/vela/test/test_supported_operators.py b/ethosu/vela/test/test_supported_operators.py
index 3e9724d..6401d29 100644
--- a/ethosu/vela/test/test_supported_operators.py
+++ b/ethosu/vela/test/test_supported_operators.py
@@ -609,14 +609,8 @@
     op_consumer = testutil.create_op_with_quant_tensors(Op.ConcatTFLite, [1, 1, 1, 4], [1, 1, 1, 8])
     op.ofm.consumer_list = [op_consumer]
     assert not support.is_operator_supported(op)
-    op_consumer = testutil.create_op_with_quant_tensors(Op.AvgPool, [1, 8, 8, 8], [1, 8, 8, 8])
-    op_consumer.attrs = {
-        "stride_w": 2,
-        "stride_h": 2,
-        "filter_width": 2,
-        "filter_height": 2,
-        "padding": Padding.VALID,
-    }
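+    # PAD + AvgPool is now supported, so use an elementwise consumer to keep this case unsupported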
+    op_consumer = testutil.create_elemwise_op(Op.Add, "op", [1, 3, 3, 1], [1, 3, 3, 1], [1, 3, 3, 1])
     op.ofm.consumer_list = [op_consumer]
     assert not support.is_operator_supported(op)
 
@@ -655,6 +648,58 @@
     assert support.is_operator_supported(op) == expected
 
 
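+# Each case: (kernel size (h, w), padding (top, left, bottom, right), expected PAD support)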
+pad_avg_pool_test_data = [
+    ((3, 3), (1, 1, 1, 1), True),
+    ((2, 4), (1, 2, 1, 2), True),
+    ((5, 3), (2, 1, 2, 1), True),
+    ((5, 3), (0, 1, 2, 1), True),
+    ((5, 3), (2, 0, 2, 1), True),
+    ((5, 3), (2, 1, 0, 1), True),
+    ((5, 3), (2, 1, 2, 0), True),
+    ((4, 4), (2, 2, 2, 2), True),
+    ((4, 4), (1, 2, 2, 2), False),
+    ((4, 4), (2, 1, 2, 2), False),
+    ((4, 4), (2, 2, 1, 2), False),
+    ((4, 4), (2, 2, 2, 1), False),
+]
+
+
+@pytest.mark.parametrize("k_size, padding, expected", pad_avg_pool_test_data)
+def test_pad_followed_by_avg_pool(k_size, padding, expected):
+    # Tests PAD followed by AvgPool
+    k_h, k_w = k_size
+    top, left, bottom, right = padding
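+    # The PAD op's constant input: NHWC, one (before, after) pair per axis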
+    pad_values = [[0, 0], [top, bottom], [left, right], [0, 0]]
+    dtype = DataType.int8
+    qp = testutil.default_quant_params()
+    in_shape = [1, 15, 17, 8]
+    out_shape = [1, in_shape[1] + top + bottom, in_shape[2] + left + right, in_shape[3]]
+    in0 = Tensor(in_shape, dtype, "in")
+    in0.quantization = qp
+    pad_tensor = create_const_tensor(
+        name="pad", shape=list(np.shape(pad_values)), values=pad_values, dtype=DataType.int32
+    )
+    out = Tensor(out_shape, dtype, "out")
+    out.quantization = qp.clone()
+    op = testutil.create_op(Op.Pad, [in0, pad_tensor], out)
+    pool_out_tens = Tensor(in_shape, dtype, "output")
+    pool_out_tens.quantization = qp.clone()
+    attrs = {
+        "padding": Padding.VALID,
+        "ksize": [1, k_h, k_w, 1],
+        "stride_w": 1,
+        "stride_h": 1,
+        "dilation_w_factor": 1,
+        "dilation_h_factor": 1,
+    }
+    pool_op = testutil.create_op(Op.AvgPool, [out], pool_out_tens, attrs)
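+    # Ensure the pool is registered as a consumer of the PAD output tensor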
+    pool_op.add_input_tensor(out)
+    assert support.is_operator_supported(op) == expected
+
+
 def create_strided_slice():
     # Creates a valid strided slice operator with some valid inputs/outputs
     op = create_strided_slice_op([1, 10, 10, 10], [1, 5, 5, 10], [127, 2, 2, 0], [0, 7, -3, 0])