MLBEDSW-6905: Add support for dilations greater than 2

 - Added graph optimisation pass to support dilations greater than 2
in either dimension (see the sketch below)
 - Removed the dilation range restriction from the supported operators checks
 - Removed the erroneous dilation constraints from TRANSPOSE_CONV
 - Updated unit tests and documentation

Signed-off-by: Tim Hall <tim.hall@arm.com>
Change-Id: Ide302374b0d5eff25c20501383a63f6aa7625c52
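
For reference, a minimal standalone NumPy sketch of the decomposition that the
new fixup_dilation_gt2 pass (in the tflite_graph_optimiser.py hunk below)
performs: an even dilation contributes a hardware dilation of 2, and any
remaining factor is baked into a sparse kernel. This is illustration only, not
part of the patch; decompose_dilation and dilate_kernel are hypothetical helper
names, and the HWIO weight layout matches the pass.

    import numpy as np

    def decompose_dilation(d):
        # the hardware supports dilation 1 or 2; any remaining factor must be
        # baked into the kernel itself
        hw = 2 if d % 2 == 0 else 1
        return hw, d // hw

    def dilate_kernel(weights, dilation_w, dilation_h):
        # weights are HWIO, matching the Vela weight tensor handled by the pass
        kernel_h, kernel_w, ic, oc = weights.shape
        hw_h, scale_h = decompose_dilation(dilation_h)
        hw_w, scale_w = decompose_dilation(dilation_w)
        new_h = (kernel_h - 1) * scale_h + 1
        new_w = (kernel_w - 1) * scale_w + 1
        sparse = np.zeros((new_h, new_w, ic, oc), dtype=weights.dtype)
        sparse[::scale_h, ::scale_w] = weights  # scatter the original taps
        return sparse, (hw_w, hw_h)

    # a 3x3 kernel with dilation 4 becomes a sparse 5x5 kernel run with
    # hardware dilation 2; the effective receptive field is unchanged because
    # (5 - 1) * 2 + 1 == (3 - 1) * 4 + 1 == 9
    w = np.arange(9, dtype=np.int8).reshape(3, 3, 1, 1)
    sparse, (hw_w, hw_h) = dilate_kernel(w, 4, 4)
    assert sparse.shape[:2] == (5, 5) and (hw_w, hw_h) == (2, 2)
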
diff --git a/SUPPORTED_OPS.md b/SUPPORTED_OPS.md
index d1814a6..14c61b4 100644
--- a/SUPPORTED_OPS.md
+++ b/SUPPORTED_OPS.md
@@ -1,7 +1,7 @@
 # Supported Ops
 
 This file was automatically generated by Vela using the `--supported-ops-report` parameter.  
-Vela version: `3.6.0rc1.dev37+g53605be.d20221027`
+Vela version: `3.6.0rc1.dev28+gd3d81b3`
 
 This file complies with
 [**Gitiles Markdown syntax**](https://github.com/google/gitiles/blob/master/Documentation/markdown.md)
@@ -124,7 +124,6 @@
 - Stride values for both width and height must be integer types
 - Dilation factor values for both width and height must be integer types
 - Stride values for both width and height must be in the range [1, 3]
-- Dilation factor values for both width and height must be in the range [1, 2]
 - Dilated kernel height must be in the range [1, 64]
 - Product of dilated kernel width and height must be in the range [1, 4096]
 - Weight tensor must be 8-bit
@@ -140,7 +139,6 @@
 - Stride values for both width and height must be integer types
 - Dilation factor values for both width and height must be integer types
 - Stride values for both width and height must be in the range [1, 3]
-- Dilation factor values for both width and height must be in the range [1, 2]
 - Dilated kernel height must be in the range [1, 64]
 - Product of dilated kernel width and height must be in the range [1, 4096]
 - Weight tensor must be 8-bit
@@ -323,8 +321,6 @@
 This is a list of constraints that the TRANSPOSE_CONV operator must satisfy in order to be scheduled on the NPU.
 
 - Stride values for both width and height must be integer types
-- Dilation factor values for both width and height must be integer types
-- Dilation factor values for both width and height must be in the range [1, 2]
 - Dilated kernel height must be in the range [1, 64]
 - Product of dilated kernel width and height must be in the range [1, 4096]
 - Weight tensor must be 8-bit
diff --git a/ethosu/vela/test/test_tflite_supported_operators.py b/ethosu/vela/test/test_tflite_supported_operators.py
index 4410938..6468d3d 100644
--- a/ethosu/vela/test/test_tflite_supported_operators.py
+++ b/ethosu/vela/test/test_tflite_supported_operators.py
@@ -111,13 +111,6 @@
     assert not support.is_operator_supported(op)
 
 
-def test_constraint_dilation_range():
-    # Dilation width and height must lie within a certain range
-    op = testutil.create_op_with_quant_tensors(Op.Conv2DBias, [1, 8, 8, 8], [1, 8, 8, 8])
-    op.attrs = {"stride_w": 1, "stride_h": 1, "dilation_w_factor": 0, "dilation_h_factor": 20}
-    assert not support.is_operator_supported(op)
-
-
 def test_constraint_dilated_height_range():
     # Dilated kernel height must lie within a certain range
     op = testutil.create_op_with_quant_tensors(Op.Conv2DBias, [1, 8, 8, 8], [1, 8, 8, 8], weights_shape=[65, 64, 1, 1])
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index 90b2932..fcaac15 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -1810,6 +1810,53 @@
     return op
 
 
+def fixup_dilation_gt2(op, arch, nng):
+    assert op.run_on_npu
+    if op.type == Op.Conv2DBias or op.type == Op.DepthwiseConv2DBias:
+        dilation_w, dilation_h = op.get_kernel_dilation()
+
+        # if the dilation in either axis is greater than the maximum supported by the hardware (2) then the kernel
+        # must be manually dilated
+        if dilation_w > 2 or dilation_h > 2:
+            kernel_w, kernel_h = op.get_kernel_size()
+            kernel_ic = op.weights.shape[-2]
+            kernel_oc = op.weights.shape[-1]
+
+            # if the dilation is a multiple of 2 then hardware dilation can be enabled to provide that factor
+            # of 2, halving the size of the manually dilated kernel in that dimension.
+            # odd dilation = 1, even dilation = 2
+            hw_dilation_h = 1 if (dilation_h & 1) else 2
+            hw_dilation_w = 1 if (dilation_w & 1) else 2
+
+            scale_dilation_h = dilation_h // hw_dilation_h
+            scale_dilation_w = dilation_w // hw_dilation_w
+
+            # create a new empty kernel (HWIO format) sized for the scale dilation
+            new_kernel_h = (kernel_h - 1) * scale_dilation_h + 1
+            new_kernel_w = (kernel_w - 1) * scale_dilation_w + 1
+
+            new_kernel_shape = [new_kernel_h, new_kernel_w, kernel_ic, kernel_oc]
+            new_kernel_values = np.zeros(new_kernel_shape, dtype=op.weights.values.dtype)
+
+            # copy the original kernel values into the new sparse kernel
+            for h in range(0, kernel_h):
+                for w in range(0, kernel_w):
+                    new_h = h * scale_dilation_h
+                    new_w = w * scale_dilation_w
+                    new_kernel_values[new_h, new_w, :, :] = op.weights.values[h, w, :, :]
+
+            # update the weight tensor with the new dilated kernel
+            op.weights.shape = new_kernel_shape
+            op.weights.values = new_kernel_values
+
+            # enable(=2) / disable(=1) hardware dilation
+            op.attrs["dilation"] = (1, hw_dilation_h, hw_dilation_w, 1)  # NHWC format
+            op.attrs["dilation_h_factor"] = hw_dilation_h
+            op.attrs["dilation_w_factor"] = hw_dilation_w
+
+    return op
+
+
 def supported_operator_check(op, arch, nng):
     op.run_on_npu = arch.tflite_supported_operators.is_operator_supported(op)
     return op
@@ -1909,6 +1956,7 @@
         fixup_asymmetric_weights,
         convert_tanh_sigmoid_to_lut,
         replace_pad_by_hw_pad,
+        fixup_dilation_gt2,
     ]
 
     for idx, sg in enumerate(nng.subgraphs):
diff --git a/ethosu/vela/tflite_model_semantic.py b/ethosu/vela/tflite_model_semantic.py
index 7a0e234..189e837 100644
--- a/ethosu/vela/tflite_model_semantic.py
+++ b/ethosu/vela/tflite_model_semantic.py
@@ -106,7 +106,9 @@
         # Conv-like checks:
         for op_type in TFLiteSemantic.convolution_like_ops:
             self.specific_constraints[op_type].append(TFLiteSemantic.constraint_stride_type)
-            self.specific_constraints[op_type].append(TFLiteSemantic.constraint_dilation_type)
+            if op_type not in TFLiteSemantic.transpose_convolution_ops:
+                # Transpose Conv does not have a dilation attribute
+                self.specific_constraints[op_type].append(TFLiteSemantic.constraint_dilation_type)
 
         # Pooling checks:
         for op_type in TFLiteSemantic.pooling_ops:
diff --git a/ethosu/vela/tflite_supported_operators.py b/ethosu/vela/tflite_supported_operators.py
index fd8bbee..abbfb17 100644
--- a/ethosu/vela/tflite_supported_operators.py
+++ b/ethosu/vela/tflite_supported_operators.py
@@ -189,7 +189,6 @@
     # Defined ranges for allowed values:
     tens_dim_range = (1, 65535)
     stride_range = (1, 3)
-    dilation_range = (1, 2)
     dilated_height_range = (1, 64)
     dilated_product_range = (1, 64 * 64)
     weights_limit = 127 * 65536
@@ -225,8 +224,10 @@
 
         # Conv-like checks:
         for op_type in TFLiteSupportedOperators.convolution_like_ops:
-            self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_stride_range)
-            self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_dilation_range)
+            if op_type not in TFLiteSupportedOperators.transpose_convolution_ops:
+                # Transpose Conv has a specific stride constraint (see constraint_tconv_stride below)
+                self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_stride_range)
+
             self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_dilated_height_range)
             self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_dilated_product_range)
             self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_weights_type)
@@ -234,9 +235,6 @@
             self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_weights_limit)
             self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_bias_type)
             self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_bias_40bit)
-        # Remove stride contraint from Transpose Conv because it has a specific one (see below)
-        for op_type in TFLiteSupportedOperators.transpose_convolution_ops:
-            self.specific_constraints[op_type].remove(TFLiteSupportedOperators.constraint_stride_range)
         # Transpose Conv specific checks:
         for op_type in TFLiteSupportedOperators.transpose_convolution_ops:
             self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_tconv_stride)
@@ -434,15 +432,6 @@
         return valid, f"Op has stride WxH as: {w}x{h}"
 
     @classmethod
-    @docstring_format_args(dilation_range)
-    def constraint_dilation_range(cls, op):
-        "Dilation factor values for both width and height must be in the range [{}, {}]"
-        w, h = op.get_kernel_dilation()
-        dilation_min, dilation_max = cls.dilation_range
-        valid = (dilation_min <= w <= dilation_max) and (dilation_min <= h <= dilation_max)
-        return valid, f"Op has dilation factor WxH as: {w}x{h}"
-
-    @classmethod
     @docstring_format_args(dilated_height_range)
     def constraint_dilated_height_range(cls, op):
         "Dilated kernel height must be in the range [{}, {}]"