MLBEDSW-3623: Diff on semantic_segmentation The root cause of this diff is precision errors caused by rounding several times when performing a resize bilinear upscaling to more than twice the initial size. This is solved by rewriting the algorithm to perform nearest neighbour upscaling to the correct size and then applying one larger average pool instead of several 2x2 pools. Avgpool with padding is limited to kernel size 8x8, which constrains the largest possible bilinear upscaling to 8 times the input size. Signed-off-by: Rickard Bolin <rickard.bolin@arm.com> Change-Id: I846232f309ba26aab6c385e593cbe25b646c6668

commit: e546defed8b204b175f708fa51366462db41ad07 [log] [tgz]
author: Rickard Bolin <rickard.bolin@arm.com> Tue Jan 25 15:45:00 2022 +0000
committer: Rickard Bolin <rickard.bolin@arm.com> Wed Feb 02 08:49:52 2022 +0000
tree: 4f87258f336e2f58016707f8646d9bab6257f388
parent: 628928d32ceae5c95abd9b3a2cb7333b1e5de059 [diff]
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index 8cfc373..4098798 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py

@@ -299,13 +299,13 @@
     return op
 
 
-# Convert ResizeBilinear to a number of 2x2 pool ops
-def convert_resizebilinear_to_2x2_pool(op):
-    count = 0
+# Convert ResizeBilinear to a number of 2x2 nearest neighbor upscalings and one avgpool op with kernel size dependent
+# on the upscaling factor. Avgpool kernel limit of 8x8 when padding is applied limits the upscaling factor to 8.
+def convert_resizebilinear_to_nearest_neighbor_upscaling_and_pool(op):
     pre_op = op
     outputs = op.outputs
-
-    op.attrs.update({"strides": (1, 1, 1, 1), "ksize": (1, 2, 2, 1)})
+    dtype = op.ifm.dtype
+    op.attrs.update({"strides": (1, 1, 1, 1), "ksize": (1, 1, 1, 1)})
     if op.attrs["align_corners"]:
         shape_modifier = 1
         op.attrs["padding"] = Padding.VALID
@@ -316,41 +316,41 @@
 
     upscaled_shape = np.array(op.ifm_shapes[0].get_hw_as_list())
     out_shape = np.array(op.ofm_shapes[0].get_hw_as_list())
-    if (upscaled_shape == upscaled_shape * 2 - shape_modifier).all():
-        return op
 
-    while (upscaled_shape < out_shape).all():
-        if count == 0:
-            scaled_op = pre_op
-        else:
-            scaled_op = op.clone("_{}".format(count))
+    # Calculate how many times 2x2 upscaling needs to be performed
+    upscale_factor = round(out_shape[1] / upscaled_shape[1])
+    n = int(np.log2(upscale_factor))
+
+    # Perform 2x2 upscaling n-1 times
+    scaled_op = pre_op
+    for count in range(n - 1):
+        if count > 0:
+            scaled_op = op.clone(f"_{count}")
             scaled_op.inputs[0] = pre_op.outputs[0]
 
+        # Nearest neighbor 2x2 upscaling
         upscaled_shape = upscaled_shape * 2 - shape_modifier
+        shape = op.ofm_shapes[0].as_list()
+        shape[1:3] = upscaled_shape
+        out_tens = Tensor(shape, dtype, f"{op.outputs[0].name}_{count}")
+        out_tens.quantization = op.outputs[0].quantization.clone()
+        scaled_op.set_output_tensor(out_tens)
+        pre_op = scaled_op
 
-        if (upscaled_shape == out_shape).all():
-            scaled_op.outputs = outputs
-            scaled_op.outputs[0].ops = [scaled_op]
-        else:
-            shape = op.ofm_shapes[0].as_list()
-            shape[1:3] = upscaled_shape
-            out_tens = Tensor(shape, DataType.int16, "{}_{}".format(op.outputs[0].name, count))
-            out_tens.quantization = op.outputs[0].quantization.clone()
-            out_tens.quantization.quant_min = np.iinfo(np.int16).min
-            out_tens.quantization.quant_max = np.iinfo(np.int16).max
-            scaled_op.set_output_tensor(out_tens)
-            pre_op = scaled_op
-            count += 1
-
-        # Setup the scale value
-        if scaled_op.inputs[0].dtype.bits == 8 and scaled_op.outputs[0].dtype.bits == 16:
-            scaled_op.rescale = 128
-        elif scaled_op.inputs[0].dtype.bits == 16 and scaled_op.outputs[0].dtype.bits == 8:
-            scaled_op.rescale = 1 / 128
-        else:
-            scaled_op.rescale = None
         scaled_op.set_ifm_ofm_shapes()
 
+    # Last 2x2 upscaling also applies avgpool with kernel size dependent on the upscaling factor and adds
+    # padding to the right and bottom.
+    if n > 1:
+        scaled_op = op.clone(f"_{n-1}")
+        scaled_op.inputs[0] = pre_op.outputs[0]
+    scaled_op.attrs["padding"] = Padding.EXPLICIT
+    scaled_op.attrs["explicit_padding"] = [0, 0, upscale_factor - 1, upscale_factor - 1]
+    scaled_op.attrs.update({"ksize": (1, upscale_factor, upscale_factor, 1)})
+    scaled_op.outputs = outputs
+    scaled_op.outputs[0].ops = [scaled_op]
+    scaled_op.set_ifm_ofm_shapes()
+
     return op
 
 
@@ -363,7 +363,7 @@
         elif op.ifm_shapes[0].height == 1 and op.ifm_shapes[0].width == 1:
             convert_resizebilinear_1x1_to_add(op)
         else:
-            convert_resizebilinear_to_2x2_pool(op)
+            convert_resizebilinear_to_nearest_neighbor_upscaling_and_pool(op)
 
     return op
 

diff --git a/ethosu/vela/tflite_supported_operators.py b/ethosu/vela/tflite_supported_operators.py
index 60bc6fd..193a23f 100644
--- a/ethosu/vela/tflite_supported_operators.py
+++ b/ethosu/vela/tflite_supported_operators.py

@@ -511,8 +511,8 @@
         """The width and height of the IFM and OFM must match one of the following criteria:
         IFM W and H must both be 1
         IFM must match OFM
-        OFM W and H must be 2x IFM -1, if align_corners is True
-        OFM W and H must be 2x IFM, if align_corners is False"""
+        OFM W and H must be equal and 2/4/8x IFM -1, if align_corners is True
+        OFM W and H must be equal and 2/4/8x IFM, if align_corners is False"""
         # Easier to start with False condition as very few cases result in a supported resize
         valid = False
         ifm_shape = op.ifm.shape
@@ -523,16 +523,12 @@
             if ((ifm_shape[1] == 1) and (ifm_shape[2] == 1)) or (ifm_shape == ofm_shape):
                 valid = True
             else:
-                upscaled_shape = np.array(ifm_shape[1:3])
-                out_shape = np.array(ofm_shape[1:3])
-                while (upscaled_shape < out_shape).all():
-                    upscaled_shape *= 2
-                    if align_corners:
-                        upscaled_shape -= 1
-                    # Valid if OFM is 2x IFM (-1 for align corners)
-                    if np.array_equal(out_shape, upscaled_shape):
-                        valid = True
-                        break
+                # Valid if OFM is 2/4/8x IFM (-1 for align corners)
+                w_upscale_factor = (ofm_shape[1] + 1) / ifm_shape[1] if align_corners else ofm_shape[1] / ifm_shape[1]
+                h_upscale_factor = (ofm_shape[2] + 1) / ifm_shape[2] if align_corners else ofm_shape[2] / ifm_shape[2]
+
+                valid = w_upscale_factor == h_upscale_factor and w_upscale_factor in [2, 4, 8]
+
         return valid, f"Op has ifm_shape={ifm_shape}, ofm_shape={ofm_shape} and align_corners={align_corners}"
 
     @staticmethod
commit	e546defed8b204b175f708fa51366462db41ad07	[log] [tgz]
author	Rickard Bolin <rickard.bolin@arm.com>	Tue Jan 25 15:45:00 2022 +0000
committer	Rickard Bolin <rickard.bolin@arm.com>	Wed Feb 02 08:49:52 2022 +0000
tree	4f87258f336e2f58016707f8646d9bab6257f388
parent	628928d32ceae5c95abd9b3a2cb7333b1e5de059 [diff]