MLBEDSW-4913 Fix inception_v1/v3 output diff

Fix inception_v1/v3 output diffs by removing the Squeeze operator
in the graph optimisation step.
The Squeeze operator removes dimensions of size 1 from the tensor
shape while preserving the memory layout, so it can be bypassed in
the same way as Reshape.
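
As a minimal sketch of the invariant this relies on (NumPy stands
in for the tensor data; the shapes are illustrative, not taken
from the inception graphs):

    import numpy as np

    # Squeeze from (1, 1, 1, 1001) to (1001,): the size-1 dims are
    # dropped from the shape metadata, but no element moves.
    ifm = np.arange(1001).reshape(1, 1, 1, 1001)
    ofm = np.squeeze(ifm)

    assert ofm.shape == (1001,)
    # Same buffer, same byte order: the op is memory-only, so the
    # optimiser can rewire consumers to read the ifm directly.
    assert np.shares_memory(ifm, ofm)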

Signed-off-by: Jonas Ohlsson <jonas.ohlsson@arm.com>
Change-Id: I4ceffcbb141af5ed50b0d1a9d1d67622e638c2a1
diff --git a/SUPPORTED_OPS.md b/SUPPORTED_OPS.md
index 0e2076c..9c2a9f4 100644
--- a/SUPPORTED_OPS.md
+++ b/SUPPORTED_OPS.md
@@ -1,7 +1,7 @@
 # Supported Ops
 
 This file was automatically generated by Vela using the `--supported-ops-report` parameter.  
-Vela version: `3.0.1.dev13+g004cefe`
+Vela version: `3.1.0rc2.dev6+g4f87092`
 
 This file complies with
 [**Gitiles Markdown syntax**](https://github.com/google/gitiles/blob/master/Documentation/markdown.md)
@@ -46,6 +46,7 @@
 | SOFTMAX | [Generic](#tflite-generic-constraints), [Specific](#tflite-softmax-constraints) |
 | SPLIT | [Generic](#tflite-generic-constraints) |
 | SPLIT_V | [Generic](#tflite-generic-constraints), [Specific](#tflite-split_v-constraints) |
+| SQUEEZE | [Generic](#tflite-generic-constraints) |
 | STRIDED_SLICE | [Generic](#tflite-generic-constraints), [Specific](#tflite-strided_slice-constraints) |
 | SUB | [Generic](#tflite-generic-constraints), [Specific](#tflite-sub-constraints) |
 | TANH | [Generic](#tflite-generic-constraints) |
@@ -345,6 +346,7 @@
 | SOFTMAX | [Generic](#tosa-generic-constraints) |
 | SPLIT | [Generic](#tosa-generic-constraints) |
 | SPLIT_V | [Generic](#tosa-generic-constraints) |
+| SQUEEZE | [Generic](#tosa-generic-constraints) |
 | STRIDED_SLICE | [Generic](#tosa-generic-constraints) |
 | SUB | [Generic](#tosa-generic-constraints) |
 | TANH | [Generic](#tosa-generic-constraints) |
diff --git a/ethosu/vela/graph_optimiser_util.py b/ethosu/vela/graph_optimiser_util.py
index 0b44b8f..5e676f1 100644
--- a/ethosu/vela/graph_optimiser_util.py
+++ b/ethosu/vela/graph_optimiser_util.py
@@ -23,7 +23,10 @@
 from .tensor import check_quantized_tens_scaling_equal
 
 
-memory_only_ops = (Op.Reshape,)
+memory_only_ops = (
+    Op.Reshape,
+    Op.Squeeze,
+)
 
 
 def _avoid_nhcwb16_for_concat(tens):
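
Why Squeeze belongs in memory_only_ops next to Reshape: a Squeeze is
exactly a Reshape to the input shape with the size-1 dimensions
dropped. A sketch of that shape rule in plain Python (illustrative
helper, not Vela code):

    def squeezed_shape(shape, axes=None):
        # Drop size-1 dims: all of them by default, or only those
        # listed in axes (as TFLite's squeeze_dims attribute does).
        if axes is None:
            return [d for d in shape if d != 1]
        axes = [a % len(shape) for a in axes]
        return [d for i, d in enumerate(shape) if i not in axes]

    assert squeezed_shape([1, 1, 1, 1001]) == [1001]
    assert squeezed_shape([1, 1, 1, 1001], axes=[1, 2]) == [1, 1001]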
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index 2959803..6c85bb4 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -1061,8 +1061,8 @@
     return op
 
 
-def remove_reshapes(op, arch):
-    if op.run_on_npu and op.type == Op.Reshape:
+def remove_reshape_and_squeeze_ops(op, arch):
+    if op.run_on_npu and op.type in (Op.Reshape, Op.Squeeze):
         ofm = op.ofm
         ifm = op.ifm
 
@@ -1073,11 +1073,11 @@
             # or the reshape needs to be replaced with a NOP.
             return
 
-        # Check if Reshape ifm/ofm are network ifm/ofm
+        # Check if ifm/ofm are network ifm/ofm
         ifm_is_sg_ifm = ifm.ops[0].type in (Op.Placeholder, Op.SubgraphInput, Op.Const)
         ifm_is_sg_ofm = any(ifm_cons is None for ifm_cons in ifm.consumer_list)
         ofm_is_sg_ofm = any(ofm_cons is None for ofm_cons in ofm.consumer_list)
-        # Check if ifm/ofm is produced repectivly consumed by CPU
+        # Check if ifm is produced by CPU or ofm is consumed by CPU
         ifm_is_cpu_produced = any(ifm_prod is not None and not ifm_prod.run_on_npu for ifm_prod in op.ifm.ops)
         ofm_is_cpu_consumed = any(ofm_cons is not None and not ofm_cons.run_on_npu for ofm_cons in op.ofm.consumer_list)
 
@@ -1097,7 +1097,7 @@
                     if cons_ifm == ifm:
                         ifm_cons.set_input_tensor(ofm, ifm_idx)
         else:
-            # Bypassed Reshape by replacing ofm with ifm
+            # Bypass the op by replacing ofm with ifm
             for cons in ofm.consumer_list:
                 for ifm_idx, cons_ifm in enumerate(cons.inputs):
                     if cons_ifm == ofm:
@@ -1567,9 +1567,9 @@
             nng, sg, arch, [], [fix_sg_input_output], rewrite_unsupported=False,
         )
 
-    # Removal of reshapes
+    # Removal of Reshape and Squeeze ops
     for sg in nng.subgraphs:
-        rewrite_graph.visit_graph_post_order(sg.output_tensors, arch, [], [remove_reshapes])
+        rewrite_graph.visit_graph_post_order(sg.output_tensors, arch, [], [remove_reshape_and_squeeze_ops])
         sg.refresh_after_modification()
 
     # Rewrite of operators
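
The bypass itself is just consumer rewiring. A self-contained sketch
of the common case handled above (ofm replaced by ifm), using
hypothetical Tensor/Operation stubs rather than Vela's real classes:

    class Tensor:
        def __init__(self, name):
            self.name = name
            self.ops = []            # producing operations
            self.consumer_list = []  # consuming operations

    class Operation:
        def __init__(self, ifm, ofm):
            self.inputs = [ifm]
            self.ifm, self.ofm = ifm, ofm
            ifm.consumer_list.append(self)
            ofm.ops.append(self)

        def set_input_tensor(self, tens, idx):
            # Swap input idx to tens, keeping consumer lists consistent.
            self.inputs[idx].consumer_list.remove(self)
            self.inputs[idx] = tens
            tens.consumer_list.append(self)

    def bypass(op):
        # Rewire every consumer of op.ofm to read op.ifm directly;
        # the memory-only op is left without consumers and falls out
        # of the graph on the next refresh.
        for cons in list(op.ofm.consumer_list):
            for idx, tens in enumerate(cons.inputs):
                if tens is op.ofm:
                    cons.set_input_tensor(op.ifm, idx)

When the ofm must survive (e.g. it is a subgraph output or consumed
by the CPU), the pass flips direction and replaces the ifm with the
ofm instead, so the network-facing tensor keeps its identity.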
diff --git a/ethosu/vela/tflite_supported_operators.py b/ethosu/vela/tflite_supported_operators.py
index dc4e6f0..016d44e 100644
--- a/ethosu/vela/tflite_supported_operators.py
+++ b/ethosu/vela/tflite_supported_operators.py
@@ -86,7 +86,7 @@
     )
     split_ops = set((Op.Split, Op.SplitV, Op.StridedSlice, Op.Slice, Op.UnpackReshaped, Op.Unpack,))
     concat_ops = set((Op.Concat, Op.ConcatTFLite, Op.PackReshaped, Op.Pack,))
-    memory_only_ops = set((Op.Reshape, Op.QuantizedReshape,)) | concat_ops | split_ops
+    memory_only_ops = set((Op.Reshape, Op.QuantizedReshape, Op.Squeeze,)) | concat_ops | split_ops
     per_axis_quant_ops = convolution_like_ops  # per-axis/channel quantization only currently supported for conv ops
     supported_fused_activations = relu_ops | set((Op.Tanh, Op.Sigmoid, Op.LUT,))
     supported_operators = npu_pre_ops | mac_main_ops | elem_wise_main_ops | pad_ops | npu_post_ops | memory_only_ops
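
For completeness, the effect of the one-line change above reduced to
plain sets (strings stand in for the Op members; illustrative only):

    concat_ops = {"Concat", "ConcatTFLite", "PackReshaped", "Pack"}
    split_ops = {"Split", "SplitV", "StridedSlice", "Slice",
                 "UnpackReshaped", "Unpack"}
    memory_only_ops = (
        {"Reshape", "QuantizedReshape", "Squeeze"} | concat_ops | split_ops
    )

    # memory_only_ops feeds supported_operators, which is why the
    # generated SUPPORTED_OPS.md gains a SQUEEZE row.
    assert "Squeeze" in memory_only_ops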