MLBEDSW-1497: Add Quantize operator support

Signed-off-by: Jacob Bohlin <jacob.bohlin@arm.com>
Change-Id: Iaf4d7ab9c32b0d783072c5f131a61bfebe77cc16
diff --git a/ethosu/vela/pass_packing.py b/ethosu/vela/pass_packing.py
index fff192d..c14a70b 100644
--- a/ethosu/vela/pass_packing.py
+++ b/ethosu/vela/pass_packing.py
@@ -100,7 +100,7 @@
 
 npu_post_fuse_limited_ops = set(
     # Set of post operators that should not be fused with main/elementwise ops
-    ("ConcatSliceWrite", "Sigmoid", "Tanh")
+    ("ConcatSliceWrite", "Sigmoid", "Tanh", "Quantize")
 )
 
 elem_wise_ops = elem_wise_main_ops | activation_ops | set(("Sigmoid", "Tanh"))
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index 4bbea01..28bc6b7 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -430,6 +430,7 @@
                 rounding_mode = rounding.TRUNCATE
             fmf = primary_op.attrs.get("fused_memory_function", None)
             faf = primary_op.attrs.get("fused_activation_function", None)
+            fused_quantize = any(op.type == "Quantize" for op in ps.ops)
 
             # Specifies which operand to apply scaling to in bitexact elementwise ADD/SUB
             op_to_scale = 0
@@ -628,6 +629,11 @@
                                 scale = (1 << shift) * 3 * multiplier
                             else:
                                 scale = int(round_away_zero(scale * rescale))
+                        elif fused_quantize:
+                            # Fused Quantize op: derive scale and shift from the IFM/OFM scale ratio
+                            ifm_scale_f64 = np.double(cmd.ifm_tensor.quantization.scale_f32)
+                            ofm_scale_f64 = np.double(cmd.ofm_tensor.quantization.scale_f32)
+                            scale, shift = scaling.quantise_scale(ifm_scale_f64 / ofm_scale_f64)
                         else:
                             # In case avg pool fused with concat or other memory operation, rescaling might be needed.
                             # k_height == k_width == 1 is allways true in this case
@@ -846,7 +852,7 @@
                 if tens is None:
                     continue
 
-                need_zero_point = (faf is not None) or (fmf == "ConcatSliceWrite")
+                need_zero_point = (faf is not None) or (fmf == "ConcatSliceWrite") or fused_quantize
                 if (
                     primary_op.type in set(("AvgPool", "AvgPoolAct", "ResizeBilinear")) and not need_zero_point
                 ) or tens.quantization is None:
diff --git a/ethosu/vela/supported_operators.py b/ethosu/vela/supported_operators.py
index 55e718e..3bf46a9 100644
--- a/ethosu/vela/supported_operators.py
+++ b/ethosu/vela/supported_operators.py
@@ -66,6 +66,8 @@
             | set(("ConcatSliceWrite"))
             # bias add and batch norm
             | set(("QuantizedBiasAdd", "Requantize", "QuantizedBatchNorm", "BiasAdd", "FusedBatchNorm"))
+            # Quantization
+            | set(("Quantize",))
         )
         self.split_ops = set(("Split", "SplitV", "StridedSlice", "Slice", "UnpackReshaped", "Unpack"))
         self.concat_ops = set(("Concat", "ConcatV2", "QuantizedConcat", "ConcatTFLite", "PackReshaped", "Pack"))