MLBEDSW-5209 Vela: output diff depthwise with non-zero zero points

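Fixed by resetting the weight zero points to zero for conv2d and depthwise
conv2d ops that run on the NPU with an int8 IFM and asymmetric weights,
since the reference does not support asymmetric weights for int8 IFM and
ignores the zero points. A warning is printed whenever the zero points
are adjusted.

The quantized dimension of a tensor's quantization parameters is now also
read from and written back to the TFLite file.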

Signed-off-by: Fredrik Svedberg <fredrik.svedberg@arm.com>
Change-Id: I2a206a01a471a53aa864a6a3616aa23d2a5a23c8
diff --git a/SUPPORTED_OPS.md b/SUPPORTED_OPS.md
index f96bd4a..cfb3a5d 100644
--- a/SUPPORTED_OPS.md
+++ b/SUPPORTED_OPS.md
@@ -1,7 +1,7 @@
 # Supported Ops
 
 This file was automatically generated by Vela using the `--supported-ops-report` parameter.  
-Vela version: `3.1.1.dev13+gf54e94a.d20210914`
+Vela version: `3.1.1.dev32+gdc3b1f9`
 
 This file complies with
 [**Gitiles Markdown syntax**](https://github.com/google/gitiles/blob/master/Documentation/markdown.md)
@@ -210,6 +210,7 @@
 - IFM must be int8 or uint8
 - Input tensor must be at least 2D
 - Axis indices must correspond to height and width axes
+- IFM Tensor batch size must be 1
 - Product of height and width can be at most 65536
 - Product of height and width can be at most 4096 when IFM and OFM have different scale or zero point,  
         or keep_dims is True
@@ -265,6 +266,7 @@
         IFM must match OFM  
         OFM W and H must be 2x IFM -1, if align_corners is True  
         OFM W and H must be 2x IFM, if align_corners is False
+- half_pixel_centers are not supported
 
 ### TFLite SOFTMAX Constraints
 
diff --git a/ethosu/vela/tensor.py b/ethosu/vela/tensor.py
index d62ebc8..8c5e277 100644
--- a/ethosu/vela/tensor.py
+++ b/ethosu/vela/tensor.py
@@ -209,7 +209,17 @@
 
 
 class QuantizationParameters:
-    __slots__ = "min", "max", "num_bits", "narrow_range", "scale_f32", "zero_point", "quant_min", "quant_max"
+    __slots__ = (
+        "min",
+        "max",
+        "num_bits",
+        "narrow_range",
+        "scale_f32",
+        "zero_point",
+        "quant_min",
+        "quant_max",
+        "quant_dim",
+    )
 
     def __init__(
         self,
@@ -228,6 +238,7 @@
         self.zero_point: Union[int, np.ndarray, None] = None
         self.quant_min: Optional[float] = None
         self.quant_max: Optional[float] = None
+        self.quant_dim: Optional[int] = None
 
     def __str__(self):
         return "<nng.QuantizationParameters min=%s max=%s, num_bits=%s, scale=%s, zero_point=%s>" % (
@@ -252,6 +263,7 @@
         res.zero_point = self.zero_point
         res.quant_min = self.quant_min
         res.quant_max = self.quant_max
+        res.quant_dim = self.quant_dim
         return res
 
     def dequantize(self, values) -> np.ndarray:
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index 2469a70..e01433d 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -1152,6 +1152,16 @@
     return op
 
 
+def fixup_asymmetric_weights(op, arch, nng):
+    if op.run_on_npu and (op.type.is_conv2d_op() or op.type.is_depthwise_conv2d_op()):
+        if op.ifm.dtype == DataType.int8:
+            if not np.all(op.weights.quantization.zero_point == 0):
+                print(f"Warning: {op.type} '{op.name}' has asymmetric weights, zero points have been adjusted.")
+                op.weights.quantization.zero_point *= 0
+
+    return op
+
+
 def convert_mean_to_depthwise_conv_or_avgpool(op, arch, nng):
     if op.type == Op.Mean and op.run_on_npu:
         keep_dims = op.attrs.get("keep_dims", False)
@@ -1405,6 +1415,7 @@
         reorder_depthwise_weights,
         fixup_resizebilinear,
         fixup_bias_tensors,
+        fixup_asymmetric_weights,
         convert_mul_max_to_abs_or_lrelu,
         convert_lrelu,
         convert_tanh_sigmoid_to_lut,
diff --git a/ethosu/vela/tflite_reader.py b/ethosu/vela/tflite_reader.py
index fbee793..8dc5efe 100644
--- a/ethosu/vela/tflite_reader.py
+++ b/ethosu/vela/tflite_reader.py
@@ -88,6 +88,7 @@
             tens.quantization.max = self.len1_array_to_scalar(quant.MaxAsNumpy())
             tens.quantization.scale_f32 = self.len1_array_to_scalar(quant.ScaleAsNumpy())
             tens.quantization.zero_point = self.len1_array_to_scalar(quant.ZeroPointAsNumpy())
+            tens.quantization.quant_dim = quant.QuantizedDimension()
 
         if dtype == DataType.uint8:
             tens.quantization.quant_min = 0
diff --git a/ethosu/vela/tflite_writer.py b/ethosu/vela/tflite_writer.py
index e6dd85b..d134c07 100644
--- a/ethosu/vela/tflite_writer.py
+++ b/ethosu/vela/tflite_writer.py
@@ -236,6 +236,8 @@
                 QuantizationParameters.QuantizationParametersAddScale(builder, scale)
             if zero_point is not None:
                 QuantizationParameters.QuantizationParametersAddZeroPoint(builder, zero_point)
+            if quant.quant_dim is not None:
+                QuantizationParameters.QuantizationParametersAddQuantizedDimension(builder, quant.quant_dim)
             qp = QuantizationParameters.QuantizationParametersEnd(builder)
 
         return qp