MLBEDSW-4163: OFM zero point outside valid range

This commit fixes a bug where the OFM zero point
would assume values outside of [0, 255] due to its
use as a stand-in for a bias when emulating the
TensorFlow Lite implementation of MEAN.
The solution is to adjust for the bias using an
ADD operator with the bias value as an int16 const
tensor. A 16-bit integer is sufficient because the
bias is 32 bits in the original implementation but
can only effectively assume values in the range
[-255, 255].
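
For illustration, a minimal stand-alone sketch of the
bias-term calculation introduced by this patch; the
quantization values below are made-up examples, not
taken from any real network:

    # Hypothetical example values for a uint8 MEAN
    ifm_zero_point, ifm_scale = 128, 0.5
    ofm_zero_point, ofm_scale = 0, 0.5

    # Same expression as in the patch: fold the IFM
    # zero-point correction into one additive term
    # instead of forcing it into the OFM zero point
    bias_term = ofm_zero_point - int(
        ifm_zero_point * ifm_scale / ofm_scale
    )

    print(bias_term)  # -128: outside [0, 255], so an
                      # Add op with an int16 constant
                      # is inserted after the MEAN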

Signed-off-by: Dwight Lidman <dwight.lidman@arm.com>
Change-Id: I84df48ea89bb559954f1b2c289b65e08a6418274
diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py
index e8218fc..bea22a2 100644
--- a/ethosu/vela/graph_optimiser.py
+++ b/ethosu/vela/graph_optimiser.py
@@ -1435,12 +1435,46 @@
                 # This attribute means a different scaling calculation is used in order to match reference
                 op.low_precision_scaling = True
                 weight_scale = h * w
+                # Set zero points to 0 as they will be adjusted for with a bias term
                 foq = ofmq.clone()
-                foq.zero_point -= int(np.round(ifmq.zero_point * ifmq.scale_f32 / foq.scale_f32))
-                op.forced_output_quantization = foq
+                foq.zero_point = 0
                 fiq = ifmq.clone()
                 fiq.zero_point = 0
                 op.forced_input_quantization = fiq
+                bias_term = ofmq.zero_point - int(ifmq.zero_point * ifmq.scale_f32 / ofmq.scale_f32)
+                # If the bias term is outside uint8 range, we need an Add op to apply it.
+                if bias_term < 0 or bias_term > 255:
+                    intermediate = op.ofm.clone(suffix="_intermediate", set_unique=True)
+                    # The bias term has higher bitness (i32) than the input/output (u8).
+                    # 16 bits is enough: since the bias is added to/subtracted from a u8
+                    # value, it can only effectively assume values in the range [-255, 255].
+                    intermediate.dtype = DataType.int16
+                    intermediate.quantization.zero_point = 0
+                    add_op = Operation(Op.Add, op.name + "_bias")
+                    add_op.forced_output_quantization = foq
+                    add_op.add_input_tensor(intermediate)
+                    quant = QuantizationParameters()
+                    quant.zero_point = 0
+                    bias_term_tens = create_const_tensor(
+                        op.name + "_bias",
+                        [1, 1, 1, 1],
+                        DataType.int16,
+                        [bias_term],
+                        np.int16,
+                        quantization=quant,
+                        quant_value_dtype=np.int16,
+                    )
+                    add_op.add_input_tensor(bias_term_tens)
+                    add_op.set_output_tensor(op.ofm)
+                    add_op.set_ifm_ofm_shapes()
+                    add_op.activation = op.activation
+                    op.activation = None
+                    op.set_output_tensor(intermediate)
+                    op.set_ifm_ofm_shapes()
+                # If not, we can just do it with the OFM zero point.
+                else:
+                    foq.zero_point = bias_term
+                    op.forced_output_quantization = foq
             else:
                 assert inp.dtype == DataType.int8
                 # Use a depthwise to calculate the sum,