MLBEDSW-7151: MLCE: Difference in model output between x86 & aarch64
- The issue is due to undefined behaviour when casting a NumPy float
to a NumPy unsigned integer, which occurs in create_const_tensor();
because the result of such a cast is platform-dependent, x86 and
aarch64 can produce different model outputs (see the sketch below)
- The fix is to make sure that the values are first cast to a Python
float, for which the conversion behaviour is well defined
- In addition, the value_dtype argument has been removed from
create_const_tensor() to stop the tensor and values datatypes from
getting out of sync (a sketch of the implied value handling follows
the sign-off)
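
A minimal sketch of the failure mode (illustrative values only, not
taken from the reported model; exact results depend on the compiler
and target, which is the point):

    import numpy as np

    # Casting an out-of-range float straight to an unsigned NumPy
    # integer is undefined behaviour in the underlying C conversion,
    # so the result depends on the target: typical x86 builds wrap
    # -1.5 to 255, while the aarch64 conversion saturates to 0
    bad = np.array([-1.5], dtype=np.float32).astype(np.uint8)

    # Going via a Python float/int first is well defined: Python
    # truncates -1.5 to -1, and NumPy's integer-to-integer cast then
    # wraps deterministically to 255 on every platform
    v = int(float(np.float32(-1.5)))       # -1, same everywhere
    good = np.array([v]).astype(np.uint8)  # 255, same everywhere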
Change-Id: I134b9be8c941b361929a5ae7db8cb35f2e9728f2
Signed-off-by: Tim Hall <tim.hall@arm.com>
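
For reference, a hedged sketch of the value handling that the new
create_const_tensor() signature implies. This is an assumption for
illustration, not the actual implementation; the helper name below is
hypothetical and only the DataType.as_numpy_type() call is taken from
the surrounding code:

    import numpy as np

    def _const_values(dtype, values, shape):
        # The tensor DataType is now the single source of truth: the
        # NumPy dtype is derived from it rather than taken from a
        # separate value_dtype argument that could drift out of sync
        np_dtype = dtype.as_numpy_type()
        if np.issubdtype(np_dtype, np.unsignedinteger):
            # Route the values through Python float/int so converting
            # any negative/out-of-range floats is well defined before
            # the final NumPy cast
            values = [int(float(v)) for v in np.asarray(values).flat]
        return np.array(values).reshape(shape).astype(np_dtype)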
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index 242f0ea..ff7b486 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2020-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+# SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -343,17 +343,10 @@
weight_quant.zero_point = 0
weight_quant.quant_dim = 0
ofm_dtype = ofm.dtype
- if ofm_dtype == DataType.uint8:
- weight_value_dtype = np.uint8
+ if ofm_dtype.type == BaseType.UnsignedInt:
weight_quant.quant_min = 0
weight_quant.quant_max = (1 << ofm_dtype.bits) - 1
else:
- if ofm_dtype == DataType.int8:
- weight_value_dtype = np.int8
- else:
- assert ofm_dtype == DataType.int16
- weight_value_dtype = np.int16
-
weight_quant.quant_min = -(1 << (ofm_dtype.bits - 1))
weight_quant.quant_max = (1 << (ofm_dtype.bits - 1)) - 1
 
@@ -376,9 +369,8 @@
create_const_tensor(
"weights",
weight_shape,
- ofm.dtype,
+ ofm_dtype,
np.array(weight_values).reshape(weight_shape),
- value_dtype=weight_value_dtype,
quantization=weight_quant,
),
1, # inputs tensor weight index
@@ -586,7 +578,6 @@
shape,
intermediate_tens.dtype,
np.array(kernel).reshape(shape),
- value_dtype=np.int8,
quantization=quant,
),
)
@@ -1227,9 +1218,7 @@
scalar, _ = scaling.elementwise_mul_scale(ifm.quantization.scale_f32, alpha, ofm.quantization.scale_f32)
else:
scalar = 1
- alpha_tens = create_const_tensor(
- op.name + "_alpha_scalar", [1], alpha_dtype, [scalar], alpha_dtype.as_numpy_type(), quantization=quantization
- )
+ alpha_tens = create_const_tensor(op.name + "_alpha_scalar", [1], alpha_dtype, [scalar], quantization=quantization)
mul_alpha.add_input_tensor(alpha_tens)
fm_alpha = ofm.clone(op.name + "_alpha", set_unique=True)
mul_alpha.set_output_tensor(fm_alpha)
@@ -1256,9 +1245,7 @@
quantization.max = quantization.quant_max - quantization.quant_min
quantization.scale_f32 = np.float32(1)
quantization.zero_point = 0
- identity_tens = create_const_tensor(
- op.name + "_id_scalar", [], ifm.dtype, [1], np.uint8, quantization=quantization
- )
+ identity_tens = create_const_tensor(op.name + "_id_scalar", [], ifm.dtype, [1], quantization=quantization)
mul_identity.add_input_tensor(identity_tens)
# Make sure that fm_id is allocated to a different address than fm_alpha
fm_id = ofm.clone(op.name + "_id", set_unique=True)
@@ -1470,7 +1457,6 @@
shape,
op.ifm.dtype,
weights,
- np.uint8,
purpose=TensorPurpose.Weights,
quantization=quantization,
)
@@ -1526,7 +1512,7 @@
if top > 0:
shape = Shape4D(1, top, ofm_shape.width, ofm_shape.depth)
zero_tens = create_const_tensor(
- op.name + "_top", shape.as_list(), ofm.dtype, shape.elements() * [pad_value], np.uint8, quantization=quant
+ op.name + "_top", shape.as_list(), ofm.dtype, shape.elements() * [pad_value], quantization=quant
)
# If top/bottom or left/right are equal, the const tensors can be allocated to the same address
zero_tens.equivalence_id = create_equivalence_id(tuple(zero_tens.values))
@@ -1538,7 +1524,6 @@
shape.as_list(),
ofm.dtype,
shape.elements() * [pad_value],
- np.uint8,
quantization=quant,
)
zero_tens.equivalence_id = create_equivalence_id(tuple(zero_tens.values))
@@ -1548,14 +1533,14 @@
if left > 0:
shape = Shape4D(1, ifm_shape.height, left, ofm_shape.depth)
zero_tens = create_const_tensor(
- op.name + "_left", shape.as_list(), ofm.dtype, shape.elements() * [pad_value], np.uint8, quantization=quant
+ op.name + "_left", shape.as_list(), ofm.dtype, shape.elements() * [pad_value], quantization=quant
)
zero_tens.equivalence_id = create_equivalence_id(tuple(zero_tens.values))
create_avg_pool_for_concat(op, op.name + "_left", zero_tens, shape, shp_top)
if right > 0:
shape = Shape4D(1, ifm_shape.height, right, ofm_shape.depth)
zero_tens = create_const_tensor(
- op.name + "_right", shape.as_list(), ofm.dtype, shape.elements() * [pad_value], np.uint8, quantization=quant
+ op.name + "_right", shape.as_list(), ofm.dtype, shape.elements() * [pad_value], quantization=quant
)
zero_tens.equivalence_id = create_equivalence_id(tuple(zero_tens.values))
create_avg_pool_for_concat(
@@ -1715,7 +1700,6 @@
weight_shape,
inp.dtype,
np.ones(weight_shape),
- value_dtype=np.uint8,
quantization=weight_quant,
),
1,
@@ -2008,8 +1992,7 @@
ofm_clone = ofm.clone()
ofm_clone.values = ofm.values
ofm.values = None
- np_dtype = ofm.dtype.as_numpy_type()
- zero = create_const_tensor("zero", [1], ofm.dtype, [0], np_dtype, quantization=ofm.quantization)
+ zero = create_const_tensor("zero", [1], ofm.dtype, [0], quantization=ofm.quantization)
memcpy = create_add_nop(f"{ofm.name}_copy")
memcpy.add_input_tensor(ofm_clone)
memcpy.add_input_tensor(zero)