MLBEDSW-7281: create_const_tensor OverflowError on Microsoft Windows

 - Additional overflow checks are performed when running under
Microsoft Windows compared to Linux. These checks happen when
converting from Python int to NumPy int/uint
 - The problem is that the LUT activation values are of int32 type;
however, they are defined as Python ints. If these are converted to
numpy.int32, it could result in an overflow error
 - The fix is to convert these values to uint32 but keep the
operator's IFM tensor type the same (as this will allow them to be
interpreted correctly)
 - Fixing this highlighted another problem where convert_to_lut
always calls create_lut_tensor() with an int8 datatype, whereas it
should be using the IFM datatype

Change-Id: I781a9d850f654267aa4a67754438607c4bb95685
Signed-off-by: Tim Hall <tim.hall@arm.com>
diff --git a/ethosu/vela/graph_optimiser_util.py b/ethosu/vela/graph_optimiser_util.py
index 2822feb..24a5583 100644
--- a/ethosu/vela/graph_optimiser_util.py
+++ b/ethosu/vela/graph_optimiser_util.py
@@ -417,7 +417,8 @@
 
 def convert_to_lut(op, lut_values, lut_name):
     # Rewrite the operation by Add with scalar 0 + LUT activation
-    ifm = op.inputs[0]
+    ifm = op.ifm
+    ofm = op.ofm
     if ifm is None:
         return op
     assert ifm.dtype.size_in_bytes() == 1
@@ -429,7 +430,7 @@
     quantization = QuantizationParameters(0.0, 255.0)
     quantization.scale_f32 = ifm.quantization.scale_f32
     quantization.zero_point = 0
-    tens = create_const_tensor(op.inputs[0].name + "_scalar0", [], ifm.dtype, [0], quantization=quantization)
+    tens = create_const_tensor(ifm.name + "_scalar0", [], ifm.dtype, [0], quantization=quantization)
     op.add_input_tensor(tens)
     op.ifm_shapes.append(Shape4D(tens.shape))  # TODO no shape?
 
@@ -437,7 +438,13 @@
     # so even if the OFM has a different scale than the IFM, the generated OFM scale instructions
     # should be the same as the IFM
     op.forced_output_quantization = ifm.quantization
-    lut_tensor = lut.create_lut_tensor(op.name + "_values", lut_values, DataType.int8)
+
+    # the lut tensor datatype needs to match both: the ofm datatype, because these are the values output; and the
+    # datatype used to generate the lut values (which is probably the ifm datatype), because we want to avoid any
+    # potential overflow errors in create_lut_tensor() caused by converting a Python int (which could represent a
+    # uint) to a NumPy int. This can be guaranteed by checking that the ifm and ofm datatypes are the same
+    assert ifm.dtype == ofm.dtype
+    lut_tensor = lut.create_lut_tensor(op.name + "_values", lut_values, ofm.dtype)
     op.set_activation_lut(lut_tensor)
     op.set_ifm_ofm_shapes()
     DebugDatabase.add_optimised(op, op)
diff --git a/ethosu/vela/softmax.py b/ethosu/vela/softmax.py
index 575e1e6..5a06c1b 100644
--- a/ethosu/vela/softmax.py
+++ b/ethosu/vela/softmax.py
@@ -270,7 +270,7 @@
             ifm2_shape=ifm_max_shape,
         )
         sub_op.set_activation_lut(
-            create_const_tensor(f"{sub_op.name}_exp_lut", [1, 1, 1, 256], DataType.int32, exp_lut, TensorPurpose.LUT)
+            create_const_tensor(f"{sub_op.name}_exp_lut", [1, 1, 1, 256], DataType.uint32, exp_lut, TensorPurpose.LUT)
         )
         ifm_exp = add_op_get_ofm(sub_op)
         # Note: activation.min/max are non-quantized values
@@ -505,8 +505,10 @@
             f"{name}_const", [1, 1, 1, 1], DataType.int32, [32767], quantization=no_scale_quant
         )
         add_op = create_add(name, mul2_ofm, const_add, mul2_ofm.quantization.clone(), dtype=DataType.int16)
+        # lut activation values are of int32 type; however, they are defined as Python ints. If these are converted
+        # to numpy.int32 it could result in an overflow error. Therefore, they are forced to uint32 to avoid this
         add_op.set_activation_lut(
-            create_const_tensor(f"{name}_exp_lut", [1, 1, 1, 512], DataType.int32, self.EXP_LUT, TensorPurpose.LUT)
+            create_const_tensor(f"{name}_exp_lut", [1, 1, 1, 512], DataType.uint32, self.EXP_LUT, TensorPurpose.LUT)
         )
         ifm_exp = add_op_get_ofm(add_op)
 
@@ -550,11 +552,13 @@
             f"{name}_const", [1, 1, 1, 1], DataType.int32, [32768], quantization=no_scale_quant
         )
         sub11_op = create_sub(name, shifted_sum_minus_one_16, sub11_const, no_scale_quant, dtype=DataType.int16)
+        # lut activation values are of int32 type; however, they are defined as Python ints. If these are converted
+        # to numpy.int32 it could result in an overflow error. Therefore, they are forced to uint32 to avoid this
         sub11_op.set_activation_lut(
             create_const_tensor(
                 f"{name}_one_over_one_plus_x_lut",
                 [1, 1, 1, 512],
-                DataType.int32,
+                DataType.uint32,
                 self.ONE_OVER_ONE_PLUS_X_LUT,
                 TensorPurpose.LUT,
             )
diff --git a/ethosu/vela/test/test_lut.py b/ethosu/vela/test/test_lut.py
index 712be7a..58e72bb 100644
--- a/ethosu/vela/test/test_lut.py
+++ b/ethosu/vela/test/test_lut.py
@@ -35,7 +35,7 @@
 def set_256_lut(op, key, arch):
     random.seed(key)
     values = random.choices(range(256), k=256)
-    lut_tensor = create_const_tensor(op.name + "_lut", [1, 1, 1, 256], DataType.int8, values, TensorPurpose.LUT)
+    lut_tensor = create_const_tensor(op.name + "_lut", [1, 1, 1, 256], DataType.uint8, values, TensorPurpose.LUT)
     scratch_lut_tensor = lut_tensor.clone_into_fast_storage(arch)
     op.set_activation_lut(scratch_lut_tensor)