MLBEDSW-7281: create_const_tensor OverflowError on Microsoft Windows

 - Additional overflow checks are performed when running under
Microsoft Windows compared to Linux. These checks happen when
converting from Python int to NumPy int/uint
 - The problem is that the LUT activation values are of int32 type;
however, they are defined as Python ints. If these are converted to
numpy.int32, it could result in an overflow error
 - The fix is to convert these values to uint32 but keep the
operator's IFM tensor type the same (as this will allow them to be
interpreted correctly)
 - Fixing this highlighted another problem where convert_to_lut
always calls create_lut_tensor() with an int8 datatype, whereas it
should be using the IFM datatype

Change-Id: I781a9d850f654267aa4a67754438607c4bb95685
Signed-off-by: Tim Hall <tim.hall@arm.com>
diff --git a/ethosu/vela/graph_optimiser_util.py b/ethosu/vela/graph_optimiser_util.py
index 2822feb..24a5583 100644
--- a/ethosu/vela/graph_optimiser_util.py
+++ b/ethosu/vela/graph_optimiser_util.py
@@ -417,7 +417,8 @@
 
 def convert_to_lut(op, lut_values, lut_name):
     # Rewrite the operation by Add with scalar 0 + LUT activation
-    ifm = op.inputs[0]
+    ifm = op.ifm
+    ofm = op.ofm
     if ifm is None:
         return op
     assert ifm.dtype.size_in_bytes() == 1
@@ -429,7 +430,7 @@
     quantization = QuantizationParameters(0.0, 255.0)
     quantization.scale_f32 = ifm.quantization.scale_f32
     quantization.zero_point = 0
-    tens = create_const_tensor(op.inputs[0].name + "_scalar0", [], ifm.dtype, [0], quantization=quantization)
+    tens = create_const_tensor(ifm.name + "_scalar0", [], ifm.dtype, [0], quantization=quantization)
     op.add_input_tensor(tens)
     op.ifm_shapes.append(Shape4D(tens.shape))  # TODO no shape?
 
@@ -437,7 +438,13 @@
     # so even if the OFM has a different scale than the IFM, the generated OFM scale instructions
     # should be the same as the IFM
     op.forced_output_quantization = ifm.quantization
-    lut_tensor = lut.create_lut_tensor(op.name + "_values", lut_values, DataType.int8)
+
+    # the lut tensor datatype needs to match both: the ofm datatype, because these are the values output; and the
+    # datatype used to generate the lut values (which is probably the ifm datatype), because we want to avoid any
+    # potential overflow errors in create_lut_tensor() caused by converting a Python int (which could represent a
+    # uint) to a NumPy int. This can be guaranteed by checking that the ifm and ofm datatypes are the same
+    assert ifm.dtype == ofm.dtype
+    lut_tensor = lut.create_lut_tensor(op.name + "_values", lut_values, ofm.dtype)
     op.set_activation_lut(lut_tensor)
     op.set_ifm_ofm_shapes()
     DebugDatabase.add_optimised(op, op)
diff --git a/ethosu/vela/softmax.py b/ethosu/vela/softmax.py
index 575e1e6..5a06c1b 100644
--- a/ethosu/vela/softmax.py
+++ b/ethosu/vela/softmax.py
@@ -270,7 +270,7 @@
             ifm2_shape=ifm_max_shape,
         )
         sub_op.set_activation_lut(
-            create_const_tensor(f"{sub_op.name}_exp_lut", [1, 1, 1, 256], DataType.int32, exp_lut, TensorPurpose.LUT)
+            create_const_tensor(f"{sub_op.name}_exp_lut", [1, 1, 1, 256], DataType.uint32, exp_lut, TensorPurpose.LUT)
         )
         ifm_exp = add_op_get_ofm(sub_op)
         # Note: activation.min/max are non-quantized values
@@ -505,8 +505,10 @@
             f"{name}_const", [1, 1, 1, 1], DataType.int32, [32767], quantization=no_scale_quant
         )
         add_op = create_add(name, mul2_ofm, const_add, mul2_ofm.quantization.clone(), dtype=DataType.int16)
+        # lut activation values are of int32 type; however, they are defined as Python ints. If these are converted
+        # to numpy.int32 it could result in an overflow error. Therefore, they are forced to uint32 to avoid this
         add_op.set_activation_lut(
-            create_const_tensor(f"{name}_exp_lut", [1, 1, 1, 512], DataType.int32, self.EXP_LUT, TensorPurpose.LUT)
+            create_const_tensor(f"{name}_exp_lut", [1, 1, 1, 512], DataType.uint32, self.EXP_LUT, TensorPurpose.LUT)
         )
         ifm_exp = add_op_get_ofm(add_op)
 
@@ -550,11 +552,13 @@
             f"{name}_const", [1, 1, 1, 1], DataType.int32, [32768], quantization=no_scale_quant
         )
         sub11_op = create_sub(name, shifted_sum_minus_one_16, sub11_const, no_scale_quant, dtype=DataType.int16)
+        # lut activation values are of int32 type; however, they are defined as Python ints. If these are converted
+        # to numpy.int32 it could result in an overflow error. Therefore, they are forced to uint32 to avoid this
         sub11_op.set_activation_lut(
             create_const_tensor(
                 f"{name}_one_over_one_plus_x_lut",
                 [1, 1, 1, 512],
-                DataType.int32,
+                DataType.uint32,
                 self.ONE_OVER_ONE_PLUS_X_LUT,
                 TensorPurpose.LUT,
             )
diff --git a/ethosu/vela/test/test_lut.py b/ethosu/vela/test/test_lut.py
index 712be7a..58e72bb 100644
--- a/ethosu/vela/test/test_lut.py
+++ b/ethosu/vela/test/test_lut.py
@@ -35,7 +35,7 @@
 def set_256_lut(op, key, arch):
     random.seed(key)
     values = random.choices(range(256), k=256)
-    lut_tensor = create_const_tensor(op.name + "_lut", [1, 1, 1, 256], DataType.int8, values, TensorPurpose.LUT)
+    lut_tensor = create_const_tensor(op.name + "_lut", [1, 1, 1, 256], DataType.uint8, values, TensorPurpose.LUT)
     scratch_lut_tensor = lut_tensor.clone_into_fast_storage(arch)
     op.set_activation_lut(scratch_lut_tensor)