MLBEDSW-2900: softmax output diff on FPGA

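 - Corrected the rounding mode for softmax: the SHR operations in passes 2
   and 30 now request NATURAL rounding instead of the default TFL rounding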

Signed-off-by: Tim Hall <tim.hall@arm.com>
Change-Id: If136491c7668e85fba1e2c56c8cff11aa32db328
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index 12ade8d..0a35647 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -436,7 +436,9 @@
             # Specifies if global scale from the NPU_SET_OFM_SCALE register should be used instead of per-channel scale
             use_global_scale = False
             # Specifies type of rounding to be used.
-            rounding_mode = rounding.TFL
+            rounding_mode = (
+                rounding.NATURAL if primary_op.attrs.get("rounding_mode", "") == b"NATURAL" else rounding.TFL
+            )
             if primary_op.type == "ResizeBilinear":
                 rounding_mode = rounding.TRUNCATE
             fmf = primary_op.attrs.get("fused_memory_function", None)
diff --git a/ethosu/vela/softmax.py b/ethosu/vela/softmax.py
index 7c23f47..2834f8c 100644
--- a/ethosu/vela/softmax.py
+++ b/ethosu/vela/softmax.py
@@ -257,6 +257,7 @@
 
         # PASS 2 - SHR
         shr2_op = Operation("SHR", self.op.name + "_shr2")
+        shr2_op.attrs["rounding_mode"] = b"NATURAL"
         shr2_op.add_input_tensor(ifm_exp)
         shr2_op.add_input_tensor(
             create_const_tensor(
@@ -454,6 +455,7 @@
 
         # PASS 30 - SHR
         shr30_op = Operation("SHR", self.op.name + "_shr30")
+        shr30_op.attrs["rounding_mode"] = b"NATURAL"
         shr30_op.add_input_tensor(scaled_exp)
         shr30_op.add_input_tensor(right_shift)
         shr30_op.set_output_tensor(ofm)