MLBEDSW-3502: Bug fix for addresses >= 32 bits

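Bug fix in the generation of register commands whose offsets do not fit
in 32 bits: the offset was masked to 36 bits (0xFFFFFFFFF) instead of
32 bits (0xFFFFFFFF), and the command parameter is now masked to 16 bits.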

Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
Change-Id: Iabb99cf6536c0f77b934691f8744df61f1eab3ed
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index 733be59..fb705b9 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -176,7 +176,8 @@
         self.offset += CommandStreamEmitter.WORD_SIZE
 
     def cmd1_with_offset(self, cmd: cmd1, offset, param=0x0):
-        offset = int(offset) & 0xFFFFFFFFF
+        offset = int(offset) & 0xFFFFFFFF
+        param = int(param) & 0xFFFF
         command = cmd.value | CmdMode.Payload32.value | (param << 16)
 
         if not self.get_reg_machine(cmd).set_register(cmd, (command, offset)):
diff --git a/ethosu/vela/test/extapi/test_extapi_generate_commands.py b/ethosu/vela/test/extapi/test_extapi_generate_commands.py
index db0485c..3c9a43d 100644
--- a/ethosu/vela/test/extapi/test_extapi_generate_commands.py
+++ b/ethosu/vela/test/extapi/test_extapi_generate_commands.py
@@ -61,7 +61,7 @@
 
 def check_cmd1(cmd_stream, cmd, offset, param=0x0):
     """Checks that the command stream contains the given command + parameter"""
-    offset = int(offset) & 0xFFFFFFFFF
+    offset = int(offset) & 0xFFFFFFFF
     command = cmd.value | CmdMode.Payload32.value | (param << 16)
     for i in range(len(cmd_stream) - 1):
         if cmd_stream[i] == command and cmd_stream[i + 1] == offset:
@@ -380,9 +380,11 @@
     with pytest.raises(VelaError):
         npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U65_256)
     # bias with high end address, but still within range
+    addr = (1 << 48) - 1024
     conv_op = create_fully_connected_op()
-    conv_op.biases = [NpuAddressRange(region=0, address=(1 << 48) - 1024, length=1000)]
-    npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U65_512)
+    conv_op.biases = [NpuAddressRange(region=0, address=addr, length=1000)]
+    cmds = npu_generate_register_command_stream([conv_op], NpuAccelerator.Ethos_U65_512)
+    check_cmd1(cmds, cmd1.NPU_SET_SCALE_BASE, addr & ((1 << 32) - 1), (addr >> 32) & ((1 << 16) - 1))
     conv_op = create_fully_connected_op()
     # weights with negative address
     conv_op.weights = [NpuAddressRange(region=0, address=-16, length=1000)]