MLBEDSW-6018: Fix double buffering on dual core

Only the first half of the weight double buffers was used on dual-core configurations, which caused degraded performance.

Change-Id: I49972c00343bbffbae28ed11c645e993ed61d43f
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>

commit: e91b531d18373299f6f337b285389b5da11264a0 [log] [tgz]
author: Louis Verhaard <louis.verhaard@arm.com> Fri Jan 21 13:38:50 2022 +0100
committer: Louis Verhaard <louis.verhaard@arm.com> Tue Jan 25 11:44:17 2022 +0100
tree: 4b80a25e30fe50764561700eed351130c8912a23
parent: 849ff81f82c10a68898e5101930b92372bec5565 [diff]
diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py
index 9abfbd4..6c403c8 100644
--- a/ethosu/vela/high_level_command_to_npu_op.py
+++ b/ethosu/vela/high_level_command_to_npu_op.py

@@ -201,6 +201,11 @@
     return upscale
 
 
+def get_double_buffer_offset(arch: ArchitectureFeatures, range_index: int, core: int) -> int:
+    """Returns 0 if the first half of a double buffer should be used, 1 if the second half should be used"""
+    return ((range_index - core) // arch.ncores) % 2
+
+
 def get_ifm_depth(npu_block_type: NpuBlockType, ifm_box: Box, ofm_box: Box) -> int:
     if npu_block_type in (NpuBlockType.ConvolutionMxN, NpuBlockType.VectorProduct, NpuBlockType.ReduceSum):
         block = ifm_box.get_block()
@@ -310,8 +315,8 @@
             if weight_tensor.sub_purpose == TensorSubPurpose.DoubleBuffer:
                 assert weight_tensor != w_tensor_src
                 # Double buffered inside weight_tensor
-                address = weight_tensor.address + w_tensor_src.max_range_bytes * ((weight_range.index - core) % 2)
-                address += core_offset
+                address = weight_tensor.address + core_offset
+                address += get_double_buffer_offset(arch, weight_range.index, core) * w_tensor_src.max_range_bytes
                 core_offset += round_up(weight_range.total_bytes, 16)
             else:
                 if weight_tensor == w_tensor_src:
@@ -522,7 +527,7 @@
 
                     if cmd.out_tensor.sub_purpose == TensorSubPurpose.DoubleBuffer:
                         dest_addr = cmd.out_tensor.address + cmd.in_tensor.max_range_bytes * (
-                            (weight_range.index - core) % 2
+                            get_double_buffer_offset(arch, weight_range.index, core)
                         )
                     else:
                         dest_addr = cmd.out_tensor.address
commit	e91b531d18373299f6f337b285389b5da11264a0	[log] [tgz]
author	Louis Verhaard <louis.verhaard@arm.com>	Fri Jan 21 13:38:50 2022 +0100
committer	Louis Verhaard <louis.verhaard@arm.com>	Tue Jan 25 11:44:17 2022 +0100
tree	4b80a25e30fe50764561700eed351130c8912a23
parent	849ff81f82c10a68898e5101930b92372bec5565 [diff]