MLBEDSW-3562: Improve blockdep calculation. Blockdep calculation can now handle differently sized IFM/OFM. Change-Id: I898a3c1c3a6778916802f3dbfa658328e5093096 Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>

commit: d2665804871d76a16d5962952ba95500e3977c56 [log] [tgz]
author: Louis Verhaard <louis.verhaard@arm.com> Fri Nov 20 13:08:55 2020 +0100
committer: Louis Verhaard <louis.verhaard@arm.com> Thu Nov 26 17:18:48 2020 +0100
tree: ea7fc78d7dae7f4258939cd2cfa8cffad92e566d
parent: 603016ccaa6cdb1a9b6d4547c561e4b45c90d3d5 [diff] [blame]
diff --git a/ethosu/vela/test/test_register_command_generator.py b/ethosu/vela/test/test_register_command_generator.py
index f2a1609..2760c86 100644
--- a/ethosu/vela/test/test_register_command_generator.py
+++ b/ethosu/vela/test/test_register_command_generator.py

@@ -17,13 +17,24 @@
 # Description:
 # Contains unit tests for register command stream generator
 from ethosu.vela.api import NpuAddressRange
+from ethosu.vela.api import NpuBlockTraversal
+from ethosu.vela.api import NpuConv2DOperation
+from ethosu.vela.api import NpuConvDepthWiseOperation
 from ethosu.vela.api import NpuDataType
+from ethosu.vela.api import NpuElementWiseOp
+from ethosu.vela.api import NpuElementWiseOperation
 from ethosu.vela.api import NpuFeatureMap
+from ethosu.vela.api import NpuKernel
 from ethosu.vela.api import NpuLayout
+from ethosu.vela.api import NpuPadding
 from ethosu.vela.api import NpuShape3D
 from ethosu.vela.api import NpuTileBox
+from ethosu.vela.architecture_features import Accelerator
+from ethosu.vela.architecture_features import create_default_arch
+from ethosu.vela.register_command_stream_generator import calc_blockdep
 from ethosu.vela.register_command_stream_generator import get_address_ranges
 from ethosu.vela.register_command_stream_generator import get_strides
+from ethosu.vela.test.extapi.test_extapi_generate_commands import create_feature_map
 
 
 def test_get_fm_strides():
@@ -39,6 +50,11 @@
     assert get_strides(fm) == NpuShape3D(height=240, width=24, depth=1)
 
 
+# -------------------------------------------------------------------
+# ADDRESS TESTS
+# -------------------------------------------------------------------
+
+
 def test_get_address_ranges_one_tile():
     """Tests calculation of feature map address ranges, with 1 tile used"""
     fm = NpuFeatureMap()
@@ -100,5 +116,89 @@
         NpuAddressRange(region=6, address=16, length=18952),
         NpuAddressRange(region=6, address=32000, length=6280),
         NpuAddressRange(region=6, address=8000, length=12552),
-        NpuAddressRange(region=6, address=28800, length=12680),
+        NpuAddressRange(region=6, address=16000, length=25480),
     ]
+
+
+# -------------------------------------------------------------------
+# BLOCKDEP TESTS
+# -------------------------------------------------------------------
+
+
+def test_calc_blockdep0():
+    """
+    Tests blockdep calculation where op1 produces op2's IFM2.
+    op2 takes 1 block to complete, which results in blockdep 0.
+    """
+    op1 = NpuElementWiseOperation(NpuElementWiseOp.CLZ)
+    op1.ifm = create_feature_map(NpuShape3D(height=1, width=1, depth=1), 1, 0x60, layout=NpuLayout.NHCWB16,)
+    intermediate_fm = create_feature_map(NpuShape3D(height=1, width=1, depth=1), 1, 0xA0, layout=NpuLayout.NHCWB16,)
+    op1.ofm = intermediate_fm
+    op1.block_config = NpuShape3D(height=1, width=1, depth=4)
+    op2 = NpuElementWiseOperation(NpuElementWiseOp.SUB)
+    op2.ifm = create_feature_map(NpuShape3D(height=1, width=1, depth=1), 1, 0x39AC0, layout=NpuLayout.NHCWB16,)
+    op2.ifm2 = intermediate_fm
+    op2.ofm = create_feature_map(NpuShape3D(height=1, width=1, depth=1), 1, 0xE0, layout=NpuLayout.NHCWB16,)
+    op2.block_config = NpuShape3D(height=1, width=1, depth=4)
+    arch = create_default_arch(Accelerator.Ethos_U55_128)
+    block_dep = calc_blockdep(arch, op1, op2)
+    assert block_dep == 0
+
+
+def test_calc_blockdep2():
+    """
+    Tests blockdep calculation where op1 produces part of the input of op2
+    and op1 and op2 have different sizes.
+    op2 takes 3 blocks to complete; op1's last block collides with op2's last block,
+    which results in blockdep 2.
+    """
+    op1 = NpuConv2DOperation()
+    op1.ifm = create_feature_map(NpuShape3D(height=4, width=48, depth=8), 1, 0x4C80, layout=NpuLayout.NHCWB16,)
+    op1.ofm = create_feature_map(NpuShape3D(height=4, width=48, depth=16), 1, 0x6480, layout=NpuLayout.NHCWB16,)
+    op1.kernel = NpuKernel(1, 1)
+    op1.weights = [NpuAddressRange(region=1, address=0x4AE0, length=208)]
+    op1.biases = [NpuAddressRange(region=1, address=0x49A0, length=160)]
+    op1.padding = NpuPadding(top=0, left=0, right=0, bottom=0)
+    op1.block_traversal = NpuBlockTraversal.PART_KERNEL_FIRST
+    op1.block_config = NpuShape3D(height=4, width=6, depth=16)
+    op2 = NpuConvDepthWiseOperation()
+    op2.ifm = create_feature_map(NpuShape3D(height=3, width=48, depth=16), 1, 0, layout=NpuLayout.NHCWB16,)
+    # op2's IFM has two tiles; the lower tile is produced by op1
+    op2.ifm.tiles = NpuTileBox(height_0=2, height_1=2, width_0=48, addresses=[0x7680, 0, 0x6480, 0])
+    op2.ofm = create_feature_map(NpuShape3D(height=1, width=24, depth=16), 1, 0x6480, layout=NpuLayout.NHCWB16,)
+    op2.kernel = NpuKernel(3, 3, stride_x=2, stride_y=2)
+    op2.weights = [NpuAddressRange(region=1, address=0x4BB0, length=208)]
+    op2.biases = [NpuAddressRange(region=1, address=0x4A40, length=160)]
+    op2.padding = NpuPadding(top=0, left=0, right=0, bottom=0)
+    op2.block_config = NpuShape3D(height=1, width=8, depth=16)
+    arch = create_default_arch(Accelerator.Ethos_U55_128)
+    block_dep = calc_blockdep(arch, op1, op2)
+    assert block_dep == 2
+
+
+def test_calc_blockdep3():
+    """
+    Tests blockdep calculation where op2 consumes part of op1's OFM and op1 and op2 have different sizes.
+    There is no overlap between the last blocks of op1 and the first jobs of op2,
+    which results in blockdep 3.
+    """
+    op1 = NpuConv2DOperation()
+    op1.ifm = create_feature_map(NpuShape3D(height=13, width=96, depth=1), 1, 0, layout=NpuLayout.NHWC,)
+    op1.ofm = create_feature_map(NpuShape3D(height=6, width=48, depth=8), 1, 0x7C80, layout=NpuLayout.NHCWB16,)
+    op1.kernel = NpuKernel(3, 3, stride_x=2, stride_y=2)
+    op1.weights = [NpuAddressRange(region=1, address=0x4AE0, length=144)]
+    op1.biases = [NpuAddressRange(region=1, address=0x49A0, length=80)]
+    op1.padding = NpuPadding(top=0, left=0, right=1, bottom=0)
+    op1.block_traversal = NpuBlockTraversal.PART_KERNEL_FIRST
+    op1.block_config = NpuShape3D(height=6, width=3, depth=8)
+    op2 = NpuConvDepthWiseOperation()
+    op2.ifm = create_feature_map(NpuShape3D(height=5, width=48, depth=8), 1, 0x7C80, layout=NpuLayout.NHCWB16,)
+    op2.ofm = create_feature_map(NpuShape3D(height=4, width=48, depth=8), 1, 0x4C80, layout=NpuLayout.NHCWB16,)
+    op2.kernel = NpuKernel(3, 3)
+    op2.weights = [NpuAddressRange(region=1, address=0x4BB0, length=112)]
+    op2.biases = [NpuAddressRange(region=1, address=0x4A40, length=80)]
+    op2.padding = NpuPadding(top=0, left=0, right=0, bottom=0)
+    op2.block_config = NpuShape3D(height=4, width=6, depth=8)
+    arch = create_default_arch(Accelerator.Ethos_U55_128)
+    block_dep = calc_blockdep(arch, op1, op2)
+    assert block_dep == 3
commit	d2665804871d76a16d5962952ba95500e3977c56	[log] [tgz]
author	Louis Verhaard <louis.verhaard@arm.com>	Fri Nov 20 13:08:55 2020 +0100
committer	Louis Verhaard <louis.verhaard@arm.com>	Thu Nov 26 17:18:48 2020 +0100
tree	ea7fc78d7dae7f4258939cd2cfa8cffad92e566d
parent	603016ccaa6cdb1a9b6d4547c561e4b45c90d3d5 [diff] [blame]