MLBEDSW-3643: Refactor blockdep calculation

Moved blockdep calculation and other helper functions for
code generation to a separate file.

Change-Id: I2f8ccea478654272ebf42217fc5c1800e9ad177a
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
diff --git a/ethosu/vela/test/test_register_command_stream_util.py b/ethosu/vela/test/test_register_command_stream_util.py
new file mode 100644
index 0000000..985523f
--- /dev/null
+++ b/ethosu/vela/test/test_register_command_stream_util.py
@@ -0,0 +1,204 @@
+# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Description:
+# Contains unit tests for register command stream generator
+from ethosu.vela.api import NpuAddressRange
+from ethosu.vela.api import NpuBlockTraversal
+from ethosu.vela.api import NpuConv2DOperation
+from ethosu.vela.api import NpuConvDepthWiseOperation
+from ethosu.vela.api import NpuDataType
+from ethosu.vela.api import NpuElementWiseOp
+from ethosu.vela.api import NpuElementWiseOperation
+from ethosu.vela.api import NpuFeatureMap
+from ethosu.vela.api import NpuKernel
+from ethosu.vela.api import NpuLayout
+from ethosu.vela.api import NpuPadding
+from ethosu.vela.api import NpuShape3D
+from ethosu.vela.api import NpuTileBox
+from ethosu.vela.architecture_features import Accelerator
+from ethosu.vela.architecture_features import create_default_arch
+from ethosu.vela.register_command_stream_generator import calc_blockdep
+from ethosu.vela.register_command_stream_generator import get_strides
+from ethosu.vela.register_command_stream_util import get_address_ranges
+from ethosu.vela.test.extapi.test_extapi_generate_commands import create_feature_map
+
+
+def test_get_fm_strides():
+    """Tests get_strides for NHCWB16/INT16, NHWC/INT16 and NHWC/UINT8 feature maps"""
+    fm = NpuFeatureMap()
+    fm.layout = NpuLayout.NHCWB16
+    fm.data_type = NpuDataType.INT16
+    fm.shape = NpuShape3D(height=7, width=10, depth=24)
+    assert get_strides(fm) == NpuShape3D(height=640, width=32, depth=320)
+    fm.layout = NpuLayout.NHWC
+    assert get_strides(fm) == NpuShape3D(height=480, width=48, depth=2)
+    fm.data_type = NpuDataType.UINT8
+    assert get_strides(fm) == NpuShape3D(height=240, width=24, depth=1)
+
+
+# -------------------------------------------------------------------
+# ADDRESS TESTS
+# -------------------------------------------------------------------
+
+
+def test_get_address_ranges_one_tile():
+    """Tests get_address_ranges with only tile 0 used; the 3 unused tiles must yield None"""
+    fm = NpuFeatureMap()
+    fm.region = 4
+    fm.layout = NpuLayout.NHWC
+    fm.data_type = NpuDataType.INT16
+    fm.shape = NpuShape3D(height=50, width=40, depth=3)
+    fm.tiles = NpuTileBox(height_0=50, height_1=50, width_0=40, addresses=[8000, 0, 0, 0])
+    ranges = get_address_ranges(fm)
+    assert ranges == [NpuAddressRange(region=4, address=8000, length=12000), None, None, None]
+
+
+def test_get_address_ranges_horizontal_tiles():
+    """Tests get_address_ranges with 2 horizontal tiles (tiles 0 and 2) used"""
+    fm = NpuFeatureMap()
+    fm.region = 6
+    fm.layout = NpuLayout.NHWC
+    fm.data_type = NpuDataType.INT16
+    fm.shape = NpuShape3D(height=50, width=10, depth=20)
+    fm.tiles = NpuTileBox(height_0=20, height_1=30, width_0=10, addresses=[256, 0, 16000, 0])
+    ranges = get_address_ranges(fm)
+    assert ranges == [
+        NpuAddressRange(region=6, address=256, length=8000),
+        None,
+        NpuAddressRange(region=6, address=16000, length=12000),
+        None,
+    ]
+
+
+def test_get_address_ranges_vertical_tiles():
+    """Tests get_address_ranges with 2 vertical tiles (tiles 0 and 1) used and explicit strides"""
+    fm = NpuFeatureMap()
+    fm.region = 6
+    fm.layout = NpuLayout.NHWC
+    fm.data_type = NpuDataType.INT8
+    fm.shape = NpuShape3D(height=50, width=10, depth=20)
+    # Set strides explicitly
+    fm.strides = NpuShape3D(height=100, width=20, depth=1)
+    fm.tiles = NpuTileBox(height_0=50, height_1=50, width_0=5, addresses=[16, 32000, 0, 0])
+    ranges = get_address_ranges(fm)
+    assert ranges == [
+        NpuAddressRange(region=6, address=16, length=5000),
+        NpuAddressRange(region=6, address=32000, length=5000),
+        None,
+        None,
+    ]
+
+
+def test_get_address_ranges_4_tiles():
+    """Tests get_address_ranges with all 4 tiles used, for a feature map in NHCWB16 layout"""
+    fm = NpuFeatureMap()
+    fm.region = 6
+    fm.layout = NpuLayout.NHCWB16
+    fm.data_type = NpuDataType.INT16
+    fm.shape = NpuShape3D(height=50, width=10, depth=20)
+    fm.tiles = NpuTileBox(height_0=30, height_1=10, width_0=3, addresses=[16, 32000, 8000, 16000])
+    ranges = get_address_ranges(fm)
+    assert ranges == [
+        NpuAddressRange(region=6, address=16, length=18952),
+        NpuAddressRange(region=6, address=32000, length=6280),
+        NpuAddressRange(region=6, address=8000, length=12552),
+        NpuAddressRange(region=6, address=16000, length=25480),
+    ]
+
+
+# -------------------------------------------------------------------
+# BLOCKDEP TESTS
+# -------------------------------------------------------------------
+
+
+def test_calc_blockdep0():
+    """
+    Tests blockdep calculation, op1 produces op2's IFM2.
+    op2 takes 1 block to complete, which results in blockdep 0
+    """
+    op1 = NpuElementWiseOperation(NpuElementWiseOp.CLZ)
+    op1.ifm = create_feature_map(NpuShape3D(height=1, width=1, depth=1), 1, 0x60, layout=NpuLayout.NHCWB16,)
+    intermediate_fm = create_feature_map(NpuShape3D(height=1, width=1, depth=1), 1, 0xA0, layout=NpuLayout.NHCWB16,)
+    op1.ofm = intermediate_fm
+    op1.block_config = NpuShape3D(height=1, width=1, depth=4)
+    op2 = NpuElementWiseOperation(NpuElementWiseOp.SUB)
+    op2.ifm = create_feature_map(NpuShape3D(height=1, width=1, depth=1), 1, 0x39AC0, layout=NpuLayout.NHCWB16,)
+    op2.ifm2 = intermediate_fm  # the same feature map object that is op1's OFM
+    op2.ofm = create_feature_map(NpuShape3D(height=1, width=1, depth=1), 1, 0xE0, layout=NpuLayout.NHCWB16,)
+    op2.block_config = NpuShape3D(height=1, width=1, depth=4)
+    arch = create_default_arch(Accelerator.Ethos_U55_128)
+    block_dep = calc_blockdep(arch, op1, op2)
+    assert block_dep == 0
+
+
+def test_calc_blockdep2():
+    """
+    Tests blockdep calculation, op1 produces part of the input of op2,
+    op1 and op2 have different sizes.
+    op2 takes 3 blocks to complete, op1's last block collides with op2's last block
+    which results in blockdep 2
+    """
+    op1 = NpuConv2DOperation()
+    op1.ifm = create_feature_map(NpuShape3D(height=4, width=48, depth=8), 1, 0x4C80, layout=NpuLayout.NHCWB16,)
+    op1.ofm = create_feature_map(NpuShape3D(height=4, width=48, depth=16), 1, 0x6480, layout=NpuLayout.NHCWB16,)
+    op1.kernel = NpuKernel(1, 1)
+    op1.weights = [NpuAddressRange(region=1, address=0x4AE0, length=208)]
+    op1.biases = [NpuAddressRange(region=1, address=0x49A0, length=160)]
+    op1.padding = NpuPadding(top=0, left=0, right=0, bottom=0)
+    op1.block_traversal = NpuBlockTraversal.PART_KERNEL_FIRST
+    op1.block_config = NpuShape3D(height=4, width=6, depth=16)
+    op2 = NpuConvDepthWiseOperation()
+    op2.ifm = create_feature_map(NpuShape3D(height=3, width=48, depth=16), 1, 0, layout=NpuLayout.NHCWB16,)
+    # op2's IFM has two tiles, the lower tile (0x6480, same value as used for op1's OFM) is produced by op1
+    op2.ifm.tiles = NpuTileBox(height_0=2, height_1=2, width_0=48, addresses=[0x7680, 0, 0x6480, 0])
+    op2.ofm = create_feature_map(NpuShape3D(height=1, width=24, depth=16), 1, 0x6480, layout=NpuLayout.NHCWB16,)
+    op2.kernel = NpuKernel(3, 3, stride_x=2, stride_y=2)
+    op2.weights = [NpuAddressRange(region=1, address=0x4BB0, length=208)]
+    op2.biases = [NpuAddressRange(region=1, address=0x4A40, length=160)]
+    op2.padding = NpuPadding(top=0, left=0, right=0, bottom=0)
+    op2.block_config = NpuShape3D(height=1, width=8, depth=16)
+    arch = create_default_arch(Accelerator.Ethos_U55_128)
+    block_dep = calc_blockdep(arch, op1, op2)
+    assert block_dep == 2
+
+
+def test_calc_blockdep3():
+    """
+    Tests blockdep calculation, op2 consumes part of op1's OFM, op1 and op2 have different sizes.
+    There is no overlap between the last blocks of op1 and the first jobs of op2,
+    which results in blockdep 3
+    """
+    op1 = NpuConv2DOperation()
+    op1.ifm = create_feature_map(NpuShape3D(height=13, width=96, depth=1), 1, 0, layout=NpuLayout.NHWC,)
+    op1.ofm = create_feature_map(NpuShape3D(height=6, width=48, depth=8), 1, 0x7C80, layout=NpuLayout.NHCWB16,)
+    op1.kernel = NpuKernel(3, 3, stride_x=2, stride_y=2)
+    op1.weights = [NpuAddressRange(region=1, address=0x4AE0, length=144)]
+    op1.biases = [NpuAddressRange(region=1, address=0x49A0, length=80)]
+    op1.padding = NpuPadding(top=0, left=0, right=1, bottom=0)
+    op1.block_traversal = NpuBlockTraversal.PART_KERNEL_FIRST
+    op1.block_config = NpuShape3D(height=6, width=3, depth=8)
+    op2 = NpuConvDepthWiseOperation()
+    op2.ifm = create_feature_map(NpuShape3D(height=5, width=48, depth=8), 1, 0x7C80, layout=NpuLayout.NHCWB16,)
+    op2.ofm = create_feature_map(NpuShape3D(height=4, width=48, depth=8), 1, 0x4C80, layout=NpuLayout.NHCWB16,)
+    op2.kernel = NpuKernel(3, 3)
+    op2.weights = [NpuAddressRange(region=1, address=0x4BB0, length=112)]
+    op2.biases = [NpuAddressRange(region=1, address=0x4A40, length=80)]
+    op2.padding = NpuPadding(top=0, left=0, right=0, bottom=0)
+    op2.block_config = NpuShape3D(height=4, width=6, depth=8)
+    arch = create_default_arch(Accelerator.Ethos_U55_128)
+    block_dep = calc_blockdep(arch, op1, op2)
+    assert block_dep == 3