blob: 2760c860eee6f7b36a69aa90cb8267e03d4f3621 [file] [log] [blame]
# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Description:
# Contains unit tests for register command stream generator
19from ethosu.vela.api import NpuAddressRange
Louis Verhaardd2665802020-11-20 13:08:55 +010020from ethosu.vela.api import NpuBlockTraversal
21from ethosu.vela.api import NpuConv2DOperation
22from ethosu.vela.api import NpuConvDepthWiseOperation
Louis Verhaarde8a5a782020-11-02 18:04:27 +010023from ethosu.vela.api import NpuDataType
Louis Verhaardd2665802020-11-20 13:08:55 +010024from ethosu.vela.api import NpuElementWiseOp
25from ethosu.vela.api import NpuElementWiseOperation
Louis Verhaarde8a5a782020-11-02 18:04:27 +010026from ethosu.vela.api import NpuFeatureMap
Louis Verhaardd2665802020-11-20 13:08:55 +010027from ethosu.vela.api import NpuKernel
Louis Verhaarde8a5a782020-11-02 18:04:27 +010028from ethosu.vela.api import NpuLayout
Louis Verhaardd2665802020-11-20 13:08:55 +010029from ethosu.vela.api import NpuPadding
Louis Verhaarde8a5a782020-11-02 18:04:27 +010030from ethosu.vela.api import NpuShape3D
31from ethosu.vela.api import NpuTileBox
Louis Verhaardd2665802020-11-20 13:08:55 +010032from ethosu.vela.architecture_features import Accelerator
33from ethosu.vela.architecture_features import create_default_arch
34from ethosu.vela.register_command_stream_generator import calc_blockdep
Louis Verhaarde8a5a782020-11-02 18:04:27 +010035from ethosu.vela.register_command_stream_generator import get_address_ranges
36from ethosu.vela.register_command_stream_generator import get_strides
Louis Verhaardd2665802020-11-20 13:08:55 +010037from ethosu.vela.test.extapi.test_extapi_generate_commands import create_feature_map
Louis Verhaarde8a5a782020-11-02 18:04:27 +010038
39
def test_get_fm_strides():
    """Checks get_strides for a 7x10x24 feature map across layouts and data types."""
    feature_map = NpuFeatureMap()
    feature_map.shape = NpuShape3D(height=7, width=10, depth=24)
    # Each case: (layout, data type, strides expected from get_strides)
    cases = [
        (NpuLayout.NHCWB16, NpuDataType.INT16, NpuShape3D(height=640, width=32, depth=320)),
        (NpuLayout.NHWC, NpuDataType.INT16, NpuShape3D(height=480, width=48, depth=2)),
        (NpuLayout.NHWC, NpuDataType.UINT8, NpuShape3D(height=240, width=24, depth=1)),
    ]
    for layout, data_type, expected_strides in cases:
        feature_map.layout = layout
        feature_map.data_type = data_type
        assert get_strides(feature_map) == expected_strides
51
52
# -------------------------------------------------------------------
# ADDRESS TESTS
# -------------------------------------------------------------------
56
57
def test_get_address_ranges_one_tile():
    """Checks get_address_ranges for a feature map that uses a single tile."""
    feature_map = NpuFeatureMap()
    feature_map.region = 4
    feature_map.layout = NpuLayout.NHWC
    feature_map.data_type = NpuDataType.INT16
    feature_map.shape = NpuShape3D(height=50, width=40, depth=3)
    feature_map.tiles = NpuTileBox(height_0=50, height_1=50, width_0=40, addresses=[8000, 0, 0, 0])
    # Only the first of the four entries is expected to hold an address range
    expected = [NpuAddressRange(region=4, address=8000, length=12000), None, None, None]
    assert get_address_ranges(feature_map) == expected
68
69
def test_get_address_ranges_horizontal_tiles():
    """Checks get_address_ranges for a feature map split into 2 horizontal tiles."""
    feature_map = NpuFeatureMap()
    feature_map.region = 6
    feature_map.layout = NpuLayout.NHWC
    feature_map.data_type = NpuDataType.INT16
    feature_map.shape = NpuShape3D(height=50, width=10, depth=20)
    feature_map.tiles = NpuTileBox(height_0=20, height_1=30, width_0=10, addresses=[256, 0, 16000, 0])
    # Entries 0 and 2 are expected to hold ranges; entries 1 and 3 are expected to be None
    first_range = NpuAddressRange(region=6, address=256, length=8000)
    third_range = NpuAddressRange(region=6, address=16000, length=12000)
    assert get_address_ranges(feature_map) == [first_range, None, third_range, None]
85
86
def test_get_address_ranges_vertical_tiles():
    """Checks get_address_ranges for a feature map split into 2 vertical tiles."""
    feature_map = NpuFeatureMap()
    feature_map.region = 6
    feature_map.layout = NpuLayout.NHWC
    feature_map.data_type = NpuDataType.INT8
    feature_map.shape = NpuShape3D(height=50, width=10, depth=20)
    # Strides are set explicitly in this test
    feature_map.strides = NpuShape3D(height=100, width=20, depth=1)
    feature_map.tiles = NpuTileBox(height_0=50, height_1=50, width_0=5, addresses=[16, 32000, 0, 0])
    # Entries 0 and 1 are expected to hold ranges; entries 2 and 3 are expected to be None
    first_range = NpuAddressRange(region=6, address=16, length=5000)
    second_range = NpuAddressRange(region=6, address=32000, length=5000)
    assert get_address_ranges(feature_map) == [first_range, second_range, None, None]
104
105
def test_get_address_ranges_4_tiles():
    """Checks get_address_ranges for a feature map where all 4 tiles are used."""
    feature_map = NpuFeatureMap()
    feature_map.region = 6
    feature_map.layout = NpuLayout.NHCWB16
    feature_map.data_type = NpuDataType.INT16
    feature_map.shape = NpuShape3D(height=50, width=10, depth=20)
    feature_map.tiles = NpuTileBox(height_0=30, height_1=10, width_0=3, addresses=[16, 32000, 8000, 16000])
    # (address, length) expected for each of the four entries, in order
    expected_spans = [(16, 18952), (32000, 6280), (8000, 12552), (16000, 25480)]
    expected = [NpuAddressRange(region=6, address=address, length=length) for address, length in expected_spans]
    assert get_address_ranges(feature_map) == expected
Louis Verhaardd2665802020-11-20 13:08:55 +0100121
122
# -------------------------------------------------------------------
# BLOCKDEP TESTS
# -------------------------------------------------------------------
126
127
def test_calc_blockdep0():
    """
    Blockdep calculation where op1's OFM is used as op2's IFM2.
    op2 takes 1 block to complete, so the expected blockdep is 0
    """
    unit_shape = NpuShape3D(height=1, width=1, depth=1)
    block_config = NpuShape3D(height=1, width=1, depth=4)
    # Feature map written by the producer and read by the consumer as IFM2
    shared_fm = create_feature_map(unit_shape, 1, 0xA0, layout=NpuLayout.NHCWB16)

    producer = NpuElementWiseOperation(NpuElementWiseOp.CLZ)
    producer.ifm = create_feature_map(unit_shape, 1, 0x60, layout=NpuLayout.NHCWB16)
    producer.ofm = shared_fm
    producer.block_config = block_config

    consumer = NpuElementWiseOperation(NpuElementWiseOp.SUB)
    consumer.ifm = create_feature_map(unit_shape, 1, 0x39AC0, layout=NpuLayout.NHCWB16)
    consumer.ifm2 = shared_fm
    consumer.ofm = create_feature_map(unit_shape, 1, 0xE0, layout=NpuLayout.NHCWB16)
    consumer.block_config = block_config

    default_arch = create_default_arch(Accelerator.Ethos_U55_128)
    assert calc_blockdep(default_arch, producer, consumer) == 0
146
147
def test_calc_blockdep2():
    """
    Blockdep calculation where op1 produces part of op2's input and the two
    operations have different sizes.
    op2 takes 3 blocks to complete and op1's last block collides with op2's
    last block, so the expected blockdep is 2
    """
    no_padding = NpuPadding(top=0, left=0, right=0, bottom=0)

    producer = NpuConv2DOperation()
    producer.ifm = create_feature_map(NpuShape3D(height=4, width=48, depth=8), 1, 0x4C80, layout=NpuLayout.NHCWB16)
    producer.ofm = create_feature_map(NpuShape3D(height=4, width=48, depth=16), 1, 0x6480, layout=NpuLayout.NHCWB16)
    producer.kernel = NpuKernel(1, 1)
    producer.weights = [NpuAddressRange(region=1, address=0x4AE0, length=208)]
    producer.biases = [NpuAddressRange(region=1, address=0x49A0, length=160)]
    producer.padding = no_padding
    producer.block_traversal = NpuBlockTraversal.PART_KERNEL_FIRST
    producer.block_config = NpuShape3D(height=4, width=6, depth=16)

    consumer = NpuConvDepthWiseOperation()
    consumer_ifm = create_feature_map(NpuShape3D(height=3, width=48, depth=16), 1, 0, layout=NpuLayout.NHCWB16)
    # The consumer IFM has two tiles; the lower tile is the one produced by op1
    consumer_ifm.tiles = NpuTileBox(height_0=2, height_1=2, width_0=48, addresses=[0x7680, 0, 0x6480, 0])
    consumer.ifm = consumer_ifm
    consumer.ofm = create_feature_map(NpuShape3D(height=1, width=24, depth=16), 1, 0x6480, layout=NpuLayout.NHCWB16)
    consumer.kernel = NpuKernel(3, 3, stride_x=2, stride_y=2)
    consumer.weights = [NpuAddressRange(region=1, address=0x4BB0, length=208)]
    consumer.biases = [NpuAddressRange(region=1, address=0x4A40, length=160)]
    consumer.padding = no_padding
    consumer.block_config = NpuShape3D(height=1, width=8, depth=16)

    default_arch = create_default_arch(Accelerator.Ethos_U55_128)
    assert calc_blockdep(default_arch, producer, consumer) == 2
177
178
def test_calc_blockdep3():
    """
    Blockdep calculation where op2 consumes part of op1's output and the two
    operations have different sizes.
    The last blocks of op1 do not overlap the first jobs of op2, so the
    expected blockdep is 3
    """
    producer = NpuConv2DOperation()
    producer.ifm = create_feature_map(NpuShape3D(height=13, width=96, depth=1), 1, 0, layout=NpuLayout.NHWC)
    producer.ofm = create_feature_map(NpuShape3D(height=6, width=48, depth=8), 1, 0x7C80, layout=NpuLayout.NHCWB16)
    producer.kernel = NpuKernel(3, 3, stride_x=2, stride_y=2)
    producer.weights = [NpuAddressRange(region=1, address=0x4AE0, length=144)]
    producer.biases = [NpuAddressRange(region=1, address=0x49A0, length=80)]
    producer.padding = NpuPadding(top=0, left=0, right=1, bottom=0)
    producer.block_traversal = NpuBlockTraversal.PART_KERNEL_FIRST
    producer.block_config = NpuShape3D(height=6, width=3, depth=8)

    consumer = NpuConvDepthWiseOperation()
    # The consumer IFM is created at the same address (0x7C80) as the producer OFM
    consumer.ifm = create_feature_map(NpuShape3D(height=5, width=48, depth=8), 1, 0x7C80, layout=NpuLayout.NHCWB16)
    consumer.ofm = create_feature_map(NpuShape3D(height=4, width=48, depth=8), 1, 0x4C80, layout=NpuLayout.NHCWB16)
    consumer.kernel = NpuKernel(3, 3)
    consumer.weights = [NpuAddressRange(region=1, address=0x4BB0, length=112)]
    consumer.biases = [NpuAddressRange(region=1, address=0x4A40, length=80)]
    consumer.padding = NpuPadding(top=0, left=0, right=0, bottom=0)
    consumer.block_config = NpuShape3D(height=4, width=6, depth=8)

    default_arch = create_default_arch(Accelerator.Ethos_U55_128)
    assert calc_blockdep(default_arch, producer, consumer) == 3