blob: 96dbdb216ce0c37ab4e199633e84d9ccbd688a97 [file] [log] [blame]
# SPDX-FileCopyrightText: Copyright 2020, 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Description:
# Contains unit tests for register command stream generator
19from ethosu.vela.api import NpuAddressRange
Louis Verhaardd2665802020-11-20 13:08:55 +010020from ethosu.vela.api import NpuBlockTraversal
21from ethosu.vela.api import NpuConv2DOperation
22from ethosu.vela.api import NpuConvDepthWiseOperation
Louis Verhaarde8a5a782020-11-02 18:04:27 +010023from ethosu.vela.api import NpuDataType
Louis Verhaardd2665802020-11-20 13:08:55 +010024from ethosu.vela.api import NpuElementWiseOp
25from ethosu.vela.api import NpuElementWiseOperation
Louis Verhaarde8a5a782020-11-02 18:04:27 +010026from ethosu.vela.api import NpuFeatureMap
Louis Verhaardd2665802020-11-20 13:08:55 +010027from ethosu.vela.api import NpuKernel
Louis Verhaarde8a5a782020-11-02 18:04:27 +010028from ethosu.vela.api import NpuLayout
Louis Verhaardd2665802020-11-20 13:08:55 +010029from ethosu.vela.api import NpuPadding
Louis Verhaarde8a5a782020-11-02 18:04:27 +010030from ethosu.vela.api import NpuShape3D
31from ethosu.vela.api import NpuTileBox
Louis Verhaardd2665802020-11-20 13:08:55 +010032from ethosu.vela.architecture_features import Accelerator
33from ethosu.vela.architecture_features import create_default_arch
34from ethosu.vela.register_command_stream_generator import calc_blockdep
Louis Verhaarde8a5a782020-11-02 18:04:27 +010035from ethosu.vela.register_command_stream_generator import get_strides
Louis Verhaard1e170182020-11-26 11:42:04 +010036from ethosu.vela.register_command_stream_util import get_address_ranges
Louis Verhaardd2665802020-11-20 13:08:55 +010037from ethosu.vela.test.extapi.test_extapi_generate_commands import create_feature_map
Louis Verhaarde8a5a782020-11-02 18:04:27 +010038
39
def test_get_fm_strides():
    """Checks get_strides on one feature map while its layout and data type are changed"""
    feature_map = NpuFeatureMap()
    feature_map.layout = NpuLayout.NHCWB16
    feature_map.data_type = NpuDataType.INT16
    feature_map.shape = NpuShape3D(height=7, width=10, depth=24)

    # NHCWB16 layout, INT16 data
    expected = NpuShape3D(height=640, width=32, depth=320)
    assert get_strides(feature_map) == expected

    # Same shape and data type, now in NHWC layout
    feature_map.layout = NpuLayout.NHWC
    expected = NpuShape3D(height=480, width=48, depth=2)
    assert get_strides(feature_map) == expected

    # Still NHWC, data type switched to UINT8
    feature_map.data_type = NpuDataType.UINT8
    expected = NpuShape3D(height=240, width=24, depth=1)
    assert get_strides(feature_map) == expected
51
52
# -------------------------------------------------------------------
# ADDRESS TESTS
# -------------------------------------------------------------------
56
57
def test_get_address_ranges_one_tile():
    """Address range calculation for a feature map that uses a single tile"""
    feature_map = NpuFeatureMap()
    feature_map.region = 4
    feature_map.layout = NpuLayout.NHWC
    feature_map.data_type = NpuDataType.INT16
    feature_map.shape = NpuShape3D(height=50, width=40, depth=3)
    # Tile 0 spans the complete 50x40 feature map; the other three addresses are 0
    feature_map.tiles = NpuTileBox(height_0=50, height_1=50, width_0=40, addresses=[8000, 0, 0, 0])

    actual = get_address_ranges(feature_map)

    # Only the first entry is expected to hold a range, the remaining three are None
    only_tile = NpuAddressRange(region=4, address=8000, length=12000)
    assert actual == [only_tile, None, None, None]
68
69
def test_get_address_ranges_horizontal_tiles():
    """Address range calculation for a feature map split into 2 horizontal tiles"""
    feature_map = NpuFeatureMap()
    feature_map.region = 6
    feature_map.layout = NpuLayout.NHWC
    feature_map.data_type = NpuDataType.INT16
    feature_map.shape = NpuShape3D(height=50, width=10, depth=20)
    # Tile heights are 20 and 30; non-zero addresses are given at index 0 and index 2
    feature_map.tiles = NpuTileBox(height_0=20, height_1=30, width_0=10, addresses=[256, 0, 16000, 0])

    actual = get_address_ranges(feature_map)

    first_tile = NpuAddressRange(region=6, address=256, length=8000)
    third_tile = NpuAddressRange(region=6, address=16000, length=12000)
    assert actual == [first_tile, None, third_tile, None]
85
86
def test_get_address_ranges_vertical_tiles():
    """Address range calculation for a feature map split into 2 vertical tiles"""
    feature_map = NpuFeatureMap()
    feature_map.region = 6
    feature_map.layout = NpuLayout.NHWC
    feature_map.data_type = NpuDataType.INT8
    feature_map.shape = NpuShape3D(height=50, width=10, depth=20)
    # In this test the strides are set explicitly on the feature map
    feature_map.strides = NpuShape3D(height=100, width=20, depth=1)
    # Both tiles are 50 high; width_0=5 with non-zero addresses at index 0 and index 1
    feature_map.tiles = NpuTileBox(height_0=50, height_1=50, width_0=5, addresses=[16, 32000, 0, 0])

    actual = get_address_ranges(feature_map)

    first_tile = NpuAddressRange(region=6, address=16, length=5000)
    second_tile = NpuAddressRange(region=6, address=32000, length=5000)
    assert actual == [first_tile, second_tile, None, None]
104
105
def test_get_address_ranges_4_tiles():
    """Address range calculation for a feature map that uses all 4 tiles"""
    feature_map = NpuFeatureMap()
    feature_map.region = 6
    feature_map.layout = NpuLayout.NHCWB16
    feature_map.data_type = NpuDataType.INT16
    feature_map.shape = NpuShape3D(height=50, width=10, depth=20)
    feature_map.tiles = NpuTileBox(height_0=30, height_1=10, width_0=3, addresses=[16, 32000, 8000, 16000])

    actual = get_address_ranges(feature_map)

    # One expected (address, length) pair per tile, in tile order
    expected_address_and_length = [
        (16, 18952),
        (32000, 6280),
        (8000, 12552),
        (16000, 25480),
    ]
    expected = [
        NpuAddressRange(region=6, address=address, length=length)
        for address, length in expected_address_and_length
    ]
    assert actual == expected
Louis Verhaardd2665802020-11-20 13:08:55 +0100121
122
# -------------------------------------------------------------------
# BLOCKDEP TESTS
# -------------------------------------------------------------------
126
127
def test_calc_blockdep0():
    """
    Blockdep calculation where the OFM of op1 is used as IFM2 of op2.
    op2 needs only 1 block to complete, so the expected blockdep is 0
    """

    def single_element_fm(address):
        # 1x1x1 feature map in NHCWB16 layout, placed at the given address
        return create_feature_map(
            NpuShape3D(height=1, width=1, depth=1),
            1,
            address,
            layout=NpuLayout.NHCWB16,
        )

    producer = NpuElementWiseOperation(NpuElementWiseOp.CLZ)
    producer.ifm = single_element_fm(0x60)
    # The same feature map object is the producer's OFM and the consumer's IFM2
    shared_fm = single_element_fm(0xA0)
    producer.ofm = shared_fm
    producer.block_config = NpuShape3D(height=1, width=1, depth=4)

    consumer = NpuElementWiseOperation(NpuElementWiseOp.SUB)
    consumer.ifm = single_element_fm(0x39AC0)
    consumer.ifm2 = shared_fm
    consumer.ofm = single_element_fm(0xE0)
    consumer.block_config = NpuShape3D(height=1, width=1, depth=4)

    arch = create_default_arch(Accelerator.Ethos_U55_128)
    assert calc_blockdep(arch, producer, consumer) == 0
166
167
def test_calc_blockdep2():
    """
    Blockdep calculation where op1 produces part of op2's input and the
    two operations have different sizes.
    op2 needs 3 blocks to complete and the last block of op1 collides with
    the last block of op2, so the expected blockdep is 2
    """

    def nhcwb16_fm(height, width, depth, address):
        # Feature map in NHCWB16 layout with the given shape and address
        return create_feature_map(
            NpuShape3D(height=height, width=width, depth=depth),
            1,
            address,
            layout=NpuLayout.NHCWB16,
        )

    producer = NpuConv2DOperation()
    producer.ifm = nhcwb16_fm(4, 48, 8, 0x4C80)
    producer.ofm = nhcwb16_fm(4, 48, 16, 0x6480)
    producer.kernel = NpuKernel(1, 1)
    producer.weights = [NpuAddressRange(region=1, address=0x4AE0, length=208)]
    producer.biases = [NpuAddressRange(region=1, address=0x49A0, length=160)]
    producer.padding = NpuPadding(top=0, left=0, right=0, bottom=0)
    producer.block_traversal = NpuBlockTraversal.PART_KERNEL_FIRST
    producer.block_config = NpuShape3D(height=4, width=6, depth=16)

    consumer = NpuConvDepthWiseOperation()
    consumer.ifm = nhcwb16_fm(3, 48, 16, 0)
    # The consumer IFM has two tiles; the lower one (address 0x6480) is written by the producer
    consumer.ifm.tiles = NpuTileBox(height_0=2, height_1=2, width_0=48, addresses=[0x7680, 0, 0x6480, 0])
    consumer.ofm = nhcwb16_fm(1, 24, 16, 0x6480)
    consumer.kernel = NpuKernel(3, 3, stride_x=2, stride_y=2)
    consumer.weights = [NpuAddressRange(region=1, address=0x4BB0, length=208)]
    consumer.biases = [NpuAddressRange(region=1, address=0x4A40, length=160)]
    consumer.padding = NpuPadding(top=0, left=0, right=0, bottom=0)
    consumer.block_config = NpuShape3D(height=1, width=8, depth=16)

    arch = create_default_arch(Accelerator.Ethos_U55_128)
    assert calc_blockdep(arch, producer, consumer) == 2
217
218
def test_calc_blockdep3():
    """
    Blockdep calculation where op2 consumes part of op1's output and the
    two operations have different sizes.
    The last blocks of op1 do not overlap with the first jobs of op2,
    so the expected blockdep is 3
    """

    def make_fm(height, width, depth, address, layout):
        # Feature map with the given shape, address and layout
        return create_feature_map(
            NpuShape3D(height=height, width=width, depth=depth),
            1,
            address,
            layout=layout,
        )

    producer = NpuConv2DOperation()
    # The producer IFM is the only NHWC feature map in this test
    producer.ifm = make_fm(13, 96, 1, 0, NpuLayout.NHWC)
    producer.ofm = make_fm(6, 48, 8, 0x7C80, NpuLayout.NHCWB16)
    producer.kernel = NpuKernel(3, 3, stride_x=2, stride_y=2)
    producer.weights = [NpuAddressRange(region=1, address=0x4AE0, length=144)]
    producer.biases = [NpuAddressRange(region=1, address=0x49A0, length=80)]
    producer.padding = NpuPadding(top=0, left=0, right=1, bottom=0)
    producer.block_traversal = NpuBlockTraversal.PART_KERNEL_FIRST
    producer.block_config = NpuShape3D(height=6, width=3, depth=8)

    consumer = NpuConvDepthWiseOperation()
    # The consumer IFM starts at the producer OFM address (0x7C80) but is one row shorter
    consumer.ifm = make_fm(5, 48, 8, 0x7C80, NpuLayout.NHCWB16)
    consumer.ofm = make_fm(4, 48, 8, 0x4C80, NpuLayout.NHCWB16)
    consumer.kernel = NpuKernel(3, 3)
    consumer.weights = [NpuAddressRange(region=1, address=0x4BB0, length=112)]
    consumer.biases = [NpuAddressRange(region=1, address=0x4A40, length=80)]
    consumer.padding = NpuPadding(top=0, left=0, right=0, bottom=0)
    consumer.block_config = NpuShape3D(height=4, width=6, depth=8)

    arch = create_default_arch(Accelerator.Ethos_U55_128)
    assert calc_blockdep(arch, producer, consumer) == 3
264 assert block_dep == 3