blob: db0485c58f070bf5febadfbc71b29d4793ff0e84 [file] [log] [blame]
# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Description:
# Contains unit tests for npu_generate_register_command_stream API for an external consumer
Louis Verhaard024c3552021-03-17 14:26:34 +010019import pytest
20
Louis Verhaard933f55e2020-11-25 14:10:30 +010021from ethosu.vela.api import npu_find_block_configs
Louis Verhaardaeae5672020-11-02 18:04:27 +010022from ethosu.vela.api import npu_generate_register_command_stream
23from ethosu.vela.api import NpuAccelerator
Louis Verhaarde8a5a782020-11-02 18:04:27 +010024from ethosu.vela.api import NpuActivation
25from ethosu.vela.api import NpuActivationOp
26from ethosu.vela.api import NpuAddressRange
27from ethosu.vela.api import NpuBlockTraversal
28from ethosu.vela.api import NpuConv2DOperation
29from ethosu.vela.api import NpuConvDepthWiseOperation
30from ethosu.vela.api import NpuDataType
31from ethosu.vela.api import NpuDmaOperation
32from ethosu.vela.api import NpuElementWiseOp
33from ethosu.vela.api import NpuElementWiseOperation
34from ethosu.vela.api import NpuFeatureMap
35from ethosu.vela.api import NpuKernel
36from ethosu.vela.api import NpuLayout
37from ethosu.vela.api import NpuPadding
38from ethosu.vela.api import NpuPoolingOp
39from ethosu.vela.api import NpuPoolingOperation
40from ethosu.vela.api import NpuQuantization
41from ethosu.vela.api import NpuShape3D
42from ethosu.vela.api import NpuTileBox
Louis Verhaard024c3552021-03-17 14:26:34 +010043from ethosu.vela.architecture_features import Accelerator
44from ethosu.vela.architecture_features import create_default_arch
45from ethosu.vela.errors import VelaError
Louis Verhaarde8a5a782020-11-02 18:04:27 +010046from ethosu.vela.ethos_u55_regs.ethos_u55_regs import cmd0
47from ethosu.vela.ethos_u55_regs.ethos_u55_regs import cmd1
Louis Verhaard024c3552021-03-17 14:26:34 +010048from ethosu.vela.high_level_command_to_npu_op import BasePointerIndex
49from ethosu.vela.high_level_command_to_npu_op import get_mem_limits_for_regions
Louis Verhaarde8a5a782020-11-02 18:04:27 +010050from ethosu.vela.register_command_stream_generator import CmdMode
Louis Verhaard024c3552021-03-17 14:26:34 +010051from ethosu.vela.register_command_stream_generator import generate_command_stream
Louis Verhaard1e170182020-11-26 11:42:04 +010052from ethosu.vela.register_command_stream_util import get_address_ranges
Louis Verhaarde8a5a782020-11-02 18:04:27 +010053
54
def check_cmd0(cmd_stream, cmd, param):
    """Assert that `cmd_stream` contains the word encoding `cmd` with `param`.

    The expected word is `cmd.value` OR-ed with the parameter, which is first
    truncated to 16 bits and then shifted left by 16.
    """
    param = int(param) & 0xFFFF
    expected_word = (param << 16) | cmd.value
    assert expected_word in cmd_stream, f"Not in command stream: {cmd} {param}"
60
61
def check_cmd1(cmd_stream, cmd, offset, param=0x0):
    """Checks that the command stream contains the given command + payload.

    A match is a word equal to `cmd.value | CmdMode.Payload32.value | (param << 16)`
    that is immediately followed by a word equal to `offset`.

    Args:
        cmd_stream: sequence of command stream words (must support slicing).
        cmd: command to look for; its `.value` is used as the command code.
        offset: expected payload word; compared after truncation to 32 bits.
        param: value expected in the parameter field of the command word.

    Raises:
        AssertionError: if no such (command, payload) pair occurs in the stream.
    """
    # The payload is a single 32-bit word (CmdMode.Payload32), so the expected value
    # must be truncated to 32 bits; a wider mask could never match negative or
    # larger-than-32-bit offsets.
    offset = int(offset) & 0xFFFFFFFF
    command = cmd.value | CmdMode.Payload32.value | (param << 16)
    # Walk over every pair of adjacent words: (command word, payload word)
    for word, payload in zip(cmd_stream, cmd_stream[1:]):
        if word == command and payload == offset:
            return  # found
    # Raise explicitly: a bare "assert False" is removed when Python runs with -O
    raise AssertionError(f"Not in command stream: {cmd} {offset} {param}")
70
71
def find_cmd0(cmd_stream, cmd) -> int:
    """Returns parameter of the first command in the stream that matches the given command.

    A word matches when its lower 16 bits equal `cmd.value`; the returned parameter
    is taken from bits 16..31 of that word.

    Raises:
        AssertionError: if no word in the stream matches the command.
    """
    for command in cmd_stream:
        if (command & 0xFFFF) == cmd.value:
            return (command >> 16) & 0xFFFF
    # Raise explicitly: a bare "assert False" is removed when Python runs with -O,
    # which would make this function silently return None instead of an int.
    raise AssertionError(f"Not in command stream: {cmd}")
78
79
def create_feature_map(
    shape: NpuShape3D,
    region: int,
    address: int,
    dtype: NpuDataType = NpuDataType.UINT8,
    layout: NpuLayout = NpuLayout.NHWC,
    quant=NpuQuantization(scale_f32=1, zero_point=0),
) -> NpuFeatureMap:
    """Builds a feature map that is stored in a single tile.

    The tile box uses the width and height of `shape` for tile 0 (and the same
    height for height_1), with `address` as the first tile address and 0 for
    the other three.
    """
    single_tile = NpuTileBox(
        width_0=shape.width,
        height_0=shape.height,
        height_1=shape.height,
        addresses=[address, 0, 0, 0],
    )
    feature_map = NpuFeatureMap()
    feature_map.shape = shape
    feature_map.data_type = dtype
    feature_map.layout = layout
    feature_map.region = region
    feature_map.quantization = quant
    feature_map.tiles = single_tile
    return feature_map
99
100
def test_conv2d():
    """Generates a command stream for one conv2d operation and verifies the emitted commands"""
    ifm_quant = NpuQuantization(scale_f32=0.007843138, zero_point=128)
    ofm_quant = NpuQuantization(scale_f32=0.20392157, zero_point=128)

    op = NpuConv2DOperation()
    op.ifm = create_feature_map(NpuShape3D(height=30, width=62, depth=46), 1, 512, quant=ifm_quant)
    op.ofm = create_feature_map(NpuShape3D(height=30, width=31, depth=46), 1, 0x14E40, quant=ofm_quant)
    op.kernel = NpuKernel(3, 2, 2, 1)
    op.weights = [NpuAddressRange(region=0, address=0, length=7696)]
    op.biases = [NpuAddressRange(region=0, address=32000, length=464)]
    op.padding = NpuPadding(top=0, left=0, right=1, bottom=1)
    op.block_traversal = NpuBlockTraversal.PART_KERNEL_FIRST
    op.block_config = NpuShape3D(height=16, width=4, depth=16)

    cmds = npu_generate_register_command_stream([op], NpuAccelerator.Ethos_U55_128)

    # (checker, command, expected value), verified in the listed order
    expected = [
        # IFM
        (check_cmd0, cmd0.NPU_SET_IFM_REGION, 1),
        (check_cmd1, cmd1.NPU_SET_IFM_BASE0, 512),
        (check_cmd1, cmd1.NPU_SET_IFM_BASE1, 0),
        (check_cmd1, cmd1.NPU_SET_IFM_BASE2, 0),
        (check_cmd1, cmd1.NPU_SET_IFM_BASE3, 0),
        (check_cmd0, cmd0.NPU_SET_IFM_HEIGHT0_M1, 29),
        (check_cmd0, cmd0.NPU_SET_IFM_HEIGHT1_M1, 29),
        (check_cmd0, cmd0.NPU_SET_IFM_WIDTH0_M1, 61),
        (check_cmd0, cmd0.NPU_SET_IFM_DEPTH_M1, 45),
        (check_cmd1, cmd1.NPU_SET_IFM_STRIDE_C, 1),
        (check_cmd1, cmd1.NPU_SET_IFM_STRIDE_Y, 2852),
        (check_cmd1, cmd1.NPU_SET_IFM_STRIDE_X, 46),
        (check_cmd0, cmd0.NPU_SET_IFM_ZERO_POINT, 128),
        (check_cmd0, cmd0.NPU_SET_IFM_PRECISION, 0),
        (check_cmd0, cmd0.NPU_SET_IFM_UPSCALE, 0),
        (check_cmd0, cmd0.NPU_SET_IFM_PAD_TOP, 0),
        (check_cmd0, cmd0.NPU_SET_IFM_PAD_LEFT, 0),
        (check_cmd0, cmd0.NPU_SET_IFM_PAD_BOTTOM, 1),
        (check_cmd0, cmd0.NPU_SET_IFM_PAD_RIGHT, 1),
        # OFM
        (check_cmd0, cmd0.NPU_SET_OFM_REGION, 1),
        (check_cmd1, cmd1.NPU_SET_OFM_BASE0, 85568),
        (check_cmd1, cmd1.NPU_SET_OFM_BASE1, 0),
        (check_cmd1, cmd1.NPU_SET_OFM_BASE2, 0),
        (check_cmd1, cmd1.NPU_SET_OFM_BASE3, 0),
        (check_cmd0, cmd0.NPU_SET_OFM_HEIGHT0_M1, 29),
        (check_cmd0, cmd0.NPU_SET_OFM_HEIGHT1_M1, 29),
        (check_cmd0, cmd0.NPU_SET_OFM_WIDTH0_M1, 30),
        (check_cmd0, cmd0.NPU_SET_OFM_HEIGHT_M1, 29),
        (check_cmd0, cmd0.NPU_SET_OFM_WIDTH_M1, 30),
        (check_cmd0, cmd0.NPU_SET_OFM_DEPTH_M1, 45),
        (check_cmd1, cmd1.NPU_SET_OFM_STRIDE_C, 1),
        (check_cmd1, cmd1.NPU_SET_OFM_STRIDE_Y, 1426),
        (check_cmd1, cmd1.NPU_SET_OFM_STRIDE_X, 46),
        (check_cmd0, cmd0.NPU_SET_OFM_ZERO_POINT, 128),
        (check_cmd0, cmd0.NPU_SET_OFM_PRECISION, 0),
        # Kernel
        (check_cmd0, cmd0.NPU_SET_KERNEL_HEIGHT_M1, 1),
        (check_cmd0, cmd0.NPU_SET_KERNEL_WIDTH_M1, 2),
        (check_cmd0, cmd0.NPU_SET_KERNEL_STRIDE, 5),
        # Weights and scales
        (check_cmd0, cmd0.NPU_SET_WEIGHT_REGION, 0),
        (check_cmd1, cmd1.NPU_SET_WEIGHT_BASE, 0),
        (check_cmd1, cmd1.NPU_SET_WEIGHT_LENGTH, 7696),
        (check_cmd0, cmd0.NPU_SET_SCALE_REGION, 0),
        (check_cmd1, cmd1.NPU_SET_SCALE_BASE, 32000),
        (check_cmd1, cmd1.NPU_SET_SCALE_LENGTH, 464),
        # Activation
        (check_cmd0, cmd0.NPU_SET_ACTIVATION, 0),
        (check_cmd0, cmd0.NPU_SET_ACTIVATION_MIN, 0),
        (check_cmd0, cmd0.NPU_SET_ACTIVATION_MAX, 255),
        # Block config and remaining settings
        (check_cmd0, cmd0.NPU_SET_OFM_BLK_HEIGHT_M1, 15),
        (check_cmd0, cmd0.NPU_SET_OFM_BLK_WIDTH_M1, 3),
        (check_cmd0, cmd0.NPU_SET_OFM_BLK_DEPTH_M1, 15),
        (check_cmd0, cmd0.NPU_SET_IFM_IB_END, 14),
        (check_cmd0, cmd0.NPU_SET_AB_START, 14),
        (check_cmd0, cmd0.NPU_SET_ACC_FORMAT, 0),
        (check_cmd0, cmd0.NPU_SET_BLOCKDEP, 0),
        (check_cmd0, cmd0.NPU_OP_CONV, 0),
    ]
    for check, cmd, value in expected:
        check(cmds, cmd, value)
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100175
176
def create_fully_connected_op() -> NpuConv2DOperation:
    """Creates a 1x1 conv2d operation on 1x1 feature maps (114 -> 96 channels), both in NHCWB16 layout"""
    ifm = create_feature_map(
        NpuShape3D(height=1, width=1, depth=114),
        1,
        0,
        quant=NpuQuantization(scale_f32=0.007843138, zero_point=128),
        layout=NpuLayout.NHCWB16,
    )
    ofm = create_feature_map(
        NpuShape3D(height=1, width=1, depth=96),
        1,
        0x6A0,
        quant=NpuQuantization(scale_f32=0.20392157, zero_point=128),
        layout=NpuLayout.NHCWB16,
    )
    weight_range = NpuAddressRange(region=0, address=0x16880, length=13120)
    bias_range = NpuAddressRange(region=0, address=0x19BC0, length=960)

    fc_op = NpuConv2DOperation()
    fc_op.ifm = ifm
    fc_op.ofm = ofm
    fc_op.kernel = NpuKernel(1, 1)
    fc_op.weights = [weight_range]
    fc_op.biases = [bias_range]
    fc_op.padding = NpuPadding(top=0, left=0, right=0, bottom=0)
    fc_op.block_traversal = NpuBlockTraversal.DEPTH_FIRST
    fc_op.block_config = NpuShape3D(height=2, width=4, depth=96)
    return fc_op
200
201
def test_fully_connected():
    """Generates a command stream for a fully connected operation and checks that a conv op is emitted"""
    fc_op = create_fully_connected_op()
    stream = npu_generate_register_command_stream([fc_op], NpuAccelerator.Ethos_U55_128)
    check_cmd0(stream, cmd0.NPU_OP_CONV, 0)
    # Besides the op itself the stream must contain a number of register settings
    num_words = len(stream)
    assert num_words > 20
208
209
def test_depthwise():
    """Test depthwise operation, preceded by a DMA operation.

    The DMA destination is the same address range that the depthwise operation
    uses for its weights.
    """
    dma_src = NpuAddressRange(region=0, address=0x40, length=96)
    dma_dst = NpuAddressRange(region=1, address=0x10000, length=96)
    dma_op = NpuDmaOperation(dma_src, dma_dst)

    depthwise_op = NpuConvDepthWiseOperation()
    depthwise_op.ifm = create_feature_map(
        NpuShape3D(height=64, width=64, depth=8),
        1,
        0x0,
        quant=NpuQuantization(scale_f32=0.007843138, zero_point=128),
    )
    depthwise_op.ofm = create_feature_map(
        NpuShape3D(height=64, width=64, depth=8),
        1,
        0x8000,
        quant=NpuQuantization(scale_f32=0.062745101749897, zero_point=128),
    )
    depthwise_op.kernel = NpuKernel(3, 3)
    depthwise_op.padding = NpuPadding(top=1, left=1, right=1, bottom=1)
    depthwise_op.weights = [dma_dst]
    depthwise_op.biases = [NpuAddressRange(region=0, address=0, length=80)]
    depthwise_op.block_config = NpuShape3D(height=8, width=12, depth=8)

    cmds = npu_generate_register_command_stream([dma_op, depthwise_op], NpuAccelerator.Ethos_U55_128)

    # (checker, command, expected value), verified in the listed order
    expected = [
        (check_cmd0, cmd0.NPU_SET_DMA0_SRC_REGION, 0),
        (check_cmd1, cmd1.NPU_SET_DMA0_SRC, 0x40),
        (check_cmd0, cmd0.NPU_SET_DMA0_DST_REGION, 1),
        (check_cmd1, cmd1.NPU_SET_DMA0_DST, 0x10000),
        (check_cmd1, cmd1.NPU_SET_DMA0_LEN, 96),
        (check_cmd0, cmd0.NPU_OP_DMA_START, 0),
        # A DMA WAIT should have been inserted
        (check_cmd0, cmd0.NPU_OP_DMA_WAIT, 0),
        (check_cmd0, cmd0.NPU_OP_DEPTHWISE, 0),
    ]
    for check, cmd, value in expected:
        check(cmds, cmd, value)
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100235
236
def test_mul_with_broadcast_and_relu():
    """Test elementwise multiplication where IFM2 is broadcast, followed by RELU.

    The block config is not hard-coded; the first candidate returned by
    npu_find_block_configs is used.
    """
    accelerator = NpuAccelerator.Ethos_U55_32

    op = NpuElementWiseOperation(NpuElementWiseOp.MUL)
    op.ifm = create_feature_map(NpuShape3D(height=31, width=22, depth=31), 1, 0x20)
    op.ifm2 = create_feature_map(NpuShape3D(height=1, width=22, depth=1), 1, 0)
    op.ofm = create_feature_map(NpuShape3D(height=31, width=22, depth=31), 1, 0x52C0)
    op.activation = NpuActivation(NpuActivationOp.NONE_OR_RELU)
    op.activation.min = 0  # RELU
    # Select a block config using npu_find_block_configs
    candidate_configs = npu_find_block_configs(op, accelerator)
    op.block_config = candidate_configs[0]

    cmds = npu_generate_register_command_stream([op], accelerator)

    # (checker, command, expected value[, expected param]), verified in the listed order
    expected = [
        (check_cmd1, cmd1.NPU_SET_OFM_SCALE, 1073741824, 30),
        # IFM
        (check_cmd0, cmd0.NPU_SET_IFM_REGION, 1),
        (check_cmd1, cmd1.NPU_SET_IFM_BASE0, 32),
        (check_cmd1, cmd1.NPU_SET_IFM_BASE1, 0),
        (check_cmd1, cmd1.NPU_SET_IFM_BASE2, 0),
        (check_cmd1, cmd1.NPU_SET_IFM_BASE3, 0),
        (check_cmd0, cmd0.NPU_SET_IFM_HEIGHT0_M1, 30),
        (check_cmd0, cmd0.NPU_SET_IFM_HEIGHT1_M1, 30),
        (check_cmd0, cmd0.NPU_SET_IFM_WIDTH0_M1, 21),
        (check_cmd0, cmd0.NPU_SET_IFM_DEPTH_M1, 30),
        (check_cmd1, cmd1.NPU_SET_IFM_STRIDE_C, 1),
        (check_cmd1, cmd1.NPU_SET_IFM_STRIDE_Y, 682),
        (check_cmd1, cmd1.NPU_SET_IFM_STRIDE_X, 31),
        (check_cmd0, cmd0.NPU_SET_IFM_ZERO_POINT, 0),
        (check_cmd0, cmd0.NPU_SET_IFM_PRECISION, 0),
        (check_cmd0, cmd0.NPU_SET_IFM_UPSCALE, 0),
        # OFM
        (check_cmd0, cmd0.NPU_SET_OFM_REGION, 1),
        (check_cmd1, cmd1.NPU_SET_OFM_BASE0, 21184),
        (check_cmd1, cmd1.NPU_SET_OFM_BASE1, 0),
        (check_cmd1, cmd1.NPU_SET_OFM_BASE2, 0),
        (check_cmd1, cmd1.NPU_SET_OFM_BASE3, 0),
        (check_cmd0, cmd0.NPU_SET_OFM_HEIGHT0_M1, 30),
        (check_cmd0, cmd0.NPU_SET_OFM_HEIGHT1_M1, 30),
        (check_cmd0, cmd0.NPU_SET_OFM_WIDTH0_M1, 21),
        (check_cmd0, cmd0.NPU_SET_OFM_HEIGHT_M1, 30),
        (check_cmd0, cmd0.NPU_SET_OFM_WIDTH_M1, 21),
        (check_cmd0, cmd0.NPU_SET_OFM_DEPTH_M1, 30),
        (check_cmd1, cmd1.NPU_SET_OFM_STRIDE_C, 1),
        (check_cmd1, cmd1.NPU_SET_OFM_STRIDE_Y, 682),
        (check_cmd1, cmd1.NPU_SET_OFM_STRIDE_X, 31),
        (check_cmd0, cmd0.NPU_SET_OFM_ZERO_POINT, 0),
        (check_cmd0, cmd0.NPU_SET_OFM_PRECISION, 256),
        # Activation
        (check_cmd0, cmd0.NPU_SET_ACTIVATION, 0),
        (check_cmd0, cmd0.NPU_SET_ACTIVATION_MIN, 0),
        (check_cmd0, cmd0.NPU_SET_ACTIVATION_MAX, 255),
        # IFM2
        (check_cmd0, cmd0.NPU_SET_IFM2_REGION, 1),
        (check_cmd1, cmd1.NPU_SET_IFM2_BASE0, 0),
        (check_cmd1, cmd1.NPU_SET_IFM2_BASE1, 0),
        (check_cmd1, cmd1.NPU_SET_IFM2_BASE2, 0),
        (check_cmd1, cmd1.NPU_SET_IFM2_BASE3, 0),
        (check_cmd0, cmd0.NPU_SET_IFM2_HEIGHT0_M1, 0),
        (check_cmd0, cmd0.NPU_SET_IFM2_HEIGHT1_M1, 0),
        (check_cmd0, cmd0.NPU_SET_IFM2_WIDTH0_M1, 21),
        (check_cmd1, cmd1.NPU_SET_IFM2_STRIDE_C, 1),
        (check_cmd1, cmd1.NPU_SET_IFM2_STRIDE_Y, 22),
        (check_cmd1, cmd1.NPU_SET_IFM2_STRIDE_X, 1),
        (check_cmd0, cmd0.NPU_SET_IFM2_ZERO_POINT, 0),
        (check_cmd0, cmd0.NPU_SET_IFM2_PRECISION, 0),
        (check_cmd0, cmd0.NPU_SET_IFM2_BROADCAST, 5),
        # Remaining settings and the operation itself
        (check_cmd0, cmd0.NPU_SET_IFM_IB_END, 16),
        (check_cmd0, cmd0.NPU_SET_AB_START, 16),
        (check_cmd0, cmd0.NPU_SET_IFM2_IB_START, 9),
        (check_cmd0, cmd0.NPU_SET_ACC_FORMAT, 0),
        (check_cmd0, cmd0.NPU_SET_BLOCKDEP, 0),
        (check_cmd0, cmd0.NPU_OP_ELEMENTWISE, 0),
    ]
    for check, cmd, *args in expected:
        check(cmds, cmd, *args)

    # Check that block width/height were generated that fit
    blk_height, blk_width, blk_depth = [
        find_cmd0(cmds, blk_cmd)
        for blk_cmd in (
            cmd0.NPU_SET_OFM_BLK_HEIGHT_M1,
            cmd0.NPU_SET_OFM_BLK_WIDTH_M1,
            cmd0.NPU_SET_OFM_BLK_DEPTH_M1,
        )
    ]
    assert blk_height >= 0
    assert blk_width >= 0
    assert blk_depth >= 0
    # NOTE(review): the depth term is added to, not multiplied with, height * width;
    # confirm that this is the intended size bound.
    assert (blk_height + 1) * (blk_width + 1) + (blk_depth + 1) <= 3072
312
313
def create_avg_pool_op() -> NpuPoolingOperation:
    """Creates an average pooling operation with IFM and OFM both placed in region 2"""
    ifm_quant = NpuQuantization(scale_f32=0.007843138, zero_point=128)
    ofm_quant = NpuQuantization(scale_f32=0.20392157, zero_point=128)

    pool_op = NpuPoolingOperation(NpuPoolingOp.AVERAGE)
    pool_op.ifm = create_feature_map(NpuShape3D(height=29, width=30, depth=27), 2, 0, quant=ifm_quant)
    pool_op.ofm = create_feature_map(NpuShape3D(height=10, width=10, depth=27), 2, 0x5BD0, quant=ofm_quant)
    pool_op.kernel = NpuKernel(8, 2, 3, 3)
    pool_op.padding = NpuPadding(top=0, left=2, right=3, bottom=0)
    # Select a block config
    pool_op.block_config = NpuShape3D(height=4, width=4, depth=16)
    return pool_op
330
331
def test_avg_pool():
    """Generates a command stream for an average pool operation and checks that a pool op is emitted"""
    pool_op = create_avg_pool_op()
    stream = npu_generate_register_command_stream([pool_op], NpuAccelerator.Ethos_U55_128)
    check_cmd0(stream, cmd0.NPU_OP_POOL, 1)
    # Besides the op itself the stream must contain a number of register settings
    num_words = len(stream)
    assert num_words > 10
338
339
def test_two_operations():
    """Tests code generation for a fully connected operation followed by an average pool"""
    operations = [create_fully_connected_op(), create_avg_pool_op()]
    stream = npu_generate_register_command_stream(operations, NpuAccelerator.Ethos_U55_64)

    # (command, expected parameter), verified in the listed order
    expected = (
        (cmd0.NPU_OP_POOL, 1),
        (cmd0.NPU_OP_CONV, 0),
        (cmd0.NPU_SET_BLOCKDEP, 0),
        # The operations are not dependent, so expect a blockdep 3
        (cmd0.NPU_SET_BLOCKDEP, 3),
    )
    for cmd, param in expected:
        check_cmd0(stream, cmd, param)
    assert len(stream) > 10
351
352
def test_dma_op():
    """Tests DMA operation followed by average pool.

    The DMA destination is the first address range of the average pool's OFM,
    as returned by get_address_ranges.
    """
    pool_op = create_avg_pool_op()
    assert pool_op.ofm is not None
    dma_dst = get_address_ranges(pool_op.ofm)[0]
    assert dma_dst is not None
    dma_src = NpuAddressRange(0, 0x24000, dma_dst.length)
    dma_op = NpuDmaOperation(dma_src, dma_dst)

    stream = npu_generate_register_command_stream([dma_op, pool_op], NpuAccelerator.Ethos_U55_64)

    # (command, expected parameter), verified in the listed order
    expected = (
        (cmd0.NPU_OP_DMA_START, 0),
        # A DMA WAIT should have been inserted
        (cmd0.NPU_OP_DMA_WAIT, 0),
        (cmd0.NPU_OP_POOL, 1),
    )
    for cmd, param in expected:
        check_cmd0(stream, cmd, param)
Louis Verhaard024c3552021-03-17 14:26:34 +0100366
367
def test_check_mem_limits():
    """Tests that no code is generated with addresses out of bounds"""

    def expect_rejected(npu_op, accelerator):
        # Code generation for this operation must fail with a VelaError
        with pytest.raises(VelaError):
            npu_generate_register_command_stream([npu_op], accelerator)

    # bias with end address out of range
    fc_op = create_fully_connected_op()
    fc_op.biases = [NpuAddressRange(region=0, address=(1 << 32) - 16, length=1000)]
    expect_rejected(fc_op, NpuAccelerator.Ethos_U55_64)
    # same test should pass with Ethos_U65_512
    npu_generate_register_command_stream([fc_op], NpuAccelerator.Ethos_U65_512)

    # weights with end address out of range
    fc_op = create_fully_connected_op()
    fc_op.weights = [NpuAddressRange(region=0, address=(1 << 48) - 960, length=1000)]
    expect_rejected(fc_op, NpuAccelerator.Ethos_U65_256)

    # bias with high end address, but still within range
    fc_op = create_fully_connected_op()
    fc_op.biases = [NpuAddressRange(region=0, address=(1 << 48) - 1024, length=1000)]
    npu_generate_register_command_stream([fc_op], NpuAccelerator.Ethos_U65_512)

    # weights with negative address
    fc_op = create_fully_connected_op()
    fc_op.weights = [NpuAddressRange(region=0, address=-16, length=1000)]
    expect_rejected(fc_op, NpuAccelerator.Ethos_U55_32)

    # Tile 4's end address out of range
    pool_op = create_avg_pool_op()
    pool_op.ifm.tiles = NpuTileBox(width_0=1, height_0=1, height_1=1, addresses=[0, 800, 4000, (1 << 32) - 16])
    expect_rejected(pool_op, NpuAccelerator.Ethos_U55_256)

    # IFM region out of range
    pool_op = create_avg_pool_op()
    pool_op.ifm.region = 8
    expect_rejected(pool_op, NpuAccelerator.Ethos_U55_64)
401
402
def test_check_sram_limit_spilling():
    """Tests that no code is generated with addresses outside available sram spilling range"""
    arch = create_default_arch(Accelerator.Ethos_U65_512)
    assert arch.is_spilling_enabled()

    pool_op = create_avg_pool_op()
    pool_op.ifm.region = 0
    # OFM in scratch fast memory
    pool_op.ofm.region = int(BasePointerIndex.ScratchFastTensor)
    ofm_shape = pool_op.ofm.shape
    ofm_width, ofm_height = ofm_shape.width, ofm_shape.height
    pool_op.ofm.tiles = NpuTileBox(
        width_0=ofm_width,
        height_0=ofm_height,
        height_1=ofm_height,
        addresses=[32 * 1024, 0, 0, 0],
    )

    # 384K for spilling should fit
    arch.sram_size = 384 * 1024
    limits = get_mem_limits_for_regions(arch)
    generate_command_stream([pool_op], arch, verbose=False, mem_limits=limits)

    # 32K for spilling does not fit, due to the OFM address
    arch.sram_size = 32 * 1024
    limits = get_mem_limits_for_regions(arch)
    with pytest.raises(VelaError):
        generate_command_stream([pool_op], arch, verbose=False, mem_limits=limits)