blob: 7d80b4751d8defc5bd1dcc00228878bc90a54cbe [file] [log] [blame]
Rickard Bolinbc6ee582022-11-04 08:24:29 +00001# SPDX-FileCopyrightText: Copyright 2020-2021 Arm Limited and/or its affiliates <open-source-office@arm.com>
Louis Verhaarde8a5a782020-11-02 18:04:27 +01002#
3# SPDX-License-Identifier: Apache-2.0
4#
5# Licensed under the Apache License, Version 2.0 (the License); you may
6# not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an AS IS BASIS, WITHOUT
13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17# Description:
Louis Verhaardaeae5672020-11-02 18:04:27 +010018# Contains unit tests for npu_generate_register_command_stream API for an external consumer
Louis Verhaard024c3552021-03-17 14:26:34 +010019import pytest
20
Louis Verhaard933f55e2020-11-25 14:10:30 +010021from ethosu.vela.api import npu_find_block_configs
Louis Verhaardaeae5672020-11-02 18:04:27 +010022from ethosu.vela.api import npu_generate_register_command_stream
23from ethosu.vela.api import NpuAccelerator
Louis Verhaarde8a5a782020-11-02 18:04:27 +010024from ethosu.vela.api import NpuActivation
25from ethosu.vela.api import NpuActivationOp
26from ethosu.vela.api import NpuAddressRange
27from ethosu.vela.api import NpuBlockTraversal
28from ethosu.vela.api import NpuConv2DOperation
29from ethosu.vela.api import NpuConvDepthWiseOperation
30from ethosu.vela.api import NpuDataType
31from ethosu.vela.api import NpuDmaOperation
32from ethosu.vela.api import NpuElementWiseOp
33from ethosu.vela.api import NpuElementWiseOperation
34from ethosu.vela.api import NpuFeatureMap
35from ethosu.vela.api import NpuKernel
36from ethosu.vela.api import NpuLayout
37from ethosu.vela.api import NpuPadding
38from ethosu.vela.api import NpuPoolingOp
39from ethosu.vela.api import NpuPoolingOperation
40from ethosu.vela.api import NpuQuantization
41from ethosu.vela.api import NpuShape3D
42from ethosu.vela.api import NpuTileBox
Louis Verhaard024c3552021-03-17 14:26:34 +010043from ethosu.vela.architecture_features import Accelerator
44from ethosu.vela.architecture_features import create_default_arch
45from ethosu.vela.errors import VelaError
Louis Verhaarde8a5a782020-11-02 18:04:27 +010046from ethosu.vela.ethos_u55_regs.ethos_u55_regs import cmd0
47from ethosu.vela.ethos_u55_regs.ethos_u55_regs import cmd1
Louis Verhaard024c3552021-03-17 14:26:34 +010048from ethosu.vela.high_level_command_to_npu_op import BasePointerIndex
49from ethosu.vela.high_level_command_to_npu_op import get_mem_limits_for_regions
Louis Verhaarde8a5a782020-11-02 18:04:27 +010050from ethosu.vela.register_command_stream_generator import CmdMode
Louis Verhaard024c3552021-03-17 14:26:34 +010051from ethosu.vela.register_command_stream_generator import generate_command_stream
Louis Verhaard1e170182020-11-26 11:42:04 +010052from ethosu.vela.register_command_stream_util import get_address_ranges
Louis Verhaarde8a5a782020-11-02 18:04:27 +010053
54
def check_cmd0(cmd_stream, cmd, param):
    """Asserts that the cmd0 word for ``cmd`` with ``param`` occurs in ``cmd_stream``.

    ``param`` is converted to int and truncated to 16 bits, shifted into
    bits 16-31 and OR-ed with ``cmd.value`` to form the word searched for.
    """
    # Reassign param so that the failure message shows the truncated value
    param = int(param) & 0xFFFF
    expected_word = cmd.value | (param << 16)
    is_present = expected_word in cmd_stream
    assert is_present, f"Not in command stream: {cmd} {param}"
60
61
def check_cmd1(cmd_stream, cmd, offset, param=0x0):
    """Checks that the command stream contains the given cmd1 command + payload.

    The command is looked up as two adjacent words: a command word made of
    ``cmd.value``, the ``CmdMode.Payload32`` flag and ``param`` shifted left
    by 16 bits, directly followed by a word equal to ``offset`` truncated to
    32 bits.

    Raises:
        AssertionError: if no such adjacent pair of words is in ``cmd_stream``.
    """
    offset = int(offset) & 0xFFFFFFFF
    command = cmd.value | CmdMode.Payload32.value | (param << 16)
    # Pair every word with its successor; the payload must directly follow the command word
    for word, payload in zip(cmd_stream, cmd_stream[1:]):
        if word == command and payload == offset:
            return  # found
    # Raise explicitly: "assert False" is removed when running with "python -O",
    # which would turn a missing command into a silent pass
    raise AssertionError(f"Not in command stream: {cmd} {offset} {param}")
70
71
def find_cmd0(cmd_stream, cmd) -> int:
    """Returns parameter of the first command in the stream that matches the given command.

    A word matches when its lower 16 bits equal ``cmd.value``; the returned
    parameter is bits 16-31 of that word.

    Raises:
        AssertionError: if no word in ``cmd_stream`` matches ``cmd``.
    """
    for command in cmd_stream:
        if (command & 0xFFFF) == cmd.value:
            return (command >> 16) & 0xFFFF
    # Raise explicitly: "assert False" is removed when running with "python -O",
    # in which case this function would fall through and return None
    raise AssertionError(f"Not in command stream: {cmd}")
78
79
def create_feature_map(
    shape: NpuShape3D,
    region: int,
    address: int,
    dtype: NpuDataType = NpuDataType.UINT8,
    layout: NpuLayout = NpuLayout.NHWC,
    quant=NpuQuantization(scale_f32=1, zero_point=0),
) -> NpuFeatureMap:
    """Builds a feature map that is described by a single tile.

    The tile box uses the width and height of ``shape``; only the first of the
    four tile addresses is set to ``address``, the remaining three are 0.
    """
    single_tile = NpuTileBox(
        width_0=shape.width,
        height_0=shape.height,
        height_1=shape.height,
        addresses=[address, 0, 0, 0],
    )
    feature_map = NpuFeatureMap()
    feature_map.shape = shape
    feature_map.data_type = dtype
    feature_map.layout = layout
    feature_map.region = region
    feature_map.quantization = quant
    feature_map.tiles = single_tile
    return feature_map
99
100
def test_conv2d():
    """Generates a command stream for one conv2d operation and verifies its register settings"""
    ifm_quant = NpuQuantization(scale_f32=0.007843138, zero_point=128)
    ofm_quant = NpuQuantization(scale_f32=0.20392157, zero_point=128)
    conv = NpuConv2DOperation()
    conv.ifm = create_feature_map(NpuShape3D(height=30, width=62, depth=46), 1, 512, quant=ifm_quant)
    conv.ofm = create_feature_map(NpuShape3D(height=30, width=31, depth=46), 1, 0x14E40, quant=ofm_quant)
    conv.kernel = NpuKernel(3, 2, 2, 1)
    conv.weights = [NpuAddressRange(region=0, address=0, length=7696)]
    conv.biases = [NpuAddressRange(region=0, address=32000, length=464)]
    conv.padding = NpuPadding(top=0, left=0, right=1, bottom=1)
    conv.block_traversal = NpuBlockTraversal.PART_KERNEL_FIRST
    conv.block_config = NpuShape3D(height=16, width=4, depth=16)
    stream = npu_generate_register_command_stream([conv], NpuAccelerator.Ethos_U55_128)

    # (checker, command, expected value); evaluated top to bottom
    expected_settings = [
        # IFM
        (check_cmd0, cmd0.NPU_SET_IFM_REGION, 1),
        (check_cmd1, cmd1.NPU_SET_IFM_BASE0, 512),
        (check_cmd1, cmd1.NPU_SET_IFM_BASE1, 0),
        (check_cmd1, cmd1.NPU_SET_IFM_BASE2, 0),
        (check_cmd1, cmd1.NPU_SET_IFM_BASE3, 0),
        (check_cmd0, cmd0.NPU_SET_IFM_HEIGHT0_M1, 29),
        (check_cmd0, cmd0.NPU_SET_IFM_HEIGHT1_M1, 29),
        (check_cmd0, cmd0.NPU_SET_IFM_WIDTH0_M1, 61),
        (check_cmd0, cmd0.NPU_SET_IFM_DEPTH_M1, 45),
        (check_cmd1, cmd1.NPU_SET_IFM_STRIDE_C, 1),
        (check_cmd1, cmd1.NPU_SET_IFM_STRIDE_Y, 2852),
        (check_cmd1, cmd1.NPU_SET_IFM_STRIDE_X, 46),
        (check_cmd0, cmd0.NPU_SET_IFM_ZERO_POINT, 128),
        (check_cmd0, cmd0.NPU_SET_IFM_PRECISION, 0),
        (check_cmd0, cmd0.NPU_SET_IFM_UPSCALE, 0),
        (check_cmd0, cmd0.NPU_SET_IFM_PAD_TOP, 0),
        (check_cmd0, cmd0.NPU_SET_IFM_PAD_LEFT, 0),
        (check_cmd0, cmd0.NPU_SET_IFM_PAD_BOTTOM, 1),
        (check_cmd0, cmd0.NPU_SET_IFM_PAD_RIGHT, 1),
        # OFM
        (check_cmd0, cmd0.NPU_SET_OFM_REGION, 1),
        (check_cmd1, cmd1.NPU_SET_OFM_BASE0, 85568),
        (check_cmd1, cmd1.NPU_SET_OFM_BASE1, 0),
        (check_cmd1, cmd1.NPU_SET_OFM_BASE2, 0),
        (check_cmd1, cmd1.NPU_SET_OFM_BASE3, 0),
        (check_cmd0, cmd0.NPU_SET_OFM_HEIGHT0_M1, 29),
        (check_cmd0, cmd0.NPU_SET_OFM_HEIGHT1_M1, 29),
        (check_cmd0, cmd0.NPU_SET_OFM_WIDTH0_M1, 30),
        (check_cmd0, cmd0.NPU_SET_OFM_HEIGHT_M1, 29),
        (check_cmd0, cmd0.NPU_SET_OFM_WIDTH_M1, 30),
        (check_cmd0, cmd0.NPU_SET_OFM_DEPTH_M1, 45),
        (check_cmd1, cmd1.NPU_SET_OFM_STRIDE_C, 1),
        (check_cmd1, cmd1.NPU_SET_OFM_STRIDE_Y, 1426),
        (check_cmd1, cmd1.NPU_SET_OFM_STRIDE_X, 46),
        (check_cmd0, cmd0.NPU_SET_OFM_ZERO_POINT, 128),
        (check_cmd0, cmd0.NPU_SET_OFM_PRECISION, 0),
        # Kernel, weights and scales
        (check_cmd0, cmd0.NPU_SET_KERNEL_HEIGHT_M1, 1),
        (check_cmd0, cmd0.NPU_SET_KERNEL_WIDTH_M1, 2),
        (check_cmd0, cmd0.NPU_SET_KERNEL_STRIDE, 5),
        (check_cmd0, cmd0.NPU_SET_WEIGHT_REGION, 0),
        (check_cmd1, cmd1.NPU_SET_WEIGHT_BASE, 0),
        (check_cmd1, cmd1.NPU_SET_WEIGHT_LENGTH, 7696),
        (check_cmd0, cmd0.NPU_SET_SCALE_REGION, 0),
        (check_cmd1, cmd1.NPU_SET_SCALE_BASE, 32000),
        (check_cmd1, cmd1.NPU_SET_SCALE_LENGTH, 464),
        # Activation
        (check_cmd0, cmd0.NPU_SET_ACTIVATION, 0),
        (check_cmd0, cmd0.NPU_SET_ACTIVATION_MIN, 0),
        (check_cmd0, cmd0.NPU_SET_ACTIVATION_MAX, 255),
        # Block config and operation
        (check_cmd0, cmd0.NPU_SET_OFM_BLK_HEIGHT_M1, 15),
        (check_cmd0, cmd0.NPU_SET_OFM_BLK_WIDTH_M1, 3),
        (check_cmd0, cmd0.NPU_SET_OFM_BLK_DEPTH_M1, 15),
        (check_cmd0, cmd0.NPU_SET_ACC_FORMAT, 0),
        (check_cmd0, cmd0.NPU_SET_BLOCKDEP, 0),
        (check_cmd0, cmd0.NPU_OP_CONV, 0),
    ]
    for check, command, value in expected_settings:
        check(stream, command, value)

    # IB_END must be non-zero and must not exceed AB_START
    ib_end = find_cmd0(stream, cmd0.NPU_SET_IFM_IB_END)
    ab_start = find_cmd0(stream, cmd0.NPU_SET_AB_START)
    assert 0 < ib_end
    assert not ib_end > ab_start
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100177
178
def create_fully_connected_op() -> NpuConv2DOperation:
    """Creates the 1x1 conv2d operation (NHCWB16 feature maps, depth-first traversal) shared by several tests"""
    ifm = create_feature_map(
        NpuShape3D(height=1, width=1, depth=114),
        1,
        0,
        quant=NpuQuantization(scale_f32=0.007843138, zero_point=128),
        layout=NpuLayout.NHCWB16,
    )
    ofm = create_feature_map(
        NpuShape3D(height=1, width=1, depth=96),
        1,
        0x6A0,
        quant=NpuQuantization(scale_f32=0.20392157, zero_point=128),
        layout=NpuLayout.NHCWB16,
    )
    fc_op = NpuConv2DOperation()
    fc_op.ifm = ifm
    fc_op.ofm = ofm
    fc_op.kernel = NpuKernel(1, 1)
    fc_op.weights = [NpuAddressRange(region=0, address=0x16880, length=13120)]
    fc_op.biases = [NpuAddressRange(region=0, address=0x19BC0, length=960)]
    fc_op.padding = NpuPadding(top=0, left=0, right=0, bottom=0)
    fc_op.block_traversal = NpuBlockTraversal.DEPTH_FIRST
    fc_op.block_config = NpuShape3D(height=2, width=4, depth=96)
    return fc_op
202
203
def test_fully_connected():
    """Generates a command stream for the fully connected operation and checks it contains a CONV op"""
    stream = npu_generate_register_command_stream([create_fully_connected_op()], NpuAccelerator.Ethos_U55_128)
    check_cmd0(stream, cmd0.NPU_OP_CONV, 0)
    # Besides the operation itself, a number of register settings are expected
    assert len(stream) > 20
210
211
def test_depthwise():
    """Test depthwise operation, preceded by a DMA operation that writes the weights"""
    # The DMA copies 96 bytes from region 0 to region 1; the destination doubles as the weights range
    dma_src = NpuAddressRange(region=0, address=0x40, length=96)
    dma_dest = NpuAddressRange(region=1, address=0x10000, length=96)
    dma = NpuDmaOperation(dma_src, dma_dest)

    depthwise = NpuConvDepthWiseOperation()
    depthwise.ifm = create_feature_map(
        NpuShape3D(height=64, width=64, depth=8),
        1,
        0x0,
        quant=NpuQuantization(scale_f32=0.007843138, zero_point=128),
    )
    depthwise.ofm = create_feature_map(
        NpuShape3D(height=64, width=64, depth=8),
        1,
        0x8000,
        quant=NpuQuantization(scale_f32=0.062745101749897, zero_point=128),
    )
    depthwise.kernel = NpuKernel(3, 3)
    depthwise.padding = NpuPadding(top=1, left=1, right=1, bottom=1)
    depthwise.weights = [dma_dest]
    depthwise.biases = [NpuAddressRange(region=0, address=0, length=80)]
    depthwise.block_config = NpuShape3D(height=8, width=12, depth=8)

    stream = npu_generate_register_command_stream([dma, depthwise], NpuAccelerator.Ethos_U55_128)
    check_cmd0(stream, cmd0.NPU_SET_DMA0_SRC_REGION, 0)
    check_cmd1(stream, cmd1.NPU_SET_DMA0_SRC, 0x40)
    check_cmd0(stream, cmd0.NPU_SET_DMA0_DST_REGION, 1)
    check_cmd1(stream, cmd1.NPU_SET_DMA0_DST, 0x10000)
    check_cmd1(stream, cmd1.NPU_SET_DMA0_LEN, 96)
    check_cmd0(stream, cmd0.NPU_OP_DMA_START, 0)
    # The generator is expected to have added a DMA WAIT
    check_cmd0(stream, cmd0.NPU_OP_DMA_WAIT, 0)
    check_cmd0(stream, cmd0.NPU_OP_DEPTHWISE, 0)
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100237
238
def test_mul_with_broadcast_and_relu():
    """Test elementwise multiplication where IFM2 is broadcast, with a RELU activation"""
    mul = NpuElementWiseOperation(NpuElementWiseOp.MUL)
    mul.ifm = create_feature_map(NpuShape3D(height=31, width=22, depth=31), 1, 0x20)
    mul.ifm2 = create_feature_map(NpuShape3D(height=1, width=22, depth=1), 1, 0)
    mul.ofm = create_feature_map(NpuShape3D(height=31, width=22, depth=31), 1, 0x52C0)
    mul.activation = NpuActivation(NpuActivationOp.NONE_OR_RELU)
    mul.activation.min = 0  # RELU
    target = NpuAccelerator.Ethos_U55_32
    # Use the first block config proposed by npu_find_block_configs
    mul.block_config = npu_find_block_configs(mul, target)[0]
    stream = npu_generate_register_command_stream([mul], target)

    # (checker, command, expected value[, param]); evaluated top to bottom
    expected_settings = [
        (check_cmd1, cmd1.NPU_SET_OFM_SCALE, 1073741824, 30),
        # IFM
        (check_cmd0, cmd0.NPU_SET_IFM_REGION, 1),
        (check_cmd1, cmd1.NPU_SET_IFM_BASE0, 32),
        (check_cmd1, cmd1.NPU_SET_IFM_BASE1, 0),
        (check_cmd1, cmd1.NPU_SET_IFM_BASE2, 0),
        (check_cmd1, cmd1.NPU_SET_IFM_BASE3, 0),
        (check_cmd0, cmd0.NPU_SET_IFM_HEIGHT0_M1, 30),
        (check_cmd0, cmd0.NPU_SET_IFM_HEIGHT1_M1, 30),
        (check_cmd0, cmd0.NPU_SET_IFM_WIDTH0_M1, 21),
        (check_cmd0, cmd0.NPU_SET_IFM_DEPTH_M1, 30),
        (check_cmd1, cmd1.NPU_SET_IFM_STRIDE_C, 1),
        (check_cmd1, cmd1.NPU_SET_IFM_STRIDE_Y, 682),
        (check_cmd1, cmd1.NPU_SET_IFM_STRIDE_X, 31),
        (check_cmd0, cmd0.NPU_SET_IFM_ZERO_POINT, 0),
        (check_cmd0, cmd0.NPU_SET_IFM_PRECISION, 0),
        (check_cmd0, cmd0.NPU_SET_IFM_UPSCALE, 0),
        # OFM
        (check_cmd0, cmd0.NPU_SET_OFM_REGION, 1),
        (check_cmd1, cmd1.NPU_SET_OFM_BASE0, 21184),
        (check_cmd1, cmd1.NPU_SET_OFM_BASE1, 0),
        (check_cmd1, cmd1.NPU_SET_OFM_BASE2, 0),
        (check_cmd1, cmd1.NPU_SET_OFM_BASE3, 0),
        (check_cmd0, cmd0.NPU_SET_OFM_HEIGHT0_M1, 30),
        (check_cmd0, cmd0.NPU_SET_OFM_HEIGHT1_M1, 30),
        (check_cmd0, cmd0.NPU_SET_OFM_WIDTH0_M1, 21),
        (check_cmd0, cmd0.NPU_SET_OFM_HEIGHT_M1, 30),
        (check_cmd0, cmd0.NPU_SET_OFM_WIDTH_M1, 21),
        (check_cmd0, cmd0.NPU_SET_OFM_DEPTH_M1, 30),
        (check_cmd1, cmd1.NPU_SET_OFM_STRIDE_C, 1),
        (check_cmd1, cmd1.NPU_SET_OFM_STRIDE_Y, 682),
        (check_cmd1, cmd1.NPU_SET_OFM_STRIDE_X, 31),
        (check_cmd0, cmd0.NPU_SET_OFM_ZERO_POINT, 0),
        (check_cmd0, cmd0.NPU_SET_OFM_PRECISION, 256),
        # Activation
        (check_cmd0, cmd0.NPU_SET_ACTIVATION, 0),
        (check_cmd0, cmd0.NPU_SET_ACTIVATION_MIN, 0),
        (check_cmd0, cmd0.NPU_SET_ACTIVATION_MAX, 255),
        # IFM2
        (check_cmd0, cmd0.NPU_SET_IFM2_REGION, 1),
        (check_cmd1, cmd1.NPU_SET_IFM2_BASE0, 0),
        (check_cmd1, cmd1.NPU_SET_IFM2_BASE1, 0),
        (check_cmd1, cmd1.NPU_SET_IFM2_BASE2, 0),
        (check_cmd1, cmd1.NPU_SET_IFM2_BASE3, 0),
        (check_cmd0, cmd0.NPU_SET_IFM2_HEIGHT0_M1, 0),
        (check_cmd0, cmd0.NPU_SET_IFM2_HEIGHT1_M1, 0),
        (check_cmd0, cmd0.NPU_SET_IFM2_WIDTH0_M1, 21),
        (check_cmd1, cmd1.NPU_SET_IFM2_STRIDE_C, 1),
        (check_cmd1, cmd1.NPU_SET_IFM2_STRIDE_Y, 22),
        (check_cmd1, cmd1.NPU_SET_IFM2_STRIDE_X, 1),
        (check_cmd0, cmd0.NPU_SET_IFM2_ZERO_POINT, 0),
        (check_cmd0, cmd0.NPU_SET_IFM2_PRECISION, 0),
        (check_cmd0, cmd0.NPU_SET_IFM2_BROADCAST, 5),
        # Buffers and operation
        (check_cmd0, cmd0.NPU_SET_IFM_IB_END, 16),
        (check_cmd0, cmd0.NPU_SET_ACC_FORMAT, 0),
        (check_cmd0, cmd0.NPU_SET_BLOCKDEP, 0),
        (check_cmd0, cmd0.NPU_OP_ELEMENTWISE, 0),
    ]
    for check, command, *args in expected_settings:
        check(stream, command, *args)

    ab_start = find_cmd0(stream, cmd0.NPU_SET_AB_START)
    assert 0 < ab_start
    ifm2_ib_start = find_cmd0(stream, cmd0.NPU_SET_IFM2_IB_START)
    assert ifm2_ib_start > 0 and ifm2_ib_start < ab_start

    # Check that the generated block height/width/depth fit
    height_m1 = find_cmd0(stream, cmd0.NPU_SET_OFM_BLK_HEIGHT_M1)
    width_m1 = find_cmd0(stream, cmd0.NPU_SET_OFM_BLK_WIDTH_M1)
    depth_m1 = find_cmd0(stream, cmd0.NPU_SET_OFM_BLK_DEPTH_M1)
    assert 0 <= height_m1
    assert 0 <= width_m1
    assert 0 <= depth_m1
    # NOTE(review): depth is added rather than multiplied into the bound - confirm this is intended
    assert (height_m1 + 1) * (width_m1 + 1) + (depth_m1 + 1) <= 3072
316
317
def create_avg_pool_op() -> NpuPoolingOperation:
    """Creates the average pool operation (feature maps in region 2) shared by several tests"""
    ifm_quant = NpuQuantization(scale_f32=0.007843138, zero_point=128)
    ofm_quant = NpuQuantization(scale_f32=0.20392157, zero_point=128)
    pool = NpuPoolingOperation(NpuPoolingOp.AVERAGE)
    pool.ifm = create_feature_map(NpuShape3D(height=29, width=30, depth=27), 2, 0, quant=ifm_quant)
    pool.ofm = create_feature_map(NpuShape3D(height=10, width=10, depth=27), 2, 0x5BD0, quant=ofm_quant)
    pool.kernel = NpuKernel(8, 2, 3, 3)
    pool.padding = NpuPadding(top=0, left=2, right=3, bottom=0)
    # Fixed block config
    pool.block_config = NpuShape3D(height=4, width=4, depth=16)
    return pool
334
335
def test_avg_pool():
    """Generates a command stream for the average pool operation and checks it contains a POOL op"""
    stream = npu_generate_register_command_stream([create_avg_pool_op()], NpuAccelerator.Ethos_U55_128)
    check_cmd0(stream, cmd0.NPU_OP_POOL, 1)
    # Besides the operation itself, a number of register settings are expected
    assert len(stream) > 10
342
343
def test_two_operations():
    """Tests code generation for a fully connected operation followed by an average pool"""
    operations = [create_fully_connected_op(), create_avg_pool_op()]
    stream = npu_generate_register_command_stream(operations, NpuAccelerator.Ethos_U55_64)
    check_cmd0(stream, cmd0.NPU_OP_POOL, 1)
    check_cmd0(stream, cmd0.NPU_OP_CONV, 0)
    check_cmd0(stream, cmd0.NPU_SET_BLOCKDEP, 0)
    # The operations are not dependent, so expect a blockdep 3
    check_cmd0(stream, cmd0.NPU_SET_BLOCKDEP, 3)
    assert len(stream) > 10
355
356
def test_dma_op():
    """Tests DMA operation followed by average pool.

    The DMA destination is the first address range of the average pool's OFM,
    so a DMA wait is expected in the generated stream.
    """
    pool = create_avg_pool_op()
    assert pool.ofm is not None
    dma_dest = get_address_ranges(pool.ofm)[0]
    assert dma_dest is not None
    # Source has the same length as the destination range
    dma_src = NpuAddressRange(0, 0x24000, dma_dest.length)
    dma = NpuDmaOperation(dma_src, dma_dest)
    stream = npu_generate_register_command_stream([dma, pool], NpuAccelerator.Ethos_U55_64)
    check_cmd0(stream, cmd0.NPU_OP_DMA_START, 0)
    # The generator is expected to have added a DMA WAIT
    check_cmd0(stream, cmd0.NPU_OP_DMA_WAIT, 0)
    check_cmd0(stream, cmd0.NPU_OP_POOL, 1)
Louis Verhaard024c3552021-03-17 14:26:34 +0100370
371
def test_check_mem_limits():
    """Tests that no code is generated with addresses or regions out of bounds"""

    def expect_vela_error(npu_op, accelerator):
        # Command stream generation must be refused for this op/accelerator pair
        with pytest.raises(VelaError):
            npu_generate_register_command_stream([npu_op], accelerator)

    # Bias whose end address is out of range
    fc_op = create_fully_connected_op()
    fc_op.biases = [NpuAddressRange(region=0, address=(1 << 32) - 16, length=1000)]
    expect_vela_error(fc_op, NpuAccelerator.Ethos_U55_64)
    # The same operation should be accepted for Ethos_U65_512
    npu_generate_register_command_stream([fc_op], NpuAccelerator.Ethos_U65_512)

    # Weights whose end address is out of range
    fc_op = create_fully_connected_op()
    fc_op.weights = [NpuAddressRange(region=0, address=(1 << 40) - 960, length=1000)]
    expect_vela_error(fc_op, NpuAccelerator.Ethos_U65_256)

    # Bias with a high end address that is still within range
    high_addr = (1 << 40) - 1024
    fc_op = create_fully_connected_op()
    fc_op.biases = [NpuAddressRange(region=0, address=high_addr, length=1000)]
    stream = npu_generate_register_command_stream([fc_op], NpuAccelerator.Ethos_U65_512)
    # The low 32 address bits are the payload, the next 16 bits the command parameter
    low_32_bits = high_addr & ((1 << 32) - 1)
    next_16_bits = (high_addr >> 32) & ((1 << 16) - 1)
    check_cmd1(stream, cmd1.NPU_SET_SCALE_BASE, low_32_bits, next_16_bits)

    # Weights with a negative address
    fc_op = create_fully_connected_op()
    fc_op.weights = [NpuAddressRange(region=0, address=-16, length=1000)]
    expect_vela_error(fc_op, NpuAccelerator.Ethos_U55_32)

    # End address of the fourth tile out of range
    pool_op = create_avg_pool_op()
    pool_op.ifm.tiles = NpuTileBox(width_0=1, height_0=1, height_1=1, addresses=[0, 800, 4000, (1 << 32) - 16])
    expect_vela_error(pool_op, NpuAccelerator.Ethos_U55_256)

    # IFM region out of range
    pool_op = create_avg_pool_op()
    pool_op.ifm.region = 8
    expect_vela_error(pool_op, NpuAccelerator.Ethos_U55_64)
407
408
def test_check_sram_limit_spilling():
    """Tests that no code is generated with addresses outside available sram spilling range"""
    arch = create_default_arch(Accelerator.Ethos_U65_512)
    assert arch.is_spilling_enabled()

    pool = create_avg_pool_op()
    pool.ifm.region = 0
    # Place the OFM in scratch fast memory, at address 32K
    pool.ofm.region = int(BasePointerIndex.ScratchFastTensor)
    ofm_width = pool.ofm.shape.width
    ofm_height = pool.ofm.shape.height
    pool.ofm.tiles = NpuTileBox(
        width_0=ofm_width, height_0=ofm_height, height_1=ofm_height, addresses=[32 * 1024, 0, 0, 0]
    )

    # 384K for spilling should fit
    arch.arena_cache_size = 384 * 1024
    limits = get_mem_limits_for_regions(arch)
    generate_command_stream([pool], arch, verbose=False, mem_limits=limits)

    # 32K for spilling does not fit, due to the OFM address
    arch.arena_cache_size = 32 * 1024
    limits = get_mem_limits_for_regions(arch)
    with pytest.raises(VelaError):
        generate_command_stream([pool], arch, verbose=False, mem_limits=limits)