blob: b1d31413fa3869d100107ad6bbcbb06da6e4590b [file] [log] [blame]
Alexander Hansson2d54e5c2023-06-21 09:19:04 +00001# SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
Louis Verhaarde8a5a782020-11-02 18:04:27 +01002#
3# SPDX-License-Identifier: Apache-2.0
4#
5# Licensed under the Apache License, Version 2.0 (the License); you may
6# not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an AS IS BASIS, WITHOUT
13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17# Description:
Louis Verhaardaeae5672020-11-02 18:04:27 +010018# Contains unit tests for npu_generate_register_command_stream API for an external consumer
Louis Verhaard024c3552021-03-17 14:26:34 +010019import pytest
20
Louis Verhaard933f55e2020-11-25 14:10:30 +010021from ethosu.vela.api import npu_find_block_configs
Louis Verhaardaeae5672020-11-02 18:04:27 +010022from ethosu.vela.api import npu_generate_register_command_stream
23from ethosu.vela.api import NpuAccelerator
Louis Verhaarde8a5a782020-11-02 18:04:27 +010024from ethosu.vela.api import NpuActivation
25from ethosu.vela.api import NpuActivationOp
26from ethosu.vela.api import NpuAddressRange
27from ethosu.vela.api import NpuBlockTraversal
28from ethosu.vela.api import NpuConv2DOperation
29from ethosu.vela.api import NpuConvDepthWiseOperation
30from ethosu.vela.api import NpuDataType
31from ethosu.vela.api import NpuDmaOperation
32from ethosu.vela.api import NpuElementWiseOp
33from ethosu.vela.api import NpuElementWiseOperation
34from ethosu.vela.api import NpuFeatureMap
35from ethosu.vela.api import NpuKernel
36from ethosu.vela.api import NpuLayout
37from ethosu.vela.api import NpuPadding
38from ethosu.vela.api import NpuPoolingOp
39from ethosu.vela.api import NpuPoolingOperation
40from ethosu.vela.api import NpuQuantization
41from ethosu.vela.api import NpuShape3D
42from ethosu.vela.api import NpuTileBox
Louis Verhaard024c3552021-03-17 14:26:34 +010043from ethosu.vela.architecture_features import Accelerator
44from ethosu.vela.architecture_features import create_default_arch
45from ethosu.vela.errors import VelaError
Louis Verhaarde8a5a782020-11-02 18:04:27 +010046from ethosu.vela.ethos_u55_regs.ethos_u55_regs import cmd0
47from ethosu.vela.ethos_u55_regs.ethos_u55_regs import cmd1
Louis Verhaard024c3552021-03-17 14:26:34 +010048from ethosu.vela.high_level_command_to_npu_op import BasePointerIndex
49from ethosu.vela.high_level_command_to_npu_op import get_mem_limits_for_regions
Louis Verhaarde8a5a782020-11-02 18:04:27 +010050from ethosu.vela.register_command_stream_generator import CmdMode
Louis Verhaard024c3552021-03-17 14:26:34 +010051from ethosu.vela.register_command_stream_generator import generate_command_stream
Louis Verhaard1e170182020-11-26 11:42:04 +010052from ethosu.vela.register_command_stream_util import get_address_ranges
Louis Verhaarde8a5a782020-11-02 18:04:27 +010053
54
def check_cmd0(cmd_stream, cmd, param, idx=0):
    """
    Checks that command + parameter exists in the command stream at or after position idx.

    cmd_stream: indexable sequence of command words (integers)
    cmd: command identifier; its code is read from cmd.value
    param: expected parameter; converted to int and truncated to 16 bits before matching
    idx: first position in the command stream that is inspected (inclusive)

    Returns the position of the first matching word, otherwise raises AssertionError.
    """
    param = int(param) & 0xFFFF
    # The word that is searched for: command code OR:ed with the parameter shifted up 16 bits
    command = cmd.value | (param << 16)
    for i in range(idx, len(cmd_stream)):
        if cmd_stream[i] == command:
            return i
    # Raised explicitly instead of "assert False": assert statements are removed when Python
    # runs with -O, which would make this helper silently return None on a missing command
    raise AssertionError(f"{cmd} {param} not found in the command stream (after position {idx})")
Louis Verhaarde8a5a782020-11-02 18:04:27 +010066
67
def check_cmd1(cmd_stream, cmd, offset, param=0x0, idx=0):
    """
    Checks that command + parameter, followed by the payload word, exists in the command stream
    at or after position idx.

    cmd_stream: indexable sequence of command words (integers)
    cmd: command identifier; its code is read from cmd.value
    offset: expected payload word; converted to int and truncated to 32 bits before matching
    param: expected parameter, placed in the command word shifted up 16 bits (not truncated here)
    idx: first position in the command stream that is inspected (inclusive)

    Returns the position of the command word of the first match, otherwise raises AssertionError.
    """
    offset = int(offset) & 0xFFFFFFFF
    # First word: command code, the Payload32 mode value and the parameter; second word: the payload
    command = cmd.value | CmdMode.Payload32.value | (param << 16)
    # Stop one short of the end so that the payload word at i + 1 always exists
    for i in range(idx, len(cmd_stream) - 1):
        if cmd_stream[i] == command and cmd_stream[i + 1] == offset:
            return i
    # Raised explicitly instead of "assert False": assert statements are removed when Python
    # runs with -O, which would make this helper silently return None on a missing command
    raise AssertionError(f"{cmd} {offset} {param} not found in the command stream (after position {idx})")
Louis Verhaarde8a5a782020-11-02 18:04:27 +010079
80
def find_cmd0(cmd_stream, cmd) -> int:
    """
    Returns parameter of the first command in the stream that matches the given command.

    A word matches when its lower 16 bits equal cmd.value; the parameter is taken from
    bits 16-31 of that word. Raises AssertionError if no word matches.
    """
    # NOTE: every word is compared, so a word whose lower 16 bits happen to equal cmd.value
    # matches regardless of what that word represents in the stream
    for command in cmd_stream:
        if (command & 0xFFFF) == cmd.value:
            return (command >> 16) & 0xFFFF
    # Raised explicitly instead of "assert False": assert statements are removed when Python
    # runs with -O, which would make this helper silently return None on a missing command
    raise AssertionError(f"Not in command stream: {cmd}")
87
88
def create_feature_map(
    shape: NpuShape3D,
    region: int,
    address: int,
    dtype: NpuDataType = NpuDataType.UINT8,
    layout: NpuLayout = NpuLayout.NHWC,
    quant=NpuQuantization(scale_f32=1, zero_point=0),
) -> NpuFeatureMap:
    """
    Creates feature map using 1 tile.

    The tile box spans the full width and height of the shape; only the first of the four
    tile addresses is set to the given address, the remaining three are 0.
    """
    single_tile = NpuTileBox(
        width_0=shape.width,
        height_0=shape.height,
        height_1=shape.height,
        addresses=[address, 0, 0, 0],
    )
    feature_map = NpuFeatureMap()
    feature_map.shape = shape
    feature_map.data_type = dtype
    feature_map.layout = layout
    feature_map.region = region
    feature_map.tiles = single_tile
    feature_map.quantization = quant
    return feature_map
108
109
def test_conv2d():
    """Tests command stream generation for a conv2d operation"""
    ifm_quant = NpuQuantization(scale_f32=0.007843138, zero_point=128)
    ofm_quant = NpuQuantization(scale_f32=0.20392157, zero_point=128)
    op = NpuConv2DOperation()
    op.ifm = create_feature_map(NpuShape3D(height=30, width=62, depth=46), 1, 512, quant=ifm_quant)
    op.ofm = create_feature_map(NpuShape3D(height=30, width=31, depth=46), 1, 0x14E40, quant=ofm_quant)
    op.kernel = NpuKernel(3, 2, 2, 1)
    op.weights = [NpuAddressRange(region=0, address=0, length=7696)]
    op.biases = [NpuAddressRange(region=0, address=32000, length=464)]
    op.padding = NpuPadding(top=0, left=0, right=1, bottom=1)
    op.block_traversal = NpuBlockTraversal.PART_KERNEL_FIRST
    op.block_config = NpuShape3D(height=16, width=4, depth=16)
    cmds = npu_generate_register_command_stream([op], NpuAccelerator.Ethos_U55_128)
    # Each entry: (checker, command, expected value); all of these must be in the stream
    expected_settings = [
        (check_cmd0, cmd0.NPU_SET_IFM_REGION, 1),
        (check_cmd1, cmd1.NPU_SET_IFM_BASE0, 512),
        (check_cmd1, cmd1.NPU_SET_IFM_BASE1, 0),
        (check_cmd1, cmd1.NPU_SET_IFM_BASE2, 0),
        (check_cmd1, cmd1.NPU_SET_IFM_BASE3, 0),
        (check_cmd0, cmd0.NPU_SET_IFM_HEIGHT0_M1, 29),
        (check_cmd0, cmd0.NPU_SET_IFM_HEIGHT1_M1, 29),
        (check_cmd0, cmd0.NPU_SET_IFM_WIDTH0_M1, 61),
        (check_cmd0, cmd0.NPU_SET_IFM_DEPTH_M1, 45),
        (check_cmd1, cmd1.NPU_SET_IFM_STRIDE_C, 1),
        (check_cmd1, cmd1.NPU_SET_IFM_STRIDE_Y, 2852),
        (check_cmd1, cmd1.NPU_SET_IFM_STRIDE_X, 46),
        (check_cmd0, cmd0.NPU_SET_IFM_ZERO_POINT, 128),
        (check_cmd0, cmd0.NPU_SET_IFM_PRECISION, 0),
        (check_cmd0, cmd0.NPU_SET_IFM_UPSCALE, 0),
        (check_cmd0, cmd0.NPU_SET_IFM_PAD_TOP, 0),
        (check_cmd0, cmd0.NPU_SET_IFM_PAD_LEFT, 0),
        (check_cmd0, cmd0.NPU_SET_IFM_PAD_BOTTOM, 1),
        (check_cmd0, cmd0.NPU_SET_IFM_PAD_RIGHT, 1),
        (check_cmd0, cmd0.NPU_SET_OFM_REGION, 1),
        (check_cmd1, cmd1.NPU_SET_OFM_BASE0, 85568),
        (check_cmd1, cmd1.NPU_SET_OFM_BASE1, 0),
        (check_cmd1, cmd1.NPU_SET_OFM_BASE2, 0),
        (check_cmd1, cmd1.NPU_SET_OFM_BASE3, 0),
        (check_cmd0, cmd0.NPU_SET_OFM_HEIGHT0_M1, 29),
        (check_cmd0, cmd0.NPU_SET_OFM_HEIGHT1_M1, 29),
        (check_cmd0, cmd0.NPU_SET_OFM_WIDTH0_M1, 30),
        (check_cmd0, cmd0.NPU_SET_OFM_HEIGHT_M1, 29),
        (check_cmd0, cmd0.NPU_SET_OFM_WIDTH_M1, 30),
        (check_cmd0, cmd0.NPU_SET_OFM_DEPTH_M1, 45),
        (check_cmd1, cmd1.NPU_SET_OFM_STRIDE_C, 1),
        (check_cmd1, cmd1.NPU_SET_OFM_STRIDE_Y, 1426),
        (check_cmd1, cmd1.NPU_SET_OFM_STRIDE_X, 46),
        (check_cmd0, cmd0.NPU_SET_OFM_ZERO_POINT, 128),
        (check_cmd0, cmd0.NPU_SET_OFM_PRECISION, 0),
        (check_cmd0, cmd0.NPU_SET_KERNEL_HEIGHT_M1, 1),
        (check_cmd0, cmd0.NPU_SET_KERNEL_WIDTH_M1, 2),
        (check_cmd0, cmd0.NPU_SET_KERNEL_STRIDE, 5),
        (check_cmd0, cmd0.NPU_SET_WEIGHT_REGION, 0),
        (check_cmd1, cmd1.NPU_SET_WEIGHT_BASE, 0),
        (check_cmd1, cmd1.NPU_SET_WEIGHT_LENGTH, 7696),
        (check_cmd0, cmd0.NPU_SET_SCALE_REGION, 0),
        (check_cmd1, cmd1.NPU_SET_SCALE_BASE, 32000),
        (check_cmd1, cmd1.NPU_SET_SCALE_LENGTH, 464),
        (check_cmd0, cmd0.NPU_SET_ACTIVATION, 0),
        (check_cmd0, cmd0.NPU_SET_ACTIVATION_MIN, 0),
        (check_cmd0, cmd0.NPU_SET_ACTIVATION_MAX, 255),
        (check_cmd0, cmd0.NPU_SET_OFM_BLK_HEIGHT_M1, 15),
        (check_cmd0, cmd0.NPU_SET_OFM_BLK_WIDTH_M1, 3),
        (check_cmd0, cmd0.NPU_SET_OFM_BLK_DEPTH_M1, 15),
        (check_cmd0, cmd0.NPU_SET_ACC_FORMAT, 0),
        (check_cmd0, cmd0.NPU_SET_BLOCKDEP, 0),
    ]
    # Positions in the command stream of all the expected SET commands
    set_cmds = [check(cmds, cmd, value) for check, cmd, value in expected_settings]
    conv_idx = check_cmd0(cmds, cmd0.NPU_OP_CONV, 0)
    # The operation itself must come after every SET command that was checked above
    assert all(set_idx < conv_idx for set_idx in set_cmds), "NPU_OP_CONV occured before the last SET operation."
    ib_end = find_cmd0(cmds, cmd0.NPU_SET_IFM_IB_END)
    ab_start = find_cmd0(cmds, cmd0.NPU_SET_AB_START)
    assert 0 < ib_end
    assert ab_start >= ib_end
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100188
189
def create_fully_connected_op() -> NpuConv2DOperation:
    """Creates a 1x1 conv2d operation on 1x1 NHCWB16 feature maps (IFM depth 114, OFM depth 96)"""
    ifm_quant = NpuQuantization(scale_f32=0.007843138, zero_point=128)
    ofm_quant = NpuQuantization(scale_f32=0.20392157, zero_point=128)
    ifm_shape = NpuShape3D(height=1, width=1, depth=114)
    ofm_shape = NpuShape3D(height=1, width=1, depth=96)
    op = NpuConv2DOperation()
    op.ifm = create_feature_map(ifm_shape, 1, 0, quant=ifm_quant, layout=NpuLayout.NHCWB16)
    op.ofm = create_feature_map(ofm_shape, 1, 0x6A0, quant=ofm_quant, layout=NpuLayout.NHCWB16)
    op.kernel = NpuKernel(1, 1)
    op.weights = [NpuAddressRange(region=0, address=0x16880, length=13120)]
    op.biases = [NpuAddressRange(region=0, address=0x19BC0, length=960)]
    op.padding = NpuPadding(top=0, left=0, right=0, bottom=0)
    op.block_traversal = NpuBlockTraversal.DEPTH_FIRST
    op.block_config = NpuShape3D(height=2, width=4, depth=96)
    return op
213
214
def test_fully_connected():
    """Tests command stream generation for a fully connected operation"""
    fc_op = create_fully_connected_op()
    cmds = npu_generate_register_command_stream([fc_op], NpuAccelerator.Ethos_U55_128)
    # The operation is expressed as a conv2d, so a CONV command is expected
    check_cmd0(cmds, cmd0.NPU_OP_CONV, param=0)
    assert 20 < len(cmds)
221
222
def test_depthwise():
    """Test depthwise operation, preceded by DMA operation"""
    # The DMA copies the weights to the address range that the depthwise operation reads them from
    weights_src = NpuAddressRange(region=0, address=0x40, length=96)
    weights_dest = NpuAddressRange(region=1, address=0x10000, length=96)
    dma_op = NpuDmaOperation(weights_src, weights_dest)
    fm_shape = NpuShape3D(height=64, width=64, depth=8)
    op = NpuConvDepthWiseOperation()
    ifm_quant = NpuQuantization(scale_f32=0.007843138, zero_point=128)
    op.ifm = create_feature_map(fm_shape, 1, 0x0, quant=ifm_quant)
    ofm_quant = NpuQuantization(scale_f32=0.062745101749897, zero_point=128)
    op.ofm = create_feature_map(fm_shape, 1, 0x8000, quant=ofm_quant)
    op.kernel = NpuKernel(3, 3)
    op.padding = NpuPadding(top=1, left=1, right=1, bottom=1)
    op.weights = [weights_dest]
    op.biases = [NpuAddressRange(region=0, address=0, length=80)]
    op.block_config = NpuShape3D(height=8, width=12, depth=8)
    cmds = npu_generate_register_command_stream([dma_op, op], NpuAccelerator.Ethos_U55_128)
    # Each entry: (checker, command, expected value) for the DMA set up
    expected_dma_settings = [
        (check_cmd0, cmd0.NPU_SET_DMA0_SRC_REGION, 0),
        (check_cmd1, cmd1.NPU_SET_DMA0_SRC, 0x40),
        (check_cmd0, cmd0.NPU_SET_DMA0_DST_REGION, 1),
        (check_cmd1, cmd1.NPU_SET_DMA0_DST, 0x10000),
        (check_cmd1, cmd1.NPU_SET_DMA0_LEN, 96),
    ]
    set_cmds = [check(cmds, cmd, value) for check, cmd, value in expected_dma_settings]
    dma_start_idx = check_cmd0(cmds, cmd0.NPU_OP_DMA_START, 0)
    assert all(set_idx < dma_start_idx for set_idx in set_cmds), "DMA_START occured before the last SET_DMA operation"
    # A DMA WAIT should have been inserted
    dma_wait_idx = check_cmd0(cmds, cmd0.NPU_OP_DMA_WAIT, 0, idx=dma_start_idx)
    # ...and the depthwise operation must not be issued before that wait
    check_cmd0(cmds, cmd0.NPU_OP_DEPTHWISE, 0, idx=dma_wait_idx)
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100250
251
def test_mul_with_broadcast_and_relu():
    """Test multiplication with broadcasted IFM2"""
    accelerator = NpuAccelerator.Ethos_U55_32
    full_shape = NpuShape3D(height=31, width=22, depth=31)
    op = NpuElementWiseOperation(NpuElementWiseOp.MUL)
    op.ifm = create_feature_map(full_shape, 1, 0x20)
    # IFM2 has height 1 and depth 1, so it is broadcast over the height and depth of the IFM
    op.ifm2 = create_feature_map(NpuShape3D(height=1, width=22, depth=1), 1, 0)
    op.ofm = create_feature_map(full_shape, 1, 0x52C0)
    op.activation = NpuActivation(NpuActivationOp.NONE_OR_RELU)
    op.activation.min = 0  # RELU
    # Select a block config using npu_find_block_configs
    op.block_config = npu_find_block_configs(op, accelerator)[0]
    cmds = npu_generate_register_command_stream([op], accelerator)
    # Each entry: (checker, command, expected value(s)); all of these must be in the stream
    expected_settings = [
        (check_cmd1, cmd1.NPU_SET_OFM_SCALE, 1073741824, 30),
        (check_cmd0, cmd0.NPU_SET_IFM_REGION, 1),
        (check_cmd1, cmd1.NPU_SET_IFM_BASE0, 32),
        (check_cmd1, cmd1.NPU_SET_IFM_BASE1, 0),
        (check_cmd1, cmd1.NPU_SET_IFM_BASE2, 0),
        (check_cmd1, cmd1.NPU_SET_IFM_BASE3, 0),
        (check_cmd0, cmd0.NPU_SET_IFM_HEIGHT0_M1, 30),
        (check_cmd0, cmd0.NPU_SET_IFM_HEIGHT1_M1, 30),
        (check_cmd0, cmd0.NPU_SET_IFM_WIDTH0_M1, 21),
        (check_cmd0, cmd0.NPU_SET_IFM_DEPTH_M1, 30),
        (check_cmd1, cmd1.NPU_SET_IFM_STRIDE_C, 1),
        (check_cmd1, cmd1.NPU_SET_IFM_STRIDE_Y, 682),
        (check_cmd1, cmd1.NPU_SET_IFM_STRIDE_X, 31),
        (check_cmd0, cmd0.NPU_SET_IFM_ZERO_POINT, 0),
        (check_cmd0, cmd0.NPU_SET_IFM_PRECISION, 0),
        (check_cmd0, cmd0.NPU_SET_IFM_UPSCALE, 0),
        (check_cmd0, cmd0.NPU_SET_OFM_REGION, 1),
        (check_cmd1, cmd1.NPU_SET_OFM_BASE0, 21184),
        (check_cmd1, cmd1.NPU_SET_OFM_BASE1, 0),
        (check_cmd1, cmd1.NPU_SET_OFM_BASE2, 0),
        (check_cmd1, cmd1.NPU_SET_OFM_BASE3, 0),
        (check_cmd0, cmd0.NPU_SET_OFM_HEIGHT0_M1, 30),
        (check_cmd0, cmd0.NPU_SET_OFM_HEIGHT1_M1, 30),
        (check_cmd0, cmd0.NPU_SET_OFM_WIDTH0_M1, 21),
        (check_cmd0, cmd0.NPU_SET_OFM_HEIGHT_M1, 30),
        (check_cmd0, cmd0.NPU_SET_OFM_WIDTH_M1, 21),
        (check_cmd0, cmd0.NPU_SET_OFM_DEPTH_M1, 30),
        (check_cmd1, cmd1.NPU_SET_OFM_STRIDE_C, 1),
        (check_cmd1, cmd1.NPU_SET_OFM_STRIDE_Y, 682),
        (check_cmd1, cmd1.NPU_SET_OFM_STRIDE_X, 31),
        (check_cmd0, cmd0.NPU_SET_OFM_ZERO_POINT, 0),
        (check_cmd0, cmd0.NPU_SET_OFM_PRECISION, 256),
        (check_cmd0, cmd0.NPU_SET_ACTIVATION, 0),
        (check_cmd0, cmd0.NPU_SET_ACTIVATION_MIN, 0),
        (check_cmd0, cmd0.NPU_SET_ACTIVATION_MAX, 255),
        (check_cmd0, cmd0.NPU_SET_IFM2_REGION, 1),
        (check_cmd1, cmd1.NPU_SET_IFM2_BASE0, 0),
        (check_cmd1, cmd1.NPU_SET_IFM2_BASE1, 0),
        (check_cmd1, cmd1.NPU_SET_IFM2_BASE2, 0),
        (check_cmd1, cmd1.NPU_SET_IFM2_BASE3, 0),
        (check_cmd0, cmd0.NPU_SET_IFM2_HEIGHT0_M1, 0),
        (check_cmd0, cmd0.NPU_SET_IFM2_HEIGHT1_M1, 0),
        (check_cmd0, cmd0.NPU_SET_IFM2_WIDTH0_M1, 21),
        (check_cmd1, cmd1.NPU_SET_IFM2_STRIDE_C, 1),
        (check_cmd1, cmd1.NPU_SET_IFM2_STRIDE_Y, 22),
        (check_cmd1, cmd1.NPU_SET_IFM2_STRIDE_X, 1),
        (check_cmd0, cmd0.NPU_SET_IFM2_ZERO_POINT, 0),
        (check_cmd0, cmd0.NPU_SET_IFM2_PRECISION, 0),
        (check_cmd0, cmd0.NPU_SET_IFM2_BROADCAST, 5),
        (check_cmd0, cmd0.NPU_SET_IFM_IB_END, 16),
        (check_cmd0, cmd0.NPU_SET_ACC_FORMAT, 0),
        (check_cmd0, cmd0.NPU_SET_BLOCKDEP, 0),
    ]
    # Positions in the command stream of all the expected SET commands
    set_cmds = [check(cmds, cmd, *values) for check, cmd, *values in expected_settings]
    elementwise_idx = check_cmd0(cmds, cmd0.NPU_OP_ELEMENTWISE, 0)
    # The operation itself must come after every SET command that was checked above
    assert all(set_idx < elementwise_idx for set_idx in set_cmds), "NPU_OP_ELEMENTWISE occured before the last SET cmd"
    ab_start = find_cmd0(cmds, cmd0.NPU_SET_AB_START)
    assert 0 < ab_start
    ifm2_ib_start = find_cmd0(cmds, cmd0.NPU_SET_IFM2_IB_START)
    assert ifm2_ib_start > 0 and ifm2_ib_start < ab_start
    # Check that block width/height were generated that fit
    blk_height, blk_width, blk_depth = (
        find_cmd0(cmds, blk_cmd)
        for blk_cmd in (
            cmd0.NPU_SET_OFM_BLK_HEIGHT_M1,
            cmd0.NPU_SET_OFM_BLK_WIDTH_M1,
            cmd0.NPU_SET_OFM_BLK_DEPTH_M1,
        )
    )
    assert 0 <= blk_height
    assert 0 <= blk_width
    assert 0 <= blk_depth
    # NOTE(review): the depth term is added rather than multiplied - confirm this is the intended limit
    assert (blk_height + 1) * (blk_width + 1) + (blk_depth + 1) <= 3072
331
332
def create_avg_pool_op() -> NpuPoolingOperation:
    """Creates an average pool operation with a 29x30x27 IFM and a 10x10x27 OFM, both in region 2"""
    ifm_quant = NpuQuantization(scale_f32=0.007843138, zero_point=128)
    ofm_quant = NpuQuantization(scale_f32=0.20392157, zero_point=128)
    op = NpuPoolingOperation(NpuPoolingOp.AVERAGE)
    op.ifm = create_feature_map(NpuShape3D(height=29, width=30, depth=27), 2, 0, quant=ifm_quant)
    op.ofm = create_feature_map(NpuShape3D(height=10, width=10, depth=27), 2, 0x5BD0, quant=ofm_quant)
    op.kernel = NpuKernel(8, 2, 3, 3)
    op.padding = NpuPadding(top=0, left=2, right=3, bottom=0)
    # Select a block config
    op.block_config = NpuShape3D(height=4, width=4, depth=16)
    return op
349
350
def test_avg_pool():
    """Tests average pool operation"""
    pool_op = create_avg_pool_op()
    cmds = npu_generate_register_command_stream([pool_op], NpuAccelerator.Ethos_U55_128)
    # The pool command is expected with parameter 1
    check_cmd0(cmds, cmd0.NPU_OP_POOL, param=1)
    assert 10 < len(cmds)
357
358
def test_two_operations():
    """Tests code generation with 2 operations"""
    ops = [create_fully_connected_op(), create_avg_pool_op()]
    cmds = npu_generate_register_command_stream(ops, NpuAccelerator.Ethos_U55_64)
    expected_cmds = (
        (cmd0.NPU_OP_POOL, 1),
        (cmd0.NPU_OP_CONV, 0),
        (cmd0.NPU_SET_BLOCKDEP, 0),
        # The operations are not dependent, so expect a blockdep 3
        (cmd0.NPU_SET_BLOCKDEP, 3),
    )
    # Only presence is verified here; each search starts from the beginning of the stream
    for cmd, param in expected_cmds:
        check_cmd0(cmds, cmd, param)
    assert 10 < len(cmds)
370
371
def test_dma_op():
    """Tests DMA operation followed by average pool. The DMA provides the contents of the average pool's IFM."""
    pool_op = create_avg_pool_op()
    pool_ifm = pool_op.ifm
    assert pool_ifm is not None
    # The first address range of the IFM is used as destination of the DMA
    dest = get_address_ranges(pool_ifm)[0]
    assert dest is not None
    src = NpuAddressRange(region=0, address=0x24000, length=dest.length)
    ops = [NpuDmaOperation(src, dest), pool_op]
    cmds = npu_generate_register_command_stream(ops, NpuAccelerator.Ethos_U55_64)
    dma_start_idx = check_cmd0(cmds, cmd0.NPU_OP_DMA_START, 0)
    # A DMA WAIT should have been inserted after the dma start
    dma_wait_idx = check_cmd0(cmds, cmd0.NPU_OP_DMA_WAIT, 0, idx=dma_start_idx)
    # ...and the pool operation must not be issued before that wait
    check_cmd0(cmds, cmd0.NPU_OP_POOL, 1, idx=dma_wait_idx)
Louis Verhaard024c3552021-03-17 14:26:34 +0100385
386
def test_check_mem_limits():
    """Tests that no code is generated with addresses out of bounds"""
    generate = npu_generate_register_command_stream

    # bias with end address out of range
    conv_op = create_fully_connected_op()
    conv_op.biases = [NpuAddressRange(region=0, address=(1 << 32) - 16, length=1000)]
    with pytest.raises(VelaError):
        generate([conv_op], NpuAccelerator.Ethos_U55_64)
    # same test should pass with Ethos_U65_512
    generate([conv_op], NpuAccelerator.Ethos_U65_512)

    # weights with end address out of range
    conv_op = create_fully_connected_op()
    conv_op.weights = [NpuAddressRange(region=0, address=(1 << 40) - 960, length=1000)]
    with pytest.raises(VelaError):
        generate([conv_op], NpuAccelerator.Ethos_U65_256)

    # bias with high end address, but still within range
    addr = (1 << 40) - 1024
    conv_op = create_fully_connected_op()
    conv_op.biases = [NpuAddressRange(region=0, address=addr, length=1000)]
    cmds = generate([conv_op], NpuAccelerator.Ethos_U65_512)
    # Lower 32 bits of the address are expected in the payload word, the next 16 bits in the parameter
    addr_low = addr & 0xFFFFFFFF
    addr_high = (addr >> 32) & 0xFFFF
    check_cmd1(cmds, cmd1.NPU_SET_SCALE_BASE, addr_low, addr_high)

    # weights with negative address
    conv_op = create_fully_connected_op()
    conv_op.weights = [NpuAddressRange(region=0, address=-16, length=1000)]
    with pytest.raises(VelaError):
        generate([conv_op], NpuAccelerator.Ethos_U55_32)

    # Tile 4's end address out of range
    pool_op = create_avg_pool_op()
    pool_op.ifm.tiles = NpuTileBox(width_0=1, height_0=1, height_1=1, addresses=[0, 800, 4000, (1 << 32) - 16])
    with pytest.raises(VelaError):
        generate([pool_op], NpuAccelerator.Ethos_U55_256)

    # IFM region out of range
    pool_op = create_avg_pool_op()
    pool_op.ifm.region = 8
    with pytest.raises(VelaError):
        generate([pool_op], NpuAccelerator.Ethos_U55_64)
422
423
def test_check_sram_limit_spilling():
    """Tests that no code is generated with addresses outside available sram spilling range"""
    arch = create_default_arch(Accelerator.Ethos_U65_512)
    assert arch.is_spilling_enabled()
    op = create_avg_pool_op()
    op.ifm.region = 0
    # OFM in scratch fast memory
    op.ofm.region = int(BasePointerIndex.ScratchFastTensor)
    ofm_shape = op.ofm.shape
    op.ofm.tiles = NpuTileBox(
        width_0=ofm_shape.width,
        height_0=ofm_shape.height,
        height_1=ofm_shape.height,
        addresses=[32 * 1024, 0, 0, 0],
    )
    # 384K for spilling should fit
    arch.arena_cache_size = 384 * 1024
    generate_command_stream([op], arch, verbose=False, mem_limits=get_mem_limits_for_regions(arch))
    # 32K for spilling does not fit, due to the OFM address
    arch.arena_cache_size = 32 * 1024
    # The limits are recomputed outside the raises-block so that only the generation is expected to fail
    small_limits = get_mem_limits_for_regions(arch)
    with pytest.raises(VelaError):
        generate_command_stream([op], arch, verbose=False, mem_limits=small_limits)