blob: 90732707a84f202a4d9f4dfc0e00c6431da8bf53 [file] [log] [blame]
# SPDX-FileCopyrightText: Copyright 2020-2021 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Description:
# Unit tests for LUT support
Jacob Bohlin1a666972020-09-11 10:04:15 +020019import random
20
Louis Verhaard0b8268a2020-08-05 16:11:29 +020021import numpy as np
22
Louis Verhaard0b8268a2020-08-05 16:11:29 +020023from ethosu.vela import lut
24from ethosu.vela import mark_tensors
25from ethosu.vela import pass_packing
26from ethosu.vela.data_type import DataType
27from ethosu.vela.high_level_command_stream import DMA
28from ethosu.vela.nn_graph import Graph
Louis Verhaardaee5d752020-09-30 09:01:52 +020029from ethosu.vela.operation import Op
Tim Halld8339a72021-05-27 18:49:40 +010030from ethosu.vela.rewrite_graph import rewrite_graph_pre_order
Louis Verhaard0b8268a2020-08-05 16:11:29 +020031from ethosu.vela.rewrite_graph import verify_graph_health
32from ethosu.vela.tensor import create_const_tensor
33from ethosu.vela.tensor import TensorPurpose
34from ethosu.vela.test import testutil
35
36
def set_256_lut(op, key, arch):
    """Attach a 256-entry, 8-bit activation LUT to ``op``.

    The table contents are pseudo-random but reproducible: the global
    ``random`` generator is seeded with ``key``, so two calls with the same
    key produce the same table values.
    """
    random.seed(key)
    table = random.choices(range(256), k=256)
    tensor_name = op.name + "_lut"
    const_lut = create_const_tensor(tensor_name, [1, 1, 1, 256], DataType.int8, table, np.uint8, TensorPurpose.LUT)
    # The op receives the clone returned by clone_into_fast_storage, not the constant tensor itself
    op.set_activation_lut(const_lut.clone_into_fast_storage(arch))
Louis Verhaard0b8268a2020-08-05 16:11:29 +020045
46
def set_1K_lut(op, key, arch):
    """Attach a 256-entry, 32-bit (1K byte) activation LUT to ``op``.

    The table contents are pseudo-random but reproducible: the global
    ``random`` generator is seeded with ``key``, so two calls with the same
    key produce the same table values.
    """
    random.seed(key)
    table = random.choices(range(256), k=256)
    tensor_name = op.name + "_lut"
    const_lut = create_const_tensor(tensor_name, [1, 1, 1, 256], DataType.int32, table, np.uint32, TensorPurpose.LUT)
    # The op receives the clone returned by clone_into_fast_storage, not the constant tensor itself
    op.set_activation_lut(const_lut.clone_into_fast_storage(arch))
Louis Verhaard0b8268a2020-08-05 16:11:29 +020055
56
def set_2K_lut(op, key, arch):
    """Attach a 512-entry, 32-bit (2K byte) activation LUT to ``op``.

    The table contents are pseudo-random but reproducible: the global
    ``random`` generator is seeded with ``key``, so two calls with the same
    key produce the same table values.
    """
    random.seed(key)
    table = random.choices(range(512), k=512)
    tensor_name = op.name + "_lut"
    const_lut = create_const_tensor(tensor_name, [1, 1, 1, 512], DataType.int32, table, np.uint32, TensorPurpose.LUT)
    # The op receives the clone returned by clone_into_fast_storage, not the constant tensor itself
    op.set_activation_lut(const_lut.clone_into_fast_storage(arch))
Louis Verhaard0b8268a2020-08-05 16:11:29 +020065
66
def process(arch, op_list):
    """Return a subgraph built from ``op_list`` with a DMA-only command stream.

    The operations are wrapped in a new graph, tensor purposes are marked,
    the graph is rewritten and packed into passes, and graph health is
    asserted along the way. The subgraph's high level command stream is then
    set to one DMA command per pass input that has a ``src_tensor``.
    """
    nng = Graph()
    sg = testutil.create_subgraph(op_list)
    nng.subgraphs.append(sg)
    assert verify_graph_health(nng)

    nng = mark_tensors.mark_tensor_purpose(nng, arch, False)
    assert verify_graph_health(nng)

    rewrite_graph_pre_order(nng, sg, arch, [], [])
    pass_packing.pack_into_passes(nng, arch, False)
    assert verify_graph_health(nng)

    # Only inputs that carry a source tensor get a DMA from that source into the input
    dma_cmds = []
    for ps in sg.passes:
        dma_cmds.extend(DMA(ps, tens.src_tensor, tens, None) for tens in ps.inputs if tens.src_tensor)

    sg.high_level_command_stream = dma_cmds
    return sg
87
88
def filter_lut_cmds(cmd_list):
    """Return the commands whose input tensor name contains "lut".

    Args:
        cmd_list: iterable of commands, each exposing ``in_tensor.name``.

    Returns:
        A new list holding the matching commands in their original order;
        ``cmd_list`` itself is not modified.
    """
    return [cmd for cmd in cmd_list if "lut" in cmd.in_tensor.name]
95
96
def test_optimize_high_level_cmd_stream_2K():
    """Test lut.optimize_high_level_cmd_stream, blending 256 byte and 2K luts."""
    arch = testutil.create_arch()
    shape = [1, 1, 1, 1]
    # u8 LUT op, should lead to DMA
    op0 = testutil.create_elemwise_op(Op.Add, "op0", shape, shape, shape)
    set_256_lut(op0, "lut0", arch)
    # u8 LUT op, should lead to DMA
    op1 = testutil.create_elemwise_op(Op.Add, "op1", shape, shape, shape)
    set_256_lut(op1, "lut1", arch)
    # u8 LUT op with different LUT, should lead to DMA
    op2 = testutil.create_elemwise_op(Op.Add, "op2", shape, shape, shape)
    set_256_lut(op2, "lut2", arch)
    # u8 LUT op with same LUT as in op1, should not lead to DMA
    op3 = testutil.create_elemwise_op(Op.Add, "op3", shape, shape, shape)
    set_256_lut(op3, "lut1", arch)
    # u8 LUT op with same LUT as in op2, should not lead to DMA
    op4 = testutil.create_elemwise_op(Op.Add, "op4", shape, shape, shape)
    set_256_lut(op4, "lut2", arch)
    # 2K LUT op, should lead to DMA, and will overwrite all previous LUTs in SHRAM
    op5_2K = testutil.create_elemwise_op(Op.Add, "op5", shape, shape, shape)
    set_2K_lut(op5_2K, "lut5", arch)
    # Another 2K LUT op, should lead to DMA, and will overwrite the previous LUT in SHRAM
    op6_2K = testutil.create_elemwise_op(Op.Add, "op6", shape, shape, shape)
    set_2K_lut(op6_2K, "lut6", arch)
    # u8 LUT op with same LUT as in op1, should lead to DMA
    op7 = testutil.create_elemwise_op(Op.Add, "op7", shape, shape, shape)
    set_256_lut(op7, "lut1", arch)

    op_list = [op0, op1, op2, op3, op4, op5_2K, op6_2K, op7]
    sg = process(arch, op_list)
    # Keep a separate list of the unoptimized LUT commands, so the address checks below do not
    # depend on whether the optimizer rebuilds the command stream or edits it in place
    orig_cmd_list = filter_lut_cmds(sg.high_level_command_stream)
    lut.optimize_high_level_cmd_stream(sg, arch)
    cmd_list = filter_lut_cmds(sg.high_level_command_stream)

    # Check that only the needed DMA commands are left.
    # zip() stops at the shorter sequence, so the count is checked explicitly first.
    expected_dma_ops = [op0, op1, op2, op5_2K, op6_2K, op7]
    assert len(cmd_list) == len(expected_dma_ops)
    for cmd, op in zip(cmd_list, expected_dma_ops):
        assert cmd.in_tensor == op.activation_lut.src_tensor
    # Check that lut0, lut1 and lut2 in op0, op1, op2 are stored on different addresses
    assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[1].out_tensor.address
    assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[2].out_tensor.address
    assert orig_cmd_list[1].out_tensor.address != orig_cmd_list[2].out_tensor.address
    # Check that lut1 in op1 and op3 have same address
    assert orig_cmd_list[1].out_tensor.address == orig_cmd_list[3].out_tensor.address
    # Check that lut2 in op2 and op4 have same address
    assert orig_cmd_list[2].out_tensor.address == orig_cmd_list[4].out_tensor.address
    # Check that the 2K luts (op5 and op6) are stored on same address
    assert orig_cmd_list[5].out_tensor.address == orig_cmd_list[6].out_tensor.address
150
151
def test_optimize_high_level_cmd_stream_1K():
    """Test lut.optimize_high_level_cmd_stream, blending 256 byte and 1K luts."""
    arch = testutil.create_arch()
    shape = [1, 1, 1, 1]
    # u8 LUT op, should lead to DMA
    op0 = testutil.create_elemwise_op(Op.Add, "op0", shape, shape, shape)
    set_256_lut(op0, "lut0", arch)
    # u8 LUT op, should lead to DMA
    op1 = testutil.create_elemwise_op(Op.Add, "op1", shape, shape, shape)
    set_256_lut(op1, "lut1", arch)
    # 1K LUT op with different LUT, should lead to DMA
    op2_1K = testutil.create_elemwise_op(Op.Add, "op2", shape, shape, shape)
    set_1K_lut(op2_1K, "lut2", arch)
    # u8 LUT op with same LUT as in op1, should not lead to DMA
    op3 = testutil.create_elemwise_op(Op.Add, "op3", shape, shape, shape)
    set_256_lut(op3, "lut1", arch)
    # 1K LUT op with same LUT as in op2, should not lead to DMA
    op4_1K = testutil.create_elemwise_op(Op.Add, "op4", shape, shape, shape)
    set_1K_lut(op4_1K, "lut2", arch)
    # 1K LUT op, should lead to DMA, and will overwrite lut2
    op5_1K = testutil.create_elemwise_op(Op.Add, "op5", shape, shape, shape)
    set_1K_lut(op5_1K, "lut5", arch)
    # u8 LUT op, lut0 should still be present, should not lead to DMA
    op6 = testutil.create_elemwise_op(Op.Add, "op6", shape, shape, shape)
    set_256_lut(op6, "lut0", arch)
    # 1K LUT op with same LUT as in op2, should lead to DMA
    op7 = testutil.create_elemwise_op(Op.Add, "op7", shape, shape, shape)
    set_1K_lut(op7, "lut2", arch)

    op_list = [op0, op1, op2_1K, op3, op4_1K, op5_1K, op6, op7]
    sg = process(arch, op_list)
    # Keep a separate list of the unoptimized LUT commands, so the address checks below do not
    # depend on whether the optimizer rebuilds the command stream or edits it in place
    orig_cmd_list = filter_lut_cmds(sg.high_level_command_stream)
    lut.optimize_high_level_cmd_stream(sg, arch)
    cmd_list = filter_lut_cmds(sg.high_level_command_stream)

    # Check that only the needed DMA commands are left.
    # zip() stops at the shorter sequence, so the count is checked explicitly first.
    expected_dma_ops = [op0, op1, op2_1K, op5_1K, op7]
    assert len(cmd_list) == len(expected_dma_ops)
    for cmd, op in zip(cmd_list, expected_dma_ops):
        assert cmd.in_tensor == op.activation_lut.src_tensor
    # Check that lut0, lut1 and lut2 in op0, op1, op2 are stored on different addresses
    assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[1].out_tensor.address
    assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[2].out_tensor.address
    assert orig_cmd_list[1].out_tensor.address != orig_cmd_list[2].out_tensor.address
    # Check that lut1 in op1 and op3 have same address
    assert orig_cmd_list[1].out_tensor.address == orig_cmd_list[3].out_tensor.address
    # Check that lut2 in op2 and op4 and op7 have same address
    assert orig_cmd_list[2].out_tensor.address == orig_cmd_list[4].out_tensor.address
    assert orig_cmd_list[2].out_tensor.address == orig_cmd_list[7].out_tensor.address