blob: 4ddc8b9508e34fad1e55578f5fc61320b6f6e712 [file] [log] [blame]
# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Description:
# Unit tests for LUT support
Jacob Bohlin1a666972020-09-11 10:04:15 +020018import random
19
Louis Verhaard0b8268a2020-08-05 16:11:29 +020020import numpy as np
21
Louis Verhaard0b8268a2020-08-05 16:11:29 +020022from ethosu.vela import lut
23from ethosu.vela import mark_tensors
24from ethosu.vela import pass_packing
25from ethosu.vela.data_type import DataType
26from ethosu.vela.high_level_command_stream import DMA
27from ethosu.vela.nn_graph import Graph
Louis Verhaardaee5d752020-09-30 09:01:52 +020028from ethosu.vela.operation import Op
Tim Halld8339a72021-05-27 18:49:40 +010029from ethosu.vela.rewrite_graph import rewrite_graph_pre_order
Louis Verhaard0b8268a2020-08-05 16:11:29 +020030from ethosu.vela.rewrite_graph import verify_graph_health
31from ethosu.vela.tensor import create_const_tensor
32from ethosu.vela.tensor import TensorPurpose
33from ethosu.vela.test import testutil
34
35
def set_256_lut(op, key, arch):
    """Attach a pseudo-random 256-entry, byte-wide activation LUT to ``op``.

    ``key`` seeds the global ``random`` generator before the table is drawn,
    so calls with equal keys produce identical table contents.
    """
    random.seed(key)
    entry_count = 256
    lut_values = random.choices(range(entry_count), k=entry_count)
    tensor_name = op.name + "_lut"
    const_lut = create_const_tensor(
        tensor_name, [1, 1, 1, entry_count], DataType.int8, lut_values, np.uint8, TensorPurpose.LUT
    )
    # The op receives the tensor returned by clone_into_fast_storage, not the constant itself.
    op.set_activation_lut(const_lut.clone_into_fast_storage(arch))
Louis Verhaard0b8268a2020-08-05 16:11:29 +020044
45
def set_1K_lut(op, key, arch):
    """Attach a pseudo-random activation LUT of 256 32-bit entries to ``op``.

    ``key`` seeds the global ``random`` generator before the table is drawn,
    so calls with equal keys produce identical table contents.
    """
    random.seed(key)
    entry_count = 256
    lut_values = random.choices(range(entry_count), k=entry_count)
    tensor_name = op.name + "_lut"
    const_lut = create_const_tensor(
        tensor_name, [1, 1, 1, entry_count], DataType.int32, lut_values, np.uint32, TensorPurpose.LUT
    )
    # The op receives the tensor returned by clone_into_fast_storage, not the constant itself.
    op.set_activation_lut(const_lut.clone_into_fast_storage(arch))
Louis Verhaard0b8268a2020-08-05 16:11:29 +020054
55
def set_2K_lut(op, key, arch):
    """Attach a pseudo-random activation LUT of 512 32-bit entries to ``op``.

    ``key`` seeds the global ``random`` generator before the table is drawn,
    so calls with equal keys produce identical table contents.
    """
    random.seed(key)
    entry_count = 512
    lut_values = random.choices(range(entry_count), k=entry_count)
    tensor_name = op.name + "_lut"
    const_lut = create_const_tensor(
        tensor_name, [1, 1, 1, entry_count], DataType.int32, lut_values, np.uint32, TensorPurpose.LUT
    )
    # The op receives the tensor returned by clone_into_fast_storage, not the constant itself.
    op.set_activation_lut(const_lut.clone_into_fast_storage(arch))
Louis Verhaard0b8268a2020-08-05 16:11:29 +020064
65
def process(arch, op_list):
    """Build a subgraph from ``op_list``, pack it into passes and return it.

    The returned subgraph's ``high_level_command_stream`` is set to a list
    holding one DMA command for every pass input that has a ``src_tensor``.
    """
    graph = Graph()
    sg = testutil.create_subgraph(op_list)
    graph.subgraphs.append(sg)
    assert verify_graph_health(graph)

    graph = mark_tensors.mark_tensor_purpose(graph, arch, False)
    assert verify_graph_health(graph)

    rewrite_graph_pre_order(graph, sg, arch, [], [])
    pass_packing.pack_into_passes(graph, arch, False)
    assert verify_graph_health(graph)

    # Inputs without a src_tensor get no DMA command; pass/input order is preserved.
    sg.high_level_command_stream = [
        DMA(ps, tens.src_tensor, tens, None)
        for ps in sg.passes
        for tens in ps.inputs
        if tens.src_tensor
    ]
    return sg
86
87
def filter_lut_cmds(cmd_list):
    """Return, in order, the commands whose input tensor name contains "lut"."""
    return [cmd for cmd in cmd_list if "lut" in cmd.in_tensor.name]
94
95
def test_optimize_high_level_cmd_stream_2K():
    """Check lut.optimize_high_level_cmd_stream on a blend of 256 byte and 2K LUTs."""
    arch = testutil.create_arch()
    shape = [1, 1, 1, 1]
    # u8 LUT op, should lead to DMA
    op0 = testutil.create_elemwise_op(Op.Add, "op0", shape, shape, shape)
    set_256_lut(op0, "lut0", arch)
    # u8 LUT op, should lead to DMA
    op1 = testutil.create_elemwise_op(Op.Add, "op1", shape, shape, shape)
    set_256_lut(op1, "lut1", arch)
    # u8 LUT op with different LUT, should lead to DMA
    op2 = testutil.create_elemwise_op(Op.Add, "op2", shape, shape, shape)
    set_256_lut(op2, "lut2", arch)
    # u8 LUT op with same LUT as in op1, should not lead to DMA
    op3 = testutil.create_elemwise_op(Op.Add, "op3", shape, shape, shape)
    set_256_lut(op3, "lut1", arch)
    # u8 LUT op with same LUT as in op2, should not lead to DMA
    op4 = testutil.create_elemwise_op(Op.Add, "op4", shape, shape, shape)
    set_256_lut(op4, "lut2", arch)
    # 2K LUT op, should lead to DMA, and will overwrite all previous LUTs in SHRAM
    op5_2K = testutil.create_elemwise_op(Op.Add, "op5", shape, shape, shape)
    set_2K_lut(op5_2K, "lut5", arch)
    # Another 2K LUT op, should lead to DMA, and will overwrite the previous LUT in SHRAM
    op6_2K = testutil.create_elemwise_op(Op.Add, "op6", shape, shape, shape)
    set_2K_lut(op6_2K, "lut6", arch)
    # u8 LUT op with same LUT as in op1, should lead to DMA
    op7 = testutil.create_elemwise_op(Op.Add, "op7", shape, shape, shape)
    set_256_lut(op7, "lut1", arch)

    op_list = [op0, op1, op2, op3, op4, op5_2K, op6_2K, op7]
    sg = process(arch, op_list)
    # Snapshot the unoptimized stream so the address checks below still see every
    # original command, whether the optimizer rebinds the list or edits it in place.
    orig_cmd_list = list(sg.high_level_command_stream)
    lut.optimize_high_level_cmd_stream(sg, arch)
    cmd_list = sg.high_level_command_stream

    # Only LUT transfers are of interest in either stream
    cmd_list = filter_lut_cmds(cmd_list)
    orig_cmd_list = filter_lut_cmds(orig_cmd_list)

    # Check that only the needed DMA commands are left
    expected_dma_ops = [op0, op1, op2, op5_2K, op6_2K, op7]
    # NOTE(review): zip() stops at the shorter sequence, so a length mismatch
    # between cmd_list and expected_dma_ops is not detected by this loop.
    for cmd, op in zip(cmd_list, expected_dma_ops):
        assert cmd.in_tensor == op.activation_lut.src_tensor
    # Check that lut0, lut1 and lut2 in op0, op1, op2 are stored on different addresses
    assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[1].out_tensor.address
    assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[2].out_tensor.address
    assert orig_cmd_list[1].out_tensor.address != orig_cmd_list[2].out_tensor.address
    # Check that lut1 in op1 and op3 have same address
    assert orig_cmd_list[1].out_tensor.address == orig_cmd_list[3].out_tensor.address
    # Check that lut2 in op2 and op4 have same address
    assert orig_cmd_list[2].out_tensor.address == orig_cmd_list[4].out_tensor.address
    # Check that the 2K LUTs of op5 and op6 are stored on the same address
    assert orig_cmd_list[5].out_tensor.address == orig_cmd_list[6].out_tensor.address
149
150
def test_optimize_high_level_cmd_stream_1K():
    """Check lut.optimize_high_level_cmd_stream on a blend of 256 byte and 1K LUTs."""
    arch = testutil.create_arch()
    shape = [1, 1, 1, 1]
    # u8 LUT op, should lead to DMA
    op0 = testutil.create_elemwise_op(Op.Add, "op0", shape, shape, shape)
    set_256_lut(op0, "lut0", arch)
    # u8 LUT op, should lead to DMA
    op1 = testutil.create_elemwise_op(Op.Add, "op1", shape, shape, shape)
    set_256_lut(op1, "lut1", arch)
    # 1K LUT op with different LUT, should lead to DMA
    op2_1K = testutil.create_elemwise_op(Op.Add, "op2", shape, shape, shape)
    set_1K_lut(op2_1K, "lut2", arch)
    # u8 LUT op with same LUT as in op1, should not lead to DMA
    op3 = testutil.create_elemwise_op(Op.Add, "op3", shape, shape, shape)
    set_256_lut(op3, "lut1", arch)
    # 1K LUT op with same LUT as in op2, should not lead to DMA
    op4_1K = testutil.create_elemwise_op(Op.Add, "op4", shape, shape, shape)
    set_1K_lut(op4_1K, "lut2", arch)
    # 1K LUT op, should lead to DMA, and will overwrite lut2
    op5_1K = testutil.create_elemwise_op(Op.Add, "op5", shape, shape, shape)
    set_1K_lut(op5_1K, "lut5", arch)
    # u8 LUT op, lut0 should still be present, should not lead to DMA
    op6 = testutil.create_elemwise_op(Op.Add, "op6", shape, shape, shape)
    set_256_lut(op6, "lut0", arch)
    # 1K LUT op with same LUT as in op2, should lead to DMA
    op7 = testutil.create_elemwise_op(Op.Add, "op7", shape, shape, shape)
    set_1K_lut(op7, "lut2", arch)

    op_list = [op0, op1, op2_1K, op3, op4_1K, op5_1K, op6, op7]
    sg = process(arch, op_list)
    # Snapshot the unoptimized stream so the address checks below still see every
    # original command, whether the optimizer rebinds the list or edits it in place.
    orig_cmd_list = list(sg.high_level_command_stream)
    lut.optimize_high_level_cmd_stream(sg, arch)
    cmd_list = sg.high_level_command_stream

    # Only LUT transfers are of interest in either stream
    cmd_list = filter_lut_cmds(cmd_list)
    orig_cmd_list = filter_lut_cmds(orig_cmd_list)

    # Check that only the needed DMA commands are left
    expected_dma_ops = [op0, op1, op2_1K, op5_1K, op7]
    # NOTE(review): zip() stops at the shorter sequence, so a length mismatch
    # between cmd_list and expected_dma_ops is not detected by this loop.
    for cmd, op in zip(cmd_list, expected_dma_ops):
        assert cmd.in_tensor == op.activation_lut.src_tensor
    # Check that lut0, lut1 and lut2 in op0, op1, op2 are stored on different addresses
    assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[1].out_tensor.address
    assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[2].out_tensor.address
    assert orig_cmd_list[1].out_tensor.address != orig_cmd_list[2].out_tensor.address
    # Check that lut1 in op1 and op3 have same address
    assert orig_cmd_list[1].out_tensor.address == orig_cmd_list[3].out_tensor.address
    # Check that lut2 in op2 and op4 and op7 have same address
    assert orig_cmd_list[2].out_tensor.address == orig_cmd_list[4].out_tensor.address
    assert orig_cmd_list[2].out_tensor.address == orig_cmd_list[7].out_tensor.address