Blame - ethosu/vela/test/test_lut.py - ml/ethos-u/ethos-u-vela

blob: 3b7f57be35ddcf92950a24992eeede7ac4d670a6 [file] [log] [blame]

Louis Verhaard	0b8268a	2020-08-05 16:11:29 +0200	[diff] [blame]	1	# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
				2	#
				3	# SPDX-License-Identifier: Apache-2.0
				4	#
				5	# Licensed under the Apache License, Version 2.0 (the License); you may
				6	# not use this file except in compliance with the License.
				7	# You may obtain a copy of the License at
				8	#
				9	# www.apache.org/licenses/LICENSE-2.0
				10	#
				11	# Unless required by applicable law or agreed to in writing, software
				12	# distributed under the License is distributed on an AS IS BASIS, WITHOUT
				13	# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	# See the License for the specific language governing permissions and
				15	# limitations under the License.
				16	# Description:
				17	# Unit tests for LUT support
				18	import numpy as np
				19
				20	from ethosu.vela import insert_dma
				21	from ethosu.vela import lut
				22	from ethosu.vela import mark_tensors
				23	from ethosu.vela import pass_packing
				24	from ethosu.vela.data_type import DataType
				25	from ethosu.vela.high_level_command_stream import DMA
				26	from ethosu.vela.nn_graph import Graph
				27	from ethosu.vela.rewrite_graph import verify_graph_health
				28	from ethosu.vela.tensor import create_const_tensor
				29	from ethosu.vela.tensor import TensorPurpose
				30	from ethosu.vela.test import testutil
				31
				32
				33	def set_256_lut(op, key):
				34	values = list(range(256))
				35	lut_tensor = create_const_tensor(
				36	op.name + "_lut", [1, 1, 1, 256], DataType.int8, values, np.uint8, TensorPurpose.LUT
				37	)
				38	lut_tensor.equivalence_id = lut.create_equivalence_id(key)
				39	op.set_activation_lut(lut_tensor)
				40
				41
				42	def set_1K_lut(op, key):
				43	values = list(range(256))
				44	lut_tensor = create_const_tensor(
				45	op.name + "_lut", [1, 1, 1, 256], DataType.int32, values, np.uint32, TensorPurpose.LUT
				46	)
				47	lut_tensor.equivalence_id = lut.create_equivalence_id(key)
				48	op.set_activation_lut(lut_tensor)
				49
				50
				51	def set_2K_lut(op, key):
				52	values = list(range(512))
				53	lut_tensor = create_const_tensor(
				54	op.name + "_lut", [1, 1, 1, 512], DataType.int32, values, np.uint32, TensorPurpose.LUT
				55	)
				56	lut_tensor.equivalence_id = lut.create_equivalence_id(key)
				57	op.set_activation_lut(lut_tensor)
				58
				59
				60	def process(arch, op_list):
				61	# Returns subgraph with given operations
				62	nng = Graph()
				63	sg = testutil.create_subgraph(op_list)
				64	nng.subgraphs.append(sg)
				65	assert verify_graph_health(nng)
				66	nng = mark_tensors.mark_tensor_purpose(nng, arch, False)
				67	assert verify_graph_health(nng)
				68	nng = insert_dma.insert_dma_commands(nng, arch, False)
				69	assert verify_graph_health(nng)
				70	pass_packing.pack_into_passes(nng, arch, False)
				71	assert verify_graph_health(nng)
				72	# Create a DMA instruction for every op
				73	cmd_list = []
				74	for ps in sg.passes:
				75	for intermediate in ps.intermediates:
				76	if intermediate.needs_dma():
				77	cmd_list.append(DMA(ps, intermediate.get_dma_src_tensor(), intermediate, None))
				78	sg.high_level_command_stream = cmd_list
				79	return sg
				80
				81
				82	def test_optimize_high_level_cmd_stream_2K():
				83	# Tests lut.optimize_high_level_cmd_stream, blending 256 byte and 2K luts
				84	arch = testutil.create_arch()
				85	shape = [1, 1, 1, 1]
				86	# u8 LUT op, should lead to DMA
				87	op0 = testutil.create_elemwise_op("AddAct", "op0", shape, shape, shape)
				88	set_256_lut(op0, "lut0")
				89	# u8 LUT op, should lead to DMA
				90	op1 = testutil.create_elemwise_op("AddAct", "op1", shape, shape, shape)
				91	set_256_lut(op1, "lut1")
				92	# u8 LUT op with different LUT, should lead to DMA
				93	op2 = testutil.create_elemwise_op("AddAct", "op2", shape, shape, shape)
				94	set_256_lut(op2, "lut2")
				95	# u8 LUT op with same LUT as in op1, should not lead to DMA
				96	op3 = testutil.create_elemwise_op("AddAct", "op3", shape, shape, shape)
				97	set_256_lut(op3, "lut1")
				98	# u8 LUT op with same LUT as in op2, should not lead to DMA
				99	op4 = testutil.create_elemwise_op("AddAct", "op4", shape, shape, shape)
				100	set_256_lut(op4, "lut2")
				101	# 2K LUT op, should lead to DMA, and will overwrite all previous LUTs in SHRAM
				102	op5_2K = testutil.create_elemwise_op("AddAct", "op5", shape, shape, shape)
				103	set_2K_lut(op5_2K, "lut5")
				104	# Another 2K LUT op, should lead to DMA, and will overwrite the previous LUT in SHRAM
				105	op6_2K = testutil.create_elemwise_op("AddAct", "op6", shape, shape, shape)
				106	set_2K_lut(op6_2K, "lut6")
				107	# u8 LUT op with same LUT as in op1, should lead to DMA
				108	op7 = testutil.create_elemwise_op("AddAct", "op7", shape, shape, shape)
				109	set_256_lut(op7, "lut1")
				110
				111	op_list = [op0, op1, op2, op3, op4, op5_2K, op6_2K, op7]
				112	sg = process(arch, op_list)
				113	orig_cmd_list = sg.high_level_command_stream
				114	sg.high_level_command_stream = orig_cmd_list
				115	lut.optimize_high_level_cmd_stream(sg, arch)
				116	cmd_list = sg.high_level_command_stream
				117	# Check that only the needed DMA commands are left
				118	expected_dma_ops = [op0, op1, op2, op5_2K, op6_2K, op7]
				119	for (cmd, op) in zip(cmd_list, expected_dma_ops):
				120	assert cmd.in_tensor == op.activation_lut
				121	# Check that lut0, lut1 and lut2 in op0, op1, op2 are stored on different addresses
				122	assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[1].out_tensor.address
				123	assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[2].out_tensor.address
				124	assert orig_cmd_list[1].out_tensor.address != orig_cmd_list[2].out_tensor.address
				125	# Check that lut1 in op1 and op3 have same address
				126	assert orig_cmd_list[1].out_tensor.address == orig_cmd_list[3].out_tensor.address
				127	# Check that lut2 in op2 and op4 have same address
				128	assert orig_cmd_list[2].out_tensor.address == orig_cmd_list[4].out_tensor.address
				129	# Check that lut-s for 16 bit (op5 and op6) are stored on same address
				130	assert orig_cmd_list[5].out_tensor.address == orig_cmd_list[6].out_tensor.address
				131
				132
				133	def test_optimize_high_level_cmd_stream_1K():
				134	# Tests lut.optimize_high_level_cmd_stream, blending 256 and 1K luts
				135	arch = testutil.create_arch()
				136	shape = [1, 1, 1, 1]
				137	# u8 LUT op, should lead to DMA
				138	op0 = testutil.create_elemwise_op("AddAct", "op0", shape, shape, shape)
				139	set_256_lut(op0, "lut0")
				140	# u8 LUT op, should lead to DMA
				141	op1 = testutil.create_elemwise_op("AddAct", "op1", shape, shape, shape)
				142	set_256_lut(op1, "lut1")
				143	# 1K LUT op with different LUT, should lead to DMA
				144	op2_1K = testutil.create_elemwise_op("AddAct", "op2", shape, shape, shape)
				145	set_1K_lut(op2_1K, "lut2")
				146	# u8 LUT op with same LUT as in op1, should not lead to DMA
				147	op3 = testutil.create_elemwise_op("AddAct", "op3", shape, shape, shape)
				148	set_256_lut(op3, "lut1")
				149	# 1K LUT op with same LUT as in op2, should not lead to DMA
				150	op4_1K = testutil.create_elemwise_op("AddAct", "op4", shape, shape, shape)
				151	set_1K_lut(op4_1K, "lut2")
				152	# 1K LUT op, should lead to DMA, and will overwrite lut2
				153	op5_2K = testutil.create_elemwise_op("AddAct", "op5", shape, shape, shape)
				154	set_1K_lut(op5_2K, "lut5")
				155	# u8 LUT op, lut0 should still be present, should not lead to DMA
				156	op6 = testutil.create_elemwise_op("AddAct", "op6", shape, shape, shape)
				157	set_256_lut(op6, "lut0")
				158	# 1K LUT op with same LUT as in op2, should lead to DMA
				159	op7 = testutil.create_elemwise_op("AddAct", "op7", shape, shape, shape)
				160	set_1K_lut(op7, "lut2")
				161
				162	op_list = [op0, op1, op2_1K, op3, op4_1K, op5_2K, op6, op7]
				163	sg = process(arch, op_list)
				164	orig_cmd_list = sg.high_level_command_stream
				165	sg.high_level_command_stream = orig_cmd_list
				166	lut.optimize_high_level_cmd_stream(sg, arch)
				167	cmd_list = sg.high_level_command_stream
				168	# Check that only the needed DMA commands are left
				169	expected_dma_ops = [op0, op1, op2_1K, op5_2K, op7]
				170	for (cmd, op) in zip(cmd_list, expected_dma_ops):
				171	assert cmd.in_tensor == op.activation_lut
				172	# Check that lut0, lut1 and lut2 in op0, op1, op2 are stored on different addresses
				173	assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[1].out_tensor.address
				174	assert orig_cmd_list[0].out_tensor.address != orig_cmd_list[2].out_tensor.address
				175	assert orig_cmd_list[1].out_tensor.address != orig_cmd_list[2].out_tensor.address
				176	# Check that lut1 in op1 and op3 have same address
				177	assert orig_cmd_list[1].out_tensor.address == orig_cmd_list[3].out_tensor.address
				178	# Check that lut2 in op2 and op4 and op7 have same address
				179	assert orig_cmd_list[2].out_tensor.address == orig_cmd_list[4].out_tensor.address
				180	assert orig_cmd_list[2].out_tensor.address == orig_cmd_list[7].out_tensor.address