Blame - ethosu/vela/test/test_new_performance.py - ml/ethos-u/ethos-u-vela

blob: a35905b318aa97ce0d01fddf18750b4b939b557f [file] [log] [blame]

Tim Hall	d8339a7	2021-05-27 18:49:40 +0100	[diff] [blame]	1	# Copyright (C) 2021 Arm Limited or its affiliates. All rights reserved.
				2	#
				3	# SPDX-License-Identifier: Apache-2.0
				4	#
				5	# Licensed under the Apache License, Version 2.0 (the License); you may
				6	# not use this file except in compliance with the License.
				7	# You may obtain a copy of the License at
				8	#
				9	# www.apache.org/licenses/LICENSE-2.0
				10	#
				11	# Unless required by applicable law or agreed to in writing, software
				12	# distributed under the License is distributed on an AS IS BASIS, WITHOUT
				13	# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	# See the License for the specific language governing permissions and
				15	# limitations under the License.
				16	#
				17	# Description:
				18	# Contains unit tests for new performance estimation code
				19	from ethosu.vela import architecture_allocator
				20	from ethosu.vela import architecture_features
				21	from ethosu.vela import npu_performance
				22	from ethosu.vela import operation
				23	from ethosu.vela.architecture_features import resampling_mode
				24	from ethosu.vela.shape4d import Shape4D
				25	from ethosu.vela.shape4d import VolumeIterator
				26	from ethosu.vela.tensor import MemArea
				27
				28
				29	def test_new_performance():
				30	arch = architecture_features.create_default_arch(architecture_features.Accelerator.Ethos_U55_128)
				31
				32	query = npu_performance.PerformanceQuery(architecture_features.NpuBlockType.ConvolutionMxN)
				33	query.ifm_shape = Shape4D(1, 16, 16, 16)
				34	query.ifm2_shape = Shape4D()
				35	query.ifm_memory_area = MemArea.Sram
				36	query.ifm_bits = 8
				37	query.ofm_shape = Shape4D(1, 16, 16, 1)
				38	query.ofm_memory_area = MemArea.Sram
				39	query.ofm_bits = 8
				40	query.const_shape = Shape4D(1, 1, 1, query.ofm_shape.depth)
				41	query.const_memory_area = MemArea.OffChipFlash
				42	query.kernel = operation.Kernel(1, 1, 1, 1, 1, 1, valid_padding=False)
				43	query.config = architecture_allocator.find_block_config(
				44	arch,
				45	architecture_features.NpuBlockType.ConvolutionMxN,
				46	Shape4D(1, 16, 16, 1),
				47	query.ifm_shape,
				48	None,
				49	False,
				50	8,
				51	query.kernel,
				52	0,
				53	False,
				54	resampling_mode.NONE,
				55	)
				56
				57	print("For block Config = {}".format(query.config))
				58
				59	# -s to display output
				60	for sub_shape in [Shape4D(1, 4, 8, 16), Shape4D(1, 8, 8, 16), Shape4D(1, 8, 16, 16), query.ofm_shape]:
				61	print("\n-- Subshape = {}".format(sub_shape))
				62	iterator = VolumeIterator(query.ofm_shape, sub_shape)
				63	a = npu_performance.ElementAccess()
				64	c = npu_performance.CycleCost()
				65	for pos, shape in iterator:
				66	print("\tpos = {} shape = {}".format(pos, shape))
				67	ta, tc = npu_performance.measure_performance_cost(
				68	arch, operation.Op.Conv2D, operation.Op.Relu, query, pos, shape
				69	)
				70	a += ta
				71	c += tc
				72	print("\t\taccess: {}".format(ta))
				73	print("\t\tcycles: {}".format(tc))
				74	print("\tAccess: {}".format(a))
				75	print("\tCycles: {}".format(c))
				76	assert c.op_macs == 4096
				77
				78	assert True # Any successful result is okay