blob: 8222ef1ddfcda7793447888d5b44c3c09d9d7e3d [file] [log] [blame]
# SPDX-FileCopyrightText: Copyright 2021 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Description:
# Contains unit tests for the new performance estimation code.
19from ethosu.vela import architecture_allocator
20from ethosu.vela import architecture_features
21from ethosu.vela import npu_performance
22from ethosu.vela import operation
23from ethosu.vela.architecture_features import resampling_mode
24from ethosu.vela.shape4d import Shape4D
25from ethosu.vela.shape4d import VolumeIterator
26from ethosu.vela.tensor import MemArea
27
28
def test_new_performance():
    """Measure a 1x1 ConvolutionMxN query piecewise and check the accumulated MAC count.

    A PerformanceQuery is built for an Ethos-U55-128 default architecture with a
    1x16x16x16 IFM and a 1x16x16x1 OFM. The OFM is then walked with a
    VolumeIterator using four different sub-shapes (the last being the whole
    OFM). For every sub-shape the per-step access and cycle costs returned by
    measure_performance_cost are summed, and the summed ``op_macs`` must equal
    4096 regardless of how the volume was split.

    Intermediate results are printed; run pytest with ``-s`` to see them.
    """
    arch = architecture_features.create_default_arch(architecture_features.Accelerator.Ethos_U55_128)
    conv_block_type = architecture_features.NpuBlockType.ConvolutionMxN

    # Describe the operation whose cost is being estimated.
    query = npu_performance.PerformanceQuery(conv_block_type)
    query.ifm_shape = Shape4D(1, 16, 16, 16)
    query.ifm2_shape = Shape4D()
    query.ifm_memory_area = MemArea.Sram
    query.ifm_bits = 8
    query.ofm_shape = Shape4D(1, 16, 16, 1)
    query.ofm_memory_area = MemArea.Sram
    query.ofm_bits = 8
    query.const_shape = Shape4D(1, 1, 1, query.ofm_shape.depth)
    query.const_memory_area = MemArea.OffChipFlash
    query.kernel = operation.Kernel(1, 1, 1, 1, 1, 1, valid_padding=False)

    # NOTE(review): the positional arguments below are passed exactly as in the
    # original call; confirm their meaning against find_block_config's signature.
    query.config = architecture_allocator.find_block_config(
        arch,
        conv_block_type,
        Shape4D(1, 16, 16, 1),
        query.ifm_shape,
        None,
        False,
        8,
        query.kernel,
        0,
        False,
        resampling_mode.NONE,
    )

    print("For block Config = {}".format(query.config))

    # Candidate sub-volumes used to split the OFM; the final entry covers it whole.
    candidate_sub_shapes = (
        Shape4D(1, 4, 8, 16),
        Shape4D(1, 8, 8, 16),
        Shape4D(1, 8, 16, 16),
        query.ofm_shape,
    )

    # -s to display output
    for candidate in candidate_sub_shapes:
        print("\n-- Subshape = {}".format(candidate))
        volumes = VolumeIterator(query.ofm_shape, candidate)
        total_access = npu_performance.ElementAccess()
        total_cycles = npu_performance.CycleCost()

        for pos, shape in volumes:
            print("\tpos = {} shape = {}".format(pos, shape))
            step_access, step_cycles = npu_performance.measure_performance_cost(
                arch, operation.Op.Conv2D, operation.Op.Relu, query, pos, shape
            )
            total_access += step_access
            total_cycles += step_cycles
            print("\t\taccess: {}".format(step_access))
            print("\t\tcycles: {}".format(step_cycles))

        print("\tAccess: {}".format(total_access))
        print("\tCycles: {}".format(total_cycles))
        # The MAC total must be the same however the OFM was partitioned.
        assert total_cycles.op_macs == 4096

    assert True  # Any successful result is okay