Tim Hall | d8339a7 | 2021-05-27 18:49:40 +0100 | [diff] [blame] | 1 | # Copyright (C) 2021 Arm Limited or its affiliates. All rights reserved. |
| 2 | # |
| 3 | # SPDX-License-Identifier: Apache-2.0 |
| 4 | # |
| 5 | # Licensed under the Apache License, Version 2.0 (the License); you may |
| 6 | # not use this file except in compliance with the License. |
| 7 | # You may obtain a copy of the License at |
| 8 | # |
| 9 | # www.apache.org/licenses/LICENSE-2.0 |
| 10 | # |
| 11 | # Unless required by applicable law or agreed to in writing, software |
| 12 | # distributed under the License is distributed on an AS IS BASIS, WITHOUT |
| 13 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | # See the License for the specific language governing permissions and |
| 15 | # limitations under the License. |
| 16 | # |
| 17 | # Description: |
| 18 | # Contains unit tests for new performance estimation code |
| 19 | from ethosu.vela import architecture_allocator |
| 20 | from ethosu.vela import architecture_features |
| 21 | from ethosu.vela import npu_performance |
| 22 | from ethosu.vela import operation |
| 23 | from ethosu.vela.architecture_features import resampling_mode |
| 24 | from ethosu.vela.shape4d import Shape4D |
| 25 | from ethosu.vela.shape4d import VolumeIterator |
| 26 | from ethosu.vela.tensor import MemArea |
| 27 | |
| 28 | |
def test_new_performance():
    """Exercise the performance estimator on a 1x1 convolution query.

    A PerformanceQuery is set up against the default Ethos_U55_128
    architecture, with a block config obtained from the architecture
    allocator. The OFM volume is then walked several times, each time with a
    different sub-volume shape, and the per-step costs returned by
    measure_performance_cost are accumulated. After every complete walk the
    accumulated op_macs is asserted to be 4096.

    Run pytest with -s to display the printed output.
    """
    arch = architecture_features.create_default_arch(architecture_features.Accelerator.Ethos_U55_128)
    block_type = architecture_features.NpuBlockType.ConvolutionMxN

    # Describe the operation whose cost is to be estimated
    query = npu_performance.PerformanceQuery(block_type)
    query.ifm_shape = Shape4D(1, 16, 16, 16)
    query.ifm2_shape = Shape4D()
    query.ifm_memory_area = MemArea.Sram
    query.ifm_bits = 8
    query.ofm_shape = Shape4D(1, 16, 16, 1)
    query.ofm_memory_area = MemArea.Sram
    query.ofm_bits = 8
    query.const_shape = Shape4D(1, 1, 1, query.ofm_shape.depth)
    query.const_memory_area = MemArea.OffChipFlash
    kernel = operation.Kernel(1, 1, 1, 1, 1, 1, valid_padding=False)
    query.kernel = kernel
    query.config = architecture_allocator.find_block_config(
        arch, block_type, Shape4D(1, 16, 16, 1), query.ifm_shape, None, False, 8, kernel, 0, False, resampling_mode.NONE,
    )

    print("For block Config = {}".format(query.config))

    # The last entry covers the whole OFM in a single step
    walk_shapes = [
        Shape4D(1, 4, 8, 16),
        Shape4D(1, 8, 8, 16),
        Shape4D(1, 8, 16, 16),
        query.ofm_shape,
    ]
    for walk_shape in walk_shapes:
        print("\n-- Subshape = {}".format(walk_shape))
        volume = VolumeIterator(query.ofm_shape, walk_shape)
        # Fresh accumulators for each walk over the OFM
        total_access = npu_performance.ElementAccess()
        total_cycles = npu_performance.CycleCost()
        for pos, shape in volume:
            print("\tpos = {} shape = {}".format(pos, shape))
            step_access, step_cycles = npu_performance.measure_performance_cost(
                arch, operation.Op.Conv2D, operation.Op.Relu, query, pos, shape
            )
            total_access += step_access
            total_cycles += step_cycles
            print("\t\taccess: {}".format(step_access))
            print("\t\tcycles: {}".format(step_cycles))
        print("\tAccess: {}".format(total_access))
        print("\tCycles: {}".format(total_cycles))
        assert total_cycles.op_macs == 4096

    assert True  # Any successful result is okay