Rickard Bolin | bc6ee58 | 2022-11-04 08:24:29 +0000 | [diff] [blame] | 1 | # SPDX-FileCopyrightText: Copyright 2020-2022 Arm Limited and/or its affiliates <open-source-office@arm.com> |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 2 | # |
| 3 | # SPDX-License-Identifier: Apache-2.0 |
| 4 | # |
| 5 | # Licensed under the Apache License, Version 2.0 (the License); you may |
| 6 | # not use this file except in compliance with the License. |
| 7 | # You may obtain a copy of the License at |
| 8 | # |
| 9 | # www.apache.org/licenses/LICENSE-2.0 |
| 10 | # |
| 11 | # Unless required by applicable law or agreed to in writing, software |
| 12 | # distributed under the License is distributed on an AS IS BASIS, WITHOUT |
| 13 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | # See the License for the specific language governing permissions and |
| 15 | # limitations under the License. |
Rickard Bolin | bc6ee58 | 2022-11-04 08:24:29 +0000 | [diff] [blame] | 16 | # |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 17 | # Description: |
| 18 | # Creates driver actions that are embedded in the custom operator payload. |
Louis Verhaard | 5207830 | 2020-11-18 13:35:06 +0100 | [diff] [blame] | 19 | import struct |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 20 | from typing import List |
Diego Russo | ea6111a | 2020-04-14 18:41:58 +0100 | [diff] [blame] | 21 | |
| 22 | import numpy as np |
| 23 | |
Louis Verhaard | 5207830 | 2020-11-18 13:35:06 +0100 | [diff] [blame] | 24 | from .api import NpuAccelerator |
| 25 | from .architecture_features import Accelerator |
| 26 | from .architecture_features import ArchitectureFeatures |
| 27 | from .architecture_features import create_default_arch |
erik.andersson@arm.com | 1878dab | 2021-03-16 09:40:24 +0100 | [diff] [blame] | 28 | from .errors import VelaError |
Diego Russo | e8a1045 | 2020-04-21 17:39:10 +0100 | [diff] [blame] | 29 | from .ethos_u55_regs.ethos_u55_regs import ARCH_VER |
| 30 | from .ethos_u55_regs.ethos_u55_regs import config_r |
| 31 | from .ethos_u55_regs.ethos_u55_regs import id_r |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 32 | |
| 33 | |
class DACommands:
    """Driver action (DA) command ids and the bit layout of their parameters."""

    # Command ids; make_da_tag places the id in the lowest bits of the tag word
    Reserved = 0x00
    Config = 0x01
    CmdStream = 0x02
    ReadAPB = 0x03
    DumpSHRAM = 0x04
    NOP = 0x05

    # Config parameter layout: emit_config shifts `patch` up by this many bits and ORs in `rel`
    Config_PatchShift = 4

    # ReadAPB parameter layout: the register index sits below the count shift,
    # the register count field sits at and above it
    ReadAPB_CountShift = 12
    ReadAPB_IndexMask = (1 << ReadAPB_CountShift) - 1
| 44 | |
| 45 | |
def make_da_tag(id: int, reserved: int, param: int) -> int:
    """Combine a driver action id, reserved field and parameter into one tag word.

    The id is OR-ed in unshifted, `reserved` is shifted left by 8 bits and
    `param` by 16 bits. No masking is applied, so an over-wide field spills
    into the neighbouring one.
    """
    return id | (reserved << 8) | (param << 16)
| 51 | |
| 52 | |
def emit_fourcc(data: List[int], fourcc: str):
    """Append a four-character code to `data` as a single integer word.

    Character 0 lands in bits 0-7, character 1 in bits 8-15, and so on, using
    the first byte of each character's default (UTF-8) encoding.
    """
    assert data is not None
    assert fourcc is not None
    assert len(fourcc) == 4
    packed: int = 0
    for position, char in enumerate(fourcc):
        packed |= char.encode()[0] << (8 * position)
    data.append(packed)
| 63 | |
| 64 | |
def build_id_word():
    """Return the id register word filled in with the architecture version.

    ARCH_VER is split on "." into exactly three integers (major, minor, patch)
    which are written to a fresh id_r register; its `word` value is returned.
    """
    major, minor, patch = map(int, ARCH_VER.split("."))
    id_reg = id_r()
    id_reg.set_arch_major_rev(major)
    id_reg.set_arch_minor_rev(minor)
    id_reg.set_arch_patch_rev(patch)
    return id_reg.word
| 72 | |
| 73 | |
def build_config_word(arch):
    """Return the config register word describing the given architecture.

    arch: object providing `ncores`, `config.macs`, `shram_size_bytes` and
        `is_ethos_u65_system`.
    """
    total_macs = arch.ncores * arch.config.macs
    # Rounded base-2 logarithm of the MAC count summed over all cores
    macs_log2 = int(np.log2(total_macs) + 0.5)
    # SHRAM size in whole KiB per core, scaled by the number of cores
    shram_kib = arch.ncores * int(arch.shram_size_bytes / 1024)

    cfg = config_r()
    # Product code: 1 for an Ethos-U65 system, 0 (U55) otherwise
    cfg.set_product(1 if arch.is_ethos_u65_system else 0)
    cfg.set_shram_size(shram_kib)
    cfg.set_cmd_stream_version(0)  # may be incremented in the future
    cfg.set_macs_per_cc(macs_log2)
    return cfg.word
| 87 | |
| 88 | |
def emit_config(data: List[int], rel: int, patch: int, arch):
    """Append the Config driver action (three words) to `data`.

    The words are, in order: the Config tag whose parameter is
    `(patch << Config_PatchShift) | rel`, the config register word for `arch`,
    and the id register word.
    """
    assert data is not None
    version_param = (patch << DACommands.Config_PatchShift) | rel
    config_tag = make_da_tag(DACommands.Config, 0, version_param)
    data.append(config_tag)
    data.append(build_config_word(arch))
    data.append(build_id_word())
| 94 | |
| 95 | |
def emit_cmd_stream_header(data: List[int], length: int):
    """Append the CmdStream driver action that precedes the command stream words.

    NOP driver actions are appended first so that, once the one-word header has
    been added, `len(data)` is a multiple of 4. With 4-byte words (as packed by
    create_driver_payload) the command stream appended next therefore starts on
    a 16-byte boundary.

    data: list of driver action words, extended in place.
    length: command stream length as passed by the caller (create_driver_payload
        passes the number of command stream words). Only the low 24 bits are
        encoded; callers must ensure the value fits.
    """
    assert data is not None
    # Insert NOPs to align start of command stream to 16 bytes.
    # The modulo form gives 0 (not 4) when the header already ends on a boundary,
    # so no redundant NOP words are emitted in that case.
    num_nops = (-(len(data) + 1)) % 4
    for _ in range(num_nops):
        data.append(make_da_tag(DACommands.NOP, 0, 0))

    # Use the reserved 8 bit as the length high
    length_high = (length & 0x00FF0000) >> 16
    length_low = length & 0x0000FFFF
    data.append(make_da_tag(DACommands.CmdStream, length_high, length_low))
| 107 | |
| 108 | |
def emit_reg_read(data: List[int], reg_index: int, reg_count: int = 1):
    """Append a ReadAPB driver action to `data`.

    The parameter holds the register index in the bits covered by
    ReadAPB_IndexMask and `reg_count - 1` in the bits from ReadAPB_CountShift
    upwards.

    data: list of driver action words, extended in place.
    reg_index: non-negative index of the first register; bits outside
        ReadAPB_IndexMask are discarded.
    reg_count: number of registers to read, at least 1.
    """
    assert data is not None
    assert reg_index >= 0
    assert reg_count >= 1
    index_field = reg_index & DACommands.ReadAPB_IndexMask
    # The count is stored biased by one. The subtraction must happen BEFORE the
    # shift: `(reg_count << shift) - 1` would set every index bit and so
    # overwrite the register index.
    # NOTE(review): the count-minus-one encoding is assumed to match the driver's decoder - confirm
    count_field = (reg_count - 1) << DACommands.ReadAPB_CountShift
    data.append(make_da_tag(DACommands.ReadAPB, 0, index_field | count_field))
| 115 | |
| 116 | |
def emit_dump_shram(data: List[int]):
    """Append a DumpSHRAM driver action (reserved and parameter fields zero) to `data`."""
    assert data is not None
    dump_tag = make_da_tag(DACommands.DumpSHRAM, 0, 0)
    data.append(dump_tag)
Louis Verhaard | 5207830 | 2020-11-18 13:35:06 +0100 | [diff] [blame] | 120 | |
| 121 | |
def create_driver_payload(register_command_stream: List[int], arch: ArchitectureFeatures) -> bytes:
    """Build the driver payload: driver action header followed by the command stream.

    The payload consists of the "COP1" fourcc, the Config action for `arch`,
    NOP padding plus the CmdStream header, and finally the command stream
    words, all packed as little-endian unsigned 32-bit integers.

    Raises VelaError if the command stream has 2**24 or more words, which is
    more than the CmdStream header can encode.
    """
    # Driver actions that precede this command tensor
    payload_words: List[int] = []
    emit_fourcc(payload_words, "COP1")
    emit_config(payload_words, 0, 1, arch)

    stream_words = len(register_command_stream)
    if stream_words >= 1 << 24:
        raise VelaError(
            "The command stream exceeds the driver size limit of 64 MiB. "
            f"The current stream size is {4*len(register_command_stream)/2**20:.2F} MiB"
        )

    emit_cmd_stream_header(payload_words, stream_words)

    # The command stream words follow directly after the header
    payload_words.extend(register_command_stream)
    # Serialise every word as a little-endian unsigned 32-bit integer
    return struct.pack(f"<{len(payload_words)}I", *payload_words)
| 140 | |
| 141 | |
def npu_create_driver_payload(register_command_stream: List[int], accelerator: NpuAccelerator) -> bytes:
    """Internal implementation of the public facing API to create driver payload.

    Converts the public NpuAccelerator value to the internal Accelerator,
    creates the default architecture for it, and builds the payload from that.
    """
    internal_accelerator = Accelerator.from_npu_accelerator(accelerator)
    default_arch = create_default_arch(internal_accelerator)
    return create_driver_payload(register_command_stream, default_arch)