Kshitij Sisodia | f9efe0d | 2022-09-30 16:42:50 +0100 | [diff] [blame] | 1 | # SPDX-FileCopyrightText: Copyright 2021-2022 Arm Limited and/or its affiliates <open-source-office@arm.com> |
| 2 | # SPDX-License-Identifier: Apache-2.0 |
| 3 | import logging |
| 4 | import time |
| 5 | from typing import List |
| 6 | from .._generated.driver import Device, Inference, Network, Buffer, InferenceStatus_OK |
| 7 | |
| 8 | |
def open_device(device: str) -> Device:
    """Open the Ethos-U device node and return its handle.

    Args:
        device: device name under /dev (e.g. "ethosu0").

    Returns:
        `Device`: object that wraps the Ethos-U device file descriptor and
        manages the device lifecycle.
    """
    return Device("/dev/{}".format(device))
| 20 | |
| 21 | |
def load_model(device: Device, model: str) -> Network:
    """Create a `Network` from a tflite model file on the given device.

    Args:
        device: `Device` object wrapping the Ethos-U device file descriptor.
        model: tflite model file path.

    Returns:
        `Network`: object representing the neural network file descriptor
        received from the Ethos-U device.
    """
    logging.info("Creating network")
    model_buffer = Buffer(device, model)
    network = Network(device, model_buffer)
    return network
| 35 | |
| 36 | |
def populate_buffers(input_data: List[bytearray], buffers: List["Buffer"]):
    """Copy input feature map data into the given device buffers.

    Args:
        input_data: list of input feature maps data.
        buffers: list of already initialized ifm buffers.
    Raises:
        RuntimeError: if the number of data chunks does not match the number
            of buffers, or if a data chunk does not fit into its buffer.
    """
    number_of_buffers = len(buffers)

    if number_of_buffers != len(input_data):
        raise RuntimeError("Incorrect number of inputs, expected {}, got {}.".format(number_of_buffers, len(input_data)))

    for index, (buffer, data_chunk) in enumerate(zip(buffers, input_data)):
        cap = buffer.capacity()
        # Lazy %-style arguments: the message is only formatted if INFO is enabled.
        logging.info("Copying data to a buffer %s of %s with size = %s", index + 1, number_of_buffers, cap)

        if len(data_chunk) > cap:
            raise RuntimeError("Buffer expects {} bytes, got {} bytes.".format(cap, len(data_chunk)))
        # Shrink the buffer to the actual payload size before copying the bytes in.
        buffer.resize(len(data_chunk))
        buffer.from_buffer(data_chunk)
| 59 | |
| 60 | |
def allocate_buffers(device: Device, dimensions: List) -> List[Buffer]:
    """Allocate one device memory buffer per feature map dimension.

    Args:
        device: `Device` object wrapping the Ethos-U device file descriptor.
        dimensions: list of buffer sizes, one entry per feature map.

    Returns:
        list: newly allocated feature map buffers.
    """
    total = len(dimensions)
    buffers = []
    for ordinal, size in enumerate(dimensions, start=1):
        logging.info("Allocating {} of {} buffer with size = {}".format(ordinal, total, size))
        buffers.append(Buffer(device, size))

    return buffers
| 79 | |
| 80 | |
def get_results(inference: Inference) -> List[Buffer]:
    """Retrieve the output feature map buffers of a completed inference.

    Args:
        inference: `Inference` object that represents the inference file descriptor.

    Returns:
        list: output feature map buffer objects.

    Raises:
        RuntimeError: if the inference reports a failure status.
    """
    if inference.status() == InferenceStatus_OK:
        logging.info("Inference succeeded!")
        return inference.getOfmBuffers()
    raise RuntimeError("Inference failed!")
| 98 | |
| 99 | |
class InferenceRunner:
    """Helper class to execute inference."""

    def __init__(self, device_name: str, model: str):
        """Initialises instance to execute inferences on the given model with given device.

        Device is opened with the name '/dev/<device_name>'.
        Input/Output feature maps memory is allocated.

        Args:
            device_name: npu device name
            model: Tflite model file path
        Raises:
            RuntimeError: if the device does not answer a ping within the retry budget.
        """
        self.__device = open_device(device_name)
        if not InferenceRunner.wait_for_ping(self.__device, 3):
            raise RuntimeError("Failed to communicate with device {}".format(device_name))

        self.__network = load_model(self.__device, model)
        # it is important to have a reference to current inference object to have access to OFMs.
        self.__inf = None
        self.__enabled_counters = ()

    @staticmethod
    def wait_for_ping(device: "Device", count: int) -> bool:
        """Ping the device, retrying up to `count` times with a 0.5 s pause between tries.

        Args:
            device: device handle exposing a `ping()` method.
            count: maximum number of ping attempts; 0 means give up immediately.

        Returns:
            bool: True as soon as a ping succeeds, False once all attempts failed.
        """
        # Iterative retry instead of recursion: same behavior, bounded stack.
        for remaining in range(count, 0, -1):
            try:
                device.ping()
                return True
            # NOTE: a bare `except:` here would also swallow KeyboardInterrupt
            # and SystemExit; only ordinary errors from a not-yet-ready device
            # should trigger a retry.
            except Exception:
                logging.info("Waiting for device: {}".format(remaining))
                time.sleep(0.5)
        return False

    def set_enabled_counters(self, enabled_counters: List[int] = ()):
        """Set the enabled performance counter to use during inference.

        Args:
            enabled_counters: list of integer counter to enable.
        Raises:
            ValueError: if more PMU counters are requested than the maximum supported.
        """
        max_pmu_events = Inference.getMaxPmuEventCounters()
        if len(enabled_counters) > max_pmu_events:
            raise ValueError("Number of PMU counters requested exceed the maximum supported ({}).".format(max_pmu_events))
        self.__enabled_counters = enabled_counters

    def run(self, input_data: List[bytearray], timeout: int) -> List["Buffer"]:
        """Run an inference with the given input feature maps data.

        Args:
            input_data: data list containing input data as binary arrays
            timeout: inference timeout in nano seconds

        Returns:
            list: output feature map buffer objects
        """
        ofms = allocate_buffers(self.__device, self.__network.getOfmDims())
        ifms = allocate_buffers(self.__device, self.__network.getIfmDims())
        populate_buffers(input_data, ifms)

        # Keep the inference referenced on self so OFMs/PMU data stay reachable
        # after run() returns.
        self.__inf = Inference(
            self.__network,
            ifms,
            ofms,
            self.__enabled_counters,
            True)

        self.__inf.wait(int(timeout))
        return get_results(self.__inf)

    def get_pmu_counters(self) -> List:
        """Return the PMU data for the inference run.

        Returns:
            list: pairs of PMU type and cycle count value
        """
        return list(zip(self.__enabled_counters, self.__inf.getPmuCounters()))

    def get_pmu_total_cycles(self) -> int:
        """
        Returns the total cycle count, including idle cycles, as reported by
        the PMU

        Returns: total cycle count
        """
        return self.__inf.getCycleCounter()