Add driver async API

Add an asynchronous API to the driver. The current synchronous API
is now using the new async API internally. The main new functions
are ethosu_invoke_async() and ethosu_wait(). Every successful call
to ethosu_invoke_async() must be followed by a call to ethosu_wait()
to get the status of the inference. The wait function can be called
in a blocking or non-blocking mode by setting the `block`
argument to true/false. The regular synchronous invoke function is
implemented as an invoke_async followed by a wait(block=true) call.

Short summary of changes:
 - Add an internal ethosu_job struct to keep track of
   inference data and job state.
 - Use async API in blocking mode for normal flow
 - Change default semaphore implementation to binary type
 - Move error prints out of interrupt context
 - Move ethosu_inference_begin() callback to right before HW invoke
 - Always call ethosu_inference_end() callback, even in case of errors
 - On NPU error, do not keep NPU powered after being reset

Change-Id: If4c3c46e3c6732a669e17251bd848dea5765a490
diff --git a/include/ethosu_driver.h b/include/ethosu_driver.h
index ff8d1eb..bf6a578 100644
--- a/include/ethosu_driver.h
+++ b/include/ethosu_driver.h
@@ -48,17 +48,35 @@
 // Forward declare
 struct ethosu_device;
 
+enum ethosu_job_state
+{
+    ETHOSU_JOB_IDLE = 0,
+    ETHOSU_JOB_RUNNING,
+    ETHOSU_JOB_DONE
+};
+
+struct ethosu_job
+{
+    volatile enum ethosu_job_state state;
+    const void *custom_data_ptr;
+    int custom_data_size;
+    const uint64_t *base_addr;
+    const size_t *base_addr_size;
+    int num_base_addr;
+    void *user_arg;
+};
+
 struct ethosu_driver
 {
     struct ethosu_device *dev;
     struct ethosu_driver *next;
+    struct ethosu_job job;
     void *semaphore;
     uint64_t fast_memory;
     size_t fast_memory_size;
     bool status_error;
     bool dev_power_always_on;
     bool reserved;
-    volatile bool irq_triggered;
     uint8_t clock_request;
     uint8_t power_request;
 };
@@ -159,6 +177,33 @@
     ethosu_invoke_v3(drv, custom_data_ptr, custom_data_size, base_addr, base_addr_size, num_base_addr, 0)
 
 /**
+ * Invoke Vela command stream using async interface.
+ * Must be followed by call(s) to ethosu_wait() upon successful return.
+ * Returns
+ *   -1 on error
+ *    0 on success
+ */
+int ethosu_invoke_async(struct ethosu_driver *drv,
+                        const void *custom_data_ptr,
+                        const int custom_data_size,
+                        const uint64_t *base_addr,
+                        const size_t *base_addr_size,
+                        const int num_base_addr,
+                        void *user_arg);
+
+/**
+ * Wait for inference to complete (block=true)
+ * Poll status or finish up if inference is complete (block=false)
+ * (This function is only intended to be used in conjunction with ethosu_invoke_async)
+ * Returns
+ *    1 on inference running (only for block=false)
+ *    0 on inference success
+ *   -1 on inference error
+ *   -2 on inference not invoked
+ */
+int ethosu_wait(struct ethosu_driver *drv, bool block);
+
+/**
  * Set Ethos-U power mode.
  */
 void ethosu_set_power_mode(struct ethosu_driver *drv, bool always_on);
diff --git a/src/ethosu_device.h b/src/ethosu_device.h
index cc9ea8b..0b45fd6 100644
--- a/src/ethosu_device.h
+++ b/src/ethosu_device.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021 Arm Limited. All rights reserved.
+ * Copyright (c) 2019-2022 Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -81,13 +81,17 @@
  *                             - All input tensors
  *                             - All output tensors
  * \param[in] num_base_addr    Number of base addresses.
- * \return                     \ref ethosu_error_codes
  */
-enum ethosu_error_codes ethosu_dev_run_command_stream(struct ethosu_device *dev,
-                                                      const uint8_t *cmd_stream_ptr,
-                                                      uint32_t cms_length,
-                                                      const uint64_t *base_addr,
-                                                      int num_base_addr);
+void ethosu_dev_run_command_stream(struct ethosu_device *dev,
+                                   const uint8_t *cmd_stream_ptr,
+                                   uint32_t cms_length,
+                                   const uint64_t *base_addr,
+                                   int num_base_addr);
+
+/**
+ * Print information on NPU error status
+ */
+void ethosu_dev_print_err_status(struct ethosu_device *dev);
 
 /**
  *  Interrupt handler on device layer
diff --git a/src/ethosu_device_u55_u65.c b/src/ethosu_device_u55_u65.c
index 9a92f63..31379fc 100644
--- a/src/ethosu_device_u55_u65.c
+++ b/src/ethosu_device_u55_u65.c
@@ -133,11 +133,11 @@
     return ETHOSU_SUCCESS;
 }
 
-enum ethosu_error_codes ethosu_dev_run_command_stream(struct ethosu_device *dev,
-                                                      const uint8_t *cmd_stream_ptr,
-                                                      uint32_t cms_length,
-                                                      const uint64_t *base_addr,
-                                                      int num_base_addr)
+void ethosu_dev_run_command_stream(struct ethosu_device *dev,
+                                   const uint8_t *cmd_stream_ptr,
+                                   uint32_t cms_length,
+                                   const uint64_t *base_addr,
+                                   int num_base_addr)
 {
     assert(num_base_addr <= NPU_REG_BASEP_ARRLEN);
 
@@ -168,8 +168,14 @@
 
     dev->reg->CMD.word = cmd.word;
     LOG_DEBUG("CMD=0x%08x", cmd.word);
+}
 
-    return ETHOSU_SUCCESS;
+void ethosu_dev_print_err_status(struct ethosu_device *dev)
+{
+    LOG_ERR("NPU status=0x%08" PRIx32 ", qread=%" PRIu32 ", cmd_end_reached=%d",
+            dev->reg->STATUS.word,
+            dev->reg->QREAD.word,
+            dev->reg->STATUS.cmd_end_reached);
 }
 
 bool ethosu_dev_handle_interrupt(struct ethosu_device *dev)
@@ -185,12 +191,6 @@
     if (dev->reg->STATUS.bus_status || dev->reg->STATUS.cmd_parse_error || dev->reg->STATUS.wd_fault ||
         dev->reg->STATUS.ecc_fault || !dev->reg->STATUS.cmd_end_reached)
     {
-        LOG_ERR("NPU fault. status=0x%08" PRIx32 ", qread=%" PRIu32 ", cmd_end_reached=%d",
-                dev->reg->STATUS.word,
-                dev->reg->QREAD.word,
-                dev->reg->STATUS.cmd_end_reached);
-        ethosu_dev_soft_reset(dev);
-        ethosu_dev_set_clock_and_power(dev, ETHOSU_CLOCK_Q_UNCHANGED, ETHOSU_POWER_Q_DISABLE);
         return false;
     }
 
diff --git a/src/ethosu_driver.c b/src/ethosu_driver.c
index 866b94c..9175991 100644
--- a/src/ethosu_driver.c
+++ b/src/ethosu_driver.c
@@ -147,7 +147,7 @@
 
 struct ethosu_semaphore_t
 {
-    int count;
+    uint8_t count;
 };
 
 static void *ethosu_mutex;
@@ -177,7 +177,7 @@
 void *__attribute__((weak)) ethosu_semaphore_create(void)
 {
     struct ethosu_semaphore_t *sem = malloc(sizeof(*sem));
-    sem->count                     = 1;
+    sem->count                     = 0;
     return sem;
 }
 
@@ -190,18 +190,18 @@
 void __attribute__((weak)) ethosu_semaphore_take(void *sem)
 {
     struct ethosu_semaphore_t *s = sem;
-    while (s->count <= 0)
+    while (s->count == 0)
     {
         __WFE();
     }
-    s->count--;
+    s->count = 0;
 }
 
 // Baremetal simulation of giving a semaphore and waking up processes using intrinsics
 void __attribute__((weak)) ethosu_semaphore_give(void *sem)
 {
     struct ethosu_semaphore_t *s = sem;
-    s->count++;
+    s->count                     = 1;
     __SEV();
 }
 
@@ -224,20 +224,6 @@
 /******************************************************************************
  * Static functions
  ******************************************************************************/
-static inline void wait_for_irq(struct ethosu_driver *drv)
-{
-    while (1)
-    {
-        if (drv->irq_triggered)
-        {
-            drv->irq_triggered = false;
-            break;
-        }
-
-        ethosu_semaphore_take(drv->semaphore);
-    }
-}
-
 static void ethosu_register_driver(struct ethosu_driver *drv)
 {
     // Register driver as new HEAD of list
@@ -290,6 +276,11 @@
     return NULL;
 }
 
+static void ethosu_reset_job(struct ethosu_driver *drv)
+{
+    memset(&drv->job, 0, sizeof(struct ethosu_job));
+}
+
 static int handle_optimizer_config(struct ethosu_driver *drv, struct opt_cfg_s *opt_cfg_p)
 {
     LOG_INFO("Optimizer release nbr: %d patch: %d", opt_cfg_p->da_data.rel_nbr, opt_cfg_p->da_data.patch_nbr);
@@ -302,12 +293,7 @@
     return 0;
 }
 
-static int handle_command_stream(struct ethosu_driver *drv,
-                                 const uint8_t *cmd_stream,
-                                 const int cms_length,
-                                 const uint64_t *base_addr,
-                                 const size_t *base_addr_size,
-                                 const int num_base_addr)
+static int handle_command_stream(struct ethosu_driver *drv, const uint8_t *cmd_stream, const int cms_length)
 {
     uint32_t cms_bytes       = cms_length * BYTES_IN_32_BITS;
     ptrdiff_t cmd_stream_ptr = (ptrdiff_t)cmd_stream;
@@ -321,27 +307,28 @@
     }
 
     // Verify 16 byte alignment for base address'
-    for (int i = 0; i < num_base_addr; i++)
+    for (int i = 0; i < drv->job.num_base_addr; i++)
     {
-        if (0 != (base_addr[i] & MASK_16_BYTE_ALIGN))
+        if (0 != (drv->job.base_addr[i] & MASK_16_BYTE_ALIGN))
         {
-            LOG_ERR("Base addr %d: 0x%llx not aligned to 16 bytes", i, base_addr[i]);
+            LOG_ERR("Base addr %d: 0x%llx not aligned to 16 bytes", i, drv->job.base_addr[i]);
             return -1;
         }
     }
 
-    /* Flush the cache if available on CPU.
-     * The upcasting to uin32_t* is ok since the pointer never is dereferenced.
-     * The base_addr_size is null if invoking from prior to invoke_V2, in that case
-     * the whole cache is being flushed.
-     */
+    drv->job.state = ETHOSU_JOB_RUNNING;
 
-    if (base_addr_size != NULL)
+    // Flush the cache if available on CPU.
+    // The upcasting to uint32_t* is ok since the pointer is never dereferenced.
+    // The base_addr_size is null if invoking from prior to invoke_V2, in that case
+    // the whole cache is being flushed.
+
+    if (drv->job.base_addr_size != NULL)
     {
         ethosu_flush_dcache((uint32_t *)cmd_stream_ptr, cms_bytes);
-        for (int i = 0; i < num_base_addr; i++)
+        for (int i = 0; i < drv->job.num_base_addr; i++)
         {
-            ethosu_flush_dcache((uint32_t *)(uintptr_t)base_addr[i], base_addr_size[i]);
+            ethosu_flush_dcache((uint32_t *)(uintptr_t)drv->job.base_addr[i], drv->job.base_addr_size[i]);
         }
     }
     else
@@ -349,31 +336,23 @@
         ethosu_flush_dcache(NULL, 0);
     }
 
+    // Request power gating disabled during inference run
+    if (!drv->dev_power_always_on)
+    {
+        // Will soft reset if security state or privilege level needs changing.
+        // Also note that any configurations done in the NPU prior to this point
+        // are lost in case power gating has been in effect.
+        set_clock_and_power_request(drv, ETHOSU_INFERENCE_REQUEST, ETHOSU_CLOCK_Q_ENABLE, ETHOSU_POWER_Q_DISABLE);
+
+        // Make sure AXI settings are applied
+        ethosu_dev_axi_init(drv->dev);
+    }
+
+    // Inference begin callback
+    ethosu_inference_begin(drv, drv->job.user_arg);
+
     // Execute the command stream
-    if (ETHOSU_SUCCESS != ethosu_dev_run_command_stream(drv->dev, cmd_stream, cms_bytes, base_addr, num_base_addr))
-    {
-        return -1;
-    }
-
-    wait_for_irq(drv);
-
-    // Check if any error occured
-    if (drv->status_error)
-    {
-        return -1;
-    }
-
-    if (base_addr_size != NULL)
-    {
-        for (int i = 0; i < num_base_addr; i++)
-        {
-            ethosu_invalidate_dcache((uint32_t *)(uintptr_t)base_addr[i], base_addr_size[i]);
-        }
-    }
-    else
-    {
-        ethosu_invalidate_dcache(NULL, 0);
-    }
+    ethosu_dev_run_command_stream(drv->dev, cmd_stream, cms_bytes, drv->job.base_addr, drv->job.num_base_addr);
 
     return 0;
 }
@@ -385,7 +364,7 @@
 {
     LOG_DEBUG("Got interrupt from Ethos-U");
 
-    drv->irq_triggered = true;
+    drv->job.state = ETHOSU_JOB_DONE;
     if (!ethosu_dev_handle_interrupt(drv->dev))
     {
         drv->status_error = true;
@@ -424,7 +403,6 @@
 
     drv->fast_memory      = (uint32_t)fast_memory;
     drv->fast_memory_size = fast_memory_size;
-    drv->irq_triggered    = false;
 
     // Initialize the device and set requested security state and privilege mode
     drv->dev = ethosu_dev_init(base_address, secure_enable, privilege_enable);
@@ -476,33 +454,129 @@
     ethosu_dev_get_hw_info(drv->dev, hw);
 }
 
-int ethosu_invoke_v3(struct ethosu_driver *drv,
-                     const void *custom_data_ptr,
-                     const int custom_data_size,
-                     const uint64_t *base_addr,
-                     const size_t *base_addr_size,
-                     const int num_base_addr,
-                     void *user_arg)
+int ethosu_wait(struct ethosu_driver *drv, bool block)
 {
+    int ret = 0;
+
+    switch (drv->job.state)
+    {
+    case ETHOSU_JOB_IDLE:
+        LOG_ERR("Inference job not running...");
+        ret = -2;
+        break;
+    case ETHOSU_JOB_RUNNING:
+        if (!block)
+        {
+            // Inference still running, do not block
+            ret = 1;
+            break;
+        }
+        // fall through
+    case ETHOSU_JOB_DONE:
+        // Wait for interrupt in blocking mode. In non-blocking mode
+        // the interrupt has already triggered
+        ethosu_semaphore_take(drv->semaphore);
+
+        // Inference done callback
+        ethosu_inference_end(drv, drv->job.user_arg);
+
+        // Check NPU and interrupt status
+        if (drv->status_error)
+        {
+            LOG_ERR("NPU error(s) occured during inference.");
+            ethosu_dev_print_err_status(drv->dev);
+
+            // Reset the NPU
+            (void)ethosu_dev_soft_reset(drv->dev);
+            // NPU is no longer in error state
+            drv->status_error = false;
+
+            ret = -1;
+        }
+
+        // Clear the clock/power gating disable request
+        if (!drv->dev_power_always_on)
+        {
+            // NOTE: Other requesters (like PMU) can be active, keeping
+            // clock/power gating disabled until no requests remain.
+            set_clock_and_power_request(drv, ETHOSU_INFERENCE_REQUEST, ETHOSU_CLOCK_Q_ENABLE, ETHOSU_POWER_Q_ENABLE);
+        }
+
+        if (ret == 0)
+        {
+            // Invalidate cache
+            if (drv->job.base_addr_size != NULL)
+            {
+                for (int i = 0; i < drv->job.num_base_addr; i++)
+                {
+                    ethosu_invalidate_dcache((uint32_t *)(uintptr_t)drv->job.base_addr[i], drv->job.base_addr_size[i]);
+                }
+            }
+            else
+            {
+                ethosu_invalidate_dcache(NULL, 0);
+            }
+
+            LOG_DEBUG("Inference finished successfully...");
+        }
+
+        // Reset internal job (state resets to IDLE)
+        ethosu_reset_job(drv);
+        break;
+
+    default:
+        LOG_ERR("Unexpected job state");
+        ethosu_reset_job(drv);
+        ret = -1;
+        break;
+    }
+
+    // Return inference job status
+    return ret;
+}
+
+int ethosu_invoke_async(struct ethosu_driver *drv,
+                        const void *custom_data_ptr,
+                        const int custom_data_size,
+                        const uint64_t *base_addr,
+                        const size_t *base_addr_size,
+                        const int num_base_addr,
+                        void *user_arg)
+{
+
     const struct cop_data_s *data_ptr = custom_data_ptr;
     const struct cop_data_s *data_end = (struct cop_data_s *)((ptrdiff_t)custom_data_ptr + custom_data_size);
-    int return_code                   = 0;
+
+    // Make sure an inference is not already running
+    if (drv->job.state != ETHOSU_JOB_IDLE)
+    {
+        LOG_ERR("Inference already running, or waiting to be cleared...");
+        return -1;
+    }
+
+    drv->job.state            = ETHOSU_JOB_IDLE;
+    drv->job.custom_data_ptr  = custom_data_ptr;
+    drv->job.custom_data_size = custom_data_size;
+    drv->job.base_addr        = base_addr;
+    drv->job.base_addr_size   = base_addr_size;
+    drv->job.num_base_addr    = num_base_addr;
+    drv->job.user_arg         = user_arg;
 
     // First word in custom_data_ptr should contain "Custom Operator Payload 1"
     if (data_ptr->word != ETHOSU_FOURCC)
     {
         LOG_ERR("Custom Operator Payload: %" PRIu32 " is not correct, expected %x", data_ptr->word, ETHOSU_FOURCC);
-        return -1;
+        goto err;
     }
 
     // Custom data length must be a multiple of 32 bits
     if ((custom_data_size % BYTES_IN_32_BITS) != 0)
     {
         LOG_ERR("custom_data_size=0x%x not a multiple of 4", custom_data_size);
-        return -1;
+        goto err;
     }
 
-    ++data_ptr;
+    data_ptr++;
 
     // Adjust base address to fast memory area
     if (drv->fast_memory != 0 && num_base_addr >= FAST_MEMORY_BASE_ADDR_INDEX)
@@ -514,53 +588,39 @@
             LOG_ERR("Fast memory area too small. fast_memory_size=%u, base_addr_size=%u",
                     drv->fast_memory_size,
                     base_addr_size[FAST_MEMORY_BASE_ADDR_INDEX]);
-            return -1;
+            goto err;
         }
 
         *fast_memory = drv->fast_memory;
     }
 
-    // NPU might have lost power and thus its settings and state
-    if (!drv->dev_power_always_on)
-    {
-        // Set power ON during the inference. Will soft reset if security state or
-        // privilege level needs changing
-        set_clock_and_power_request(drv, ETHOSU_INFERENCE_REQUEST, ETHOSU_CLOCK_Q_ENABLE, ETHOSU_POWER_Q_DISABLE);
-
-        // Make sure AXI settings are applied
-        ethosu_dev_axi_init(drv->dev);
-    }
-
     drv->status_error = false;
 
-    ethosu_inference_begin(drv, user_arg);
-
+    // Parse Custom Operator Payload data
     while (data_ptr < data_end)
     {
-        int ret = 0;
         switch (data_ptr->driver_action_command)
         {
         case OPTIMIZER_CONFIG:
             LOG_DEBUG("OPTIMIZER_CONFIG");
             struct opt_cfg_s *opt_cfg_p = (struct opt_cfg_s *)data_ptr;
 
-            ret = handle_optimizer_config(drv, opt_cfg_p);
+            if (handle_optimizer_config(drv, opt_cfg_p) < 0)
+            {
+                goto err;
+            }
             data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD + OPTIMIZER_CONFIG_LENGTH_32_BIT_WORD;
             break;
         case COMMAND_STREAM:
+            // Vela only supports putting one COMMAND_STREAM per op
             LOG_DEBUG("COMMAND_STREAM");
             void *command_stream = (uint8_t *)(data_ptr) + sizeof(struct cop_data_s);
             int cms_length       = (data_ptr->reserved << 16) | data_ptr->length;
 
-            // It is safe to clear this flag without atomic, because npu is not running.
-            drv->irq_triggered = false;
-
-            ret = handle_command_stream(drv, command_stream, cms_length, base_addr, base_addr_size, num_base_addr);
-            if (ret < 0)
+            if (handle_command_stream(drv, command_stream, cms_length) < 0)
             {
-                LOG_ERR("Inference failed.");
+                goto err;
             }
-
             data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD + cms_length;
             break;
         case NOP:
@@ -569,42 +629,58 @@
             break;
         default:
             LOG_ERR("UNSUPPORTED driver_action_command: %d", data_ptr->driver_action_command);
-            ret = -1;
-            break;
-        }
-        if (ret != 0)
-        {
-            return_code = -1;
+            goto err;
             break;
         }
     }
 
-    ethosu_inference_end(drv, user_arg);
+    return 0;
+err:
+    LOG_ERR("Failed to invoke inference.");
+    ethosu_reset_job(drv);
+    return -1;
+}
 
-    if (!drv->status_error && !drv->dev_power_always_on)
+int ethosu_invoke_v3(struct ethosu_driver *drv,
+                     const void *custom_data_ptr,
+                     const int custom_data_size,
+                     const uint64_t *base_addr,
+                     const size_t *base_addr_size,
+                     const int num_base_addr,
+                     void *user_arg)
+{
+    if (ethosu_invoke_async(
+            drv, custom_data_ptr, custom_data_size, base_addr, base_addr_size, num_base_addr, user_arg) < 0)
     {
-        set_clock_and_power_request(drv, ETHOSU_INFERENCE_REQUEST, ETHOSU_CLOCK_Q_ENABLE, ETHOSU_POWER_Q_ENABLE);
+        return -1;
     }
 
-    return return_code;
+    return ethosu_wait(drv, true);
 }
 
 void ethosu_set_power_mode(struct ethosu_driver *drv, bool always_on)
 {
     drv->dev_power_always_on = always_on;
 
-    if (always_on && ethosu_dev_verify_access_state(drv->dev) == false)
+    if (always_on)
     {
-        // Reset to enter correct security state/privilege mode
-        if (ethosu_dev_soft_reset(drv->dev) == false)
+        if (ethosu_dev_verify_access_state(drv->dev) == false)
         {
-            LOG_ERR("Failed to set power mode for Ethos-U");
-            return;
+            // Reset to enter correct security state/privilege mode
+            if (ethosu_dev_soft_reset(drv->dev) == false)
+            {
+                LOG_ERR("Failed to set power mode for Ethos-U");
+                return;
+            }
         }
-    }
 
-    ethosu_dev_set_clock_and_power(
-        drv->dev, ETHOSU_CLOCK_Q_UNCHANGED, always_on ? ETHOSU_POWER_Q_DISABLE : ETHOSU_POWER_Q_ENABLE);
+        ethosu_dev_set_clock_and_power(drv->dev, ETHOSU_CLOCK_Q_UNCHANGED, ETHOSU_POWER_Q_DISABLE);
+        ethosu_dev_axi_init(drv->dev);
+    }
+    else
+    {
+        ethosu_dev_set_clock_and_power(drv->dev, ETHOSU_CLOCK_Q_UNCHANGED, ETHOSU_POWER_Q_ENABLE);
+    }
 }
 
 struct ethosu_driver *ethosu_reserve_driver(void)
@@ -635,6 +711,21 @@
     ethosu_mutex_lock(ethosu_mutex);
     if (drv != NULL && drv->reserved)
     {
+        if (drv->job.state == ETHOSU_JOB_RUNNING || drv->job.state == ETHOSU_JOB_DONE)
+        {
+            // Give the inference one shot to complete or force kill the job
+            if (ethosu_wait(drv, false) == 1)
+            {
+                // Still running, soft reset the NPU and reset driver
+                ethosu_dev_soft_reset(drv->dev);
+                ethosu_reset_job(drv);
+                drv->status_error = false;
+                ethosu_semaphore_give(drv->semaphore);
+                (void)set_clock_and_power_request(
+                    drv, ETHOSU_INFERENCE_REQUEST, ETHOSU_CLOCK_Q_ENABLE, ETHOSU_POWER_Q_ENABLE);
+            }
+        }
+
         drv->reserved = false;
         LOG_DEBUG("NPU driver handle %p released", drv);
         ethosu_semaphore_give(ethosu_semaphore);