Added simplified driver setup

If NPU power is guaranteed to be always on, parts of the driver setup can be omitted to reduce the number of cycles required to set up the NPU drivers.
By enabling dev_power_always_on, the setup cycles are reduced by approximately 50% (4462->2238 in prologue, 1167->642 in epilogue).

Change-Id: I56d380c2571fedbc8888fb7c00fce0e4320f7fb7
diff --git a/include/ethosu_driver.h b/include/ethosu_driver.h
index 46f777a..d47b676 100644
--- a/include/ethosu_driver.h
+++ b/include/ethosu_driver.h
@@ -44,6 +44,7 @@
     uint64_t fast_memory;
     size_t fast_memory_size;
     bool status_error;
+    bool dev_power_always_on;
 };
 
 struct ethosu_version_id
@@ -126,6 +127,11 @@
  */
 void ethosu_irq_handler(void);
 
+/**
+ * Set Ethos-U power mode. Pass true when NPU power is guaranteed always on so parts of driver setup can be skipped.
+ */
+void ethosu_set_power_mode(bool always_on);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/ethosu_driver.c b/src/ethosu_driver.c
index 8bc927c..15326e3 100644
--- a/src/ethosu_driver.c
+++ b/src/ethosu_driver.c
@@ -150,11 +150,13 @@
 
 struct ethosu_driver ethosu_drv = {
     .dev = {.base_address = NULL, .proto = 0, .pmccntr = {0}, .pmu_evcntr = {0, 0, 0, 0}, .pmu_evtypr = {0, 0, 0, 0}},
-    .abort_inference = false,
-    .status_error    = false};
+    .abort_inference     = false,
+    .status_error        = false,
+    .dev_power_always_on = false};
 
 // IRQ
 static volatile bool irq_triggered = false;
+static int ethosu_soft_reset_and_restore(struct ethosu_driver *drv);
 void ethosu_irq_handler(void)
 {
     uint8_t irq_raised = 0;
@@ -177,7 +179,7 @@
 
     if (ethosu_status_has_error(&ethosu_drv.dev))
     {
-        (void)ethosu_soft_reset(&ethosu_drv.dev);
+        ethosu_soft_reset_and_restore(&ethosu_drv);
         ethosu_drv.status_error = true;
     }
 }
@@ -253,6 +255,7 @@
         LOG_ERR("Failed reset of Ethos-U\n");
         return -1;
     }
+
     ethosu_drv.status_error = false;
 
     return return_code;
@@ -335,18 +338,21 @@
         *fast_memory = ethosu_drv.fast_memory;
     }
 
-    // Only soft reset if securty state or privilege level needs changing
-    if (ethosu_drv.dev.proto != ethosu_read_reg(&ethosu_drv.dev, NPU_REG_PROT))
+    if (!ethosu_drv.dev_power_always_on)
     {
-        if (ETHOSU_SUCCESS != ethosu_soft_reset(&ethosu_drv.dev))
+        if (ethosu_drv.dev.proto != ethosu_read_reg(&ethosu_drv.dev, NPU_REG_PROT))
         {
-            return -1;
+            if (ETHOSU_SUCCESS != ethosu_soft_reset(&ethosu_drv.dev))
+            {
+                return -1;
+            }
         }
+        ethosu_set_clock_and_power(&ethosu_drv.dev, ETHOSU_CLOCK_Q_ENABLE, ETHOSU_POWER_Q_DISABLE);
+        ethosu_restore_pmu_config(&ethosu_drv.dev);
+        npu_axi_init(&ethosu_drv);
     }
 
     ethosu_drv.status_error = false;
-    ethosu_set_clock_and_power(&ethosu_drv.dev, ETHOSU_CLOCK_Q_ENABLE, ETHOSU_POWER_Q_DISABLE);
-    ethosu_restore_pmu_config(&ethosu_drv.dev);
 
     while (data_ptr < data_end)
     {
@@ -411,7 +417,7 @@
         }
     }
 
-    if (!ethosu_drv.status_error)
+    if (!ethosu_drv.status_error && !ethosu_drv.dev_power_always_on)
     {
         ethosu_save_pmu_counters(&ethosu_drv.dev);
         ethosu_set_clock_and_power(&ethosu_drv.dev, ETHOSU_CLOCK_Q_ENABLE, ETHOSU_POWER_Q_ENABLE);
@@ -425,6 +431,32 @@
     ethosu_drv.abort_inference = true;
 }
 
+void ethosu_set_power_mode(bool always_on)
+{
+    ethosu_drv.dev_power_always_on = always_on;
+    // Per-run setup guarded by !dev_power_always_on is skipped in this mode, so npu_axi_init() runs once here.
+    if (always_on)
+    {
+        npu_axi_init(&ethosu_drv);
+    }
+}
+
+/* Soft reset, then redo clock/power, AXI and PMU-config setup. Returns 0, or -1 if the reset fails. */
+static int ethosu_soft_reset_and_restore(struct ethosu_driver *drv)
+{
+    const bool reset_ok = (ethosu_soft_reset(&drv->dev) == ETHOSU_SUCCESS);
+
+    if (reset_ok)
+    {
+        // Only reached when ethosu_soft_reset() reported ETHOSU_SUCCESS.
+        ethosu_set_clock_and_power(&drv->dev, ETHOSU_CLOCK_Q_ENABLE, ETHOSU_POWER_Q_DISABLE);
+        npu_axi_init(drv);
+        ethosu_restore_pmu_config(&drv->dev);
+    }
+
+    return reset_ok ? 0 : -1;
+}
+
 static int handle_optimizer_config(struct ethosu_driver *drv, struct opt_cfg_s *opt_cfg_p)
 {
     struct ethosu_config cfg;
@@ -583,8 +615,6 @@
         return -1;
     }
 
-    npu_axi_init(drv);
-
     /* Flush the cache if available on our CPU.
      * The upcasting to uin32_t* is ok since the pointer never is dereferenced.
      * The base_addr_size is null if invoking from prior to invoke_V2, in that case