Flush and invalidate data caches

Implement weakly linked functions to handle the data cache.
If the specific device implements a data cache, the functions should
be overridden with device-specific implementations of the flush/invalidate
functions to make sure that the cache is properly maintained with
regard to the NPU DMA transactions.

Change-Id: I175644ef37bee62cc77d789d2b7bc3073e72ea5a
diff --git a/README.md b/README.md
index ba52525..7d6085b 100644
--- a/README.md
+++ b/README.md
@@ -19,6 +19,29 @@
 $ make
 ```
 
+For running the driver on Arm CPUs which are configured with a data cache, the
+cache maintenance functions in the driver are exported as weakly linked
+symbols that should be overridden. An example implementation using the CMSIS
+primitives found in cachel1_armv7.h could be as below:
+
+```
+extern "C" {
+void ethosu_flush_dcache(uint32_t *p, size_t bytes) {
+    if (p)
+        SCB_CleanDCache_by_Addr(p, bytes);
+    else
+        SCB_CleanDCache();
+}
+
+void ethosu_invalidate_dcache(uint32_t *p, size_t bytes) {
+    if (p)
+        SCB_InvalidateDCache_by_Addr(p, bytes);
+    else
+        SCB_InvalidateDCache();
+}
+}
+```
+
 # License
 
 The Arm Ethos-U Core Driver is provided under an Apache-2.0 license. Please see
diff --git a/src/ethosu_driver.c b/src/ethosu_driver.c
index d21e0a7..014fb54 100644
--- a/src/ethosu_driver.c
+++ b/src/ethosu_driver.c
@@ -225,6 +225,7 @@
                                  const uint8_t *cmd_stream,
                                  const int cms_length,
                                  const uint64_t *base_addr,
+                                 const size_t *base_addr_size,
                                  const int num_base_addr);
 static int read_apb_reg(struct ethosu_driver *drv, uint16_t);
 static int dump_shram(struct ethosu_driver *drv);
@@ -375,7 +376,8 @@
             // It is safe to clear this flag without atomic, because npu is not running.
             irq_triggered = false;
 
-            ret = handle_command_stream(&ethosu_drv, command_stream, cms_length, base_addr, num_base_addr);
+            ret = handle_command_stream(
+                &ethosu_drv, command_stream, cms_length, base_addr, base_addr_size, num_base_addr);
 
             if (return_code == -1 && ethosu_drv.abort_inference)
             {
@@ -536,10 +538,29 @@
                                 AXI_LIMIT3_MAX_OUTSTANDING_WRITES);
 }
 
+/* Weak no-op default for flushing the data cache; override it on targets that have a data cache.
+ * p/bytes give the region to flush; an override is expected to flush the whole cache when p is NULL.
+ */
+void __attribute__((weak)) ethosu_flush_dcache(uint32_t *p, size_t bytes)
+{
+    (void)p;
+    (void)bytes;
+}
+
+/* Weak no-op default for invalidating the data cache; override it on targets that have a data cache.
+ * p/bytes give the region to invalidate; an override is expected to invalidate the whole cache when p is NULL.
+ */
+void __attribute__((weak)) ethosu_invalidate_dcache(uint32_t *p, size_t bytes)
+{
+    (void)p;
+    (void)bytes;
+}
+
 static int handle_command_stream(struct ethosu_driver *drv,
                                  const uint8_t *cmd_stream,
                                  const int cms_length,
                                  const uint64_t *base_addr,
+                                 const size_t *base_addr_size,
                                  const int num_base_addr)
 {
     uint32_t qread     = 0;
@@ -567,6 +588,25 @@
     }
     npu_axi_init(drv);
 
+    /* Flush the cache if available on our CPU.
+     * The cast to uint32_t* is OK since the pointer is never dereferenced.
+     * The base_addr_size is NULL if invoked from an interface prior to invoke_V2,
+     * in which case the whole cache is flushed.
+     */
+
+    if (base_addr_size != NULL)
+    {
+        ethosu_flush_dcache((uint32_t *)cmd_stream, cms_bytes);
+        for (int i = 0; i < num_base_addr; i++)
+        {
+            ethosu_flush_dcache((uint32_t *)base_addr[i], base_addr_size[i]);
+        }
+    }
+    else
+    {
+        ethosu_flush_dcache(NULL, 0);
+    }
+
     if (ETHOSU_SUCCESS != ethosu_run_command_stream(&drv->dev, cmd_stream, cms_bytes, base_addr, num_base_addr))
     {
         return -1;
@@ -579,6 +619,18 @@
         return -1;
     }
 
+    if (base_addr_size != NULL)
+    {
+        for (int i = 0; i < num_base_addr; i++)
+        {
+            ethosu_invalidate_dcache((uint32_t *)base_addr[i], base_addr_size[i]);
+        }
+    }
+    else
+    {
+        ethosu_invalidate_dcache(NULL, 0);
+    }
+
     (void)ethosu_get_qread(&drv->dev, &qread);
     if (qread != cms_bytes)
     {