Add simple async API testing to driver_unit_tests

Change-Id: Iae5733efb40a8fd11e7108e93cc719f67b4f1be3
diff --git a/applications/driver_unit_tests/command_stream.cpp b/applications/driver_unit_tests/command_stream.cpp
index 76dba6f..a2cb63e 100644
--- a/applications/driver_unit_tests/command_stream.cpp
+++ b/applications/driver_unit_tests/command_stream.cpp
@@ -133,6 +133,31 @@
     return 0;
 }
 
+// Submits the command stream with ethosu_invoke_async(); returns 0 on success, 1 on failure.
+int CommandStream::run_async() {
+    // Split the base pointers into the separate address and size arrays handed to the driver
+    uint64_t addresses[ETHOSU_BASEP_INDEXES];
+    size_t sizes[ETHOSU_BASEP_INDEXES];
+    for (size_t idx = 0; idx < ETHOSU_BASEP_INDEXES; ++idx) {
+        addresses[idx] = reinterpret_cast<uint64_t>(basePointers[idx].data);
+        sizes[idx]     = reinterpret_cast<size_t>(basePointers[idx].size);
+    }
+
+    const int status = ethosu_invoke_async(
+        drv, commandStream.data, commandStream.size, addresses, sizes, ETHOSU_BASEP_INDEXES, nullptr);
+    if (status == 0) {
+        return 0;
+    }
+
+    // The driver's error code is only logged; callers always see 1 on failure
+    printf("Inference invoke async failed. error=%d\n", status);
+    return 1;
+}
+
+int CommandStream::wait_async(bool block) {
+    return ethosu_wait(drv, block); // Driver result is forwarded as-is; conv.cpp keeps polling while it is 1
+}
+
 DataPointer &CommandStream::getCommandStream() {
     return commandStream;
 }
diff --git a/applications/driver_unit_tests/command_stream.hpp b/applications/driver_unit_tests/command_stream.hpp
index 1783f4c..7163b9d 100644
--- a/applications/driver_unit_tests/command_stream.hpp
+++ b/applications/driver_unit_tests/command_stream.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * Copyright (c) 2021-2022 Arm Limited.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -92,6 +92,8 @@
     virtual ~CommandStream();
 
     int run(size_t repeat = 1);
+    int run_async();                   // Calls ethosu_invoke_async() once; returns 0 on success, 1 on failure
+    int wait_async(bool block = true); // Returns the result of ethosu_wait(drv, block) unchanged
 
     DataPointer &getCommandStream();
     BasePointers &getBasePointers();
diff --git a/applications/driver_unit_tests/conv.cpp b/applications/driver_unit_tests/conv.cpp
index aa4a442..63515fa 100644
--- a/applications/driver_unit_tests/conv.cpp
+++ b/applications/driver_unit_tests/conv.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * Copyright (c) 2021-2022 Arm Limited.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -322,6 +322,9 @@
                      PmuEvents({ETHOSU_PMU_CYCLE, ETHOSU_PMU_NPU_IDLE, ETHOSU_PMU_NPU_ACTIVE}));
 
     const size_t repeat = 100;
+    uint64_t cycleCount = 0;
+    int errors          = 0;
+    int res;
 
     // Input data located inside the scratch buffer
     DataPointer inputPointer(scratch, sizeof(input0));
@@ -332,22 +335,78 @@
     // Expected output data
     DataPointer expectedPointer(expected0, sizeof(expected0));
 
+    printf("Running %zu iterations using sync API...\n", repeat);
+
     // Clear PMU
     cs.getPmu().clear();
 
     // Run inference
-    int ret             = cs.run(repeat);
-    uint64_t cycleCount = cs.getPmu().getCycleCount();
+    res = cs.run(repeat);
+    if (res == 0) {
+        cycleCount = cs.getPmu().getCycleCount();
 
-    // Print PMU counters
-    cs.getPmu().print();
-    printf("cycleCount=%llu, cycleCountPerJob=%llu\n", cycleCount, cycleCount / repeat);
+        // Print PMU counters
+        cs.getPmu().print();
+        printf("cycleCount=%llu, cycleCountPerJob=%llu\n", cycleCount, cycleCount / repeat);
 
-    // Compare outut with expected data
-    if (outputPointer != expectedPointer) {
-        printf("Output mismatch\n");
-        return 1;
+        // Compare output with expected data
+        if (outputPointer != expectedPointer) {
+            printf("Output mismatch for sync test\n");
+            res = 1;
+        }
+    }
+    errors = res;
+    printf("\n");
+
+    // Test using the async API
+    printf("Running %zu iterations using async API...\n", repeat);
+
+    // Clear PMU
+    cs.getPmu().clear();
+
+    // Dummy wait loop count for inference to complete
+    int wait_loops = 0;
+
+    // Run async job
+    for (size_t r = repeat; r > 0; r--) {
+        res = cs.run_async();
+        if (res != 0) {
+            // Invoke async failed
+            errors++;
+            break;
+        }
+
+        // Wait for inference to complete
+        while ((res = cs.wait_async(false)) == 1) {
+            // CPU is free to do other work while waiting for inference completion
+            wait_loops++;
+        }
+
+        // Check inference result
+        if (res != 0) {
+            printf("Async inference test failed\n");
+            errors++;
+            break;
+        }
     }
 
-    return ret;
+    // Print async results on success
+    if (res == 0) {
+        cycleCount = cs.getPmu().getCycleCount();
+
+        // Print PMU counters
+        cs.getPmu().print();
+        printf("cycleCount=%llu, cycleCountPerJob=%llu\n", cycleCount, cycleCount / repeat);
+
+        printf("wait_loops=%d\n", wait_loops);
+
+        // Compare output with expected data
+        if (outputPointer != expectedPointer) {
+            printf("Output mismatch for async test\n");
+            errors++;
+        }
+    }
+    printf("\n");
+
+    return errors;
 }