PMU multi NPU support

Update the sample applications to use an external context for the PMU
configuration. The external context stored in the InferenceJob is set
as the TFLu external context and is passed back as the user argument to
the ethosu_inference_begin() and ethosu_inference_end() callbacks.

Change-Id: Ief1f0943e322c2b50e8b964017af59161f67de6b
diff --git a/applications/message_handler/message_handler.cpp b/applications/message_handler/message_handler.cpp
index f9f7304..585d63c 100644
--- a/applications/message_handler/message_handler.cpp
+++ b/applications/message_handler/message_handler.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021 Arm Limited. All rights reserved.
+ * Copyright (c) 2020-2022 Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -22,6 +22,7 @@
 
 #ifdef ETHOSU
 #include <ethosu_driver.h>
+#include <pmu_ethosu.h>
 #endif
 
 #include "FreeRTOS.h"
@@ -150,12 +151,12 @@
             break;
         }
 
-        printf("Msg: InferenceReq. user_arg=0x%" PRIx64 ", network={0x%" PRIx32 ", %" PRIu32 "}\n",
+        printf("Msg: InferenceReq. user_arg=0x%" PRIx64 ", network={0x%" PRIx32 ", %" PRIu32 "}, \n",
                inference.user_arg,
                inference.network.ptr,
                inference.network.size);
 
-        printf(", ifm_count=%" PRIu32 ", ifm=[", inference.ifm_count);
+        printf("ifm_count=%" PRIu32 ", ifm=[", inference.ifm_count);
         for (uint32_t i = 0; i < inference.ifm_count; ++i) {
             if (i > 0) {
                 printf(", ");
@@ -223,6 +224,9 @@
 }
 
 void InferenceHandler::runInference(ethosu_core_inference_req &req, ethosu_core_inference_rsp &rsp) {
+    currentReq = &req;
+    currentRsp = &rsp;
+
     /*
      * Setup inference job
      */
@@ -239,15 +243,7 @@
         ofm.push_back(InferenceProcess::DataPtr(reinterpret_cast<void *>(req.ofm[i].ptr), req.ofm[i].size));
     }
 
-    std::vector<InferenceProcess::DataPtr> expectedOutput;
-
-    std::vector<uint8_t> pmuEventConfig(ETHOSU_CORE_PMU_MAX);
-    for (uint32_t i = 0; i < ETHOSU_CORE_PMU_MAX; i++) {
-        pmuEventConfig[i] = req.pmu_event_config[i];
-    }
-
-    InferenceProcess::InferenceJob job(
-        "job", networkModel, ifm, ofm, expectedOutput, -1, pmuEventConfig, req.pmu_cycle_counter_enable);
+    InferenceProcess::InferenceJob job("job", networkModel, ifm, ofm, {}, -1, this);
 
     /*
      * Run inference
@@ -269,16 +265,8 @@
         rsp.ofm_size[i] = job.output[i].size;
     }
 
-    for (size_t i = 0; i < job.pmuEventConfig.size(); i++) {
-        rsp.pmu_event_config[i] = job.pmuEventConfig[i];
-    }
-
-    for (size_t i = 0; i < job.pmuEventCount.size(); i++) {
-        rsp.pmu_event_count[i] = job.pmuEventCount[i];
-    }
-
-    rsp.pmu_cycle_counter_enable = job.pmuCycleCounterEnable;
-    rsp.pmu_cycle_counter_count  = job.pmuCycleCounterCount;
+    currentReq = nullptr;
+    currentRsp = nullptr;
 }
 
 /****************************************************************************
@@ -404,3 +392,56 @@
 }
 
 } // namespace MessageHandler
+
+extern "C" void ethosu_inference_begin(struct ethosu_driver *drv, void *userArg) {
+    MessageHandler::InferenceHandler *self = static_cast<MessageHandler::InferenceHandler *>(userArg);
+    // Sets up the PMU of 'drv' from the request (currentReq) of the InferenceHandler passed in as userArg.
+    // Number of event counters to program: the smaller of the PMU's counter count and ETHOSU_CORE_PMU_MAX
+    const int numEvents = std::min(static_cast<int>(ETHOSU_PMU_Get_NumEventCounters()), ETHOSU_CORE_PMU_MAX);
+
+    // Enable PMU
+    ETHOSU_PMU_Enable(drv);
+
+    // Program each event counter with the event type from the request, then enable it (mask bit i)
+    for (int i = 0; i < numEvents; i++) {
+        ETHOSU_PMU_Set_EVTYPER(drv, i, static_cast<ethosu_pmu_event_type>(self->currentReq->pmu_event_config[i]));
+        ETHOSU_PMU_CNTR_Enable(drv, 1 << i);
+    }
+
+    // Enable and reset the cycle counter only when the request asks for it
+    if (self->currentReq->pmu_cycle_counter_enable) {
+        ETHOSU_PMU_CNTR_Enable(drv, ETHOSU_PMU_CCNT_Msk);
+        ETHOSU_PMU_CYCCNT_Reset(drv);
+    }
+
+    // Reset all event counters, after they have been configured and enabled
+    ETHOSU_PMU_EVCNTR_ALL_Reset(drv);
+}
+
+// Inference-end callback: copies the PMU results of 'drv' into the response of the InferenceHandler in userArg.
+extern "C" void ethosu_inference_end(struct ethosu_driver *drv, void *userArg) {
+    MessageHandler::InferenceHandler *self = static_cast<MessageHandler::InferenceHandler *>(userArg);
+
+    // Get cycle counter. The count is always written, so a disabled counter reports 0 instead of stale data.
+    self->currentRsp->pmu_cycle_counter_enable = self->currentReq->pmu_cycle_counter_enable;
+    self->currentRsp->pmu_cycle_counter_count =
+        self->currentReq->pmu_cycle_counter_enable ? ETHOSU_PMU_Get_CCNTR(drv) : 0;
+
+    // Number of event counters to read: the smaller of the PMU's counter count and ETHOSU_CORE_PMU_MAX
+    const int numEvents = std::min(static_cast<int>(ETHOSU_PMU_Get_NumEventCounters()), ETHOSU_CORE_PMU_MAX);
+
+    // Echo each requested event type with its count; response slots from numEvents upwards are zeroed
+    int i;
+    for (i = 0; i < numEvents; i++) {
+        self->currentRsp->pmu_event_config[i] = self->currentReq->pmu_event_config[i];
+        self->currentRsp->pmu_event_count[i]  = ETHOSU_PMU_Get_EVCNTR(drv, i);
+    }
+
+    for (; i < ETHOSU_CORE_PMU_MAX; i++) {
+        self->currentRsp->pmu_event_config[i] = 0;
+        self->currentRsp->pmu_event_count[i]  = 0;
+    }
+
+    // Disable PMU
+    ETHOSU_PMU_Disable(drv);
+}