Set TFLu external context

Remove PMU configuration from the InferenceJob struct and add an
external context parameter instead. The external context is passed
to the TFLu interpreter and will be returned in the
ethosu_inference_begin() and ethosu_inference_end() callbacks.

Change-Id: I6dab04c0ab5088b1325be365d77d65d1182e7441
diff --git a/applications/inference_process/include/inference_process.hpp b/applications/inference_process/include/inference_process.hpp
index 6ab453c..9635884 100644
--- a/applications/inference_process/include/inference_process.hpp
+++ b/applications/inference_process/include/inference_process.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021 Arm Limited. All rights reserved.
+ * Copyright (c) 2019-2022 Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -53,10 +53,7 @@
     std::vector<DataPtr> output;
     std::vector<DataPtr> expectedOutput;
     size_t numBytesToPrint;
-    std::vector<uint8_t> pmuEventConfig;
-    bool pmuCycleCounterEnable;
-    std::vector<uint32_t> pmuEventCount;
-    uint64_t pmuCycleCounterCount;
+    void *externalContext;
 
     InferenceJob();
     InferenceJob(const std::string &name,
@@ -64,9 +61,8 @@
                  const std::vector<DataPtr> &input,
                  const std::vector<DataPtr> &output,
                  const std::vector<DataPtr> &expectedOutput,
-                 size_t numBytesToPrint,
-                 const std::vector<uint8_t> &pmuEventConfig,
-                 const bool pmuCycleCounterEnable);
+                 const size_t numBytesToPrint = 0,
+                 void *externalContext        = nullptr);
 
     void invalidate();
     void clean();
diff --git a/applications/inference_process/src/inference_process.cpp b/applications/inference_process/src/inference_process.cpp
index ebd9d6c..4c65005 100644
--- a/applications/inference_process/src/inference_process.cpp
+++ b/applications/inference_process/src/inference_process.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021 Arm Limited. All rights reserved.
+ * Copyright (c) 2019-2022 Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -100,20 +100,18 @@
     return static_cast<char *>(data) + size;
 }
 
-InferenceJob::InferenceJob() : numBytesToPrint(0) {}
+InferenceJob::InferenceJob() : numBytesToPrint(0), externalContext(nullptr) {}
 
 InferenceJob::InferenceJob(const string &_name,
                            const DataPtr &_networkModel,
                            const vector<DataPtr> &_input,
                            const vector<DataPtr> &_output,
                            const vector<DataPtr> &_expectedOutput,
-                           size_t _numBytesToPrint,
-                           const vector<uint8_t> &_pmuEventConfig,
-                           const bool _pmuCycleCounterEnable) :
+                           const size_t _numBytesToPrint,
+                           void *_externalContext) :
     name(_name),
     networkModel(_networkModel), input(_input), output(_output), expectedOutput(_expectedOutput),
-    numBytesToPrint(_numBytesToPrint), pmuEventConfig(_pmuEventConfig), pmuCycleCounterEnable(_pmuCycleCounterEnable),
-    pmuEventCount(), pmuCycleCounterCount(0) {}
+    numBytesToPrint(_numBytesToPrint), externalContext(_externalContext) {}
 
 void InferenceJob::invalidate() {
     networkModel.invalidate();
@@ -167,16 +165,14 @@
 
     // Create the TFL micro interpreter
     tflite::AllOpsResolver resolver;
-#ifdef LAYER_BY_LAYER_PROFILER
-    tflite::LayerByLayerProfiler profiler(job.pmuEventConfig, job.pmuCycleCounterEnable);
-#else
     tflite::ArmProfiler profiler;
-#endif
-
     tflite::MicroErrorReporter errorReporter;
     tflite::MicroInterpreter interpreter(
         model, resolver, tensorArena, tensorArenaSize, &errorReporter, nullptr, &profiler);
 
+    // Set external context
+    interpreter.SetMicroExternalContext(job.externalContext);
+
     // Allocate tensors
     TfLiteStatus status = interpreter.AllocateTensors();
     if (status != kTfLiteOk) {
@@ -196,14 +192,6 @@
         return true;
     }
 
-#ifdef LAYER_BY_LAYER_PROFILER
-    if (job.pmuCycleCounterEnable) {
-        job.pmuCycleCounterCount = profiler.GetPmuCycleCounterCount();
-    }
-
-    job.pmuEventCount.assign(profiler.GetPmuEventCount().begin(), profiler.GetPmuEventCount().end());
-#endif
-
     LOG("Inference runtime: %" PRId32 " cycles\n", profiler.GetTotalTicks());
 
     // Copy output data from TFLu arena to job descriptor
@@ -333,10 +321,6 @@
 }
 
 void InferenceProcess::printJob(InferenceJob &job, tflite::MicroInterpreter &interpreter) {
-    for (size_t i = 0; i < job.pmuEventCount.size(); i++) {
-        LOG("ethosu_pmu_cntr%zu : %" PRIu32 "\n", i, job.pmuEventCount[i]);
-    }
-
     LOG("arena_used_bytes : %zu\n", interpreter.arena_used_bytes());
 
     // Print all of the output data, or the first NUM_BYTES_TO_PRINT bytes,
diff --git a/lib/layer_by_layer_profiler/include/layer_by_layer_profiler.hpp b/lib/layer_by_layer_profiler/include/layer_by_layer_profiler.hpp
index a547576..0c50bc8 100644
--- a/lib/layer_by_layer_profiler/include/layer_by_layer_profiler.hpp
+++ b/lib/layer_by_layer_profiler/include/layer_by_layer_profiler.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -22,7 +22,6 @@
 #include "EventRecorder.h"
 #include "tensorflow/lite/kernels/internal/compatibility.h"
 #include <memory>
-#include <pmu_ethosu.h>
 #include <vector>
 
 // NOTE: This profiler only works on systems with 1 NPU due to the use of
@@ -31,29 +30,21 @@
 class LayerByLayerProfiler : public MicroProfiler {
 public:
     enum Backend { PRINTF, EVENT_RECORDER };
-    LayerByLayerProfiler(const std::vector<uint8_t> &event_config = {},
-                         bool pmu_cycle_counter_enable            = true,
-                         size_t max_events                        = 200,
-                         Backend backend                          = PRINTF,
-                         int32_t event_id = EventID(EventLevelError, EvtStatistics_No, EventRecordNone));
+
+    LayerByLayerProfiler(size_t max_events = 200,
+                         Backend backend   = PRINTF,
+                         int32_t event_id  = EventID(EventLevelError, EvtStatistics_No, EventRecordNone));
+
     uint32_t BeginEvent(const char *tag);
     void EndEvent(uint32_t event_handle);
     int32_t GetTotalTicks() const;
     void Log() const;
 
-    uint64_t GetPmuCycleCounterCount() const;
-    const std::vector<uint32_t> &GetPmuEventCount() const;
-
 private:
     std::unique_ptr<const char *[]> tags_;
     std::unique_ptr<uint64_t[]> start_ticks_;
     std::unique_ptr<uint64_t[]> end_ticks_;
 
-    std::vector<uint8_t> pmu_event_config;
-    std::vector<uint32_t> pmu_event_count;
-    bool pmu_cycle_counter_enable;
-    uint64_t pmu_cycle_counter_count;
-
     size_t max_events_;
     Backend backend;
     int32_t event_id;
diff --git a/lib/layer_by_layer_profiler/src/layer_by_layer_profiler.cpp b/lib/layer_by_layer_profiler/src/layer_by_layer_profiler.cpp
index a4f67d6..4f525ee 100644
--- a/lib/layer_by_layer_profiler/src/layer_by_layer_profiler.cpp
+++ b/lib/layer_by_layer_profiler/src/layer_by_layer_profiler.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -25,27 +25,12 @@
 
 #include "ethosu_log.h"
 #include "layer_by_layer_profiler.hpp"
-#include <ethosu_driver.h>
 #include <inttypes.h>
 #include <stdio.h>
 
-namespace {
-
-uint64_t GetCurrentEthosuTicks(struct ethosu_driver *drv) {
-    return ETHOSU_PMU_Get_CCNTR(drv);
-}
-
-} // namespace
-
 namespace tflite {
 
-LayerByLayerProfiler::LayerByLayerProfiler(const std::vector<uint8_t> &event_config,
-                                           bool _pmu_cycle_counter_enable,
-                                           size_t max_events,
-                                           Backend _backend,
-                                           int32_t _event_id) :
-    pmu_event_config(event_config),
-    pmu_event_count(), pmu_cycle_counter_enable(_pmu_cycle_counter_enable), pmu_cycle_counter_count(0),
+LayerByLayerProfiler::LayerByLayerProfiler(size_t max_events, Backend _backend, int32_t _event_id) :
     max_events_(max_events), backend(_backend), event_id(_event_id), num_events_(0) {
 
     tags_        = std::make_unique<const char *[]>(max_events);
@@ -60,47 +45,11 @@
         num_events_ = 0;
     }
 
-    tags_[num_events_] = tag;
-
-    if (strcmp("ethos-u", tag) == 0) {
-        struct ethosu_driver *drv = ethosu_reserve_driver();
-        size_t numEventCounters   = ETHOSU_PMU_Get_NumEventCounters();
-
-        if (pmu_event_config.size() > numEventCounters) {
-            LOG_WARN("PMU event config list is bigger (%zu) than available PMU event counters (%zu)",
-                     pmu_event_config.size(),
-                     numEventCounters);
-            LOG_WARN("PMU event config list will be truncated");
-            pmu_event_config.resize(numEventCounters);
-        }
-        // Enable PMU
-        ETHOSU_PMU_Enable(drv);
-
-        for (size_t i = 0; i < pmu_event_config.size(); i++) {
-            ETHOSU_PMU_Set_EVTYPER(drv, i, static_cast<ethosu_pmu_event_type>(pmu_event_config[i]));
-        }
-
-        ETHOSU_PMU_CNTR_Enable(drv, (1 << pmu_event_config.size()) - 1);
-        ETHOSU_PMU_EVCNTR_ALL_Reset(drv);
-
-        // Configure the cycle counter
-        if (pmu_cycle_counter_enable) {
-            ETHOSU_PMU_CNTR_Disable(drv, ETHOSU_PMU_CCNT_Msk);
-            ETHOSU_PMU_CYCCNT_Reset(drv);
-
-            ETHOSU_PMU_PMCCNTR_CFG_Set_Stop_Event(drv, ETHOSU_PMU_NPU_IDLE);
-            ETHOSU_PMU_PMCCNTR_CFG_Set_Start_Event(drv, ETHOSU_PMU_NPU_ACTIVE);
-
-            ETHOSU_PMU_CNTR_Enable(drv, ETHOSU_PMU_CCNT_Msk);
-        }
-        start_ticks_[num_events_] = 0; // Hardware cycle counter has been reset above, thus starts at 0
-        ethosu_release_driver(drv);
-    } else {
-        start_ticks_[num_events_] = GetCurrentTimeTicks();
-    }
-
+    tags_[num_events_]        = tag;
+    start_ticks_[num_events_] = GetCurrentTimeTicks();
     end_ticks_[num_events_] =
         start_ticks_[num_events_]; // NOTE: In case an EndEvent() doesn't trigger, cycles reports as 0
+
     return num_events_++;
 }
 
@@ -108,41 +57,12 @@
 void LayerByLayerProfiler::EndEvent(uint32_t event_handle) {
     TFLITE_DCHECK(event_handle < max_events_);
 
-    if (strcmp("ethos-u", tags_[event_handle]) == 0) {
-        struct ethosu_driver *drv = ethosu_reserve_driver();
-
-        end_ticks_[event_handle] = GetCurrentEthosuTicks(drv);
-        // Get the cycle count
-        if (pmu_cycle_counter_enable) {
-            pmu_cycle_counter_count = end_ticks_[event_handle];
-        }
-
-        // Save the PMU counter values
-        // NOTE: If multiple ethos-u layers, only the latest will be saved
-        pmu_event_count.resize(pmu_event_config.size());
-        for (size_t i = 0; i < pmu_event_config.size(); i++) {
-            pmu_event_count[i] = ETHOSU_PMU_Get_EVCNTR(drv, i);
-        }
-
-        // Shut down the PMU
-        ETHOSU_PMU_Disable(drv);
-
-        ethosu_release_driver(drv);
-    } else {
-        end_ticks_[event_handle] = GetCurrentTimeTicks();
-    }
+    end_ticks_[event_handle] = GetCurrentTimeTicks();
 
     if (backend == PRINTF) {
-        if (strcmp("ethos-u", tags_[event_handle]) == 0) {
-            for (size_t i = 0; i < pmu_event_count.size(); i++) {
-                LOG("ethos-u : ethosu_pmu_cntr%zu : %" PRIu32 "\n", i, pmu_event_count[i]);
-            }
-            LOG("ethos-u : cycle_cnt : %" PRIu64 " cycles\n", pmu_cycle_counter_count);
-        } else {
-            LOG("%s : cycle_cnt : %" PRIu64 " cycles\n",
-                tags_[event_handle],
-                end_ticks_[event_handle] - start_ticks_[event_handle]);
-        }
+        LOG("%s : cycle_cnt : %" PRIu64 " cycles\n",
+            tags_[event_handle],
+            end_ticks_[event_handle] - start_ticks_[event_handle]);
     } else {
         EventRecord2(event_id, (int32_t)event_handle, end_ticks_[event_handle] - start_ticks_[event_handle]);
     }
@@ -158,14 +78,6 @@
     return ticks;
 }
 
-uint64_t LayerByLayerProfiler::GetPmuCycleCounterCount() const {
-    return pmu_cycle_counter_count;
-}
-
-const std::vector<uint32_t> &LayerByLayerProfiler::GetPmuEventCount() const {
-    return pmu_event_count;
-}
-
 void LayerByLayerProfiler::Log() const {
 
 #if !defined(TF_LITE_STRIP_ERROR_STRINGS)