Add EthosUProfiler (and ArmProfiler fallback) to perform layer-by-layer profiling on Ethos-U.

Change-Id: Idae34fd8ab6b17b0bc21db658fff135a5ddf5461
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e8d46ae..ab8676e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -63,6 +63,9 @@
 # Build RTOS
 add_subdirectory(rtos)
 
+# Build libs
+add_subdirectory(lib)
+
 # Build applications
 add_subdirectory(applications)
 
diff --git a/applications/inference_process/CMakeLists.txt b/applications/inference_process/CMakeLists.txt
index 67caae8..4bd75c4 100644
--- a/applications/inference_process/CMakeLists.txt
+++ b/applications/inference_process/CMakeLists.txt
@@ -22,5 +22,13 @@
 
 target_include_directories(inference_process PUBLIC include
                            PRIVATE ${TENSORFLOW_PATH} ${TENSORFLOW_PATH}/tensorflow/lite/micro/tools/make/downloads/flatbuffers/include)
+
 target_link_libraries(inference_process PRIVATE tflu cmsis_core cmsis_device)
-target_sources(inference_process PRIVATE src/inference_process.cpp)
\ No newline at end of file
+if (TARGET ethosu_profiler)
+    target_link_libraries(inference_process PRIVATE ethosu_profiler)
+endif()
+if (TARGET arm_profiler)
+    target_link_libraries(inference_process PRIVATE arm_profiler)
+endif()
+
+target_sources(inference_process PRIVATE src/inference_process.cpp)
diff --git a/applications/inference_process/src/inference_process.cpp b/applications/inference_process/src/inference_process.cpp
index 743ed64..fa38290 100644
--- a/applications/inference_process/src/inference_process.cpp
+++ b/applications/inference_process/src/inference_process.cpp
@@ -24,6 +24,11 @@
 #include "tensorflow/lite/schema/schema_generated.h"
 #include "tensorflow/lite/version.h"
 
+#include "arm_profiler.hpp"
+#ifdef ETHOSU
+#include "ethosu_profiler.hpp"
+#endif
+
 #include "inference_process.hpp"
 
 #include "cmsis_compiler.h"
@@ -200,15 +205,15 @@
 
     // Create the TFL micro interpreter
     tflite::AllOpsResolver resolver;
-    tflite::MicroProfiler profiler;
-
-#if defined(INFERENCE_PROC_TFLU_PROFILER) && defined(ETHOSU)
+#ifdef ETHOSU
     vector<ethosu_pmu_event_type> pmu_events(ETHOSU_PMU_NCOUNTERS, ETHOSU_PMU_NO_EVENT);
 
     for (size_t i = 0; i < job.pmuEventConfig.size(); i++) {
         pmu_events[i] = ethosu_pmu_event_type(job.pmuEventConfig[i]);
     }
-    profiler.MonitorEthosuPMUEvents(pmu_events[0], pmu_events[1], pmu_events[2], pmu_events[3]);
+    tflite::EthosUProfiler profiler(pmu_events[0], pmu_events[1], pmu_events[2], pmu_events[3]);
+#else
+    tflite::ArmProfiler profiler;
 #endif
     tflite::MicroInterpreter interpreter(model, resolver, tensorArena, tensorArenaSize, reporter, &profiler);
 
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
new file mode 100644
index 0000000..abf444f
--- /dev/null
+++ b/lib/CMakeLists.txt
@@ -0,0 +1,23 @@
+#
+# Copyright (c) 2021 Arm Limited. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Build ethosu_profiler (its CMakeLists.txt defines no target unless ethosu_core_driver already exists)
+add_subdirectory(ethosu_profiler)
+
+# Build arm_profiler
+add_subdirectory(arm_profiler)
diff --git a/lib/arm_profiler/CMakeLists.txt b/lib/arm_profiler/CMakeLists.txt
new file mode 100644
index 0000000..2452a50
--- /dev/null
+++ b/lib/arm_profiler/CMakeLists.txt
@@ -0,0 +1,23 @@
+#
+# Copyright (c) 2021 Arm Limited. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_library(arm_profiler INTERFACE)
+# The tflu dependency, the include path and the source file below are all declared INTERFACE.
+target_link_libraries(arm_profiler INTERFACE tflu)
+target_include_directories(arm_profiler INTERFACE include)
+target_sources(arm_profiler INTERFACE src/arm_profiler.cpp)
diff --git a/lib/arm_profiler/include/arm_profiler.hpp b/lib/arm_profiler/include/arm_profiler.hpp
new file mode 100644
index 0000000..f5206fe
--- /dev/null
+++ b/lib/arm_profiler/include/arm_profiler.hpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ARM_PROFILER_H
+#define ARM_PROFILER_H
+
+#include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/micro/micro_profiler.h"
+#include <memory>
+
+// TODO: Merge this profiler with EthosUProfiler.
+namespace tflite {
+class ArmProfiler : public MicroProfiler { // Keeps a start and an end tick per event in fixed-size arrays.
+public:
+    ArmProfiler(size_t max_events = 200); // Allocates storage for max_events events.
+    uint32_t BeginEvent(const char *tag); // Starts an event; the returned handle is its slot index.
+    void EndEvent(uint32_t event_handle); // Stamps the end tick and reports the event's cycle count.
+    int32_t GetTotalTicks() const; // Sum of (end - start) over slots [0, num_events_).
+    // NOTE(review): whether the functions above override virtuals of MicroProfiler is not visible here.
+private:
+    size_t max_events_; // Capacity of the three arrays below.
+    std::unique_ptr<const char *[]> tags_; // Tag pointers exactly as passed to BeginEvent(); not copied.
+    std::unique_ptr<int32_t[]> start_ticks_; // Tick value taken in BeginEvent().
+    std::unique_ptr<int32_t[]> end_ticks_; // Tick value taken in EndEvent().
+    // Next free slot; BeginEvent() resets it to 0 once max_events_ is reached.
+    int num_events_ = 0;
+
+    TF_LITE_REMOVE_VIRTUAL_DELETE;
+};
+
+} // namespace tflite
+
+#endif
diff --git a/lib/arm_profiler/src/arm_profiler.cpp b/lib/arm_profiler/src/arm_profiler.cpp
new file mode 100644
index 0000000..c58037f
--- /dev/null
+++ b/lib/arm_profiler/src/arm_profiler.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/micro/micro_error_reporter.h"
+#include "tensorflow/lite/micro/micro_time.h"
+
+#include <string.h>
+
+#include "arm_profiler.hpp"
+#include <inttypes.h>
+#include <stdio.h>
+
+namespace tflite {
+
+ArmProfiler::ArmProfiler(size_t max_events) :
+    max_events_(max_events), tags_(std::make_unique<const char *[]>(max_events)),
+    start_ticks_(std::make_unique<int32_t[]>(max_events)), end_ticks_(std::make_unique<int32_t[]>(max_events)) {
+    // Every array is allocated once, up front, with max_events slots.
+}
+
+uint32_t ArmProfiler::BeginEvent(const char *tag) { // Opens an event for tag; returns its slot index as handle.
+    if (static_cast<size_t>(num_events_) >= max_events_) { // Buffer full: wrap and overwrite from slot 0.
+        tflite::GetMicroErrorReporter()->Report("Profiling event overflow, max: %u events",
+                                                static_cast<unsigned int>(max_events_)); // %u needs unsigned int
+        num_events_ = 0;
+    }
+    tags_[num_events_] = tag; // Only the pointer is stored; the string is not copied.
+    end_ticks_[num_events_] = (start_ticks_[num_events_] = GetCurrentTimeTicks()) - 1; // Placeholder until EndEvent().
+    return num_events_++;
+}
+
+void ArmProfiler::EndEvent(uint32_t event_handle) { // Closes the event and reports its duration right away.
+    TFLITE_DCHECK(event_handle < max_events_);
+    const int32_t stop   = end_ticks_[event_handle] = GetCurrentTimeTicks();
+    const int32_t cycles = stop - start_ticks_[event_handle];
+    tflite::GetMicroErrorReporter()->Report("%s : cycle_cnt : %u cycles", tags_[event_handle], cycles);
+}
+
+int32_t ArmProfiler::GetTotalTicks() const { // Adds up (end - start) for slots [0, num_events_).
+    int32_t total = 0;
+    for (int slot = 0; slot != num_events_; ++slot) {
+        total = total + (end_ticks_[slot] - start_ticks_[slot]);
+    }
+    return total;
+}
+
+} // namespace tflite
diff --git a/lib/ethosu_profiler/CMakeLists.txt b/lib/ethosu_profiler/CMakeLists.txt
new file mode 100644
index 0000000..336a696
--- /dev/null
+++ b/lib/ethosu_profiler/CMakeLists.txt
@@ -0,0 +1,26 @@
+#
+# Copyright (c) 2021 Arm Limited. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+if (NOT TARGET ethosu_core_driver) # No Ethos-U driver target: leave ethosu_profiler undefined.
+    return()
+endif()
+add_library(ethosu_profiler INTERFACE)
+# The dependencies, the include path and the source file below are all declared INTERFACE.
+target_link_libraries(ethosu_profiler INTERFACE ethosu_core_driver tflu)
+target_include_directories(ethosu_profiler INTERFACE include)
+target_sources(ethosu_profiler INTERFACE src/ethosu_profiler.cpp)
diff --git a/lib/ethosu_profiler/include/ethosu_profiler.hpp b/lib/ethosu_profiler/include/ethosu_profiler.hpp
new file mode 100644
index 0000000..745c670
--- /dev/null
+++ b/lib/ethosu_profiler/include/ethosu_profiler.hpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ETHOSU_PROFILER_H
+#define ETHOSU_PROFILER_H
+
+#include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/micro/micro_profiler.h"
+#include <memory>
+#include <pmu_ethosu.h>
+
+// NOTE: This profiler only works on systems with 1 NPU due to the use of ethosu_reserve_driver().
+namespace tflite {
+class EthosUProfiler : public MicroProfiler { // Base class is declared in micro_profiler.h, included above.
+public:
+    EthosUProfiler(ethosu_pmu_event_type event0 = ETHOSU_PMU_NO_EVENT,
+                   ethosu_pmu_event_type event1 = ETHOSU_PMU_NO_EVENT,
+                   ethosu_pmu_event_type event2 = ETHOSU_PMU_NO_EVENT,
+                   ethosu_pmu_event_type event3 = ETHOSU_PMU_NO_EVENT,
+                   size_t max_events            = 200); // event0..3: PMU events to count; max_events: capacity.
+    uint32_t BeginEvent(const char *tag); // Starts an event; the returned handle is its slot index.
+    void EndEvent(uint32_t event_handle); // Stamps the end tick and prints the event's cycle count.
+    uint64_t GetTotalTicks() const; // Sum of (end - start) over slots [0, num_events_).
+    void Log() const; // Prints one line per recorded slot.
+    uint32_t GetEthosuPMUCounter(int counter); // Running total of one PMU counter over "ethos-u" events.
+
+private:
+    void MonitorEthosuPMUEvents(ethosu_pmu_event_type event0,
+                                ethosu_pmu_event_type event1,
+                                ethosu_pmu_event_type event2,
+                                ethosu_pmu_event_type event3);
+    // Capacity of, and storage for, the per-event records.
+    size_t max_events_;
+    std::unique_ptr<const char *[]> tags_;
+    std::unique_ptr<uint64_t[]> start_ticks_;
+    std::unique_ptr<uint64_t[]> end_ticks_;
+    // Next free slot; BeginEvent() resets it to 0 once max_events_ is reached.
+    int num_events_ = 0;
+    // PMU event type handed to each counter when an "ethos-u" event begins.
+    ethosu_pmu_event_type ethosu_pmu_cntrs[ETHOSU_PMU_NCOUNTERS];
+    // Per-counter totals accumulated by EndEvent().
+    uint32_t event_counters[ETHOSU_PMU_NCOUNTERS];
+
+    TF_LITE_REMOVE_VIRTUAL_DELETE;
+};
+
+} // namespace tflite
+
+#endif
diff --git a/lib/ethosu_profiler/src/ethosu_profiler.cpp b/lib/ethosu_profiler/src/ethosu_profiler.cpp
new file mode 100644
index 0000000..c69e6f8
--- /dev/null
+++ b/lib/ethosu_profiler/src/ethosu_profiler.cpp
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/micro/micro_error_reporter.h"
+#include "tensorflow/lite/micro/micro_profiler.h"
+#include "tensorflow/lite/micro/micro_time.h"
+
+#include <string.h>
+
+#include "ethosu_profiler.hpp"
+#include <ethosu_driver.h>
+#include <inttypes.h>
+#include <stdio.h>
+
+namespace {
+// Thin wrapper: returns what ETHOSU_PMU_Get_CCNTR_v2() reports for drv (presumably the PMU cycle counter).
+uint64_t GetCurrentEthosuTicks(struct ethosu_driver *drv) {
+    return ETHOSU_PMU_Get_CCNTR_v2(drv);
+}
+// Enables the PMU on drv, sets one event type per counter from ethosu_pmu_cntrs, then calls the all-counter reset.
+void InitEthosuPMUCounters(struct ethosu_driver *drv, ethosu_pmu_event_type *ethosu_pmu_cntrs) {
+    ETHOSU_PMU_Enable_v2(drv);
+    // The mask passed here covers CNT1..CNT4 plus CCNT.
+    ETHOSU_PMU_CNTR_Enable_v2(drv,
+                              ETHOSU_PMU_CNT1_Msk | ETHOSU_PMU_CNT2_Msk | ETHOSU_PMU_CNT3_Msk | ETHOSU_PMU_CNT4_Msk |
+                                  ETHOSU_PMU_CCNT_Msk);
+    // ethosu_pmu_cntrs must hold at least ETHOSU_PMU_NCOUNTERS entries; the loop reads that many.
+    for (int i = 0; i < ETHOSU_PMU_NCOUNTERS; i++) {
+        ETHOSU_PMU_Set_EVTYPER_v2(drv, i, ethosu_pmu_cntrs[i]);
+    }
+    // NOTE(review): assumed (from the name) to zero the event counters - confirm against the driver.
+    ETHOSU_PMU_EVCNTR_ALL_Reset_v2(drv);
+}
+// Thin wrapper: returns ETHOSU_PMU_Get_EVCNTR_v2() for the given counter index.
+uint32_t GetEthosuPMUEventCounter(struct ethosu_driver *drv, int counter) {
+    return ETHOSU_PMU_Get_EVCNTR_v2(drv, counter);
+}
+} // namespace
+
+namespace tflite {
+
+EthosUProfiler::EthosUProfiler(ethosu_pmu_event_type event0,
+                               ethosu_pmu_event_type event1,
+                               ethosu_pmu_event_type event2,
+                               ethosu_pmu_event_type event3,
+                               size_t max_events) :
+    max_events_(max_events),
+    tags_(std::make_unique<const char *[]>(max_events)),
+    start_ticks_(std::make_unique<uint64_t[]>(max_events)),
+    end_ticks_(std::make_unique<uint64_t[]>(max_events)) {
+    // Accumulated PMU totals start at zero; EndEvent() adds to them.
+    for (uint32_t &total : event_counters) {
+        total = 0;
+    }
+    // Remember which PMU events to count; BeginEvent() hands them to the PMU for "ethos-u" events.
+    MonitorEthosuPMUEvents(event0, event1, event2, event3);
+}
+
+// NOTE: THIS PROFILER ONLY WORKS ON SYSTEMS WITH 1 NPU
+uint32_t EthosUProfiler::BeginEvent(const char *tag) {
+    if (static_cast<size_t>(num_events_) >= max_events_) { // Buffer full: wrap and overwrite from slot 0.
+        tflite::GetMicroErrorReporter()->Report("Profiling event overflow, max: %u events",
+                                                static_cast<unsigned int>(max_events_)); // %u needs unsigned int
+        num_events_ = 0;
+    }
+    tags_[num_events_] = tag; // Only the pointer is kept; EndEvent() compares the tag text again.
+    // "ethos-u" events take their start tick via GetCurrentEthosuTicks(); all others via GetCurrentTimeTicks().
+    if (strcmp("ethos-u", tag) == 0) {
+        struct ethosu_driver *ethosu_drv = ethosu_reserve_driver();
+        ETHOSU_PMU_CYCCNT_Reset_v2(ethosu_drv);
+        ETHOSU_PMU_PMCCNTR_CFG_Set_Start_Event_v2(ethosu_drv, ETHOSU_PMU_NPU_ACTIVE);
+        ETHOSU_PMU_PMCCNTR_CFG_Set_Stop_Event_v2(ethosu_drv, ETHOSU_PMU_NPU_IDLE);
+        start_ticks_[num_events_] = GetCurrentEthosuTicks(ethosu_drv);
+        InitEthosuPMUCounters(ethosu_drv, ethosu_pmu_cntrs);
+        ethosu_release_driver(ethosu_drv);
+    } else {
+        start_ticks_[num_events_] = GetCurrentTimeTicks();
+    }
+    // Placeholder end = start - 1 until EndEvent() stamps the real end tick.
+    end_ticks_[num_events_] = start_ticks_[num_events_] - 1;
+    return num_events_++;
+}
+
+// NOTE: THIS PROFILER ONLY WORKS ON SYSTEMS WITH 1 NPU
+// Stamps the end tick for event_handle and prints the elapsed count. For "ethos-u" events the PMU is
+// disabled first, then every event counter is reported and added to the running totals.
+void EthosUProfiler::EndEvent(uint32_t event_handle) {
+    TFLITE_DCHECK(event_handle < max_events_);
+
+    if (strcmp("ethos-u", tags_[event_handle]) == 0) {
+        struct ethosu_driver *ethosu_drv = ethosu_reserve_driver();
+        end_ticks_[event_handle]         = GetCurrentEthosuTicks(ethosu_drv);
+        ETHOSU_PMU_Disable_v2(ethosu_drv);
+        // The totals updated below are what GetEthosuPMUCounter() returns.
+        for (int i = 0; i < ETHOSU_PMU_NCOUNTERS; i++) {
+            const uint32_t count = GetEthosuPMUEventCounter(ethosu_drv, i);
+            // int index and explicit cast make the variadic arguments match %d / %u exactly.
+            tflite::GetMicroErrorReporter()->Report(
+                "%s : ethosu_pmu_cntr%d : %u", tags_[event_handle], i, static_cast<unsigned int>(count));
+            event_counters[i] += count;
+        }
+        ethosu_release_driver(ethosu_drv);
+    } else {
+        end_ticks_[event_handle] = GetCurrentTimeTicks();
+    }
+    // Both branches print the elapsed count in the same format, so it is done once here.
+    printf("%s : cycle_cnt : %" PRIu64 " cycles\n",
+           tags_[event_handle],
+           end_ticks_[event_handle] - start_ticks_[event_handle]);
+}
+
+
+uint64_t EthosUProfiler::GetTotalTicks() const {
+    // Adds up (end - start) for slots [0, num_events_).
+    uint64_t total = 0;
+    for (int slot = 0; slot != num_events_; ++slot) {
+        total = total + (end_ticks_[slot] - start_ticks_[slot]);
+    }
+    return total;
+}
+
+void EthosUProfiler::Log() const {
+#ifndef TF_LITE_STRIP_ERROR_STRINGS
+    // One line per recorded slot; the loop is compiled out when TF_LITE_STRIP_ERROR_STRINGS is defined.
+    for (int slot = 0; slot != num_events_; ++slot) {
+        printf("%s took %" PRIu64 " cycles\n", tags_[slot], end_ticks_[slot] - start_ticks_[slot]);
+    }
+#endif
+}
+
+void EthosUProfiler::MonitorEthosuPMUEvents(ethosu_pmu_event_type event0,
+                                            ethosu_pmu_event_type event1,
+                                            ethosu_pmu_event_type event2,
+                                            ethosu_pmu_event_type event3) {
+    const ethosu_pmu_event_type requested[] = {event0, event1, event2, event3};
+    for (size_t slot = 0; slot < sizeof(requested) / sizeof(requested[0]); ++slot) {
+        ethosu_pmu_cntrs[slot] = requested[slot]; // Handed to the PMU later, in BeginEvent().
+    }
+}
+
+uint32_t EthosUProfiler::GetEthosuPMUCounter(int counter) { // Accumulated total for one counter; 0 if out of range.
+    return (counter >= 0 && counter < ETHOSU_PMU_NCOUNTERS) ? event_counters[counter] : 0;
+}
+
+} // namespace tflite