MLECO-1870: Cherry pick profiling changes from dev to open source repo
* Documentation update

Change-Id: If85e7ebc44498840b291c408f14e66a5a5faa424
Signed-off-by: Isabella Gottardi <isabella.gottardi@arm.com>
diff --git a/source/application/hal/platforms/bare-metal/bsp/bsp-core/include/bsp_core_log.h b/source/application/hal/platforms/bare-metal/bsp/bsp-core/include/bsp_core_log.h
index f049209..ffb55e7 100644
--- a/source/application/hal/platforms/bare-metal/bsp/bsp-core/include/bsp_core_log.h
+++ b/source/application/hal/platforms/bare-metal/bsp/bsp-core/include/bsp_core_log.h
@@ -32,31 +32,31 @@
 #endif /*LOG_LEVEL*/
 
 #if (LOG_LEVEL == LOG_LEVEL_TRACE)
-    #define trace(...)        printf("[TRACE] "); printf(__VA_ARGS__)
+    #define trace(...)        printf("TRACE - "); printf(__VA_ARGS__)
 #else
     #define trace(...)
 #endif  /* LOG_LEVEL == LOG_LEVEL_TRACE */
 
 #if (LOG_LEVEL <= LOG_LEVEL_DEBUG)
-    #define debug(...)        printf("[DEBUG] "); printf(__VA_ARGS__)
+    #define debug(...)        printf("DEBUG - "); printf(__VA_ARGS__)
 #else
     #define debug(...)
 #endif  /* LOG_LEVEL > LOG_LEVEL_TRACE */
 
 #if (LOG_LEVEL <= LOG_LEVEL_INFO)
-    #define info(...)         printf("[INFO] "); printf(__VA_ARGS__)
+    #define info(...)         printf("INFO - "); printf(__VA_ARGS__)
 #else
     #define info(...)
 #endif  /* LOG_LEVEL > LOG_LEVEL_DEBUG */
 
 #if (LOG_LEVEL <= LOG_LEVEL_WARN)
-    #define warn(...)         printf("[WARN] "); printf(__VA_ARGS__)
+    #define warn(...)         printf("WARN - "); printf(__VA_ARGS__)
 #else
     #define warn(...)
 #endif  /* LOG_LEVEL > LOG_LEVEL_INFO */
 
 #if (LOG_LEVEL <= LOG_LEVEL_ERROR)
-    #define printf_err(...)   printf("[ERROR] "); printf(__VA_ARGS__)
+    #define printf_err(...)   printf("ERROR - "); printf(__VA_ARGS__)
 #else
     #define printf_err(...)
 #endif  /* LOG_LEVEL > LOG_LEVEL_INFO */
diff --git a/source/application/hal/platforms/bare-metal/timer/baremetal_timer.c b/source/application/hal/platforms/bare-metal/timer/baremetal_timer.c
index 7257c1d..ef31a71 100644
--- a/source/application/hal/platforms/bare-metal/timer/baremetal_timer.c
+++ b/source/application/hal/platforms/bare-metal/timer/baremetal_timer.c
@@ -50,6 +50,53 @@
 static uint64_t bm_get_npu_active_cycle_diff(time_counter *st,
                                              time_counter *end);
 
+/** @brief  Gets the difference in idle NPU cycle counts
+ * @param[in]   st      Pointer to time_counter value at start time.
+ * @param[in]   end     Pointer to time_counter value at end.
+ * @return      Idle NPU cycle counts difference between the arguments expressed
+ *              as unsigned 64 bit integer.
+ **/
+static uint64_t bm_get_npu_idle_cycle_diff(time_counter *st,
+                                           time_counter *end);
+
+/** @brief  Gets the difference in AXI0 bus read cycle counts
+ * @param[in]   st      Pointer to time_counter value at start time.
+ * @param[in]   end     Pointer to time_counter value at end.
+ * @return      NPU AXI0 read cycle counts difference between the arguments expressed
+ *              as unsigned 64 bit integer.
+ **/
+static uint64_t bm_get_npu_axi0_read_cycle_diff(time_counter *st,
+                                                time_counter *end);
+
+/** @brief  Gets the difference in AXI0 bus write cycle counts
+ * @param[in]   st      Pointer to time_counter value at start time.
+ * @param[in]   end     Pointer to time_counter value at end.
+ * @return      NPU AXI0 write cycle counts difference between the arguments expressed
+ *              as unsigned 64 bit integer.
+ **/
+static uint64_t bm_get_npu_axi0_write_cycle_diff(time_counter *st,
+                                                 time_counter *end);
+
+/** @brief  Gets the difference in AXI1 bus read cycle counts
+ * @param[in]   st      Pointer to time_counter value at start time.
+ * @param[in]   end     Pointer to time_counter value at end.
+ * @return      NPU AXI1 read cycle counts difference between the arguments expressed
+ *              as unsigned 64 bit integer.
+ **/
+static uint64_t bm_get_npu_axi1_read_cycle_diff(time_counter *st,
+                                                time_counter *end);
+
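+/** @brief  Gets the differences for the 6 collected cycle counts, in this order:
+ *          total NPU, active NPU, idle NPU, axi0 read, axi0 write, axi1 read.
+ * @param[in]   st      Pointer to time_counter value at start time.
+ * @param[in]   end     Pointer to time_counter value at end.
+ * @param[out]  pmu_counters_values  Array that receives the 6 differences.
+ * @param[in]   size    Number of elements in pmu_counters_values; must be 6.
+ * @return      0 on success, 1 if size is not 6.
+ **/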
+static int bm_get_npu_cycle_diff(time_counter *st, time_counter *end,
+                                  uint64_t* pmu_counters_values, const size_t size);
+
 #endif /* defined (ARM_NPU) */
 
 #if defined(MPS3_PLATFORM)
@@ -126,8 +173,7 @@
 #if defined (ARM_NPU)
     /* We are capable of reporting npu cycle counts. */
     timer->cap.npu_cycles   = 1;
-    timer->get_npu_total_cycle_diff = bm_get_npu_total_cycle_diff;
-    timer->get_npu_active_cycle_diff = bm_get_npu_active_cycle_diff;
+    timer->get_npu_cycles_diff = bm_get_npu_cycle_diff;
     _init_ethosu_cyclecounter();
 #endif /* defined (ARM_NPU) */
 
@@ -149,37 +195,89 @@
     /* Reset overflow status. */
     ETHOSU_PMU_Set_CNTR_OVS(ETHOSU_PMU_CNT1_Msk | ETHOSU_PMU_CCNT_Msk);
 
-    /* Set the counter #0 to count idle cycles. */
+    /* We can retrieve only 4 PMU counters: */
     ETHOSU_PMU_Set_EVTYPER(0, ETHOSU_PMU_NPU_IDLE);
+    ETHOSU_PMU_Set_EVTYPER(1, ETHOSU_PMU_AXI0_RD_DATA_BEAT_RECEIVED);
+    ETHOSU_PMU_Set_EVTYPER(2, ETHOSU_PMU_AXI0_WR_DATA_BEAT_WRITTEN);
+    ETHOSU_PMU_Set_EVTYPER(3, ETHOSU_PMU_AXI1_RD_DATA_BEAT_RECEIVED);
 
     /* Enable PMU. */
     ETHOSU_PMU_Enable();
 
     /* Enable counters for cycle and counter# 0. */
-    ETHOSU_PMU_CNTR_Enable(ETHOSU_PMU_CNT1_Msk | ETHOSU_PMU_CCNT_Msk);
-
+    ETHOSU_PMU_CNTR_Enable(ETHOSU_PMU_CNT1_Msk | ETHOSU_PMU_CNT2_Msk | ETHOSU_PMU_CNT3_Msk | ETHOSU_PMU_CNT4_Msk| ETHOSU_PMU_CCNT_Msk);
     _reset_ethosu_counters();
 }
 
+static int bm_get_npu_cycle_diff(time_counter *st, time_counter *end,
+                                  uint64_t* pmu_counters_values, const size_t size)
+{
+    if (size == 6) {
+        pmu_counters_values[0] = bm_get_npu_total_cycle_diff(st, end);
+        pmu_counters_values[1] = bm_get_npu_active_cycle_diff(st, end);
+        pmu_counters_values[2] = bm_get_npu_idle_cycle_diff(st, end);
+        pmu_counters_values[3] = bm_get_npu_axi0_read_cycle_diff(st, end);
+        pmu_counters_values[4] = bm_get_npu_axi0_write_cycle_diff(st, end);
+        pmu_counters_values[5] = bm_get_npu_axi1_read_cycle_diff(st, end);
+        return 0;
+    } else {
+        return 1;
+    }
+}
+
 static uint64_t bm_get_npu_total_cycle_diff(time_counter *st, time_counter *end)
 {
     return end->npu_total_ccnt - st->npu_total_ccnt;
 }
 
-static uint64_t bm_get_npu_active_cycle_diff(time_counter *st, time_counter *end)
+static uint32_t counter_overflow(uint32_t pmu_counter_mask)
 {
     /* Check for overflow: The idle counter is 32 bit while the
        total cycle count is 64 bit. */
     const uint32_t overflow_status = ETHOSU_PMU_Get_CNTR_OVS();
+    return pmu_counter_mask & overflow_status;
+}
 
-    if (ETHOSU_PMU_CNT1_Msk & overflow_status) {
+static uint64_t bm_get_npu_idle_cycle_diff(time_counter *st, time_counter *end)
+{
+    if (counter_overflow(ETHOSU_PMU_CNT1_Msk)) {
         printf_err("EthosU PMU idle counter overflow.\n");
         return 0;
     }
+    return (uint64_t)(end->npu_idle_ccnt - st->npu_idle_ccnt);
+}
 
+static uint64_t bm_get_npu_active_cycle_diff(time_counter *st, time_counter *end)
+{
     /* Active NPU time = total time - idle time */
-    return (bm_get_npu_total_cycle_diff(st, end) +
-           (uint64_t)(st->npu_idle_ccnt)) - (uint64_t)(end->npu_idle_ccnt);
+    return bm_get_npu_total_cycle_diff(st, end) - bm_get_npu_idle_cycle_diff(st, end);
+}
+
+static uint64_t bm_get_npu_axi0_read_cycle_diff(time_counter *st, time_counter *end)
+{
+    if (counter_overflow(ETHOSU_PMU_CNT2_Msk)) {
+        printf_err("EthosU PMU axi0 read counter overflow.\n");
+        return 0;
+    }
+    return (uint64_t)(end->npu_axi0_read_ccnt - st->npu_axi0_read_ccnt);
+}
+
+static uint64_t bm_get_npu_axi0_write_cycle_diff(time_counter *st, time_counter *end)
+{
+    if (counter_overflow(ETHOSU_PMU_CNT3_Msk)) {
+        printf_err("EthosU PMU axi0 write counter overflow.\n");
+        return 0;
+    }
+    return (uint64_t)(end->npu_axi0_write_ccnt - st->npu_axi0_write_ccnt);
+}
+
+static uint64_t bm_get_npu_axi1_read_cycle_diff(time_counter *st, time_counter *end)
+{
+    if (counter_overflow(ETHOSU_PMU_CNT4_Msk)) {
+        printf_err("EthosU PMU axi1 read counter overflow.\n");
+        return 0;
+    }
+    return (uint64_t)(end->npu_axi1_read_ccnt - st->npu_axi1_read_ccnt);
 }
 
 #endif /* defined (ARM_NPU) */
@@ -199,15 +297,22 @@
         .counter = get_time_counter(),
 
 #if defined (ARM_NPU)
-        .npu_idle_ccnt = ETHOSU_PMU_Get_EVCNTR(0),
-        .npu_total_ccnt = ETHOSU_PMU_Get_CCNTR()
+            .npu_total_ccnt = ETHOSU_PMU_Get_CCNTR(),
+            .npu_idle_ccnt = ETHOSU_PMU_Get_EVCNTR(0),
+            .npu_axi0_read_ccnt = ETHOSU_PMU_Get_EVCNTR(1),
+            .npu_axi0_write_ccnt = ETHOSU_PMU_Get_EVCNTR(2),
+            .npu_axi1_read_ccnt = ETHOSU_PMU_Get_EVCNTR(3)
 #endif /* defined (ARM_NPU) */
 
     };
 
 #if defined (ARM_NPU)
-    debug("NPU total cc: %llu; NPU idle cc: %u\n",
-        t.npu_total_ccnt, t.npu_idle_ccnt);
+    debug("NPU total cc: %llu; NPU idle cc: %u; NPU axi0 read cc: %u;  NPU axi0 write cc: %u; NPU axi1 read cc: %u\n",
+        t.npu_total_ccnt,
+        t.npu_idle_ccnt,
+        t.npu_axi0_read_ccnt,
+        t.npu_axi0_write_ccnt,
+        t.npu_axi1_read_ccnt);
 #endif /* defined (ARM_NPU) */
 
     return t;
diff --git a/source/application/hal/platforms/bare-metal/timer/include/baremetal_timer.h b/source/application/hal/platforms/bare-metal/timer/include/baremetal_timer.h
index c8fc32c..3020dac 100644
--- a/source/application/hal/platforms/bare-metal/timer/include/baremetal_timer.h
+++ b/source/application/hal/platforms/bare-metal/timer/include/baremetal_timer.h
@@ -34,6 +34,9 @@
 #if defined (ARM_NPU)
     uint64_t                npu_total_ccnt;
     uint32_t                npu_idle_ccnt;
+    uint32_t                npu_axi0_read_ccnt;
+    uint32_t                npu_axi0_write_ccnt;
+    uint32_t                npu_axi1_read_ccnt;
 #endif /* ARM_NPU */
 
 } time_counter;
diff --git a/source/application/hal/platforms/native/data_presentation/log/log.c b/source/application/hal/platforms/native/data_presentation/log/log.c
index 48e8b95..6ce6684 100644
--- a/source/application/hal/platforms/native/data_presentation/log/log.c
+++ b/source/application/hal/platforms/native/data_presentation/log/log.c
@@ -30,11 +30,11 @@
                       const uint32_t pos_x, const uint32_t pos_y,
                       const uint32_t downsample_factor)
 {
-    info("Image details\n");
-    info("Data:                 %p\n", data);
-    info("WxHxC:                %dx%dx%d\n", width, height, channels);
-    info("Pos (x,y):            (%d,%d)\n", pos_x, pos_y);
-    info("Downsampling factor:  %u\n", downsample_factor);
+    debug("Image details\n");
+    debug("Data:                 %p\n", data);
+    debug("WxHxC:                %dx%dx%d\n", width, height, channels);
+    debug("Pos (x,y):            (%d,%d)\n", pos_x, pos_y);
+    debug("Downsampling factor:  %u\n", downsample_factor);
     return 0;
 }
 
@@ -43,8 +43,8 @@
                      const bool allow_multiple_lines)
 {
     UNUSED(allow_multiple_lines);
-    info("%s\n", str);
-    info("Text size: %lu, x: %d, y: %d\n", str_sz, pos_x, pos_y);
+    debug("%s\n", str);
+    debug("Text size: %lu, x: %d, y: %d\n", str_sz, pos_x, pos_y);
     return 0;
 }
 
@@ -53,19 +53,19 @@
                          const uint32_t width, const uint32_t height, 
                          const uint16_t color)
 {
-    info("Showing rectangular, width: %d, height: %d, color: %d, x: %d, y: %d\n", 
+    debug("Showing rectangular, width: %d, height: %d, color: %d, x: %d, y: %d\n", 
             width, height, color, pos_x, pos_y);
     return 0;
 }
 
 int log_clear(const uint16_t color)
 {
-    info("Clearing with color: %d\n", color);
+    debug("Clearing with color: %d\n", color);
     return 0;
 }
 
 int log_set_text_color (const uint16_t color)
 {
-    info("Setting text color: %d\n", color);
+    debug("Setting text color: %d\n", color);
     return 0;
 }
diff --git a/source/application/hal/platforms/native/utils/include/dummy_log.h b/source/application/hal/platforms/native/utils/include/dummy_log.h
index 626436a..3df5c5c 100644
--- a/source/application/hal/platforms/native/utils/include/dummy_log.h
+++ b/source/application/hal/platforms/native/utils/include/dummy_log.h
@@ -32,31 +32,31 @@
 #define UNUSED(x)       ((void)(x))
 
 #if (LOG_LEVEL == LOG_LEVEL_TRACE)
-    #define trace(...)        printf("[TRACE] "); printf(__VA_ARGS__)
+    #define trace(...)        printf("TRACE - "); printf(__VA_ARGS__)
 #else
     #define trace(...)
 #endif  /* LOG_LEVEL == LOG_LEVEL_TRACE */
 
 #if (LOG_LEVEL <= LOG_LEVEL_DEBUG)
-    #define debug(...)        printf("[DEBUG] "); printf(__VA_ARGS__)
+    #define debug(...)        printf("DEBUG - "); printf(__VA_ARGS__)
 #else
     #define debug(...)
 #endif  /* LOG_LEVEL > LOG_LEVEL_TRACE */
 
 #if (LOG_LEVEL <= LOG_LEVEL_INFO)
-    #define info(...)         printf("[INFO] "); printf(__VA_ARGS__)
+    #define info(...)         printf("INFO - "); printf(__VA_ARGS__)
 #else
     #define info(...)
 #endif  /* LOG_LEVEL > LOG_LEVEL_DEBUG */
 
 #if (LOG_LEVEL <= LOG_LEVEL_WARN)
-    #define warn(...)         printf("[WARN] "); printf(__VA_ARGS__)
+    #define warn(...)         printf("WARN - "); printf(__VA_ARGS__)
 #else
     #define warn(...)
 #endif  /* LOG_LEVEL > LOG_LEVEL_INFO */
 
 #if (LOG_LEVEL <= LOG_LEVEL_ERROR)
-    #define printf_err(...)   printf("[ERROR] "); printf(__VA_ARGS__)
+    #define printf_err(...)   printf("ERROR - "); printf(__VA_ARGS__)
 #else
     #define printf_err(...)
 #endif  /* LOG_LEVEL > LOG_LEVEL_INFO */