Improved Logging - Core Software

Logging macros:
Added ethosu_logging.h as a lib, containing logging macros controlled by
a parameter ETHOSU_LOG_SEVERITY set in core_software CMakeLists.txt

Updated inference_process:
Updated inference_process to include ethosu_logging and use the logging
macros rather than printf()

Updated message_process:
Updated message_process to include ethosu_logging and use the
logging macros rather than printf()

Updated ethosu_monitor:
Updated ethosu_monitor to include ethosu_logging and use the
logging macros rather than printf()

Updated layer_by_layer_profiler:
Updated layer_by_layer_profiler to include ethosu_logging and use the
logging macros rather than printf()

Updated mhu_v2:
Updated mhu_v2 to include ethosu_logging and use the
logging macros rather than printf()

Change-Id: I5d6fd80b7645b3e0af5b494eea6dbb7755f02122
diff --git a/applications/inference_process/src/inference_process.cpp b/applications/inference_process/src/inference_process.cpp
index 13cabb2..7058f9c 100644
--- a/applications/inference_process/src/inference_process.cpp
+++ b/applications/inference_process/src/inference_process.cpp
@@ -27,6 +27,7 @@
 #ifdef ETHOSU
 #include "layer_by_layer_profiler.hpp"
 #endif
+#include "ethosu_log.h"
 
 #include "inference_process.hpp"
 
@@ -39,30 +40,28 @@
 namespace {
 
 void tflu_debug_log(const char *s) {
-    fprintf(stderr, "%s", s);
+    LOG_DEBUG("%s", s);
 }
 
 void print_output_data(TfLiteTensor *output, size_t bytesToPrint) {
     const int numBytesToPrint = min(output->bytes, bytesToPrint);
-
-    int dims_size = output->dims->size;
-    printf("{\n");
-    printf("\"dims\": [%d,", dims_size);
+    int dims_size             = output->dims->size;
+    LOG("{\n");
+    LOG("\"dims\": [%d,", dims_size);
     for (int i = 0; i < output->dims->size - 1; ++i) {
-        printf("%d,", output->dims->data[i]);
+        LOG("%d,", output->dims->data[i]);
     }
-    printf("%d],\n", output->dims->data[dims_size - 1]);
-
-    printf("\"data_address\": \"%08" PRIx32 "\",\n", (uint32_t)output->data.data);
-    printf("\"data\":\"");
+    LOG("%d],\n", output->dims->data[dims_size - 1]);
+    LOG("\"data_address\": \"%08" PRIx32 "\",\n", (uint32_t)output->data.data);
+    LOG("\"data\":\"");
     for (int i = 0; i < numBytesToPrint - 1; ++i) {
         if (i % 16 == 0 && i != 0) {
-            printf("\n");
+            LOG("\n");
         }
-        printf("0x%02x,", output->data.uint8[i]);
+        LOG("0x%02x,", output->data.uint8[i]);
     }
-    printf("0x%02x\"\n", output->data.uint8[numBytesToPrint - 1]);
-    printf("}");
+    LOG("0x%02x\"\n", output->data.uint8[numBytesToPrint - 1]);
+    LOG("}");
 }
 
 bool copyOutput(const TfLiteTensor &src, InferenceProcess::DataPtr &dst) {
@@ -71,7 +70,7 @@
     }
 
     if (src.bytes > dst.size) {
-        printf("Tensor size %d does not match output size %d.\n", src.bytes, dst.size);
+        LOG_ERR("Tensor size mismatch (bytes): actual=%d, expected%d.\n", src.bytes, dst.size);
         return true;
     }
 
@@ -181,7 +180,7 @@
 }
 
 bool InferenceProcess::runJob(InferenceJob &job) {
-    printf("Running inference job: %s\n", job.name.c_str());
+    LOG_INFO("Running inference job: %s\n", job.name.c_str());
 
     // Register debug log callback for profiling
     RegisterDebugLogCallback(tflu_debug_log);
@@ -192,9 +191,9 @@
     // Get model handle and verify that the version is correct
     const tflite::Model *model = ::tflite::GetModel(job.networkModel.data);
     if (model->version() != TFLITE_SCHEMA_VERSION) {
-        printf("Model provided is schema version %" PRIu32 " not equal to supported version %d.\n",
-               model->version(),
-               TFLITE_SCHEMA_VERSION);
+        LOG_ERR("Model schema version unsupported: version=%" PRIu32 ", supported=%d.\n",
+                model->version(),
+                TFLITE_SCHEMA_VERSION);
         return true;
     }
 
@@ -211,7 +210,7 @@
     // Allocate tensors
     TfLiteStatus allocate_status = interpreter.AllocateTensors();
     if (allocate_status != kTfLiteOk) {
-        printf("AllocateTensors failed for inference job: %s\n", job.name.c_str());
+        LOG_ERR("Failed to allocate tensors for inference: job=%s\n", job.name.c_str());
         return true;
     }
 
@@ -225,9 +224,9 @@
         }
     }
     if (job.input.size() != inputTensors.size()) {
-        printf("Number of input buffers does not match number of non empty network tensors. input=%zu, network=%zu\n",
-               job.input.size(),
-               inputTensors.size());
+        LOG_ERR("Number of input buffers does not match number of non empty network tensors: input=%zu, network=%zu\n",
+                job.input.size(),
+                inputTensors.size());
         return true;
     }
 
@@ -237,11 +236,11 @@
         const TfLiteTensor *tensor = inputTensors[i];
 
         if (input.size != tensor->bytes) {
-            printf("Input size does not match network size. job=%s, index=%zu, input=%zu, network=%u\n",
-                   job.name.c_str(),
-                   i,
-                   input.size,
-                   tensor->bytes);
+            LOG_ERR("Job input size does not match network input size: job=%s, index=%zu, input=%zu, network=%u\n",
+                    job.name.c_str(),
+                    i,
+                    input.size,
+                    tensor->bytes);
             return true;
         }
 
@@ -251,13 +250,13 @@
     // Run the inference
     TfLiteStatus invoke_status = interpreter.Invoke();
     if (invoke_status != kTfLiteOk) {
-        printf("Invoke failed for inference job: %s\n", job.name.c_str());
+        LOG_ERR("Invoke failed for inference: job=%s\n", job.name.c_str());
         return true;
     }
 
-    printf("%s : %zu\r\n", "arena_used_bytes", interpreter.arena_used_bytes());
+    LOG("arena_used_bytes : %zu\n", interpreter.arena_used_bytes());
 
-    printf("Inference runtime: %u cycles\r\n", (unsigned int)profiler.GetTotalTicks());
+    LOG("Inference runtime: %u cycles\n", (unsigned int)profiler.GetTotalTicks());
 
     if (job.pmuCycleCounterEnable != 0) {
         job.pmuCycleCounterCount = profiler.GetTotalTicks();
@@ -266,7 +265,7 @@
     // Copy output data
     if (job.output.size() > 0) {
         if (interpreter.outputs_size() != job.output.size()) {
-            printf("Number of outputs mismatch. job=%zu, network=%u\n", job.output.size(), interpreter.outputs_size());
+            LOG_ERR("Output size mismatch: job=%zu, network=%u\n", job.output.size(), interpreter.outputs_size());
             return true;
         }
 
@@ -280,28 +279,26 @@
     if (job.numBytesToPrint > 0) {
         // Print all of the output data, or the first NUM_BYTES_TO_PRINT bytes,
         // whichever comes first as well as the output shape.
-        printf("num_of_outputs: %d\n", interpreter.outputs_size());
-        printf("output_begin\n");
-        printf("[\n");
-
+        LOG("num_of_outputs: %d\n", interpreter.outputs_size());
+        LOG("output_begin\n");
+        LOG("[\n");
         for (unsigned int i = 0; i < interpreter.outputs_size(); i++) {
             TfLiteTensor *output = interpreter.output(i);
             print_output_data(output, job.numBytesToPrint);
             if (i != interpreter.outputs_size() - 1) {
-                printf(",\n");
+                LOG(",\n");
             }
         }
-
-        printf("]\n");
-        printf("output_end\n");
+        LOG("]\n");
+        LOG("output_end\n");
     }
 
     if (job.expectedOutput.size() > 0) {
         if (job.expectedOutput.size() != interpreter.outputs_size()) {
-            printf("Expected number of output tensors does not match network. job=%s, expected=%zu, network=%zu\n",
-                   job.name.c_str(),
-                   job.expectedOutput.size(),
-                   interpreter.outputs_size());
+            LOG_ERR("Expected number of output tensors mismatch: job=%s, expected=%zu, network=%zu\n",
+                    job.name.c_str(),
+                    job.expectedOutput.size(),
+                    interpreter.outputs_size());
             return true;
         }
 
@@ -310,33 +307,33 @@
             const TfLiteTensor *output = interpreter.output(i);
 
             if (expected.size != output->bytes) {
-                printf("Expected tensor size does not match output size. job=%s, index=%u, expected=%zu, network=%zu\n",
-                       job.name.c_str(),
-                       i,
-                       expected.size,
-                       output->bytes);
+                LOG_ERR("Expected output tensor size mismatch: job=%s, index=%u, expected=%zu, network=%zu\n",
+                        job.name.c_str(),
+                        i,
+                        expected.size,
+                        output->bytes);
                 return true;
             }
 
             for (unsigned int j = 0; j < output->bytes; ++j) {
                 if (output->data.uint8[j] != static_cast<uint8_t *>(expected.data)[j]) {
-                    printf("Expected data does not match output data. job=%s, index=%u, offset=%u, "
-                           "expected=%02x, network=%02x\n",
-                           job.name.c_str(),
-                           i,
-                           j,
-                           static_cast<uint8_t *>(expected.data)[j],
-                           output->data.uint8[j]);
+                    LOG_ERR("Expected output tensor data mismatch: job=%s, index=%u, offset=%u, "
+                            "expected=%02x, network=%02x\n",
+                            job.name.c_str(),
+                            i,
+                            j,
+                            static_cast<uint8_t *>(expected.data)[j],
+                            output->data.uint8[j]);
                     return true;
                 }
             }
         }
     }
 
-    printf("Finished running job: %s\n", job.name.c_str());
+    LOG_INFO("Finished running job: %s\n", job.name.c_str());
 
     return false;
-}
+} // namespace InferenceProcess
 
 bool InferenceProcess::run(bool exitOnEmpty) {
     bool anyJobFailed = false;
@@ -348,7 +345,7 @@
 
         if (empty) {
             if (exitOnEmpty) {
-                printf("Exit from InferenceProcess::run() on empty job queue!\n");
+                LOG_INFO("Exit from InferenceProcess::run() due to empty job queue\n");
                 break;
             }