Kristofer Jonsson | 641c091 | 2020-08-31 11:34:14 +0200 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2019-2020 Arm Limited. All rights reserved. |
| 3 | * |
| 4 | * SPDX-License-Identifier: Apache-2.0 |
| 5 | * |
| 6 | * Licensed under the Apache License, Version 2.0 (the License); you may |
| 7 | * not use this file except in compliance with the License. |
| 8 | * You may obtain a copy of the License at |
| 9 | * |
| 10 | * www.apache.org/licenses/LICENSE-2.0 |
| 11 | * |
| 12 | * Unless required by applicable law or agreed to in writing, software |
| 13 | * distributed under the License is distributed on an AS IS BASIS, WITHOUT |
| 14 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | * See the License for the specific language governing permissions and |
| 16 | * limitations under the License. |
| 17 | */ |
| 18 | |
#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/version.h"

#include "inference_process.hpp"

#include <algorithm>
#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <cstring>
| 26 | |
// Size in bytes of the memory pool handed to the TFLite Micro interpreter
// for tensor allocations. Overridable from the build system.
#ifndef TENSOR_ARENA_SIZE
#define TENSOR_ARENA_SIZE (1024)
#endif

// Tensor arena, placed in a dedicated no-init BSS section so the linker can
// position it (e.g. in SRAM shared with the NPU) and startup code skips
// zero-filling it. 16-byte alignment is required by the interpreter.
__attribute__((section(".bss.NoInit"), aligned(16))) uint8_t inferenceProcessTensorArena[TENSOR_ARENA_SIZE];
| 32 | |
| 33 | namespace { |
| 34 | void print_output_data(TfLiteTensor *output, size_t bytesToPrint) { |
| 35 | const int numBytesToPrint = std::min(output->bytes, bytesToPrint); |
| 36 | |
| 37 | int dims_size = output->dims->size; |
| 38 | printf("{\n"); |
| 39 | printf("\"dims\": [%d,", dims_size); |
| 40 | for (int i = 0; i < output->dims->size - 1; ++i) { |
| 41 | printf("%d,", output->dims->data[i]); |
| 42 | } |
| 43 | printf("%d],\n", output->dims->data[dims_size - 1]); |
| 44 | |
| 45 | printf("\"data_address\": \"%08x\",\n", (uint32_t)output->data.data); |
| 46 | printf("\"data\":\""); |
| 47 | for (int i = 0; i < numBytesToPrint - 1; ++i) { |
| 48 | if (i % 16 == 0 && i != 0) { |
| 49 | printf("\n"); |
| 50 | } |
| 51 | printf("0x%02x,", output->data.uint8[i]); |
| 52 | } |
| 53 | printf("0x%02x\"\n", output->data.uint8[numBytesToPrint - 1]); |
| 54 | printf("}"); |
| 55 | } |
| 56 | |
| 57 | bool copyOutput(const TfLiteTensor &src, InferenceProcess::DataPtr &dst) { |
| 58 | if (dst.data == nullptr) { |
| 59 | return false; |
| 60 | } |
| 61 | |
| 62 | if (src.bytes > dst.size) { |
| 63 | printf("Tensor size %d does not match output size %d.\n", src.bytes, dst.size); |
| 64 | return true; |
| 65 | } |
| 66 | |
| 67 | std::copy(src.data.uint8, src.data.uint8 + src.bytes, static_cast<uint8_t *>(dst.data)); |
| 68 | dst.size = src.bytes; |
| 69 | |
| 70 | return false; |
| 71 | } |
| 72 | |
| 73 | } // namespace |
| 74 | |
| 75 | namespace InferenceProcess { |
// Non-owning view of a caller-provided buffer: raw pointer plus size in bytes.
DataPtr::DataPtr(void *data, size_t size) : data(data), size(size) {}
| 77 | |
// Default job: all buffers default-constructed, no output bytes printed.
InferenceJob::InferenceJob() : numBytesToPrint(0) {}
| 79 | |
// Fully-specified job: a name for logging, buffers for the flatbuffer model,
// the input, the output destination and (optionally) the expected output to
// compare against, plus how many output bytes to dump after inference.
InferenceJob::InferenceJob(const std::string &name,
                           const DataPtr &networkModel,
                           const DataPtr &input,
                           const DataPtr &output,
                           const DataPtr &expectedOutput,
                           size_t numBytesToPrint) :
    name(name),
    networkModel(networkModel), input(input), output(output), expectedOutput(expectedOutput),
    numBytesToPrint(numBytesToPrint) {}
| 89 | |
// The queue spin lock starts released (0 = free; getLock() stores 1 to claim).
InferenceProcess::InferenceProcess() : lock(0) {}
| 91 | |
// Spin-lock acquire built on ARM exclusive load/store (LDREX/STREX), with
// some corrections from:
// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dai0321a/BIHEJCHB.html
// Busy-waits with no backoff until the lock word can be claimed.
// TODO: check correctness?
void InferenceProcess::getLock() {
    int status = 0;

    do {
        // Spin until the lock word reads as free (0); the exclusive monitor
        // is armed by the LDREX.
        while (__LDREXW(&lock) != 0)
            ;

        // Try to claim the lock by storing 1. STREX returns non-zero if the
        // exclusive monitor was lost (someone else touched the word), in
        // which case we retry the whole sequence.
        status = __STREXW(1, &lock);
    } while (status != 0);

    // Barrier: no memory access from the critical section may be reordered
    // before the lock acquisition completes.
    __DMB();
}
| 110 | |
// Spin-lock release: barrier first so every write made inside the critical
// section is visible before other cores can observe the lock as free, then a
// plain store of 0. TODO: check correctness?
void InferenceProcess::freeLock() {
    // Ensure memory operations completed before releasing lock
    __DMB();

    lock = 0;
}
| 118 | |
// Enqueues a job for run() to pick up; safe against concurrent producers and
// consumers via the spin lock. Always returns true — enqueue failure is not
// detected here.
bool InferenceProcess::push(const InferenceJob &job) {
    getLock();
    inferenceJobQueue.push(job);
    freeLock();

    return true;
}
| 126 | |
| 127 | bool InferenceProcess::runJob(InferenceJob &job) { |
| 128 | printf("Running inference job: %s\n", job.name.c_str()); |
| 129 | |
| 130 | tflite::MicroErrorReporter microErrorReporter; |
| 131 | tflite::ErrorReporter *reporter = µErrorReporter; |
| 132 | |
| 133 | const tflite::Model *model = ::tflite::GetModel(job.networkModel.data); |
| 134 | if (model->version() != TFLITE_SCHEMA_VERSION) { |
| 135 | printf("Model provided is schema version %d not equal " |
| 136 | "to supported version %d.\n", |
| 137 | model->version(), |
| 138 | TFLITE_SCHEMA_VERSION); |
| 139 | return true; |
| 140 | } |
| 141 | |
| 142 | tflite::AllOpsResolver resolver; |
| 143 | |
| 144 | tflite::MicroInterpreter interpreter(model, resolver, inferenceProcessTensorArena, TENSOR_ARENA_SIZE, reporter); |
| 145 | |
| 146 | TfLiteStatus allocate_status = interpreter.AllocateTensors(); |
| 147 | if (allocate_status != kTfLiteOk) { |
| 148 | printf("AllocateTensors failed for inference job: %s\n", job.name.c_str()); |
| 149 | return true; |
| 150 | } |
| 151 | |
| 152 | bool inputSizeError = false; |
| 153 | // TODO: adapt for multiple inputs |
| 154 | // for (unsigned int i = 0; i < interpreter.inputs_size(); ++i) |
| 155 | for (unsigned int i = 0; i < 1; ++i) { |
| 156 | TfLiteTensor *input = interpreter.input(i); |
| 157 | if (input->bytes != job.input.size) { |
| 158 | // If input sizes don't match, then we could end up copying |
| 159 | // uninitialized or partial data. |
| 160 | inputSizeError = true; |
| 161 | printf("Allocated size: %d for input: %d doesn't match the " |
| 162 | "received input size: %d for job: %s\n", |
| 163 | input->bytes, |
| 164 | i, |
| 165 | job.input.size, |
| 166 | job.name.c_str()); |
| 167 | return true; |
| 168 | } |
| 169 | memcpy(input->data.uint8, job.input.data, input->bytes); |
| 170 | } |
| 171 | if (inputSizeError) { |
| 172 | return true; |
| 173 | } |
| 174 | |
| 175 | TfLiteStatus invoke_status = interpreter.Invoke(); |
| 176 | if (invoke_status != kTfLiteOk) { |
| 177 | printf("Invoke failed for inference job: %s\n", job.name.c_str()); |
| 178 | return true; |
| 179 | } |
| 180 | |
| 181 | copyOutput(*interpreter.output(0), job.output); |
| 182 | |
| 183 | if (job.numBytesToPrint > 0) { |
| 184 | // Print all of the output data, or the first NUM_BYTES_TO_PRINT bytes, |
| 185 | // whichever comes first as well as the output shape. |
| 186 | printf("num_of_outputs: %d\n", interpreter.outputs_size()); |
| 187 | printf("output_begin\n"); |
| 188 | printf("[\n"); |
| 189 | for (unsigned int i = 0; i < interpreter.outputs_size(); i++) { |
| 190 | TfLiteTensor *output = interpreter.output(i); |
| 191 | print_output_data(output, job.numBytesToPrint); |
| 192 | if (i != interpreter.outputs_size() - 1) { |
| 193 | printf(",\n"); |
| 194 | } |
| 195 | } |
| 196 | printf("]\n"); |
| 197 | printf("output_end\n"); |
| 198 | } |
| 199 | |
| 200 | if (job.expectedOutput.data != nullptr) { |
| 201 | bool outputSizeError = false; |
| 202 | // TODO: adapt for multiple outputs |
| 203 | // for (unsigned int i = 0; i < interpreter.outputs_size(); i++) |
| 204 | for (unsigned int i = 0; i < 1; i++) { |
| 205 | TfLiteTensor *output = interpreter.output(i); |
| 206 | if (job.expectedOutput.size != output->bytes) { |
| 207 | // If the expected output & the actual output size doesn't |
| 208 | // match, we could end up accessing out-of-bound data. |
| 209 | // Also there's no need to compare the data, as we know |
| 210 | // that sizes differ. |
| 211 | outputSizeError = true; |
| 212 | printf("Output size: %d for output: %d doesn't match with " |
| 213 | "the expected output size: %d for job: %s\n", |
| 214 | output->bytes, |
| 215 | i, |
| 216 | job.expectedOutput.size, |
| 217 | job.name.c_str()); |
| 218 | return true; |
| 219 | } |
| 220 | for (unsigned int j = 0; j < output->bytes; ++j) { |
| 221 | if (output->data.uint8[j] != (static_cast<uint8_t *>(job.expectedOutput.data))[j]) { |
| 222 | printf("Output data doesn't match expected output data at index: " |
| 223 | "%d, expected: %02X actual: %02X", |
| 224 | j, |
| 225 | (static_cast<uint8_t *>(job.expectedOutput.data))[j], |
| 226 | output->data.uint8[j]); |
| 227 | } |
| 228 | } |
| 229 | } |
| 230 | if (outputSizeError) { |
| 231 | return true; |
| 232 | } |
| 233 | } |
| 234 | printf("Finished running job: %s\n", job.name.c_str()); |
| 235 | |
| 236 | return false; |
| 237 | } |
| 238 | |
| 239 | bool InferenceProcess::run(bool exitOnEmpty) { |
| 240 | bool anyJobFailed = false; |
| 241 | |
| 242 | while (true) { |
| 243 | getLock(); |
| 244 | bool empty = inferenceJobQueue.empty(); |
| 245 | freeLock(); |
| 246 | |
| 247 | if (empty) { |
| 248 | if (exitOnEmpty) { |
| 249 | printf("Exit from InferenceProcess::run() on empty job queue!\n"); |
| 250 | break; |
| 251 | } |
| 252 | |
| 253 | continue; |
| 254 | } |
| 255 | |
| 256 | getLock(); |
| 257 | InferenceJob job = inferenceJobQueue.front(); |
| 258 | inferenceJobQueue.pop(); |
| 259 | freeLock(); |
| 260 | |
| 261 | if (runJob(job)) { |
| 262 | anyJobFailed = true; |
| 263 | continue; |
| 264 | } |
| 265 | } |
| 266 | |
| 267 | return anyJobFailed; |
| 268 | } |
| 269 | |
| 270 | } // namespace InferenceProcess |