/*
 * Copyright (c) 2019-2021 Arm Limited. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/cortex_m_generic/debug_log_callback.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/micro/micro_profiler.h"
#include "tensorflow/lite/schema/schema_generated.h"

#include "arm_profiler.hpp"
#ifdef ETHOSU
#include "layer_by_layer_profiler.hpp"
#endif
#include "ethosu_log.h"

#include "inference_process.hpp"

#include "cmsis_compiler.h"

#include <inttypes.h>

using namespace std;

namespace {

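// Debug log callback registered with TensorFlow Lite Micro (see
// RegisterDebugLogCallback() in runJob()), routing framework log output
// through the application's LOG_DEBUG macro.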
void tflu_debug_log(const char *s) {
    LOG_DEBUG("%s", s);
}

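// Prints up to 'bytesToPrint' bytes of an output tensor as a JSON-like object
// containing the tensor dimensions, the data address and the data as
// comma-separated hex bytes.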
void print_output_data(TfLiteTensor *output, size_t bytesToPrint) {
    const int numBytesToPrint = min(output->bytes, bytesToPrint);
    int dims_size = output->dims->size;
    LOG("{\n");
    LOG("\"dims\": [%d,", dims_size);
    for (int i = 0; i < output->dims->size - 1; ++i) {
        LOG("%d,", output->dims->data[i]);
    }
    LOG("%d],\n", output->dims->data[dims_size - 1]);
    LOG("\"data_address\": \"%08" PRIx32 "\",\n", (uint32_t)output->data.data);
    LOG("\"data\":\"");
    for (int i = 0; i < numBytesToPrint - 1; ++i) {
        /*
         * Workaround for an issue seen when compiling with GCC: printing
         * only a '\n' on its own corrupts the overall output.
         */
        if (i % 15 == 0 && i != 0) {
            LOG("0x%02x,\n", output->data.uint8[i]);
        } else {
            LOG("0x%02x,", output->data.uint8[i]);
        }
    }
    LOG("0x%02x\"\n", output->data.uint8[numBytesToPrint - 1]);
    LOG("}");
}

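// Copies the contents of a source tensor into the destination buffer. Returns
// true on error (destination buffer too small), false otherwise. A null
// destination pointer is skipped and treated as success. On a successful copy
// the destination size is updated to the number of bytes copied.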
bool copyOutput(const TfLiteTensor &src, InferenceProcess::DataPtr &dst) {
    if (dst.data == nullptr) {
        return false;
    }

    if (src.bytes > dst.size) {
        LOG_ERR("Tensor size mismatch (bytes): actual=%zu, expected=%zu.\n", src.bytes, dst.size);
        return true;
    }

    copy(src.data.uint8, src.data.uint8 + src.bytes, static_cast<uint8_t *>(dst.data));
    dst.size = src.bytes;

    return false;
}

} // namespace

namespace InferenceProcess {
DataPtr::DataPtr(void *_data, size_t _size) : data(_data), size(_size) {}

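// Data cache maintenance helpers. invalidate() discards any cached copy of the
// buffer so subsequent reads fetch from main memory; clean() writes dirty cache
// lines back to main memory. Both compile to no-ops on cores without a data
// cache.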
void DataPtr::invalidate() {
#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
    SCB_InvalidateDCache_by_Addr(reinterpret_cast<uint32_t *>(data), size);
#endif
}

void DataPtr::clean() {
#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
    SCB_CleanDCache_by_Addr(reinterpret_cast<uint32_t *>(data), size);
#endif
}

InferenceJob::InferenceJob() : numBytesToPrint(0) {}

InferenceJob::InferenceJob(const string &_name,
                           const DataPtr &_networkModel,
                           const vector<DataPtr> &_input,
                           const vector<DataPtr> &_output,
                           const vector<DataPtr> &_expectedOutput,
                           size_t _numBytesToPrint,
                           const vector<uint8_t> &_pmuEventConfig,
                           const uint32_t _pmuCycleCounterEnable) :
    name(_name),
    networkModel(_networkModel), input(_input), output(_output), expectedOutput(_expectedOutput),
    numBytesToPrint(_numBytesToPrint), pmuEventConfig(_pmuEventConfig), pmuCycleCounterEnable(_pmuCycleCounterEnable),
    pmuEventCount(), pmuCycleCounterCount(0) {}
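
/*
 * Typical usage (a sketch only; assumes calling code with access to this
 * namespace, an InferenceProcess instance and caller-provided buffers - the
 * buffer and variable names below are illustrative, not part of this file):
 *
 *   DataPtr network(networkModelData, networkModelSize);
 *   DataPtr input(inputData, inputSize);
 *   DataPtr output(outputData, outputSize);
 *
 *   InferenceJob job("job0", network, {input}, {output}, {}, 0, {}, 0);
 *
 *   inferenceProcess.push(job);
 *   inferenceProcess.run(true);
 */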

void InferenceJob::invalidate() {
    networkModel.invalidate();

    for (auto &it : input) {
        it.invalidate();
    }

    for (auto &it : output) {
        it.invalidate();
    }

    for (auto &it : expectedOutput) {
        it.invalidate();
    }
}

void InferenceJob::clean() {
    networkModel.clean();

    for (auto &it : input) {
        it.clean();
    }

    for (auto &it : output) {
        it.clean();
    }

    for (auto &it : expectedOutput) {
        it.clean();
    }
}

// NOTE: getLock() and freeLock() are based on the example code from
// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dai0321a/BIHEJCHB.html
// with some corrections.
// TODO: check correctness?
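// The lock is taken with an LDREX/STREX retry loop: LDREXW reads the lock
// word and marks it for exclusive access, and STREXW only succeeds (returns
// 0) if nothing else has written the word in between, so a successful store
// means this caller owns the lock.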
void InferenceProcess::getLock() {
    int status = 0;

    do {
        // Wait until lock_var is free
        while (__LDREXW(&lock) != 0)
            ;

        // Try to set lock_var
        status = __STREXW(1, &lock);
    } while (status != 0);

    // Do not start any other memory access until memory barrier is completed
    __DMB();
}

// TODO: check correctness?
void InferenceProcess::freeLock() {
    // Ensure memory operations completed before releasing lock
    __DMB();

    lock = 0;
}

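// Adds a job to the inference queue under the lock. Always returns true.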
bool InferenceProcess::push(const InferenceJob &job) {
    getLock();
    inferenceJobQueue.push(job);
    freeLock();

    return true;
}

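// Runs a single inference job: verifies the model schema version, allocates
// tensors from the tensor arena, copies the job's input buffers into the
// network's input tensors, invokes the interpreter, and finally copies and/or
// compares the outputs. Returns true on failure, false on success.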
bool InferenceProcess::runJob(InferenceJob &job) {
    LOG_INFO("Running inference job: %s\n", job.name.c_str());

    // Register debug log callback for profiling
    RegisterDebugLogCallback(tflu_debug_log);

    tflite::MicroErrorReporter microErrorReporter;
    tflite::ErrorReporter *reporter = &microErrorReporter;

    // Get model handle and verify that the version is correct
    const tflite::Model *model = ::tflite::GetModel(job.networkModel.data);
    if (model->version() != TFLITE_SCHEMA_VERSION) {
        LOG_ERR("Model schema version unsupported: version=%" PRIu32 ", supported=%d.\n",
                model->version(),
                TFLITE_SCHEMA_VERSION);
        return true;
    }

    // Create the TFL micro interpreter
    tflite::AllOpsResolver resolver;
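    // Select the profiler implementation: layer-by-layer profiling when built
    // with Ethos-U support, the generic Arm profiler otherwise.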
#ifdef ETHOSU
    tflite::LayerByLayerProfiler profiler;
#else
    tflite::ArmProfiler profiler;
#endif

    tflite::MicroInterpreter interpreter(model, resolver, tensorArena, tensorArenaSize, reporter, &profiler);

    // Allocate tensors
    TfLiteStatus allocate_status = interpreter.AllocateTensors();
    if (allocate_status != kTfLiteOk) {
        LOG_ERR("Failed to allocate tensors for inference: job=%s\n", job.name.c_str());
        return true;
    }

    // Create a filtered list of non-empty input tensors
    vector<TfLiteTensor *> inputTensors;
    for (size_t i = 0; i < interpreter.inputs_size(); ++i) {
        TfLiteTensor *tensor = interpreter.input(i);

        if (tensor->bytes > 0) {
            inputTensors.push_back(tensor);
        }
    }
    if (job.input.size() != inputTensors.size()) {
        LOG_ERR("Number of input buffers does not match number of non-empty network tensors: input=%zu, network=%zu\n",
                job.input.size(),
                inputTensors.size());
        return true;
    }

    // Copy input data
    for (size_t i = 0; i < inputTensors.size(); ++i) {
        const DataPtr &input = job.input[i];
        const TfLiteTensor *tensor = inputTensors[i];

        if (input.size != tensor->bytes) {
            LOG_ERR("Job input size does not match network input size: job=%s, index=%zu, input=%zu, network=%zu\n",
                    job.name.c_str(),
                    i,
                    input.size,
                    tensor->bytes);
            return true;
        }

        copy(static_cast<char *>(input.data), static_cast<char *>(input.data) + input.size, tensor->data.uint8);
    }

    // Run the inference
    TfLiteStatus invoke_status = interpreter.Invoke();
    if (invoke_status != kTfLiteOk) {
        LOG_ERR("Invoke failed for inference: job=%s\n", job.name.c_str());
        return true;
    }

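    // Log arena usage and the cycle count reported by the profiler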
    LOG("arena_used_bytes : %zu\n", interpreter.arena_used_bytes());

    LOG("Inference runtime: %u cycles\n", (unsigned int)profiler.GetTotalTicks());

    if (job.pmuCycleCounterEnable != 0) {
        job.pmuCycleCounterCount = profiler.GetTotalTicks();
    }

    // Copy output data
    if (job.output.size() > 0) {
        if (interpreter.outputs_size() != job.output.size()) {
            LOG_ERR("Output size mismatch: job=%zu, network=%zu\n", job.output.size(), interpreter.outputs_size());
            return true;
        }

        for (unsigned i = 0; i < interpreter.outputs_size(); ++i) {
            if (copyOutput(*interpreter.output(i), job.output[i])) {
                return true;
            }
        }
    }

    if (job.numBytesToPrint > 0) {
        // Print the output shape and the output data, truncated to the first
        // numBytesToPrint bytes if the tensor is larger than that.
        LOG("num_of_outputs: %zu\n", interpreter.outputs_size());
        LOG("output_begin\n");
        LOG("[\n");
        for (unsigned int i = 0; i < interpreter.outputs_size(); i++) {
            TfLiteTensor *output = interpreter.output(i);
            print_output_data(output, job.numBytesToPrint);
            if (i != interpreter.outputs_size() - 1) {
                LOG(",\n");
            }
        }
        LOG("]\n");
        LOG("output_end\n");
    }

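    // Compare the produced output against the expected reference output, if
    // one was provided with the job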
    if (job.expectedOutput.size() > 0) {
        if (job.expectedOutput.size() != interpreter.outputs_size()) {
            LOG_ERR("Expected number of output tensors mismatch: job=%s, expected=%zu, network=%zu\n",
                    job.name.c_str(),
                    job.expectedOutput.size(),
                    interpreter.outputs_size());
            return true;
        }

        for (unsigned int i = 0; i < interpreter.outputs_size(); i++) {
            const DataPtr &expected = job.expectedOutput[i];
            const TfLiteTensor *output = interpreter.output(i);

            if (expected.size != output->bytes) {
                LOG_ERR("Expected output tensor size mismatch: job=%s, index=%u, expected=%zu, network=%zu\n",
                        job.name.c_str(),
                        i,
                        expected.size,
                        output->bytes);
                return true;
            }

            for (unsigned int j = 0; j < output->bytes; ++j) {
                if (output->data.uint8[j] != static_cast<uint8_t *>(expected.data)[j]) {
                    LOG_ERR("Expected output tensor data mismatch: job=%s, index=%u, offset=%u, "
                            "expected=%02x, network=%02x\n",
                            job.name.c_str(),
                            i,
                            j,
                            static_cast<uint8_t *>(expected.data)[j],
                            output->data.uint8[j]);
                    return true;
                }
            }
        }
    }

    LOG_INFO("Finished running job: %s\n", job.name.c_str());

    return false;
}

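// Main processing loop: pops jobs from the queue under the lock and runs them.
// If exitOnEmpty is true the loop returns once the queue is empty; otherwise it
// keeps polling for new jobs. Returns true if any job failed.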
bool InferenceProcess::run(bool exitOnEmpty) {
    bool anyJobFailed = false;

    while (true) {
        getLock();
        bool empty = inferenceJobQueue.empty();
        freeLock();

        if (empty) {
            if (exitOnEmpty) {
                LOG_INFO("Exit from InferenceProcess::run() due to empty job queue\n");
                break;
            }

            continue;
        }

        getLock();
        InferenceJob job = inferenceJobQueue.front();
        inferenceJobQueue.pop();
        freeLock();

        if (runJob(job)) {
            anyJobFailed = true;
            continue;
        }
    }

    return anyJobFailed;
}

} // namespace InferenceProcess