/*
 * Copyright (c) 2019-2021 Arm Limited. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/cortex_m_generic/debug_log_callback.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/micro/micro_profiler.h"
#include "tensorflow/lite/schema/schema_generated.h"

#include "arm_profiler.hpp"
#ifdef LAYER_BY_LAYER_PROFILER
#include "layer_by_layer_profiler.hpp"
#endif
#include "ethosu_log.h"

#include "inference_process.hpp"

#include "cmsis_compiler.h"

#include <inttypes.h>

using namespace std;

namespace {

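// Callback hooked into TensorFlow Lite Micro via RegisterDebugLogCallback(),
// forwarding the library's debug output to the application log.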
void tflu_debug_log(const char *s) {
    LOG("%s", s);
}

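// Table-driven CRC-32 (reflected form, polynomial 0xedb88320, the same variant
// used by zlib and IEEE 802.3). The constexpr constructor builds the lookup
// table at compile time, so no run-time initialization is needed.
//
// Usage sketch (buffer and length are illustrative):
//
//     constexpr Crc crc;
//     uint32_t checksum = crc.crc32(buffer, length);
//
// Passing a previous result as `init` chains checksums across split buffers.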
class Crc {
public:
    constexpr Crc() : table() {
        uint32_t poly = 0xedb88320;

        for (uint32_t i = 0; i < 256; i++) {
            uint32_t crc = i;

            for (int j = 0; j < 8; j++) {
                if (crc & 1) {
                    crc = poly ^ (crc >> 1);
                } else {
                    crc >>= 1;
                }
            }

            table[i] = crc;
        }
    }

    uint32_t crc32(const void *data, const size_t length, uint32_t init = 0) const {
        uint32_t crc = init ^ 0xffffffff;

        const uint8_t *v = static_cast<const uint8_t *>(data);

        for (size_t i = 0; i < length; i++) {
            crc = table[(crc ^ v[i]) & 0xff] ^ (crc >> 8);
        }

        return crc ^ 0xffffffff;
    }

private:
    uint32_t table[256];
};

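// Prints an output tensor as a JSON-like record: the shape (dimension count
// followed by each dimension), the data address, the CRC32 of the whole tensor
// and at most `bytesToPrint` bytes of payload. Illustrative output (values
// made up):
//
//     {
//     "dims": [3,1,2,2],
//     "data_address": "20001000",
//     "crc32": "8d2f1c3b",
//     "data":"0x01,0x02,0x03,0x04"
//     }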
void print_output_data(TfLiteTensor *output, size_t bytesToPrint) {
    constexpr auto crc = Crc();
    const uint32_t output_crc32 = crc.crc32(output->data.data, output->bytes);
    const int numBytesToPrint = min(output->bytes, bytesToPrint);
    int dims_size = output->dims->size;
    LOG("{\n");
    LOG("\"dims\": [%d,", dims_size);
    for (int i = 0; i < output->dims->size - 1; ++i) {
        LOG("%d,", output->dims->data[i]);
    }
    LOG("%d],\n", output->dims->data[dims_size - 1]);
    LOG("\"data_address\": \"%08" PRIx32 "\",\n", (uint32_t)output->data.data);
    if (numBytesToPrint) {
        LOG("\"crc32\": \"%08" PRIx32 "\",\n", output_crc32);
        LOG("\"data\":\"");
        for (int i = 0; i < numBytesToPrint - 1; ++i) {
            /*
             * Workaround for an issue when compiling with GCC whereby
             * printing only a '\n' corrupts the overall output.
             */
            if (i % 15 == 0 && i != 0) {
                LOG("0x%02x,\n", output->data.uint8[i]);
            } else {
                LOG("0x%02x,", output->data.uint8[i]);
            }
        }
        LOG("0x%02x\"\n", output->data.uint8[numBytesToPrint - 1]);
    } else {
        LOG("\"crc32\": \"%08" PRIx32 "\"\n", output_crc32);
    }
    LOG("}");
}

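// Copies a tensor into the caller-provided buffer and updates dst.size to the
// number of bytes copied. Follows the same convention as runJob(): returns
// true on error. A null destination buffer is silently skipped.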
bool copyOutput(const TfLiteTensor &src, InferenceProcess::DataPtr &dst) {
    if (dst.data == nullptr) {
        return false;
    }

    if (src.bytes > dst.size) {
        LOG_ERR("Tensor size mismatch (bytes): actual=%zu, expected=%zu.", src.bytes, dst.size);
        return true;
    }

    copy(src.data.uint8, src.data.uint8 + src.bytes, static_cast<uint8_t *>(dst.data));
    dst.size = src.bytes;

    return false;
}

} // namespace

namespace InferenceProcess {
DataPtr::DataPtr(void *_data, size_t _size) : data(_data), size(_size) {}

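// Cache maintenance helpers for cores with a data cache. The buffers are
// presumably shared with the NPU or other DMA masters: invalidate() discards
// stale cache lines before the CPU reads data written by hardware, while
// clean() writes dirty lines back to memory before handing a buffer over.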
void DataPtr::invalidate() {
#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
    SCB_InvalidateDCache_by_Addr(reinterpret_cast<uint32_t *>(data), size);
#endif
}

void DataPtr::clean() {
#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
    SCB_CleanDCache_by_Addr(reinterpret_cast<uint32_t *>(data), size);
#endif
}

InferenceJob::InferenceJob() : numBytesToPrint(0) {}

InferenceJob::InferenceJob(const string &_name,
                           const DataPtr &_networkModel,
                           const vector<DataPtr> &_input,
                           const vector<DataPtr> &_output,
                           const vector<DataPtr> &_expectedOutput,
                           size_t _numBytesToPrint,
                           const vector<uint8_t> &_pmuEventConfig,
                           const bool _pmuCycleCounterEnable) :
    name(_name),
    networkModel(_networkModel), input(_input), output(_output), expectedOutput(_expectedOutput),
    numBytesToPrint(_numBytesToPrint), pmuEventConfig(_pmuEventConfig), pmuCycleCounterEnable(_pmuCycleCounterEnable),
    pmuEventCount(), pmuCycleCounterCount(0) {}

void InferenceJob::invalidate() {
    networkModel.invalidate();

    for (auto &it : input) {
        it.invalidate();
    }

    for (auto &it : output) {
        it.invalidate();
    }

    for (auto &it : expectedOutput) {
        it.invalidate();
    }
}

void InferenceJob::clean() {
    networkModel.clean();

    for (auto &it : input) {
        it.clean();
    }

    for (auto &it : output) {
        it.clean();
    }

    for (auto &it : expectedOutput) {
        it.clean();
    }
}

// NOTE: Adding code for get_lock & free_lock with some corrections from
// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dai0321a/BIHEJCHB.html
// TODO: check correctness?
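// The lock is a spinlock built on the Cortex-M exclusive access intrinsics:
// __LDREXW opens an exclusive monitor on `lock`, and the subsequent __STREXW
// only succeeds (returns 0) if no other context has written the address in
// between.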
void InferenceProcess::getLock() {
    int status = 0;

    do {
        // Wait until lock_var is free
        while (__LDREXW(&lock) != 0)
            ;

        // Try to set lock_var
        status = __STREXW(1, &lock);
    } while (status != 0);

    // Do not start any other memory access until memory barrier is completed
    __DMB();
}

// TODO: check correctness?
void InferenceProcess::freeLock() {
    // Ensure memory operations completed before releasing lock
    __DMB();

    lock = 0;
}

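// Enqueues a job under the lock. Note that the return value is always true and
// does not indicate failure.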
bool InferenceProcess::push(const InferenceJob &job) {
    getLock();
    inferenceJobQueue.push(job);
    freeLock();

    return true;
}

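// Runs a single inference job: map and version-check the model, allocate
// tensors from the arena, copy inputs, invoke the interpreter, then copy,
// print and optionally verify the outputs. Returns true on failure.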
230bool InferenceProcess::runJob(InferenceJob &job) {
Kristofer Jonssoneb912392021-11-12 12:51:27 +0100231 LOG_INFO("Running inference job: %s", job.name.c_str());
Kristofer Jonsson641c0912020-08-31 11:34:14 +0200232
Bhavik Patelffe845d2020-11-16 12:13:56 +0100233 // Register debug log callback for profiling
234 RegisterDebugLogCallback(tflu_debug_log);
235
Kristofer Jonsson641c0912020-08-31 11:34:14 +0200236 tflite::MicroErrorReporter microErrorReporter;
237 tflite::ErrorReporter *reporter = &microErrorReporter;
238
Kristofer Jonsson72fa50b2020-09-10 13:26:41 +0200239 // Get model handle and verify that the version is correct
Kristofer Jonsson641c0912020-08-31 11:34:14 +0200240 const tflite::Model *model = ::tflite::GetModel(job.networkModel.data);
241 if (model->version() != TFLITE_SCHEMA_VERSION) {
Kristofer Jonssoneb912392021-11-12 12:51:27 +0100242 LOG_ERR("Model schema version unsupported: version=%" PRIu32 ", supported=%d.",
Anton Moberg07cf70b2021-07-07 11:08:17 +0200243 model->version(),
244 TFLITE_SCHEMA_VERSION);
Kristofer Jonsson641c0912020-08-31 11:34:14 +0200245 return true;
246 }
247
Kristofer Jonsson72fa50b2020-09-10 13:26:41 +0200248 // Create the TFL micro interpreter
Kristofer Jonsson641c0912020-08-31 11:34:14 +0200249 tflite::AllOpsResolver resolver;
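    // Select a profiler implementation: LayerByLayerProfiler additionally
    // samples the PMU event and cycle counters configured in the job, while
    // the default ArmProfiler only accumulates the total tick count reported
    // below.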
#ifdef LAYER_BY_LAYER_PROFILER
    tflite::LayerByLayerProfiler profiler(job.pmuEventConfig, job.pmuCycleCounterEnable);
#else
    tflite::ArmProfiler profiler;
#endif

    tflite::MicroInterpreter interpreter(model, resolver, tensorArena, tensorArenaSize, reporter, nullptr, &profiler);

    // Allocate tensors
    TfLiteStatus allocate_status = interpreter.AllocateTensors();
    if (allocate_status != kTfLiteOk) {
        LOG_ERR("Failed to allocate tensors for inference: job=%s", job.name.c_str());
        return true;
    }

    // Create a filtered list of non-empty input tensors
    vector<TfLiteTensor *> inputTensors;
    for (size_t i = 0; i < interpreter.inputs_size(); ++i) {
        TfLiteTensor *tensor = interpreter.input(i);

        if (tensor->bytes > 0) {
            inputTensors.push_back(tensor);
        }
    }
    if (job.input.size() != inputTensors.size()) {
        LOG_ERR("Number of input buffers does not match number of non-empty network tensors: input=%zu, network=%zu",
                job.input.size(),
                inputTensors.size());
        return true;
    }

    // Copy input data
    for (size_t i = 0; i < inputTensors.size(); ++i) {
        const DataPtr &input = job.input[i];
        const TfLiteTensor *tensor = inputTensors[i];

        if (input.size != tensor->bytes) {
            LOG_ERR("Job input size does not match network input size: job=%s, index=%zu, input=%zu, network=%zu",
                    job.name.c_str(),
                    i,
                    input.size,
                    tensor->bytes);
            return true;
        }

        copy(static_cast<char *>(input.data), static_cast<char *>(input.data) + input.size, tensor->data.uint8);
    }

    // Run the inference
    TfLiteStatus invoke_status = interpreter.Invoke();
    if (invoke_status != kTfLiteOk) {
        LOG_ERR("Invoke failed for inference: job=%s", job.name.c_str());
        return true;
    }

    LOG("arena_used_bytes : %zu\n", interpreter.arena_used_bytes());

    LOG("Inference runtime: %u cycles\n", (unsigned int)profiler.GetTotalTicks());

#ifdef LAYER_BY_LAYER_PROFILER
    if (job.pmuCycleCounterEnable) {
        job.pmuCycleCounterCount = profiler.GetPmuCycleCounterCount();
    }
    job.pmuEventCount.assign(profiler.GetPmuEventCount().begin(), profiler.GetPmuEventCount().end());
#endif

    // Copy output data
    if (job.output.size() > 0) {
        if (interpreter.outputs_size() != job.output.size()) {
            LOG_ERR("Output size mismatch: job=%zu, network=%zu", job.output.size(), interpreter.outputs_size());
            return true;
        }

        for (unsigned i = 0; i < interpreter.outputs_size(); ++i) {
            if (copyOutput(*interpreter.output(i), job.output[i])) {
                return true;
            }
        }
    }

    // Print all of the output data, or the first NUM_BYTES_TO_PRINT bytes,
    // whichever comes first, as well as the output shape.
    LOG("num_of_outputs: %zu\n", interpreter.outputs_size());
    LOG("output_begin\n");
    LOG("[\n");
    for (unsigned int i = 0; i < interpreter.outputs_size(); i++) {
        TfLiteTensor *output = interpreter.output(i);
        print_output_data(output, job.numBytesToPrint);
        if (i != interpreter.outputs_size() - 1) {
            LOG(",\n");
        }
    }
    LOG("]\n");
    LOG("output_end\n");

    if (job.expectedOutput.size() > 0) {
        if (job.expectedOutput.size() != interpreter.outputs_size()) {
            LOG_ERR("Expected number of output tensors mismatch: job=%s, expected=%zu, network=%zu",
                    job.name.c_str(),
                    job.expectedOutput.size(),
                    interpreter.outputs_size());
            return true;
        }

        for (unsigned int i = 0; i < interpreter.outputs_size(); i++) {
            const DataPtr &expected = job.expectedOutput[i];
            const TfLiteTensor *output = interpreter.output(i);

            if (expected.size != output->bytes) {
                LOG_ERR("Expected output tensor size mismatch: job=%s, index=%u, expected=%zu, network=%zu",
                        job.name.c_str(),
                        i,
                        expected.size,
                        output->bytes);
                return true;
            }

            for (unsigned int j = 0; j < output->bytes; ++j) {
                if (output->data.uint8[j] != static_cast<uint8_t *>(expected.data)[j]) {
                    LOG_ERR("Expected output tensor data mismatch: job=%s, index=%u, offset=%u, "
                            "expected=%02x, network=%02x\n",
                            job.name.c_str(),
                            i,
                            j,
                            static_cast<uint8_t *>(expected.data)[j],
                            output->data.uint8[j]);
                    return true;
                }
            }
        }
    }

    LOG_INFO("Finished running job: %s", job.name.c_str());

    return false;
}

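// Drains the job queue, running each job in turn. Returns true if any job
// failed. With exitOnEmpty == false the call never returns and busy-waits for
// new jobs to be pushed.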
bool InferenceProcess::run(bool exitOnEmpty) {
    bool anyJobFailed = false;

    while (true) {
        getLock();
        bool empty = inferenceJobQueue.empty();
        freeLock();

        if (empty) {
            if (exitOnEmpty) {
                LOG_INFO("Exit from InferenceProcess::run() due to empty job queue");
                break;
            }

            continue;
        }

        getLock();
        InferenceJob job = inferenceJobQueue.front();
        inferenceJobQueue.pop();
        freeLock();

        if (runJob(job)) {
            anyJobFailed = true;
            continue;
        }
    }

    return anyJobFailed;
}

} // namespace InferenceProcess