blob: ebd9d6c1a8796ee44102b3c968b84c05f8094f13 [file] [log] [blame]
Kristofer Jonsson641c0912020-08-31 11:34:14 +02001/*
Per Åstrand90455452021-02-25 11:10:08 +01002 * Copyright (c) 2019-2021 Arm Limited. All rights reserved.
Kristofer Jonsson641c0912020-08-31 11:34:14 +02003 *
4 * SPDX-License-Identifier: Apache-2.0
5 *
6 * Licensed under the Apache License, Version 2.0 (the License); you may
7 * not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
14 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19#include "tensorflow/lite/micro/all_ops_resolver.h"
Måns Nilsson231e1d92020-11-05 12:19:34 +010020#include "tensorflow/lite/micro/cortex_m_generic/debug_log_callback.h"
Kristofer Jonsson641c0912020-08-31 11:34:14 +020021#include "tensorflow/lite/micro/micro_error_reporter.h"
22#include "tensorflow/lite/micro/micro_interpreter.h"
Bhavik Patelffe845d2020-11-16 12:13:56 +010023#include "tensorflow/lite/micro/micro_profiler.h"
Kristofer Jonsson641c0912020-08-31 11:34:14 +020024#include "tensorflow/lite/schema/schema_generated.h"
Kristofer Jonsson641c0912020-08-31 11:34:14 +020025
Jens Elofsson955288a2021-04-22 20:57:15 +020026#include "arm_profiler.hpp"
Kristofer Jonsson3bd34232021-08-30 13:55:55 +020027#ifdef LAYER_BY_LAYER_PROFILER
Jens Elofsson701a63b2021-05-23 17:37:07 +020028#include "layer_by_layer_profiler.hpp"
Jens Elofsson955288a2021-04-22 20:57:15 +020029#endif
Anton Moberg07cf70b2021-07-07 11:08:17 +020030#include "ethosu_log.h"
Jens Elofsson955288a2021-04-22 20:57:15 +020031
Kristofer Jonsson641c0912020-08-31 11:34:14 +020032#include "inference_process.hpp"
33
Per Åstrandd9afc082020-10-06 13:25:08 +020034#include "cmsis_compiler.h"
35
Per Åstrand91a91732020-09-25 15:04:26 +020036#include <inttypes.h>
37
Kristofer Jonsson72fa50b2020-09-10 13:26:41 +020038using namespace std;
39
Kristofer Jonsson641c0912020-08-31 11:34:14 +020040namespace {
Måns Nilsson231e1d92020-11-05 12:19:34 +010041
// Table-driven CRC-32 (IEEE 802.3, reflected polynomial 0xedb88320).
// The lookup table is generated at compile time by the constexpr constructor.
class Crc {
public:
    constexpr Crc() : table() {
        constexpr uint32_t polynomial = 0xedb88320;

        // One table entry per possible byte value.
        for (uint32_t byte = 0; byte < 256; byte++) {
            uint32_t remainder = byte;

            // Process the byte one bit at a time.
            for (int bit = 0; bit < 8; bit++) {
                remainder = (remainder & 1) ? (polynomial ^ (remainder >> 1)) : (remainder >> 1);
            }

            table[byte] = remainder;
        }
    }

    // Compute the CRC-32 of `length` bytes at `data`. Pass the result of a
    // previous call as `init` to checksum a buffer in several chunks.
    uint32_t crc32(const void *data, const size_t length, uint32_t init = 0) const {
        const uint8_t *bytes = static_cast<const uint8_t *>(data);
        uint32_t crc         = ~init;

        for (size_t i = 0; i < length; i++) {
            crc = table[(crc ^ bytes[i]) & 0xff] ^ (crc >> 8);
        }

        return ~crc;
    }

private:
    uint32_t table[256];
};
77
Kristofer Jonsson641c0912020-08-31 11:34:14 +020078} // namespace
79
80namespace InferenceProcess {
Per Åstrandbbd9c8f2020-09-25 15:07:35 +020081DataPtr::DataPtr(void *_data, size_t _size) : data(_data), size(_size) {}
Kristofer Jonsson641c0912020-08-31 11:34:14 +020082
// Invalidate the data-cache lines covering this buffer so that subsequent CPU
// reads fetch fresh data from main memory (e.g. after the NPU has written it).
// No-op on cores without a data cache.
void DataPtr::invalidate() {
#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
    // NOTE(review): SCB_InvalidateDCache_by_Addr operates on whole cache
    // lines — presumably callers guarantee suitable alignment/size; confirm.
    SCB_InvalidateDCache_by_Addr(reinterpret_cast<uint32_t *>(data), size);
#endif
}
88
// Write back (clean) any dirty data-cache lines covering this buffer to main
// memory so that an external agent (e.g. the NPU) observes the CPU's writes.
// No-op on cores without a data cache.
void DataPtr::clean() {
#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
    // NOTE(review): operates on whole cache lines — presumably callers
    // guarantee suitable alignment/size; confirm.
    SCB_CleanDCache_by_Addr(reinterpret_cast<uint32_t *>(data), size);
#endif
}
94
// First byte of the buffer, viewed as char* for use with std::copy-style
// iteration.
char *DataPtr::begin() const {
    return static_cast<char *>(data);
}
98
99char *DataPtr::end() const {
100 return static_cast<char *>(data) + size;
101}
102
Kristofer Jonsson641c0912020-08-31 11:34:14 +0200103InferenceJob::InferenceJob() : numBytesToPrint(0) {}
104
Per Åstrandbbd9c8f2020-09-25 15:07:35 +0200105InferenceJob::InferenceJob(const string &_name,
106 const DataPtr &_networkModel,
107 const vector<DataPtr> &_input,
108 const vector<DataPtr> &_output,
109 const vector<DataPtr> &_expectedOutput,
Bhavik Patelffe845d2020-11-16 12:13:56 +0100110 size_t _numBytesToPrint,
111 const vector<uint8_t> &_pmuEventConfig,
Jonny Svärd4c11a482021-12-17 17:04:08 +0100112 const bool _pmuCycleCounterEnable) :
Per Åstrandbbd9c8f2020-09-25 15:07:35 +0200113 name(_name),
114 networkModel(_networkModel), input(_input), output(_output), expectedOutput(_expectedOutput),
Bhavik Patel97906eb2020-12-17 15:32:16 +0100115 numBytesToPrint(_numBytesToPrint), pmuEventConfig(_pmuEventConfig), pmuCycleCounterEnable(_pmuCycleCounterEnable),
Jens Elofssonde044c32021-05-06 16:21:29 +0200116 pmuEventCount(), pmuCycleCounterCount(0) {}
Kristofer Jonsson641c0912020-08-31 11:34:14 +0200117
Kristofer Jonsson34e24962020-11-23 16:22:10 +0100118void InferenceJob::invalidate() {
119 networkModel.invalidate();
120
121 for (auto &it : input) {
122 it.invalidate();
123 }
124
125 for (auto &it : output) {
126 it.invalidate();
127 }
128
129 for (auto &it : expectedOutput) {
130 it.invalidate();
131 }
132}
133
134void InferenceJob::clean() {
135 networkModel.clean();
136
137 for (auto &it : input) {
138 it.clean();
139 }
140
141 for (auto &it : output) {
142 it.clean();
143 }
144
145 for (auto &it : expectedOutput) {
146 it.clean();
147 }
148}
149
Kristofer Jonsson40d886e2021-12-15 11:16:26 +0100150InferenceProcess::InferenceProcess(uint8_t *_tensorArena, size_t _tensorArenaSize) :
151 tensorArena(_tensorArena), tensorArenaSize(_tensorArenaSize) {}
Kristofer Jonsson641c0912020-08-31 11:34:14 +0200152
// Run a single inference job end to end: verify the model schema, build the
// interpreter over the shared tensor arena, copy inputs in, invoke, read out
// profiling data, copy outputs back and compare against reference data.
//
// Returns true on FAILURE and false on success (inverted-bool convention used
// throughout this file).
bool InferenceProcess::runJob(InferenceJob &job) {
    LOG_INFO("Running inference job: %s", job.name.c_str());

    // Register debug log callback for profiling
    RegisterDebugLogCallback(tfluDebugLog);

    // Get model handle and verify that the version is correct
    const tflite::Model *model = ::tflite::GetModel(job.networkModel.data);
    if (model->version() != TFLITE_SCHEMA_VERSION) {
        LOG_ERR("Model schema version unsupported: version=%" PRIu32 ", supported=%d.",
                model->version(),
                TFLITE_SCHEMA_VERSION);
        return true;
    }

    // Create the TFL micro interpreter. The profiler flavour is a build-time
    // choice: per-layer PMU profiling or a plain total-ticks profiler.
    tflite::AllOpsResolver resolver;
#ifdef LAYER_BY_LAYER_PROFILER
    tflite::LayerByLayerProfiler profiler(job.pmuEventConfig, job.pmuCycleCounterEnable);
#else
    tflite::ArmProfiler profiler;
#endif

    tflite::MicroErrorReporter errorReporter;
    tflite::MicroInterpreter interpreter(
        model, resolver, tensorArena, tensorArenaSize, &errorReporter, nullptr, &profiler);

    // Allocate tensors in the arena; must succeed before any tensor access.
    TfLiteStatus status = interpreter.AllocateTensors();
    if (status != kTfLiteOk) {
        LOG_ERR("Failed to allocate tensors for inference: job=%s", job.name.c_str());
        return true;
    }

    // Copy IFM data from job descriptor to TFLu arena
    if (copyIfm(job, interpreter)) {
        return true;
    }

    // Run the inference
    status = interpreter.Invoke();
    if (status != kTfLiteOk) {
        LOG_ERR("Invoke failed for inference: job=%s", job.name.c_str());
        return true;
    }

#ifdef LAYER_BY_LAYER_PROFILER
    // Hand the PMU results back to the job descriptor for the caller to read.
    if (job.pmuCycleCounterEnable) {
        job.pmuCycleCounterCount = profiler.GetPmuCycleCounterCount();
    }

    job.pmuEventCount.assign(profiler.GetPmuEventCount().begin(), profiler.GetPmuEventCount().end());
#endif

    // NOTE(review): PRId32 assumes GetTotalTicks() returns a signed 32-bit
    // tick count — confirm against the active profiler implementation.
    LOG("Inference runtime: %" PRId32 " cycles\n", profiler.GetTotalTicks());

    // Copy output data from TFLu arena to job descriptor
    if (copyOfm(job, interpreter)) {
        return true;
    }

    printJob(job, interpreter);

    // Compare the OFM with the expected reference data
    if (compareOfm(job, interpreter)) {
        return true;
    }

    LOG_INFO("Finished running job: %s", job.name.c_str());

    return false;
}
225
226bool InferenceProcess::copyIfm(InferenceJob &job, tflite::MicroInterpreter &interpreter) {
Kristofer Jonsson72fa50b2020-09-10 13:26:41 +0200227 // Create a filtered list of non empty input tensors
228 vector<TfLiteTensor *> inputTensors;
229 for (size_t i = 0; i < interpreter.inputs_size(); ++i) {
230 TfLiteTensor *tensor = interpreter.input(i);
231
232 if (tensor->bytes > 0) {
233 inputTensors.push_back(tensor);
Kristofer Jonsson641c0912020-08-31 11:34:14 +0200234 }
Kristofer Jonsson641c0912020-08-31 11:34:14 +0200235 }
Kristofer Jonssondcc1ce02021-12-21 16:25:19 +0100236
Kristofer Jonsson72fa50b2020-09-10 13:26:41 +0200237 if (job.input.size() != inputTensors.size()) {
Kristofer Jonssoneb912392021-11-12 12:51:27 +0100238 LOG_ERR("Number of input buffers does not match number of non empty network tensors: input=%zu, network=%zu",
Anton Moberg07cf70b2021-07-07 11:08:17 +0200239 job.input.size(),
240 inputTensors.size());
Kristofer Jonsson641c0912020-08-31 11:34:14 +0200241 return true;
242 }
243
Kristofer Jonssondcc1ce02021-12-21 16:25:19 +0100244 // Copy input data from job to TFLu arena
Kristofer Jonsson72fa50b2020-09-10 13:26:41 +0200245 for (size_t i = 0; i < inputTensors.size(); ++i) {
Kristofer Jonssondcc1ce02021-12-21 16:25:19 +0100246 DataPtr &input = job.input[i];
247 TfLiteTensor *tensor = inputTensors[i];
Kristofer Jonsson72fa50b2020-09-10 13:26:41 +0200248
249 if (input.size != tensor->bytes) {
Kristofer Jonssoneb912392021-11-12 12:51:27 +0100250 LOG_ERR("Job input size does not match network input size: job=%s, index=%zu, input=%zu, network=%u",
Anton Moberg07cf70b2021-07-07 11:08:17 +0200251 job.name.c_str(),
252 i,
253 input.size,
254 tensor->bytes);
Kristofer Jonsson72fa50b2020-09-10 13:26:41 +0200255 return true;
256 }
257
Kristofer Jonssondcc1ce02021-12-21 16:25:19 +0100258 copy(input.begin(), input.end(), tensor->data.uint8);
Kristofer Jonsson72fa50b2020-09-10 13:26:41 +0200259 }
260
Kristofer Jonssondcc1ce02021-12-21 16:25:19 +0100261 return false;
262}
263
264bool InferenceProcess::copyOfm(InferenceJob &job, tflite::MicroInterpreter &interpreter) {
265 // Skip copy if output is empty
266 if (job.output.empty()) {
267 return false;
268 }
269
270 if (interpreter.outputs_size() != job.output.size()) {
271 LOG_ERR("Output size mismatch: job=%zu, network=%u", job.output.size(), interpreter.outputs_size());
Kristofer Jonsson641c0912020-08-31 11:34:14 +0200272 return true;
273 }
274
Kristofer Jonssondcc1ce02021-12-21 16:25:19 +0100275 for (unsigned i = 0; i < interpreter.outputs_size(); ++i) {
276 DataPtr &output = job.output[i];
277 TfLiteTensor *tensor = interpreter.output(i);
Bhavik Patelffe845d2020-11-16 12:13:56 +0100278
Kristofer Jonssondcc1ce02021-12-21 16:25:19 +0100279 if (tensor->bytes > output.size) {
280 LOG_ERR("Tensor size mismatch: tensor=%d, expected=%d", tensor->bytes, output.size);
Kristofer Jonsson72fa50b2020-09-10 13:26:41 +0200281 return true;
282 }
283
Kristofer Jonssondcc1ce02021-12-21 16:25:19 +0100284 copy(tensor->data.uint8, tensor->data.uint8 + tensor->bytes, output.begin());
285 }
286
287 return false;
288}
289
290bool InferenceProcess::compareOfm(InferenceJob &job, tflite::MicroInterpreter &interpreter) {
291 // Skip verification if expected output is empty
292 if (job.expectedOutput.empty()) {
293 return false;
294 }
295
296 if (job.expectedOutput.size() != interpreter.outputs_size()) {
297 LOG_ERR("Expected number of output tensors mismatch: job=%s, expected=%zu, network=%zu",
298 job.name.c_str(),
299 job.expectedOutput.size(),
300 interpreter.outputs_size());
301 return true;
302 }
303
304 for (unsigned int i = 0; i < interpreter.outputs_size(); i++) {
305 const DataPtr &expected = job.expectedOutput[i];
306 const TfLiteTensor *output = interpreter.output(i);
307
308 if (expected.size != output->bytes) {
309 LOG_ERR("Expected output tensor size mismatch: job=%s, index=%u, expected=%zu, network=%zu",
310 job.name.c_str(),
311 i,
312 expected.size,
313 output->bytes);
314 return true;
315 }
316
317 const char *exp = expected.begin();
318 for (unsigned int j = 0; j < output->bytes; ++j) {
319 if (output->data.uint8[j] != exp[j]) {
320 LOG_ERR("Expected output tensor data mismatch: job=%s, index=%u, offset=%u, "
321 "expected=%02x, network=%02x\n",
322 job.name.c_str(),
323 i,
324 j,
325 exp[j],
326 output->data.uint8[j]);
Kristofer Jonsson72fa50b2020-09-10 13:26:41 +0200327 return true;
328 }
329 }
330 }
Kristofer Jonsson641c0912020-08-31 11:34:14 +0200331
Kristofer Jonssondcc1ce02021-12-21 16:25:19 +0100332 return false;
333}
334
335void InferenceProcess::printJob(InferenceJob &job, tflite::MicroInterpreter &interpreter) {
336 for (size_t i = 0; i < job.pmuEventCount.size(); i++) {
337 LOG("ethosu_pmu_cntr%zu : %" PRIu32 "\n", i, job.pmuEventCount[i]);
338 }
339
340 LOG("arena_used_bytes : %zu\n", interpreter.arena_used_bytes());
341
Henrik Hoglindae4d8302021-12-08 15:06:02 +0100342 // Print all of the output data, or the first NUM_BYTES_TO_PRINT bytes,
343 // whichever comes first as well as the output shape.
344 LOG("num_of_outputs: %d\n", interpreter.outputs_size());
345 LOG("output_begin\n");
346 LOG("[\n");
Kristofer Jonssondcc1ce02021-12-21 16:25:19 +0100347
Henrik Hoglindae4d8302021-12-08 15:06:02 +0100348 for (unsigned int i = 0; i < interpreter.outputs_size(); i++) {
Kristofer Jonssondcc1ce02021-12-21 16:25:19 +0100349 printOutputTensor(interpreter.output(i), job.numBytesToPrint);
350
Henrik Hoglindae4d8302021-12-08 15:06:02 +0100351 if (i != interpreter.outputs_size() - 1) {
352 LOG(",\n");
Kristofer Jonsson641c0912020-08-31 11:34:14 +0200353 }
Kristofer Jonsson641c0912020-08-31 11:34:14 +0200354 }
Kristofer Jonssondcc1ce02021-12-21 16:25:19 +0100355
Henrik Hoglindae4d8302021-12-08 15:06:02 +0100356 LOG("]\n");
357 LOG("output_end\n");
Kristofer Jonssondcc1ce02021-12-21 16:25:19 +0100358}
Kristofer Jonsson641c0912020-08-31 11:34:14 +0200359
Kristofer Jonssondcc1ce02021-12-21 16:25:19 +0100360void InferenceProcess::printOutputTensor(TfLiteTensor *output, size_t bytesToPrint) {
361 constexpr auto crc = Crc();
362 const uint32_t crc32 = crc.crc32(output->data.data, output->bytes);
363 const int numBytesToPrint = min(output->bytes, bytesToPrint);
364 int dims_size = output->dims->size;
Kristofer Jonsson72fa50b2020-09-10 13:26:41 +0200365
Kristofer Jonssondcc1ce02021-12-21 16:25:19 +0100366 LOG("{\n");
367 LOG("\"dims\": [%d,", dims_size);
Kristofer Jonsson72fa50b2020-09-10 13:26:41 +0200368
Kristofer Jonssondcc1ce02021-12-21 16:25:19 +0100369 for (int i = 0; i < output->dims->size - 1; ++i) {
370 LOG("%d,", output->dims->data[i]);
Kristofer Jonsson641c0912020-08-31 11:34:14 +0200371 }
Kristofer Jonsson72fa50b2020-09-10 13:26:41 +0200372
Kristofer Jonssondcc1ce02021-12-21 16:25:19 +0100373 LOG("%d],\n", output->dims->data[dims_size - 1]);
374 LOG("\"data_address\": \"%08" PRIx32 "\",\n", (uint32_t)output->data.data);
Kristofer Jonsson641c0912020-08-31 11:34:14 +0200375
Kristofer Jonssondcc1ce02021-12-21 16:25:19 +0100376 if (numBytesToPrint) {
377 LOG("\"crc32\": \"%08" PRIx32 "\",\n", crc32);
378 LOG("\"data\":\"");
379
380 for (int i = 0; i < numBytesToPrint - 1; ++i) {
381 /*
382 * Workaround an issue when compiling with GCC where by
383 * printing only a '\n' the produced global output is wrong.
384 */
385 if (i % 15 == 0 && i != 0) {
386 LOG("0x%02x,\n", output->data.uint8[i]);
387 } else {
388 LOG("0x%02x,", output->data.uint8[i]);
389 }
390 }
391
392 LOG("0x%02x\"\n", output->data.uint8[numBytesToPrint - 1]);
393 } else {
394 LOG("\"crc32\": \"%08" PRIx32 "\"\n", crc32);
395 }
396
397 LOG("}");
398}
399
// Debug-log callback handed to TFLu via RegisterDebugLogCallback(); forwards
// interpreter debug output to the application logger.
void InferenceProcess::tfluDebugLog(const char *s) {
    LOG("%s", s);
}
403
404} // namespace InferenceProcess