/*
 * Copyright (c) 2019-2020 Arm Limited. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/version.h"

#include "inference_process.hpp"

// Standard headers used below (printf, memcpy, std::min/std::copy, fixed-width ints).
#include <algorithm>
#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <cstring>

#ifndef TENSOR_ARENA_SIZE
#define TENSOR_ARENA_SIZE (1024)
#endif
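
// Tensor arena used by the TensorFlow Lite Micro interpreter for every job.
// It is placed in a non-initialised section so start-up code does not zero it,
// and aligned to 16 bytes to match TFLu's buffer alignment. The 1 KiB default
// above is only a fallback; TENSOR_ARENA_SIZE is normally set by the build
// system to fit the model being run.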
__attribute__((section(".bss.NoInit"), aligned(16))) uint8_t inferenceProcessTensorArena[TENSOR_ARENA_SIZE];

namespace {
// Print the shape and up to 'bytesToPrint' bytes of a tensor's data as a
// JSON-like fragment on stdout.
void print_output_data(TfLiteTensor *output, size_t bytesToPrint) {
    const size_t numBytesToPrint = std::min(output->bytes, bytesToPrint);

    int dims_size = output->dims->size;
    printf("{\n");
    printf("\"dims\": [%d,", dims_size);
    for (int i = 0; i < output->dims->size - 1; ++i) {
        printf("%d,", output->dims->data[i]);
    }
    printf("%d],\n", output->dims->data[dims_size - 1]);

    printf("\"data_address\": \"%08" PRIx32 "\",\n", (uint32_t)output->data.data);
    printf("\"data\":\"");
    for (size_t i = 0; i < numBytesToPrint - 1; ++i) {
        if (i % 16 == 0 && i != 0) {
            printf("\n");
        }
        printf("0x%02x,", output->data.uint8[i]);
    }
    printf("0x%02x\"\n", output->data.uint8[numBytesToPrint - 1]);
    printf("}");
}
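
// For a 1x4 uint8 output tensor and bytesToPrint >= 4, print_output_data()
// above emits a fragment roughly like the following (illustrative only; the
// address is made up):
//
//   {
//   "dims": [2,1,4],
//   "data_address": "20001000",
//   "data":"0x00,0x01,0x02,0x03"
//   }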

// Copy a tensor's payload into the caller-provided buffer. Returns false on
// success (or when no destination buffer was provided) and true on error,
// matching the error convention used by runJob() below.
bool copyOutput(const TfLiteTensor &src, InferenceProcess::DataPtr &dst) {
    if (dst.data == nullptr) {
        return false;
    }

    if (src.bytes > dst.size) {
        printf("Tensor size %zu does not match output size %zu.\n", src.bytes, dst.size);
        return true;
    }

    std::copy(src.data.uint8, src.data.uint8 + src.bytes, static_cast<uint8_t *>(dst.data));
    dst.size = src.bytes;

    return false;
}

} // namespace

namespace InferenceProcess {
DataPtr::DataPtr(void *data, size_t size) : data(data), size(size) {}

InferenceJob::InferenceJob() : numBytesToPrint(0) {}

InferenceJob::InferenceJob(const std::string &name,
                           const DataPtr &networkModel,
                           const DataPtr &input,
                           const DataPtr &output,
                           const DataPtr &expectedOutput,
                           size_t numBytesToPrint) :
    name(name),
    networkModel(networkModel), input(input), output(output), expectedOutput(expectedOutput),
    numBytesToPrint(numBytesToPrint) {}

InferenceProcess::InferenceProcess() : lock(0) {}

// NOTE: get_lock/free_lock are based, with some corrections, on the example at
// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dai0321a/BIHEJCHB.html
// TODO: check correctness?
void InferenceProcess::getLock() {
    int status = 0;

    do {
        // Wait until lock_var is free
        while (__LDREXW(&lock) != 0)
            ;

        // Try to set lock_var
        status = __STREXW(1, &lock);
    } while (status != 0);

    // Do not start any other memory access until memory barrier is completed
    __DMB();
}
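
// __LDREXW marks the lock word for exclusive access and __STREXW only succeeds
// (returns 0) if no other observer has written it since, so the loop above spins
// until the lock is seen free and claimed atomically. The __DMB barriers order
// the protected queue accesses against acquiring and releasing the lock. Note
// that this is a busy-waiting spinlock: it neither blocks nor disables interrupts.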

// TODO: check correctness?
void InferenceProcess::freeLock() {
    // Ensure memory operations completed before releasing lock
    __DMB();

    lock = 0;
}

bool InferenceProcess::push(const InferenceJob &job) {
    getLock();
    inferenceJobQueue.push(job);
    freeLock();

    return true;
}

bool InferenceProcess::runJob(InferenceJob &job) {
    printf("Running inference job: %s\n", job.name.c_str());

    tflite::MicroErrorReporter microErrorReporter;
    tflite::ErrorReporter *reporter = &microErrorReporter;

    const tflite::Model *model = ::tflite::GetModel(job.networkModel.data);
    if (model->version() != TFLITE_SCHEMA_VERSION) {
        printf("Model provided is schema version %" PRIu32 " not equal "
               "to supported version %d.\n",
               model->version(),
               TFLITE_SCHEMA_VERSION);
        return true;
    }

    tflite::AllOpsResolver resolver;

    tflite::MicroInterpreter interpreter(model, resolver, inferenceProcessTensorArena, TENSOR_ARENA_SIZE, reporter);
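
    // Each job constructs a fresh interpreter, but every job shares the single
    // statically allocated tensor arena declared at the top of this file.
    // AllocateTensors() below plans the model's buffers inside that arena and
    // fails if TENSOR_ARENA_SIZE is too small for the model.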
    TfLiteStatus allocate_status = interpreter.AllocateTensors();
    if (allocate_status != kTfLiteOk) {
        printf("AllocateTensors failed for inference job: %s\n", job.name.c_str());
        return true;
    }

    bool inputSizeError = false;
    // TODO: adapt for multiple inputs
    // for (unsigned int i = 0; i < interpreter.inputs_size(); ++i)
    for (unsigned int i = 0; i < 1; ++i) {
        TfLiteTensor *input = interpreter.input(i);
        if (input->bytes != job.input.size) {
            // If input sizes don't match, then we could end up copying
            // uninitialized or partial data.
            inputSizeError = true;
            printf("Allocated size: %zu for input: %u doesn't match the "
                   "received input size: %zu for job: %s\n",
                   input->bytes,
                   i,
                   job.input.size,
                   job.name.c_str());
            return true;
        }
        memcpy(input->data.uint8, job.input.data, input->bytes);
    }
    if (inputSizeError) {
        return true;
    }

    TfLiteStatus invoke_status = interpreter.Invoke();
    if (invoke_status != kTfLiteOk) {
        printf("Invoke failed for inference job: %s\n", job.name.c_str());
        return true;
    }

    // Fail the job if the output could not be copied to the job's output buffer.
    if (copyOutput(*interpreter.output(0), job.output)) {
        return true;
    }

    if (job.numBytesToPrint > 0) {
        // Print the output shape and either all of the output data or the
        // first numBytesToPrint bytes, whichever is smaller.
        printf("num_of_outputs: %zu\n", interpreter.outputs_size());
        printf("output_begin\n");
        printf("[\n");
        for (unsigned int i = 0; i < interpreter.outputs_size(); i++) {
            TfLiteTensor *output = interpreter.output(i);
            print_output_data(output, job.numBytesToPrint);
            if (i != interpreter.outputs_size() - 1) {
                printf(",\n");
            }
        }
        printf("]\n");
        printf("output_end\n");
    }

    if (job.expectedOutput.data != nullptr) {
        bool outputSizeError = false;
        // TODO: adapt for multiple outputs
        // for (unsigned int i = 0; i < interpreter.outputs_size(); i++)
        for (unsigned int i = 0; i < 1; i++) {
            TfLiteTensor *output = interpreter.output(i);
            if (job.expectedOutput.size != output->bytes) {
                // If the expected output size and the actual output size don't
                // match, we could end up accessing out-of-bounds data.
                // There is also no need to compare the data, as we already
                // know the sizes differ.
                outputSizeError = true;
                printf("Output size: %zu for output: %u doesn't match with "
                       "the expected output size: %zu for job: %s\n",
                       output->bytes,
                       i,
                       job.expectedOutput.size,
                       job.name.c_str());
                return true;
            }
            for (unsigned int j = 0; j < output->bytes; ++j) {
                if (output->data.uint8[j] != (static_cast<uint8_t *>(job.expectedOutput.data))[j]) {
                    printf("Output data doesn't match expected output data at index: "
                           "%u, expected: %02X actual: %02X\n",
                           j,
                           (static_cast<uint8_t *>(job.expectedOutput.data))[j],
                           output->data.uint8[j]);
                }
            }
        }
        if (outputSizeError) {
            return true;
        }
    }
    printf("Finished running job: %s\n", job.name.c_str());

    return false;
}
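
// Example usage (illustrative sketch only; buffer names and sizes are made up
// and would come from the application that embeds this class):
//
//     InferenceProcess::InferenceProcess inferenceProcess;
//     InferenceProcess::DataPtr model(networkModelData, sizeof(networkModelData));
//     InferenceProcess::DataPtr input(inputData, sizeof(inputData));
//     InferenceProcess::DataPtr output(outputBuffer, sizeof(outputBuffer));
//     InferenceProcess::DataPtr expected(nullptr, 0); // skip output comparison
//
//     inferenceProcess.push(InferenceProcess::InferenceJob(
//         "job0", model, input, output, expected, 0));
//     inferenceProcess.run(true); // returns true if any job failed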

bool InferenceProcess::run(bool exitOnEmpty) {
    bool anyJobFailed = false;

    while (true) {
        getLock();
        bool empty = inferenceJobQueue.empty();
        freeLock();

        if (empty) {
            if (exitOnEmpty) {
                printf("Exit from InferenceProcess::run() on empty job queue!\n");
                break;
            }

            continue;
        }

        getLock();
        InferenceJob job = inferenceJobQueue.front();
        inferenceJobQueue.pop();
        freeLock();

        if (runJob(job)) {
            anyJobFailed = true;
            continue;
        }
    }

    return anyJobFailed;
}

} // namespace InferenceProcess