Kristofer Jonsson | 641c091 | 2020-08-31 11:34:14 +0200 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2019-2020 Arm Limited. All rights reserved. |
| 3 | * |
| 4 | * SPDX-License-Identifier: Apache-2.0 |
| 5 | * |
| 6 | * Licensed under the Apache License, Version 2.0 (the License); you may |
| 7 | * not use this file except in compliance with the License. |
| 8 | * You may obtain a copy of the License at |
| 9 | * |
| 10 | * www.apache.org/licenses/LICENSE-2.0 |
| 11 | * |
| 12 | * Unless required by applicable law or agreed to in writing, software |
| 13 | * distributed under the License is distributed on an AS IS BASIS, WITHOUT |
| 14 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | * See the License for the specific language governing permissions and |
| 16 | * limitations under the License. |
| 17 | */ |
| 18 | |
#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/version.h"

#include "inference_process.hpp"

#include <algorithm>
#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <cstring>
| 26 | |
// Size in bytes of the memory pool handed to the TFLite Micro interpreter
// for tensor allocations. Overridable from the build system.
#ifndef TENSOR_ARENA_SIZE
#define TENSOR_ARENA_SIZE (1024)
#endif

// Tensor arena, placed in a dedicated no-init BSS section so the linker can
// position it (e.g. in SRAM shared with the NPU) and startup code skips
// zero-filling it. 16-byte alignment is required by the interpreter.
__attribute__((section(".bss.NoInit"), aligned(16))) uint8_t inferenceProcessTensorArena[TENSOR_ARENA_SIZE];
| 32 | |
| 33 | namespace { |
| 34 | void print_output_data(TfLiteTensor *output, size_t bytesToPrint) { |
| 35 | const int numBytesToPrint = std::min(output->bytes, bytesToPrint); |
| 36 | |
| 37 | int dims_size = output->dims->size; |
| 38 | printf("{\n"); |
| 39 | printf("\"dims\": [%d,", dims_size); |
| 40 | for (int i = 0; i < output->dims->size - 1; ++i) { |
| 41 | printf("%d,", output->dims->data[i]); |
| 42 | } |
| 43 | printf("%d],\n", output->dims->data[dims_size - 1]); |
| 44 | |
| 45 | printf("\"data_address\": \"%08x\",\n", (uint32_t)output->data.data); |
| 46 | printf("\"data\":\""); |
| 47 | for (int i = 0; i < numBytesToPrint - 1; ++i) { |
| 48 | if (i % 16 == 0 && i != 0) { |
| 49 | printf("\n"); |
| 50 | } |
| 51 | printf("0x%02x,", output->data.uint8[i]); |
| 52 | } |
| 53 | printf("0x%02x\"\n", output->data.uint8[numBytesToPrint - 1]); |
| 54 | printf("}"); |
| 55 | } |
| 56 | |
| 57 | bool copyOutput(const TfLiteTensor &src, InferenceProcess::DataPtr &dst) { |
| 58 | if (dst.data == nullptr) { |
| 59 | return false; |
| 60 | } |
| 61 | |
| 62 | if (src.bytes > dst.size) { |
| 63 | printf("Tensor size %d does not match output size %d.\n", src.bytes, dst.size); |
| 64 | return true; |
| 65 | } |
| 66 | |
| 67 | std::copy(src.data.uint8, src.data.uint8 + src.bytes, static_cast<uint8_t *>(dst.data)); |
| 68 | dst.size = src.bytes; |
| 69 | |
| 70 | return false; |
| 71 | } |
| 72 | |
| 73 | } // namespace |
| 74 | |
| 75 | namespace InferenceProcess { |
// Non-owning view of a caller-provided buffer: raw pointer plus size in bytes.
DataPtr::DataPtr(void *data, size_t size) : data(data), size(size) {}
| 77 | |
// Default job: all buffers default-constructed, no output bytes printed.
InferenceJob::InferenceJob() : numBytesToPrint(0) {}
| 79 | |
// Fully-specified job: a name for logging, buffers for the flatbuffer model,
// the input, the output destination and (optionally) the expected output to
// compare against, plus how many output bytes to dump after inference.
InferenceJob::InferenceJob(const std::string &name,
                           const DataPtr &networkModel,
                           const DataPtr &input,
                           const DataPtr &output,
                           const DataPtr &expectedOutput,
                           size_t numBytesToPrint) :
    name(name),
    networkModel(networkModel), input(input), output(output), expectedOutput(expectedOutput),
    numBytesToPrint(numBytesToPrint) {}
| 89 | |
// The queue spin lock starts released (0 = free; getLock() stores 1 to claim).
InferenceProcess::InferenceProcess() : lock(0) {}
| 91 | |
// Spin-lock acquire built on ARM exclusive load/store (LDREX/STREX), with
// some corrections from:
// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dai0321a/BIHEJCHB.html
// Busy-waits with no backoff until the lock word can be claimed.
// TODO: check correctness?
void InferenceProcess::getLock() {
    int status = 0;

    do {
        // Spin until the lock word reads as free (0); the exclusive monitor
        // is armed by the LDREX.
        while (__LDREXW(&lock) != 0)
            ;

        // Try to claim the lock by storing 1. STREX returns non-zero if the
        // exclusive monitor was lost (someone else touched the word), in
        // which case we retry the whole sequence.
        status = __STREXW(1, &lock);
    } while (status != 0);

    // Barrier: no memory access from the critical section may be reordered
    // before the lock acquisition completes.
    __DMB();
}
| 110 | |
// Spin-lock release: barrier first so every write made inside the critical
// section is visible before other cores can observe the lock as free, then a
// plain store of 0. TODO: check correctness?
void InferenceProcess::freeLock() {
    // Ensure memory operations completed before releasing lock
    __DMB();

    lock = 0;
}
| 118 | |
// Enqueues a job for run() to pick up; safe against concurrent producers and
// consumers via the spin lock. Always returns true — enqueue failure is not
// detected here.
bool InferenceProcess::push(const InferenceJob &job) {
    getLock();
    inferenceJobQueue.push(job);
    freeLock();

    return true;
}
| 126 | |
| 127 | bool InferenceProcess::runJob(InferenceJob &job) { |
| 128 | printf("Running inference job: %s\n", job.name.c_str()); |
| 129 | |
| 130 | tflite::MicroErrorReporter microErrorReporter; |
| 131 | tflite::ErrorReporter *reporter = µErrorReporter; |
| 132 | |
| 133 | const tflite::Model *model = ::tflite::GetModel(job.networkModel.data); |
| 134 | if (model->version() != TFLITE_SCHEMA_VERSION) { |
| 135 | printf("Model provided is schema version %d not equal " |
| 136 | "to supported version %d.\n", |
| 137 | model->version(), |
| 138 | TFLITE_SCHEMA_VERSION); |
| 139 | return true; |
| 140 | } |
| 141 | |
| 142 | tflite::AllOpsResolver resolver; |
| 143 | |
| 144 | tflite::MicroInterpreter interpreter(model, resolver, inferenceProcessTensorArena, TENSOR_ARENA_SIZE, reporter); |
| 145 | |
| 146 | TfLiteStatus allocate_status = interpreter.AllocateTensors(); |
| 147 | if (allocate_status != kTfLiteOk) { |
| 148 | printf("AllocateTensors failed for inference job: %s\n", job.name.c_str()); |
| 149 | return true; |
| 150 | } |
| 151 | |
| 152 | bool inputSizeError = false; |
| 153 | // TODO: adapt for multiple inputs |
| 154 | // for (unsigned int i = 0; i < interpreter.inputs_size(); ++i) |
| 155 | for (unsigned int i = 0; i < 1; ++i) { |
| 156 | TfLiteTensor *input = interpreter.input(i); |
| 157 | if (input->bytes != job.input.size) { |
| 158 | // If input sizes don't match, then we could end up copying |
| 159 | // uninitialized or partial data. |
| 160 | inputSizeError = true; |
| 161 | printf("Allocated size: %d for input: %d doesn't match the " |
| 162 | "received input size: %d for job: %s\n", |
| 163 | input->bytes, |
| 164 | i, |
| 165 | job.input.size, |
| 166 | job.name.c_str()); |
| 167 | return true; |
| 168 | } |
| 169 | memcpy(input->data.uint8, job.input.data, input->bytes); |
| 170 | } |
| 171 | if (inputSizeError) { |
| 172 | return true; |
| 173 | } |
| 174 | |
| 175 | TfLiteStatus invoke_status = interpreter.Invoke(); |
| 176 | if (invoke_status != kTfLiteOk) { |
| 177 | printf("Invoke failed for inference job: %s\n", job.name.c_str()); |
| 178 | return true; |
| 179 | } |
| 180 | |
| 181 | copyOutput(*interpreter.output(0), job.output); |
| 182 | |
| 183 | if (job.numBytesToPrint > 0) { |
| 184 | // Print all of the output data, or the first NUM_BYTES_TO_PRINT bytes, |
| 185 | // whichever comes first as well as the output shape. |
| 186 | printf("num_of_outputs: %d\n", interpreter.outputs_size()); |
| 187 | printf("output_begin\n"); |
| 188 | printf("[\n"); |
| 189 | for (unsigned int i = 0; i < interpreter.outputs_size(); i++) { |
| 190 | TfLiteTensor *output = interpreter.output(i); |
| 191 | print_output_data(output, job.numBytesToPrint); |
| 192 | if (i != interpreter.outputs_size() - 1) { |
| 193 | printf(",\n"); |
| 194 | } |
| 195 | } |
| 196 | printf("]\n"); |
| 197 | printf("output_end\n"); |
| 198 | } |
| 199 | |
| 200 | if (job.expectedOutput.data != nullptr) { |
| 201 | bool outputSizeError = false; |
| 202 | // TODO: adapt for multiple outputs |
| 203 | // for (unsigned int i = 0; i < interpreter.outputs_size(); i++) |
| 204 | for (unsigned int i = 0; i < 1; i++) { |
| 205 | TfLiteTensor *output = interpreter.output(i); |
| 206 | if (job.expectedOutput.size != output->bytes) { |
| 207 | // If the expected output & the actual output size doesn't |
| 208 | // match, we could end up accessing out-of-bound data. |
| 209 | // Also there's no need to compare the data, as we know |
| 210 | // that sizes differ. |
| 211 | outputSizeError = true; |
| 212 | printf("Output size: %d for output: %d doesn't match with " |
| 213 | "the expected output size: %d for job: %s\n", |
| 214 | output->bytes, |
| 215 | i, |
| 216 | job.expectedOutput.size, |
| 217 | job.name.c_str()); |
| 218 | return true; |
| 219 | } |
| 220 | for (unsigned int j = 0; j < output->bytes; ++j) { |
| 221 | if (output->data.uint8[j] != (static_cast<uint8_t *>(job.expectedOutput.data))[j]) { |
| 222 | printf("Output data doesn't match expected output data at index: " |
| 223 | "%d, expected: %02X actual: %02X", |
| 224 | j, |
| 225 | (static_cast<uint8_t *>(job.expectedOutput.data))[j], |
| 226 | output->data.uint8[j]); |
| 227 | } |
| 228 | } |
| 229 | } |
| 230 | if (outputSizeError) { |
| 231 | return true; |
| 232 | } |
| 233 | } |
| 234 | printf("Finished running job: %s\n", job.name.c_str()); |
| 235 | |
| 236 | return false; |
| 237 | } |
| 238 | |
| 239 | bool InferenceProcess::run(bool exitOnEmpty) { |
| 240 | bool anyJobFailed = false; |
| 241 | |
| 242 | while (true) { |
| 243 | getLock(); |
| 244 | bool empty = inferenceJobQueue.empty(); |
| 245 | freeLock(); |
| 246 | |
| 247 | if (empty) { |
| 248 | if (exitOnEmpty) { |
| 249 | printf("Exit from InferenceProcess::run() on empty job queue!\n"); |
| 250 | break; |
| 251 | } |
| 252 | |
| 253 | continue; |
| 254 | } |
| 255 | |
| 256 | getLock(); |
| 257 | InferenceJob job = inferenceJobQueue.front(); |
| 258 | inferenceJobQueue.pop(); |
| 259 | freeLock(); |
| 260 | |
| 261 | if (runJob(job)) { |
| 262 | anyJobFailed = true; |
| 263 | continue; |
| 264 | } |
| 265 | } |
| 266 | |
| 267 | return anyJobFailed; |
| 268 | } |
| 269 | |
| 270 | } // namespace InferenceProcess |