Blame - applications/threadx_demo/main.cpp - ml/ethos-u/ethos-u-core-platform

blob: dc9055bf42b1ea45dc13ab3b60d21087668626ec [file] [log] [blame]

Lior Dekel	489e40b	2021-08-02 12:03:55 +0300	[diff] [blame]	1	/*
Lior Dekel	4882dbe	2022-02-09 17:18:27 +0200	[diff] [blame]	2	* Copyright (c) 2019-2022 Arm Limited. All rights reserved.
Lior Dekel	489e40b	2021-08-02 12:03:55 +0300	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: Apache-2.0
				5	*
				6	* Licensed under the Apache License, Version 2.0 (the License); you may
				7	* not use this file except in compliance with the License.
				8	* You may obtain a copy of the License at
				9	*
				10	* www.apache.org/licenses/LICENSE-2.0
				11	*
				12	* Unless required by applicable law or agreed to in writing, software
				13	* distributed under the License is distributed on an AS IS BASIS, WITHOUT
				14	* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				15	* See the License for the specific language governing permissions and
				16	* limitations under the License.
				17	*/
				18
				19	/****************************************************************************
				20	* Includes
				21	****************************************************************************/
				22	#include "tx_api.h"
				23
				24	#include <inttypes.h>
				25	#include <stdio.h>
				26	#include <vector>
				27
				28	#include "inference_process.hpp"
				29
				30	// Model data (defined and changeable by modifying the compile definition in CMakeLists.txt)
				31	#include "input.h"
				32	#include "model.h"
				33	#include "output.h"
				34
				35	using namespace std;
				36	using namespace InferenceProcess;
				37
				38	/****************************************************************************
				39	* Defines
				40	****************************************************************************/
				41	// Nr. of threads to process inferences with. Thread reserves driver & runs inference (Normally 1 per NPU, but not a
				42	// must)
				43	#define NUM_INFERENCE_THREADS 1
				44	// Nr. of threads to create jobs and recieve responses
				45	#define NUM_JOB_THREADS 2
				46	// Nr. of jobs to create per job thread
				47	#define NUM_JOBS_PER_THREAD 1
				48
Davide Grohmann	41dc341	2022-02-15 17:19:56 +0100	[diff] [blame^]	49	#define MAX_THREAD_NAME_SIZE 128
				50
Lior Dekel	412adc2	2021-12-22 16:47:25 +0200	[diff] [blame]	51	#define PROCESS_THREAD_STACK_SIZE (20 * 1024)
Lior Dekel	489e40b	2021-08-02 12:03:55 +0300	[diff] [blame]	52	#define SENDER_THREAD_STACK_SIZE (2 * 1024)
				53	#define PROCESS_THREAD_CONTEXT_SIZE (sizeof(TX_THREAD))
				54	#define SENDER_THREAD_CONTEXT_SIZE (sizeof(TX_THREAD))
				55
				56	// Tensor arena size
				57	#ifdef TENSOR_ARENA_SIZE // If defined in model.h
				58	#define TENSOR_ARENA_SIZE_PER_INFERENCE TENSOR_ARENA_SIZE
				59	#else // If not defined, use maximum available - 2M
				60	#define TENSOR_ARENA_SIZE 2000000
				61	#define TENSOR_ARENA_SIZE_PER_INFERENCE (TENSOR_ARENA_SIZE / NUM_INFERENCE_THREADS)
				62	#endif
				63
				64	#define PROCESS_QUEUE_SIZE (NUM_JOBS_PER_THREAD * NUM_JOB_THREADS * sizeof(xInferenceJob *))
				65	#define SENDER_QUEUE_SIZE (NUM_JOBS_PER_THREAD * sizeof(xInferenceJob *))
				66
				67	/* BYTE_POOL_SIZE_OVERHEAD is used to increase the memory byte pool size, as the number of
				68	allocatable bytes in a memory byte pool is slightly less than what was specified during creation */
				69	#define BYTE_POOL_SIZE_OVERHEAD (512)
				70	#define BYTE_POOL_SIZE \
				71	(((PROCESS_THREAD_CONTEXT_SIZE + PROCESS_THREAD_STACK_SIZE) * NUM_INFERENCE_THREADS) + \
				72	(SENDER_THREAD_CONTEXT_SIZE + SENDER_THREAD_STACK_SIZE + SENDER_QUEUE_SIZE) * NUM_JOB_THREADS + \
				73	PROCESS_QUEUE_SIZE + BYTE_POOL_SIZE_OVERHEAD)
				74
				75	/****************************************************************************
				76	* Structures
				77	****************************************************************************/
				78	struct ProcessThreadParams {
				79	ProcessThreadParams() : queueHandle(nullptr), tensorArena(nullptr), arenaSize(0) {}
				80	ProcessThreadParams(TX_QUEUE _queue, uint8_t _tensorArena, size_t _arenaSize) :
				81	queueHandle(_queue), tensorArena(_tensorArena), arenaSize(_arenaSize) {}
				82
				83	TX_QUEUE *queueHandle;
				84	uint8_t *tensorArena;
				85	size_t arenaSize;
				86	};
				87
				88	// Wrapper around InferenceProcess::InferenceJob. Adds responseQueue and status for ThreadX multi-threaded purposes.
				89	struct xInferenceJob : public InferenceJob {
				90	TX_QUEUE *responseQueue;
				91	bool status;
				92
				93	xInferenceJob() : InferenceJob(), responseQueue(nullptr), status(false) {}
				94	xInferenceJob(const string &_name,
				95	const DataPtr &_networkModel,
				96	const vector<DataPtr> &_input,
				97	const vector<DataPtr> &_output,
				98	const vector<DataPtr> &_expectedOutput,
				99	const size_t _numBytesToPrint,
Lior Dekel	4882dbe	2022-02-09 17:18:27 +0200	[diff] [blame]	100	void *_userArg,
Lior Dekel	489e40b	2021-08-02 12:03:55 +0300	[diff] [blame]	101	TX_QUEUE *_queue) :
Lior Dekel	4882dbe	2022-02-09 17:18:27 +0200	[diff] [blame]	102	InferenceJob(_name, _networkModel, _input, _output, _expectedOutput, _numBytesToPrint, _userArg),
Lior Dekel	489e40b	2021-08-02 12:03:55 +0300	[diff] [blame]	103	responseQueue(_queue), status(false) {}
				104	};
				105
				106	/****************************************************************************
				107	* Global and static variables
				108	****************************************************************************/
				109	namespace {
				110	// Number of total completed jobs, needed to exit application correctly if NUM_JOB_THREADS > 1
				111	int totalCompletedJobs = 0;
				112
				113	// TensorArena static initialisation
				114	const size_t arenaSize = TENSOR_ARENA_SIZE_PER_INFERENCE;
				115
				116	TX_QUEUE inferenceProcessQueue;
Davide Grohmann	41dc341	2022-02-15 17:19:56 +0100	[diff] [blame^]	117	char inferenceProcessQueueName[] = "inferenceProcessQueue";
Lior Dekel	489e40b	2021-08-02 12:03:55 +0300	[diff] [blame]	118
				119	ProcessThreadParams threadParams[NUM_INFERENCE_THREADS];
				120
				121	TX_BYTE_POOL bytePool;
Davide Grohmann	41dc341	2022-02-15 17:19:56 +0100	[diff] [blame^]	122	char bytePoolName[] = "byte pool";
				123
Lior Dekel	489e40b	2021-08-02 12:03:55 +0300	[diff] [blame]	124	ULONG memoryArea[BYTE_POOL_SIZE / sizeof(ULONG)];
				125	} // namespace
				126
				127	__attribute__((section(".bss.tensor_arena"), aligned(16)))
				128	uint8_t inferenceProcessTensorArena[NUM_INFERENCE_THREADS][arenaSize];
				129
				130	/****************************************************************************
				131	* Mutex & Semaphore
				132	* Overrides weak-linked symbols in ethosu_driver.c to implement thread handling
				133	****************************************************************************/
				134	extern "C" {
				135	void *ethosu_mutex_create(void) {
				136	UINT status;
				137	TX_MUTEX *mutex;
				138
				139	mutex = new TX_MUTEX;
Davide Grohmann	41dc341	2022-02-15 17:19:56 +0100	[diff] [blame^]	140	status = tx_mutex_create(mutex, nullptr, TX_NO_INHERIT);
Lior Dekel	489e40b	2021-08-02 12:03:55 +0300	[diff] [blame]	141	if (status != TX_SUCCESS) {
				142	printf("mutex create failed, error - %d\n", status);
				143	}
				144	return (void *)mutex;
				145	}
				146
				147	void ethosu_mutex_lock(void *mutex) {
				148	UINT status;
				149	status = tx_mutex_get(reinterpret_cast<TX_MUTEX *>(mutex), TX_WAIT_FOREVER);
				150	if (status != TX_SUCCESS) {
				151	printf("mutex get failed, error - %d\n", status);
				152	}
				153	return;
				154	}
				155
				156	void ethosu_mutex_unlock(void *mutex) {
				157	UINT status;
				158	status = tx_mutex_put(reinterpret_cast<TX_MUTEX *>(mutex));
				159	if (status != TX_SUCCESS) {
				160	printf("mutex put failed, error - %d\n", status);
				161	}
				162	return;
				163	}
				164
				165	void *ethosu_semaphore_create(void) {
				166	UINT status;
				167	TX_SEMAPHORE *semaphore;
				168
				169	semaphore = new TX_SEMAPHORE;
Davide Grohmann	41dc341	2022-02-15 17:19:56 +0100	[diff] [blame^]	170	status = tx_semaphore_create(semaphore, nullptr, 1);
Lior Dekel	489e40b	2021-08-02 12:03:55 +0300	[diff] [blame]	171
				172	if (status != TX_SUCCESS) {
				173	printf("Semaphore create failed, error - %d\n", status);
				174	}
				175
				176	return (void *)semaphore;
				177	}
				178
				179	void ethosu_semaphore_take(void *sem) {
				180	UINT status;
				181
				182	status = tx_semaphore_get(reinterpret_cast<TX_SEMAPHORE *>(sem), TX_WAIT_FOREVER);
				183
				184	if (status != TX_SUCCESS) {
				185	printf("Semaphore get/take, error - %d\n", status);
				186	}
				187
				188	return;
				189	}
				190
				191	void ethosu_semaphore_give(void *sem) {
				192	UINT status;
				193
				194	status = tx_semaphore_put(reinterpret_cast<TX_SEMAPHORE *>(sem));
				195
				196	if (status != TX_SUCCESS) {
				197	printf("Semaphore put/give, error - %d\n", status);
				198	}
				199
				200	return;
				201	}
				202	}
				203
				204	/****************************************************************************
				205	* Functions
				206	****************************************************************************/
				207	// inferenceProcessThread - Run jobs from queue with available driver
				208	void inferenceProcessThread(ULONG pvParameters) {
				209	ProcessThreadParams params = reinterpret_cast<ProcessThreadParams >(pvParameters);
				210	UINT tx_status = TX_QUEUE_ERROR;
				211
				212	class InferenceProcess inferenceProcess(params.tensorArena, params.arenaSize);
				213
				214	for (;;) {
				215	xInferenceJob *xJob;
				216
				217	// Get the job details from the process queue
				218	tx_status = tx_queue_receive(params.queueHandle, &xJob, TX_WAIT_FOREVER);
				219	if (tx_status != TX_SUCCESS) {
				220	printf("process failed to receive from Queue, error - %d\n", tx_status);
				221	exit(1);
				222	}
				223
				224	// run the job
				225	bool status = inferenceProcess.runJob(*xJob);
				226	xJob->status = status;
				227
				228	// Send response for the job in the response queue
				229	tx_status = tx_queue_send(xJob->responseQueue, &xJob, TX_WAIT_FOREVER);
				230	if (tx_status != TX_SUCCESS) {
				231	printf("process inferenceProcessThread failed to send to Queue, error - %d\n", tx_status);
				232	exit(1);
				233	}
				234	}
				235
				236	tx_status = tx_thread_terminate(nullptr);
				237	if (tx_status != TX_SUCCESS) {
				238	printf("process inferenceProcessThread failed to terminate thread, error - %d\n", tx_status);
				239	exit(1);
				240	}
				241	}
				242
				243	// inferenceSenderThread - Creates NUM_INFERNECE_JOBS jobs, queues them, and then listens for completion status
				244	void inferenceSenderThread(ULONG pvParameters) {
				245	int ret = 0;
				246	TX_QUEUE senderQueue;
				247	UINT status = TX_QUEUE_ERROR;
				248	TX_QUEUE inferenceProcessQueueLocal = reinterpret_cast<TX_QUEUE >(pvParameters);
				249	xInferenceJob jobs[NUM_JOBS_PER_THREAD];
Davide Grohmann	41dc341	2022-02-15 17:19:56 +0100	[diff] [blame^]	250	CHAR *senderQueuePtr = nullptr;
				251	char senderQueueName[] = "senderQueue";
Lior Dekel	489e40b	2021-08-02 12:03:55 +0300	[diff] [blame]	252
				253	/* Allocate memory for this inference sender thread responses queue */
				254	status = tx_byte_allocate(&bytePool, reinterpret_cast<VOID **>(&senderQueuePtr), SENDER_QUEUE_SIZE, TX_NO_WAIT);
				255	if (status != TX_SUCCESS) {
				256	printf("Sender thread failed to allocate bytes for Queue, error - %d\n", status);
				257	exit(1);
				258	}
				259
				260	/* Create responses queue for this inference sender thread */
				261	status = tx_queue_create(
Davide Grohmann	41dc341	2022-02-15 17:19:56 +0100	[diff] [blame^]	262	&senderQueue, senderQueueName, sizeof(xInferenceJob *) / sizeof(uint32_t), senderQueuePtr, SENDER_QUEUE_SIZE);
Lior Dekel	489e40b	2021-08-02 12:03:55 +0300	[diff] [blame]	263
				264	if (status != TX_SUCCESS) {
				265	printf("Sender thread failed to create Queue, error - %d\n", status);
				266	exit(1);
				267	}
				268
				269	/* Create the jobs and queue them in the inference process queue */
				270	for (int n = 0; n < NUM_JOBS_PER_THREAD; n++) {
				271
				272	// Create job
				273	xInferenceJob *job = &jobs[n];
				274	job->name = string(modelName);
				275	job->networkModel = DataPtr(networkModelData, sizeof(networkModelData));
				276	job->input.push_back(DataPtr(inputData, sizeof(inputData)));
				277	job->expectedOutput.push_back(DataPtr(expectedOutputData, sizeof(expectedOutputData)));
				278	job->responseQueue = &senderQueue;
				279
				280	// queue job
				281	status = tx_queue_send(inferenceProcessQueueLocal, &job, TX_WAIT_FOREVER);
				282	if (status != TX_SUCCESS) {
				283	printf("Sender thread failed to send to Queue, error - %d\n", status);
				284	exit(1);
				285	}
				286	}
				287
				288	/* Listen for completion status on the response queue */
				289	do {
				290	xInferenceJob *pSendJob;
				291
				292	status = tx_queue_receive(&senderQueue, &pSendJob, TX_WAIT_FOREVER);
				293	if (status != TX_SUCCESS) {
				294	printf("Sender thread failed to receive from Queue, error - %d\n", status);
				295	exit(1);
				296	}
				297
				298	totalCompletedJobs++;
				299	ret = (pSendJob->status);
				300	if (pSendJob->status != 0) {
				301	break;
				302	}
				303	} while (totalCompletedJobs < NUM_JOBS_PER_THREAD * NUM_JOB_THREADS);
				304
				305	/* delete the response queue */
				306	status = tx_queue_delete(&senderQueue);
				307	if (status != TX_SUCCESS) {
				308	printf("Sender thread failed to delete Queue, error - %d\n", status);
				309	exit(1);
				310	}
				311
				312	exit(ret);
				313	}
				314
				315	/****************************************************************************
				316	* Application
				317	****************************************************************************/
				318	int main() {
				319	/* Enter the ThreadX kernel. */
				320	tx_kernel_enter();
				321	return 0;
				322	}
				323
				324	void tx_application_define(void *first_unused_memory) {
				325	UINT status;
				326	CHAR *senderThreadStackPtr[NUM_JOB_THREADS] = {nullptr};
				327	CHAR *processThreadStackPtr[NUM_INFERENCE_THREADS] = {nullptr};
				328	CHAR *processQueuePtr = nullptr;
				329	CHAR *senderThreadPtr[NUM_JOB_THREADS] = {nullptr};
Davide Grohmann	41dc341	2022-02-15 17:19:56 +0100	[diff] [blame^]	330	CHAR senderThreadNames[NUM_JOB_THREADS][MAX_THREAD_NAME_SIZE];
				331	CHAR *processThreadPtr[NUM_INFERENCE_THREADS] = {nullptr};
				332	CHAR processThreadNames[NUM_JOB_THREADS][MAX_THREAD_NAME_SIZE];
Lior Dekel	489e40b	2021-08-02 12:03:55 +0300	[diff] [blame]	333
				334	/* Create a byte memory pool from which to allocate the threads stacks and queues. */
Davide Grohmann	41dc341	2022-02-15 17:19:56 +0100	[diff] [blame^]	335	status = tx_byte_pool_create(&bytePool, bytePoolName, memoryArea, BYTE_POOL_SIZE);
Lior Dekel	489e40b	2021-08-02 12:03:55 +0300	[diff] [blame]	336	if (status != TX_SUCCESS) {
				337	printf("Main failed to allocate pool of bytes, error - %d\n", status);
				338	exit(1);
				339	}
				340
				341	/* Allocate memory for the inference process queue */
				342	status = tx_byte_allocate(&bytePool, reinterpret_cast<VOID **>(&processQueuePtr), PROCESS_QUEUE_SIZE, TX_NO_WAIT);
				343	if (status != TX_SUCCESS) {
				344	printf("Main failed to allocate bytes for process queue, error - %d\n", status);
				345	exit(1);
				346	}
				347
				348	status = tx_queue_create(&inferenceProcessQueue,
Davide Grohmann	41dc341	2022-02-15 17:19:56 +0100	[diff] [blame^]	349	inferenceProcessQueueName,
Lior Dekel	489e40b	2021-08-02 12:03:55 +0300	[diff] [blame]	350	sizeof(xInferenceJob *) / sizeof(uint32_t),
				351	processQueuePtr,
				352	PROCESS_QUEUE_SIZE);
				353	if (status != TX_SUCCESS) {
				354	printf("Main failed to create Queue, error - %d\n", status);
				355	exit(1);
				356	}
				357
				358	/* inferenceSender threads to create and queue the jobs */
				359	for (int n = 0; n < NUM_JOB_THREADS; n++) {
				360
				361	/* Allocate the thread context for the inference sender thread. */
				362	status =
				363	tx_byte_allocate(&bytePool, reinterpret_cast<VOID **>(&senderThreadPtr[n]), sizeof(TX_THREAD), TX_NO_WAIT);
				364	if (status != TX_SUCCESS) {
				365	printf("Main failed to allocate bytes for sender tread, error - %d\n", status);
				366	exit(1);
				367	}
				368
				369	/* Allocate the stack for the inference sender thread. */
				370	status = tx_byte_allocate(
				371	&bytePool, reinterpret_cast<VOID **>(&senderThreadStackPtr[n]), SENDER_THREAD_STACK_SIZE, TX_NO_WAIT);
				372	if (status != TX_SUCCESS) {
				373	printf("Main failed to allocate bytes for sender tread stack, error - %d\n", status);
				374	exit(1);
				375	}
				376
Davide Grohmann	41dc341	2022-02-15 17:19:56 +0100	[diff] [blame^]	377	snprintf(senderThreadNames[n], MAX_THREAD_NAME_SIZE, "senderThread-%d", n);
				378
Lior Dekel	489e40b	2021-08-02 12:03:55 +0300	[diff] [blame]	379	/* Create the inference sender thread. */
				380	status = tx_thread_create(reinterpret_cast<TX_THREAD *>(senderThreadPtr[n]),
Davide Grohmann	41dc341	2022-02-15 17:19:56 +0100	[diff] [blame^]	381	senderThreadNames[n],
Lior Dekel	489e40b	2021-08-02 12:03:55 +0300	[diff] [blame]	382	inferenceSenderThread,
				383	reinterpret_cast<ULONG>(&inferenceProcessQueue),
				384	senderThreadStackPtr[n],
				385	SENDER_THREAD_STACK_SIZE,
				386	1,
				387	1,
				388	TX_NO_TIME_SLICE,
				389	TX_AUTO_START);
				390	if (status != TX_SUCCESS) {
				391	printf("Main failed to create Thread, error - %d\n", status);
				392	exit(1);
				393	}
				394	}
				395
				396	/* Create inferenceProcess threads to process the queued jobs */
				397	for (int n = 0; n < NUM_INFERENCE_THREADS; n++) {
				398
				399	/* Allocate the thread context for the inference process thread. */
				400	status =
				401	tx_byte_allocate(&bytePool, reinterpret_cast<VOID **>(&processThreadPtr[n]), sizeof(TX_THREAD), TX_NO_WAIT);
				402	if (status != TX_SUCCESS) {
				403	printf("Main failed to allocate bytes for process tread, error - %d\n", status);
				404	exit(1);
				405	}
				406
				407	/* Allocate the stack for the inference process thread. */
				408	status = tx_byte_allocate(
				409	&bytePool, reinterpret_cast<VOID **>(&processThreadStackPtr[n]), PROCESS_THREAD_STACK_SIZE, TX_NO_WAIT);
				410	if (status != TX_SUCCESS) {
				411	printf("Main failed to allocate bytes for process stack, error - %d\n", status);
				412	exit(1);
				413	}
				414
				415	threadParams[n] = ProcessThreadParams(
				416	&inferenceProcessQueue, inferenceProcessTensorArena[n], reinterpret_cast<size_t>(arenaSize));
Davide Grohmann	41dc341	2022-02-15 17:19:56 +0100	[diff] [blame^]	417	snprintf(processThreadNames[n], MAX_THREAD_NAME_SIZE, "processThread-%d", n);
Lior Dekel	489e40b	2021-08-02 12:03:55 +0300	[diff] [blame]	418
				419	/* Create the inference process thread. */
				420	status = tx_thread_create(reinterpret_cast<TX_THREAD *>(processThreadPtr[n]),
Davide Grohmann	41dc341	2022-02-15 17:19:56 +0100	[diff] [blame^]	421	processThreadNames[n],
Lior Dekel	489e40b	2021-08-02 12:03:55 +0300	[diff] [blame]	422	inferenceProcessThread,
				423	reinterpret_cast<ULONG>(&threadParams[n]),
				424	processThreadStackPtr[n],
				425	PROCESS_THREAD_STACK_SIZE,
				426	1,
				427	1,
				428	TX_NO_TIME_SLICE,
				429	TX_AUTO_START);
				430	if (status != TX_SUCCESS) {
				431	printf("Main failed to create thread, error - %d\n", status);
				432	exit(1);
				433	}
				434	}
				435
				436	printf("ThreadX application initialisation - Done \n");
				437	return;
				438	}