/*
 * Copyright (c) 2019-2021 Arm Limited. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/* Includes */
#include "FreeRTOS.h"
#include "queue.h"
#include "semphr.h"
#include "task.h"
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <vector>
#include "inference_process.hpp"
// Model data (Defined & changeable by modifying compile definition in CMakeLists.txt)
#include "input.h"
#include "model.h"
#include "output.h"
using namespace std;
using namespace InferenceProcess;
/* Defines */
// Nr. of tasks to process inferences with. Task reserves driver & runs inference (Normally 1 per NPU, but not a must)
#ifndef NUM_INFERENCE_TASKS
#define NUM_INFERENCE_TASKS 1
#endif
// Nr. of tasks to create jobs and receive responses
#define NUM_JOB_TASKS 2
// Nr. of jobs to create per job task
#ifndef NUM_JOBS_PER_TASK
#define NUM_JOBS_PER_TASK 1
#endif
// Tensor arena size (bytes reserved for each inference task)
#ifndef TENSOR_ARENA_SIZE_PER_INFERENCE
#ifdef TENSOR_ARENA_SIZE // If defined in model.h
#define TENSOR_ARENA_SIZE_PER_INFERENCE TENSOR_ARENA_SIZE
#else // If not defined, use maximum available
// NOTE(review): the fallback budget below is an assumption - confirm it against the memory
// actually available to the .bss.tensor_arena section on the target.
#define TENSOR_ARENA_SIZE_PER_INFERENCE (2000000 / NUM_INFERENCE_TASKS)
#endif
#endif
/* InferenceJob */
// Start-up parameters handed to an inference-processing task through its pvParameters pointer.
struct ProcessTaskParams {
    // Default: no queue and no arena attached.
    ProcessTaskParams() : queueHandle(nullptr), tensorArena(nullptr), arenaSize(0) {}

    // _queue:       queue the task receives job pointers from
    // _tensorArena: start of the memory block given to the task's InferenceProcess
    // _arenaSize:   size of that memory block
    ProcessTaskParams(QueueHandle_t _queue, uint8_t *_tensorArena, size_t _arenaSize) :
        queueHandle(_queue), tensorArena(_tensorArena), arenaSize(_arenaSize) {}

    QueueHandle_t queueHandle; // Queue the processing task blocks on for incoming jobs
    uint8_t *tensorArena;      // Tensor arena passed to InferenceProcess
    size_t arenaSize;          // Size of tensorArena
};
namespace {
// Running count of finished jobs; required so the application exits correctly when NUM_JOB_TASKS > 1
int totalCompletedJobs{0};

// Size in bytes of the tensor arena reserved for each inference task
constexpr size_t arenaSize = TENSOR_ARENA_SIZE_PER_INFERENCE;

// Kept at global scope rather than on the stack, since FreeRTOS resets the stack when the scheduler is started
QueueHandle_t inferenceProcessQueue;
ProcessTaskParams taskParams[NUM_INFERENCE_TASKS];
} // namespace
// One tensor arena of arenaSize bytes per inference task, placed in the ".bss.tensor_arena"
// section and aligned to 16 bytes.
__attribute__((section(".bss.tensor_arena"), aligned(16)))
uint8_t inferenceProcessTensorArena[NUM_INFERENCE_TASKS][arenaSize];
// Wrapper around InferenceProcess::InferenceJob. Adds responseQueue and status for FreeRTOS multi-tasking purposes.
struct xInferenceJob : public InferenceJob {
QueueHandle_t responseQueue;
bool status;
xInferenceJob() : InferenceJob(), responseQueue(nullptr), status(false) {}
xInferenceJob(const string &_name,
const DataPtr &_networkModel,
const vector<DataPtr> &_input,
const vector<DataPtr> &_output,
const vector<DataPtr> &_expectedOutput,
const size_t _numBytesToPrint,
const vector<uint8_t> &_pmuEventConfig,
const uint32_t _pmuCycleCounterEnable,
QueueHandle_t _queue) :
responseQueue(_queue), status(false) {}
/*
 * Mutex & Semaphore
 * Overrides weak-linked symbols in ethosu_driver.c to implement thread handling
 */
extern "C" {
void *ethosu_mutex_create(void) {
return xSemaphoreCreateMutex();
void ethosu_mutex_lock(void *mutex) {
SemaphoreHandle_t handle = reinterpret_cast<SemaphoreHandle_t>(mutex);
xSemaphoreTake(handle, portMAX_DELAY);
void ethosu_mutex_unlock(void *mutex) {
SemaphoreHandle_t handle = reinterpret_cast<SemaphoreHandle_t>(mutex);
void *ethosu_semaphore_create(void) {
return xSemaphoreCreateBinary();
void ethosu_semaphore_take(void *sem) {
SemaphoreHandle_t handle = reinterpret_cast<SemaphoreHandle_t>(sem);
xSemaphoreTake(handle, portMAX_DELAY);
void ethosu_semaphore_give(void *sem) {
SemaphoreHandle_t handle = reinterpret_cast<SemaphoreHandle_t>(sem);
/* Functions */
// inferenceProcessTask - Run jobs from queue with available driver
void inferenceProcessTask(void *pvParameters) {
ProcessTaskParams params = *reinterpret_cast<ProcessTaskParams *>(pvParameters);
class InferenceProcess inferenceProcess(params.tensorArena, params.arenaSize);
for (;;) {
xInferenceJob *xJob;
xQueueReceive(params.queueHandle, &xJob, portMAX_DELAY);
bool status = inferenceProcess.runJob(*xJob);
xJob->status = status;
xQueueSend(xJob->responseQueue, &xJob, portMAX_DELAY);
// inferenceSenderTask - Creates NUM_JOBS_PER_TASK jobs, queues them, and then listens for completion status
//
// pvParameters is the handle of the queue the jobs are sent to. Responses arrive on a queue created
// by this task. Once all jobs of all sender tasks have completed, or a job reports a non-zero
// status, the application is terminated with the last received status as its exit code.
void inferenceSenderTask(void *pvParameters) {
    int ret = 0;

    QueueHandle_t inferenceProcessQueue = reinterpret_cast<QueueHandle_t>(pvParameters);
    xInferenceJob jobs[NUM_JOBS_PER_TASK];

    // Create queue for response messages
    QueueHandle_t senderQueue = xQueueCreate(NUM_JOBS_PER_TASK, sizeof(xInferenceJob *));

    // Create and queue the jobs
    for (int n = 0; n < NUM_JOBS_PER_TASK; n++) {
        // Create job
        xInferenceJob *job = &jobs[n];
        job->name          = string(modelName);
        job->networkModel  = DataPtr(networkModelData, sizeof(networkModelData));
        job->input.push_back(DataPtr(inputData, sizeof(inputData)));
        job->expectedOutput.push_back(DataPtr(expectedOutputData, sizeof(expectedOutputData)));
        job->responseQueue = senderQueue;

        // Send job
        printf("Sending inference job: job=%p, name=%s\n", static_cast<void *>(job), job->name.c_str());
        xQueueSend(inferenceProcessQueue, &job, portMAX_DELAY);
    }

    // Listen for completion status
    do {
        xInferenceJob *pSendJob;
        xQueueReceive(senderQueue, &pSendJob, portMAX_DELAY);
        printf("inferenceSenderTask: received response for job: %s, status = %u\n",
               pSendJob->name.c_str(),
               static_cast<unsigned>(pSendJob->status));

        // Shared across all sender tasks so that the last one to finish ends the application
        totalCompletedJobs++;
        ret = (pSendJob->status);
        if (pSendJob->status != 0) {
            // Stop waiting for further responses as soon as one job reports a non-zero status
            break;
        }
    } while (totalCompletedJobs < NUM_JOBS_PER_TASK * NUM_JOB_TASKS);

    printf("FreeRTOS application returning %d.\n", ret);

    // A FreeRTOS task function must not return; terminate the application instead
    exit(ret);
}
/* Application */
// FreeRTOS application. NOTE: Additional tasks may require increased heap size.
int main() {
BaseType_t ret;
inferenceProcessQueue = xQueueCreate(NUM_JOBS_PER_TASK, sizeof(xInferenceJob *));
// inferenceSender tasks to create and queue the jobs
for (int n = 0; n < NUM_JOB_TASKS; n++) {
ret = xTaskCreate(inferenceSenderTask, "inferenceSenderTask", 2 * 1024, inferenceProcessQueue, 2, nullptr);
if (ret != pdPASS) {
printf("FreeRTOS: Failed to create 'inferenceSenderTask%i'\n", n);
// Create inferenceProcess tasks to process the queued jobs
for (int n = 0; n < NUM_INFERENCE_TASKS; n++) {
taskParams[n] = ProcessTaskParams(inferenceProcessQueue, inferenceProcessTensorArena[n], arenaSize);
ret = xTaskCreate(inferenceProcessTask, "inferenceProcessTask", 8 * 1024, &taskParams[n], 3, nullptr);
if (ret != pdPASS) {
printf("FreeRTOS: Failed to create 'inferenceProcessTask%i'\n", n);
// Start Scheduler
printf("FreeRTOS application failed to initialise \n");
return 0;