/*
* Copyright (c) 2019-2021 Arm Limited. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/****************************************************************************
* Includes
****************************************************************************/
#include "FreeRTOS.h"
#include "queue.h"
#include "semphr.h"
#include "task.h"
#include <inttypes.h>
#include <stdio.h>
#include <vector>
#include "inference_process.hpp"
// Model data (defined and changeable by modifying the compile definition in CMakeLists.txt)
#include "input.h"
#include "model.h"
#include "output.h"
using namespace std;
using namespace InferenceProcess;
/****************************************************************************
* Defines
****************************************************************************/
// Number of tasks that process inferences. Each task reserves the driver and runs inferences (normally one per NPU, but not a requirement)
#define NUM_INFERENCE_TASKS 1
// Number of tasks that create jobs and receive responses
#define NUM_JOB_TASKS 1
// Number of jobs each job task creates
#define NUM_JOBS_PER_TASK 1
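// In total, NUM_JOB_TASKS * NUM_JOBS_PER_TASK jobs are created and processed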
// Tensor arena size
#ifdef TENSOR_ARENA_SIZE // If defined in model.h
#define TENSOR_ARENA_SIZE_PER_INFERENCE TENSOR_ARENA_SIZE
#else // If not defined, split the maximum available arena across the inference tasks
#define TENSOR_ARENA_SIZE_PER_INFERENCE (2000000 / NUM_INFERENCE_TASKS)
#endif
/****************************************************************************
* InferenceJob
****************************************************************************/
struct ProcessTaskParams {
ProcessTaskParams() : queueHandle(nullptr), tensorArena(nullptr), arenaSize(0) {}
ProcessTaskParams(QueueHandle_t _queue, uint8_t *_tensorArena, size_t _arenaSize) :
queueHandle(_queue), tensorArena(_tensorArena), arenaSize(_arenaSize) {}
QueueHandle_t queueHandle;
uint8_t *tensorArena;
size_t arenaSize;
};
// Total number of completed jobs; needed to exit the application correctly when NUM_JOB_TASKS > 1
static int totalCompletedJobs = 0;
// TensorArena static initialisation
static const size_t arenaSize = TENSOR_ARENA_SIZE_PER_INFERENCE;
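// One arena is reserved per inference task; the 16-byte alignment satisfies the
// tensor arena alignment expected by TensorFlow Lite Micro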
__attribute__((section(".bss.tensor_arena"), aligned(16)))
uint8_t inferenceProcessTensorArena[NUM_INFERENCE_TASKS][arenaSize];
// Wrapper around InferenceProcess::InferenceJob. Adds responseQueue and status for FreeRTOS multi-tasking purposes.
struct xInferenceJob : public InferenceJob {
QueueHandle_t responseQueue;
bool status;
xInferenceJob() : InferenceJob(), responseQueue(nullptr), status(false) {}
xInferenceJob(const string &_name,
const DataPtr &_networkModel,
const vector<DataPtr> &_input,
const vector<DataPtr> &_output,
const vector<DataPtr> &_expectedOutput,
const size_t _numBytesToPrint,
const vector<uint8_t> &_pmuEventConfig,
const uint32_t _pmuCycleCounterEnable,
QueueHandle_t _queue) :
InferenceJob(_name,
_networkModel,
_input,
_output,
_expectedOutput,
_numBytesToPrint,
_pmuEventConfig,
_pmuCycleCounterEnable),
responseQueue(_queue), status(false) {}
};
/****************************************************************************
* Mutex & Semaphore
* Overrides weak-linked symbols in ethosu_driver.c to implement thread handling
****************************************************************************/
extern "C" {
void *ethosu_mutex_create(void) {
return xSemaphoreCreateMutex();
}
void ethosu_mutex_lock(void *mutex) {
SemaphoreHandle_t handle = reinterpret_cast<SemaphoreHandle_t>(mutex);
xSemaphoreTake(handle, portMAX_DELAY);
}
void ethosu_mutex_unlock(void *mutex) {
SemaphoreHandle_t handle = reinterpret_cast<SemaphoreHandle_t>(mutex);
xSemaphoreGive(handle);
}
void *ethosu_semaphore_create(void) {
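    // FreeRTOS binary semaphores are created in the empty state, so the first
    // ethosu_semaphore_take() blocks until the driver calls ethosu_semaphore_give()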
return xSemaphoreCreateBinary();
}
void ethosu_semaphore_take(void *sem) {
SemaphoreHandle_t handle = reinterpret_cast<SemaphoreHandle_t>(sem);
xSemaphoreTake(handle, portMAX_DELAY);
}
void ethosu_semaphore_give(void *sem) {
SemaphoreHandle_t handle = reinterpret_cast<SemaphoreHandle_t>(sem);
xSemaphoreGive(handle);
}
}
/****************************************************************************
* Functions
****************************************************************************/
// inferenceProcessTask - Run jobs from queue with available driver
void inferenceProcessTask(void *pvParameters) {
ProcessTaskParams params = *reinterpret_cast<ProcessTaskParams *>(pvParameters);
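// Note: the 'class' keyword disambiguates the InferenceProcess class from the
// InferenceProcess namespace brought in with 'using' above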
class InferenceProcess inferenceProcess(params.tensorArena, params.arenaSize);
for (;;) {
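// Block until a job pointer is posted on the process queue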
xInferenceJob *xJob;
xQueueReceive(params.queueHandle, &xJob, portMAX_DELAY);
bool status = inferenceProcess.runJob(*xJob);
xJob->status = status;
xQueueSend(xJob->responseQueue, &xJob, portMAX_DELAY);
}
vTaskDelete(nullptr);
}
// inferenceSenderTask - Creates NUM_JOBS_PER_TASK jobs, queues them, and then listens for completion status
void inferenceSenderTask(void *pvParameters) {
int ret = 0;
QueueHandle_t inferenceProcessQueue = reinterpret_cast<QueueHandle_t>(pvParameters);
xInferenceJob jobs[NUM_JOBS_PER_TASK];
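// The job objects live on this task's stack; the task blocks below until all
// responses have arrived, so the pointers handed to the process tasks stay valid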
// Create queue for response messages
QueueHandle_t senderQueue = xQueueCreate(NUM_JOBS_PER_TASK, sizeof(xInferenceJob *));
// Create and queue the jobs
for (int n = 0; n < NUM_JOBS_PER_TASK; n++) {
// Create job
xInferenceJob *job = &jobs[n];
job->name = string(modelName);
job->networkModel = DataPtr(networkModelData, sizeof(networkModelData));
job->input.push_back(DataPtr(inputData, sizeof(inputData)));
job->expectedOutput.push_back(DataPtr(expectedOutputData, sizeof(expectedOutputData)));
job->responseQueue = senderQueue;
// Send job
printf("Sending inference job: job=%p, name=%s\n", job, job->name.c_str());
xQueueSend(inferenceProcessQueue, &job, portMAX_DELAY);
}
// Listen for completion status
do {
xInferenceJob *pSendJob;
xQueueReceive(senderQueue, &pSendJob, portMAX_DELAY);
printf("inferenceSenderTask: received response for job: %s, status = %u\n",
pSendJob->name.c_str(),
pSendJob->status);
totalCompletedJobs++;
ret = pSendJob->status;
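// A nonzero status marks a failed inference; stop waiting and report the error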
if (pSendJob->status != 0) {
break;
}
} while (totalCompletedJobs < NUM_JOBS_PER_TASK * NUM_JOB_TASKS);
vQueueDelete(senderQueue);
printf("FreeRTOS application returning %d.\n", ret);
exit(ret);
}
/****************************************************************************
* Application
****************************************************************************/
// Declare variables at global scope to keep them off the stack, since FreeRTOS resets the stack when the scheduler is started
static QueueHandle_t inferenceProcessQueue;
static ProcessTaskParams taskParams[NUM_INFERENCE_TASKS];
// FreeRTOS application. NOTE: Additional tasks may require increased heap size.
int main() {
BaseType_t ret;
inferenceProcessQueue = xQueueCreate(NUM_JOBS_PER_TASK, sizeof(xInferenceJob *));
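// The queues carry pointers to xInferenceJob objects; the job storage itself lives in the sender tasks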
// inferenceSender tasks to create and queue the jobs
for (int n = 0; n < NUM_JOB_TASKS; n++) {
ret = xTaskCreate(inferenceSenderTask, "inferenceSenderTask", 2 * 1024, inferenceProcessQueue, 2, nullptr);
if (ret != pdPASS) {
printf("FreeRTOS: Failed to create 'inferenceSenderTask%i'\n", n);
exit(1);
}
}
// Create inferenceProcess tasks to process the queued jobs
for (int n = 0; n < NUM_INFERENCE_TASKS; n++) {
taskParams[n] = ProcessTaskParams(inferenceProcessQueue, inferenceProcessTensorArena[n], arenaSize);
ret = xTaskCreate(inferenceProcessTask, "inferenceProcessTask", 3 * 1024, &taskParams[n], 3, nullptr);
if (ret != pdPASS) {
printf("FreeRTOS: Failed to create 'inferenceProcessTask%i'\n", n);
exit(1);
}
}
// Start Scheduler
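// vTaskStartScheduler() only returns if there is not enough FreeRTOS heap to create the idle (and timer) tasks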
vTaskStartScheduler();
printf("FreeRTOS application failed to initialise \n");
exit(1);
return 0;
}