//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
#include "Types.hpp"
#include "armnn/ArmNN.hpp"
#include <armnn/Logging.hpp>
#include <armnn_delegate.hpp>
#include <DelegateOptions.hpp>
#include <DelegateUtils.hpp>
#include <Profiling.hpp>
#include <tensorflow/lite/builtin_ops.h>
#include <tensorflow/lite/c/builtin_op_data.h>
#include <tensorflow/lite/c/common.h>
#include <tensorflow/lite/optional_debug_tools.h>
#include <tensorflow/lite/kernels/builtin_op_kernels.h>
#include <tensorflow/lite/interpreter.h>
#include <tensorflow/lite/kernels/register.h>
#include <string>
#include <vector>
namespace common
{
/**
* @brief Loads a network via the TfLite Interpreter, registers the
* Arm NN delegate with it, and runs inference against a given backend.
*
* Currently the input data is assumed to be a cv::Mat (frame); this
* assumption is implemented in the PrepareTensors method and can be
* generalized later. See the usage sketch below the class declaration.
*
*/
template <typename Tout>
class ArmnnNetworkExecutor
{
private:
std::unique_ptr<tflite::Interpreter> m_interpreter;
std::unique_ptr<tflite::FlatBufferModel> m_model;
Profiling m_profiling;
void PrepareTensors(const void* inputData, const size_t dataBytes);
/* Casts a scoped enum value to its underlying integer type for logging */
template <typename Enumeration>
auto log_as_int(Enumeration value)
-> typename std::underlying_type<Enumeration>::type
{
return static_cast<typename std::underlying_type<Enumeration>::type>(value);
}
public:
ArmnnNetworkExecutor() = delete;
/**
* @brief Loads the model, creates the TfLite interpreter, and registers
* the Arm NN delegate with it.
*
* @param[in] modelPath - Relative path to the model file
* @param[in] backends - The list of preferred backends to run inference on
* @param[in] isProfilingEnabled - Enables timing measurements of key stages (default: false)
*/
ArmnnNetworkExecutor(std::string& modelPath,
std::vector<armnn::BackendId>& backends,
bool isProfilingEnabled = false);
/**
* @brief Returns the model's input width and height, in that order.
*/
Size GetImageAspectRatio();
/**
* @brief Returns the data type of the model's input tensor.
*/
armnn::DataType GetInputDataType() const;
/**
* @brief Returns the quantization scale of the input tensor.
*/
float GetQuantizationScale();
/**
* @brief Returns the quantization offset (zero point) of the input tensor.
*/
int GetQuantizationOffset();
/**
* @brief Returns the quantization scale of the output tensor at the given index.
*/
float GetOutputQuantizationScale(int tensorIndex);
/**
* @brief Returns the quantization offset (zero point) of the output tensor at the given index.
*/
int GetOutputQuantizationOffset(int tensorIndex);
/**
* @brief Runs inference on the provided input data, and stores the results
* in the provided InferenceResults object.
*
* @param[in] inputData - input frame data
* @param[in] dataBytes - input data size in bytes
* @param[out] outResults - InferenceResults object in which the output tensors are stored.
*
* @return true if inference succeeded, false otherwise.
*/
bool Run(const void *inputData, const size_t dataBytes,
InferenceResults<Tout> &outResults);
};
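/*
* Usage sketch (illustrative only): runs a uint8-quantized model on a
* cv::Mat frame, as assumed by the class description. The model path,
* frame, and backend list are hypothetical.
*
* std::string modelPath("model.tflite");
* std::vector<armnn::BackendId> backends = {"GpuAcc", "CpuAcc", "CpuRef"};
* common::ArmnnNetworkExecutor<uint8_t> executor(modelPath, backends);
* common::InferenceResults<uint8_t> results;
* executor.Run(frame.data, frame.total() * frame.elemSize(), results);
*/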
template <typename Tout>
ArmnnNetworkExecutor<Tout>::ArmnnNetworkExecutor(std::string& modelPath,
std::vector<armnn::BackendId>& preferredBackends,
bool isProfilingEnabled):
m_profiling(isProfilingEnabled)
{
m_profiling.ProfilingStart();
armnn::OptimizerOptions optimizerOptions;
m_model = tflite::FlatBufferModel::BuildFromFile(modelPath.c_str());
if (m_model == nullptr)
{
const std::string errorMessage{"ArmnnNetworkExecutor: Failed to build the model"};
ARMNN_LOG(error) << errorMessage;
throw armnn::Exception(errorMessage);
}
m_profiling.ProfilingStopAndPrintUs("Loading the model took");
m_profiling.ProfilingStart();
tflite::ops::builtin::BuiltinOpResolver resolver;
if (tflite::InterpreterBuilder(*m_model, resolver)(&m_interpreter) != kTfLiteOk)
{
const std::string errorMessage{"ArmnnNetworkExecutor: Failed to build the interpreter"};
ARMNN_LOG(error) << errorMessage;
throw armnn::Exception(errorMessage);
}
if (m_interpreter->AllocateTensors() != kTfLiteOk)
{
const std::string errorMessage{"ArmnnNetworkExecutor: Failed to alloc tensors"};
ARMNN_LOG(error) << errorMessage;
throw armnn::Exception(errorMessage);
}
m_profiling.ProfilingStopAndPrintUs("Create the tflite interpreter");
/* create delegate options */
m_profiling.ProfilingStart();
/* enable fast math optimization */
armnn::BackendOptions modelOptionGpu("GpuAcc", {{"FastMathEnabled", true}});
optimizerOptions.m_ModelOptions.push_back(modelOptionGpu);
armnn::BackendOptions modelOptionCpu("CpuAcc", {{"FastMathEnabled", true}});
optimizerOptions.m_ModelOptions.push_back(modelOptionCpu);
/* enable reduce float32 to float16 optimization */
optimizerOptions.m_ReduceFp32ToFp16 = true;
armnnDelegate::DelegateOptions delegateOptions(preferredBackends, optimizerOptions);
/* create delegate object */
std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
armnnDelegate::TfLiteArmnnDelegateDelete);
/* Register the delegate with the interpreter; warn and fall back to the
default TfLite runtime if the delegate cannot be applied */
if (m_interpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate)) != kTfLiteOk)
{
ARMNN_LOG(warning) << "ArmnnNetworkExecutor: unable to use the Arm NN delegate; "
"falling back to the default runtime";
}
m_profiling.ProfilingStopAndPrintUs("Create and load ArmNN Delegate");
}
template<typename Tout>
void ArmnnNetworkExecutor<Tout>::PrepareTensors(const void *inputData, const size_t dataBytes)
{
size_t inputTensorSize = m_interpreter->input_tensor(0)->bytes;
auto * inputTensorPtr = m_interpreter->input_tensor(0)->data.raw;
assert(inputTensorSize >= dataBytes);
if (inputTensorPtr != nullptr)
{
/* Copy only the bytes supplied by the caller; copying inputTensorSize
bytes would read past the end of inputData when it is smaller */
memcpy(inputTensorPtr, inputData, dataBytes);
}
else
{
const std::string errorMessage{"ArmnnNetworkExecutor: input tensor is null"};
ARMNN_LOG(error) << errorMessage;
throw armnn::Exception(errorMessage);
}
}
template <typename Tout>
bool ArmnnNetworkExecutor<Tout>::Run(const void *inputData, const size_t dataBytes,
InferenceResults<Tout>& outResults)
{
bool ret = false;
m_profiling.ProfilingStart();
PrepareTensors(inputData, dataBytes);
if (m_interpreter->Invoke() == kTfLiteOk)
{
ret = true;
// Extract the output tensor data.
outResults.clear();
outResults.reserve(m_interpreter->outputs().size());
for (int index = 0; index < static_cast<int>(m_interpreter->outputs().size()); index++)
{
size_t size = m_interpreter->output_tensor(index)->bytes / sizeof(Tout);
const Tout *p_Output = m_interpreter->typed_output_tensor<Tout>(index);
if (p_Output != nullptr)
{
/* Construct the result from the tensor's element range */
InferenceResult<Tout> outRes(p_Output, p_Output + size);
outResults.emplace_back(outRes);
}
else
{
const std::string errorMessage{"ArmnnNetworkExecutor: p_Output tensor is null"};
ARMNN_LOG(error) << errorMessage;
ret = false;
}
}
}
else
{
const std::string errorMessage{"ArmnnNetworkExecutor: Invoke has failed"};
ARMNN_LOG(error) << errorMessage;
}
m_profiling.ProfilingStopAndPrintUs("Perform inference");
return ret;
}
template <typename Tout>
Size ArmnnNetworkExecutor<Tout>::GetImageAspectRatio()
{
/* Model input is assumed to be NHWC: dims = {batch, height, width, channels} */
assert(m_interpreter->tensor(m_interpreter->inputs()[0])->dims->size == 4);
return Size(m_interpreter->tensor(m_interpreter->inputs()[0])->dims->data[2],
m_interpreter->tensor(m_interpreter->inputs()[0])->dims->data[1]);
}
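/*
* The Size returned above is typically used to scale the frame to the
* model's expected input dimensions before calling Run(). A sketch using
* OpenCV, assuming common::Size exposes the width and height passed to
* its constructor as m_Width/m_Height (member names are an assumption;
* see Types.hpp):
*
* common::Size inputSize = executor.GetImageAspectRatio();
* cv::Mat resized;
* cv::resize(frame, resized, cv::Size(inputSize.m_Width, inputSize.m_Height));
*/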
template <typename Tout>
armnn::DataType ArmnnNetworkExecutor<Tout>::GetInputDataType() const
{
return GetDataType(*(m_interpreter->tensor(m_interpreter->inputs()[0])));
}
template <typename Tout>
float ArmnnNetworkExecutor<Tout>::GetQuantizationScale()
{
return m_interpreter->tensor(m_interpreter->inputs()[0])->params.scale;
}
template <typename Tout>
int ArmnnNetworkExecutor<Tout>::GetQuantizationOffset()
{
return m_interpreter->tensor(m_interpreter->inputs()[0])->params.zero_point;
}
template <typename Tout>
float ArmnnNetworkExecutor<Tout>::GetOutputQuantizationScale(int tensorIndex)
{
assert(m_interpreter->outputs().size() > static_cast<size_t>(tensorIndex));
return m_interpreter->tensor(m_interpreter->outputs()[tensorIndex])->params.scale;
}
template <typename Tout>
int ArmnnNetworkExecutor<Tout>::GetOutputQuantizationOffset(int tensorIndex)
{
assert(m_interpreter->outputs().size() > static_cast<size_t>(tensorIndex));
return m_interpreter->tensor(m_interpreter->outputs()[tensorIndex])->params.zero_point;
}
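/*
* Dequantization sketch: TfLite uses affine quantization, so a quantized
* output value q corresponds to the real value scale * (q - zeroPoint).
* Assuming a uint8 output tensor at index 0 and results filled by Run():
*
* float scale = executor.GetOutputQuantizationScale(0);
* int offset = executor.GetOutputQuantizationOffset(0);
* float real = scale * (static_cast<int>(results[0][i]) - offset);
*/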
}// namespace common