//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
#include "Types.hpp"
#include "armnn/ArmNN.hpp"
#include "armnnTfLiteParser/ITfLiteParser.hpp"
#include "armnnUtils/DataLayoutIndexed.hpp"
#include <armnn/Logging.hpp>

#include <cassert>
#include <sstream>
#include <string>
#include <type_traits>
#include <vector>
namespace common
{
/**
* @brief Loads a network through ArmNN and runs inference on it against a given backend.
*
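* Example usage (a minimal sketch; the model path and backend list are illustrative
* assumptions, not values defined by this header):
* @code
* std::string modelPath = "model.tflite";
* std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc, armnn::Compute::CpuRef};
* common::ArmnnNetworkExecutor<float> executor(modelPath, backends);
* @endcode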
*/
template <class Tout>
class ArmnnNetworkExecutor
{
private:
    armnn::IRuntimePtr m_Runtime;
    armnn::NetworkId m_NetId{};
    mutable InferenceResults<Tout> m_OutputBuffer;
    armnn::InputTensors m_InputTensors;
    armnn::OutputTensors m_OutputTensors;
    std::vector<armnnTfLiteParser::BindingPointInfo> m_outputBindingInfo;
    std::vector<std::string> m_outputLayerNamesList;
    armnnTfLiteParser::BindingPointInfo m_inputBindingInfo;

    void PrepareTensors(const void* inputData, const size_t dataBytes);

    template <typename Enumeration>
    auto log_as_int(Enumeration value)
        -> typename std::underlying_type<Enumeration>::type
    {
        return static_cast<typename std::underlying_type<Enumeration>::type>(value);
    }
public:
    ArmnnNetworkExecutor() = delete;

    /**
    * @brief Creates the network from the given model file, parsed through the TfLiteParser
    * and optimized for the given backends.
    *
    * Note that the order of the output layer names in m_outputLayerNamesList affects the
    * order of the feature vectors in the output of the Run method.
    *
    * @param[in] modelPath - Relative path to the model file
    * @param[in] backends - The list of preferred backends to run inference on
    */
    ArmnnNetworkExecutor(std::string& modelPath,
                         std::vector<armnn::BackendId>& backends);
    /**
    * @brief Returns the width and height of the model's input image, in that order.
    */
    Size GetImageAspectRatio();

    /**
    * @brief Returns the data type of the model's input tensor.
    */
    armnn::DataType GetInputDataType() const;

    /**
    * @brief Returns the quantization scale of the model's input tensor.
    */
    float GetQuantizationScale();

    /**
    * @brief Returns the quantization offset (zero point) of the model's input tensor.
    */
    int GetQuantizationOffset();
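
    /*
    * Quantization note: the scale and offset above describe the affine mapping
    * real = scale * (quantized - offset) used by quantized input tensors. A minimal
    * sketch of quantizing one value (the names are illustrative, not part of this API):
    *
    *     float realValue = 0.5f;
    *     auto q = static_cast<uint8_t>(realValue / executor.GetQuantizationScale()
    *                                   + executor.GetQuantizationOffset());
    */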
    /**
    * @brief Runs inference on the provided input data and stores the results in the
    * provided InferenceResults object.
    *
    * @param[in] inputData - Input frame data
    * @param[in] dataBytes - Input data size in bytes
    * @param[out] outResults - InferenceResults object used to store the output results
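    *
    * Example (a minimal sketch; frameData and frameBytes are illustrative assumptions):
    * @code
    * common::InferenceResults<float> results;
    * if (executor.Run(frameData, frameBytes, results))
    * {
    *     // Consume the per-output-layer feature vectors in results.
    * }
    * @endcode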
    */
    bool Run(const void* inputData, const size_t dataBytes, common::InferenceResults<Tout>& outResults);
};
template <class Tout>
ArmnnNetworkExecutor<Tout>::ArmnnNetworkExecutor(std::string& modelPath,
                                                 std::vector<armnn::BackendId>& preferredBackends)
: m_Runtime(armnn::IRuntime::Create(armnn::IRuntime::CreationOptions()))
{
    // Import the TensorFlow Lite model.
    armnnTfLiteParser::ITfLiteParserPtr parser = armnnTfLiteParser::ITfLiteParser::Create();
    armnn::INetworkPtr network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());

    // Bind the first input tensor and all output tensors of the first subgraph.
    std::vector<std::string> inputNames = parser->GetSubgraphInputTensorNames(0);
    m_inputBindingInfo = parser->GetNetworkInputBindingInfo(0, inputNames[0]);

    m_outputLayerNamesList = parser->GetSubgraphOutputTensorNames(0);
    for (const std::string& name : m_outputLayerNamesList)
    {
        m_outputBindingInfo.push_back(parser->GetNetworkOutputBindingInfo(0, name));
    }
    std::vector<std::string> errorMessages;
    // Optimize the network.
    armnn::IOptimizedNetworkPtr optNet = Optimize(*network,
                                                  preferredBackends,
                                                  m_Runtime->GetDeviceSpec(),
                                                  armnn::OptimizerOptions(),
                                                  armnn::Optional<std::vector<std::string>&>(errorMessages));
    if (!optNet)
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Failed to optimize network"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }
    // Load the optimized network onto the m_Runtime device.
    std::string errorMessage;
    if (armnn::Status::Success != m_Runtime->LoadNetwork(m_NetId, std::move(optNet), errorMessage))
    {
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }
    // Pre-allocate memory for the output (its size never changes).
    m_OutputBuffer.reserve(m_outputLayerNamesList.size());
    for (size_t it = 0; it < m_outputLayerNamesList.size(); ++it)
    {
        const armnn::TensorShape& tensorShape = m_outputBindingInfo[it].second.GetShape();
        std::vector<Tout> oneLayerOutResult;
        oneLayerOutResult.resize(tensorShape.GetNumElements(), 0);
        m_OutputBuffer.emplace_back(oneLayerOutResult);
    }

    // Make ArmNN output tensors pointing into the pre-allocated buffers. This must run
    // after all buffers are filled so the data pointers remain valid.
    m_OutputTensors.reserve(m_OutputBuffer.size());
    for (size_t it = 0; it < m_OutputBuffer.size(); ++it)
    {
        m_OutputTensors.emplace_back(std::make_pair(
                m_outputBindingInfo[it].first,
                armnn::Tensor(m_outputBindingInfo[it].second,
                              m_OutputBuffer.at(it).data())
        ));
    }
}
template <class Tout>
armnn::DataType ArmnnNetworkExecutor<Tout>::GetInputDataType() const
{
    return m_inputBindingInfo.second.GetDataType();
}
template <class Tout>
void ArmnnNetworkExecutor<Tout>::PrepareTensors(const void* inputData, const size_t dataBytes)
{
    assert(m_inputBindingInfo.second.GetNumBytes() >= dataBytes);
    m_InputTensors.clear();
    m_InputTensors = {{m_inputBindingInfo.first, armnn::ConstTensor(m_inputBindingInfo.second, inputData)}};
}
template <class Tout>
bool ArmnnNetworkExecutor<Tout>::Run(const void* inputData, const size_t dataBytes, InferenceResults<Tout>& outResults)
{
    ARMNN_LOG(debug) << "Preparing tensors...";
    this->PrepareTensors(inputData, dataBytes);

    ARMNN_LOG(trace) << "Running inference...";
    armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetId, m_InputTensors, m_OutputTensors);

    std::stringstream inferenceFinished;
    inferenceFinished << "Inference finished with code {" << log_as_int(ret) << "}\n";
    ARMNN_LOG(trace) << inferenceFinished.str();

    if (ret == armnn::Status::Failure)
    {
        ARMNN_LOG(error) << "Failed to perform inference.";
    }

    // Copy the pre-allocated output buffers into the caller's results.
    outResults = m_OutputBuffer;
    return (armnn::Status::Success == ret);
}
template <class Tout>
float ArmnnNetworkExecutor<Tout>::GetQuantizationScale()
{
    return this->m_inputBindingInfo.second.GetQuantizationScale();
}
template <class Tout>
int ArmnnNetworkExecutor<Tout>::GetQuantizationOffset()
{
    return this->m_inputBindingInfo.second.GetQuantizationOffset();
}
template <class Tout>
Size ArmnnNetworkExecutor<Tout>::GetImageAspectRatio()
{
    const auto shape = m_inputBindingInfo.second.GetShape();
    assert(shape.GetNumDimensions() == 4);

    armnnUtils::DataLayoutIndexed nhwc(armnn::DataLayout::NHWC);
    return Size(shape[nhwc.GetWidthIndex()],
                shape[nhwc.GetHeightIndex()]);
}
} // namespace common