| // |
| // Copyright © 2020 Arm Ltd and Contributors. All rights reserved. |
| // SPDX-License-Identifier: MIT |
| // |
| |
| #pragma once |
| |
| #include "Types.hpp" |
| |
| #include "armnn/ArmNN.hpp" |
| #include "armnnTfLiteParser/ITfLiteParser.hpp" |
| #include "armnnUtils/DataLayoutIndexed.hpp" |
| #include <armnn/Logging.hpp> |
| |
| #include <string> |
| #include <vector> |
| |
| namespace common |
| { |
| /** |
| * @brief Used to load in a network through ArmNN and run inference on it against a given backend. |
| * |
| */ |
| template <class Tout> |
| class ArmnnNetworkExecutor |
| { |
| private: |
| armnn::IRuntimePtr m_Runtime; |
| armnn::NetworkId m_NetId{}; |
| mutable InferenceResults<Tout> m_OutputBuffer; |
| armnn::InputTensors m_InputTensors; |
| armnn::OutputTensors m_OutputTensors; |
    std::vector<armnnTfLiteParser::BindingPointInfo> m_outputBindingInfo;
    std::vector<std::string> m_outputLayerNamesList;
    armnnTfLiteParser::BindingPointInfo m_inputBindingInfo;
| |
| void PrepareTensors(const void* inputData, const size_t dataBytes); |
| |
    // Casts an enum value to its underlying integral type so it can be logged.
    template <typename Enumeration>
    auto log_as_int(Enumeration value)
    -> typename std::underlying_type<Enumeration>::type
    {
        return static_cast<typename std::underlying_type<Enumeration>::type>(value);
    }
| |
| public: |
| ArmnnNetworkExecutor() = delete; |
| |
| /** |
| * @brief Initializes the network with the given input data. Parsed through TfLiteParser and optimized for a |
| * given backend. |
| * |
| * Note that the output layers names order in m_outputLayerNamesList affects the order of the feature vectors |
| * in output of the Run method. |
| * |
| * * @param[in] modelPath - Relative path to the model file |
| * * @param[in] backends - The list of preferred backends to run inference on |
| */ |
    ArmnnNetworkExecutor(std::string& modelPath,
                         std::vector<armnn::BackendId>& preferredBackends);
| |
| /** |
| * @brief Returns the aspect ratio of the associated model in the order of width, height. |
| */ |
| Size GetImageAspectRatio(); |
| |
    /**
    * @brief Returns the data type of the model's input tensor.
    */
    armnn::DataType GetInputDataType() const;

    /**
    * @brief Returns the quantization scale of the model's input tensor.
    */
    float GetQuantizationScale();

    /**
    * @brief Returns the quantization offset of the model's input tensor.
    */
    int GetQuantizationOffset();
| |
| /** |
| * @brief Runs inference on the provided input data, and stores the results in the provided InferenceResults object. |
| * |
| * @param[in] inputData - input frame data |
| * @param[in] dataBytes - input data size in bytes |
| * @param[out] results - Vector of DetectionResult objects used to store the output result. |
| */ |
| bool Run(const void* inputData, const size_t dataBytes, common::InferenceResults<Tout>& outResults); |
| |
| }; |
| |
| template <class Tout> |
| ArmnnNetworkExecutor<Tout>::ArmnnNetworkExecutor(std::string& modelPath, |
| std::vector<armnn::BackendId>& preferredBackends) |
| : m_Runtime(armnn::IRuntime::Create(armnn::IRuntime::CreationOptions())) |
| { |
    // Import the TensorFlow Lite model.
| armnnTfLiteParser::ITfLiteParserPtr parser = armnnTfLiteParser::ITfLiteParser::Create(); |
| armnn::INetworkPtr network = parser->CreateNetworkFromBinaryFile(modelPath.c_str()); |
| |
    std::vector<std::string> inputNames = parser->GetSubgraphInputTensorNames(0);

    // Only the first input tensor of the subgraph is bound; the executor assumes a single-input model.
    m_inputBindingInfo = parser->GetNetworkInputBindingInfo(0, inputNames[0]);
| |
| m_outputLayerNamesList = parser->GetSubgraphOutputTensorNames(0); |
| |
    for (const std::string& name : m_outputLayerNamesList)
    {
        m_outputBindingInfo.push_back(parser->GetNetworkOutputBindingInfo(0, name));
    }

    std::vector<std::string> errorMessages;
    // Optimize the network for the preferred backends.
| armnn::IOptimizedNetworkPtr optNet = Optimize(*network, |
| preferredBackends, |
| m_Runtime->GetDeviceSpec(), |
| armnn::OptimizerOptions(), |
| armnn::Optional<std::vector<std::string>&>(errorMessages)); |
| |
| if (!optNet) |
| { |
| const std::string errorMessage{"ArmnnNetworkExecutor: Failed to optimize network"}; |
| ARMNN_LOG(error) << errorMessage; |
| throw armnn::Exception(errorMessage); |
| } |
| |
| // Load the optimized network onto the m_Runtime device |
| std::string errorMessage; |
| if (armnn::Status::Success != m_Runtime->LoadNetwork(m_NetId, std::move(optNet), errorMessage)) |
| { |
| ARMNN_LOG(error) << errorMessage; |
| throw armnn::Exception(errorMessage); |
| } |
| |
    // Pre-allocate memory for the output buffers (their sizes never change).
    m_OutputBuffer.reserve(m_outputLayerNamesList.size());
    for (size_t outputIndex = 0; outputIndex < m_outputLayerNamesList.size(); ++outputIndex)
    {
        const armnn::TensorShape& tensorShape = m_outputBindingInfo[outputIndex].second.GetShape();

        std::vector<Tout> oneLayerOutResult;
        oneLayerOutResult.resize(tensorShape.GetNumElements(), 0);
        m_OutputBuffer.emplace_back(oneLayerOutResult);
    }

    // Make the Arm NN output tensors only after every buffer has been allocated,
    // so the data pointers captured here cannot be invalidated by reallocation.
    m_OutputTensors.reserve(m_OutputBuffer.size());
    for (size_t outputIndex = 0; outputIndex < m_OutputBuffer.size(); ++outputIndex)
    {
        m_OutputTensors.emplace_back(std::make_pair(
                m_outputBindingInfo[outputIndex].first,
                armnn::Tensor(m_outputBindingInfo[outputIndex].second,
                              m_OutputBuffer.at(outputIndex).data())
        ));
    }
| |
| } |
| |
| template <class Tout> |
| armnn::DataType ArmnnNetworkExecutor<Tout>::GetInputDataType() const |
| { |
| return m_inputBindingInfo.second.GetDataType(); |
| } |
| |
| template <class Tout> |
| void ArmnnNetworkExecutor<Tout>::PrepareTensors(const void* inputData, const size_t dataBytes) |
| { |
    // The supplied data must fit within the model's input tensor.
    assert(m_inputBindingInfo.second.GetNumBytes() >= dataBytes);
| m_InputTensors.clear(); |
| m_InputTensors = {{ m_inputBindingInfo.first, armnn::ConstTensor(m_inputBindingInfo.second, inputData)}}; |
| } |
| |
| template <class Tout> |
| bool ArmnnNetworkExecutor<Tout>::Run(const void* inputData, const size_t dataBytes, InferenceResults<Tout>& outResults) |
| { |
    // Rebind the input tensor to the new frame data.
    ARMNN_LOG(debug) << "Preparing tensors...";
| this->PrepareTensors(inputData, dataBytes); |
| ARMNN_LOG(trace) << "Running inference..."; |
| |
| armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetId, m_InputTensors, m_OutputTensors); |
| |
| std::stringstream inferenceFinished; |
| inferenceFinished << "Inference finished with code {" << log_as_int(ret) << "}\n"; |
| |
| ARMNN_LOG(trace) << inferenceFinished.str(); |
| |
| if (ret == armnn::Status::Failure) |
| { |
| ARMNN_LOG(error) << "Failed to perform inference."; |
| } |
| |
    // Copy the pre-allocated output buffers into the caller's results object.
    outResults = m_OutputBuffer;
| |
| return (armnn::Status::Success == ret); |
| } |
| |
| template <class Tout> |
| float ArmnnNetworkExecutor<Tout>::GetQuantizationScale() |
| { |
| return this->m_inputBindingInfo.second.GetQuantizationScale(); |
| } |
| |
| template <class Tout> |
| int ArmnnNetworkExecutor<Tout>::GetQuantizationOffset() |
| { |
| return this->m_inputBindingInfo.second.GetQuantizationOffset(); |
| } |
| |
| template <class Tout> |
| Size ArmnnNetworkExecutor<Tout>::GetImageAspectRatio() |
| { |
| const auto shape = m_inputBindingInfo.second.GetShape(); |
| assert(shape.GetNumDimensions() == 4); |
| armnnUtils::DataLayoutIndexed nhwc(armnn::DataLayout::NHWC); |
| return Size(shape[nhwc.GetWidthIndex()], |
| shape[nhwc.GetHeightIndex()]); |
| } |
} // namespace common