//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
#include "Types.hpp"
#include "armnn/ArmNN.hpp"
#include "armnnTfLiteParser/ITfLiteParser.hpp"
#include "armnnUtils/DataLayoutIndexed.hpp"
#include <armnn/Logging.hpp>

#include <cassert>
#include <sstream>
#include <string>
#include <type_traits>
#include <vector>
namespace common
{
/**
* @brief Loads a network through ArmNN and runs inference on it against a given backend.
*
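* Example usage (a minimal sketch; the model path and backend list are illustrative
* assumptions, not values defined by this header):
* @code
* std::string modelPath = "model.tflite";
* std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc, armnn::Compute::CpuRef};
* common::ArmnnNetworkExecutor<float> executor(modelPath, backends);
* @endcode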
*/
template <class Tout>
class ArmnnNetworkExecutor
{
private:
    armnn::IRuntimePtr m_Runtime;
    armnn::NetworkId m_NetId{};
    mutable InferenceResults<Tout> m_OutputBuffer;
    armnn::InputTensors m_InputTensors;
    armnn::OutputTensors m_OutputTensors;
    std::vector<armnnTfLiteParser::BindingPointInfo> m_outputBindingInfo;
    std::vector<std::string> m_outputLayerNamesList;
    armnnTfLiteParser::BindingPointInfo m_inputBindingInfo;

    void PrepareTensors(const void* inputData, const size_t dataBytes);

    template <typename Enumeration>
    auto log_as_int(Enumeration value)
        -> typename std::underlying_type<Enumeration>::type
    {
        return static_cast<typename std::underlying_type<Enumeration>::type>(value);
    }
public:
    ArmnnNetworkExecutor() = delete;

    /**
    * @brief Creates the network from the given model file, parsed through the TfLiteParser
    * and optimized for the given backends.
    *
    * Note that the order of the output layer names in m_outputLayerNamesList affects the
    * order of the feature vectors in the output of the Run method.
    *
    * @param[in] modelPath - Relative path to the model file
    * @param[in] backends - The list of preferred backends to run inference on
    */
    ArmnnNetworkExecutor(std::string& modelPath,
                         std::vector<armnn::BackendId>& backends);
    /**
    * @brief Returns the width and height of the model's input image, in that order.
    */
    Size GetImageAspectRatio();

    /**
    * @brief Returns the data type of the model's input tensor.
    */
    armnn::DataType GetInputDataType() const;

    /**
    * @brief Returns the quantization scale of the model's input tensor.
    */
    float GetQuantizationScale();

    /**
    * @brief Returns the quantization offset (zero point) of the model's input tensor.
    */
    int GetQuantizationOffset();
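
    /*
    * Quantization note: the scale and offset above describe the affine mapping
    * real = scale * (quantized - offset) used by quantized input tensors. A minimal
    * sketch of quantizing one value (the names are illustrative, not part of this API):
    *
    *     float realValue = 0.5f;
    *     auto q = static_cast<uint8_t>(realValue / executor.GetQuantizationScale()
    *                                   + executor.GetQuantizationOffset());
    */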
    /**
    * @brief Runs inference on the provided input data and stores the results in the
    * provided InferenceResults object.
    *
    * @param[in] inputData - Input frame data
    * @param[in] dataBytes - Input data size in bytes
    * @param[out] outResults - InferenceResults object used to store the output results
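    *
    * Example (a minimal sketch; frameData and frameBytes are illustrative assumptions):
    * @code
    * common::InferenceResults<float> results;
    * if (executor.Run(frameData, frameBytes, results))
    * {
    *     // Consume the per-output-layer feature vectors in results.
    * }
    * @endcode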
    */
    bool Run(const void* inputData, const size_t dataBytes, common::InferenceResults<Tout>& outResults);
};
template <class Tout>
ArmnnNetworkExecutor<Tout>::ArmnnNetworkExecutor(std::string& modelPath,
                                                 std::vector<armnn::BackendId>& preferredBackends)
: m_Runtime(armnn::IRuntime::Create(armnn::IRuntime::CreationOptions()))
{
    // Import the TensorFlow Lite model.
    armnnTfLiteParser::ITfLiteParserPtr parser = armnnTfLiteParser::ITfLiteParser::Create();
    armnn::INetworkPtr network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());

    // Bind the first input tensor and all output tensors of the first subgraph.
    std::vector<std::string> inputNames = parser->GetSubgraphInputTensorNames(0);
    m_inputBindingInfo = parser->GetNetworkInputBindingInfo(0, inputNames[0]);

    m_outputLayerNamesList = parser->GetSubgraphOutputTensorNames(0);
    for (const std::string& name : m_outputLayerNamesList)
    {
        m_outputBindingInfo.push_back(parser->GetNetworkOutputBindingInfo(0, name));
    }
    std::vector<std::string> errorMessages;
    // Optimize the network.
    armnn::IOptimizedNetworkPtr optNet = Optimize(*network,
                                                  preferredBackends,
                                                  m_Runtime->GetDeviceSpec(),
                                                  armnn::OptimizerOptions(),
                                                  armnn::Optional<std::vector<std::string>&>(errorMessages));
    if (!optNet)
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Failed to optimize network"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }
    // Load the optimized network onto the m_Runtime device.
    std::string errorMessage;
    if (armnn::Status::Success != m_Runtime->LoadNetwork(m_NetId, std::move(optNet), errorMessage))
    {
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }
    // Pre-allocate memory for the output (its size never changes).
    m_OutputBuffer.reserve(m_outputLayerNamesList.size());
    for (size_t it = 0; it < m_outputLayerNamesList.size(); ++it)
    {
        const armnn::TensorShape& tensorShape = m_outputBindingInfo[it].second.GetShape();
        std::vector<Tout> oneLayerOutResult;
        oneLayerOutResult.resize(tensorShape.GetNumElements(), 0);
        m_OutputBuffer.emplace_back(oneLayerOutResult);
    }

    // Make ArmNN output tensors pointing into the pre-allocated buffers. This must run
    // after all buffers are filled so the data pointers remain valid.
    m_OutputTensors.reserve(m_OutputBuffer.size());
    for (size_t it = 0; it < m_OutputBuffer.size(); ++it)
    {
        m_OutputTensors.emplace_back(std::make_pair(
                m_outputBindingInfo[it].first,
                armnn::Tensor(m_outputBindingInfo[it].second,
                              m_OutputBuffer.at(it).data())
        ));
    }
}
template <class Tout>
armnn::DataType ArmnnNetworkExecutor<Tout>::GetInputDataType() const
{
    return m_inputBindingInfo.second.GetDataType();
}
template <class Tout>
void ArmnnNetworkExecutor<Tout>::PrepareTensors(const void* inputData, const size_t dataBytes)
{
    assert(m_inputBindingInfo.second.GetNumBytes() >= dataBytes);
    m_InputTensors.clear();
    m_InputTensors = {{m_inputBindingInfo.first, armnn::ConstTensor(m_inputBindingInfo.second, inputData)}};
}
template <class Tout>
bool ArmnnNetworkExecutor<Tout>::Run(const void* inputData, const size_t dataBytes, InferenceResults<Tout>& outResults)
{
    ARMNN_LOG(debug) << "Preparing tensors...";
    this->PrepareTensors(inputData, dataBytes);

    ARMNN_LOG(trace) << "Running inference...";
    armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetId, m_InputTensors, m_OutputTensors);

    std::stringstream inferenceFinished;
    inferenceFinished << "Inference finished with code {" << log_as_int(ret) << "}\n";
    ARMNN_LOG(trace) << inferenceFinished.str();

    if (ret == armnn::Status::Failure)
    {
        ARMNN_LOG(error) << "Failed to perform inference.";
    }

    // Copy the pre-allocated output buffers into the caller's results.
    outResults = m_OutputBuffer;
    return (armnn::Status::Success == ret);
}
template <class Tout>
float ArmnnNetworkExecutor<Tout>::GetQuantizationScale()
{
    return this->m_inputBindingInfo.second.GetQuantizationScale();
}
template <class Tout>
int ArmnnNetworkExecutor<Tout>::GetQuantizationOffset()
{
    return this->m_inputBindingInfo.second.GetQuantizationOffset();
}
template <class Tout>
Size ArmnnNetworkExecutor<Tout>::GetImageAspectRatio()
{
    const auto shape = m_inputBindingInfo.second.GetShape();
    assert(shape.GetNumDimensions() == 4);

    armnnUtils::DataLayoutIndexed nhwc(armnn::DataLayout::NHWC);
    return Size(shape[nhwc.GetWidthIndex()],
                shape[nhwc.GetHeightIndex()]);
}
} // namespace common