//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include "Types.hpp"

#include "armnn/ArmNN.hpp"
#include "armnnTfLiteParser/ITfLiteParser.hpp"
#include "armnnUtils/DataLayoutIndexed.hpp"
#include <armnn/Logging.hpp>

#include <cassert>
#include <sstream>
#include <string>
#include <type_traits>
#include <vector>

namespace common
{
/**
* @brief Loads a network through ArmNN and runs inference on it against a given backend.
*
* A usage sketch follows the class declaration below.
*/
template <class Tout>
class ArmnnNetworkExecutor
{
private:
    armnn::IRuntimePtr m_Runtime;
    armnn::NetworkId m_NetId{};
    mutable InferenceResults<Tout> m_OutputBuffer;
    armnn::InputTensors m_InputTensors;
    armnn::OutputTensors m_OutputTensors;
    std::vector<armnnTfLiteParser::BindingPointInfo> m_outputBindingInfo;

    std::vector<std::string> m_outputLayerNamesList;

    armnnTfLiteParser::BindingPointInfo m_inputBindingInfo;

    void PrepareTensors(const void* inputData, const size_t dataBytes);

    template <typename Enumeration>
    auto log_as_int(Enumeration value)
    -> typename std::underlying_type<Enumeration>::type
    {
        return static_cast<typename std::underlying_type<Enumeration>::type>(value);
    }

public:
    ArmnnNetworkExecutor() = delete;

    /**
     * @brief Loads the model from file, parses it with the TfLite parser and optimizes it for the
     *        given backends.
     *
     * Note that the order of the output layer names in m_outputLayerNamesList determines the order
     * of the feature vectors returned by the Run method.
     *
     * @param[in] modelPath - Relative path to the model file
     * @param[in] backends - The list of preferred backends to run inference on
     */
    ArmnnNetworkExecutor(std::string& modelPath,
                         std::vector<armnn::BackendId>& backends);

    /**
     * @brief Returns the aspect ratio of the associated model in the order of width, height.
     */
    Size GetImageAspectRatio();

    armnn::DataType GetInputDataType() const;

    float GetQuantizationScale();

    int GetQuantizationOffset();

    float GetOutputQuantizationScale(int tensorIndex);

    int GetOutputQuantizationOffset(int tensorIndex);

    /**
     * @brief Runs inference on the provided input data and stores the results in the provided
     *        InferenceResults object.
     *
     * @param[in] inputData - input frame data
     * @param[in] dataBytes - input data size in bytes
     * @param[out] outResults - InferenceResults object used to store the output results
     */
    bool Run(const void* inputData, const size_t dataBytes, common::InferenceResults<Tout>& outResults);

};
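
/*
 * Usage sketch (illustrative only; not part of the original header). It assumes a float32 model
 * stored at "model.tflite", that common::InferenceResults<float> behaves like a vector of
 * per-output-layer result vectors, and that PreProcessFrame is a hypothetical helper that fills
 * a buffer matching the model's input tensor:
 *
 *     std::string modelPath = "model.tflite";
 *     std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc, armnn::Compute::CpuRef };
 *     common::ArmnnNetworkExecutor<float> executor(modelPath, backends);
 *
 *     std::vector<float> input = PreProcessFrame(frame, executor.GetImageAspectRatio());
 *     common::InferenceResults<float> results;
 *     if (executor.Run(input.data(), input.size() * sizeof(float), results))
 *     {
 *         // results[i] is the flattened output of the i-th entry in m_outputLayerNamesList.
 *     }
 */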

template <class Tout>
ArmnnNetworkExecutor<Tout>::ArmnnNetworkExecutor(std::string& modelPath,
                                                 std::vector<armnn::BackendId>& preferredBackends)
    : m_Runtime(armnn::IRuntime::Create(armnn::IRuntime::CreationOptions()))
{
    // Import the TensorFlow Lite model.
    armnnTfLiteParser::ITfLiteParserPtr parser = armnnTfLiteParser::ITfLiteParser::Create();
    armnn::INetworkPtr network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());

    std::vector<std::string> inputNames = parser->GetSubgraphInputTensorNames(0);

    m_inputBindingInfo = parser->GetNetworkInputBindingInfo(0, inputNames[0]);

    m_outputLayerNamesList = parser->GetSubgraphOutputTensorNames(0);

    // Collect binding info for every output layer.
    for (const std::string& name : m_outputLayerNamesList)
    {
        m_outputBindingInfo.push_back(parser->GetNetworkOutputBindingInfo(0, name));
    }

    // Optimize the network for the preferred backends.
    std::vector<std::string> errorMessages;
    armnn::IOptimizedNetworkPtr optNet = Optimize(*network,
                                                  preferredBackends,
                                                  m_Runtime->GetDeviceSpec(),
                                                  armnn::OptimizerOptions(),
                                                  armnn::Optional<std::vector<std::string>&>(errorMessages));

    if (!optNet)
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Failed to optimize network"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }

    // Load the optimized network onto the m_Runtime device.
    std::string errorMessage;
    if (armnn::Status::Success != m_Runtime->LoadNetwork(m_NetId, std::move(optNet), errorMessage))
    {
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }

    // Pre-allocate memory for the outputs (their size never changes).
    for (size_t outputIdx = 0; outputIdx < m_outputLayerNamesList.size(); ++outputIdx)
    {
        const armnn::TensorShape& tensorShape = m_outputBindingInfo[outputIdx].second.GetShape();

        std::vector<Tout> oneLayerOutResult;
        oneLayerOutResult.resize(tensorShape.GetNumElements(), 0);
        m_OutputBuffer.emplace_back(oneLayerOutResult);
    }

    // Make ArmNN output tensors, one per output binding, backed by the pre-allocated buffers.
    m_OutputTensors.reserve(m_OutputBuffer.size());
    for (size_t it = 0; it < m_OutputBuffer.size(); ++it)
    {
        m_OutputTensors.emplace_back(std::make_pair(
                m_outputBindingInfo[it].first,
                armnn::Tensor(m_outputBindingInfo[it].second,
                              m_OutputBuffer.at(it).data())
        ));
    }
}

template <class Tout>
armnn::DataType ArmnnNetworkExecutor<Tout>::GetInputDataType() const
{
    return m_inputBindingInfo.second.GetDataType();
}

template <class Tout>
void ArmnnNetworkExecutor<Tout>::PrepareTensors(const void* inputData, const size_t dataBytes)
{
    assert(m_inputBindingInfo.second.GetNumBytes() >= dataBytes);
    m_InputTensors.clear();
    m_InputTensors = {{ m_inputBindingInfo.first, armnn::ConstTensor(m_inputBindingInfo.second, inputData)}};
}

template <class Tout>
bool ArmnnNetworkExecutor<Tout>::Run(const void* inputData, const size_t dataBytes, InferenceResults<Tout>& outResults)
{
    // Prepare the input tensor for the new frame.
    ARMNN_LOG(debug) << "Preparing tensors...";
    this->PrepareTensors(inputData, dataBytes);
    ARMNN_LOG(trace) << "Running inference...";

    armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetId, m_InputTensors, m_OutputTensors);

    std::stringstream inferenceFinished;
    inferenceFinished << "Inference finished with code {" << log_as_int(ret) << "}\n";

    ARMNN_LOG(trace) << inferenceFinished.str();

    if (ret == armnn::Status::Failure)
    {
        ARMNN_LOG(error) << "Failed to perform inference.";
    }

    // Copy the pre-allocated output buffers into the caller's results object.
    outResults = m_OutputBuffer;

    return (armnn::Status::Success == ret);
}

template <class Tout>
float ArmnnNetworkExecutor<Tout>::GetQuantizationScale()
{
    return this->m_inputBindingInfo.second.GetQuantizationScale();
}

template <class Tout>
int ArmnnNetworkExecutor<Tout>::GetQuantizationOffset()
{
    return this->m_inputBindingInfo.second.GetQuantizationOffset();
}

template <class Tout>
float ArmnnNetworkExecutor<Tout>::GetOutputQuantizationScale(int tensorIndex)
{
    assert(this->m_outputLayerNamesList.size() > tensorIndex);
    return this->m_outputBindingInfo[tensorIndex].second.GetQuantizationScale();
}

template <class Tout>
int ArmnnNetworkExecutor<Tout>::GetOutputQuantizationOffset(int tensorIndex)
{
    assert(this->m_outputLayerNamesList.size() > tensorIndex);
    return this->m_outputBindingInfo[tensorIndex].second.GetQuantizationOffset();
}
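
/*
 * Illustrative note (not from the original file): for asymmetric quantized outputs, the scale and
 * offset returned by the two accessors above recover real values via the usual affine mapping
 * real = scale * (quantized - offset). A minimal sketch, assuming uint8 output data:
 *
 *     float Dequantize(uint8_t quantized, float scale, int offset)
 *     {
 *         return scale * (static_cast<int>(quantized) - offset);
 *     }
 *
 *     // e.g. Dequantize(rawValue, executor.GetOutputQuantizationScale(0),
 *     //                 executor.GetOutputQuantizationOffset(0));
 */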

template <class Tout>
Size ArmnnNetworkExecutor<Tout>::GetImageAspectRatio()
{
    const auto shape = m_inputBindingInfo.second.GetShape();
    assert(shape.GetNumDimensions() == 4);
    armnnUtils::DataLayoutIndexed nhwc(armnn::DataLayout::NHWC);
    return Size(shape[nhwc.GetWidthIndex()],
                shape[nhwc.GetHeightIndex()]);
}
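
/*
 * Worked example (illustrative): for an NHWC input shape of { 1, 480, 640, 3 } (N, H, W, C),
 * GetImageAspectRatio() returns Size(640, 480), i.e. width followed by height.
 */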
}// namespace common