//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include "Types.hpp"

#include "armnn/ArmNN.hpp"
#include <armnn/Logging.hpp>
#include <armnn_delegate.hpp>
#include <DelegateOptions.hpp>
#include <DelegateUtils.hpp>
#include <Profiling.hpp>
#include <tensorflow/lite/builtin_ops.h>
#include <tensorflow/lite/c/builtin_op_data.h>
#include <tensorflow/lite/c/common.h>
#include <tensorflow/lite/optional_debug_tools.h>
#include <tensorflow/lite/kernels/builtin_op_kernels.h>
#include <tensorflow/lite/interpreter.h>
#include <tensorflow/lite/kernels/register.h>

#include <cassert>
#include <cstring>
#include <string>
#include <vector>

namespace common
{
/**
* @brief Loads a network through the TfLite Interpreter,
* registers the Arm NN delegate with it, and runs inference
* against a given backend.
* Currently it is assumed that the input data is a
* cv::Mat (frame); this assumption is implemented in the
* PrepareTensors method and can be generalized later.
* A usage sketch follows the class declaration below.
*
*/
template <typename Tout>
class ArmnnNetworkExecutor
{
private:
    std::unique_ptr<tflite::Interpreter> m_interpreter;
    std::unique_ptr<tflite::FlatBufferModel> m_model;
    Profiling m_profiling;

    /**
    * @brief Copies the caller-provided input buffer into the interpreter's first input tensor.
    */
    void PrepareTensors(const void* inputData, const size_t dataBytes);

    template <typename Enumeration>
    auto log_as_int(Enumeration value)
        -> typename std::underlying_type<Enumeration>::type
    {
        return static_cast<typename std::underlying_type<Enumeration>::type>(value);
    }

public:
    ArmnnNetworkExecutor() = delete;

    /**
    * @brief Creates the executor: loads the TfLite model from the given path,
    * builds the interpreter, and registers the Arm NN delegate for the
    * preferred backends.
    *
    * @param[in] modelPath - Relative path to the model file
    * @param[in] backends - The list of preferred backends to run inference on
    * @param[in] isProfilingEnabled - Enables profiling output when true
    */
    ArmnnNetworkExecutor(std::string& modelPath,
                         std::vector<armnn::BackendId>& backends,
                         bool isProfilingEnabled = false);

    /**
    * @brief Returns the dimensions (width, height) of the associated model's input image.
    */
    Size GetImageAspectRatio();

    /**
    * @brief Returns the data type of the associated model's input tensor.
    */
    armnn::DataType GetInputDataType() const;

    float GetQuantizationScale();

    int GetQuantizationOffset();

    float GetOutputQuantizationScale(int tensorIndex);

    int GetOutputQuantizationOffset(int tensorIndex);

    /**
    * @brief Runs inference on the provided input data and stores the results
    * in the provided InferenceResults object.
    *
    * @param[in] inputData - input frame data
    * @param[in] dataBytes - input data size in bytes
    * @param[out] outResults - Container of InferenceResult objects used to store the output tensors
    */
    bool Run(const void* inputData, const size_t dataBytes,
             InferenceResults<Tout>& outResults);
};
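
/*
 * Usage sketch (illustrative only, not part of the API): a hedged example of how the
 * executor is typically driven, assuming a hypothetical "model.tflite" file, an OpenCV
 * cv::Mat frame already resized to the model's input dimensions, and float output
 * tensors. The InferenceResults container comes from Types.hpp.
 *
 *     std::string modelPath = "model.tflite";
 *     std::vector<armnn::BackendId> backends = {"GpuAcc", "CpuAcc", "CpuRef"};
 *     common::ArmnnNetworkExecutor<float> executor(modelPath, backends);
 *
 *     common::InferenceResults<float> results;
 *     executor.Run(frame.data, frame.total() * frame.elemSize(), results);
 */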

template <typename Tout>
ArmnnNetworkExecutor<Tout>::ArmnnNetworkExecutor(std::string& modelPath,
                                                 std::vector<armnn::BackendId>& preferredBackends,
                                                 bool isProfilingEnabled):
                                                 m_profiling(isProfilingEnabled)
{
    m_profiling.ProfilingStart();
    armnn::OptimizerOptions optimizerOptions;
    m_model = tflite::FlatBufferModel::BuildFromFile(modelPath.c_str());
    if (m_model == nullptr)
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Failed to build the model"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }
    m_profiling.ProfilingStopAndPrintUs("Loading the model took");

    m_profiling.ProfilingStart();
    /* Build the TfLite interpreter from the model using the built-in op resolver */
    tflite::ops::builtin::BuiltinOpResolver resolver;
    tflite::InterpreterBuilder(*m_model, resolver)(&m_interpreter);
    if (m_interpreter->AllocateTensors() != kTfLiteOk)
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Failed to alloc tensors"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }
    m_profiling.ProfilingStopAndPrintUs("Create the tflite interpreter");

    /* Create delegate options */
    m_profiling.ProfilingStart();

    /* Enable the fast math optimization on the GPU and CPU backends */
    armnn::BackendOptions modelOptionGpu("GpuAcc", {{"FastMathEnabled", true}});
    optimizerOptions.m_ModelOptions.push_back(modelOptionGpu);

    armnn::BackendOptions modelOptionCpu("CpuAcc", {{"FastMathEnabled", true}});
    optimizerOptions.m_ModelOptions.push_back(modelOptionCpu);

    /* Enable the reduce-float32-to-float16 optimization */
    optimizerOptions.m_ReduceFp32ToFp16 = true;

    armnnDelegate::DelegateOptions delegateOptions(preferredBackends, optimizerOptions);

    /* Create the delegate object */
    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
            theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
                             armnnDelegate::TfLiteArmnnDelegateDelete);

    /* Register the delegate with the interpreter */
    if (m_interpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate)) != kTfLiteOk)
    {
        ARMNN_LOG(warning) << "ArmnnNetworkExecutor: Failed to apply the Arm NN delegate; "
                              "running with the default TfLite kernels";
    }
    m_profiling.ProfilingStopAndPrintUs("Create and load ArmNN Delegate");
}

template <typename Tout>
void ArmnnNetworkExecutor<Tout>::PrepareTensors(const void* inputData, const size_t dataBytes)
{
    size_t inputTensorSize = m_interpreter->input_tensor(0)->bytes;
    auto* inputTensorPtr = m_interpreter->input_tensor(0)->data.raw;
    assert(inputTensorSize >= dataBytes);
    if (inputTensorPtr != nullptr)
    {
        /* Copy only the bytes supplied by the caller; the tensor may be larger */
        memcpy(inputTensorPtr, inputData, dataBytes);
    }
    else
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: input tensor is null"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }
}

template <typename Tout>
bool ArmnnNetworkExecutor<Tout>::Run(const void* inputData, const size_t dataBytes,
                                     InferenceResults<Tout>& outResults)
{
    bool ret = false;
    m_profiling.ProfilingStart();
    PrepareTensors(inputData, dataBytes);

    if (m_interpreter->Invoke() == kTfLiteOk)
    {
        ret = true;
        // Extract the output tensor data.
        outResults.clear();
        outResults.reserve(m_interpreter->outputs().size());
        for (int index = 0; index < m_interpreter->outputs().size(); index++)
        {
            size_t size = m_interpreter->output_tensor(index)->bytes / sizeof(Tout);
            const Tout* p_Output = m_interpreter->typed_output_tensor<Tout>(index);
            if (p_Output != nullptr)
            {
                InferenceResult<Tout> outRes(p_Output, p_Output + size);
                outResults.emplace_back(outRes);
            }
            else
            {
                const std::string errorMessage{"ArmnnNetworkExecutor: p_Output tensor is null"};
                ARMNN_LOG(error) << errorMessage;
                ret = false;
            }
        }
    }
    else
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Invoke has failed"};
        ARMNN_LOG(error) << errorMessage;
    }
    m_profiling.ProfilingStopAndPrintUs("Perform inference");
    return ret;
}
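
/*
 * Result-consumption sketch (illustrative only): Run() fills outResults with one
 * InferenceResult per output tensor, each holding that tensor's values as Tout.
 * Assuming InferenceResults behaves as a container of such results (as Types.hpp
 * defines it for these samples), the outputs can be walked like this:
 *
 *     common::InferenceResults<float> results;
 *     if (executor.Run(inputData, dataBytes, results))
 *     {
 *         for (const auto& outputTensor : results)
 *         {
 *             for (float value : outputTensor)
 *             {
 *                 // post-process value here
 *             }
 *         }
 *     }
 */
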
template <typename Tout>
Size ArmnnNetworkExecutor<Tout>::GetImageAspectRatio()
{
    assert(m_interpreter->tensor(m_interpreter->inputs()[0])->dims->size == 4);
    return Size(m_interpreter->tensor(m_interpreter->inputs()[0])->dims->data[2],
                m_interpreter->tensor(m_interpreter->inputs()[0])->dims->data[1]);
}

template <typename Tout>
armnn::DataType ArmnnNetworkExecutor<Tout>::GetInputDataType() const
{
    return GetDataType(*(m_interpreter->tensor(m_interpreter->inputs()[0])));
}

template <typename Tout>
float ArmnnNetworkExecutor<Tout>::GetQuantizationScale()
{
    return m_interpreter->tensor(m_interpreter->inputs()[0])->params.scale;
}

template <typename Tout>
int ArmnnNetworkExecutor<Tout>::GetQuantizationOffset()
{
    return m_interpreter->tensor(m_interpreter->inputs()[0])->params.zero_point;
}

template <typename Tout>
float ArmnnNetworkExecutor<Tout>::GetOutputQuantizationScale(int tensorIndex)
{
    assert(m_interpreter->outputs().size() > static_cast<size_t>(tensorIndex));
    return m_interpreter->tensor(m_interpreter->outputs()[tensorIndex])->params.scale;
}

template <typename Tout>
int ArmnnNetworkExecutor<Tout>::GetOutputQuantizationOffset(int tensorIndex)
{
    assert(m_interpreter->outputs().size() > static_cast<size_t>(tensorIndex));
    return m_interpreter->tensor(m_interpreter->outputs()[tensorIndex])->params.zero_point;
}
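
/*
 * Dequantization sketch (illustrative only): callers typically combine the scale and
 * zero-point reported by the getters above to map a quantized output value q back to
 * a real value, following the usual TfLite affine scheme real = scale * (q - zero_point):
 *
 *     float scale  = executor.GetOutputQuantizationScale(0);
 *     int   offset = executor.GetOutputQuantizationOffset(0);
 *     float real   = scale * (static_cast<float>(q) - static_cast<float>(offset));
 */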

} // namespace common