//
// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//


#include "ArmNNExecutor.hpp"
#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp"

#include <armnn/IAsyncExecutionCallback.hpp>
#include <AsyncExecutionCallback.hpp>


using namespace armnn;
using namespace std::chrono;

ArmNNExecutor::ArmNNExecutor(const ExecuteNetworkParams& params, armnn::IRuntime::CreationOptions runtimeOptions)
: m_Params(params)
{
    runtimeOptions.m_EnableGpuProfiling = params.m_EnableProfiling;
    runtimeOptions.m_DynamicBackendsPath = params.m_DynamicBackendsPath;

    // Create/Get the static ArmNN Runtime. Note that the m_Runtime will be shared by all ArmNNExecutor
    // instances so the RuntimeOptions cannot be altered for different ArmNNExecutor instances.
    m_Runtime = GetRuntime(runtimeOptions);

    auto parser = CreateParser();
    auto network = parser->CreateNetwork(m_Params);
    auto optNet = OptimizeNetwork(network.get());

    m_IOInfo = GetIOInfo(optNet.get());

    armnn::ProfilingDetailsMethod profilingDetailsMethod = ProfilingDetailsMethod::Undefined;
    if (params.m_OutputDetailsOnlyToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsOnly;
    }
    else if (params.m_OutputDetailsToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsWithEvents;
    }

    INetworkProperties networkProperties{m_Params.m_Concurrent,
                                         MemorySource::Undefined,
                                         MemorySource::Undefined,
                                         params.m_EnableProfiling,
                                         profilingDetailsMethod};

    std::string errorMsg;
    Status status = m_Runtime->LoadNetwork(m_NetworkId, std::move(optNet), errorMsg, networkProperties);
    if (status != Status::Success)
    {
        std::string message("Failed to create Arm NN Executor: ");
        message.append(errorMsg);
        // Throwing an exception at this point in the constructor causes lots of problems. We'll instead mark this
        // executor as not constructed.
        ARMNN_LOG(fatal) << message;
        m_constructionFailed = true;
        return;
    }

    SetupInputsAndOutputs();

    if (m_Params.m_Iterations > 1)
    {
        std::stringstream msg;
        msg << "Network will be executed " << m_Params.m_Iterations;
        if (m_Params.m_Concurrent)
        {
            msg << " times in an asynchronous manner. ";
        }
        else
        {
            msg << " times successively. ";
        }
        msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to "
               "cover each execution.";
        ARMNN_LOG(info) << msg.str();
    }

    if (m_Params.m_GenerateTensorData)
    {
        ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
    }

    if (m_Params.m_DontPrintOutputs)
    {
        ARMNN_LOG(info) << "Printing outputs to console is disabled.";
    }
}

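// Creates a pool of working memory handles and an Arm NN Threadpool, schedules one inference per
// iteration with medium priority, then waits on the callbacks to time and print the results.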
void ArmNNExecutor::ExecuteAsync()
{
#if !defined(ARMNN_DISABLE_THREADS)
    std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
    std::unique_ptr<armnn::Threadpool> threadpool;
    armnn::AsyncCallbackManager callbackManager;
    std::unordered_map<armnn::InferenceId, const armnn::OutputTensors*> inferenceOutputMap;

    for (size_t i = 0; i < m_Params.m_ThreadPoolSize; ++i)
    {
        memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkId));
    }

    threadpool = std::make_unique<armnn::Threadpool>(m_Params.m_ThreadPoolSize,
                                                     m_Runtime,
                                                     memHandles);

    ARMNN_LOG(info) << "Asynchronous Execution with Arm NN thread pool... \n";
    // Declare the latest and earliest inference times here to be used when calculating overall time
    std::chrono::high_resolution_clock::time_point earliestStartTime =
        std::chrono::high_resolution_clock::time_point::max();
    std::chrono::high_resolution_clock::time_point latestEndTime =
        std::chrono::high_resolution_clock::now();

    // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the
    // LoadedNetwork with each scheduled inference having a specific priority
    for (size_t i = 0; i < m_Params.m_Iterations; ++i)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
        inferenceOutputMap.insert({cb->GetInferenceId(), &m_OutputTensorsVec[i]});
        threadpool->Schedule(m_NetworkId,
                             m_InputTensorsVec[i],
                             m_OutputTensorsVec[i],
                             armnn::QosExecPriority::Medium,
                             cb);
    }

    // Check the results
    for (size_t iteration = 0; iteration < m_Params.m_Iterations; ++iteration)
    {
        auto cb = callbackManager.GetNotifiedCallback();

        // Get the results
        if (earliestStartTime > cb->GetStartTime())
        {
            earliestStartTime = cb->GetStartTime();
        }
        if (latestEndTime < cb->GetEndTime())
        {
            latestEndTime = cb->GetEndTime();
        }

        auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
        auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
        auto inferenceDuration = endTime - startTime;
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
        if(!m_Params.m_DontPrintOutputs)
        {
            const armnn::OutputTensors* out = inferenceOutputMap[cb->GetInferenceId()];
            PrintOutputTensors(out, iteration);
        }
    }

    // Print duration difference between overallStartTime and overallEndTime
    auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
    auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
    auto totalInferenceDuration = overallEndTime - overallStartTime;
    ARMNN_LOG(info) << "Overall Inference time: " << std::setprecision(2)
                    << std::fixed << totalInferenceDuration.count() << " ms\n";

#endif
}

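// Runs every iteration on the calling thread via EnqueueWorkload, timing each inference and printing
// the profiler output after the final iteration when profiling is enabled.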
void ArmNNExecutor::ExecuteSync()
{
    for (size_t x = 0; x < m_Params.m_Iterations; x++)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        const auto start_time = armnn::GetTimeNow();
        armnn::Status ret;
        if (m_Params.m_ImportInputsIfAligned)
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x],
                                             m_ImportedInputIds[x],
                                             m_ImportedOutputIds[x]);
        }
        else
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x]);
        }

        const auto inferenceDuration = armnn::GetTimeDuration(start_time);

        // If profiling is enabled print out the results
        if(profiler && profiler->IsProfilingEnabled() && x == (m_Params.m_Iterations - 1))
        {
            profiler->Print(std::cout);
        }

        if(ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }

        if(!m_Params.m_DontPrintOutputs)
        {
            PrintOutputTensors(&m_OutputTensorsVec[x], x);
        }

        // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
    }
}

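// Entry point for running the network: dispatches to ExecuteSync when no thread pool was requested,
// otherwise to ExecuteAsync, and returns a pointer to each output buffer.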
std::vector<const void*> ArmNNExecutor::Execute()
{
    ARMNN_LOG(info) << "Inferences began at: "
        << std::chrono::duration_cast<std::chrono::nanoseconds>(armnn::GetTimeNow().time_since_epoch()).count()
        << " ns\n";

    if(m_Params.m_ThreadPoolSize == 0)
    {
        ExecuteSync();
    }
    else
    {
        ExecuteAsync();
    }

    ARMNN_LOG(info) << "Inferences ended at: "
        << std::chrono::duration_cast<std::chrono::nanoseconds>(armnn::GetTimeNow().time_since_epoch()).count()
        << " ns\n";

    std::vector<const void*> results;
    for (auto& output : m_OutputStorage)
    {
        results.push_back(output.m_Mem);
    }

    return results;
}

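// Logs the name, shape, data type and quantization parameters of every input and output tensor.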
void ArmNNExecutor::PrintNetworkInfo()
{
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    std::stringstream ss;
    ss << "===== Network Info =====\n";
    ss << "Inputs in order:\n";
    for (const auto& inputName : inputNames)
    {
        const auto inputInfo = m_IOInfo.m_InputInfoMap[inputName].second;
        ss << inputName << ", " << inputInfo.GetShape() << ", " << GetDataTypeName(inputInfo.GetDataType());
        if (inputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << inputInfo.GetQuantizationOffset();
            if (inputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale: inputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << inputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    ss << "Outputs in order:\n";
    for (const auto& outputName : m_IOInfo.m_OutputNames)
    {
        const auto outputInfo = m_IOInfo.m_OutputInfoMap[outputName].second;
        ss << outputName << ", " << outputInfo.GetShape() << ", " << GetDataTypeName(outputInfo.GetDataType());
        if (outputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << outputInfo.GetQuantizationOffset();
            if (outputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale: outputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << outputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    std::cout << ss.str() << std::endl;
}

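// Allocates the input/output buffers and binds them to the network. The number of input/output sets
// is derived from the number of data files supplied; if there are fewer sets than iterations, the
// existing sets are reused in a round-robin fashion at the end of this function.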
void ArmNNExecutor::SetupInputsAndOutputs()
{
    const unsigned int noOfInputs = m_IOInfo.m_InputNames.size();

    if (m_Params.m_InputNames.size() != 0 && m_Params.m_InputNames.size() != noOfInputs)
    {
        LogAndThrow("Number of input names does not match number of inputs");
    }

    const unsigned int inputFilePaths = m_Params.m_InputTensorDataFilePaths.size();
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    unsigned int noInputSets = 1;

    if (inputFilePaths != 0)
    {
        if (inputFilePaths % noOfInputs != 0)
        {
            LogAndThrow("Number of input files: " + std::to_string(inputFilePaths) +
                        " not compatible with number of inputs: " + std::to_string(noOfInputs));
        }
        noInputSets = inputFilePaths / noOfInputs;
        if (noInputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of inputs not compatible with ReuseBuffers");
        }
    }

    const unsigned int noOfOutputs = m_IOInfo.m_OutputNames.size();
    const unsigned int outputFilePaths = m_Params.m_OutputTensorFiles.size();
    unsigned int noOutputSets = 1;

    if (outputFilePaths != 0)
    {
        if (outputFilePaths % noOfOutputs != 0)
        {
            LogAndThrow("Number of output files: " + std::to_string(outputFilePaths) +
                        ", not compatible with number of outputs: " + std::to_string(noOfOutputs));
        }
        noOutputSets = outputFilePaths / noOfOutputs;

        if (noOutputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of outputs not compatible with ReuseBuffers");
        }
    }

    if (m_Params.m_ThreadPoolSize != 0)
    {
        // The current implementation of the Threadpool does not allow binding of outputs to a thread
        // So to ensure no two threads write to the same output at the same time, no output can be reused
        noOutputSets = m_Params.m_Iterations;
    }

    if (m_Params.m_InputTensorDataFilePaths.size() > noOfInputs)
    {
        ARMNN_LOG(info) << "Given network has " << noOfInputs << " input/s. One input-tensor-data file is required "
                        << "for each input. The user provided "
                        << m_Params.m_InputTensorDataFilePaths.size()
                        << " input-tensor-data file/s which will be used to fill the input/s.\n";
    }

    unsigned int inputCount = 0;
    for(unsigned int inputSet = 0; inputSet < noInputSets; ++inputSet)
    {
        armnn::InputTensors inputTensors;
        for (const auto& inputName: inputNames)
        {
            armnn::BindingPointInfo bindingPointInfo;
            try
            {
                bindingPointInfo = m_IOInfo.m_InputInfoMap.at(inputName);
            }
            catch (const std::out_of_range& e)
            {
                LogAndThrow("Input with inputName: " + inputName + " not found.");
            }

            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;
            auto newInfo = armnn::TensorInfo{tensorInfo.GetShape(), tensorInfo.GetDataType(),
                                             tensorInfo.GetQuantizationScale(),
                                             tensorInfo.GetQuantizationOffset(),
                                             true};

            m_InputStorage.emplace_back(IOStorage{tensorInfo.GetNumBytes()});

            const int bindingId = bindingPointInfo.first;
            inputTensors.emplace_back(bindingId, armnn::ConstTensor{newInfo, m_InputStorage.back().m_Mem});

            const armnn::Optional<std::string> dataFile = m_Params.m_GenerateTensorData ?
                                                          armnn::EmptyOptional() :
                                                          armnn::MakeOptional<std::string>(
                                                              m_Params.m_InputTensorDataFilePaths.at(inputCount++));

            switch (tensorInfo.GetDataType())
            {
                case armnn::DataType::Float32:
                {
                    auto typedTensor = reinterpret_cast<float*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<float>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS16:
                {
                    auto typedTensor = reinterpret_cast<int16_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int16_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS8:
                case armnn::DataType::QAsymmS8:
                {
                    auto typedTensor = reinterpret_cast<int8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QAsymmU8:
                {
                    auto typedTensor = reinterpret_cast<uint8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<uint8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::Signed32:
                {
                    auto typedTensor = reinterpret_cast<int32_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int32_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                default:
                {
                    LogAndThrow("Unexpected DataType");
                }
            }

        }

        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedInputIds.push_back(
                m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc));
        }
        m_InputTensorsVec.emplace_back(inputTensors);
    }

    for(unsigned int outputSet = 0; outputSet < noOutputSets; ++outputSet)
    {
        armnn::OutputTensors outputTensors;
        for (const auto& output: m_IOInfo.m_OutputInfoMap)
        {
            const armnn::BindingPointInfo& bindingPointInfo = output.second;
            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;

            m_OutputStorage.emplace_back(tensorInfo.GetNumBytes());
            outputTensors.emplace_back(bindingPointInfo.first, armnn::Tensor{tensorInfo, m_OutputStorage.back().m_Mem});
        }
        m_OutputTensorsVec.emplace_back(outputTensors);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(
                m_Runtime->ImportOutputs(m_NetworkId, m_OutputTensorsVec.back(), armnn::MemorySource::Malloc));
        }
    }

    // If iterations > noSets fill the remaining iterations repeating the given files
    // If iterations < noSets just ignore the extra files
    const unsigned int remainingInputSets = (m_Params.m_Iterations > noInputSets)
                                            ? m_Params.m_Iterations - noInputSets
                                            : 0;
    for (unsigned int i = 0; i < remainingInputSets; ++i)
    {
        m_InputTensorsVec.push_back(m_InputTensorsVec[i % noInputSets]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedInputIds.push_back(m_ImportedInputIds[i % noInputSets]);
        }
    }

    const unsigned int remainingOutputSets = (m_Params.m_Iterations > noOutputSets)
                                             ? m_Params.m_Iterations - noOutputSets
                                             : 0;
    for (unsigned int i = 0; i < remainingOutputSets; ++i)
    {
        m_OutputTensorsVec.push_back(m_OutputTensorsVec[i % noOutputSets]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(m_ImportedOutputIds[i % noOutputSets]);
        }
    }
}

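// Walks the optimized graph with an IStrategy visitor, recording the binding id and tensor info of
// every Input and Output layer.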
ArmNNExecutor::IOInfo ArmNNExecutor::GetIOInfo(armnn::IOptimizedNetwork* optNet)
{
    struct IOStrategy : armnn::IStrategy
    {
        void ExecuteStrategy(const armnn::IConnectableLayer* layer,
                             const armnn::BaseDescriptor& descriptor,
                             const std::vector<armnn::ConstTensor>& constants,
                             const char* name,
                             const armnn::LayerBindingId id = 0) override
        {
            armnn::IgnoreUnused(descriptor, constants, id);
            switch (layer->GetType())
            {
                case armnn::LayerType::Input:
                {
                    m_IOInfo.m_InputNames.emplace_back(name);
                    m_IOInfo.m_InputInfoMap[name] = {id, layer->GetOutputSlot(0).GetTensorInfo()};
                    break;
                }
                case armnn::LayerType::Output:
                {
                    m_IOInfo.m_OutputNames.emplace_back(name);
                    m_IOInfo.m_OutputInfoMap[name] = {id, layer->GetInputSlot(0).GetTensorInfo()};
                    break;
                }
                default: {}
            }
        }
        IOInfo m_IOInfo;
    };

    IOStrategy ioStrategy;
    optNet->ExecuteStrategy(ioStrategy);

    return ioStrategy.m_IOInfo;
}

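// Translates the command-line parameters into OptimizerOptionsOpaque plus GpuAcc/CpuAcc backend
// options before calling armnn::Optimize, and optionally dumps the optimized graph to a dot file.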
armnn::IOptimizedNetworkPtr ArmNNExecutor::OptimizeNetwork(armnn::INetwork* network)
{
    armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};

    armnn::OptimizerOptionsOpaque options;
    options.SetReduceFp32ToFp16(m_Params.m_EnableFp16TurboMode);
    options.SetDebugEnabled(m_Params.m_PrintIntermediate);
    options.SetDebugToFileEnabled(m_Params.m_PrintIntermediateOutputsToFile);
    options.SetShapeInferenceMethod(m_Params.m_InferOutputShape ?
                                    armnn::ShapeInferenceMethod::InferAndValidate :
                                    armnn::ShapeInferenceMethod::ValidateOnly);
    options.SetProfilingEnabled(m_Params.m_EnableProfiling);
    options.SetAllowExpandedDims(m_Params.m_AllowExpandedDims);

    armnn::BackendOptions gpuAcc("GpuAcc",
                                 {
                                     { "FastMathEnabled", m_Params.m_EnableFastMath },
                                     { "SaveCachedNetwork", m_Params.m_SaveCachedNetwork },
                                     { "CachedNetworkFilePath", m_Params.m_CachedNetworkFilePath },
                                     { "MLGOTuningFilePath", m_Params.m_MLGOTuningFilePath }
                                 });

    armnn::BackendOptions cpuAcc("CpuAcc",
                                 {
                                     { "FastMathEnabled", m_Params.m_EnableFastMath },
                                     { "NumberOfThreads", m_Params.m_NumberOfThreads }
                                 });
    options.AddModelOption(gpuAcc);
    options.AddModelOption(cpuAcc);
    // The shapeInferenceMethod and allowExpandedDims values have to be added to the model options
    // because these are what are passed to the OptimizeSubgraphViews method and are used to create
    // the new optimized INetwork that method uses
    armnn::BackendOptions allowExDimOpt("AllowExpandedDims",
                                        {
                                            { "AllowExpandedDims", m_Params.m_AllowExpandedDims }
                                        });
    options.AddModelOption(allowExDimOpt);
    armnn::BackendOptions shapeInferOpt("ShapeInferenceMethod",
                                        {
                                            { "InferAndValidate", m_Params.m_InferOutputShape }
                                        });
    options.AddModelOption(shapeInferOpt);

    const auto optimization_start_time = armnn::GetTimeNow();
    optNet = armnn::Optimize(*network, m_Params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);

    ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
                    << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms\n";

    if (!optNet)
    {
        LogAndThrow("Optimize returned nullptr");
    }

    // If the -v, --visualize-optimized-model option is enabled, construct a file name for the dot file.
    if (m_Params.m_EnableLayerDetails)
    {
        fs::path filename = m_Params.m_ModelPath;
        filename.replace_extension("dot");
        std::fstream file(filename.c_str(), std::ios_base::out);
        optNet->SerializeToDot(file);
    }

    return optNet;
}

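// Selects a parser from the model file extension (.armnn, .tflite or .onnx) and throws if the
// extension is unknown or the corresponding parser was not built in.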
std::unique_ptr<ArmNNExecutor::IParser> ArmNNExecutor::CreateParser()
{
    const fs::path modelFilename = m_Params.m_ModelPath;
    const std::string modelExtension = modelFilename.extension();

    m_Params.m_IsModelBinary = modelExtension != ".json";
    std::unique_ptr<IParser> parser = nullptr;
    // Forward to implementation based on the parser type
    if (modelExtension == ".armnn")
    {
#if defined(ARMNN_SERIALIZER)
        parser = std::make_unique<ArmNNDeserializer>();
#else
        LogAndThrow("Not built with serialization support.");
#endif
    }
    else if (modelExtension == ".tflite")
    {
#if defined(ARMNN_TF_LITE_PARSER)
        parser = std::make_unique<TfliteParser>(m_Params);
#else
        LogAndThrow("Not built with Tensorflow-Lite parser support.");
#endif
    }
    else if (modelExtension == ".onnx")
    {
#if defined(ARMNN_ONNX_PARSER)
        parser = std::make_unique<OnnxParser>();
#else
        LogAndThrow("Not built with Onnx parser support.");
#endif
    }
    if (parser == nullptr)
    {
        throw InvalidArgumentException("Unable to determine the model type based on the file name extension.");
    }
    return parser;
}

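// Prints every output tensor of one iteration to stdout and, when output files were supplied,
// writes each tensor to its matching file.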
void ArmNNExecutor::PrintOutputTensors(const armnn::OutputTensors* outputTensors,
                                       unsigned int iteration)
{
    auto findOutputName = [&](const armnn::LayerBindingId id)
    {
        for (auto it = m_IOInfo.m_OutputInfoMap.begin(); it != m_IOInfo.m_OutputInfoMap.end(); ++it)
        {
            if (id == it->second.first)
            {
                return it->first;
            }
        }
        return std::string{};
    };

    unsigned int outputIndex = 0;
    unsigned int numOutputs = outputTensors->size();
    for (const auto& output: *outputTensors)
    {
        const auto bindingName = findOutputName(output.first);
        // We've made sure before that the number of output files either equals numOutputs, in which
        // case we overwrite those files when processing the results of each iteration (only the result
        // of the last iteration will be stored), or that there are enough
        // output files for each output of each iteration.
        size_t outputFileIndex = iteration * numOutputs + outputIndex;
        if (!m_Params.m_OutputTensorFiles.empty())
        {
            outputFileIndex = outputFileIndex % m_Params.m_OutputTensorFiles.size();
            ARMNN_LOG(info) << "Writing output: " << bindingName << " bindingId: '"
                            << output.first
                            << "' of iteration: " << iteration + 1 << " to file: '"
                            << m_Params.m_OutputTensorFiles[outputFileIndex] << "'";
        }

        const armnn::Optional<std::string> outputTensorFile = m_Params.m_OutputTensorFiles.empty() ?
                                                              armnn::EmptyOptional() :
                                                              armnn::MakeOptional<std::string>(
                                                                  m_Params.m_OutputTensorFiles[outputFileIndex]);

        OutputWriteInfo outputWriteInfo
        {
            outputTensorFile,
            bindingName,
            output.second,
            !m_Params.m_DontPrintOutputs,
            output.second.GetDataType()
        };

        std::cout << bindingName << ": ";
        std::vector<float> values;
        switch (output.second.GetDataType())
        {
            case armnn::DataType::Float32:
            {
                PrintTensor<float>(outputWriteInfo, "%f ");
                break;
            }

            case armnn::DataType::Signed32:
            {
                PrintTensor<int>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::Signed64:
            {
                PrintTensor<int64_t>(outputWriteInfo, "%ld ");
                break;
            }
            case armnn::DataType::QSymmS8:
            case armnn::DataType::QAsymmS8:
            {
                PrintTensor<int8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::QAsymmU8:
            case armnn::DataType::Boolean:
            {
                PrintTensor<uint8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::Float16:
            case armnn::DataType::QSymmS16:
            case armnn::DataType::BFloat16:
            default:
            {
                LogAndThrow("Unexpected DataType");
            }
        }
        std::cout << "\n";
        ++outputIndex;
    }
}

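// Compares this executor's output buffers against another set of outputs and prints the
// byte-level root mean square error for each output tensor.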
void ArmNNExecutor::CompareAndPrintResult(std::vector<const void*> otherOutput)
{
    unsigned int index = 0;
    std::string typeString;
    for (const auto& outputTensors: m_OutputTensorsVec)
    {
        for (const auto& outputTensor: outputTensors)
        {
            size_t size = outputTensor.second.GetNumBytes();
            double result = ComputeByteLevelRMSE(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
            std::cout << "Byte level root mean square error: " << result << "\n";
        }
    }
}
#if defined(ARMNN_SERIALIZER)
ArmNNExecutor::ArmNNDeserializer::ArmNNDeserializer() : m_Parser(armnnDeserializer::IDeserializer::Create()){}

armnn::INetworkPtr ArmNNExecutor::ArmNNDeserializer::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;

    std::ifstream file(modelPath, std::ios::binary);
    return m_Parser->CreateNetworkFromBinary(file);
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkInputBindingInfo(0, inputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkOutputBindingInfo(0, outputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}
#endif

#if defined(ARMNN_TF_LITE_PARSER)
ArmNNExecutor::TfliteParser::TfliteParser(const ExecuteNetworkParams& params)
{
    armnnTfLiteParser::ITfLiteParser::TfLiteParserOptions options;
    options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
    options.m_InferAndValidate = params.m_InferOutputShape;
    options.m_AllowExpandedDims = params.m_AllowExpandedDims;

    m_Parser = armnnTfLiteParser::ITfLiteParser::Create(options);
}

armnn::INetworkPtr ArmNNExecutor::TfliteParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    return m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetInputBindingPointInfo(size_t subgraphId,
                                                                              const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(subgraphId, inputName);
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetOutputBindingPointInfo(size_t subgraphId,
                                                                               const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(subgraphId, outputName);
}
#endif


#if defined(ARMNN_ONNX_PARSER)
ArmNNExecutor::OnnxParser::OnnxParser() : m_Parser(armnnOnnxParser::IOnnxParser::Create()){}

armnn::INetworkPtr ArmNNExecutor::OnnxParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    m_Parser = armnnOnnxParser::IOnnxParser::Create();
    std::map<std::string, armnn::TensorShape> inputShapes;
    if(!params.m_InputTensorShapes.empty())
    {
        const size_t numInputShapes = params.m_InputTensorShapes.size();
        const size_t numInputBindings = params.m_InputNames.size();
        if(numInputShapes < numInputBindings)
        {
            throw armnn::Exception(
                fmt::format("Not every input has its tensor shape specified: expected={0}, got={1}",
                            numInputBindings, numInputShapes));
        }

        for (size_t i = 0; i < numInputShapes; i++)
        {
            inputShapes[params.m_InputNames[i]] = params.m_InputTensorShapes[i];
        }

        return params.m_IsModelBinary ?
               m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) :
               m_Parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes);
    }

    // Handle text and binary input differently by calling the corresponding parser function
    return params.m_IsModelBinary ?
           m_Parser->CreateNetworkFromBinaryFile(params.m_ModelPath.c_str()) :
           m_Parser->CreateNetworkFromTextFile(params.m_ModelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(inputName);
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(outputName);
}
#endif