//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//


#include "ArmNNExecutor.hpp"
#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp"

#include <armnn/IAsyncExecutionCallback.hpp>
#include <AsyncExecutionCallback.hpp>


using namespace armnn;
using namespace std::chrono;

ArmNNExecutor::ArmNNExecutor(const ExecuteNetworkParams& params, armnn::IRuntime::CreationOptions runtimeOptions)
: m_Params(params)
{
    runtimeOptions.m_EnableGpuProfiling = params.m_EnableProfiling;
    runtimeOptions.m_DynamicBackendsPath = params.m_DynamicBackendsPath;
    m_Runtime = armnn::IRuntime::Create(runtimeOptions);

    auto parser = CreateParser();
    auto network = parser->CreateNetwork(m_Params);
    auto optNet = OptimizeNetwork(network.get());

    m_IOInfo = GetIOInfo(optNet.get());
    SetupInputsAndOutputs();

    armnn::ProfilingDetailsMethod profilingDetailsMethod = ProfilingDetailsMethod::Undefined;
    if (params.m_OutputDetailsOnlyToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsOnly;
    }
    else if (params.m_OutputDetailsToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsWithEvents;
    }

    INetworkProperties networkProperties{m_Params.m_Concurrent,
                                         MemorySource::Undefined,
                                         MemorySource::Undefined,
                                         params.m_EnableProfiling,
                                         profilingDetailsMethod};

    std::string errorMsg;
    Status status = m_Runtime->LoadNetwork(m_NetworkId, std::move(optNet), errorMsg, networkProperties);
    if (status != Status::Success)
    {
        std::string message("Failed to create Arm NN Executor: ");
        message.append(errorMsg);
        // Throwing an exception at this point in the constructor causes lots of problems. We'll instead mark this
        // executor as not constructed.
        ARMNN_LOG(fatal) << message;
        m_constructionFailed = true;
        return;
    }

    if (m_Params.m_Iterations > 1)
    {
        std::stringstream msg;
        msg << "Network will be executed " << m_Params.m_Iterations;
        if (m_Params.m_Concurrent)
        {
            msg << " times in an asynchronous manner. ";
        }
        else
        {
            msg << " times successively. ";
        }
        msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to "
               "cover each execution.";
        ARMNN_LOG(info) << msg.str();
    }

    if (m_Params.m_GenerateTensorData)
    {
        ARMNN_LOG(warning) << "The input data was generated; note that the output will not be useful.";
    }

    if (m_Params.m_DontPrintOutputs)
    {
        ARMNN_LOG(info) << "Printing outputs to console is disabled.";
    }
}

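// Schedules m_Params.m_Iterations inferences on the Arm NN Threadpool (one working memory handle is
// created per pool thread) and waits for the results through the AsyncCallbackManager, which also
// supplies the per-inference start/end timestamps used for the timing summary below.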
void ArmNNExecutor::ExecuteAsync()
{
    std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
    std::unique_ptr<armnn::Threadpool> threadpool;
    armnn::AsyncCallbackManager callbackManager;
    std::unordered_map<armnn::InferenceId, const armnn::OutputTensors*> inferenceOutputMap;

    for (size_t i = 0; i < m_Params.m_ThreadPoolSize; ++i)
    {
        memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkId));
    }

    threadpool = std::make_unique<armnn::Threadpool>(m_Params.m_ThreadPoolSize,
                                                     m_Runtime.get(),
                                                     memHandles);

    ARMNN_LOG(info) << "Asynchronous Execution with Arm NN thread pool... \n";
    // Declare the latest and earliest inference times here to be used when calculating overall time
    std::chrono::high_resolution_clock::time_point earliestStartTime =
        std::chrono::high_resolution_clock::time_point::max();
    std::chrono::high_resolution_clock::time_point latestEndTime =
        std::chrono::high_resolution_clock::now();

    // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the
    // LoadedNetwork with each scheduled inference having a specific priority
    for (size_t i = 0; i < m_Params.m_Iterations; ++i)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
        inferenceOutputMap.insert({cb->GetInferenceId(), &m_OutputTensorsVec[i]});
        threadpool->Schedule(m_NetworkId,
                             m_InputTensorsVec[i],
                             m_OutputTensorsVec[i],
                             armnn::QosExecPriority::Medium,
                             cb);
    }

    // Check the results
    for (size_t iteration = 0; iteration < m_Params.m_Iterations; ++iteration)
    {
        auto cb = callbackManager.GetNotifiedCallback();

        // Get the results
        if (earliestStartTime > cb->GetStartTime())
        {
            earliestStartTime = cb->GetStartTime();
        }
        if (latestEndTime < cb->GetEndTime())
        {
            latestEndTime = cb->GetEndTime();
        }

        auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
        auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
        auto inferenceDuration = endTime - startTime;
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
        if (!m_Params.m_DontPrintOutputs)
        {
            const armnn::OutputTensors* out = inferenceOutputMap[cb->GetInferenceId()];
            PrintOutputTensors(out, iteration);
        }
    }

    // Print duration difference between overallStartTime and overallEndTime
    auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
    auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
    auto totalInferenceDuration = overallEndTime - overallStartTime;
    ARMNN_LOG(info) << "Overall Inference time: " << std::setprecision(2)
                    << std::fixed << totalInferenceDuration.count() << " ms\n";
}

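// Runs m_Params.m_Iterations inferences one after another on the loaded network via EnqueueWorkload,
// passing the pre-imported input/output IDs when m_ImportInputsIfAligned is set.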
void ArmNNExecutor::ExecuteSync()
{
    for (size_t x = 0; x < m_Params.m_Iterations; x++)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        const auto start_time = armnn::GetTimeNow();
        armnn::Status ret;
        if (m_Params.m_ImportInputsIfAligned)
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x],
                                             m_ImportedInputIds[x],
                                             m_ImportedOutputIds[x]);
        }
        else
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x]);
        }

        const auto inferenceDuration = armnn::GetTimeDuration(start_time);

        // If profiling is enabled print out the results on the last iteration
        if (profiler && profiler->IsProfilingEnabled() && x == (m_Params.m_Iterations - 1))
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }

        if (!m_Params.m_DontPrintOutputs)
        {
            PrintOutputTensors(&m_OutputTensorsVec[x], x);
        }

        // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
    }
}

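// Entry point for running the loaded network: a thread pool size of 0 selects the synchronous path,
// anything else the asynchronous one. Returns a pointer to each output buffer so the results can be
// compared against another execution (see CompareAndPrintResult).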
std::vector<const void*> ArmNNExecutor::Execute()
{
    if (m_Params.m_ThreadPoolSize == 0)
    {
        ExecuteSync();
    }
    else
    {
        ExecuteAsync();
    }
    std::vector<const void*> results;
    for (auto& output : m_OutputStorage)
    {
        results.push_back(output.m_Mem);
    }

    return results;
}

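// Logs the name, shape, data type and (where applicable) quantization parameters of every input and
// output of the loaded network.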
void ArmNNExecutor::PrintNetworkInfo()
{
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    std::stringstream ss;
    ss << "===== Network Info =====\n";
    ss << "Inputs in order:\n";
    for (const auto& inputName : inputNames)
    {
        const auto inputInfo = m_IOInfo.m_InputInfoMap[inputName].second;
        ss << inputName << ", " << inputInfo.GetShape() << ", " << GetDataTypeName(inputInfo.GetDataType());
        if (inputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << inputInfo.GetQuantizationOffset();
            if (inputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale : inputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << inputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    ss << "Outputs in order:\n";
    for (const auto& outputName : m_IOInfo.m_OutputNames)
    {
        const auto outputInfo = m_IOInfo.m_OutputInfoMap[outputName].second;
        ss << outputName << ", " << outputInfo.GetShape() << ", " << GetDataTypeName(outputInfo.GetDataType());
        if (outputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << outputInfo.GetQuantizationOffset();
            if (outputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale : outputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << outputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    std::cout << ss.str() << std::endl;
}

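// Allocates input/output storage and binds it into m_InputTensorsVec/m_OutputTensorsVec.
// A "set" is one group of files covering every input (or output); when fewer sets than iterations
// are supplied, the existing sets are reused round-robin at the end of this function.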
void ArmNNExecutor::SetupInputsAndOutputs()
{
    const unsigned int noOfInputs = m_IOInfo.m_InputNames.size();

    if (m_Params.m_InputNames.size() != 0 && m_Params.m_InputNames.size() != noOfInputs)
    {
        LogAndThrow("Number of input names does not match number of inputs");
    }

    const unsigned int inputFilePaths = m_Params.m_InputTensorDataFilePaths.size();
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    unsigned int noInputSets = 1;

    if (inputFilePaths != 0)
    {
        if (inputFilePaths % noOfInputs != 0)
        {
            LogAndThrow("Number of input files: " + std::to_string(inputFilePaths) +
                        " not compatible with number of inputs: " + std::to_string(noOfInputs));
        }
        noInputSets = inputFilePaths / noOfInputs;
        if (noInputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of inputs not compatible with ReuseBuffers");
        }
    }

    const unsigned int noOfOutputs = m_IOInfo.m_OutputNames.size();
    const unsigned int outputFilePaths = m_Params.m_OutputTensorFiles.size();
    unsigned int noOutputSets = 1;

    if (outputFilePaths != 0)
    {
        if (outputFilePaths % noOfOutputs != 0)
        {
            LogAndThrow("Number of output files: " + std::to_string(outputFilePaths) +
                        ", not compatible with number of outputs: " + std::to_string(noOfOutputs));
        }
        noOutputSets = outputFilePaths / noOfOutputs;

        if (noOutputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of outputs not compatible with ReuseBuffers");
        }
    }

    if (m_Params.m_ThreadPoolSize != 0)
    {
        // The current implementation of the Threadpool does not allow binding of outputs to a thread.
        // So to ensure no two threads write to the same output at the same time, no output can be reused.
        noOutputSets = m_Params.m_Iterations;
    }

    if (m_Params.m_InputTensorDataFilePaths.size() > noOfInputs)
    {
        ARMNN_LOG(info) << "Given network has " << noOfInputs << " input/s. One input-tensor-data file is required "
                        << "for each input. The user provided "
                        << m_Params.m_InputTensorDataFilePaths.size()
                        << " input-tensor-data file/s which will be used to fill the input/s.\n";
    }

    unsigned int inputCount = 0;
    for (unsigned int inputSet = 0; inputSet < noInputSets; ++inputSet)
    {
        armnn::InputTensors inputTensors;
        for (const auto& inputName : inputNames)
        {
            armnn::BindingPointInfo bindingPointInfo;
            try
            {
                bindingPointInfo = m_IOInfo.m_InputInfoMap.at(inputName);
            }
            catch (const std::out_of_range& e)
            {
                LogAndThrow("Input with inputName: " + inputName + " not found.");
            }

            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;
            auto newInfo = armnn::TensorInfo{tensorInfo.GetShape(), tensorInfo.GetDataType(),
                                             tensorInfo.GetQuantizationScale(),
                                             tensorInfo.GetQuantizationOffset(),
                                             true};

            m_InputStorage.emplace_back(IOStorage{tensorInfo.GetNumBytes()});

            const int bindingId = bindingPointInfo.first;
            inputTensors.emplace_back(bindingId, armnn::ConstTensor{newInfo, m_InputStorage.back().m_Mem});

            const armnn::Optional<std::string> dataFile = m_Params.m_GenerateTensorData ?
                                                          armnn::EmptyOptional() :
                                                          armnn::MakeOptional<std::string>(
                                                              m_Params.m_InputTensorDataFilePaths.at(inputCount++));

            switch (tensorInfo.GetDataType())
            {
                case armnn::DataType::Float32:
                {
                    auto typedTensor = reinterpret_cast<float*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<float>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS16:
                {
                    auto typedTensor = reinterpret_cast<int16_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int16_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS8:
                case armnn::DataType::QAsymmS8:
                {
                    auto typedTensor = reinterpret_cast<int8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QAsymmU8:
                {
                    auto typedTensor = reinterpret_cast<uint8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<uint8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::Signed32:
                {
                    auto typedTensor = reinterpret_cast<int32_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int32_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                default:
                {
                    LogAndThrow("Unexpected DataType");
                }
            }

            if (m_Params.m_ImportInputsIfAligned)
            {
                m_ImportedInputIds.push_back(
                    m_Runtime->ImportInputs(m_NetworkId, m_InputTensorsVec.back(), armnn::MemorySource::Malloc));
            }
        }
        m_InputTensorsVec.emplace_back(inputTensors);
    }

    for (unsigned int outputSet = 0; outputSet < noOutputSets; ++outputSet)
    {
        armnn::OutputTensors outputTensors;
        for (const auto& output : m_IOInfo.m_OutputInfoMap)
        {
            const armnn::BindingPointInfo& bindingPointInfo = output.second;
            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;

            m_OutputStorage.emplace_back(tensorInfo.GetNumBytes());
            outputTensors.emplace_back(bindingPointInfo.first, armnn::Tensor{tensorInfo, m_OutputStorage.back().m_Mem});
        }
        m_OutputTensorsVec.emplace_back(outputTensors);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(
                m_Runtime->ImportOutputs(m_NetworkId, m_OutputTensorsVec.back(), armnn::MemorySource::Malloc));
        }
    }

    // If iterations > noSets, fill the remaining iterations by repeating the given files.
    // If iterations < noSets, just ignore the extra files.
    const unsigned int remainingInputSets = (m_Params.m_Iterations > noInputSets)
                                            ? m_Params.m_Iterations - noInputSets
                                            : 0;
    for (unsigned int i = 0; i < remainingInputSets; ++i)
    {
        m_InputTensorsVec.push_back(m_InputTensorsVec[i % noInputSets]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedInputIds.push_back(m_ImportedInputIds[i % noInputSets]);
        }
    }

    const unsigned int remainingOutputSets = (m_Params.m_Iterations > noOutputSets)
                                             ? m_Params.m_Iterations - noOutputSets
                                             : 0;
    for (unsigned int i = 0; i < remainingOutputSets; ++i)
    {
        m_OutputTensorsVec.push_back(m_OutputTensorsVec[i % noOutputSets]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(m_ImportedOutputIds[i % noOutputSets]);
        }
    }
}

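// Walks the optimized network with an IStrategy visitor and records the binding id and TensorInfo of
// every Input and Output layer, keyed by layer name.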
ArmNNExecutor::IOInfo ArmNNExecutor::GetIOInfo(armnn::IOptimizedNetwork* optNet)
{
    struct IOStrategy : armnn::IStrategy
    {
        void ExecuteStrategy(const armnn::IConnectableLayer* layer,
                             const armnn::BaseDescriptor& descriptor,
                             const std::vector<armnn::ConstTensor>& constants,
                             const char* name,
                             const armnn::LayerBindingId id = 0) override
        {
            armnn::IgnoreUnused(descriptor, constants, id);
            switch (layer->GetType())
            {
                case armnn::LayerType::Input:
                {
                    m_IOInfo.m_InputNames.emplace_back(name);
                    m_IOInfo.m_InputInfoMap[name] = {id, layer->GetOutputSlot(0).GetTensorInfo()};
                    break;
                }
                case armnn::LayerType::Output:
                {
                    m_IOInfo.m_OutputNames.emplace_back(name);
                    m_IOInfo.m_OutputInfoMap[name] = {id, layer->GetInputSlot(0).GetConnection()->GetTensorInfo()};
                    break;
                }
                default: {}
            }
        }
        IOInfo m_IOInfo;
    };

    IOStrategy ioStrategy;
    optNet->ExecuteStrategy(ioStrategy);

    return ioStrategy.m_IOInfo;
}

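// Translates the command-line parameters into OptimizerOptions and backend-specific BackendOptions
// (GpuAcc tuning/caching, CpuAcc thread count) before calling armnn::Optimize. Throws if the
// optimizer returns a null network.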
armnn::IOptimizedNetworkPtr ArmNNExecutor::OptimizeNetwork(armnn::INetwork* network)
{
    armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};

    armnn::OptimizerOptions options;
    options.m_ReduceFp32ToFp16 = m_Params.m_EnableFp16TurboMode;
    options.m_ReduceFp32ToBf16 = m_Params.m_EnableBf16TurboMode;
    options.m_Debug = m_Params.m_PrintIntermediate;
    options.m_DebugToFile = m_Params.m_PrintIntermediateOutputsToFile;
    options.m_shapeInferenceMethod = m_Params.m_InferOutputShape ?
                                     armnn::ShapeInferenceMethod::InferAndValidate :
                                     armnn::ShapeInferenceMethod::ValidateOnly;
    options.m_ProfilingEnabled = m_Params.m_EnableProfiling;
    options.m_AllowExpandedDims = m_Params.m_AllowExpandedDims;

    armnn::BackendOptions gpuAcc("GpuAcc",
                                 {
                                     { "FastMathEnabled", m_Params.m_EnableFastMath },
                                     { "SaveCachedNetwork", m_Params.m_SaveCachedNetwork },
                                     { "CachedNetworkFilePath", m_Params.m_CachedNetworkFilePath },
                                     { "MLGOTuningFilePath", m_Params.m_MLGOTuningFilePath }
                                 });

    armnn::BackendOptions cpuAcc("CpuAcc",
                                 {
                                     { "FastMathEnabled", m_Params.m_EnableFastMath },
                                     { "NumberOfThreads", m_Params.m_NumberOfThreads }
                                 });
    options.m_ModelOptions.push_back(gpuAcc);
    options.m_ModelOptions.push_back(cpuAcc);
    // The shapeInferenceMethod and allowExpandedDims values have to be added to the model options
    // because they are passed to the OptimizeSubgraphViews method, which uses them to create the
    // new optimized INetwork.
    armnn::BackendOptions allowExDimOpt("AllowExpandedDims",
                                        {
                                            { "AllowExpandedDims", m_Params.m_AllowExpandedDims }
                                        });
    options.m_ModelOptions.push_back(allowExDimOpt);
    armnn::BackendOptions shapeInferOpt("ShapeInferenceMethod",
                                        {
                                            { "InferAndValidate", m_Params.m_InferOutputShape }
                                        });
    options.m_ModelOptions.push_back(shapeInferOpt);

    const auto optimization_start_time = armnn::GetTimeNow();
    optNet = armnn::Optimize(*network, m_Params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);

    ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
                    << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms\n";

    if (!optNet)
    {
        LogAndThrow("Optimize returned nullptr");
    }

    // If the -v,--visualize-optimized-model option is enabled then construct a file name for the dot file.
    if (m_Params.m_EnableLayerDetails)
    {
        fs::path filename = m_Params.m_ModelPath;
        filename.replace_extension("dot");
        std::fstream file(filename.c_str(), std::ios_base::out);
        optNet->SerializeToDot(file);
    }

    return optNet;
}

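// Chooses a parser from the model file extension: ".armnn" selects the Arm NN deserializer, ".tflite"
// the TfLite parser and ".onnx" the ONNX parser. Anything other than ".json" is treated as a binary model.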
std::unique_ptr<ArmNNExecutor::IParser> ArmNNExecutor::CreateParser()
{
    const fs::path modelFilename = m_Params.m_ModelPath;
    const std::string modelExtension = modelFilename.extension();

    m_Params.m_IsModelBinary = modelExtension != ".json";
    std::unique_ptr<IParser> parser = nullptr;
    // Forward to implementation based on the parser type
    if (modelExtension == ".armnn")
    {
#if defined(ARMNN_SERIALIZER)
        parser = std::make_unique<ArmNNDeserializer>();
#else
        LogAndThrow("Not built with serialization support.");
#endif
    }
    else if (modelExtension == ".tflite")
    {
#if defined(ARMNN_TF_LITE_PARSER)
        parser = std::make_unique<TfliteParser>(m_Params);
#else
        LogAndThrow("Not built with TensorFlow Lite parser support.");
#endif
    }
    else if (modelExtension == ".onnx")
    {
#if defined(ARMNN_ONNX_PARSER)
        parser = std::make_unique<OnnxParser>();
#else
        LogAndThrow("Not built with ONNX parser support.");
#endif
    }

    return parser;
}

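// Prints (and optionally writes to file) every output tensor of one iteration. When output files are
// supplied, the file index is computed per iteration and wraps around if fewer files than outputs exist.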
void ArmNNExecutor::PrintOutputTensors(const armnn::OutputTensors* outputTensors,
                                       unsigned int iteration)
{
    auto findOutputName = [&](const armnn::LayerBindingId id)
    {
        for (auto it = m_IOInfo.m_OutputInfoMap.begin(); it != m_IOInfo.m_OutputInfoMap.end(); ++it)
        {
            if (id == it->second.first)
            {
                return it->first;
            }
        }
        return std::string{};
    };

    unsigned int outputIndex = 0;
    unsigned int numOutputs = outputTensors->size();
    for (const auto& output : *outputTensors)
    {
        const auto bindingName = findOutputName(output.first);
        // We've made sure before that the number of output files either equals numOutputs, in which
        // case we overwrite those files when processing the results of each iteration (only the result
        // of the last iteration will be stored), or there are enough output files for each output of
        // each iteration.
        size_t outputFileIndex = iteration * numOutputs + outputIndex;
        if (!m_Params.m_OutputTensorFiles.empty())
        {
            outputFileIndex = outputFileIndex % m_Params.m_OutputTensorFiles.size();
            ARMNN_LOG(info) << "Writing output: " << bindingName << " bindingId: '"
                            << output.first
                            << "' of iteration: " << iteration + 1 << " to file: '"
                            << m_Params.m_OutputTensorFiles[outputFileIndex] << "'";
        }

        const armnn::Optional<std::string> outputTensorFile = m_Params.m_OutputTensorFiles.empty() ?
                                                              armnn::EmptyOptional() :
                                                              armnn::MakeOptional<std::string>(
                                                                  m_Params.m_OutputTensorFiles[outputFileIndex]);

        OutputWriteInfo outputWriteInfo
        {
            outputTensorFile,
            bindingName,
            output.second,
            !m_Params.m_DontPrintOutputs
        };

        std::cout << bindingName << ": ";
        std::vector<float> values;
        switch (output.second.GetDataType())
        {
            case armnn::DataType::Float32:
            {
                PrintTensor<float>(outputWriteInfo, "%f ");
                break;
            }
            case armnn::DataType::Signed32:
            {
                PrintTensor<int>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::QSymmS8:
            case armnn::DataType::QAsymmS8:
            {
                PrintTensor<int8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::QAsymmU8:
            {
                PrintTensor<uint8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::Float16:
            case armnn::DataType::QSymmS16:
            case armnn::DataType::BFloat16:
            case armnn::DataType::Boolean:
            case armnn::DataType::Signed64:
            default:
            {
                LogAndThrow("Unexpected DataType");
            }
        }
        std::cout << "\n";
        ++outputIndex;
    }
}

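// Computes the RMSE between each of this executor's output tensors and the corresponding buffer in
// otherOutput (typically the result of running the same network through another executor) and prints it.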
void ArmNNExecutor::CompareAndPrintResult(std::vector<const void*> otherOutput)
{
    unsigned int index = 0;

    for (const auto& outputTensors : m_OutputTensorsVec)
    {
        for (const auto& outputTensor : outputTensors)
        {
            float result = 0;
            size_t size = outputTensor.second.GetNumBytes();

            switch (outputTensor.second.GetDataType())
            {
                case armnn::DataType::Float32:
                {
                    result = ComputeRMSE<float>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::Signed32:
                {
                    result = ComputeRMSE<int32_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QSymmS16:
                {
                    result = ComputeRMSE<int16_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QSymmS8:
                case armnn::DataType::QAsymmS8:
                {
                    result = ComputeRMSE<int8_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QAsymmU8:
                {
                    result = ComputeRMSE<uint8_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                default:
                {
                    LogAndThrow("Unexpected DataType");
                }
            }
            std::cout << "RMSE: " << result << "\n";
        }
    }
}

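// Parser adaptors. Each wrapper maps the common IParser interface (CreateNetwork plus input/output
// binding lookups) onto one of the optional front ends and is only compiled in when the corresponding
// parser is built.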
#if defined(ARMNN_SERIALIZER)
ArmNNExecutor::ArmNNDeserializer::ArmNNDeserializer() : m_Parser(armnnDeserializer::IDeserializer::Create()){}

armnn::INetworkPtr ArmNNExecutor::ArmNNDeserializer::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;

    std::ifstream file(modelPath, std::ios::binary);
    return m_Parser->CreateNetworkFromBinary(file);
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkInputBindingInfo(0, inputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkOutputBindingInfo(0, outputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}
#endif

#if defined(ARMNN_TF_LITE_PARSER)
ArmNNExecutor::TfliteParser::TfliteParser(const ExecuteNetworkParams& params)
{
    armnnTfLiteParser::ITfLiteParser::TfLiteParserOptions options;
    options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
    options.m_InferAndValidate = params.m_InferOutputShape;
    options.m_AllowExpandedDims = params.m_AllowExpandedDims;

    m_Parser = armnnTfLiteParser::ITfLiteParser::Create(options);
}

armnn::INetworkPtr ArmNNExecutor::TfliteParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    return m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetInputBindingPointInfo(size_t subgraphId,
                                                                              const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(subgraphId, inputName);
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetOutputBindingPointInfo(size_t subgraphId,
                                                                               const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(subgraphId, outputName);
}
#endif


#if defined(ARMNN_ONNX_PARSER)
ArmNNExecutor::OnnxParser::OnnxParser() : m_Parser(armnnOnnxParser::IOnnxParser::Create()){}

armnn::INetworkPtr ArmNNExecutor::OnnxParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    m_Parser = armnnOnnxParser::IOnnxParser::Create();
    std::map<std::string, armnn::TensorShape> inputShapes;
    if (!params.m_InputTensorShapes.empty())
    {
        const size_t numInputShapes = params.m_InputTensorShapes.size();
        const size_t numInputBindings = params.m_InputNames.size();
        if (numInputShapes < numInputBindings)
        {
            throw armnn::Exception(
                fmt::format("Not every input has its tensor shape specified: expected={0}, got={1}",
                            numInputBindings, numInputShapes));
        }

        for (size_t i = 0; i < numInputShapes; i++)
        {
            inputShapes[params.m_InputNames[i]] = params.m_InputTensorShapes[i];
        }

        return params.m_IsModelBinary ?
               m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) :
               m_Parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes);
    }

    // Handle text and binary input differently by calling the corresponding parser function
    return params.m_IsModelBinary ?
           m_Parser->CreateNetworkFromBinaryFile(params.m_ModelPath.c_str()) :
           m_Parser->CreateNetworkFromTextFile(params.m_ModelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(inputName);
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(outputName);
}
#endif