//
// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//


#include "ArmNNExecutor.hpp"
#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp"

#include <armnn/IAsyncExecutionCallback.hpp>
#include <AsyncExecutionCallback.hpp>


using namespace armnn;
using namespace std::chrono;

ArmNNExecutor::ArmNNExecutor(const ExecuteNetworkParams& params, armnn::IRuntime::CreationOptions runtimeOptions)
: m_Params(params)
{
    runtimeOptions.m_EnableGpuProfiling = params.m_EnableProfiling;
    runtimeOptions.m_DynamicBackendsPath = params.m_DynamicBackendsPath;
    m_Runtime = armnn::IRuntime::Create(runtimeOptions);

    auto parser = CreateParser();
    auto network = parser->CreateNetwork(m_Params);
    auto optNet = OptimizeNetwork(network.get());

    m_IOInfo = GetIOInfo(optNet.get());

    armnn::ProfilingDetailsMethod profilingDetailsMethod = ProfilingDetailsMethod::Undefined;
    if (params.m_OutputDetailsOnlyToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsOnly;
    }
    else if (params.m_OutputDetailsToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsWithEvents;
    }

    INetworkProperties networkProperties{m_Params.m_Concurrent,
                                         MemorySource::Undefined,
                                         MemorySource::Undefined,
                                         params.m_EnableProfiling,
                                         profilingDetailsMethod};

    std::string errorMsg;
    Status status = m_Runtime->LoadNetwork(m_NetworkId, std::move(optNet), errorMsg, networkProperties);
    if (status != Status::Success)
    {
        std::string message("Failed to create Arm NN Executor: ");
        message.append(errorMsg);
        // Throwing an exception at this point in the constructor causes lots of problems. We'll instead mark this
        // executor as not constructed.
        ARMNN_LOG(fatal) << message;
        m_constructionFailed = true;
        return;
    }

    SetupInputsAndOutputs();

    if (m_Params.m_Iterations > 1)
    {
        std::stringstream msg;
        msg << "Network will be executed " << m_Params.m_Iterations;
        if (m_Params.m_Concurrent)
        {
            msg << " times in an asynchronous manner. ";
        }
        else
        {
            msg << " times successively. ";
        }
        msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to "
               "cover each execution.";
        ARMNN_LOG(info) << msg.str();
    }

    if (m_Params.m_GenerateTensorData)
    {
        ARMNN_LOG(warning) << "The input data was generated; note that the output will not be useful";
    }

    if (m_Params.m_DontPrintOutputs)
    {
        ARMNN_LOG(info) << "Printing outputs to console is disabled.";
    }
}

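// Runs all requested iterations through the Arm NN Threadpool: every inference is scheduled up front and the
// results are collected via callbacks as they complete. Only built when threads are enabled.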
void ArmNNExecutor::ExecuteAsync()
{
#if !defined(ARMNN_DISABLE_THREADS)
    std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
    std::unique_ptr<armnn::Threadpool> threadpool;
    armnn::AsyncCallbackManager callbackManager;
    std::unordered_map<armnn::InferenceId, const armnn::OutputTensors*> inferenceOutputMap;

    for (size_t i = 0; i < m_Params.m_ThreadPoolSize; ++i)
    {
        memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkId));
    }

    threadpool = std::make_unique<armnn::Threadpool>(m_Params.m_ThreadPoolSize,
                                                     m_Runtime.get(),
                                                     memHandles);

    ARMNN_LOG(info) << "Asynchronous Execution with Arm NN thread pool... \n";
    // Declare the latest and earliest inference times here to be used when calculating overall time
    std::chrono::high_resolution_clock::time_point earliestStartTime =
        std::chrono::high_resolution_clock::time_point::max();
    std::chrono::high_resolution_clock::time_point latestEndTime =
        std::chrono::high_resolution_clock::now();

    // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the
    // LoadedNetwork with each scheduled inference having a specific priority
    for (size_t i = 0; i < m_Params.m_Iterations; ++i)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
        inferenceOutputMap.insert({cb->GetInferenceId(), &m_OutputTensorsVec[i]});
        threadpool->Schedule(m_NetworkId,
                             m_InputTensorsVec[i],
                             m_OutputTensorsVec[i],
                             armnn::QosExecPriority::Medium,
                             cb);
    }

    // Check the results
    for (size_t iteration = 0; iteration < m_Params.m_Iterations; ++iteration)
    {
        auto cb = callbackManager.GetNotifiedCallback();

        // Get the results
        if (earliestStartTime > cb->GetStartTime())
        {
            earliestStartTime = cb->GetStartTime();
        }
        if (latestEndTime < cb->GetEndTime())
        {
            latestEndTime = cb->GetEndTime();
        }

        auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
        auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
        auto inferenceDuration = endTime - startTime;
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
        if(!m_Params.m_DontPrintOutputs)
        {
            const armnn::OutputTensors* out = inferenceOutputMap[cb->GetInferenceId()];
            PrintOutputTensors(out, iteration);
        }
    }

    // Print duration difference between overallStartTime and overallEndTime
    auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
    auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
    auto totalInferenceDuration = overallEndTime - overallStartTime;
    ARMNN_LOG(info) << "Overall Inference time: " << std::setprecision(2)
                    << std::fixed << totalInferenceDuration.count() << " ms\n";

#endif
}

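// Runs the requested iterations one after another on the calling thread via EnqueueWorkload, printing the
// profiler output after the final iteration when profiling is enabled.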
void ArmNNExecutor::ExecuteSync()
{
    for (size_t x = 0; x < m_Params.m_Iterations; x++)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        const auto start_time = armnn::GetTimeNow();
        armnn::Status ret;
        if (m_Params.m_ImportInputsIfAligned)
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x],
                                             m_ImportedInputIds[x],
                                             m_ImportedOutputIds[x]);
        }
        else
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x]);
        }

        const auto inferenceDuration = armnn::GetTimeDuration(start_time);

        // If profiling is enabled print out the results
        if(profiler && profiler->IsProfilingEnabled() && x == (m_Params.m_Iterations - 1))
        {
            profiler->Print(std::cout);
        }

        if(ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }

        if(!m_Params.m_DontPrintOutputs)
        {
            PrintOutputTensors(&m_OutputTensorsVec[x], x);
        }

        // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
    }
}

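// Dispatches to the synchronous or thread-pool execution path depending on m_ThreadPoolSize, then returns the
// raw memory of each output buffer so callers can compare the results against another execution.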
std::vector<const void*> ArmNNExecutor::Execute()
{
    if(m_Params.m_ThreadPoolSize == 0)
    {
        ExecuteSync();
    }
    else
    {
        ExecuteAsync();
    }
    std::vector<const void*> results;
    for (auto& output : m_OutputStorage)
    {
        results.push_back(output.m_Mem);
    }

    return results;
}

void ArmNNExecutor::PrintNetworkInfo()
{
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    std::stringstream ss;
    ss << "===== Network Info =====\n";
    ss << "Inputs in order:\n";
    for (const auto& inputName : inputNames)
    {
        const auto inputInfo = m_IOInfo.m_InputInfoMap[inputName].second;
        ss << inputName << ", " << inputInfo.GetShape() << ", " << GetDataTypeName(inputInfo.GetDataType());
        if (inputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << inputInfo.GetQuantizationOffset();
            if (inputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale: inputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << inputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    ss << "Outputs in order:\n";
    for (const auto& outputName : m_IOInfo.m_OutputNames)
    {
        const auto outputInfo = m_IOInfo.m_OutputInfoMap[outputName].second;
        ss << outputName << ", " << outputInfo.GetShape() << ", " << GetDataTypeName(outputInfo.GetDataType());
        if (outputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << outputInfo.GetQuantizationOffset();
            if (outputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale: outputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << outputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    std::cout << ss.str() << std::endl;
}

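// Validates the supplied input/output file counts, allocates storage for every tensor, fills the inputs from
// the given data files (or with generated data) and builds one set of input/output tensors per iteration,
// reusing sets cyclically when fewer files than iterations were supplied.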
void ArmNNExecutor::SetupInputsAndOutputs()
{
    const unsigned int noOfInputs = m_IOInfo.m_InputNames.size();

    if (m_Params.m_InputNames.size() != 0 && m_Params.m_InputNames.size() != noOfInputs)
    {
        LogAndThrow("Number of input names does not match number of inputs");
    }

    const unsigned int inputFilePaths = m_Params.m_InputTensorDataFilePaths.size();
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    unsigned int noInputSets = 1;

    if (inputFilePaths != 0)
    {
        if (inputFilePaths % noOfInputs != 0)
        {
            LogAndThrow("Number of input files: " + std::to_string(inputFilePaths) +
                        " not compatible with number of inputs: " + std::to_string(noOfInputs));
        }
        noInputSets = inputFilePaths / noOfInputs;
        if (noInputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of inputs not compatible with ReuseBuffers");
        }
    }

    const unsigned int noOfOutputs = m_IOInfo.m_OutputNames.size();
    const unsigned int outputFilePaths = m_Params.m_OutputTensorFiles.size();
    unsigned int noOutputSets = 1;

    if (outputFilePaths != 0)
    {
        if (outputFilePaths % noOfOutputs != 0)
        {
            LogAndThrow("Number of output files: " + std::to_string(outputFilePaths) +
                        ", not compatible with number of outputs: " + std::to_string(noOfOutputs));
        }
        noOutputSets = outputFilePaths / noOfOutputs;

        if (noOutputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of outputs not compatible with ReuseBuffers");
        }
    }

    if (m_Params.m_ThreadPoolSize != 0)
    {
        // The current implementation of the Threadpool does not allow binding of outputs to a thread
        // So to ensure no two threads write to the same output at the same time, no output can be reused
        noOutputSets = m_Params.m_Iterations;
    }

    if (m_Params.m_InputTensorDataFilePaths.size() > noOfInputs)
    {
        ARMNN_LOG(info) << "Given network has " << noOfInputs << " input/s. One input-tensor-data file is required "
                        << "for each input. The user provided "
                        << m_Params.m_InputTensorDataFilePaths.size()
                        << " input-tensor-data file/s which will be used to fill the input/s.\n";
    }

    unsigned int inputCount = 0;
    for(unsigned int inputSet = 0; inputSet < noInputSets; ++inputSet)
    {
        armnn::InputTensors inputTensors;
        for (const auto& inputName: inputNames)
        {
            armnn::BindingPointInfo bindingPointInfo;
            try
            {
                bindingPointInfo = m_IOInfo.m_InputInfoMap.at(inputName);
            }
            catch (const std::out_of_range& e)
            {
                LogAndThrow("Input with inputName: " + inputName + " not found.");
            }

            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;
            auto newInfo = armnn::TensorInfo{tensorInfo.GetShape(), tensorInfo.GetDataType(),
                                             tensorInfo.GetQuantizationScale(),
                                             tensorInfo.GetQuantizationOffset(),
                                             true};

            m_InputStorage.emplace_back(IOStorage{tensorInfo.GetNumBytes()});

            const int bindingId = bindingPointInfo.first;
            inputTensors.emplace_back(bindingId, armnn::ConstTensor{newInfo, m_InputStorage.back().m_Mem});

            const armnn::Optional<std::string> dataFile = m_Params.m_GenerateTensorData ?
                                                          armnn::EmptyOptional() :
                                                          armnn::MakeOptional<std::string>(
                                                              m_Params.m_InputTensorDataFilePaths.at(inputCount++));

            switch (tensorInfo.GetDataType())
            {
                case armnn::DataType::Float32:
                {
                    auto typedTensor = reinterpret_cast<float*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<float>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS16:
                {
                    auto typedTensor = reinterpret_cast<int16_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int16_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS8:
                case armnn::DataType::QAsymmS8:
                {
                    auto typedTensor = reinterpret_cast<int8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QAsymmU8:
                {
                    auto typedTensor = reinterpret_cast<uint8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<uint8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::Signed32:
                {
                    auto typedTensor = reinterpret_cast<int32_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int32_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                default:
                {
                    LogAndThrow("Unexpected DataType");
                }
            }

        }

        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedInputIds.push_back(
                m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc));
        }
        m_InputTensorsVec.emplace_back(inputTensors);
    }

    for(unsigned int outputSet = 0; outputSet < noOutputSets; ++outputSet)
    {
        armnn::OutputTensors outputTensors;
        for (const auto& output: m_IOInfo.m_OutputInfoMap)
        {
            const armnn::BindingPointInfo& bindingPointInfo = output.second;
            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;

            m_OutputStorage.emplace_back(tensorInfo.GetNumBytes());
            outputTensors.emplace_back(bindingPointInfo.first, armnn::Tensor{tensorInfo, m_OutputStorage.back().m_Mem});
        }
        m_OutputTensorsVec.emplace_back(outputTensors);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(
                m_Runtime->ImportOutputs(m_NetworkId, m_OutputTensorsVec.back(), armnn::MemorySource::Malloc));
        }
    }

    // If iterations > noSets fill the remaining iterations repeating the given files
    // If iterations < noSets just ignore the extra files
    const unsigned int remainingInputSets = (m_Params.m_Iterations > noInputSets)
                                            ? m_Params.m_Iterations - noInputSets
                                            : 0;
    for (unsigned int i = 0; i < remainingInputSets; ++i)
    {
        m_InputTensorsVec.push_back(m_InputTensorsVec[i % noInputSets]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedInputIds.push_back(m_ImportedInputIds[i % noInputSets]);
        }
    }

    const unsigned int remainingOutputSets = (m_Params.m_Iterations > noOutputSets)
                                             ? m_Params.m_Iterations - noOutputSets
                                             : 0;
    for (unsigned int i = 0; i < remainingOutputSets; ++i)
    {
        m_OutputTensorsVec.push_back(m_OutputTensorsVec[i % noOutputSets]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(m_ImportedOutputIds[i % noOutputSets]);
        }
    }
}

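// Collects the names, binding ids and tensor infos of all input and output layers by walking the optimized
// graph with an IStrategy visitor.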
ArmNNExecutor::IOInfo ArmNNExecutor::GetIOInfo(armnn::IOptimizedNetwork* optNet)
{
    struct IOStrategy : armnn::IStrategy
    {
        void ExecuteStrategy(const armnn::IConnectableLayer* layer,
                             const armnn::BaseDescriptor& descriptor,
                             const std::vector<armnn::ConstTensor>& constants,
                             const char* name,
                             const armnn::LayerBindingId id = 0) override
        {
            armnn::IgnoreUnused(descriptor, constants, id);
            switch (layer->GetType())
            {
                case armnn::LayerType::Input:
                {
                    m_IOInfo.m_InputNames.emplace_back(name);
                    m_IOInfo.m_InputInfoMap[name] = {id, layer->GetOutputSlot(0).GetTensorInfo()};
                    break;
                }
                case armnn::LayerType::Output:
                {
                    m_IOInfo.m_OutputNames.emplace_back(name);
                    m_IOInfo.m_OutputInfoMap[name] = {id, layer->GetInputSlot(0).GetConnection()->GetTensorInfo()};
                    break;
                }
                default: {}
            }
        }
        IOInfo m_IOInfo;
    };

    IOStrategy ioStrategy;
    optNet->ExecuteStrategy(ioStrategy);

    return ioStrategy.m_IOInfo;
}

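// Builds the optimizer and backend options from the command-line parameters and runs armnn::Optimize for the
// requested compute devices.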
armnn::IOptimizedNetworkPtr ArmNNExecutor::OptimizeNetwork(armnn::INetwork* network)
{
    armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};

    armnn::OptimizerOptions options;
    options.m_ReduceFp32ToFp16 = m_Params.m_EnableFp16TurboMode;
    options.m_Debug = m_Params.m_PrintIntermediate;
    options.m_DebugToFile = m_Params.m_PrintIntermediateOutputsToFile;
    options.m_shapeInferenceMethod = m_Params.m_InferOutputShape ?
                                     armnn::ShapeInferenceMethod::InferAndValidate :
                                     armnn::ShapeInferenceMethod::ValidateOnly;
    options.m_ProfilingEnabled = m_Params.m_EnableProfiling;
    options.m_AllowExpandedDims = m_Params.m_AllowExpandedDims;

    armnn::BackendOptions gpuAcc("GpuAcc",
                                 {
                                     { "FastMathEnabled", m_Params.m_EnableFastMath },
                                     { "SaveCachedNetwork", m_Params.m_SaveCachedNetwork },
                                     { "CachedNetworkFilePath", m_Params.m_CachedNetworkFilePath },
                                     { "MLGOTuningFilePath", m_Params.m_MLGOTuningFilePath }
                                 });

    armnn::BackendOptions cpuAcc("CpuAcc",
                                 {
                                     { "FastMathEnabled", m_Params.m_EnableFastMath },
                                     { "NumberOfThreads", m_Params.m_NumberOfThreads }
                                 });
    options.m_ModelOptions.push_back(gpuAcc);
    options.m_ModelOptions.push_back(cpuAcc);
    // The shapeInferenceMethod and allowExpandedDims values have to be added to the model options
    // because these are what are passed to the OptimizeSubgraphViews method and are used to create
    // the new optimized INetwork that method uses
    armnn::BackendOptions allowExDimOpt("AllowExpandedDims",
                                        {
                                            { "AllowExpandedDims", m_Params.m_AllowExpandedDims }
                                        });
    options.m_ModelOptions.push_back(allowExDimOpt);
    armnn::BackendOptions shapeInferOpt("ShapeInferenceMethod",
                                        {
                                            { "InferAndValidate", m_Params.m_InferOutputShape }
                                        });
    options.m_ModelOptions.push_back(shapeInferOpt);

    const auto optimization_start_time = armnn::GetTimeNow();
    optNet = armnn::Optimize(*network, m_Params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);

    ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
                    << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms\n";

    if (!optNet)
    {
        LogAndThrow("Optimize returned nullptr");
    }

    // If -v, --visualize-optimized-model is enabled then construct a file name for the dot file.
    if (m_Params.m_EnableLayerDetails)
    {
        fs::path filename = m_Params.m_ModelPath;
        filename.replace_extension("dot");
        std::fstream file(filename.c_str(), std::ios_base::out);
        optNet->SerializeToDot(file);
    }

    return optNet;
}

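// Selects a parser implementation from the model file extension (.armnn, .tflite or .onnx); each branch is
// only available when the corresponding parser support was compiled in.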
std::unique_ptr<ArmNNExecutor::IParser> ArmNNExecutor::CreateParser()
{
    const fs::path modelFilename = m_Params.m_ModelPath;
    const std::string modelExtension = modelFilename.extension();

    m_Params.m_IsModelBinary = modelExtension != ".json";
    std::unique_ptr<IParser> parser = nullptr;
    // Forward to implementation based on the parser type
    if (modelExtension == ".armnn")
    {
#if defined(ARMNN_SERIALIZER)
        parser = std::make_unique<ArmNNDeserializer>();
#else
        LogAndThrow("Not built with serialization support.");
#endif
    }
    else if (modelExtension == ".tflite")
    {
#if defined(ARMNN_TF_LITE_PARSER)
        parser = std::make_unique<TfliteParser>(m_Params);
#else
        LogAndThrow("Not built with Tensorflow-Lite parser support.");
#endif
    }
    else if (modelExtension == ".onnx")
    {
#if defined(ARMNN_ONNX_PARSER)
        parser = std::make_unique<OnnxParser>();
#else
        LogAndThrow("Not built with Onnx parser support.");
#endif
    }

    return parser;
}

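// Prints each output tensor of the given iteration to stdout and, when output files were supplied, also
// writes it to the matching file.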
void ArmNNExecutor::PrintOutputTensors(const armnn::OutputTensors* outputTensors,
                                       unsigned int iteration)
{
    auto findOutputName = [&](const armnn::LayerBindingId id)
    {
        for (auto it = m_IOInfo.m_OutputInfoMap.begin(); it != m_IOInfo.m_OutputInfoMap.end(); ++it)
        {
            if (id == it->second.first)
            {
                return it->first;
            }
        }
        return std::string{};
    };

    unsigned int outputIndex = 0;
    unsigned int numOutputs = outputTensors->size();
    for (const auto& output: *outputTensors)
    {
        const auto bindingName = findOutputName(output.first);
        // We've made sure before that the number of output files either equals numOutputs, in which case we
        // overwrite those files when processing the results of each iteration (only the result of the last
        // iteration will be stored), or that there are enough output files for each output of each iteration.
        size_t outputFileIndex = iteration * numOutputs + outputIndex;
        if (!m_Params.m_OutputTensorFiles.empty())
        {
            outputFileIndex = outputFileIndex % m_Params.m_OutputTensorFiles.size();
            ARMNN_LOG(info) << "Writing output: " << bindingName << " bindingId: '"
                            << output.first
                            << "' of iteration: " << iteration + 1 << " to file: '"
                            << m_Params.m_OutputTensorFiles[outputFileIndex] << "'";
        }

        const armnn::Optional<std::string> outputTensorFile = m_Params.m_OutputTensorFiles.empty() ?
                                                              armnn::EmptyOptional() :
                                                              armnn::MakeOptional<std::string>(
                                                                  m_Params.m_OutputTensorFiles[outputFileIndex]);

        OutputWriteInfo outputWriteInfo
        {
            outputTensorFile,
            bindingName,
            output.second,
            !m_Params.m_DontPrintOutputs
        };

        std::cout << bindingName << ": ";
        std::vector<float> values;
        switch (output.second.GetDataType())
        {
            case armnn::DataType::Float32:
            {
                PrintTensor<float>(outputWriteInfo, "%f ");
                break;
            }

            case armnn::DataType::Signed32:
            {
                PrintTensor<int>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::QSymmS8:
            case armnn::DataType::QAsymmS8:
            {
                PrintTensor<int8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::QAsymmU8:
            {
                PrintTensor<uint8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::Float16:
            case armnn::DataType::QSymmS16:
            case armnn::DataType::BFloat16:
            case armnn::DataType::Boolean:
            case armnn::DataType::Signed64:
            default:
            {
                LogAndThrow("Unexpected DataType");
            }
        }
        std::cout << "\n";
        ++outputIndex;
    }
}

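// Computes the RMSE between this executor's outputs and a second set of results (for example from a different
// backend or runtime) and prints one value per output tensor.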
void ArmNNExecutor::CompareAndPrintResult(std::vector<const void*> otherOutput)
{
    unsigned int index = 0;

    for (const auto& outputTensors: m_OutputTensorsVec)
    {
        for (const auto& outputTensor: outputTensors)
        {
            float result = 0;
            size_t size = outputTensor.second.GetNumBytes();

            switch (outputTensor.second.GetDataType())
            {
                case armnn::DataType::Float32:
                {
                    result = ComputeRMSE<float>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::Signed32:
                {
                    result = ComputeRMSE<int32_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QSymmS16:
                {
                    result = ComputeRMSE<int16_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QSymmS8:
                case armnn::DataType::QAsymmS8:
                {
                    result = ComputeRMSE<int8_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QAsymmU8:
                {
                    result = ComputeRMSE<uint8_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                default:
                {
                    LogAndThrow("Unexpected DataType");
                }
            }
            std::cout << "RMSE: " << result << "\n";
        }
    }
}
#if defined(ARMNN_SERIALIZER)
ArmNNExecutor::ArmNNDeserializer::ArmNNDeserializer() : m_Parser(armnnDeserializer::IDeserializer::Create()){}

armnn::INetworkPtr ArmNNExecutor::ArmNNDeserializer::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;

    std::ifstream file(modelPath, std::ios::binary);
    return m_Parser->CreateNetworkFromBinary(file);
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkInputBindingInfo(0, inputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkOutputBindingInfo(0, outputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}
#endif

#if defined(ARMNN_TF_LITE_PARSER)
ArmNNExecutor::TfliteParser::TfliteParser(const ExecuteNetworkParams& params)
{
    armnnTfLiteParser::ITfLiteParser::TfLiteParserOptions options;
    options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
    options.m_InferAndValidate = params.m_InferOutputShape;
    options.m_AllowExpandedDims = params.m_AllowExpandedDims;

    m_Parser = armnnTfLiteParser::ITfLiteParser::Create(options);
}

armnn::INetworkPtr ArmNNExecutor::TfliteParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    return m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetInputBindingPointInfo(size_t subgraphId,
                                                                              const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(subgraphId, inputName);
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetOutputBindingPointInfo(size_t subgraphId,
                                                                               const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(subgraphId, outputName);
}
#endif


#if defined(ARMNN_ONNX_PARSER)
ArmNNExecutor::OnnxParser::OnnxParser() : m_Parser(armnnOnnxParser::IOnnxParser::Create()){}

armnn::INetworkPtr ArmNNExecutor::OnnxParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    m_Parser = armnnOnnxParser::IOnnxParser::Create();
    std::map<std::string, armnn::TensorShape> inputShapes;
    if(!params.m_InputTensorShapes.empty())
    {
        const size_t numInputShapes = params.m_InputTensorShapes.size();
        const size_t numInputBindings = params.m_InputNames.size();
        if(numInputShapes < numInputBindings)
        {
            throw armnn::Exception(
                fmt::format("Not every input has its tensor shape specified: expected={0}, got={1}",
                            numInputBindings, numInputShapes));
        }

        for (size_t i = 0; i < numInputShapes; i++)
        {
            inputShapes[params.m_InputNames[i]] = params.m_InputTensorShapes[i];
        }

        return params.m_IsModelBinary ?
               m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) :
               m_Parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes);
    }

    // Handle text and binary input differently by calling the corresponding parser function
    return params.m_IsModelBinary ?
           m_Parser->CreateNetworkFromBinaryFile(params.m_ModelPath.c_str()) :
           m_Parser->CreateNetworkFromTextFile(params.m_ModelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(inputName);
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(outputName);
}
#endif