//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ArmnnDriverImpl.hpp"
#include "ArmnnPreparedModel.hpp"
#include "CacheDataHandler.hpp"
#include "ModelToINetworkTransformer.hpp"
#include "SystemPropertiesUtils.hpp"

#include <armnnDeserializer/IDeserializer.hpp>

#include <log/log.h>
#include <sys/stat.h>

namespace
{

Capabilities GenerateCapabilities()
{
    VLOG(DRIVER) << "ArmnnDriverImpl::GenerateCapabilities()";

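    // The same default performance figures are reported for every supported operand type.
    // These are relative numbers (lower is better) rather than values measured on a particular device.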
    float defaultPerfValue = .1f;
    const Capabilities::PerformanceInfo defaultPerfInfo = { /* execTime */ defaultPerfValue,
                                                            /* powerUsage */ defaultPerfValue
                                                          };
    std::vector<OperandType> operandsTypes({
            OperandType::FLOAT32,
            OperandType::INT32,
            OperandType::UINT32,
            OperandType::TENSOR_FLOAT32,
            OperandType::TENSOR_INT32,
            OperandType::TENSOR_QUANT8_ASYMM,
            OperandType::BOOL,
            OperandType::TENSOR_QUANT16_SYMM,
            OperandType::TENSOR_FLOAT16,
            OperandType::TENSOR_BOOL8,
            OperandType::FLOAT16,
            OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
            OperandType::TENSOR_QUANT16_ASYMM,
            OperandType::TENSOR_QUANT8_SYMM,
            OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
    });

    std::vector<Capabilities::OperandPerformance> operandPerformances;
    operandPerformances.reserve(operandsTypes.size());

    for (auto opType : operandsTypes)
    {
        operandPerformances.push_back(
                Capabilities::OperandPerformance{ /* type */ opType, /* info */ defaultPerfInfo });
    }

    auto operandPerformanceTable =
            Capabilities::OperandPerformanceTable::create(std::move(operandPerformances)).value();

    return { /* relaxedFloat32toFloat16PerformanceScalar */ defaultPerfInfo,
             /* relaxedFloat32toFloat16PerformanceTensor */ defaultPerfInfo,
             /* operandPerformance */ std::move(operandPerformanceTable),
             /* ifPerformance */ defaultPerfInfo,
             /* whilePerformance */ defaultPerfInfo };
}

} // anonymous namespace

using namespace android::nn;

namespace armnn_driver
{

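// Returns true if the shared handle wraps a valid file descriptor that was opened with read/write access.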
bool ArmnnDriverImpl::ValidateSharedHandle(const SharedHandle& sharedHandle)
{
    bool valid = true;

    if (*sharedHandle < 0)
    {
        return !valid;
    }

    int dataCacheFileAccessMode = fcntl(*sharedHandle, F_GETFL) & O_ACCMODE;
    if (dataCacheFileAccessMode != O_RDWR)
    {
        return !valid;
    }

    return valid;
}

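// Returns true if exactly one data cache handle of the expected size was provided and it is valid.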
bool ArmnnDriverImpl::ValidateDataCacheHandle(const std::vector<SharedHandle>& dataCacheHandle, const size_t dataSize)
{
    bool valid = true;
    // The dataCacheHandle size should always be 1 for an ArmNN model
    if (dataCacheHandle.size() != 1)
    {
        return !valid;
    }

    if (dataSize == 0)
    {
        return !valid;
    }

    struct stat statBuffer;
    if (fstat(*dataCacheHandle[0], &statBuffer) == 0)
    {
        unsigned long bufferSize = statBuffer.st_size;
        if (bufferSize != dataSize)
        {
            return !valid;
        }
    }

    return ValidateSharedHandle(dataCacheHandle[0]);
}

std::vector<armnn::NetworkId>& ArmnnDriverImpl::GetLoadedNetworks()
{
    return m_NetworkIDs;
}

GeneralResult<SharedPreparedModel> ArmnnDriverImpl::PrepareArmnnModel(
    const armnn::IRuntimePtr& runtime,
    const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
    const DriverOptions& options,
    const Model& model,
    const std::vector<SharedHandle>& modelCacheHandle,
    const std::vector<SharedHandle>& dataCacheHandle,
    const CacheToken& token,
    bool float32ToFloat16,
    Priority priority)
{
    VLOG(DRIVER) << "ArmnnDriverImpl::PrepareArmnnModel()";

    if (!runtime)
    {
        return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE) << "Device unavailable";
    }

    if (const auto result = validate(model); !result.ok())
    {
        return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) << "Invalid model passed as input";
    }

    // Deliberately ignore any unsupported operations requested by the options -
    // at this point we're being asked to prepare a model that we've already declared support for
    // and the operation indices may be different to those in getSupportedOperations anyway.
    std::set<unsigned int> unsupportedOperations;
    ModelToINetworkTransformer modelConverter(options.GetBackends(),
                                              model,
                                              unsupportedOperations);

    if (modelConverter.GetConversionResult() != ConversionResult::Success)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "ModelToINetworkConverter failed";
    }

    // Serialize the network graph to a .armnn file if an output directory
    // has been specified in the driver's arguments.
    std::vector<uint8_t> dataCacheData;
    bool serializeToFile = !dataCacheHandle.empty();
    auto serializedNetworkFileName =
        SerializeNetwork(*modelConverter.GetINetwork(),
                         options.GetRequestInputsAndOutputsDumpDir(),
                         dataCacheData,
                         serializeToFile);

    // Optimize the network
    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
    armnn::OptimizerOptions OptOptions;
    OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
    OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();

    int cachedFd = -1;
    bool saveCachedNetwork = options.SaveCachedNetwork();

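    // If the runtime has provided model cache handles, hand the GpuAcc handle's file descriptor to the
    // backend (via the "CachedFileDescriptor" option below) so the compiled network can be saved to it.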
    unsigned int numberOfCachedModelFiles = 0;
    if (modelCacheHandle.size() > 0)
    {
        unsigned int index = 0;
        for (auto& backend : options.GetBackends())
        {
            // The modelCacheHandle size should be equal to numberOfCachedModelFiles
            // and the modelCacheHandle vector should be in the same order as the backends
            auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
            if (numberOfCacheFiles > 0)
            {
                numberOfCachedModelFiles += numberOfCacheFiles;
                // For GpuAcc numberOfCacheFiles is 1
                if (backend == armnn::Compute::GpuAcc)
                {
                    cachedFd = *modelCacheHandle[index];
                    saveCachedNetwork = true;
                }
                index += numberOfCacheFiles;
            }
        }
    }

    armnn::BackendOptions gpuAcc("GpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "SaveCachedNetwork", saveCachedNetwork },
        { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
        { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
        { "CachedFileDescriptor", cachedFd }
    });

    armnn::BackendOptions cpuAcc("CpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "NumberOfThreads", options.GetNumberOfThreads() }
    });
    OptOptions.m_ModelOptions.push_back(gpuAcc);
    OptOptions.m_ModelOptions.push_back(cpuAcc);

    std::vector<std::string> errMessages;
    try
    {
        optNet = armnn::Optimize(*modelConverter.GetINetwork(),
                                 options.GetBackends(),
                                 runtime->GetDeviceSpec(),
                                 OptOptions,
                                 errMessages);
    }
    catch (std::exception& e)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << e.what();
    }

    // Check that the optimized network is valid.
    if (!optNet)
    {
        std::stringstream message;
        message << "Invalid optimized network";
        for (const std::string& msg : errMessages)
        {
            message << "\n" << msg;
        }
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
    }

    // Export the optimized network graph to a dot file if an output dump directory
    // has been specified in the driver's arguments.
    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
                                                               options.GetRequestInputsAndOutputsDumpDir());

    // Load it into the runtime.
    armnn::NetworkId netId = 0;
    std::string msg;
    armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
                                                MemorySource::Undefined,
                                                MemorySource::Undefined,
                                                options.IsGpuProfilingEnabled());
    auto numInputs = getMainModel(model).inputIndexes.size();
    auto numOutputs = getMainModel(model).outputIndexes.size();
    try
    {
        if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
        {
            return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be loaded";
        }
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from LoadNetwork.";
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
    }

    // Now that we have a networkId for the graph, rename the exported files to use it
    // so that we can associate the graph file with the exported input/output tensor files.
    RenameExportedFiles(serializedNetworkFileName,
                        dotGraphFileName,
                        options.GetRequestInputsAndOutputsDumpDir(),
                        netId);

    // Cache the model
    size_t hashValue = 0;
    if (dataCacheHandle.size() == 1)
    {
        write(*dataCacheHandle[0], dataCacheData.data(), dataCacheData.size());
        hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
    }

    // Cache the model data
    if (modelCacheHandle.size() > 0)
    {
        if (modelCacheHandle.size() == numberOfCachedModelFiles)
        {
            for (uint32_t i = 0; i < modelCacheHandle.size(); ++i)
            {
                int modelCacheFileAccessMode = fcntl(*modelCacheHandle[i], F_GETFL) & O_ACCMODE;
                if (modelCacheFileAccessMode != O_RDONLY)
                {
                    struct stat statBuffer;
                    if (fstat(*modelCacheHandle[i], &statBuffer) == 0)
                    {
                        long modelDataSize = statBuffer.st_size;
                        if (modelDataSize > 0)
                        {
                            std::vector<uint8_t> modelData(modelDataSize);
                            pread(*modelCacheHandle[i], modelData.data(), modelData.size(), 0);
                            hashValue ^= CacheDataHandlerInstance().Hash(modelData);
                        }
                    }
                }
            }
        }
    }
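    // Register the combined hash against the cache token so that PrepareArmnnModelFromCache can later
    // verify that the cache files it is handed correspond to this model.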
    if (hashValue != 0)
    {
        CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size());
    }

    bool executeWithDummyInputs = (std::find(options.GetBackends().begin(),
                                             options.GetBackends().end(),
                                             armnn::Compute::GpuAcc) != options.GetBackends().end());

    m_NetworkIDs.push_back(netId);
    auto preparedModel = std::make_shared<const ArmnnPreparedModel>(netId,
                                                                    runtime.get(),
                                                                    model,
                                                                    options.GetRequestInputsAndOutputsDumpDir(),
                                                                    options.IsGpuProfilingEnabled(),
                                                                    priority);

    // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
    // this is enabled) before the first 'real' inference, which removes the overhead of the first inference.
    // Only run this if the GpuAcc backend has been added to the options.
    if (executeWithDummyInputs)
    {
        if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
        {
            return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be executed";
        }

        if (clTunedParameters &&
            options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
        {
            // Now that we've done one inference the CL kernel parameters will have been tuned,
            // so save the updated file.
            try
            {
                clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
            }
            catch (std::exception& error)
            {
                VLOG(DRIVER) << "ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file "
                             << options.GetClTunedParametersFile().c_str() << ": " << error.what();
            }
        }
    }
    return std::move(preparedModel);
}

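// Static storage for the IDs of the networks this driver has loaded into the Arm NN runtime.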
std::vector<armnn::NetworkId> ArmnnDriverImpl::m_NetworkIDs = {};

GeneralResult<SharedPreparedModel> ArmnnDriverImpl::PrepareArmnnModelFromCache(
    const armnn::IRuntimePtr& runtime,
    const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
    const DriverOptions& options,
    const std::vector<SharedHandle>& modelCacheHandle,
    const std::vector<SharedHandle>& dataCacheHandle,
    const CacheToken& token,
    bool float32ToFloat16)
{
    VLOG(DRIVER) << "ArmnnDriverImpl::PrepareArmnnModelFromCache()";

    if (!runtime)
    {
        return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE)
            << "ArmnnDriverImpl::prepareModelFromCache(): Device unavailable";
    }

    if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
            << "ArmnnDriverImpl::prepareModelFromCache(): Token size does not match!";
    }

    // Validate dataCacheHandle
    auto dataSize = CacheDataHandlerInstance().GetCacheSize(token);
    if (!ValidateDataCacheHandle(dataCacheHandle, dataSize))
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
            << "ArmnnDriverImpl::prepareModelFromCache(): Invalid data cache handle!";
    }

    // Check that the number of cached model files matches the expected value
    unsigned int numberOfCachedModelFiles = 0;
    for (auto& backend : options.GetBackends())
    {
        numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
    }
    if (modelCacheHandle.size() != numberOfCachedModelFiles)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
            << "ArmnnDriverImpl::prepareModelFromCache(): Model cache handle size does not match.";
    }

    // Read the model
    std::vector<uint8_t> dataCacheData(dataSize);
    pread(*dataCacheHandle[0], dataCacheData.data(), dataCacheData.size(), 0);
    auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData);

    int gpuAccCachedFd = -1;
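    // Recompute the combined hash by XOR-ing in the hash of each backend-specific cached model file,
    // mirroring the calculation performed when the model was first prepared.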
    if (modelCacheHandle.size() > 0)
    {
        unsigned int index = 0;
        for (auto& backend : options.GetBackends())
        {
            // The modelCacheHandle size should be equal to numberOfCachedModelFiles
            // and the modelCacheHandle vector should be in the same order as the backends
            auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
            if (numberOfCacheFiles > 0)
            {
                if (!ValidateSharedHandle(modelCacheHandle[index]))
                {
                    return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
                        << "ArmnnDriverImpl::prepareModelFromCache(): Invalid model cache handle!";
                }
                int cachedFd = *modelCacheHandle[index];
                struct stat statBuffer;
                if (fstat(cachedFd, &statBuffer) == 0)
                {
                    long modelDataSize = statBuffer.st_size;
                    if (modelDataSize > 0)
                    {
                        std::vector<uint8_t> modelData(modelDataSize);
                        pread(cachedFd, modelData.data(), modelData.size(), 0);
                        hashValue ^= CacheDataHandlerInstance().Hash(modelData);

                        if (backend == armnn::Compute::GpuAcc)
                        {
                            gpuAccCachedFd = cachedFd;
                        }
                    }
                }
                index += numberOfCacheFiles;
            }
        }
    }

    if (!CacheDataHandlerInstance().Validate(token, hashValue, dataCacheData.size()))
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
            << "ArmnnDriverImpl::prepareModelFromCache(): ValidateHash() failed!";
    }

    // Deserialize the network.
    armnn::INetworkPtr network = armnn::INetworkPtr(nullptr, [](armnn::INetwork*){});
    try
    {
        network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData);
    }
    catch (std::exception&)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
            << "ArmnnDriverImpl::prepareModelFromCache(): Exception caught from Deserializer!";
    }

    // Optimize the network
    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
    armnn::OptimizerOptions OptOptions;
    OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
    OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();

    armnn::BackendOptions gpuAcc("GpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "SaveCachedNetwork", false },
        { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
        { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
        { "CachedFileDescriptor", gpuAccCachedFd }
    });

    armnn::BackendOptions cpuAcc("CpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "NumberOfThreads", options.GetNumberOfThreads() }
    });
    OptOptions.m_ModelOptions.push_back(gpuAcc);
    OptOptions.m_ModelOptions.push_back(cpuAcc);

    std::vector<std::string> errMessages;
    try
    {
        optNet = armnn::Optimize(*network.get(),
                                 options.GetBackends(),
                                 runtime->GetDeviceSpec(),
                                 OptOptions,
                                 errMessages);
    }
    catch (std::exception& e)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << e.what();
    }

    // Check that the optimized network is valid.
    if (!optNet)
    {
        std::stringstream message;
        message << "Invalid optimized network";
        for (const std::string& msg : errMessages)
        {
            message << "\n" << msg;
        }
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
    }

    // Export the optimized network graph to a dot file if an output dump directory
    // has been specified in the driver's arguments.
    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
                                                               options.GetRequestInputsAndOutputsDumpDir());

    // Load it into the runtime.
    armnn::NetworkId netId = 0;
    std::string msg;
    armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
                                                MemorySource::Undefined,
                                                MemorySource::Undefined,
                                                options.IsGpuProfilingEnabled());
    try
    {
        if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
        {
            return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be loaded";
        }
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from LoadNetwork.";
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
    }

    m_NetworkIDs.push_back(netId);
    return std::make_shared<const ArmnnPreparedModel>(netId,
                                                      runtime.get(),
                                                      options.GetRequestInputsAndOutputsDumpDir(),
                                                      options.IsGpuProfilingEnabled(),
                                                      Priority::MEDIUM,
                                                      true);
}

const Capabilities& ArmnnDriverImpl::GetCapabilities(const armnn::IRuntimePtr& runtime)
{
    VLOG(DRIVER) << "ArmnnDriverImpl::GetCapabilities()";
    static const Capabilities theCapabilities = GenerateCapabilities();
    return theCapabilities;
}

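// Note: this only forgets the recorded network IDs; it does not unload the networks from the runtime.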
void ArmnnDriverImpl::ClearNetworks()
{
    m_NetworkIDs.clear();
}

} // namespace armnn_driver