//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ArmnnDriverImpl.hpp"
#include "ArmnnPreparedModel.hpp"
#include "CacheDataHandler.hpp"
#include "ModelToINetworkTransformer.hpp"
#include "SystemPropertiesUtils.hpp"

#include <armnnDeserializer/IDeserializer.hpp>

#include <fcntl.h>
#include <log/log.h>
#include <sys/stat.h>
#include <unistd.h>

namespace
{

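// Builds the Capabilities structure that the driver reports to the NNAPI runtime.
// Every supported operand type is advertised with the same default performance
// figures (execTime and powerUsage); the values are placeholders, not measurements.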
Capabilities GenerateCapabilities()
{
    VLOG(DRIVER) << "ArmnnDriverImpl::GenerateCapabilities()";

    float defaultPerfValue = .1f;
    const Capabilities::PerformanceInfo defaultPerfInfo = { /* execTime */ defaultPerfValue,
                                                            /* powerUsage */ defaultPerfValue
                                                          };
    std::vector<OperandType> operandsTypes({
            OperandType::FLOAT32,
            OperandType::INT32,
            OperandType::UINT32,
            OperandType::TENSOR_FLOAT32,
            OperandType::TENSOR_INT32,
            OperandType::TENSOR_QUANT8_ASYMM,
            OperandType::BOOL,
            OperandType::TENSOR_QUANT16_SYMM,
            OperandType::TENSOR_FLOAT16,
            OperandType::TENSOR_BOOL8,
            OperandType::FLOAT16,
            OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
            OperandType::TENSOR_QUANT16_ASYMM,
            OperandType::TENSOR_QUANT8_SYMM,
            OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
    });

    std::vector<Capabilities::OperandPerformance> operandPerformances;
    operandPerformances.reserve(operandsTypes.size());

    for (auto opType : operandsTypes)
    {
        operandPerformances.push_back(
                Capabilities::OperandPerformance{ /* type */ opType, /* info */ defaultPerfInfo });
    }

    auto operandPerformanceTable =
            Capabilities::OperandPerformanceTable::create(std::move(operandPerformances)).value();

    return { /* relaxedFloat32toFloat16PerformanceScalar */ defaultPerfInfo,
             /* relaxedFloat32toFloat16PerformanceTensor */ defaultPerfInfo,
             /* operandPerformance */ std::move(operandPerformanceTable),
             /* ifPerformance */ defaultPerfInfo,
             /* whilePerformance */ defaultPerfInfo };
}

} // anonymous namespace

using namespace android::nn;

namespace armnn_driver
{

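// Returns true if the cache file descriptor behind the shared handle is usable:
// it must be non-negative and opened with read/write access.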
bool ArmnnDriverImpl::ValidateSharedHandle(const SharedHandle& sharedHandle)
{
    bool valid = true;

    if (*sharedHandle < 0)
    {
        return !valid;
    }

    int dataCacheFileAccessMode = fcntl(*sharedHandle, F_GETFL) & O_ACCMODE;
    if (dataCacheFileAccessMode != O_RDWR)
    {
        return !valid;
    }

    return valid;
}

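// Returns true if the data cache handle supplied by the NNAPI runtime is usable:
// there must be exactly one handle, a non-zero expected data size, and the file
// size must match that expectation.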
bool ArmnnDriverImpl::ValidateDataCacheHandle(const std::vector<SharedHandle>& dataCacheHandle, const size_t dataSize)
{
    bool valid = true;
    // DataCacheHandle size should always be 1 for ArmNN model
    if (dataCacheHandle.size() != 1)
    {
        return !valid;
    }

    if (dataSize == 0)
    {
        return !valid;
    }

    struct stat statBuffer;
    if (fstat(*dataCacheHandle[0], &statBuffer) == 0)
    {
        unsigned long bufferSize = statBuffer.st_size;
        if (bufferSize != dataSize)
        {
            return !valid;
        }
    }

    return ValidateSharedHandle(dataCacheHandle[0]);
}

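// Converts an NNAPI model into an Arm NN network, optimizes it, loads it into the
// runtime and wraps it in an ArmnnPreparedModel. When cache handles are supplied,
// the serialized network and any backend-specific compiled data are also written out
// so that PrepareArmnnModelFromCache() can restore the model later.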
GeneralResult<SharedPreparedModel> ArmnnDriverImpl::PrepareArmnnModel(
    const armnn::IRuntimePtr& runtime,
    const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
    const DriverOptions& options,
    const Model& model,
    const std::vector<SharedHandle>& modelCacheHandle,
    const std::vector<SharedHandle>& dataCacheHandle,
    const CacheToken& token,
    bool float32ToFloat16,
    Priority priority)
{
    VLOG(DRIVER) << "ArmnnDriverImpl::PrepareArmnnModel()";

    if (!runtime)
    {
        return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE) << "Device unavailable";
    }

    if (const auto result = validate(model); !result.ok())
    {
        return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) << "Invalid model passed as input";
    }

    // Deliberately ignore any unsupported operations requested by the options -
    // at this point we're being asked to prepare a model that we've already declared support for
    // and the operation indices may be different to those in getSupportedOperations anyway.
    std::set<unsigned int> unsupportedOperations;
    ModelToINetworkTransformer modelConverter(options.GetBackends(),
                                              model,
                                              unsupportedOperations);

    if (modelConverter.GetConversionResult() != ConversionResult::Success)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "ModelToINetworkConverter failed";
    }

    // Serialize the network graph to a .armnn file if an output directory
    // has been specified in the driver's arguments.
    std::vector<uint8_t> dataCacheData;
    bool serializeToFile = !dataCacheHandle.empty();
    auto serializedNetworkFileName =
        SerializeNetwork(*modelConverter.GetINetwork(),
                         options.GetRequestInputsAndOutputsDumpDir(),
                         dataCacheData,
                         serializeToFile);

    // Optimize the network
    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
    armnn::OptimizerOptions OptOptions;
    OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
    OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();

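    // Count how many cache files the selected backends expect and, if GpuAcc is among
    // them, remember the file descriptor of its model cache entry so the compiled
    // network can be saved to it.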
    int cachedFd = -1;
    bool saveCachedNetwork = options.SaveCachedNetwork();

    unsigned int numberOfCachedModelFiles = 0;
    if (modelCacheHandle.size() > 0)
    {
        unsigned int index = 0;
        for (auto& backend : options.GetBackends())
        {
            // modelCacheHandle size should be equal to numberOfCachedModelFiles
            // modelCacheHandle vector should be in same order as backends
            auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
            if (numberOfCacheFiles > 0)
            {
                numberOfCachedModelFiles += numberOfCacheFiles;
                // For GpuAcc numberOfCacheFiles is 1
                if (backend == armnn::Compute::GpuAcc)
                {
                    cachedFd = *modelCacheHandle[index];
                    saveCachedNetwork = true;
                }
                index += numberOfCacheFiles;
            }
        }
    }

    armnn::BackendOptions gpuAcc("GpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "SaveCachedNetwork", saveCachedNetwork },
        { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
        { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
        { "CachedFileDescriptor", cachedFd }
    });

    armnn::BackendOptions cpuAcc("CpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "NumberOfThreads", options.GetNumberOfThreads() }
    });
    OptOptions.m_ModelOptions.push_back(gpuAcc);
    OptOptions.m_ModelOptions.push_back(cpuAcc);

    std::vector<std::string> errMessages;
    try
    {
        optNet = armnn::Optimize(*modelConverter.GetINetwork(),
                                 options.GetBackends(),
                                 runtime->GetDeviceSpec(),
                                 OptOptions,
                                 errMessages);
    }
    catch (std::exception& e)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << e.what();
    }

    // Check that the optimized network is valid.
    if (!optNet)
    {
        std::stringstream message;
        message << "Invalid optimized network";
        for (const std::string& msg : errMessages)
        {
            message << "\n" << msg;
        }
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
    }

    // Export the optimized network graph to a dot file if an output dump directory
    // has been specified in the driver's arguments.
    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
                                                               options.GetRequestInputsAndOutputsDumpDir());

    // Load it into the runtime.
    armnn::NetworkId netId = 0;
    std::string msg;
    armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
                                                MemorySource::Undefined,
                                                MemorySource::Undefined,
                                                options.IsGpuProfilingEnabled());
    auto numInputs = getMainModel(model).inputIndexes.size();
    auto numOutputs = getMainModel(model).outputIndexes.size();
    try
    {
        if (runtime->LoadNetwork(netId, std::move(optNet), msg, networkProperties) != armnn::Status::Success)
        {
            return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be loaded";
        }
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from LoadNetwork.";
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
    }

    // Now that we have a networkId for the graph, rename the exported files to use it
    // so that we can associate the graph file and the input/output tensor exported files.
    RenameExportedFiles(serializedNetworkFileName,
                        dotGraphFileName,
                        options.GetRequestInputsAndOutputsDumpDir(),
                        netId);

    // Write the serialized network to the data cache handle and hash the data
    size_t hashValue = 0;
    if (dataCacheHandle.size() == 1)
    {
        write(*dataCacheHandle[0], dataCacheData.data(), dataCacheData.size());
        hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
    }

    // Fold the contents of any writable backend model cache files into the hash
    if (modelCacheHandle.size() > 0)
    {
        if (modelCacheHandle.size() == numberOfCachedModelFiles)
        {
            for (uint32_t i = 0; i < modelCacheHandle.size(); ++i)
            {
                int modelCacheFileAccessMode = fcntl(*modelCacheHandle[i], F_GETFL) & O_ACCMODE;
                if (modelCacheFileAccessMode != O_RDONLY)
                {
                    struct stat statBuffer;
                    if (fstat(*modelCacheHandle[i], &statBuffer) == 0)
                    {
                        long modelDataSize = statBuffer.st_size;
                        if (modelDataSize > 0)
                        {
                            std::vector<uint8_t> modelData(modelDataSize);
                            pread(*modelCacheHandle[i], modelData.data(), modelData.size(), 0);
                            hashValue ^= CacheDataHandlerInstance().Hash(modelData);
                        }
                    }
                }
            }
        }
    }
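
    // Register the combined hash against the caller's token so that a later call to
    // PrepareArmnnModelFromCache() can validate the cached files before reusing them.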
    if (hashValue != 0)
    {
        CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size());
    }

    bool executeWithDummyInputs = (std::find(options.GetBackends().begin(),
                                             options.GetBackends().end(),
                                             armnn::Compute::GpuAcc) != options.GetBackends().end());

    auto preparedModel = std::make_shared<const ArmnnPreparedModel>(netId,
                                                                    runtime.get(),
                                                                    model,
                                                                    options.GetRequestInputsAndOutputsDumpDir(),
                                                                    options.IsGpuProfilingEnabled(),
                                                                    priority);

    // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
    // this is enabled) before the first 'real' inference which removes the overhead of the first inference.
    // Only run this if the GpuAcc backend has been added to options.
    if (executeWithDummyInputs)
    {
        if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
        {
            return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be executed";
        }

        if (clTunedParameters &&
            options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
        {
            // Now that we've done one inference the CL kernel parameters will have been tuned,
            // so save the updated file.
            try
            {
                clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
            }
            catch (std::exception& error)
            {
                VLOG(DRIVER) << "ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file "
                             << options.GetClTunedParametersFile().c_str() << ": " << error.what();
            }
        }
    }
    return std::move(preparedModel);
}

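// Restores a prepared model from previously written cache files: the serialized Arm NN
// network is read back from the data cache handle and the backend-specific model cache
// files are re-hashed so the result can be checked against the hash registered when the
// cache was written.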
GeneralResult<SharedPreparedModel> ArmnnDriverImpl::PrepareArmnnModelFromCache(
    const armnn::IRuntimePtr& runtime,
    const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
    const DriverOptions& options,
    const std::vector<SharedHandle>& modelCacheHandle,
    const std::vector<SharedHandle>& dataCacheHandle,
    const CacheToken& token,
    bool float32ToFloat16)
{
    VLOG(DRIVER) << "ArmnnDriverImpl::PrepareArmnnModelFromCache()";

    if (!runtime)
    {
        return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE)
            << "ArmnnDriverImpl::prepareModelFromCache(): Device unavailable";
    }

    if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
            << "ArmnnDriverImpl::prepareModelFromCache(): Token size does not match!";
    }

    // Validate dataCacheHandle
    auto dataSize = CacheDataHandlerInstance().GetCacheSize(token);
    if (!ValidateDataCacheHandle(dataCacheHandle, dataSize))
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
            << "ArmnnDriverImpl::prepareModelFromCache(): Invalid data cache handle!";
    }

    // Check that the number of cached model files matches the expected value
    unsigned int numberOfCachedModelFiles = 0;
    for (auto& backend : options.GetBackends())
    {
        numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
    }
    if (modelCacheHandle.size() != numberOfCachedModelFiles)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
            << "ArmnnDriverImpl::prepareModelFromCache(): Model cache handle size does not match.";
    }

    // Read the serialized network from the data cache and hash it
    std::vector<uint8_t> dataCacheData(dataSize);
    pread(*dataCacheHandle[0], dataCacheData.data(), dataCacheData.size(), 0);
    auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData);

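    // Fold the backend model cache files into the hash and pick out the GpuAcc file
    // descriptor so the compiled CL network can be reloaded from it.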
    int gpuAccCachedFd = -1;
    if (modelCacheHandle.size() > 0)
    {
        unsigned int index = 0;
        for (auto& backend : options.GetBackends())
        {
            // modelCacheHandle size should be equal to numberOfCachedModelFiles
            // modelCacheHandle vector should be in same order as backends
            auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
            if (numberOfCacheFiles > 0)
            {
                if (!ValidateSharedHandle(modelCacheHandle[index]))
                {
                    return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
                        << "ArmnnDriverImpl::prepareModelFromCache(): Invalid model cache handle!";
                }
                int cachedFd = *modelCacheHandle[index];
                struct stat statBuffer;
                if (fstat(cachedFd, &statBuffer) == 0)
                {
                    long modelDataSize = statBuffer.st_size;
                    if (modelDataSize > 0)
                    {
                        std::vector<uint8_t> modelData(modelDataSize);
                        pread(cachedFd, modelData.data(), modelData.size(), 0);
                        hashValue ^= CacheDataHandlerInstance().Hash(modelData);

                        if (backend == armnn::Compute::GpuAcc)
                        {
                            gpuAccCachedFd = cachedFd;
                        }
                    }
                }
                index += numberOfCacheFiles;
            }
        }
    }

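    // Reject the cache if the recomputed hash does not match the value registered when
    // the cache was written.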
    if (!CacheDataHandlerInstance().Validate(token, hashValue, dataCacheData.size()))
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
            << "ArmnnDriverImpl::prepareModelFromCache(): ValidateHash() failed!";
    }

    // Deserialize the network
    armnn::INetworkPtr network = armnn::INetworkPtr(nullptr, [](armnn::INetwork*){});
    try
    {
        network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData);
    }
    catch (std::exception&)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
            << "ArmnnDriverImpl::prepareModelFromCache(): Exception caught from Deserializer!";
    }

    // Optimize the network
    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
    armnn::OptimizerOptions OptOptions;
    OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
    OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();

    armnn::BackendOptions gpuAcc("GpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "SaveCachedNetwork", false },
        { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
        { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
        { "CachedFileDescriptor", gpuAccCachedFd }
    });

    armnn::BackendOptions cpuAcc("CpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "NumberOfThreads", options.GetNumberOfThreads() }
    });
    OptOptions.m_ModelOptions.push_back(gpuAcc);
    OptOptions.m_ModelOptions.push_back(cpuAcc);

    std::vector<std::string> errMessages;
    try
    {
        optNet = armnn::Optimize(*network.get(),
                                 options.GetBackends(),
                                 runtime->GetDeviceSpec(),
                                 OptOptions,
                                 errMessages);
    }
    catch (std::exception& e)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << e.what();
    }

    // Check that the optimized network is valid.
    if (!optNet)
    {
        std::stringstream message;
        message << "Invalid optimized network";
        for (const std::string& msg : errMessages)
        {
            message << "\n" << msg;
        }
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
    }

    // Export the optimized network graph to a dot file if an output dump directory
    // has been specified in the driver's arguments.
    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
                                                               options.GetRequestInputsAndOutputsDumpDir());

    // Load it into the runtime.
    armnn::NetworkId netId = 0;
    std::string msg;
    armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
                                                MemorySource::Undefined,
                                                MemorySource::Undefined,
                                                options.IsGpuProfilingEnabled());
    try
    {
        if (runtime->LoadNetwork(netId, std::move(optNet), msg, networkProperties) != armnn::Status::Success)
        {
            return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be loaded";
        }
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from LoadNetwork.";
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
    }

    return std::make_shared<const ArmnnPreparedModel>(netId,
                                                      runtime.get(),
                                                      options.GetRequestInputsAndOutputsDumpDir(),
                                                      options.IsGpuProfilingEnabled(),
                                                      Priority::MEDIUM,
                                                      true);
}

const Capabilities& ArmnnDriverImpl::GetCapabilities(const armnn::IRuntimePtr& runtime)
{
    VLOG(DRIVER) << "ArmnnDriverImpl::GetCapabilities()";
    static const Capabilities theCapabilities = GenerateCapabilities();
    return theCapabilities;
}

} // namespace armnn_driver