Blame - 1.3/ArmnnDriverImpl.cpp - ml/android-nn-driver

blob: b2524d3b36ec1eaa1aee34023c03c4da5cffda1e [file] [log] [blame]

Kevin May	42477c1	2020-03-26 13:34:14 +0000	[diff] [blame]	1	//
				2	// Copyright © 2020 Arm Ltd. All rights reserved.
				3	// SPDX-License-Identifier: MIT
				4	//
				5
				6	#include "ArmnnDriverImpl.hpp"
				7	#include "../ArmnnPreparedModel_1_3.hpp"
				8	#include "../ModelToINetworkConverter.hpp"
				9	#include "../SystemPropertiesUtils.hpp"
				10
				11	#include <log/log.h>
				12
				13	namespace
				14	{
// Keys handed to ParseSystemProperty() by getCapabilities_1_3() to look up the
// performance figures the driver advertises. Each key comes as an
// execTime / powerUsage pair. The pointers are constexpr so that a key cannot
// be re-pointed at run time.
//
// NOTE(review): the relaxed / if / while keys are spelled with an "ArmNN."
// prefix while the operand-type keys use "Armnn.". The spellings are kept
// byte-for-byte because they are externally visible property names - confirm
// whether the difference is intentional before changing either.

// Relaxed float32 -> float16 computation
constexpr const char* g_RelaxedFloat32toFloat16PerformanceExecTime = "ArmNN.relaxedFloat32toFloat16Performance.execTime";
constexpr const char* g_RelaxedFloat32toFloat16PerformancePowerUsage = "ArmNN.relaxedFloat32toFloat16Performance.powerUsage";

// Control flow: IF
constexpr const char* g_ifPerformanceExecTime = "ArmNN.ifPerformance.execTime";
constexpr const char* g_ifPerformancePowerUsage = "ArmNN.ifPerformance.powerUsage";

// Control flow: WHILE
constexpr const char* g_whilePerformanceExecTime = "ArmNN.whilePerformance.execTime";
constexpr const char* g_whilePerformancePowerUsage = "ArmNN.whilePerformance.powerUsage";

// OperandType::TENSOR_FLOAT32
constexpr const char* g_OperandTypeTensorFloat32PerformanceExecTime = "Armnn.operandTypeTensorFloat32Performance.execTime";
constexpr const char* g_OperandTypeTensorFloat32PerformancePowerUsage = "Armnn.operandTypeTensorFloat32Performance.powerUsage";

// OperandType::FLOAT32
constexpr const char* g_OperandTypeFloat32PerformanceExecTime = "Armnn.operandTypeFloat32Performance.execTime";
constexpr const char* g_OperandTypeFloat32PerformancePowerUsage = "Armnn.operandTypeFloat32Performance.powerUsage";

// OperandType::TENSOR_FLOAT16
constexpr const char* g_OperandTypeTensorFloat16PerformanceExecTime = "Armnn.operandTypeTensorFloat16Performance.execTime";
constexpr const char* g_OperandTypeTensorFloat16PerformancePowerUsage = "Armnn.operandTypeTensorFloat16Performance.powerUsage";

// OperandType::FLOAT16
constexpr const char* g_OperandTypeFloat16PerformanceExecTime = "Armnn.operandTypeFloat16Performance.execTime";
constexpr const char* g_OperandTypeFloat16PerformancePowerUsage = "Armnn.operandTypeFloat16Performance.powerUsage";

// OperandType::TENSOR_QUANT8_ASYMM
constexpr const char* g_OperandTypeTensorQuant8AsymmPerformanceExecTime =
    "Armnn.operandTypeTensorQuant8AsymmPerformance.execTime";
constexpr const char* g_OperandTypeTensorQuant8AsymmPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant8AsymmPerformance.powerUsage";

// OperandType::TENSOR_QUANT8_ASYMM_SIGNED
constexpr const char* g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime =
    "Armnn.operandTypeTensorQuant8AsymmSignedPerformance.execTime";
constexpr const char* g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant8AsymmSignedPerformance.powerUsage";

// OperandType::TENSOR_QUANT16_SYMM
constexpr const char* g_OperandTypeTensorQuant16SymmPerformanceExecTime =
    "Armnn.operandTypeTensorQuant16SymmPerformance.execTime";
constexpr const char* g_OperandTypeTensorQuant16SymmPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant16SymmPerformance.powerUsage";

// OperandType::TENSOR_QUANT8_SYMM
constexpr const char* g_OperandTypeTensorQuant8SymmPerformanceExecTime =
    "Armnn.operandTypeTensorQuant8SymmPerformance.execTime";
constexpr const char* g_OperandTypeTensorQuant8SymmPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant8SymmPerformance.powerUsage";

// OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL
constexpr const char* g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime =
    "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.execTime";
constexpr const char* g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.powerUsage";

// OperandType::TENSOR_INT32
constexpr const char* g_OperandTypeTensorInt32PerformanceExecTime = "Armnn.operandTypeTensorInt32Performance.execTime";
constexpr const char* g_OperandTypeTensorInt32PerformancePowerUsage = "Armnn.operandTypeTensorInt32Performance.powerUsage";

// OperandType::INT32
constexpr const char* g_OperandTypeInt32PerformanceExecTime = "Armnn.operandTypeInt32Performance.execTime";
constexpr const char* g_OperandTypeInt32PerformancePowerUsage = "Armnn.operandTypeInt32Performance.powerUsage";
				67
				68
				69	void NotifyCallbackAndCheck(const sp<V1_3::IPreparedModelCallback>& callback,
				70	V1_3::ErrorStatus errorStatus,
				71	const sp<V1_3::IPreparedModel>& preparedModelPtr)
				72	{
				73	Return<void> returned = callback->notify_1_3(errorStatus, preparedModelPtr);
				74	// This check is required, if the callback fails and it isn't checked it will bring down the service
				75	if (!returned.isOk())
				76	{
				77	ALOGE("ArmnnDriverImpl::prepareModel: hidl callback failed to return properly: %s ",
				78	returned.description().c_str());
				79	}
				80	}
				81
				82	Return<V1_3::ErrorStatus> FailPrepareModel(V1_3::ErrorStatus error,
				83	const std::string& message,
				84	const sp<V1_3::IPreparedModelCallback>& callback)
				85	{
				86	ALOGW("ArmnnDriverImpl::prepareModel: %s", message.c_str());
				87	NotifyCallbackAndCheck(callback, error, nullptr);
				88	return error;
				89	}
				90
				91	} // anonymous namespace
				92
				93	namespace armnn_driver
				94	{
				95	namespace hal_1_3
				96	{
				97
				98	Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
				99	const armnn::IRuntimePtr& runtime,
				100	const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
				101	const DriverOptions& options,
				102	const V1_3::Model& model,
				103	const sp<V1_3::IPreparedModelCallback>& cb,
Narumol Prangnawarat	cad4e91	2020-06-02 12:07:43 +0100	[diff] [blame]	104	bool float32ToFloat16,
				105	V1_3::Priority priority)
Kevin May	42477c1	2020-03-26 13:34:14 +0000	[diff] [blame]	106	{
				107	ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_3()");
				108
				109	if (cb.get() == nullptr)
				110	{
				111	ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel");
				112	return V1_3::ErrorStatus::INVALID_ARGUMENT;
				113	}
				114
				115	if (!runtime)
				116	{
				117	return FailPrepareModel(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb);
				118	}
				119
				120	if (!android::nn::validateModel(model))
				121	{
				122	return FailPrepareModel(V1_3::ErrorStatus::INVALID_ARGUMENT, "Invalid model passed as input", cb);
				123	}
				124
				125	// Deliberately ignore any unsupported operations requested by the options -
				126	// at this point we're being asked to prepare a model that we've already declared support for
				127	// and the operation indices may be different to those in getSupportedOperations anyway.
				128	std::set<unsigned int> unsupportedOperations;
				129	ModelToINetworkConverter<HalPolicy> modelConverter(options.GetBackends(),
				130	model,
				131	unsupportedOperations);
				132
				133	if (modelConverter.GetConversionResult() != ConversionResult::Success)
				134	{
				135	FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "ModelToINetworkConverter failed", cb);
				136	return V1_3::ErrorStatus::NONE;
				137	}
				138
				139	// Optimize the network
				140	armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
				141	armnn::OptimizerOptions OptOptions;
				142	OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
				143
Mike Kelly	7ed56dd	2020-09-30 20:22:56 +0100	[diff] [blame^]	144	armnn::BackendOptions gpuAcc("GpuAcc",
				145	{
				146	{ "FastMathEnabled", options.IsFastMathEnabled() }
				147	});
				148	armnn::BackendOptions cpuAcc("CpuAcc",
				149	{
				150	{ "FastMathEnabled", options.IsFastMathEnabled() }
				151	});
				152	OptOptions.m_ModelOptions.push_back(gpuAcc);
				153	OptOptions.m_ModelOptions.push_back(cpuAcc);
				154
Kevin May	42477c1	2020-03-26 13:34:14 +0000	[diff] [blame]	155	std::vector<std::string> errMessages;
				156	try
				157	{
				158	optNet = armnn::Optimize(*modelConverter.GetINetwork(),
				159	options.GetBackends(),
				160	runtime->GetDeviceSpec(),
				161	OptOptions,
				162	errMessages);
				163	}
				164	catch (std::exception& e)
				165	{
				166	std::stringstream message;
				167	message << "Exception (" << e.what() << ") caught from optimize.";
				168	FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
				169	return V1_3::ErrorStatus::NONE;
				170	}
				171
				172	// Check that the optimized network is valid.
				173	if (!optNet)
				174	{
				175	std::stringstream message;
				176	message << "Invalid optimized network";
				177	for (const std::string& msg : errMessages)
				178	{
				179	message << "\n" << msg;
				180	}
				181	FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
				182	return V1_3::ErrorStatus::NONE;
				183	}
				184
				185	// Export the optimized network graph to a dot file if an output dump directory
				186	// has been specified in the drivers' arguments.
				187	std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
				188	options.GetRequestInputsAndOutputsDumpDir());
				189
				190	// Load it into the runtime.
				191	armnn::NetworkId netId = 0;
				192	try
				193	{
				194	if (runtime->LoadNetwork(netId, move(optNet)) != armnn::Status::Success)
				195	{
				196	return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb);
				197	}
				198	}
				199	catch (std::exception& e)
				200	{
				201	std::stringstream message;
				202	message << "Exception (" << e.what()<< ") caught from LoadNetwork.";
				203	FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
				204	return V1_3::ErrorStatus::NONE;
				205	}
				206
				207	// Now that we have a networkId for the graph rename the dump file to use it
				208	// so that we can associate the graph file and the input/output tensor dump files
				209	RenameGraphDotFile(dotGraphFileName,
				210	options.GetRequestInputsAndOutputsDumpDir(),
				211	netId);
				212
				213	std::unique_ptr<ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>> preparedModel(
				214	new ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>(
				215	netId,
				216	runtime.get(),
				217	model,
				218	options.GetRequestInputsAndOutputsDumpDir(),
Narumol Prangnawarat	cad4e91	2020-06-02 12:07:43 +0100	[diff] [blame]	219	options.IsGpuProfilingEnabled(),
				220	priority));
Kevin May	42477c1	2020-03-26 13:34:14 +0000	[diff] [blame]	221
				222	// Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
				223	// this is enabled) before the first 'real' inference which removes the overhead of the first inference.
				224	if (!preparedModel->ExecuteWithDummyInputs())
				225	{
				226	return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
				227	}
				228
				229	if (clTunedParameters &&
				230	options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
				231	{
				232	// Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file.
				233	try
				234	{
				235	clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
				236	}
				237	catch (std::exception& error)
				238	{
				239	ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
				240	options.GetClTunedParametersFile().c_str(), error.what());
				241	}
				242	}
				243
				244	NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
				245
				246	return V1_3::ErrorStatus::NONE;
				247	}
				248
				249	Return<void> ArmnnDriverImpl::getCapabilities_1_3(const armnn::IRuntimePtr& runtime,
				250	V1_3::IDevice::getCapabilities_1_3_cb cb)
				251	{
				252	ALOGV("hal_1_3::ArmnnDriverImpl::getCapabilities()");
				253
				254	V1_3::Capabilities capabilities;
				255
				256	float defaultValue = .1f;
				257
				258	if (runtime)
				259	{
				260	capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime =
				261	ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);
				262
Kevin May	2eaa119	2020-04-15 16:50:57 +0100	[diff] [blame]	263	capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage =
				264	ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);
				265
				266	capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime =
				267	ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);
				268
Kevin May	42477c1	2020-03-26 13:34:14 +0000	[diff] [blame]	269	capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage =
				270	ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);
				271
Kevin May	2eaa119	2020-04-15 16:50:57 +0100	[diff] [blame]	272	capabilities.ifPerformance.execTime =
				273	ParseSystemProperty(g_ifPerformanceExecTime, defaultValue);
				274
				275	capabilities.ifPerformance.powerUsage =
				276	ParseSystemProperty(g_ifPerformancePowerUsage, defaultValue);
				277
				278	capabilities.whilePerformance.execTime =
				279	ParseSystemProperty(g_whilePerformanceExecTime, defaultValue);
				280
				281	capabilities.whilePerformance.powerUsage =
				282	ParseSystemProperty(g_whilePerformancePowerUsage, defaultValue);
				283
Kevin May	42477c1	2020-03-26 13:34:14 +0000	[diff] [blame]	284	// Set the base value for all operand types
				285	capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_3>({FLT_MAX, FLT_MAX});
				286
				287	// Load supported operand types
				288	update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT32,
				289	{
				290	.execTime = ParseSystemProperty(g_OperandTypeTensorFloat32PerformanceExecTime, defaultValue),
				291	.powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat32PerformancePowerUsage, defaultValue)
				292	});
				293
				294	update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT32,
				295	{
				296	.execTime = ParseSystemProperty(g_OperandTypeFloat32PerformanceExecTime, defaultValue),
				297	.powerUsage = ParseSystemProperty(g_OperandTypeFloat32PerformancePowerUsage, defaultValue)
				298	});
				299
				300	update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT16,
				301	{
				302	.execTime = ParseSystemProperty(g_OperandTypeTensorFloat16PerformanceExecTime, defaultValue),
				303	.powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat16PerformancePowerUsage, defaultValue)
				304	});
				305
				306	update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT16,
				307	{
				308	.execTime = ParseSystemProperty(g_OperandTypeFloat16PerformanceExecTime, defaultValue),
				309	.powerUsage = ParseSystemProperty(g_OperandTypeFloat16PerformancePowerUsage, defaultValue)
				310	});
				311
				312	update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_ASYMM,
				313	{
				314	.execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformanceExecTime, defaultValue),
				315	.powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformancePowerUsage, defaultValue)
				316	});
				317
				318	update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_SYMM,
				319	{
				320	.execTime = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformanceExecTime, defaultValue),
				321	.powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformancePowerUsage, defaultValue)
				322	});
				323	update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
				324	{
				325	.execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime,
				326	defaultValue),
				327	.powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage,
				328	defaultValue)
				329	});
				330
				331	update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT16_SYMM,
				332	{
				333	.execTime = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformanceExecTime, defaultValue),
				334	.powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformancePowerUsage, defaultValue)
				335	});
				336
				337	update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
				338	{
				339	.execTime =
				340	ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime, defaultValue),
				341	.powerUsage =
				342	ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage, defaultValue)
				343	});
				344
				345	update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_INT32,
				346	{
				347	.execTime = ParseSystemProperty(g_OperandTypeTensorInt32PerformanceExecTime, defaultValue),
				348	.powerUsage = ParseSystemProperty(g_OperandTypeTensorInt32PerformancePowerUsage, defaultValue)
				349	});
				350
				351	update(&capabilities.operandPerformance, V1_3::OperandType::INT32,
				352	{
				353	.execTime = ParseSystemProperty(g_OperandTypeInt32PerformanceExecTime, defaultValue),
				354	.powerUsage = ParseSystemProperty(g_OperandTypeInt32PerformancePowerUsage, defaultValue)
				355	});
				356
				357	cb(V1_3::ErrorStatus::NONE, capabilities);
				358	}
				359	else
				360	{
Kevin May	2eaa119	2020-04-15 16:50:57 +0100	[diff] [blame]	361	capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime = 0;
				362	capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage = 0;
				363	capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime = 0;
				364	capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage = 0;
				365	capabilities.ifPerformance.execTime = 0;
				366	capabilities.ifPerformance.powerUsage = 0;
				367	capabilities.whilePerformance.execTime = 0;
				368	capabilities.whilePerformance.powerUsage = 0;
Kevin May	42477c1	2020-03-26 13:34:14 +0000	[diff] [blame]	369
				370	// Set the base value for all operand types
				371	capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_3>({0.f, 0.0f});
				372
				373	cb(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, capabilities);
				374	}
				375
				376	return Void();
				377	}
				378
				379	} // namespace hal_1_3
				380	} // namespace armnn_driver