//
// Copyright © 2020 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ArmnnDriverImpl.hpp"
#include "../ArmnnPreparedModel_1_3.hpp"
#include "../ModelToINetworkConverter.hpp"
#include "../SystemPropertiesUtils.hpp"

#include <log/log.h>

namespace
{

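// These are the Android system properties that can override the performance numbers this
// driver reports through getCapabilities_1_3; ParseSystemProperty (see
// SystemPropertiesUtils.hpp) falls back to the supplied default when a property is unset
// or cannot be parsed. Example with a hypothetical value, set before the service starts:
//   adb shell setprop Armnn.operandTypeTensorFloat32Performance.execTime 2.0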
const char *g_RelaxedFloat32toFloat16PerformanceExecTime = "ArmNN.relaxedFloat32toFloat16Performance.execTime";
const char *g_RelaxedFloat32toFloat16PerformancePowerUsage = "ArmNN.relaxedFloat32toFloat16Performance.powerUsage";

const char *g_OperandTypeTensorFloat32PerformanceExecTime = "Armnn.operandTypeTensorFloat32Performance.execTime";
const char *g_OperandTypeTensorFloat32PerformancePowerUsage = "Armnn.operandTypeTensorFloat32Performance.powerUsage";

const char *g_OperandTypeFloat32PerformanceExecTime = "Armnn.operandTypeFloat32Performance.execTime";
const char *g_OperandTypeFloat32PerformancePowerUsage = "Armnn.operandTypeFloat32Performance.powerUsage";

const char *g_OperandTypeTensorFloat16PerformanceExecTime = "Armnn.operandTypeTensorFloat16Performance.execTime";
const char *g_OperandTypeTensorFloat16PerformancePowerUsage = "Armnn.operandTypeTensorFloat16Performance.powerUsage";

const char *g_OperandTypeFloat16PerformanceExecTime = "Armnn.operandTypeFloat16Performance.execTime";
const char *g_OperandTypeFloat16PerformancePowerUsage = "Armnn.operandTypeFloat16Performance.powerUsage";

const char *g_OperandTypeTensorQuant8AsymmPerformanceExecTime =
        "Armnn.operandTypeTensorQuant8AsymmPerformance.execTime";
const char *g_OperandTypeTensorQuant8AsymmPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant8AsymmPerformance.powerUsage";

const char *g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime =
        "Armnn.operandTypeTensorQuant8AsymmSignedPerformance.execTime";
const char *g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant8AsymmSignedPerformance.powerUsage";

const char *g_OperandTypeTensorQuant16SymmPerformanceExecTime =
        "Armnn.operandTypeTensorQuant16SymmPerformance.execTime";
const char *g_OperandTypeTensorQuant16SymmPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant16SymmPerformance.powerUsage";

const char *g_OperandTypeTensorQuant8SymmPerformanceExecTime =
        "Armnn.operandTypeTensorQuant8SymmPerformance.execTime";
const char *g_OperandTypeTensorQuant8SymmPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant8SymmPerformance.powerUsage";

const char *g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime =
        "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.execTime";
const char *g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.powerUsage";

const char *g_OperandTypeTensorInt32PerformanceExecTime = "Armnn.operandTypeTensorInt32Performance.execTime";
const char *g_OperandTypeTensorInt32PerformancePowerUsage = "Armnn.operandTypeTensorInt32Performance.powerUsage";

const char *g_OperandTypeInt32PerformanceExecTime = "Armnn.operandTypeInt32Performance.execTime";
const char *g_OperandTypeInt32PerformancePowerUsage = "Armnn.operandTypeInt32Performance.powerUsage";

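// Reports the outcome of a prepareModel call to the NNAPI runtime through the HIDL
// callback, and checks the transport status of the callback itself.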
void NotifyCallbackAndCheck(const sp<V1_3::IPreparedModelCallback>& callback,
                            V1_3::ErrorStatus errorStatus,
                            const sp<V1_3::IPreparedModel>& preparedModelPtr)
{
    Return<void> returned = callback->notify_1_3(errorStatus, preparedModelPtr);
    // This check is required; if the callback fails and the error is not checked,
    // it will bring down the service.
    if (!returned.isOk())
    {
        ALOGE("ArmnnDriverImpl::prepareModel: hidl callback failed to return properly: %s",
              returned.description().c_str());
    }
}

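// Convenience helper: logs the failure, notifies the callback with the error and a null
// prepared model, and returns the same error for the caller to propagate.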
Return<V1_3::ErrorStatus> FailPrepareModel(V1_3::ErrorStatus error,
                                           const std::string& message,
                                           const sp<V1_3::IPreparedModelCallback>& callback)
{
    ALOGW("ArmnnDriverImpl::prepareModel: %s", message.c_str());
    NotifyCallbackAndCheck(callback, error, nullptr);
    return error;
}

} // anonymous namespace

namespace armnn_driver
{
namespace hal_1_3
{

Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
        const armnn::IRuntimePtr& runtime,
        const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
        const DriverOptions& options,
        const V1_3::Model& model,
        const sp<V1_3::IPreparedModelCallback>& cb,
        bool float32ToFloat16)
{
    ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_3()");

    if (cb.get() == nullptr)
    {
        ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel");
        return V1_3::ErrorStatus::INVALID_ARGUMENT;
    }

    if (!runtime)
    {
        return FailPrepareModel(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb);
    }

    if (!android::nn::validateModel(model))
    {
        return FailPrepareModel(V1_3::ErrorStatus::INVALID_ARGUMENT, "Invalid model passed as input", cb);
    }

    // Deliberately ignore any unsupported operations requested by the options -
    // at this point we're being asked to prepare a model that we've already declared support for
    // and the operation indices may be different to those in getSupportedOperations anyway.
    std::set<unsigned int> unsupportedOperations;
    ModelToINetworkConverter<HalPolicy> modelConverter(options.GetBackends(),
                                                       model,
                                                       unsupportedOperations);

    if (modelConverter.GetConversionResult() != ConversionResult::Success)
    {
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "ModelToINetworkConverter failed", cb);
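        // The failure has already been reported through the callback above; the
        // synchronous return value only indicates that the call itself was accepted.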
        return V1_3::ErrorStatus::NONE;
    }

    // Optimize the network
    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
    armnn::OptimizerOptions OptOptions;
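    // float32ToFloat16 is expected to reflect the model's relaxComputationFloat32toFloat16
    // flag combined with the driver's FP16 option by the caller, so relaxed-precision
    // models can be optimized to run in FP16.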
    OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;

    std::vector<std::string> errMessages;
    try
    {
        optNet = armnn::Optimize(*modelConverter.GetINetwork(),
                                 options.GetBackends(),
                                 runtime->GetDeviceSpec(),
                                 OptOptions,
                                 errMessages);
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from optimize.";
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Check that the optimized network is valid.
    if (!optNet)
    {
        std::stringstream message;
        message << "Invalid optimized network";
        for (const std::string& msg : errMessages)
        {
            message << "\n" << msg;
        }
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Export the optimized network graph to a dot file if an output dump directory
    // has been specified in the driver's arguments.
    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
                                                               options.GetRequestInputsAndOutputsDumpDir());

    // Load it into the runtime.
    armnn::NetworkId netId = 0;
    try
    {
        if (runtime->LoadNetwork(netId, std::move(optNet)) != armnn::Status::Success)
        {
            return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb);
        }
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from LoadNetwork.";
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Now that we have a networkId for the graph, rename the dump file to use it
    // so that we can associate the graph file with the input/output tensor dump files.
    RenameGraphDotFile(dotGraphFileName,
                       options.GetRequestInputsAndOutputsDumpDir(),
                       netId);

    std::unique_ptr<ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>> preparedModel(
            new ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>(
                    netId,
                    runtime.get(),
                    model,
                    options.GetRequestInputsAndOutputsDumpDir(),
                    options.IsGpuProfilingEnabled()));

    // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
    // this is enabled) before the first 'real' inference, which removes the overhead from that first inference.
    if (!preparedModel->ExecuteWithDummyInputs())
    {
        return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
    }

    if (clTunedParameters &&
        options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
    {
        // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file.
        try
        {
            clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
        }
        catch (std::exception& error)
        {
            ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
                  options.GetClTunedParametersFile().c_str(), error.what());
        }
    }

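    // preparedModel.release() hands the raw pointer to the sp<> constructed for the
    // callback argument, so the strong-pointer reference count now owns the prepared model.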
    NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());

    return V1_3::ErrorStatus::NONE;
}

Return<void> ArmnnDriverImpl::getCapabilities_1_3(const armnn::IRuntimePtr& runtime,
                                                  V1_3::IDevice::getCapabilities_1_3_cb cb)
{
    ALOGV("hal_1_3::ArmnnDriverImpl::getCapabilities_1_3()");

    V1_3::Capabilities capabilities;

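    // Fallback performance value used whenever a system property is unset or unparsable.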
    float defaultValue = .1f;

    if (runtime)
    {
        capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);

        capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);

        capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);

        capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);

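        // FLT_MAX as the base marks every operand type as prohibitively expensive, so only
        // the types explicitly updated below are advertised as usable by this driver.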
        // Set the base value for all operand types
        capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_3>({FLT_MAX, FLT_MAX});

        // Load supported operand types
        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT32,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorFloat32PerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat32PerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT32,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeFloat32PerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeFloat32PerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT16,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorFloat16PerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat16PerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT16,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeFloat16PerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeFloat16PerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_ASYMM,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_SYMM,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime,
                                                   defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage,
                                                     defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT16_SYMM,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
               {
                   .execTime =
                       ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime, defaultValue),
                   .powerUsage =
                       ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_INT32,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorInt32PerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorInt32PerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::INT32,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeInt32PerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeInt32PerformancePowerUsage, defaultValue)
               });

        cb(V1_3::ErrorStatus::NONE, capabilities);
    }
    else
    {
        capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime = 0;
        capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime = 0;

        // Set the base value for all operand types
        capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_3>({0.0f, 0.0f});

        cb(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, capabilities);
    }

    return Void();
}

} // namespace hal_1_3
} // namespace armnn_driver