Blame - 1.2/ArmnnDriverImpl.cpp - ml/android-nn-driver

blob: 8a444e5dfd3c9f84ce4fdf2113d12c315f479430 [file] [log] [blame]

Mike Kelly	b5fdf38	2019-06-11 16:35:25 +0100	[diff] [blame]	1	//
				2	// Copyright © 2017 Arm Ltd. All rights reserved.
				3	// SPDX-License-Identifier: MIT
				4	//
				5
				6	#include "ArmnnDriverImpl.hpp"
				7	#include "../ArmnnPreparedModel_1_2.hpp"
				8	#include "../ModelToINetworkConverter.hpp"
				9	#include "../SystemPropertiesUtils.hpp"
				10
				11	#include <log/log.h>
				12
				13	namespace
				14	{
				15
// Names of the system properties that getCapabilities_1_2() passes to ParseSystemProperty() in order to
// obtain the performance figures it reports. Each metric has an ".execTime" and a ".powerUsage" property.
//
// NOTE: the relaxed-precision properties are spelled with an "ArmNN." prefix while the per-operand ones use
// "Armnn.". These strings are looked up at runtime, so the spelling must not be "normalised".

// Relaxed float32 -> float16 computation.
const char* const g_RelaxedFloat32toFloat16PerformanceExecTime =
    "ArmNN.relaxedFloat32toFloat16Performance.execTime";
const char* const g_RelaxedFloat32toFloat16PerformancePowerUsage =
    "ArmNN.relaxedFloat32toFloat16Performance.powerUsage";

// OperandType::TENSOR_FLOAT32
const char* const g_OperandTypeTensorFloat32PerformanceExecTime =
    "Armnn.operandTypeTensorFloat32Performance.execTime";
const char* const g_OperandTypeTensorFloat32PerformancePowerUsage =
    "Armnn.operandTypeTensorFloat32Performance.powerUsage";

// OperandType::FLOAT32
const char* const g_OperandTypeFloat32PerformanceExecTime =
    "Armnn.operandTypeFloat32Performance.execTime";
const char* const g_OperandTypeFloat32PerformancePowerUsage =
    "Armnn.operandTypeFloat32Performance.powerUsage";

// OperandType::TENSOR_FLOAT16
const char* const g_OperandTypeTensorFloat16PerformanceExecTime =
    "Armnn.operandTypeTensorFloat16Performance.execTime";
const char* const g_OperandTypeTensorFloat16PerformancePowerUsage =
    "Armnn.operandTypeTensorFloat16Performance.powerUsage";

// OperandType::FLOAT16
const char* const g_OperandTypeFloat16PerformanceExecTime =
    "Armnn.operandTypeFloat16Performance.execTime";
const char* const g_OperandTypeFloat16PerformancePowerUsage =
    "Armnn.operandTypeFloat16Performance.powerUsage";

// OperandType::TENSOR_QUANT8_ASYMM
const char* const g_OperandTypeTensorQuant8AsymmPerformanceExecTime =
    "Armnn.operandTypeTensorQuant8AsymmPerformance.execTime";
const char* const g_OperandTypeTensorQuant8AsymmPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant8AsymmPerformance.powerUsage";

// OperandType::TENSOR_QUANT16_SYMM
const char* const g_OperandTypeTensorQuant16SymmPerformanceExecTime =
    "Armnn.operandTypeTensorQuant16SymmPerformance.execTime";
const char* const g_OperandTypeTensorQuant16SymmPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant16SymmPerformance.powerUsage";

// OperandType::TENSOR_INT32
const char* const g_OperandTypeTensorInt32PerformanceExecTime =
    "Armnn.operandTypeTensorInt32Performance.execTime";
const char* const g_OperandTypeTensorInt32PerformancePowerUsage =
    "Armnn.operandTypeTensorInt32Performance.powerUsage";

// OperandType::INT32
const char* const g_OperandTypeInt32PerformanceExecTime =
    "Armnn.operandTypeInt32Performance.execTime";
const char* const g_OperandTypeInt32PerformancePowerUsage =
    "Armnn.operandTypeInt32Performance.powerUsage";
				46
				47
Mike Kelly	b5fdf38	2019-06-11 16:35:25 +0100	[diff] [blame]	48	void NotifyCallbackAndCheck(const sp<V1_2::IPreparedModelCallback>& callback,
				49	ErrorStatus errorStatus,
				50	const sp<V1_2::IPreparedModel>& preparedModelPtr)
				51	{
Ferran Balaguer	b2397fd	2019-07-25 12:12:39 +0100	[diff] [blame^]	52	Return<void> returned = callback->notify_1_2(errorStatus, preparedModelPtr);
Mike Kelly	b5fdf38	2019-06-11 16:35:25 +0100	[diff] [blame]	53	// This check is required, if the callback fails and it isn't checked it will bring down the service
				54	if (!returned.isOk())
				55	{
				56	ALOGE("ArmnnDriverImpl::prepareModel: hidl callback failed to return properly: %s ",
				57	returned.description().c_str());
				58	}
				59	}
				60
				61	Return<ErrorStatus> FailPrepareModel(ErrorStatus error,
				62	const std::string& message,
				63	const sp<V1_2::IPreparedModelCallback>& callback)
				64	{
				65	ALOGW("ArmnnDriverImpl::prepareModel: %s", message.c_str());
				66	NotifyCallbackAndCheck(callback, error, nullptr);
				67	return error;
				68	}
				69
				70	} // anonymous namespace
				71
				72	namespace armnn_driver
				73	{
				74	namespace hal_1_2
				75	{
				76
				77	Return<ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2(const armnn::IRuntimePtr& runtime,
				78	const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
				79	const DriverOptions& options,
				80	const V1_2::Model& model,
				81	const sp<V1_2::IPreparedModelCallback>& cb,
				82	bool float32ToFloat16)
				83	{
Matteo Martincigh	0bd89a8	2019-07-02 16:53:10 +0100	[diff] [blame]	84	ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_2()");
Mike Kelly	b5fdf38	2019-06-11 16:35:25 +0100	[diff] [blame]	85
				86	if (cb.get() == nullptr)
				87	{
				88	ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel");
				89	return ErrorStatus::INVALID_ARGUMENT;
				90	}
				91
				92	if (!runtime)
				93	{
				94	return FailPrepareModel(ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb);
				95	}
				96
				97	if (!android::nn::validateModel(model))
				98	{
				99	return FailPrepareModel(ErrorStatus::INVALID_ARGUMENT, "Invalid model passed as input", cb);
				100	}
				101
				102	// Deliberately ignore any unsupported operations requested by the options -
				103	// at this point we're being asked to prepare a model that we've already declared support for
				104	// and the operation indices may be different to those in getSupportedOperations anyway.
				105	std::set<unsigned int> unsupportedOperations;
				106	ModelToINetworkConverter<HalPolicy> modelConverter(options.GetBackends(),
				107	model,
				108	unsupportedOperations);
				109
				110	if (modelConverter.GetConversionResult() != ConversionResult::Success)
				111	{
				112	FailPrepareModel(ErrorStatus::GENERAL_FAILURE, "ModelToINetworkConverter failed", cb);
				113	return ErrorStatus::NONE;
				114	}
				115
				116	// Optimize the network
				117	armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
				118	armnn::OptimizerOptions OptOptions;
				119	OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
				120
				121	std::vector<std::string> errMessages;
				122	try
				123	{
				124	optNet = armnn::Optimize(*modelConverter.GetINetwork(),
				125	options.GetBackends(),
				126	runtime->GetDeviceSpec(),
				127	OptOptions,
				128	errMessages);
				129	}
				130	catch (armnn::Exception &e)
				131	{
				132	std::stringstream message;
				133	message << "armnn::Exception (" << e.what() << ") caught from optimize.";
				134	FailPrepareModel(ErrorStatus::GENERAL_FAILURE, message.str(), cb);
				135	return ErrorStatus::NONE;
				136	}
				137
				138	// Check that the optimized network is valid.
				139	if (!optNet)
				140	{
				141	std::stringstream message;
				142	message << "Invalid optimized network";
				143	for (const std::string& msg : errMessages)
				144	{
				145	message << "\n" << msg;
				146	}
				147	FailPrepareModel(ErrorStatus::GENERAL_FAILURE, message.str(), cb);
				148	return ErrorStatus::NONE;
				149	}
				150
				151	// Export the optimized network graph to a dot file if an output dump directory
				152	// has been specified in the drivers' arguments.
				153	ExportNetworkGraphToDotFile<hal_1_2::HalPolicy::Model>(*optNet, options.GetRequestInputsAndOutputsDumpDir(),
				154	model);
				155
				156	// Load it into the runtime.
				157	armnn::NetworkId netId = 0;
				158	try
				159	{
				160	if (runtime->LoadNetwork(netId, move(optNet)) != armnn::Status::Success)
				161	{
				162	return FailPrepareModel(ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb);
				163	}
				164	}
				165	catch (armnn::Exception& e)
				166	{
				167	std::stringstream message;
				168	message << "armnn::Exception (" << e.what()<< ") caught from LoadNetwork.";
				169	FailPrepareModel(ErrorStatus::GENERAL_FAILURE, message.str(), cb);
				170	return ErrorStatus::NONE;
				171	}
				172
				173	std::unique_ptr<ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>> preparedModel(
				174	new ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>(
				175	netId,
				176	runtime.get(),
				177	model,
				178	options.GetRequestInputsAndOutputsDumpDir(),
				179	options.IsGpuProfilingEnabled()));
				180
				181	// Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
				182	// this is enabled) before the first 'real' inference which removes the overhead of the first inference.
				183	if (!preparedModel->ExecuteWithDummyInputs())
				184	{
				185	return FailPrepareModel(ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
				186	}
				187
				188	if (clTunedParameters &&
				189	options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
				190	{
				191	// Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file.
				192	try
				193	{
				194	clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
				195	}
				196	catch (const armnn::Exception& error)
				197	{
				198	ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
				199	options.GetClTunedParametersFile().c_str(), error.what());
				200	}
				201	}
				202
				203	NotifyCallbackAndCheck(cb, ErrorStatus::NONE, preparedModel.release());
				204
				205	return ErrorStatus::NONE;
				206	}
				207
				208	Return<void> ArmnnDriverImpl::getCapabilities_1_2(const armnn::IRuntimePtr& runtime,
				209	V1_2::IDevice::getCapabilities_1_2_cb cb)
				210	{
				211	ALOGV("hal_1_2::ArmnnDriverImpl::getCapabilities()");
				212
				213	V1_2::Capabilities capabilities;
				214
Ferran Balaguer	d7c8eb9	2019-07-01 13:37:44 +0100	[diff] [blame]	215	float defaultValue = .1f;
				216
Mike Kelly	b5fdf38	2019-06-11 16:35:25 +0100	[diff] [blame]	217	if (runtime)
				218	{
				219	capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime =
Ferran Balaguer	d7c8eb9	2019-07-01 13:37:44 +0100	[diff] [blame]	220	ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);
Mike Kelly	b5fdf38	2019-06-11 16:35:25 +0100	[diff] [blame]	221
FinnWilliamsArm	df655ee	2019-07-24 16:04:18 +0100	[diff] [blame]	222	capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage =
				223	ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);
Ferran Balaguer	d7c8eb9	2019-07-01 13:37:44 +0100	[diff] [blame]	224
				225	// Set the base value for all operand types
				226	capabilities.operandPerformance = nonExtensionOperandPerformance({FLT_MAX, FLT_MAX});
				227
				228	// Load supported operand types
				229	update(&capabilities.operandPerformance, OperandType::TENSOR_FLOAT32,
				230	{
				231	.execTime = ParseSystemProperty(g_OperandTypeTensorFloat32PerformanceExecTime, defaultValue),
				232	.powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat32PerformancePowerUsage, defaultValue)
				233	});
				234
				235	update(&capabilities.operandPerformance, OperandType::FLOAT32,
				236	{
				237	.execTime = ParseSystemProperty(g_OperandTypeFloat32PerformanceExecTime, defaultValue),
				238	.powerUsage = ParseSystemProperty(g_OperandTypeFloat32PerformancePowerUsage, defaultValue)
				239	});
				240
				241	update(&capabilities.operandPerformance, OperandType::TENSOR_FLOAT16,
				242	{
				243	.execTime = ParseSystemProperty(g_OperandTypeTensorFloat16PerformanceExecTime, defaultValue),
				244	.powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat16PerformancePowerUsage, defaultValue)
				245	});
				246
				247	update(&capabilities.operandPerformance, OperandType::FLOAT16,
				248	{
				249	.execTime = ParseSystemProperty(g_OperandTypeFloat16PerformanceExecTime, defaultValue),
				250	.powerUsage = ParseSystemProperty(g_OperandTypeFloat16PerformancePowerUsage, defaultValue)
				251	});
				252
				253	update(&capabilities.operandPerformance, OperandType::TENSOR_QUANT8_ASYMM,
				254	{
				255	.execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformanceExecTime, defaultValue),
				256	.powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformancePowerUsage, defaultValue)
				257	});
				258
				259	update(&capabilities.operandPerformance, OperandType::TENSOR_QUANT16_SYMM,
				260	{
				261	.execTime = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformanceExecTime, defaultValue),
				262	.powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformancePowerUsage, defaultValue)
				263	});
				264
				265	update(&capabilities.operandPerformance, OperandType::TENSOR_INT32,
				266	{
				267	.execTime = ParseSystemProperty(g_OperandTypeTensorInt32PerformanceExecTime, defaultValue),
				268	.powerUsage = ParseSystemProperty(g_OperandTypeTensorInt32PerformancePowerUsage, defaultValue)
				269	});
				270
				271	update(&capabilities.operandPerformance, OperandType::INT32,
				272	{
				273	.execTime = ParseSystemProperty(g_OperandTypeInt32PerformanceExecTime, defaultValue),
				274	.powerUsage = ParseSystemProperty(g_OperandTypeInt32PerformancePowerUsage, defaultValue)
				275	});
Mike Kelly	b5fdf38	2019-06-11 16:35:25 +0100	[diff] [blame]	276
				277	cb(ErrorStatus::NONE, capabilities);
				278	}
				279	else
				280	{
				281	capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime = 0;
				282	capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime = 0;
				283
Ferran Balaguer	d7c8eb9	2019-07-01 13:37:44 +0100	[diff] [blame]	284	// Set the base value for all operand types
				285	capabilities.operandPerformance = nonExtensionOperandPerformance({0.f, 0.0f});
				286
Mike Kelly	b5fdf38	2019-06-11 16:35:25 +0100	[diff] [blame]	287	cb(ErrorStatus::DEVICE_UNAVAILABLE, capabilities);
				288	}
				289
				290	return Void();
				291	}
				292
				293	} // namespace hal_1_2
Matteo Martincigh	0bd89a8	2019-07-02 16:53:10 +0100	[diff] [blame]	294	} // namespace armnn_driver