blob: 3ecd2f824531d5d955b9d45d58bc2166a2e86652 [file] [log] [blame]
//
// Copyright © 2020 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
5
#include "ArmnnDriverImpl.hpp"
#include "../ArmnnPreparedModel_1_3.hpp"
#include "../ModelToINetworkConverter.hpp"
#include "../SystemPropertiesUtils.hpp"

#include <log/log.h>

#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
12
13namespace
14{
// Keys handed to ParseSystemProperty() by getCapabilities_1_3() to look up the performance
// figures reported for this driver. Each key comes as an execTime / powerUsage pair.
//
// The pointers are constexpr so that the keys cannot be re-pointed at runtime.
//
// NOTE(review): the relaxed / if / while keys are spelled "ArmNN." whereas the operand-type keys
// are spelled "Armnn.". The spellings are reproduced exactly as they were; presumably they have to
// match externally configured property names - confirm before normalising them.

// Relaxed float32 -> float16 computation.
constexpr const char* g_RelaxedFloat32toFloat16PerformanceExecTime =
    "ArmNN.relaxedFloat32toFloat16Performance.execTime";
constexpr const char* g_RelaxedFloat32toFloat16PerformancePowerUsage =
    "ArmNN.relaxedFloat32toFloat16Performance.powerUsage";

// Control flow: IF.
constexpr const char* g_ifPerformanceExecTime = "ArmNN.ifPerformance.execTime";
constexpr const char* g_ifPerformancePowerUsage = "ArmNN.ifPerformance.powerUsage";

// Control flow: WHILE.
constexpr const char* g_whilePerformanceExecTime = "ArmNN.whilePerformance.execTime";
constexpr const char* g_whilePerformancePowerUsage = "ArmNN.whilePerformance.powerUsage";

// Per operand type: TENSOR_FLOAT32.
constexpr const char* g_OperandTypeTensorFloat32PerformanceExecTime =
    "Armnn.operandTypeTensorFloat32Performance.execTime";
constexpr const char* g_OperandTypeTensorFloat32PerformancePowerUsage =
    "Armnn.operandTypeTensorFloat32Performance.powerUsage";

// Per operand type: FLOAT32.
constexpr const char* g_OperandTypeFloat32PerformanceExecTime =
    "Armnn.operandTypeFloat32Performance.execTime";
constexpr const char* g_OperandTypeFloat32PerformancePowerUsage =
    "Armnn.operandTypeFloat32Performance.powerUsage";

// Per operand type: TENSOR_FLOAT16.
constexpr const char* g_OperandTypeTensorFloat16PerformanceExecTime =
    "Armnn.operandTypeTensorFloat16Performance.execTime";
constexpr const char* g_OperandTypeTensorFloat16PerformancePowerUsage =
    "Armnn.operandTypeTensorFloat16Performance.powerUsage";

// Per operand type: FLOAT16.
constexpr const char* g_OperandTypeFloat16PerformanceExecTime =
    "Armnn.operandTypeFloat16Performance.execTime";
constexpr const char* g_OperandTypeFloat16PerformancePowerUsage =
    "Armnn.operandTypeFloat16Performance.powerUsage";

// Per operand type: TENSOR_QUANT8_ASYMM.
constexpr const char* g_OperandTypeTensorQuant8AsymmPerformanceExecTime =
    "Armnn.operandTypeTensorQuant8AsymmPerformance.execTime";
constexpr const char* g_OperandTypeTensorQuant8AsymmPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant8AsymmPerformance.powerUsage";

// Per operand type: TENSOR_QUANT8_ASYMM_SIGNED.
constexpr const char* g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime =
    "Armnn.operandTypeTensorQuant8AsymmSignedPerformance.execTime";
constexpr const char* g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant8AsymmSignedPerformance.powerUsage";

// Per operand type: TENSOR_QUANT16_SYMM.
constexpr const char* g_OperandTypeTensorQuant16SymmPerformanceExecTime =
    "Armnn.operandTypeTensorQuant16SymmPerformance.execTime";
constexpr const char* g_OperandTypeTensorQuant16SymmPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant16SymmPerformance.powerUsage";

// Per operand type: TENSOR_QUANT8_SYMM.
constexpr const char* g_OperandTypeTensorQuant8SymmPerformanceExecTime =
    "Armnn.operandTypeTensorQuant8SymmPerformance.execTime";
constexpr const char* g_OperandTypeTensorQuant8SymmPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant8SymmPerformance.powerUsage";

// Per operand type: TENSOR_QUANT8_SYMM_PER_CHANNEL.
constexpr const char* g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime =
    "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.execTime";
constexpr const char* g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.powerUsage";

// Per operand type: TENSOR_INT32.
constexpr const char* g_OperandTypeTensorInt32PerformanceExecTime =
    "Armnn.operandTypeTensorInt32Performance.execTime";
constexpr const char* g_OperandTypeTensorInt32PerformancePowerUsage =
    "Armnn.operandTypeTensorInt32Performance.powerUsage";

// Per operand type: INT32.
constexpr const char* g_OperandTypeInt32PerformanceExecTime =
    "Armnn.operandTypeInt32Performance.execTime";
constexpr const char* g_OperandTypeInt32PerformancePowerUsage =
    "Armnn.operandTypeInt32Performance.powerUsage";
67
68
Sadik Armagan188675f2021-02-12 17:16:42 +000069void NotifyCallbackAndCheck(const android::sp<V1_3::IPreparedModelCallback>& callback,
Kevin May42477c12020-03-26 13:34:14 +000070 V1_3::ErrorStatus errorStatus,
Sadik Armagan188675f2021-02-12 17:16:42 +000071 const android::sp<V1_3::IPreparedModel>& preparedModelPtr)
Kevin May42477c12020-03-26 13:34:14 +000072{
73 Return<void> returned = callback->notify_1_3(errorStatus, preparedModelPtr);
74 // This check is required, if the callback fails and it isn't checked it will bring down the service
75 if (!returned.isOk())
76 {
77 ALOGE("ArmnnDriverImpl::prepareModel: hidl callback failed to return properly: %s ",
78 returned.description().c_str());
79 }
80}
81
82Return<V1_3::ErrorStatus> FailPrepareModel(V1_3::ErrorStatus error,
83 const std::string& message,
Sadik Armagan188675f2021-02-12 17:16:42 +000084 const android::sp<V1_3::IPreparedModelCallback>& callback)
Kevin May42477c12020-03-26 13:34:14 +000085{
86 ALOGW("ArmnnDriverImpl::prepareModel: %s", message.c_str());
87 NotifyCallbackAndCheck(callback, error, nullptr);
88 return error;
89}
90
91} // anonymous namespace
92
93namespace armnn_driver
94{
95namespace hal_1_3
96{
97
98Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
99 const armnn::IRuntimePtr& runtime,
100 const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
101 const DriverOptions& options,
102 const V1_3::Model& model,
Sadik Armagan188675f2021-02-12 17:16:42 +0000103 const android::sp<V1_3::IPreparedModelCallback>& cb,
Narumol Prangnawaratcad4e912020-06-02 12:07:43 +0100104 bool float32ToFloat16,
105 V1_3::Priority priority)
Kevin May42477c12020-03-26 13:34:14 +0000106{
107 ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_3()");
108
109 if (cb.get() == nullptr)
110 {
111 ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel");
112 return V1_3::ErrorStatus::INVALID_ARGUMENT;
113 }
114
115 if (!runtime)
116 {
117 return FailPrepareModel(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb);
118 }
119
120 if (!android::nn::validateModel(model))
121 {
122 return FailPrepareModel(V1_3::ErrorStatus::INVALID_ARGUMENT, "Invalid model passed as input", cb);
123 }
124
125 // Deliberately ignore any unsupported operations requested by the options -
126 // at this point we're being asked to prepare a model that we've already declared support for
127 // and the operation indices may be different to those in getSupportedOperations anyway.
128 std::set<unsigned int> unsupportedOperations;
129 ModelToINetworkConverter<HalPolicy> modelConverter(options.GetBackends(),
130 model,
131 unsupportedOperations);
132
133 if (modelConverter.GetConversionResult() != ConversionResult::Success)
134 {
135 FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "ModelToINetworkConverter failed", cb);
136 return V1_3::ErrorStatus::NONE;
137 }
138
Sadik Armaganb3021432021-01-13 15:56:51 +0000139 // Serialize the network graph to a .armnn file if an output directory
140 // has been specified in the drivers' arguments.
141 auto serializedNetworkFileName =
142 SerializeNetwork(*modelConverter.GetINetwork(), options.GetRequestInputsAndOutputsDumpDir());
143
Kevin May42477c12020-03-26 13:34:14 +0000144 // Optimize the network
145 armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
146 armnn::OptimizerOptions OptOptions;
147 OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
Kevin Maydaf7dd02021-10-22 11:57:30 +0100148 OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();
Kevin May42477c12020-03-26 13:34:14 +0000149
Mike Kelly7ed56dd2020-09-30 20:22:56 +0100150 armnn::BackendOptions gpuAcc("GpuAcc",
151 {
Sadik Armaganf36e10b2021-01-11 16:34:01 +0000152 { "FastMathEnabled", options.IsFastMathEnabled() },
153 { "SaveCachedNetwork", options.SaveCachedNetwork() },
Finn Williamsf5ca16c2021-02-12 14:26:23 +0000154 { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
155 { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() }
Mike Kelly7ed56dd2020-09-30 20:22:56 +0100156 });
Finn Williamsf5ca16c2021-02-12 14:26:23 +0000157
Mike Kelly7ed56dd2020-09-30 20:22:56 +0100158 armnn::BackendOptions cpuAcc("CpuAcc",
159 {
Matthew Sloyancd639c92021-02-11 16:57:38 +0000160 { "FastMathEnabled", options.IsFastMathEnabled() },
161 { "NumberOfThreads", options.GetNumberOfThreads() }
Mike Kelly7ed56dd2020-09-30 20:22:56 +0100162 });
163 OptOptions.m_ModelOptions.push_back(gpuAcc);
164 OptOptions.m_ModelOptions.push_back(cpuAcc);
165
Kevin May42477c12020-03-26 13:34:14 +0000166 std::vector<std::string> errMessages;
167 try
168 {
169 optNet = armnn::Optimize(*modelConverter.GetINetwork(),
170 options.GetBackends(),
171 runtime->GetDeviceSpec(),
172 OptOptions,
173 errMessages);
174 }
175 catch (std::exception& e)
176 {
177 std::stringstream message;
178 message << "Exception (" << e.what() << ") caught from optimize.";
179 FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
180 return V1_3::ErrorStatus::NONE;
181 }
182
183 // Check that the optimized network is valid.
184 if (!optNet)
185 {
186 std::stringstream message;
187 message << "Invalid optimized network";
188 for (const std::string& msg : errMessages)
189 {
190 message << "\n" << msg;
191 }
192 FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
193 return V1_3::ErrorStatus::NONE;
194 }
195
196 // Export the optimized network graph to a dot file if an output dump directory
197 // has been specified in the drivers' arguments.
198 std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
199 options.GetRequestInputsAndOutputsDumpDir());
200
201 // Load it into the runtime.
202 armnn::NetworkId netId = 0;
Finn Williamsd8fb5402021-05-19 20:52:00 +0100203 std::string msg;
204 armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
205 MemorySource::Undefined,
Finn Williamsca3a3e02021-06-11 15:04:02 +0100206 MemorySource::Undefined);
Kevin May42477c12020-03-26 13:34:14 +0000207 try
208 {
Finn Williamsd8fb5402021-05-19 20:52:00 +0100209 if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
Kevin May42477c12020-03-26 13:34:14 +0000210 {
211 return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb);
212 }
213 }
214 catch (std::exception& e)
215 {
216 std::stringstream message;
217 message << "Exception (" << e.what()<< ") caught from LoadNetwork.";
218 FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
219 return V1_3::ErrorStatus::NONE;
220 }
221
Sadik Armaganb3021432021-01-13 15:56:51 +0000222 // Now that we have a networkId for the graph rename the exported files to use it
223 // so that we can associate the graph file and the input/output tensor exported files
224 RenameExportedFiles(serializedNetworkFileName,
225 dotGraphFileName,
226 options.GetRequestInputsAndOutputsDumpDir(),
227 netId);
Kevin May42477c12020-03-26 13:34:14 +0000228
229 std::unique_ptr<ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>> preparedModel(
230 new ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>(
231 netId,
232 runtime.get(),
233 model,
234 options.GetRequestInputsAndOutputsDumpDir(),
Narumol Prangnawaratcad4e912020-06-02 12:07:43 +0100235 options.IsGpuProfilingEnabled(),
Finn Williamsd8fb5402021-05-19 20:52:00 +0100236 priority,
Finn Williamsca3a3e02021-06-11 15:04:02 +0100237 options.isAsyncModelExecutionEnabled(),
238 options.getNoOfArmnnThreads()));
Kevin May42477c12020-03-26 13:34:14 +0000239
240 // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
241 // this is enabled) before the first 'real' inference which removes the overhead of the first inference.
242 if (!preparedModel->ExecuteWithDummyInputs())
243 {
244 return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
245 }
246
247 if (clTunedParameters &&
248 options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
249 {
250 // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file.
251 try
252 {
253 clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
254 }
255 catch (std::exception& error)
256 {
257 ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
258 options.GetClTunedParametersFile().c_str(), error.what());
259 }
260 }
261
262 NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
263
264 return V1_3::ErrorStatus::NONE;
265}
266
267Return<void> ArmnnDriverImpl::getCapabilities_1_3(const armnn::IRuntimePtr& runtime,
268 V1_3::IDevice::getCapabilities_1_3_cb cb)
269{
270 ALOGV("hal_1_3::ArmnnDriverImpl::getCapabilities()");
271
272 V1_3::Capabilities capabilities;
273
274 float defaultValue = .1f;
275
276 if (runtime)
277 {
278 capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime =
279 ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);
280
Kevin May2eaa1192020-04-15 16:50:57 +0100281 capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage =
282 ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);
283
284 capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime =
285 ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);
286
Kevin May42477c12020-03-26 13:34:14 +0000287 capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage =
288 ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);
289
Kevin May2eaa1192020-04-15 16:50:57 +0100290 capabilities.ifPerformance.execTime =
291 ParseSystemProperty(g_ifPerformanceExecTime, defaultValue);
292
293 capabilities.ifPerformance.powerUsage =
294 ParseSystemProperty(g_ifPerformancePowerUsage, defaultValue);
295
296 capabilities.whilePerformance.execTime =
297 ParseSystemProperty(g_whilePerformanceExecTime, defaultValue);
298
299 capabilities.whilePerformance.powerUsage =
300 ParseSystemProperty(g_whilePerformancePowerUsage, defaultValue);
301
Kevin May42477c12020-03-26 13:34:14 +0000302 // Set the base value for all operand types
303 capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_3>({FLT_MAX, FLT_MAX});
304
305 // Load supported operand types
306 update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT32,
307 {
308 .execTime = ParseSystemProperty(g_OperandTypeTensorFloat32PerformanceExecTime, defaultValue),
309 .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat32PerformancePowerUsage, defaultValue)
310 });
311
312 update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT32,
313 {
314 .execTime = ParseSystemProperty(g_OperandTypeFloat32PerformanceExecTime, defaultValue),
315 .powerUsage = ParseSystemProperty(g_OperandTypeFloat32PerformancePowerUsage, defaultValue)
316 });
317
318 update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT16,
319 {
320 .execTime = ParseSystemProperty(g_OperandTypeTensorFloat16PerformanceExecTime, defaultValue),
321 .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat16PerformancePowerUsage, defaultValue)
322 });
323
324 update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT16,
325 {
326 .execTime = ParseSystemProperty(g_OperandTypeFloat16PerformanceExecTime, defaultValue),
327 .powerUsage = ParseSystemProperty(g_OperandTypeFloat16PerformancePowerUsage, defaultValue)
328 });
329
330 update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_ASYMM,
331 {
332 .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformanceExecTime, defaultValue),
333 .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformancePowerUsage, defaultValue)
334 });
335
336 update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_SYMM,
337 {
338 .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformanceExecTime, defaultValue),
339 .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformancePowerUsage, defaultValue)
340 });
341 update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
342 {
343 .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime,
344 defaultValue),
345 .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage,
346 defaultValue)
347 });
348
349 update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT16_SYMM,
350 {
351 .execTime = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformanceExecTime, defaultValue),
352 .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformancePowerUsage, defaultValue)
353 });
354
355 update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
356 {
357 .execTime =
358 ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime, defaultValue),
359 .powerUsage =
360 ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage, defaultValue)
361 });
362
363 update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_INT32,
364 {
365 .execTime = ParseSystemProperty(g_OperandTypeTensorInt32PerformanceExecTime, defaultValue),
366 .powerUsage = ParseSystemProperty(g_OperandTypeTensorInt32PerformancePowerUsage, defaultValue)
367 });
368
369 update(&capabilities.operandPerformance, V1_3::OperandType::INT32,
370 {
371 .execTime = ParseSystemProperty(g_OperandTypeInt32PerformanceExecTime, defaultValue),
372 .powerUsage = ParseSystemProperty(g_OperandTypeInt32PerformancePowerUsage, defaultValue)
373 });
374
375 cb(V1_3::ErrorStatus::NONE, capabilities);
376 }
377 else
378 {
Kevin May2eaa1192020-04-15 16:50:57 +0100379 capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime = 0;
380 capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage = 0;
381 capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime = 0;
382 capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage = 0;
383 capabilities.ifPerformance.execTime = 0;
384 capabilities.ifPerformance.powerUsage = 0;
385 capabilities.whilePerformance.execTime = 0;
386 capabilities.whilePerformance.powerUsage = 0;
Kevin May42477c12020-03-26 13:34:14 +0000387
388 // Set the base value for all operand types
389 capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_3>({0.f, 0.0f});
390
391 cb(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, capabilities);
392 }
393
394 return Void();
395}
396
397} // namespace hal_1_3
398} // namespace armnn_driver