//
// Copyright © 2020 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ArmnnDriverImpl.hpp"
#include "../ArmnnPreparedModel_1_3.hpp"
#include "../ModelToINetworkConverter.hpp"
#include "../SystemPropertiesUtils.hpp"

#include <log/log.h>

namespace
{

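// These are the Android system properties that can override the performance numbers this
// driver reports through getCapabilities_1_3; ParseSystemProperty (see
// SystemPropertiesUtils.hpp) falls back to the supplied default when a property is unset
// or cannot be parsed. Example with a hypothetical value, set before the service starts:
//   adb shell setprop Armnn.operandTypeTensorFloat32Performance.execTime 2.0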
const char *g_RelaxedFloat32toFloat16PerformanceExecTime = "ArmNN.relaxedFloat32toFloat16Performance.execTime";
const char *g_RelaxedFloat32toFloat16PerformancePowerUsage = "ArmNN.relaxedFloat32toFloat16Performance.powerUsage";

const char *g_OperandTypeTensorFloat32PerformanceExecTime = "Armnn.operandTypeTensorFloat32Performance.execTime";
const char *g_OperandTypeTensorFloat32PerformancePowerUsage = "Armnn.operandTypeTensorFloat32Performance.powerUsage";

const char *g_OperandTypeFloat32PerformanceExecTime = "Armnn.operandTypeFloat32Performance.execTime";
const char *g_OperandTypeFloat32PerformancePowerUsage = "Armnn.operandTypeFloat32Performance.powerUsage";

const char *g_OperandTypeTensorFloat16PerformanceExecTime = "Armnn.operandTypeTensorFloat16Performance.execTime";
const char *g_OperandTypeTensorFloat16PerformancePowerUsage = "Armnn.operandTypeTensorFloat16Performance.powerUsage";

const char *g_OperandTypeFloat16PerformanceExecTime = "Armnn.operandTypeFloat16Performance.execTime";
const char *g_OperandTypeFloat16PerformancePowerUsage = "Armnn.operandTypeFloat16Performance.powerUsage";

const char *g_OperandTypeTensorQuant8AsymmPerformanceExecTime =
        "Armnn.operandTypeTensorQuant8AsymmPerformance.execTime";
const char *g_OperandTypeTensorQuant8AsymmPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant8AsymmPerformance.powerUsage";

const char *g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime =
        "Armnn.operandTypeTensorQuant8AsymmSignedPerformance.execTime";
const char *g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant8AsymmSignedPerformance.powerUsage";

const char *g_OperandTypeTensorQuant16SymmPerformanceExecTime =
        "Armnn.operandTypeTensorQuant16SymmPerformance.execTime";
const char *g_OperandTypeTensorQuant16SymmPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant16SymmPerformance.powerUsage";

const char *g_OperandTypeTensorQuant8SymmPerformanceExecTime =
        "Armnn.operandTypeTensorQuant8SymmPerformance.execTime";
const char *g_OperandTypeTensorQuant8SymmPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant8SymmPerformance.powerUsage";

const char *g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime =
        "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.execTime";
const char *g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.powerUsage";

const char *g_OperandTypeTensorInt32PerformanceExecTime = "Armnn.operandTypeTensorInt32Performance.execTime";
const char *g_OperandTypeTensorInt32PerformancePowerUsage = "Armnn.operandTypeTensorInt32Performance.powerUsage";

const char *g_OperandTypeInt32PerformanceExecTime = "Armnn.operandTypeInt32Performance.execTime";
const char *g_OperandTypeInt32PerformancePowerUsage = "Armnn.operandTypeInt32Performance.powerUsage";

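// Reports the outcome of a prepareModel call to the NNAPI runtime through the HIDL
// callback, and checks the transport status of the callback itself.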
void NotifyCallbackAndCheck(const sp<V1_3::IPreparedModelCallback>& callback,
                            V1_3::ErrorStatus errorStatus,
                            const sp<V1_3::IPreparedModel>& preparedModelPtr)
{
    Return<void> returned = callback->notify_1_3(errorStatus, preparedModelPtr);
    // This check is required; if the callback fails and the error is not checked,
    // it will bring down the service.
    if (!returned.isOk())
    {
        ALOGE("ArmnnDriverImpl::prepareModel: hidl callback failed to return properly: %s",
              returned.description().c_str());
    }
}

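// Convenience helper: logs the failure, notifies the callback with the error and a null
// prepared model, and returns the same error for the caller to propagate.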
Return<V1_3::ErrorStatus> FailPrepareModel(V1_3::ErrorStatus error,
                                           const std::string& message,
                                           const sp<V1_3::IPreparedModelCallback>& callback)
{
    ALOGW("ArmnnDriverImpl::prepareModel: %s", message.c_str());
    NotifyCallbackAndCheck(callback, error, nullptr);
    return error;
}

} // anonymous namespace

namespace armnn_driver
{
namespace hal_1_3
{

Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
        const armnn::IRuntimePtr& runtime,
        const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
        const DriverOptions& options,
        const V1_3::Model& model,
        const sp<V1_3::IPreparedModelCallback>& cb,
        bool float32ToFloat16)
{
    ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_3()");

    if (cb.get() == nullptr)
    {
        ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel");
        return V1_3::ErrorStatus::INVALID_ARGUMENT;
    }

    if (!runtime)
    {
        return FailPrepareModel(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb);
    }

    if (!android::nn::validateModel(model))
    {
        return FailPrepareModel(V1_3::ErrorStatus::INVALID_ARGUMENT, "Invalid model passed as input", cb);
    }

    // Deliberately ignore any unsupported operations requested by the options -
    // at this point we're being asked to prepare a model that we've already declared support for
    // and the operation indices may be different to those in getSupportedOperations anyway.
    std::set<unsigned int> unsupportedOperations;
    ModelToINetworkConverter<HalPolicy> modelConverter(options.GetBackends(),
                                                       model,
                                                       unsupportedOperations);

    if (modelConverter.GetConversionResult() != ConversionResult::Success)
    {
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "ModelToINetworkConverter failed", cb);
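        // The failure has already been reported through the callback above; the
        // synchronous return value only indicates that the call itself was accepted.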
        return V1_3::ErrorStatus::NONE;
    }

    // Optimize the network
    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
    armnn::OptimizerOptions OptOptions;
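    // float32ToFloat16 is expected to reflect the model's relaxComputationFloat32toFloat16
    // flag combined with the driver's FP16 option by the caller, so relaxed-precision
    // models can be optimized to run in FP16.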
    OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;

    std::vector<std::string> errMessages;
    try
    {
        optNet = armnn::Optimize(*modelConverter.GetINetwork(),
                                 options.GetBackends(),
                                 runtime->GetDeviceSpec(),
                                 OptOptions,
                                 errMessages);
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from optimize.";
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Check that the optimized network is valid.
    if (!optNet)
    {
        std::stringstream message;
        message << "Invalid optimized network";
        for (const std::string& msg : errMessages)
        {
            message << "\n" << msg;
        }
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Export the optimized network graph to a dot file if an output dump directory
    // has been specified in the driver's arguments.
    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
                                                               options.GetRequestInputsAndOutputsDumpDir());

    // Load it into the runtime.
    armnn::NetworkId netId = 0;
    try
    {
        if (runtime->LoadNetwork(netId, std::move(optNet)) != armnn::Status::Success)
        {
            return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb);
        }
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from LoadNetwork.";
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Now that we have a networkId for the graph, rename the dump file to use it
    // so that we can associate the graph file with the input/output tensor dump files.
    RenameGraphDotFile(dotGraphFileName,
                       options.GetRequestInputsAndOutputsDumpDir(),
                       netId);

    std::unique_ptr<ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>> preparedModel(
            new ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>(
                    netId,
                    runtime.get(),
                    model,
                    options.GetRequestInputsAndOutputsDumpDir(),
                    options.IsGpuProfilingEnabled()));

    // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
    // this is enabled) before the first 'real' inference, which removes the overhead from that first inference.
    if (!preparedModel->ExecuteWithDummyInputs())
    {
        return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
    }

    if (clTunedParameters &&
        options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
    {
        // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file.
        try
        {
            clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
        }
        catch (std::exception& error)
        {
            ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
                  options.GetClTunedParametersFile().c_str(), error.what());
        }
    }

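    // preparedModel.release() hands the raw pointer to the sp<> constructed for the
    // callback argument, so the strong-pointer reference count now owns the prepared model.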
    NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());

    return V1_3::ErrorStatus::NONE;
}

Return<void> ArmnnDriverImpl::getCapabilities_1_3(const armnn::IRuntimePtr& runtime,
                                                  V1_3::IDevice::getCapabilities_1_3_cb cb)
{
    ALOGV("hal_1_3::ArmnnDriverImpl::getCapabilities_1_3()");

    V1_3::Capabilities capabilities;

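    // Fallback performance value used whenever a system property is unset or unparsable.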
    float defaultValue = .1f;

    if (runtime)
    {
        capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);

        capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);

        capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);

        capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);

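        // FLT_MAX as the base marks every operand type as prohibitively expensive, so only
        // the types explicitly updated below are advertised as usable by this driver.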
        // Set the base value for all operand types
        capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_3>({FLT_MAX, FLT_MAX});

        // Load supported operand types
        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT32,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorFloat32PerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat32PerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT32,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeFloat32PerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeFloat32PerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT16,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorFloat16PerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat16PerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT16,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeFloat16PerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeFloat16PerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_ASYMM,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_SYMM,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime,
                                                   defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage,
                                                     defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT16_SYMM,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
               {
                   .execTime =
                       ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime, defaultValue),
                   .powerUsage =
                       ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_INT32,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorInt32PerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorInt32PerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::INT32,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeInt32PerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeInt32PerformancePowerUsage, defaultValue)
               });

        cb(V1_3::ErrorStatus::NONE, capabilities);
    }
    else
    {
        capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime = 0;
        capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime = 0;

        // Set the base value for all operand types
        capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_3>({0.0f, 0.0f});

        cb(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, capabilities);
    }

    return Void();
}

} // namespace hal_1_3
} // namespace armnn_driver