//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ArmnnDriverImpl.hpp"
#include "../ArmnnPreparedModel_1_2.hpp"
#include "../ModelToINetworkConverter.hpp"
#include "../SystemPropertiesUtils.hpp"

#include <log/log.h>

#include <cfloat> // FLT_MAX, used as the base operand performance value

namespace
{

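// Names of the Android system properties that can override the default
// performance values reported by getCapabilities_1_2().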
const char *g_RelaxedFloat32toFloat16PerformanceExecTime = "ArmNN.relaxedFloat32toFloat16Performance.execTime";

const char *g_OperandTypeTensorFloat32PerformanceExecTime = "Armnn.operandTypeTensorFloat32Performance.execTime";
const char *g_OperandTypeTensorFloat32PerformancePowerUsage = "Armnn.operandTypeTensorFloat32Performance.powerUsage";

const char *g_OperandTypeFloat32PerformanceExecTime = "Armnn.operandTypeFloat32Performance.execTime";
const char *g_OperandTypeFloat32PerformancePowerUsage = "Armnn.operandTypeFloat32Performance.powerUsage";

const char *g_OperandTypeTensorFloat16PerformanceExecTime = "Armnn.operandTypeTensorFloat16Performance.execTime";
const char *g_OperandTypeTensorFloat16PerformancePowerUsage = "Armnn.operandTypeTensorFloat16Performance.powerUsage";

const char *g_OperandTypeFloat16PerformanceExecTime = "Armnn.operandTypeFloat16Performance.execTime";
const char *g_OperandTypeFloat16PerformancePowerUsage = "Armnn.operandTypeFloat16Performance.powerUsage";

const char *g_OperandTypeTensorQuant8AsymmPerformanceExecTime =
    "Armnn.operandTypeTensorQuant8AsymmPerformance.execTime";
const char *g_OperandTypeTensorQuant8AsymmPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant8AsymmPerformance.powerUsage";

const char *g_OperandTypeTensorQuant16SymmPerformanceExecTime =
    "Armnn.operandTypeTensorQuant16SymmPerformance.execTime";
const char *g_OperandTypeTensorQuant16SymmPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant16SymmPerformance.powerUsage";

const char *g_OperandTypeTensorInt32PerformanceExecTime = "Armnn.operandTypeTensorInt32Performance.execTime";
const char *g_OperandTypeTensorInt32PerformancePowerUsage = "Armnn.operandTypeTensorInt32Performance.powerUsage";

const char *g_OperandTypeInt32PerformanceExecTime = "Armnn.operandTypeInt32Performance.execTime";
const char *g_OperandTypeInt32PerformancePowerUsage = "Armnn.operandTypeInt32Performance.powerUsage";

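// Notifies the service of the result of model preparation via the HIDL callback,
// checking that the callback transport itself succeeded.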
void NotifyCallbackAndCheck(const sp<V1_2::IPreparedModelCallback>& callback,
                            ErrorStatus errorStatus,
                            const sp<V1_2::IPreparedModel>& preparedModelPtr)
{
    Return<void> returned = callback->notify(errorStatus, preparedModelPtr);
    // This check is required: if the callback fails and the failure isn't checked, it will bring down the service.
    if (!returned.isOk())
    {
        ALOGE("ArmnnDriverImpl::prepareModel: hidl callback failed to return properly: %s",
              returned.description().c_str());
    }
}

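// Logs the failure, reports it to the service through the callback, and returns
// the error status for the caller to propagate.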
Return<ErrorStatus> FailPrepareModel(ErrorStatus error,
                                     const std::string& message,
                                     const sp<V1_2::IPreparedModelCallback>& callback)
{
    ALOGW("ArmnnDriverImpl::prepareModel: %s", message.c_str());
    NotifyCallbackAndCheck(callback, error, nullptr);
    return error;
}

} // anonymous namespace

namespace armnn_driver
{
namespace hal_1_2
{

Return<ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2(const armnn::IRuntimePtr& runtime,
                                                           const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
                                                           const DriverOptions& options,
                                                           const V1_2::Model& model,
                                                           const sp<V1_2::IPreparedModelCallback>& cb,
                                                           bool float32ToFloat16)
{
    ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_2()");

    if (cb.get() == nullptr)
    {
        ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel");
        return ErrorStatus::INVALID_ARGUMENT;
    }

    if (!runtime)
    {
        return FailPrepareModel(ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb);
    }

    if (!android::nn::validateModel(model))
    {
        return FailPrepareModel(ErrorStatus::INVALID_ARGUMENT, "Invalid model passed as input", cb);
    }

    // Deliberately ignore any unsupported operations requested by the options -
    // at this point we're being asked to prepare a model that we've already declared support for
    // and the operation indices may be different to those in getSupportedOperations anyway.
    std::set<unsigned int> unsupportedOperations;
    ModelToINetworkConverter<HalPolicy> modelConverter(options.GetBackends(),
                                                       model,
                                                       unsupportedOperations);

    if (modelConverter.GetConversionResult() != ConversionResult::Success)
    {
        FailPrepareModel(ErrorStatus::GENERAL_FAILURE, "ModelToINetworkConverter failed", cb);
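        // The error has already been reported through the callback; returning NONE
        // indicates only that the prepareModel call itself was delivered.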
        return ErrorStatus::NONE;
    }

    // Optimize the network
    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
    armnn::OptimizerOptions optOptions;
    optOptions.m_ReduceFp32ToFp16 = float32ToFloat16;

    std::vector<std::string> errMessages;
    try
    {
        optNet = armnn::Optimize(*modelConverter.GetINetwork(),
                                 options.GetBackends(),
                                 runtime->GetDeviceSpec(),
                                 optOptions,
                                 errMessages);
    }
    catch (armnn::Exception& e)
    {
        std::stringstream message;
        message << "armnn::Exception (" << e.what() << ") caught from optimize.";
        FailPrepareModel(ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return ErrorStatus::NONE;
    }

    // Check that the optimized network is valid.
    if (!optNet)
    {
        std::stringstream message;
        message << "Invalid optimized network";
        for (const std::string& msg : errMessages)
        {
            message << "\n" << msg;
        }
        FailPrepareModel(ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return ErrorStatus::NONE;
    }

    // Export the optimized network graph to a dot file if an output dump directory
    // has been specified in the driver's arguments.
    ExportNetworkGraphToDotFile<hal_1_2::HalPolicy::Model>(*optNet, options.GetRequestInputsAndOutputsDumpDir(),
                                                           model);

    // Load it into the runtime.
    armnn::NetworkId netId = 0;
    try
    {
        if (runtime->LoadNetwork(netId, std::move(optNet)) != armnn::Status::Success)
        {
            return FailPrepareModel(ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb);
        }
    }
    catch (armnn::Exception& e)
    {
        std::stringstream message;
        message << "armnn::Exception (" << e.what() << ") caught from LoadNetwork.";
        FailPrepareModel(ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return ErrorStatus::NONE;
    }

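    // Wrap the loaded network in a prepared model object that will service
    // execution requests for it.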
    std::unique_ptr<ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>> preparedModel(
            new ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>(
                    netId,
                    runtime.get(),
                    model,
                    options.GetRequestInputsAndOutputsDumpDir(),
                    options.IsGpuProfilingEnabled()));

    // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
    // tuning is enabled) before the first 'real' inference, which removes the overhead from that first inference.
    if (!preparedModel->ExecuteWithDummyInputs())
    {
        return FailPrepareModel(ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
    }

    if (clTunedParameters &&
        options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
    {
        // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file.
        try
        {
            clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
        }
        catch (const armnn::Exception& error)
        {
            ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
                  options.GetClTunedParametersFile().c_str(), error.what());
        }
    }

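    // NotifyCallbackAndCheck wraps the raw pointer in a strong pointer (sp), which
    // then manages the prepared model's lifetime, so release the unique_ptr's
    // ownership here to avoid a double delete.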
    NotifyCallbackAndCheck(cb, ErrorStatus::NONE, preparedModel.release());

    return ErrorStatus::NONE;
}

Return<void> ArmnnDriverImpl::getCapabilities_1_2(const armnn::IRuntimePtr& runtime,
                                                  V1_2::IDevice::getCapabilities_1_2_cb cb)
{
    ALOGV("hal_1_2::ArmnnDriverImpl::getCapabilities_1_2()");

    V1_2::Capabilities capabilities;

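    // Fallback performance value, used when the corresponding system property is not set.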
    float defaultValue = 0.1f;

    if (runtime)
    {
        capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);

        capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);

        // Set the base value for all operand types
        capabilities.operandPerformance = nonExtensionOperandPerformance({FLT_MAX, FLT_MAX});

        // Load supported operand types
        update(&capabilities.operandPerformance, OperandType::TENSOR_FLOAT32,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorFloat32PerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat32PerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, OperandType::FLOAT32,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeFloat32PerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeFloat32PerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, OperandType::TENSOR_FLOAT16,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorFloat16PerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat16PerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, OperandType::FLOAT16,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeFloat16PerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeFloat16PerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, OperandType::TENSOR_QUANT8_ASYMM,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, OperandType::TENSOR_QUANT16_SYMM,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, OperandType::TENSOR_INT32,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorInt32PerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorInt32PerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, OperandType::INT32,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeInt32PerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeInt32PerformancePowerUsage, defaultValue)
               });

        cb(ErrorStatus::NONE, capabilities);
    }
    else
    {
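        // No runtime available: report zero performance and flag the device as unavailable.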
        capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime = 0;
        capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime = 0;

        // Set the base value for all operand types
        capabilities.operandPerformance = nonExtensionOperandPerformance({0.0f, 0.0f});

        cb(ErrorStatus::DEVICE_UNAVAILABLE, capabilities);
    }

    return Void();
}

} // namespace hal_1_2
} // namespace armnn_driver