Blame - 1.3/ArmnnDriverImpl.cpp - ml/android-nn-driver

Kevin May

42477c1

2020-03-26 13:34:14 +0000

[diff] [blame]

1

//

2

3

// SPDX-License-Identifier: MIT

4

//

5

6

#include "ArmnnDriverImpl.hpp"

7

#include "../ArmnnPreparedModel_1_3.hpp"

8

#include "../ModelToINetworkConverter.hpp"

9

#include "../SystemPropertiesUtils.hpp"

#include <log/log.h>

namespace

{

Kevin May

42477c1

2020-03-26 13:34:14 +0000

[diff] [blame]

15

// Names of the system properties that ArmnnDriverImpl::getCapabilities_1_3 passes to
// ParseSystemProperty to obtain the performance figures it reports.
//
// NOTE: the prefix casing is not uniform ("ArmNN." for the relaxed/if/while keys, "Armnn." for the
// operand-type keys). The strings are looked up at runtime, so they are kept exactly as they are.

// Relaxed float32 -> float16 computation.
constexpr const char* g_RelaxedFloat32toFloat16PerformanceExecTime =
    "ArmNN.relaxedFloat32toFloat16Performance.execTime";
constexpr const char* g_RelaxedFloat32toFloat16PerformancePowerUsage =
    "ArmNN.relaxedFloat32toFloat16Performance.powerUsage";

// Control flow: IF.
constexpr const char* g_ifPerformanceExecTime   = "ArmNN.ifPerformance.execTime";
constexpr const char* g_ifPerformancePowerUsage = "ArmNN.ifPerformance.powerUsage";

// Control flow: WHILE.
constexpr const char* g_whilePerformanceExecTime   = "ArmNN.whilePerformance.execTime";
constexpr const char* g_whilePerformancePowerUsage = "ArmNN.whilePerformance.powerUsage";

// Floating point operand types.
constexpr const char* g_OperandTypeTensorFloat32PerformanceExecTime =
    "Armnn.operandTypeTensorFloat32Performance.execTime";
constexpr const char* g_OperandTypeTensorFloat32PerformancePowerUsage =
    "Armnn.operandTypeTensorFloat32Performance.powerUsage";

constexpr const char* g_OperandTypeFloat32PerformanceExecTime =
    "Armnn.operandTypeFloat32Performance.execTime";
constexpr const char* g_OperandTypeFloat32PerformancePowerUsage =
    "Armnn.operandTypeFloat32Performance.powerUsage";

constexpr const char* g_OperandTypeTensorFloat16PerformanceExecTime =
    "Armnn.operandTypeTensorFloat16Performance.execTime";
constexpr const char* g_OperandTypeTensorFloat16PerformancePowerUsage =
    "Armnn.operandTypeTensorFloat16Performance.powerUsage";

constexpr const char* g_OperandTypeFloat16PerformanceExecTime =
    "Armnn.operandTypeFloat16Performance.execTime";
constexpr const char* g_OperandTypeFloat16PerformancePowerUsage =
    "Armnn.operandTypeFloat16Performance.powerUsage";

// Quantized operand types.
constexpr const char* g_OperandTypeTensorQuant8AsymmPerformanceExecTime =
    "Armnn.operandTypeTensorQuant8AsymmPerformance.execTime";
constexpr const char* g_OperandTypeTensorQuant8AsymmPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant8AsymmPerformance.powerUsage";

constexpr const char* g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime =
    "Armnn.operandTypeTensorQuant8AsymmSignedPerformance.execTime";
constexpr const char* g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant8AsymmSignedPerformance.powerUsage";

constexpr const char* g_OperandTypeTensorQuant16SymmPerformanceExecTime =
    "Armnn.operandTypeTensorQuant16SymmPerformance.execTime";
constexpr const char* g_OperandTypeTensorQuant16SymmPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant16SymmPerformance.powerUsage";

constexpr const char* g_OperandTypeTensorQuant8SymmPerformanceExecTime =
    "Armnn.operandTypeTensorQuant8SymmPerformance.execTime";
constexpr const char* g_OperandTypeTensorQuant8SymmPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant8SymmPerformance.powerUsage";

constexpr const char* g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime =
    "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.execTime";
constexpr const char* g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.powerUsage";

// Integer operand types.
constexpr const char* g_OperandTypeTensorInt32PerformanceExecTime =
    "Armnn.operandTypeTensorInt32Performance.execTime";
constexpr const char* g_OperandTypeTensorInt32PerformancePowerUsage =
    "Armnn.operandTypeTensorInt32Performance.powerUsage";

constexpr const char* g_OperandTypeInt32PerformanceExecTime =
    "Armnn.operandTypeInt32Performance.execTime";
constexpr const char* g_OperandTypeInt32PerformancePowerUsage =
    "Armnn.operandTypeInt32Performance.powerUsage";

67

68

69

void NotifyCallbackAndCheck(const sp<V1_3::IPreparedModelCallback>& callback,

70

V1_3::ErrorStatus errorStatus,

71

const sp<V1_3::IPreparedModel>& preparedModelPtr)

72

{

73

Return<void> returned = callback->notify_1_3(errorStatus, preparedModelPtr);

74

// This check is required, if the callback fails and it isn't checked it will bring down the service

75

if (!returned.isOk())

76

{

77

ALOGE("ArmnnDriverImpl::prepareModel: hidl callback failed to return properly: %s ",

78

returned.description().c_str());

}

}

// Reports a failed model preparation: logs `message` as a warning, notifies `callback` with `error`
// and a null prepared model, and returns `error` so call sites can write
// `return FailPrepareModel(...)`.
//
// error    - status passed to the callback and returned to the caller.
// message  - human readable reason, written to the log only.
// callback - receiver of the failure notification; forwarded to NotifyCallbackAndCheck, which
//            dereferences it, so it must be non-null.
Return<V1_3::ErrorStatus> FailPrepareModel(V1_3::ErrorStatus error,
                                           const std::string& message,
                                           const sp<V1_3::IPreparedModelCallback>& callback)
{
    ALOGW("ArmnnDriverImpl::prepareModel: %s", message.c_str());
    NotifyCallbackAndCheck(callback, error, nullptr);
    return error;
}

} // anonymous namespace

92

93

namespace armnn_driver

{

namespace hal_1_3

{

// Prepares a HAL 1.3 model for execution: validates it, converts it to an Arm NN network, optimizes
// the network, loads it into the runtime, runs one dummy inference and finally hands the prepared
// model to the callback.
//
// runtime           - Arm NN runtime used to optimize and load the network; a null runtime is
//                     reported as DEVICE_UNAVAILABLE.
// clTunedParameters - optional GpuAcc tuned parameters; saved to file after the dummy inference
//                     when the driver runs in UpdateTunedParameters mode.
// options           - driver options (backends, dump directory, fast math, caching, threads, ...).
// model             - the HAL model to prepare.
// cb                - callback notified with the result; a null callback yields INVALID_ARGUMENT.
// float32ToFloat16  - forwarded to OptimizerOptions::m_ReduceFp32ToFp16.
// priority          - forwarded to the ArmnnPreparedModel_1_3 constructor.
//
// Returns the status of the call. NOTE(review): when conversion, optimization or an exception from
// LoadNetwork fails, the callback receives GENERAL_FAILURE but this function returns NONE, whereas
// the other failure paths return the error itself. This looks deliberate (the return value
// presumably only reports whether preparation was launched) and is preserved as-is - confirm
// against the NN HAL contract before changing it.
Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
    const armnn::IRuntimePtr& runtime,
    const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
    const DriverOptions& options,
    const V1_3::Model& model,
    const sp<V1_3::IPreparedModelCallback>& cb,
    bool float32ToFloat16,
    V1_3::Priority priority)
{
    ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_3()");

    // Without a callback there is nobody to notify, so only the return value can carry the error.
    if (cb.get() == nullptr)
    {
        ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel");
        return V1_3::ErrorStatus::INVALID_ARGUMENT;
    }

    if (!runtime)
    {
        return FailPrepareModel(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb);
    }

    if (!android::nn::validateModel(model))
    {
        return FailPrepareModel(V1_3::ErrorStatus::INVALID_ARGUMENT, "Invalid model passed as input", cb);
    }

    // Deliberately ignore any unsupported operations requested by the options -
    // at this point we're being asked to prepare a model that we've already declared support for
    // and the operation indices may be different to those in getSupportedOperations anyway.
    std::set<unsigned int> unsupportedOperations;
    ModelToINetworkConverter<HalPolicy> modelConverter(options.GetBackends(),
                                                       model,
                                                       unsupportedOperations);

    if (modelConverter.GetConversionResult() != ConversionResult::Success)
    {
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "ModelToINetworkConverter failed", cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Serialize the network graph to a .armnn file if an output directory
    // has been specified in the drivers' arguments.
    auto serializedNetworkFileName =
        SerializeNetwork(*modelConverter.GetINetwork(), options.GetRequestInputsAndOutputsDumpDir());

    // Optimize the network
    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
    armnn::OptimizerOptions OptOptions;
    OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;

    armnn::BackendOptions gpuAcc("GpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "SaveCachedNetwork", options.SaveCachedNetwork() },
        { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() }
    });
    armnn::BackendOptions cpuAcc("CpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "NumberOfThreads", options.GetNumberOfThreads() }
    });
    OptOptions.m_ModelOptions.push_back(gpuAcc);
    OptOptions.m_ModelOptions.push_back(cpuAcc);

    std::vector<std::string> errMessages;
    try
    {
        optNet = armnn::Optimize(*modelConverter.GetINetwork(),
                                 options.GetBackends(),
                                 runtime->GetDeviceSpec(),
                                 OptOptions,
                                 errMessages);
    }
    catch (const std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from optimize.";
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Check that the optimized network is valid.
    if (!optNet)
    {
        std::stringstream message;
        message << "Invalid optimized network";
        for (const std::string& msg : errMessages)
        {
            message << "\n" << msg;
        }
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Export the optimized network graph to a dot file if an output dump directory
    // has been specified in the drivers' arguments.
    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
                                                               options.GetRequestInputsAndOutputsDumpDir());

    // Load it into the runtime.
    armnn::NetworkId netId = 0;
    try
    {
        // std::move is spelled out instead of relying on argument-dependent lookup of `move`.
        if (runtime->LoadNetwork(netId, std::move(optNet)) != armnn::Status::Success)
        {
            return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb);
        }
    }
    catch (const std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what()<< ") caught from LoadNetwork.";
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Now that we have a networkId for the graph rename the exported files to use it
    // so that we can associate the graph file and the input/output tensor exported files
    RenameExportedFiles(serializedNetworkFileName,
                        dotGraphFileName,
                        options.GetRequestInputsAndOutputsDumpDir(),
                        netId);

    // Held in a unique_ptr so the model is destroyed on the failure path below; ownership is
    // released to the callback only on success.
    auto preparedModel = std::make_unique<ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>>(
        netId,
        runtime.get(),
        model,
        options.GetRequestInputsAndOutputsDumpDir(),
        options.IsGpuProfilingEnabled(),
        priority);

    // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
    // this is enabled) before the first 'real' inference which removes the overhead of the first inference.
    if (!preparedModel->ExecuteWithDummyInputs())
    {
        return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
    }

    if (clTunedParameters &&
        options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
    {
        // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file.
        // A failure to save is only logged; it does not fail the preparation.
        try
        {
            clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
        }
        catch (const std::exception& error)
        {
            ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
                  options.GetClTunedParametersFile().c_str(), error.what());
        }
    }

    NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());

    return V1_3::ErrorStatus::NONE;
}

257

258

// Reports the driver's HAL 1.3 capabilities through `cb`.
//
// runtime - Arm NN runtime; when null the device is reported as unavailable.
// cb      - result callback; invoked exactly once with the status and the capabilities.
//
// With a runtime, every performance figure is read via ParseSystemProperty (with 0.1 passed as the
// default value) and the status is NONE; operand types that are not explicitly updated keep the
// FLT_MAX base value. Without a runtime, all figures are zero and the status is DEVICE_UNAVAILABLE.
Return<void> ArmnnDriverImpl::getCapabilities_1_3(const armnn::IRuntimePtr& runtime,
                                                  V1_3::IDevice::getCapabilities_1_3_cb cb)
{
    ALOGV("hal_1_3::ArmnnDriverImpl::getCapabilities()");

    V1_3::Capabilities capabilities;

    if (!runtime)
    {
        capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime   = 0;
        capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage = 0;
        capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime   = 0;
        capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage = 0;
        capabilities.ifPerformance.execTime      = 0;
        capabilities.ifPerformance.powerUsage    = 0;
        capabilities.whilePerformance.execTime   = 0;
        capabilities.whilePerformance.powerUsage = 0;

        // Set the base value for all operand types
        capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_3>({0.f, 0.0f});

        cb(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, capabilities);
        return Void();
    }

    const float defaultValue = .1f;

    // Scalar and tensor relaxed performance are read from the same pair of properties.
    capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime =
        ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);
    capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage =
        ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);

    capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime =
        ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);
    capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage =
        ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);

    capabilities.ifPerformance.execTime =
        ParseSystemProperty(g_ifPerformanceExecTime, defaultValue);
    capabilities.ifPerformance.powerUsage =
        ParseSystemProperty(g_ifPerformancePowerUsage, defaultValue);

    capabilities.whilePerformance.execTime =
        ParseSystemProperty(g_whilePerformanceExecTime, defaultValue);
    capabilities.whilePerformance.powerUsage =
        ParseSystemProperty(g_whilePerformancePowerUsage, defaultValue);

    // Set the base value for all operand types
    capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_3>({FLT_MAX, FLT_MAX});

    // Overwrites the entry for one operand type with the figures read from the two given properties.
    const auto setOperandPerformance =
        [&capabilities, defaultValue](V1_3::OperandType type,
                                      const char* execTimeKey,
                                      const char* powerUsageKey)
        {
            update(&capabilities.operandPerformance, type,
                   {
                       .execTime = ParseSystemProperty(execTimeKey, defaultValue),
                       .powerUsage = ParseSystemProperty(powerUsageKey, defaultValue)
                   });
        };

    // Load supported operand types
    setOperandPerformance(V1_3::OperandType::TENSOR_FLOAT32,
                          g_OperandTypeTensorFloat32PerformanceExecTime,
                          g_OperandTypeTensorFloat32PerformancePowerUsage);

    setOperandPerformance(V1_3::OperandType::FLOAT32,
                          g_OperandTypeFloat32PerformanceExecTime,
                          g_OperandTypeFloat32PerformancePowerUsage);

    setOperandPerformance(V1_3::OperandType::TENSOR_FLOAT16,
                          g_OperandTypeTensorFloat16PerformanceExecTime,
                          g_OperandTypeTensorFloat16PerformancePowerUsage);

    setOperandPerformance(V1_3::OperandType::FLOAT16,
                          g_OperandTypeFloat16PerformanceExecTime,
                          g_OperandTypeFloat16PerformancePowerUsage);

    setOperandPerformance(V1_3::OperandType::TENSOR_QUANT8_ASYMM,
                          g_OperandTypeTensorQuant8AsymmPerformanceExecTime,
                          g_OperandTypeTensorQuant8AsymmPerformancePowerUsage);

    setOperandPerformance(V1_3::OperandType::TENSOR_QUANT8_SYMM,
                          g_OperandTypeTensorQuant8SymmPerformanceExecTime,
                          g_OperandTypeTensorQuant8SymmPerformancePowerUsage);

    setOperandPerformance(V1_3::OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
                          g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime,
                          g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage);

    setOperandPerformance(V1_3::OperandType::TENSOR_QUANT16_SYMM,
                          g_OperandTypeTensorQuant16SymmPerformanceExecTime,
                          g_OperandTypeTensorQuant16SymmPerformancePowerUsage);

    setOperandPerformance(V1_3::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
                          g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime,
                          g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage);

    setOperandPerformance(V1_3::OperandType::TENSOR_INT32,
                          g_OperandTypeTensorInt32PerformanceExecTime,
                          g_OperandTypeTensorInt32PerformancePowerUsage);

    setOperandPerformance(V1_3::OperandType::INT32,
                          g_OperandTypeInt32PerformanceExecTime,
                          g_OperandTypeInt32PerformancePowerUsage);

    cb(V1_3::ErrorStatus::NONE, capabilities);

    return Void();
}

} // namespace hal_1_3

389

} // namespace armnn_driver