Blame - 1.3/ArmnnDriverImpl.cpp - ml/android-nn-driver

2020-03-26 13:34:14 +0000

[diff] [blame]

1

//

2

3

// SPDX-License-Identifier: MIT

4

//

5

6

#include "ArmnnDriverImpl.hpp"

7

#include "../ArmnnPreparedModel_1_3.hpp"

8

#include "../ModelToINetworkConverter.hpp"

9

#include "../SystemPropertiesUtils.hpp"

#include <log/log.h>

namespace

{

Kevin May

2020-03-26 13:34:14 +0000

[diff] [blame]

15

const char *g_RelaxedFloat32toFloat16PerformanceExecTime = "ArmNN.relaxedFloat32toFloat16Performance.execTime";

16

const char *g_RelaxedFloat32toFloat16PerformancePowerUsage = "ArmNN.relaxedFloat32toFloat16Performance.powerUsage";

17

Kevin May

2020-04-15 16:50:57 +0100

[diff] [blame]

18

const char *g_ifPerformanceExecTime = "ArmNN.ifPerformance.execTime";

19

const char *g_ifPerformancePowerUsage = "ArmNN.ifPerformance.powerUsage";

20

21

const char *g_whilePerformanceExecTime = "ArmNN.whilePerformance.execTime";

22

const char *g_whilePerformancePowerUsage = "ArmNN.whilePerformance.powerUsage";

23

Kevin May

2020-03-26 13:34:14 +0000

[diff] [blame]

24

const char *g_OperandTypeTensorFloat32PerformanceExecTime = "Armnn.operandTypeTensorFloat32Performance.execTime";

25

const char *g_OperandTypeTensorFloat32PerformancePowerUsage = "Armnn.operandTypeTensorFloat32Performance.powerUsage";

26

27

const char *g_OperandTypeFloat32PerformanceExecTime = "Armnn.operandTypeFloat32Performance.execTime";

28

const char *g_OperandTypeFloat32PerformancePowerUsage = "Armnn.operandTypeFloat32Performance.powerUsage";

29

30

const char *g_OperandTypeTensorFloat16PerformanceExecTime = "Armnn.operandTypeTensorFloat16Performance.execTime";

31

const char *g_OperandTypeTensorFloat16PerformancePowerUsage = "Armnn.operandTypeTensorFloat16Performance.powerUsage";

32

33

const char *g_OperandTypeFloat16PerformanceExecTime = "Armnn.operandTypeFloat16Performance.execTime";

34

const char *g_OperandTypeFloat16PerformancePowerUsage = "Armnn.operandTypeFloat16Performance.powerUsage";

35

36

const char *g_OperandTypeTensorQuant8AsymmPerformanceExecTime =

37

"Armnn.operandTypeTensorQuant8AsymmPerformance.execTime";

38

const char *g_OperandTypeTensorQuant8AsymmPerformancePowerUsage =

39

"Armnn.operandTypeTensorQuant8AsymmPerformance.powerUsage";

40

41

const char *g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime =

42

"Armnn.operandTypeTensorQuant8AsymmSignedPerformance.execTime";

43

const char *g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage =

44

"Armnn.operandTypeTensorQuant8AsymmSignedPerformance.powerUsage";

45

46

const char *g_OperandTypeTensorQuant16SymmPerformanceExecTime =

47

"Armnn.operandTypeTensorQuant16SymmPerformance.execTime";

48

const char *g_OperandTypeTensorQuant16SymmPerformancePowerUsage =

49

"Armnn.operandTypeTensorQuant16SymmPerformance.powerUsage";

50

51

const char *g_OperandTypeTensorQuant8SymmPerformanceExecTime =

52

"Armnn.operandTypeTensorQuant8SymmPerformance.execTime";

53

const char *g_OperandTypeTensorQuant8SymmPerformancePowerUsage =

54

"Armnn.operandTypeTensorQuant8SymmPerformance.powerUsage";

55

56

const char *g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime =

57

"Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.execTime";

58

const char *g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage =

59

"Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.powerUsage";

60

61

62

const char *g_OperandTypeTensorInt32PerformanceExecTime = "Armnn.operandTypeTensorInt32Performance.execTime";

63

const char *g_OperandTypeTensorInt32PerformancePowerUsage = "Armnn.operandTypeTensorInt32Performance.powerUsage";

64

65

const char *g_OperandTypeInt32PerformanceExecTime = "Armnn.operandTypeInt32Performance.execTime";

66

const char *g_OperandTypeInt32PerformancePowerUsage = "Armnn.operandTypeInt32Performance.powerUsage";

67

68

Sadik Armagan

2021-02-12 17:16:42 +0000

[diff] [blame]

69

void NotifyCallbackAndCheck(const android::sp<V1_3::IPreparedModelCallback>& callback,

Kevin May

2020-03-26 13:34:14 +0000

[diff] [blame]

70

V1_3::ErrorStatus errorStatus,

Sadik Armagan

2021-02-12 17:16:42 +0000

[diff] [blame]

71

const android::sp<V1_3::IPreparedModel>& preparedModelPtr)

Kevin May

2020-03-26 13:34:14 +0000

[diff] [blame]

72

{

73

Return<void> returned = callback->notify_1_3(errorStatus, preparedModelPtr);

74

// This check is required, if the callback fails and it isn't checked it will bring down the service

75

if (!returned.isOk())

76

{

77

ALOGE("ArmnnDriverImpl::prepareModel: hidl callback failed to return properly: %s ",

78

returned.description().c_str());

}

}

Return<V1_3::ErrorStatus> FailPrepareModel(V1_3::ErrorStatus error,

83

const std::string& message,

Sadik Armagan

2021-02-12 17:16:42 +0000

[diff] [blame]

84

const android::sp<V1_3::IPreparedModelCallback>& callback)

Kevin May

2020-03-26 13:34:14 +0000

[diff] [blame]

85

{

86

ALOGW("ArmnnDriverImpl::prepareModel: %s", message.c_str());

87

NotifyCallbackAndCheck(callback, error, nullptr);

return error;

}

} // anonymous namespace

92

93

namespace armnn_driver

{

namespace hal_1_3

{

/// Converts a HAL 1.3 model to an Arm NN network, optimizes it, loads it into the runtime,
/// runs one warm-up inference and reports the resulting prepared model through @p cb.
///
/// @param runtime            Arm NN runtime used to optimize and load the network; if null the
///                           callback is notified with DEVICE_UNAVAILABLE.
/// @param clTunedParameters  Optional GpuAcc tuned parameters; saved to file after the warm-up
///                           inference when the options request UpdateTunedParameters mode.
/// @param options            Driver options (backends, dump directory, fast-math, caching, ...).
/// @param model              The HAL model to prepare.
/// @param cb                 Callback that receives the final status and the prepared model.
/// @param float32ToFloat16   Forwarded to OptimizerOptions::m_ReduceFp32ToFp16.
/// @param priority           Forwarded to the prepared model.
/// @return The status returned to the caller of prepareModel. Note that for conversion,
///         optimization and LoadNetwork-exception failures the error is delivered through
///         @p cb and this function itself returns NONE; the other failure paths return the
///         same error that was sent to the callback.
Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
       const armnn::IRuntimePtr& runtime,
       const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
       const DriverOptions& options,
       const V1_3::Model& model,
       const android::sp<V1_3::IPreparedModelCallback>& cb,
       bool float32ToFloat16,
       V1_3::Priority priority)
{
    ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_3()");

    // Without a callback there is nobody to notify, so only the return value carries the error.
    if (cb.get() == nullptr)
    {
        ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel");
        return V1_3::ErrorStatus::INVALID_ARGUMENT;
    }

    if (!runtime)
    {
        return FailPrepareModel(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb);
    }

    if (!android::nn::validateModel(model))
    {
        return FailPrepareModel(V1_3::ErrorStatus::INVALID_ARGUMENT, "Invalid model passed as input", cb);
    }

    // Deliberately ignore any unsupported operations requested by the options -
    // at this point we're being asked to prepare a model that we've already declared support for
    // and the operation indices may be different to those in getSupportedOperations anyway.
    std::set<unsigned int> unsupportedOperations;
    ModelToINetworkConverter<HalPolicy> modelConverter(options.GetBackends(),
                                                       model,
                                                       unsupportedOperations);

    if (modelConverter.GetConversionResult() != ConversionResult::Success)
    {
        // The failure is reported through the callback; the call itself returns NONE.
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "ModelToINetworkConverter failed", cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Serialize the network graph to a .armnn file if an output directory
    // has been specified in the drivers' arguments.
    auto serializedNetworkFileName =
        SerializeNetwork(*modelConverter.GetINetwork(), options.GetRequestInputsAndOutputsDumpDir());

    // Optimize the network
    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
    armnn::OptimizerOptions OptOptions;
    OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
    OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();

    armnn::BackendOptions gpuAcc("GpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "SaveCachedNetwork", options.SaveCachedNetwork() },
        { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
        { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() }
    });

    armnn::BackendOptions cpuAcc("CpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "NumberOfThreads", options.GetNumberOfThreads() }
    });
    OptOptions.m_ModelOptions.push_back(gpuAcc);
    OptOptions.m_ModelOptions.push_back(cpuAcc);

    std::vector<std::string> errMessages;
    try
    {
        optNet = armnn::Optimize(*modelConverter.GetINetwork(),
                                 options.GetBackends(),
                                 runtime->GetDeviceSpec(),
                                 OptOptions,
                                 errMessages);
    }
    catch (const std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from optimize.";
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Check that the optimized network is valid.
    if (!optNet)
    {
        std::stringstream message;
        message << "Invalid optimized network";
        for (const std::string& msg : errMessages)
        {
            message << "\n" << msg;
        }
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Export the optimized network graph to a dot file if an output dump directory
    // has been specified in the drivers' arguments.
    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
                                                               options.GetRequestInputsAndOutputsDumpDir());

    // Load it into the runtime.
    armnn::NetworkId netId = 0;
    std::string msg;
    armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
                                                MemorySource::Undefined,
                                                MemorySource::Undefined);
    try
    {
        if (runtime->LoadNetwork(netId, std::move(optNet), msg, networkProperties) != armnn::Status::Success)
        {
            // Surface the runtime's own error text (if any) instead of silently dropping it.
            std::string failure = "Network could not be loaded";
            if (!msg.empty())
            {
                failure += ": ";
                failure += msg;
            }
            return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, failure, cb);
        }
    }
    catch (const std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what()<< ") caught from LoadNetwork.";
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Now that we have a networkId for the graph rename the exported files to use it
    // so that we can associate the graph file and the input/output tensor exported files
    RenameExportedFiles(serializedNetworkFileName,
                        dotGraphFileName,
                        options.GetRequestInputsAndOutputsDumpDir(),
                        netId);

    std::unique_ptr<ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>> preparedModel(
            new ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>(
                    netId,
                    runtime.get(),
                    model,
                    options.GetRequestInputsAndOutputsDumpDir(),
                    options.IsGpuProfilingEnabled(),
                    priority,
                    options.isAsyncModelExecutionEnabled(),
                    options.getNoOfArmnnThreads()));

    // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
    // this is enabled) before the first 'real' inference which removes the overhead of the first inference.
    if (!preparedModel->ExecuteWithDummyInputs())
    {
        return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
    }

    if (clTunedParameters &&
        options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
    {
        // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file.
        // A failure to save is logged but does not fail model preparation.
        try
        {
            clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
        }
        catch (const std::exception& error)
        {
            ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
                  options.GetClTunedParametersFile().c_str(), error.what());
        }
    }

    // release() hands the raw pointer to the android::sp parameter, which takes over ownership.
    NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());

    return V1_3::ErrorStatus::NONE;
}

266

267

Return<void> ArmnnDriverImpl::getCapabilities_1_3(const armnn::IRuntimePtr& runtime,

268

V1_3::IDevice::getCapabilities_1_3_cb cb)

269

{

270

ALOGV("hal_1_3::ArmnnDriverImpl::getCapabilities()");

271

272

V1_3::Capabilities capabilities;

273

274

float defaultValue = .1f;

if (runtime)

{

capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime =

279

ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);

280

Kevin May

2020-04-15 16:50:57 +0100

[diff] [blame]

281

capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage =

282

ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);

283

284

capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime =

285

ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);

286

Kevin May

2020-03-26 13:34:14 +0000

[diff] [blame]

287

capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage =

288

ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);

289

Kevin May

2020-04-15 16:50:57 +0100

[diff] [blame]

290

capabilities.ifPerformance.execTime =

291

ParseSystemProperty(g_ifPerformanceExecTime, defaultValue);

292

293

capabilities.ifPerformance.powerUsage =

294

ParseSystemProperty(g_ifPerformancePowerUsage, defaultValue);

295

296

capabilities.whilePerformance.execTime =

297

ParseSystemProperty(g_whilePerformanceExecTime, defaultValue);

298

299

capabilities.whilePerformance.powerUsage =

300

ParseSystemProperty(g_whilePerformancePowerUsage, defaultValue);

301

Kevin May

2020-03-26 13:34:14 +0000

[diff] [blame]

302

// Set the base value for all operand types

303

capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_3>({FLT_MAX, FLT_MAX});

304

305

// Load supported operand types

306

update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT32,

307

{

308

.execTime = ParseSystemProperty(g_OperandTypeTensorFloat32PerformanceExecTime, defaultValue),

309

.powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat32PerformancePowerUsage, defaultValue)

310

});

311

312

update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT32,

313

{

314

.execTime = ParseSystemProperty(g_OperandTypeFloat32PerformanceExecTime, defaultValue),

315

.powerUsage = ParseSystemProperty(g_OperandTypeFloat32PerformancePowerUsage, defaultValue)

316

});

317

318

update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT16,

319

{

320

.execTime = ParseSystemProperty(g_OperandTypeTensorFloat16PerformanceExecTime, defaultValue),

321

.powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat16PerformancePowerUsage, defaultValue)

322

});

323

324

update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT16,

325

{

326

.execTime = ParseSystemProperty(g_OperandTypeFloat16PerformanceExecTime, defaultValue),

327

.powerUsage = ParseSystemProperty(g_OperandTypeFloat16PerformancePowerUsage, defaultValue)

328

});

329

330

update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_ASYMM,

331

{

332

.execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformanceExecTime, defaultValue),

333

.powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformancePowerUsage, defaultValue)

334

});

335

336

update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_SYMM,

337

{

338

.execTime = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformanceExecTime, defaultValue),

339

.powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformancePowerUsage, defaultValue)

340

});

341

update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_ASYMM_SIGNED,

342

{

343

.execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime,

344

defaultValue),

345

.powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage,

defaultValue)

});

update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT16_SYMM,

350

{

351

.execTime = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformanceExecTime, defaultValue),

352

.powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformancePowerUsage, defaultValue)

353

});

354

355

update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,

356

{

357

.execTime =

358

ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime, defaultValue),

359

.powerUsage =

360

ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage, defaultValue)

361

});

362

363

update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_INT32,

364

{

365

.execTime = ParseSystemProperty(g_OperandTypeTensorInt32PerformanceExecTime, defaultValue),

366

.powerUsage = ParseSystemProperty(g_OperandTypeTensorInt32PerformancePowerUsage, defaultValue)

367

});

368

369

update(&capabilities.operandPerformance, V1_3::OperandType::INT32,

370

{

371

.execTime = ParseSystemProperty(g_OperandTypeInt32PerformanceExecTime, defaultValue),

372

.powerUsage = ParseSystemProperty(g_OperandTypeInt32PerformancePowerUsage, defaultValue)

373

});

374

375

cb(V1_3::ErrorStatus::NONE, capabilities);

376

}

377

else

378

{

Kevin May

2020-04-15 16:50:57 +0100

[diff] [blame]

379

capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime = 0;

380

capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage = 0;

381

capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime = 0;

382

capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage = 0;

383

capabilities.ifPerformance.execTime = 0;

384

capabilities.ifPerformance.powerUsage = 0;

385

capabilities.whilePerformance.execTime = 0;

386

capabilities.whilePerformance.powerUsage = 0;

Kevin May