//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ArmnnDriverImpl.hpp"
#include "ArmnnPreparedModel.hpp"
#include "CacheDataHandler.hpp"
#include "ModelToINetworkTransformer.hpp"
#include "SystemPropertiesUtils.hpp"

#include <armnnDeserializer/IDeserializer.hpp>

#include <log/log.h>
#include <sys/stat.h>

namespace
{

Capabilities GenerateCapabilities()
{
    VLOG(DRIVER) << "ArmnnDriverImpl::GenerateCapabilities()";

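    // The same default performance figures are reported for every supported operand type.
    // These are relative numbers (lower is better) rather than values measured on a particular device.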
    float defaultPerfValue = .1f;
    const Capabilities::PerformanceInfo defaultPerfInfo = { /* execTime */ defaultPerfValue,
                                                            /* powerUsage */ defaultPerfValue
                                                          };
    std::vector<OperandType> operandsTypes({
            OperandType::FLOAT32,
            OperandType::INT32,
            OperandType::UINT32,
            OperandType::TENSOR_FLOAT32,
            OperandType::TENSOR_INT32,
            OperandType::TENSOR_QUANT8_ASYMM,
            OperandType::BOOL,
            OperandType::TENSOR_QUANT16_SYMM,
            OperandType::TENSOR_FLOAT16,
            OperandType::TENSOR_BOOL8,
            OperandType::FLOAT16,
            OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
            OperandType::TENSOR_QUANT16_ASYMM,
            OperandType::TENSOR_QUANT8_SYMM,
            OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
    });

    std::vector<Capabilities::OperandPerformance> operandPerformances;
    operandPerformances.reserve(operandsTypes.size());

    for (auto opType : operandsTypes)
    {
        operandPerformances.push_back(
                Capabilities::OperandPerformance{ /* type */ opType, /* info */ defaultPerfInfo });
    }

    auto operandPerformanceTable =
            Capabilities::OperandPerformanceTable::create(std::move(operandPerformances)).value();

    return { /* relaxedFloat32toFloat16PerformanceScalar */ defaultPerfInfo,
             /* relaxedFloat32toFloat16PerformanceTensor */ defaultPerfInfo,
             /* operandPerformance */ std::move(operandPerformanceTable),
             /* ifPerformance */ defaultPerfInfo,
             /* whilePerformance */ defaultPerfInfo };
}

} // anonymous namespace

using namespace android::nn;

namespace armnn_driver
{

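// Returns true if the shared handle wraps a valid file descriptor that was opened with read/write access.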
bool ArmnnDriverImpl::ValidateSharedHandle(const SharedHandle& sharedHandle)
{
    bool valid = true;

    if (*sharedHandle < 0)
    {
        return !valid;
    }

    int dataCacheFileAccessMode = fcntl(*sharedHandle, F_GETFL) & O_ACCMODE;
    if (dataCacheFileAccessMode != O_RDWR)
    {
        return !valid;
    }

    return valid;
}

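// Returns true if exactly one data cache handle of the expected size was provided and it is valid.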
bool ArmnnDriverImpl::ValidateDataCacheHandle(const std::vector<SharedHandle>& dataCacheHandle, const size_t dataSize)
{
    bool valid = true;
    // The dataCacheHandle size should always be 1 for an ArmNN model
    if (dataCacheHandle.size() != 1)
    {
        return !valid;
    }

    if (dataSize == 0)
    {
        return !valid;
    }

    struct stat statBuffer;
    if (fstat(*dataCacheHandle[0], &statBuffer) == 0)
    {
        unsigned long bufferSize = statBuffer.st_size;
        if (bufferSize != dataSize)
        {
            return !valid;
        }
    }

    return ValidateSharedHandle(dataCacheHandle[0]);
}

std::vector<armnn::NetworkId>& ArmnnDriverImpl::GetLoadedNetworks()
{
    return m_NetworkIDs;
}

GeneralResult<SharedPreparedModel> ArmnnDriverImpl::PrepareArmnnModel(
    const armnn::IRuntimePtr& runtime,
    const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
    const DriverOptions& options,
    const Model& model,
    const std::vector<SharedHandle>& modelCacheHandle,
    const std::vector<SharedHandle>& dataCacheHandle,
    const CacheToken& token,
    bool float32ToFloat16,
    Priority priority)
{
    VLOG(DRIVER) << "ArmnnDriverImpl::PrepareArmnnModel()";

    if (!runtime)
    {
        return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE) << "Device unavailable";
    }

    if (const auto result = validate(model); !result.ok())
    {
        return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) << "Invalid model passed as input";
    }

    // Deliberately ignore any unsupported operations requested by the options -
    // at this point we're being asked to prepare a model that we've already declared support for
    // and the operation indices may be different to those in getSupportedOperations anyway.
    std::set<unsigned int> unsupportedOperations;
    ModelToINetworkTransformer modelConverter(options.GetBackends(),
                                              model,
                                              unsupportedOperations);

    if (modelConverter.GetConversionResult() != ConversionResult::Success)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "ModelToINetworkConverter failed";
    }

    // Serialize the network graph to a .armnn file if an output directory
    // has been specified in the driver's arguments.
    std::vector<uint8_t> dataCacheData;
    bool serializeToFile = !dataCacheHandle.empty();
    auto serializedNetworkFileName =
        SerializeNetwork(*modelConverter.GetINetwork(),
                         options.GetRequestInputsAndOutputsDumpDir(),
                         dataCacheData,
                         serializeToFile);

    // Optimize the network
    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
    armnn::OptimizerOptions OptOptions;
    OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
    OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();

    int cachedFd = -1;
    bool saveCachedNetwork = options.SaveCachedNetwork();

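    // If the runtime has provided model cache handles, hand the GpuAcc handle's file descriptor to the
    // backend (via the "CachedFileDescriptor" option below) so the compiled network can be saved to it.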
    unsigned int numberOfCachedModelFiles = 0;
    if (modelCacheHandle.size() > 0)
    {
        unsigned int index = 0;
        for (auto& backend : options.GetBackends())
        {
            // The modelCacheHandle size should be equal to numberOfCachedModelFiles
            // and the modelCacheHandle vector should be in the same order as the backends
            auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
            if (numberOfCacheFiles > 0)
            {
                numberOfCachedModelFiles += numberOfCacheFiles;
                // For GpuAcc numberOfCacheFiles is 1
                if (backend == armnn::Compute::GpuAcc)
                {
                    cachedFd = *modelCacheHandle[index];
                    saveCachedNetwork = true;
                }
                index += numberOfCacheFiles;
            }
        }
    }

    armnn::BackendOptions gpuAcc("GpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "SaveCachedNetwork", saveCachedNetwork },
        { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
        { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
        { "CachedFileDescriptor", cachedFd }
    });

    armnn::BackendOptions cpuAcc("CpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "NumberOfThreads", options.GetNumberOfThreads() }
    });
    OptOptions.m_ModelOptions.push_back(gpuAcc);
    OptOptions.m_ModelOptions.push_back(cpuAcc);

    std::vector<std::string> errMessages;
    try
    {
        optNet = armnn::Optimize(*modelConverter.GetINetwork(),
                                 options.GetBackends(),
                                 runtime->GetDeviceSpec(),
                                 OptOptions,
                                 errMessages);
    }
    catch (std::exception& e)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << e.what();
    }

    // Check that the optimized network is valid.
    if (!optNet)
    {
        std::stringstream message;
        message << "Invalid optimized network";
        for (const std::string& msg : errMessages)
        {
            message << "\n" << msg;
        }
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
    }

    // Export the optimized network graph to a dot file if an output dump directory
    // has been specified in the driver's arguments.
    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
                                                               options.GetRequestInputsAndOutputsDumpDir());

    // Load it into the runtime.
    armnn::NetworkId netId = 0;
    std::string msg;
    armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
                                                MemorySource::Undefined,
                                                MemorySource::Undefined,
                                                options.IsGpuProfilingEnabled());
    auto numInputs = getMainModel(model).inputIndexes.size();
    auto numOutputs = getMainModel(model).outputIndexes.size();
    try
    {
        if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
        {
            return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be loaded";
        }
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from LoadNetwork.";
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
    }

    // Now that we have a networkId for the graph, rename the exported files to use it
    // so that we can associate the graph file with the exported input/output tensor files.
    RenameExportedFiles(serializedNetworkFileName,
                        dotGraphFileName,
                        options.GetRequestInputsAndOutputsDumpDir(),
                        netId);

    // Cache the model
    size_t hashValue = 0;
    if (dataCacheHandle.size() == 1)
    {
        write(*dataCacheHandle[0], dataCacheData.data(), dataCacheData.size());
        hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
    }

    // Cache the model data
    if (modelCacheHandle.size() > 0)
    {
        if (modelCacheHandle.size() == numberOfCachedModelFiles)
        {
            for (uint32_t i = 0; i < modelCacheHandle.size(); ++i)
            {
                int modelCacheFileAccessMode = fcntl(*modelCacheHandle[i], F_GETFL) & O_ACCMODE;
                if (modelCacheFileAccessMode != O_RDONLY)
                {
                    struct stat statBuffer;
                    if (fstat(*modelCacheHandle[i], &statBuffer) == 0)
                    {
                        long modelDataSize = statBuffer.st_size;
                        if (modelDataSize > 0)
                        {
                            std::vector<uint8_t> modelData(modelDataSize);
                            pread(*modelCacheHandle[i], modelData.data(), modelData.size(), 0);
                            hashValue ^= CacheDataHandlerInstance().Hash(modelData);
                        }
                    }
                }
            }
        }
    }
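    // Register the combined hash against the cache token so that PrepareArmnnModelFromCache can later
    // verify that the cache files it is handed correspond to this model.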
    if (hashValue != 0)
    {
        CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size());
    }

    bool executeWithDummyInputs = (std::find(options.GetBackends().begin(),
                                             options.GetBackends().end(),
                                             armnn::Compute::GpuAcc) != options.GetBackends().end());

    m_NetworkIDs.push_back(netId);
    auto preparedModel = std::make_shared<const ArmnnPreparedModel>(netId,
                                                                    runtime.get(),
                                                                    model,
                                                                    options.GetRequestInputsAndOutputsDumpDir(),
                                                                    options.IsGpuProfilingEnabled(),
                                                                    priority);

    // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
    // this is enabled) before the first 'real' inference, which removes the overhead of the first inference.
    // Only run this if the GpuAcc backend has been added to the options.
    if (executeWithDummyInputs)
    {
        if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
        {
            return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be executed";
        }

        if (clTunedParameters &&
            options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
        {
            // Now that we've done one inference the CL kernel parameters will have been tuned,
            // so save the updated file.
            try
            {
                clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
            }
            catch (std::exception& error)
            {
                VLOG(DRIVER) << "ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file "
                             << options.GetClTunedParametersFile().c_str() << ": " << error.what();
            }
        }
    }
    return std::move(preparedModel);
}

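// Static storage for the IDs of the networks this driver has loaded into the Arm NN runtime.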
std::vector<armnn::NetworkId> ArmnnDriverImpl::m_NetworkIDs = {};

GeneralResult<SharedPreparedModel> ArmnnDriverImpl::PrepareArmnnModelFromCache(
    const armnn::IRuntimePtr& runtime,
    const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
    const DriverOptions& options,
    const std::vector<SharedHandle>& modelCacheHandle,
    const std::vector<SharedHandle>& dataCacheHandle,
    const CacheToken& token,
    bool float32ToFloat16)
{
    VLOG(DRIVER) << "ArmnnDriverImpl::PrepareArmnnModelFromCache()";

    if (!runtime)
    {
        return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE)
            << "ArmnnDriverImpl::prepareModelFromCache(): Device unavailable";
    }

    if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
            << "ArmnnDriverImpl::prepareModelFromCache(): Token size does not match!";
    }

    // Validate dataCacheHandle
    auto dataSize = CacheDataHandlerInstance().GetCacheSize(token);
    if (!ValidateDataCacheHandle(dataCacheHandle, dataSize))
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
            << "ArmnnDriverImpl::prepareModelFromCache(): Invalid data cache handle!";
    }

    // Check that the number of cached model files matches the expected value
    unsigned int numberOfCachedModelFiles = 0;
    for (auto& backend : options.GetBackends())
    {
        numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
    }
    if (modelCacheHandle.size() != numberOfCachedModelFiles)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
            << "ArmnnDriverImpl::prepareModelFromCache(): Model cache handle size does not match.";
    }

    // Read the model
    std::vector<uint8_t> dataCacheData(dataSize);
    pread(*dataCacheHandle[0], dataCacheData.data(), dataCacheData.size(), 0);
    auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData);

    int gpuAccCachedFd = -1;
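    // Recompute the combined hash by XOR-ing in the hash of each backend-specific cached model file,
    // mirroring the calculation performed when the model was first prepared.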
    if (modelCacheHandle.size() > 0)
    {
        unsigned int index = 0;
        for (auto& backend : options.GetBackends())
        {
            // The modelCacheHandle size should be equal to numberOfCachedModelFiles
            // and the modelCacheHandle vector should be in the same order as the backends
            auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
            if (numberOfCacheFiles > 0)
            {
                if (!ValidateSharedHandle(modelCacheHandle[index]))
                {
                    return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
                        << "ArmnnDriverImpl::prepareModelFromCache(): Invalid model cache handle!";
                }
                int cachedFd = *modelCacheHandle[index];
                struct stat statBuffer;
                if (fstat(cachedFd, &statBuffer) == 0)
                {
                    long modelDataSize = statBuffer.st_size;
                    if (modelDataSize > 0)
                    {
                        std::vector<uint8_t> modelData(modelDataSize);
                        pread(cachedFd, modelData.data(), modelData.size(), 0);
                        hashValue ^= CacheDataHandlerInstance().Hash(modelData);

                        if (backend == armnn::Compute::GpuAcc)
                        {
                            gpuAccCachedFd = cachedFd;
                        }
                    }
                }
                index += numberOfCacheFiles;
            }
        }
    }

    if (!CacheDataHandlerInstance().Validate(token, hashValue, dataCacheData.size()))
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
            << "ArmnnDriverImpl::prepareModelFromCache(): ValidateHash() failed!";
    }

    // Deserialize the network.
    armnn::INetworkPtr network = armnn::INetworkPtr(nullptr, [](armnn::INetwork*){});
    try
    {
        network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData);
    }
    catch (std::exception&)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
            << "ArmnnDriverImpl::prepareModelFromCache(): Exception caught from Deserializer!";
    }

    // Optimize the network
    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
    armnn::OptimizerOptions OptOptions;
    OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
    OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();

    armnn::BackendOptions gpuAcc("GpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "SaveCachedNetwork", false },
        { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
        { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
        { "CachedFileDescriptor", gpuAccCachedFd }
    });

    armnn::BackendOptions cpuAcc("CpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "NumberOfThreads", options.GetNumberOfThreads() }
    });
    OptOptions.m_ModelOptions.push_back(gpuAcc);
    OptOptions.m_ModelOptions.push_back(cpuAcc);

    std::vector<std::string> errMessages;
    try
    {
        optNet = armnn::Optimize(*network.get(),
                                 options.GetBackends(),
                                 runtime->GetDeviceSpec(),
                                 OptOptions,
                                 errMessages);
    }
    catch (std::exception& e)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << e.what();
    }

    // Check that the optimized network is valid.
    if (!optNet)
    {
        std::stringstream message;
        message << "Invalid optimized network";
        for (const std::string& msg : errMessages)
        {
            message << "\n" << msg;
        }
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
    }

    // Export the optimized network graph to a dot file if an output dump directory
    // has been specified in the driver's arguments.
    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
                                                               options.GetRequestInputsAndOutputsDumpDir());

    // Load it into the runtime.
    armnn::NetworkId netId = 0;
    std::string msg;
    armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
                                                MemorySource::Undefined,
                                                MemorySource::Undefined,
                                                options.IsGpuProfilingEnabled());
    try
    {
        if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
        {
            return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be loaded";
        }
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from LoadNetwork.";
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
    }

    m_NetworkIDs.push_back(netId);
    return std::make_shared<const ArmnnPreparedModel>(netId,
                                                      runtime.get(),
                                                      options.GetRequestInputsAndOutputsDumpDir(),
                                                      options.IsGpuProfilingEnabled(),
                                                      Priority::MEDIUM,
                                                      true);
}

const Capabilities& ArmnnDriverImpl::GetCapabilities(const armnn::IRuntimePtr& runtime)
{
    VLOG(DRIVER) << "ArmnnDriverImpl::GetCapabilities()";
    static const Capabilities theCapabilities = GenerateCapabilities();
    return theCapabilities;
}

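// Note: this only forgets the recorded network IDs; it does not unload the networks from the runtime.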
void ArmnnDriverImpl::ClearNetworks()
{
    m_NetworkIDs.clear();
}

} // namespace armnn_driver