//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ArmnnDriverImpl.hpp"
#include "ArmnnPreparedModel.hpp"
#include "CacheDataHandler.hpp"
#include "ModelToINetworkTransformer.hpp"
#include "SystemPropertiesUtils.hpp"

#include <armnnDeserializer/IDeserializer.hpp>

#include <fcntl.h>
#include <log/log.h>
#include <sys/stat.h>
#include <unistd.h>

namespace
{

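// Builds the Capabilities structure that the driver reports to the NNAPI runtime.
// Every supported operand type is advertised with the same default performance
// figures (execTime and powerUsage); the values are placeholders, not measurements.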
Capabilities GenerateCapabilities()
{
    VLOG(DRIVER) << "ArmnnDriverImpl::GenerateCapabilities()";

    float defaultPerfValue = .1f;
    const Capabilities::PerformanceInfo defaultPerfInfo = { /* execTime */ defaultPerfValue,
                                                            /* powerUsage */ defaultPerfValue
                                                          };
    std::vector<OperandType> operandsTypes({
            OperandType::FLOAT32,
            OperandType::INT32,
            OperandType::UINT32,
            OperandType::TENSOR_FLOAT32,
            OperandType::TENSOR_INT32,
            OperandType::TENSOR_QUANT8_ASYMM,
            OperandType::BOOL,
            OperandType::TENSOR_QUANT16_SYMM,
            OperandType::TENSOR_FLOAT16,
            OperandType::TENSOR_BOOL8,
            OperandType::FLOAT16,
            OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
            OperandType::TENSOR_QUANT16_ASYMM,
            OperandType::TENSOR_QUANT8_SYMM,
            OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
    });

    std::vector<Capabilities::OperandPerformance> operandPerformances;
    operandPerformances.reserve(operandsTypes.size());

    for (auto opType : operandsTypes)
    {
        operandPerformances.push_back(
                Capabilities::OperandPerformance{ /* type */ opType, /* info */ defaultPerfInfo });
    }

    auto operandPerformanceTable =
            Capabilities::OperandPerformanceTable::create(std::move(operandPerformances)).value();

    return { /* relaxedFloat32toFloat16PerformanceScalar */ defaultPerfInfo,
             /* relaxedFloat32toFloat16PerformanceTensor */ defaultPerfInfo,
             /* operandPerformance */ std::move(operandPerformanceTable),
             /* ifPerformance */ defaultPerfInfo,
             /* whilePerformance */ defaultPerfInfo };
}

} // anonymous namespace

using namespace android::nn;

namespace armnn_driver
{

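// Returns true if the cache file descriptor behind the shared handle is usable:
// it must be non-negative and opened with read/write access.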
bool ArmnnDriverImpl::ValidateSharedHandle(const SharedHandle& sharedHandle)
{
    bool valid = true;

    if (*sharedHandle < 0)
    {
        return !valid;
    }

    int dataCacheFileAccessMode = fcntl(*sharedHandle, F_GETFL) & O_ACCMODE;
    if (dataCacheFileAccessMode != O_RDWR)
    {
        return !valid;
    }

    return valid;
}

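// Returns true if the data cache handle supplied by the NNAPI runtime is usable:
// there must be exactly one handle, a non-zero expected data size, and the file
// size must match that expectation.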
bool ArmnnDriverImpl::ValidateDataCacheHandle(const std::vector<SharedHandle>& dataCacheHandle, const size_t dataSize)
{
    bool valid = true;
    // DataCacheHandle size should always be 1 for ArmNN model
    if (dataCacheHandle.size() != 1)
    {
        return !valid;
    }

    if (dataSize == 0)
    {
        return !valid;
    }

    struct stat statBuffer;
    if (fstat(*dataCacheHandle[0], &statBuffer) == 0)
    {
        unsigned long bufferSize = statBuffer.st_size;
        if (bufferSize != dataSize)
        {
            return !valid;
        }
    }

    return ValidateSharedHandle(dataCacheHandle[0]);
}

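// Converts an NNAPI model into an Arm NN network, optimizes it, loads it into the
// runtime and wraps it in an ArmnnPreparedModel. When cache handles are supplied,
// the serialized network and any backend-specific compiled data are also written out
// so that PrepareArmnnModelFromCache() can restore the model later.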
GeneralResult<SharedPreparedModel> ArmnnDriverImpl::PrepareArmnnModel(
    const armnn::IRuntimePtr& runtime,
    const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
    const DriverOptions& options,
    const Model& model,
    const std::vector<SharedHandle>& modelCacheHandle,
    const std::vector<SharedHandle>& dataCacheHandle,
    const CacheToken& token,
    bool float32ToFloat16,
    Priority priority)
{
    VLOG(DRIVER) << "ArmnnDriverImpl::PrepareArmnnModel()";

    if (!runtime)
    {
        return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE) << "Device unavailable";
    }

    if (const auto result = validate(model); !result.ok())
    {
        return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) << "Invalid model passed as input";
    }

    // Deliberately ignore any unsupported operations requested by the options -
    // at this point we're being asked to prepare a model that we've already declared support for
    // and the operation indices may be different to those in getSupportedOperations anyway.
    std::set<unsigned int> unsupportedOperations;
    ModelToINetworkTransformer modelConverter(options.GetBackends(),
                                              model,
                                              unsupportedOperations);

    if (modelConverter.GetConversionResult() != ConversionResult::Success)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "ModelToINetworkConverter failed";
    }

    // Serialize the network graph to a .armnn file if an output directory
    // has been specified in the driver's arguments.
    std::vector<uint8_t> dataCacheData;
    bool serializeToFile = !dataCacheHandle.empty();
    auto serializedNetworkFileName =
        SerializeNetwork(*modelConverter.GetINetwork(),
                         options.GetRequestInputsAndOutputsDumpDir(),
                         dataCacheData,
                         serializeToFile);

    // Optimize the network
    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
    armnn::OptimizerOptions OptOptions;
    OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
    OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();

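    // Count how many cache files the selected backends expect and, if GpuAcc is among
    // them, remember the file descriptor of its model cache entry so the compiled
    // network can be saved to it.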
    int cachedFd = -1;
    bool saveCachedNetwork = options.SaveCachedNetwork();

    unsigned int numberOfCachedModelFiles = 0;
    if (modelCacheHandle.size() > 0)
    {
        unsigned int index = 0;
        for (auto& backend : options.GetBackends())
        {
            // modelCacheHandle size should be equal to numberOfCachedModelFiles
            // modelCacheHandle vector should be in same order as backends
            auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
            if (numberOfCacheFiles > 0)
            {
                numberOfCachedModelFiles += numberOfCacheFiles;
                // For GpuAcc numberOfCacheFiles is 1
                if (backend == armnn::Compute::GpuAcc)
                {
                    cachedFd = *modelCacheHandle[index];
                    saveCachedNetwork = true;
                }
                index += numberOfCacheFiles;
            }
        }
    }

    armnn::BackendOptions gpuAcc("GpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "SaveCachedNetwork", saveCachedNetwork },
        { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
        { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
        { "CachedFileDescriptor", cachedFd }
    });

    armnn::BackendOptions cpuAcc("CpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "NumberOfThreads", options.GetNumberOfThreads() }
    });
    OptOptions.m_ModelOptions.push_back(gpuAcc);
    OptOptions.m_ModelOptions.push_back(cpuAcc);

    std::vector<std::string> errMessages;
    try
    {
        optNet = armnn::Optimize(*modelConverter.GetINetwork(),
                                 options.GetBackends(),
                                 runtime->GetDeviceSpec(),
                                 OptOptions,
                                 errMessages);
    }
    catch (std::exception& e)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << e.what();
    }

    // Check that the optimized network is valid.
    if (!optNet)
    {
        std::stringstream message;
        message << "Invalid optimized network";
        for (const std::string& msg : errMessages)
        {
            message << "\n" << msg;
        }
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
    }

    // Export the optimized network graph to a dot file if an output dump directory
    // has been specified in the driver's arguments.
    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
                                                               options.GetRequestInputsAndOutputsDumpDir());

    // Load it into the runtime.
    armnn::NetworkId netId = 0;
    std::string msg;
    armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
                                                MemorySource::Undefined,
                                                MemorySource::Undefined,
                                                options.IsGpuProfilingEnabled());
    auto numInputs = getMainModel(model).inputIndexes.size();
    auto numOutputs = getMainModel(model).outputIndexes.size();
    try
    {
        if (runtime->LoadNetwork(netId, std::move(optNet), msg, networkProperties) != armnn::Status::Success)
        {
            return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be loaded";
        }
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from LoadNetwork.";
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
    }

    // Now that we have a networkId for the graph, rename the exported files to use it
    // so that we can associate the graph file and the input/output tensor exported files.
    RenameExportedFiles(serializedNetworkFileName,
                        dotGraphFileName,
                        options.GetRequestInputsAndOutputsDumpDir(),
                        netId);

    // Write the serialized network to the data cache handle and hash the data
    size_t hashValue = 0;
    if (dataCacheHandle.size() == 1)
    {
        write(*dataCacheHandle[0], dataCacheData.data(), dataCacheData.size());
        hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
    }

    // Fold the contents of any writable backend model cache files into the hash
    if (modelCacheHandle.size() > 0)
    {
        if (modelCacheHandle.size() == numberOfCachedModelFiles)
        {
            for (uint32_t i = 0; i < modelCacheHandle.size(); ++i)
            {
                int modelCacheFileAccessMode = fcntl(*modelCacheHandle[i], F_GETFL) & O_ACCMODE;
                if (modelCacheFileAccessMode != O_RDONLY)
                {
                    struct stat statBuffer;
                    if (fstat(*modelCacheHandle[i], &statBuffer) == 0)
                    {
                        long modelDataSize = statBuffer.st_size;
                        if (modelDataSize > 0)
                        {
                            std::vector<uint8_t> modelData(modelDataSize);
                            pread(*modelCacheHandle[i], modelData.data(), modelData.size(), 0);
                            hashValue ^= CacheDataHandlerInstance().Hash(modelData);
                        }
                    }
                }
            }
        }
    }
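
    // Register the combined hash against the caller's token so that a later call to
    // PrepareArmnnModelFromCache() can validate the cached files before reusing them.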
    if (hashValue != 0)
    {
        CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size());
    }

    bool executeWithDummyInputs = (std::find(options.GetBackends().begin(),
                                             options.GetBackends().end(),
                                             armnn::Compute::GpuAcc) != options.GetBackends().end());

    auto preparedModel = std::make_shared<const ArmnnPreparedModel>(netId,
                                                                    runtime.get(),
                                                                    model,
                                                                    options.GetRequestInputsAndOutputsDumpDir(),
                                                                    options.IsGpuProfilingEnabled(),
                                                                    priority);

    // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
    // this is enabled) before the first 'real' inference which removes the overhead of the first inference.
    // Only run this if the GpuAcc backend has been added to options.
    if (executeWithDummyInputs)
    {
        if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
        {
            return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be executed";
        }

        if (clTunedParameters &&
            options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
        {
            // Now that we've done one inference the CL kernel parameters will have been tuned,
            // so save the updated file.
            try
            {
                clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
            }
            catch (std::exception& error)
            {
                VLOG(DRIVER) << "ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file "
                             << options.GetClTunedParametersFile().c_str() << ": " << error.what();
            }
        }
    }
    return std::move(preparedModel);
}

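// Restores a prepared model from previously written cache files: the serialized Arm NN
// network is read back from the data cache handle and the backend-specific model cache
// files are re-hashed so the result can be checked against the hash registered when the
// cache was written.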
GeneralResult<SharedPreparedModel> ArmnnDriverImpl::PrepareArmnnModelFromCache(
    const armnn::IRuntimePtr& runtime,
    const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
    const DriverOptions& options,
    const std::vector<SharedHandle>& modelCacheHandle,
    const std::vector<SharedHandle>& dataCacheHandle,
    const CacheToken& token,
    bool float32ToFloat16)
{
    VLOG(DRIVER) << "ArmnnDriverImpl::PrepareArmnnModelFromCache()";

    if (!runtime)
    {
        return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE)
            << "ArmnnDriverImpl::prepareModelFromCache(): Device unavailable";
    }

    if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
            << "ArmnnDriverImpl::prepareModelFromCache(): Token size does not match!";
    }

    // Validate dataCacheHandle
    auto dataSize = CacheDataHandlerInstance().GetCacheSize(token);
    if (!ValidateDataCacheHandle(dataCacheHandle, dataSize))
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
            << "ArmnnDriverImpl::prepareModelFromCache(): Invalid data cache handle!";
    }

    // Check that the number of cached model files matches the expected value
    unsigned int numberOfCachedModelFiles = 0;
    for (auto& backend : options.GetBackends())
    {
        numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
    }
    if (modelCacheHandle.size() != numberOfCachedModelFiles)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
            << "ArmnnDriverImpl::prepareModelFromCache(): Model cache handle size does not match.";
    }

    // Read the serialized network from the data cache and hash it
    std::vector<uint8_t> dataCacheData(dataSize);
    pread(*dataCacheHandle[0], dataCacheData.data(), dataCacheData.size(), 0);
    auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData);

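    // Fold the backend model cache files into the hash and pick out the GpuAcc file
    // descriptor so the compiled CL network can be reloaded from it.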
    int gpuAccCachedFd = -1;
    if (modelCacheHandle.size() > 0)
    {
        unsigned int index = 0;
        for (auto& backend : options.GetBackends())
        {
            // modelCacheHandle size should be equal to numberOfCachedModelFiles
            // modelCacheHandle vector should be in same order as backends
            auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
            if (numberOfCacheFiles > 0)
            {
                if (!ValidateSharedHandle(modelCacheHandle[index]))
                {
                    return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
                        << "ArmnnDriverImpl::prepareModelFromCache(): Invalid model cache handle!";
                }
                int cachedFd = *modelCacheHandle[index];
                struct stat statBuffer;
                if (fstat(cachedFd, &statBuffer) == 0)
                {
                    long modelDataSize = statBuffer.st_size;
                    if (modelDataSize > 0)
                    {
                        std::vector<uint8_t> modelData(modelDataSize);
                        pread(cachedFd, modelData.data(), modelData.size(), 0);
                        hashValue ^= CacheDataHandlerInstance().Hash(modelData);

                        if (backend == armnn::Compute::GpuAcc)
                        {
                            gpuAccCachedFd = cachedFd;
                        }
                    }
                }
                index += numberOfCacheFiles;
            }
        }
    }

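    // Reject the cache if the recomputed hash does not match the value registered when
    // the cache was written.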
    if (!CacheDataHandlerInstance().Validate(token, hashValue, dataCacheData.size()))
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
            << "ArmnnDriverImpl::prepareModelFromCache(): ValidateHash() failed!";
    }

    // Deserialize the network
    armnn::INetworkPtr network = armnn::INetworkPtr(nullptr, [](armnn::INetwork*){});
    try
    {
        network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData);
    }
    catch (std::exception&)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
            << "ArmnnDriverImpl::prepareModelFromCache(): Exception caught from Deserializer!";
    }

    // Optimize the network
    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
    armnn::OptimizerOptions OptOptions;
    OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
    OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();

    armnn::BackendOptions gpuAcc("GpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "SaveCachedNetwork", false },
        { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
        { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
        { "CachedFileDescriptor", gpuAccCachedFd }
    });

    armnn::BackendOptions cpuAcc("CpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "NumberOfThreads", options.GetNumberOfThreads() }
    });
    OptOptions.m_ModelOptions.push_back(gpuAcc);
    OptOptions.m_ModelOptions.push_back(cpuAcc);

    std::vector<std::string> errMessages;
    try
    {
        optNet = armnn::Optimize(*network.get(),
                                 options.GetBackends(),
                                 runtime->GetDeviceSpec(),
                                 OptOptions,
                                 errMessages);
    }
    catch (std::exception& e)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << e.what();
    }

    // Check that the optimized network is valid.
    if (!optNet)
    {
        std::stringstream message;
        message << "Invalid optimized network";
        for (const std::string& msg : errMessages)
        {
            message << "\n" << msg;
        }
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
    }

    // Export the optimized network graph to a dot file if an output dump directory
    // has been specified in the driver's arguments.
    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
                                                               options.GetRequestInputsAndOutputsDumpDir());

    // Load it into the runtime.
    armnn::NetworkId netId = 0;
    std::string msg;
    armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
                                                MemorySource::Undefined,
                                                MemorySource::Undefined,
                                                options.IsGpuProfilingEnabled());
    try
    {
        if (runtime->LoadNetwork(netId, std::move(optNet), msg, networkProperties) != armnn::Status::Success)
        {
            return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be loaded";
        }
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from LoadNetwork.";
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
    }

    return std::make_shared<const ArmnnPreparedModel>(netId,
                                                      runtime.get(),
                                                      options.GetRequestInputsAndOutputsDumpDir(),
                                                      options.IsGpuProfilingEnabled(),
                                                      Priority::MEDIUM,
                                                      true);
}

const Capabilities& ArmnnDriverImpl::GetCapabilities(const armnn::IRuntimePtr& runtime)
{
    VLOG(DRIVER) << "ArmnnDriverImpl::GetCapabilities()";
    static const Capabilities theCapabilities = GenerateCapabilities();
    return theCapabilities;
}

} // namespace armnn_driver