| // |
| // Copyright © 2017, 2023 Arm Ltd. All rights reserved. |
| // SPDX-License-Identifier: MIT |
| // |
| |
| #include "ArmnnDriverImpl.hpp" |
| #include "../ArmnnPreparedModel_1_2.hpp" |
| #include "../ModelToINetworkConverter.hpp" |
| #include "../SystemPropertiesUtils.hpp" |
| |
| #include <armnnDeserializer/IDeserializer.hpp> |
| |
| #include <log/log.h> |
| #include <sys/stat.h> |
| #include <chrono> |
| |
| namespace |
| { |
| |
| const char *g_RelaxedFloat32toFloat16PerformanceExecTime = "ArmNN.relaxedFloat32toFloat16Performance.execTime"; |
| const char *g_RelaxedFloat32toFloat16PerformancePowerUsage = "ArmNN.relaxedFloat32toFloat16Performance.powerUsage"; |
| |
| const char *g_OperandTypeTensorFloat32PerformanceExecTime = "Armnn.operandTypeTensorFloat32Performance.execTime"; |
| const char *g_OperandTypeTensorFloat32PerformancePowerUsage = "Armnn.operandTypeTensorFloat32Performance.powerUsage"; |
| |
| const char *g_OperandTypeFloat32PerformanceExecTime = "Armnn.operandTypeFloat32Performance.execTime"; |
| const char *g_OperandTypeFloat32PerformancePowerUsage = "Armnn.operandTypeFloat32Performance.powerUsage"; |
| |
| const char *g_OperandTypeTensorFloat16PerformanceExecTime = "Armnn.operandTypeTensorFloat16Performance.execTime"; |
| const char *g_OperandTypeTensorFloat16PerformancePowerUsage = "Armnn.operandTypeTensorFloat16Performance.powerUsage"; |
| |
| const char *g_OperandTypeFloat16PerformanceExecTime = "Armnn.operandTypeFloat16Performance.execTime"; |
| const char *g_OperandTypeFloat16PerformancePowerUsage = "Armnn.operandTypeFloat16Performance.powerUsage"; |
| |
| const char *g_OperandTypeTensorQuant8AsymmPerformanceExecTime = |
| "Armnn.operandTypeTensorQuant8AsymmPerformance.execTime"; |
| const char *g_OperandTypeTensorQuant8AsymmPerformancePowerUsage = |
| "Armnn.operandTypeTensorQuant8AsymmPerformance.powerUsage"; |
| |
| const char *g_OperandTypeTensorQuant16SymmPerformanceExecTime = |
| "Armnn.operandTypeTensorQuant16SymmPerformance.execTime"; |
| const char *g_OperandTypeTensorQuant16SymmPerformancePowerUsage = |
| "Armnn.operandTypeTensorQuant16SymmPerformance.powerUsage"; |
| |
| const char *g_OperandTypeTensorQuant8SymmPerformanceExecTime = |
| "Armnn.operandTypeTensorQuant8SymmPerformance.execTime"; |
| const char *g_OperandTypeTensorQuant8SymmPerformancePowerUsage = |
| "Armnn.operandTypeTensorQuant8SymmPerformance.powerUsage"; |
| |
| const char *g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime = |
| "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.execTime"; |
| const char *g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage = |
| "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.powerUsage"; |
| |
| |
| const char *g_OperandTypeTensorInt32PerformanceExecTime = "Armnn.operandTypeTensorInt32Performance.execTime"; |
| const char *g_OperandTypeTensorInt32PerformancePowerUsage = "Armnn.operandTypeTensorInt32Performance.powerUsage"; |
| |
| const char *g_OperandTypeInt32PerformanceExecTime = "Armnn.operandTypeInt32Performance.execTime"; |
| const char *g_OperandTypeInt32PerformancePowerUsage = "Armnn.operandTypeInt32Performance.powerUsage"; |
| |
| |
| void NotifyCallbackAndCheck(const android::sp<V1_2::IPreparedModelCallback>& callback, |
| V1_0::ErrorStatus errorStatus, |
| const android::sp<V1_2::IPreparedModel>& preparedModelPtr) |
| { |
| Return<void> returned = callback->notify_1_2(errorStatus, preparedModelPtr); |
| // This check is required, if the callback fails and it isn't checked it will bring down the service |
| if (!returned.isOk()) |
| { |
| ALOGE("ArmnnDriverImpl::prepareModel: hidl callback failed to return properly: %s ", |
| returned.description().c_str()); |
| } |
| } |
| |
| Return<V1_0::ErrorStatus> FailPrepareModel(V1_0::ErrorStatus error, |
| const std::string& message, |
| const android::sp<V1_2::IPreparedModelCallback>& callback) |
| { |
| ALOGW("ArmnnDriverImpl::prepareModel: %s", message.c_str()); |
| NotifyCallbackAndCheck(callback, error, nullptr); |
| return error; |
| } |
| |
| } // anonymous namespace |
| |
| namespace armnn_driver |
| { |
| namespace hal_1_2 |
| { |
| |
| Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2( |
| const armnn::IRuntimePtr& runtime, |
| const armnn::IGpuAccTunedParametersPtr& clTunedParameters, |
| const DriverOptions& options, |
| const V1_2::Model& model, |
| const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle, |
| const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle, |
| const HidlToken& token, |
| const android::sp<V1_2::IPreparedModelCallback>& cb, |
| bool float32ToFloat16) |
| { |
| ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_2()"); |
| |
| std::chrono::time_point<std::chrono::system_clock> prepareModelTimepoint = std::chrono::system_clock::now(); |
| |
| if (cb.get() == nullptr) |
| { |
| ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel"); |
| return V1_0::ErrorStatus::INVALID_ARGUMENT; |
| } |
| |
| if (!runtime) |
| { |
| return FailPrepareModel(V1_0::ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb); |
| } |
| |
| if (!android::nn::validateModel(model)) |
| { |
| return FailPrepareModel(V1_0::ErrorStatus::INVALID_ARGUMENT, "Invalid model passed as input", cb); |
| } |
| |
| // Deliberately ignore any unsupported operations requested by the options - |
| // at this point we're being asked to prepare a model that we've already declared support for |
| // and the operation indices may be different to those in getSupportedOperations anyway. |
| std::set<unsigned int> unsupportedOperations; |
| ModelToINetworkConverter<HalPolicy> modelConverter(options.GetBackends(), |
| model, |
| unsupportedOperations); |
| |
| if (modelConverter.GetConversionResult() != ConversionResult::Success) |
| { |
| FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "ModelToINetworkConverter failed", cb); |
| return V1_0::ErrorStatus::NONE; |
| } |
| |
| // Serialize the network graph to a .armnn file if an output directory |
| // has been specified in the drivers' arguments. |
| std::vector<uint8_t> dataCacheData; |
| bool serializeToFile = dataCacheHandle.size() < 1 ? false : true; |
| auto serializedNetworkFileName = |
| SerializeNetwork(*modelConverter.GetINetwork(), |
| options.GetRequestInputsAndOutputsDumpDir(), |
| dataCacheData, |
| serializeToFile); |
| |
| // Optimize the network |
| armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr); |
| armnn::OptimizerOptionsOpaque OptOptions; |
| OptOptions.SetReduceFp32ToFp16(float32ToFloat16); |
| OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled()); |
| |
| int cachedFd = -1; |
| bool saveCachedNetwork = options.SaveCachedNetwork(); |
| |
| unsigned int numberOfCachedModelFiles = 0; |
| if (modelCacheHandle.size() > 0) |
| { |
| unsigned int index = 0; |
| for (auto& backend : options.GetBackends()) |
| { |
| // modelCacheHandle size should be equal to numberOfCachedModelFiles |
| // modelCacheHandle vector should be in same order as backends |
| auto numberOfCacheFiles = GetNumberOfCacheFiles(backend); |
| if (numberOfCacheFiles > 0) |
| { |
| numberOfCachedModelFiles += numberOfCacheFiles; |
| if (modelCacheHandle[index]->numFds == 1) |
| { |
| if (backend == armnn::Compute::GpuAcc) |
| { |
| cachedFd = modelCacheHandle[index]->data[0]; |
| saveCachedNetwork = true; |
| } |
| } |
| index += numberOfCachedModelFiles; |
| } |
| } |
| } |
| |
| armnn::BackendOptions gpuAcc("GpuAcc", |
| { |
| { "FastMathEnabled", options.IsFastMathEnabled() }, |
| { "SaveCachedNetwork", saveCachedNetwork }, |
| { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() }, |
| { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() }, |
| { "CachedFileDescriptor", cachedFd } |
| }); |
| |
| armnn::BackendOptions cpuAcc("CpuAcc", |
| { |
| { "FastMathEnabled", options.IsFastMathEnabled() }, |
| { "NumberOfThreads", options.GetNumberOfThreads() } |
| }); |
| OptOptions.AddModelOption(gpuAcc); |
| OptOptions.AddModelOption(cpuAcc); |
| |
| std::vector<std::string> errMessages; |
| try |
| { |
| optNet = armnn::Optimize(*modelConverter.GetINetwork(), |
| options.GetBackends(), |
| runtime->GetDeviceSpec(), |
| OptOptions, |
| errMessages); |
| } |
| catch (std::exception &e) |
| { |
| std::stringstream message; |
| message << "Exception (" << e.what() << ") caught from optimize."; |
| FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb); |
| return V1_0::ErrorStatus::NONE; |
| } |
| |
| // Check that the optimized network is valid. |
| if (!optNet) |
| { |
| std::stringstream message; |
| message << "Invalid optimized network"; |
| for (const std::string& msg : errMessages) |
| { |
| message << "\n" << msg; |
| } |
| FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb); |
| return V1_0::ErrorStatus::NONE; |
| } |
| |
| // Export the optimized network graph to a dot file if an output dump directory |
| // has been specified in the drivers' arguments. |
| std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet, |
| options.GetRequestInputsAndOutputsDumpDir()); |
| |
| // Load it into the runtime. |
| armnn::NetworkId netId = 0; |
| std::string msg; |
| armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(), |
| MemorySource::Undefined, |
| MemorySource::Undefined, |
| options.IsGpuProfilingEnabled()); |
| |
| auto numInputs = getMainModel(model).inputIndexes.size(); |
| auto numOutputs = getMainModel(model).outputIndexes.size(); |
| try |
| { |
| if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success) |
| { |
| return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, msg, cb); |
| } |
| } |
| catch (std::exception& e) |
| { |
| std::stringstream message; |
| message << "Exception (" << e.what()<< ") caught from LoadNetwork."; |
| FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb); |
| return V1_0::ErrorStatus::NONE; |
| } |
| |
| // Now that we have a networkId for the graph rename the exported files to use it |
| // so that we can associate the graph file and the input/output tensor exported files |
| RenameExportedFiles(serializedNetworkFileName, |
| dotGraphFileName, |
| options.GetRequestInputsAndOutputsDumpDir(), |
| netId); |
| |
| std::unique_ptr<ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>> preparedModel( |
| new ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>( |
| netId, |
| runtime.get(), |
| model, |
| options.GetRequestInputsAndOutputsDumpDir(), |
| options.IsGpuProfilingEnabled(), |
| options.isAsyncModelExecutionEnabled(), |
| options.getNoOfArmnnThreads(), |
| options.isImportEnabled(), |
| options.isExportEnabled())); |
| |
| // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if |
| // this is enabled) before the first 'real' inference which removes the overhead of the first inference. |
| // Only run this if the GpuAcc backend has been added to options |
| if (std::find(options.GetBackends().begin(), |
| options.GetBackends().end(), |
| armnn::Compute::GpuAcc) != options.GetBackends().end()) |
| { |
| if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs)) |
| { |
| return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb); |
| } |
| |
| if (clTunedParameters && |
| options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters) |
| { |
| // Now that we've done one inference the CL kernel parameters will have been tuned, |
| // so save the updated file. |
| try |
| { |
| clTunedParameters->Save(options.GetClTunedParametersFile().c_str()); |
| } |
| catch (std::exception& error) |
| { |
| ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s", |
| options.GetClTunedParametersFile().c_str(), error.what()); |
| } |
| } |
| } |
| |
| size_t hashValue = 0; |
| // Cache the model |
| if (dataCacheHandle.size() > 0) |
| { |
| // Cache the Arm NN model, should be only 1 |
| if (dataCacheHandle.size() != 1) |
| { |
| NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release()); |
| return V1_0::ErrorStatus::NONE; |
| } |
| |
| if (dataCacheHandle[0]->numFds != 1) |
| { |
| ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, numFds != 1."); |
| NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release()); |
| return V1_0::ErrorStatus::NONE; |
| } |
| |
| if (dataCacheHandle[0]->data[0] < 0) |
| { |
| ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, fd < 0"); |
| NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release()); |
| return V1_0::ErrorStatus::NONE; |
| } |
| |
| int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE; |
| if (dataCacheFileAccessMode != O_RDWR) |
| { |
| ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_2(): Invalid Access Mode."); |
| NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release()); |
| return V1_0::ErrorStatus::NONE; |
| } |
| |
| write(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size()); |
| hashValue = CacheDataHandlerInstance().Hash(dataCacheData); |
| } |
| |
| if (modelCacheHandle.size() > 0) |
| { |
| if (modelCacheHandle.size() != numberOfCachedModelFiles) |
| { |
| NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release()); |
| return V1_0::ErrorStatus::NONE; |
| } |
| for (uint32_t i = 0; i < modelCacheHandle.size(); ++i) |
| { |
| if (modelCacheHandle[i]->numFds == 1) |
| { |
| int modelCacheFileAccessMode = fcntl(modelCacheHandle[i]->data[0], F_GETFL) & O_ACCMODE; |
| if (modelCacheFileAccessMode != O_RDONLY) |
| { |
| struct stat statBuffer; |
| if (fstat(modelCacheHandle[i]->data[0], &statBuffer) == 0) |
| { |
| long modelDataSize = statBuffer.st_size; |
| if (modelDataSize > 0) |
| { |
| std::vector <uint8_t> modelData(modelDataSize); |
| pread(modelCacheHandle[i]->data[0], modelData.data(), modelData.size(), 0); |
| hashValue ^= CacheDataHandlerInstance().Hash(modelData); |
| } |
| } |
| } |
| } |
| } |
| } |
| if (hashValue != 0) |
| { |
| CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size()); |
| } |
| |
| NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release()); |
| |
| ALOGV("ArmnnDriverImpl::prepareModel cache timing = %lld µs", std::chrono::duration_cast<std::chrono::microseconds> |
| (std::chrono::system_clock::now() - prepareModelTimepoint).count()); |
| |
| return V1_0::ErrorStatus::NONE; |
| } |
| |
| Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareModelFromCache( |
| const armnn::IRuntimePtr& runtime, |
| const DriverOptions& options, |
| const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle, |
| const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle, |
| const HidlToken& token, |
| const android::sp<V1_2::IPreparedModelCallback>& cb, |
| bool float32ToFloat16) |
| { |
| ALOGV("ArmnnDriverImpl::prepareModelFromCache()"); |
| std::chrono::time_point<std::chrono::system_clock> modelFromCacheTimepoint = std::chrono::system_clock::now(); |
| |
| if (cb.get() == nullptr) |
| { |
| ALOGW("ArmnnDriverImpl::prepareModelFromCache: Invalid callback passed to prepareModel"); |
| return V1_0::ErrorStatus::INVALID_ARGUMENT; |
| } |
| |
| if (!runtime) |
| { |
| return FailPrepareModel(V1_0::ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb); |
| } |
| |
| if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN) |
| { |
| FailPrepareModel(V1_0::ErrorStatus::INVALID_ARGUMENT, "Invalid token passed!", cb); |
| return V1_0::ErrorStatus::INVALID_ARGUMENT; |
| } |
| |
| // DataCacheHandle size should always be 1 |
| // Arm NN model |
| if (dataCacheHandle.size() != 1) |
| { |
| FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "No data cache!", cb); |
| return V1_0::ErrorStatus::GENERAL_FAILURE; |
| } |
| |
| // Check if model files cached they match the expected value |
| unsigned int numberOfCachedModelFiles = 0; |
| for (auto& backend : options.GetBackends()) |
| { |
| numberOfCachedModelFiles += GetNumberOfCacheFiles(backend); |
| } |
| if (modelCacheHandle.size() != numberOfCachedModelFiles) |
| { |
| FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid model cache!", cb); |
| return V1_0::ErrorStatus::GENERAL_FAILURE; |
| } |
| |
| if (dataCacheHandle[0]->numFds != 1) |
| { |
| ALOGW("ArmnnDriverImpl::prepareModelFromCache: Cannot read from the cache data, numFds != 1."); |
| FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "No data cache!", cb); |
| return V1_0::ErrorStatus::GENERAL_FAILURE; |
| } |
| |
| if (dataCacheHandle[0]->data[0] < 0) |
| { |
| ALOGW("ArmnnDriverImpl::prepareModelFromCache: Cannot read from the cache data, fd < 0"); |
| FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "No data cache!", cb); |
| return V1_0::ErrorStatus::GENERAL_FAILURE; |
| } |
| |
| int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE; |
| if (dataCacheFileAccessMode != O_RDWR) |
| { |
| FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid Access Mode!", cb); |
| return V1_0::ErrorStatus::GENERAL_FAILURE; |
| } |
| |
| auto dataSize = CacheDataHandlerInstance().GetCacheSize(token); |
| if (dataSize == 0) |
| { |
| ALOGW("ArmnnDriverImpl::prepareModelFromCache: Invalid data to deserialize!"); |
| FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid data to deserialize!", cb); |
| return V1_0::ErrorStatus::GENERAL_FAILURE; |
| } |
| |
| int offset = 0; |
| { |
| struct stat statBuffer; |
| if (fstat(dataCacheHandle[0]->data[0], &statBuffer) == 0) |
| { |
| unsigned long bufferSize = statBuffer.st_size; |
| if (bufferSize != dataSize) |
| { |
| ALOGW("ArmnnDriverImpl::prepareModelFromCache: Invalid data to deserialize!"); |
| FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid data to deserialize!", cb); |
| return V1_0::ErrorStatus::GENERAL_FAILURE; |
| } |
| } |
| } |
| std::vector<uint8_t> dataCacheData(dataSize); |
| pread(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size(), offset); |
| auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData); |
| |
| int gpuAccCachedFd = -1; |
| bool saveCachedNetwork = false; |
| if (modelCacheHandle.size() > 0) |
| { |
| unsigned int index = 0; |
| for (auto& backend : options.GetBackends()) |
| { |
| // modelCacheHandle size should be equal to numberOfCachedModelFiles |
| // modelCacheHandle vector should be in same order as backends |
| auto numberOfCacheFiles = GetNumberOfCacheFiles(backend); |
| if (numberOfCacheFiles > 0) |
| { |
| if (modelCacheHandle[index]->numFds != 1) |
| { |
| ALOGW("ArmnnDriverImpl::prepareModelFromCache: Cannot read from the model cache, numFds != 1."); |
| FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, |
| "Cannot read from the model cache, numFds != 1.", cb); |
| return V1_0::ErrorStatus::GENERAL_FAILURE; |
| } |
| auto cachedFd = modelCacheHandle[index]->data[0]; |
| |
| int modelCacheFileAccessMode = fcntl(cachedFd, F_GETFL) & O_ACCMODE; |
| if (modelCacheFileAccessMode != O_RDWR) |
| { |
| FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid Access Mode!", cb); |
| return V1_0::ErrorStatus::GENERAL_FAILURE; |
| } |
| |
| struct stat statBuffer; |
| if (cachedFd != -1 && fstat(cachedFd, &statBuffer) == 0) |
| { |
| long modelDataSize = statBuffer.st_size; |
| if (modelDataSize <= 0) |
| { |
| FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Wrong cached model size!", cb); |
| return V1_0::ErrorStatus::NONE; |
| } |
| std::vector<uint8_t> modelData(modelDataSize); |
| pread(cachedFd, modelData.data(), modelData.size(), 0); |
| hashValue ^= CacheDataHandlerInstance().Hash(modelData); |
| |
| // For GpuAcc numberOfCachedFiles is 1 |
| if (backend == armnn::Compute::GpuAcc) |
| { |
| gpuAccCachedFd = cachedFd; |
| } |
| } |
| index += numberOfCacheFiles; |
| } |
| } |
| } |
| |
| if (!CacheDataHandlerInstance().Validate(token, hashValue, dataCacheData.size())) |
| { |
| ALOGW("ArmnnDriverImpl::prepareModelFromCache: ValidateHash() failed!"); |
| FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "ValidateHash Failed!", cb); |
| return V1_0::ErrorStatus::GENERAL_FAILURE; |
| } |
| |
| // Deserialize the network.. |
| armnn::INetworkPtr network = armnn::INetworkPtr(nullptr, [](armnn::INetwork*){}); |
| try |
| { |
| network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData); |
| } |
| catch (std::exception& e) |
| { |
| std::stringstream message; |
| message << "Exception (" << e.what() << ") caught from Deserializer."; |
| FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb); |
| return V1_0::ErrorStatus::GENERAL_FAILURE; |
| } |
| |
| // Optimize the network |
| armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr); |
| armnn::OptimizerOptionsOpaque OptOptions; |
| OptOptions.SetReduceFp32ToFp16(float32ToFloat16); |
| OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled()); |
| |
| armnn::BackendOptions gpuAcc("GpuAcc", |
| { |
| {"FastMathEnabled", options.IsFastMathEnabled()}, |
| {"SaveCachedNetwork", saveCachedNetwork}, |
| {"CachedNetworkFilePath", options.GetCachedNetworkFilePath()}, |
| {"MLGOTuningFilePath", options.GetClMLGOTunedParametersFile()}, |
| {"CachedFileDescriptor", gpuAccCachedFd} |
| }); |
| |
| armnn::BackendOptions cpuAcc("CpuAcc", |
| { |
| {"FastMathEnabled", options.IsFastMathEnabled()}, |
| {"NumberOfThreads", options.GetNumberOfThreads()} |
| }); |
| OptOptions.AddModelOption(gpuAcc); |
| OptOptions.AddModelOption(cpuAcc); |
| |
| std::vector<std::string> errMessages; |
| try |
| { |
| optNet = armnn::Optimize(*network.get(), |
| options.GetBackends(), |
| runtime->GetDeviceSpec(), |
| OptOptions, |
| errMessages); |
| } |
| catch (std::exception& e) |
| { |
| std::stringstream message; |
| message << "Exception (" << e.what() << ") caught from optimize."; |
| FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb); |
| return V1_0::ErrorStatus::NONE; |
| } |
| |
| // Check that the optimized network is valid. |
| if (!optNet) |
| { |
| std::stringstream message; |
| message << "Invalid optimized network"; |
| for (const std::string& msg : errMessages) |
| { |
| message << "\n" << msg; |
| } |
| FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb); |
| return V1_0::ErrorStatus::NONE; |
| } |
| |
| // Export the optimized network graph to a dot file if an output dump directory |
| // has been specified in the drivers' arguments. |
| std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet, |
| options.GetRequestInputsAndOutputsDumpDir()); |
| |
| // Load it into the runtime. |
| armnn::NetworkId netId = 0; |
| std::string msg; |
| armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(), |
| MemorySource::Undefined, |
| MemorySource::Undefined, |
| options.IsGpuProfilingEnabled()); |
| |
| try |
| { |
| if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success) |
| { |
| return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, msg, cb); |
| } |
| } |
| catch (std::exception& e) |
| { |
| std::stringstream message; |
| message << "Exception (" << e.what() << ") caught from LoadNetwork."; |
| FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb); |
| return V1_0::ErrorStatus::NONE; |
| } |
| |
| std::unique_ptr<ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>> preparedModel( |
| new ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>( |
| netId, |
| runtime.get(), |
| options.GetRequestInputsAndOutputsDumpDir(), |
| options.IsGpuProfilingEnabled(), |
| options.isAsyncModelExecutionEnabled(), |
| options.getNoOfArmnnThreads(), |
| options.isImportEnabled(), |
| options.isExportEnabled(), |
| true)); |
| |
| NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release()); |
| |
| ALOGV("ArmnnDriverImpl::prepareModelFromCache cache timing = %lld µs", |
| std::chrono::duration_cast<std::chrono::microseconds> |
| (std::chrono::system_clock::now() - modelFromCacheTimepoint).count()); |
| |
| return V1_0::ErrorStatus::NONE; |
| } |
| |
| Return<void> ArmnnDriverImpl::getCapabilities_1_2(const armnn::IRuntimePtr& runtime, |
| V1_2::IDevice::getCapabilities_1_2_cb cb) |
| { |
| ALOGV("hal_1_2::ArmnnDriverImpl::getCapabilities()"); |
| |
| V1_2::Capabilities capabilities; |
| |
| float defaultValue = .1f; |
| |
| if (runtime) |
| { |
| capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime = |
| ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue); |
| |
| capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage = |
| ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue); |
| |
| capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime = |
| ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue); |
| |
| capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage = |
| ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue); |
| |
| // Set the base value for all operand types |
| #if defined(ARMNN_ANDROID_R) || defined(ARMNN_ANDROID_S) |
| capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_2>({FLT_MAX, FLT_MAX}); |
| #else |
| capabilities.operandPerformance = nonExtensionOperandPerformance({FLT_MAX, FLT_MAX}); |
| #endif |
| |
| // Load supported operand types |
| update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_FLOAT32, |
| { |
| .execTime = ParseSystemProperty(g_OperandTypeTensorFloat32PerformanceExecTime, defaultValue), |
| .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat32PerformancePowerUsage, defaultValue) |
| }); |
| |
| update(&capabilities.operandPerformance, V1_2::OperandType::FLOAT32, |
| { |
| .execTime = ParseSystemProperty(g_OperandTypeFloat32PerformanceExecTime, defaultValue), |
| .powerUsage = ParseSystemProperty(g_OperandTypeFloat32PerformancePowerUsage, defaultValue) |
| }); |
| |
| update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_FLOAT16, |
| { |
| .execTime = ParseSystemProperty(g_OperandTypeTensorFloat16PerformanceExecTime, defaultValue), |
| .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat16PerformancePowerUsage, defaultValue) |
| }); |
| |
| update(&capabilities.operandPerformance, V1_2::OperandType::FLOAT16, |
| { |
| .execTime = ParseSystemProperty(g_OperandTypeFloat16PerformanceExecTime, defaultValue), |
| .powerUsage = ParseSystemProperty(g_OperandTypeFloat16PerformancePowerUsage, defaultValue) |
| }); |
| |
| update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_QUANT8_ASYMM, |
| { |
| .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformanceExecTime, defaultValue), |
| .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformancePowerUsage, defaultValue) |
| }); |
| |
| update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_QUANT8_SYMM, |
| { |
| .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformanceExecTime, defaultValue), |
| .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformancePowerUsage, defaultValue) |
| }); |
| |
| update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_QUANT16_SYMM, |
| { |
| .execTime = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformanceExecTime, defaultValue), |
| .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformancePowerUsage, defaultValue) |
| }); |
| |
| update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL, |
| { |
| .execTime = |
| ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime, defaultValue), |
| .powerUsage = |
| ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage, defaultValue) |
| }); |
| |
| update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_INT32, |
| { |
| .execTime = ParseSystemProperty(g_OperandTypeTensorInt32PerformanceExecTime, defaultValue), |
| .powerUsage = ParseSystemProperty(g_OperandTypeTensorInt32PerformancePowerUsage, defaultValue) |
| }); |
| |
| update(&capabilities.operandPerformance, V1_2::OperandType::INT32, |
| { |
| .execTime = ParseSystemProperty(g_OperandTypeInt32PerformanceExecTime, defaultValue), |
| .powerUsage = ParseSystemProperty(g_OperandTypeInt32PerformancePowerUsage, defaultValue) |
| }); |
| |
| cb(V1_0::ErrorStatus::NONE, capabilities); |
| } |
| else |
| { |
| capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime = 0; |
| capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage = 0; |
| capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime = 0; |
| capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage = 0; |
| |
| // Set the base value for all operand types |
| #if defined(ARMNN_ANDROID_R) || defined(ARMNN_ANDROID_S) |
| capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_2>({0.f, 0.0f}); |
| #else |
| capabilities.operandPerformance = nonExtensionOperandPerformance({0.f, 0.0f}); |
| #endif |
| |
| cb(V1_0::ErrorStatus::DEVICE_UNAVAILABLE, capabilities); |
| } |
| |
| return Void(); |
| } |
| |
| } // namespace hal_1_2 |
| } // namespace armnn_driver |