| // |
| // Copyright © 2017 Arm Ltd. All rights reserved. |
| // SPDX-License-Identifier: MIT |
| // |
| #pragma once |
| |
| #include "BackendOptions.hpp" |
| #include "INetwork.hpp" |
| #include "IProfiler.hpp" |
| #include "IWorkingMemHandle.hpp" |
| #include "IAsyncExecutionCallback.hpp" |
| #include "Tensor.hpp" |
| #include "Types.hpp" |
| #include "TypesUtils.hpp" |
| #include "profiling/ILocalPacketHandler.hpp" |
| |
| #include <armnn/backends/ICustomAllocator.hpp> |
| #include <armnn/backends/IMemoryOptimizerStrategy.hpp> |
| #include <memory> |
| #include <map> |
| |
| namespace armnn |
| { |
| |
/// Identifier type for a network loaded into a runtime (see IRuntime::LoadNetwork).
using NetworkId = int;

// Forward declarations: only pointers/references to these types are needed at this point.
class IGpuAccTunedParameters;
class IRuntime;
struct RuntimeImpl;

/// Owning pointer to an IRuntime whose deleter is a plain function taking the raw pointer.
using IRuntimePtr = std::unique_ptr<IRuntime, void(*)(IRuntime*)>;
| |
| struct INetworkProperties |
| { |
| INetworkProperties(bool asyncEnabled, |
| MemorySource inputSource, |
| MemorySource outputSource, |
| bool profilingEnabled = false, |
| ProfilingDetailsMethod detailsMethod = ProfilingDetailsMethod::Undefined, |
| bool externalMemoryManagementEnabled = false) |
| : m_ImportEnabled(inputSource != MemorySource::Undefined), |
| m_ExportEnabled(outputSource != MemorySource::Undefined), |
| m_AsyncEnabled(asyncEnabled), |
| m_ProfilingEnabled(profilingEnabled), |
| m_OutputNetworkDetailsMethod(detailsMethod), |
| m_InputSource(inputSource), |
| m_OutputSource(outputSource), |
| m_ExternalMemoryManagementEnabled(externalMemoryManagementEnabled) |
| {} |
| |
| /// Deprecated and will be removed in future release. |
| const bool m_ImportEnabled; |
| /// Deprecated and will be removed in future release. |
| const bool m_ExportEnabled; |
| |
| const bool m_AsyncEnabled; |
| |
| const bool m_ProfilingEnabled; |
| |
| const ProfilingDetailsMethod m_OutputNetworkDetailsMethod; |
| |
| const MemorySource m_InputSource; |
| const MemorySource m_OutputSource; |
| |
| const bool m_ExternalMemoryManagementEnabled; |
| |
| virtual ~INetworkProperties() {} |
| }; |
| |
| using namespace armnn::experimental; |
| |
| class IRuntime |
| { |
| public: |
| struct CreationOptions |
| { |
| CreationOptions() |
| : m_GpuAccTunedParameters(nullptr) |
| , m_EnableGpuProfiling(false) |
| , m_DynamicBackendsPath("") |
| , m_ProtectedMode(false) |
| , m_CustomAllocatorMap() |
| , m_MemoryOptimizerStrategyMap() |
| {} |
| |
| /// If set, uses the GpuAcc tuned parameters from the given object when executing GPU workloads. |
| /// It will also be updated with new tuned parameters if it is configured to do so. |
| std::shared_ptr<IGpuAccTunedParameters> m_GpuAccTunedParameters; |
| |
| /// Setting this flag will allow the user to obtain GPU profiling information from the runtime. |
| bool m_EnableGpuProfiling; |
| |
| /// Setting this value will override the paths set by the DYNAMIC_BACKEND_PATHS compiler directive |
| /// Only a single path is allowed for the override |
| /// It defines the path to search for any [dynamic backend libraries](src/dynamic/README.md). |
| std::string m_DynamicBackendsPath; |
| |
| /// Setting this flag will allow the user to create the Runtime in protected mode. |
| /// It will run all the inferences on protected memory and will make sure that |
| /// INetworkProperties::m_ImportEnabled set to true with MemorySource::DmaBufProtected option |
| /// This requires that the backend supports Protected Memory and has an allocator capable of |
| /// allocating Protected Memory associated with it. |
| bool m_ProtectedMode; |
| |
| /// @brief A map to define a custom memory allocator for specific backend Ids. |
| /// |
| /// @details A Custom Allocator is used for allocation of working memory in the backends. |
| /// Set this if you need to take control of how memory is allocated on a backend. Required for |
| /// Protected Mode in order to correctly allocate Protected Memory |
| /// |
| /// @note Only supported for GpuAcc |
| std::map<BackendId, std::shared_ptr<ICustomAllocator>> m_CustomAllocatorMap; |
| |
| /// @brief A map to define a custom memory optimizer strategy for specific backend Ids. |
| /// |
| /// @details A Memory Optimizer Strategy provides a solution to an abstract representation of |
| /// a network's memory requirements. This can also be used to return a pre-computed solution |
| /// for a specific network. Set this if you want to implement a Custom Memory Optimizer Strategy |
| /// for a given backend. |
| std::map<BackendId, std::shared_ptr<IMemoryOptimizerStrategy>> m_MemoryOptimizerStrategyMap; |
| |
| struct ExternalProfilingOptions |
| { |
| ExternalProfilingOptions() |
| : m_EnableProfiling(false) |
| , m_TimelineEnabled(false) |
| , m_OutgoingCaptureFile("") |
| , m_IncomingCaptureFile("") |
| , m_FileOnly(false) |
| , m_CapturePeriod(LOWEST_CAPTURE_PERIOD) |
| , m_FileFormat("binary") |
| , m_LocalPacketHandlers() |
| {} |
| |
| /// Indicates whether external profiling is enabled or not. |
| bool m_EnableProfiling; |
| /// Indicates whether external timeline profiling is enabled or not. |
| bool m_TimelineEnabled; |
| /// Path to a file in which outgoing timeline profiling messages will be stored. |
| std::string m_OutgoingCaptureFile; |
| /// Path to a file in which incoming timeline profiling messages will be stored. |
| std::string m_IncomingCaptureFile; |
| /// Enable profiling output to file only. |
| bool m_FileOnly; |
| /// The duration at which captured profiling messages will be flushed. |
| uint32_t m_CapturePeriod; |
| /// The format of the file used for outputting profiling data. |
| std::string m_FileFormat; |
| std::vector<arm::pipe::ILocalPacketHandlerSharedPtr> m_LocalPacketHandlers; |
| }; |
| ExternalProfilingOptions m_ProfilingOptions; |
| |
| /// Pass backend specific options. |
| /// |
| /// For example, to enable GpuAcc tuning add the following |
| /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp |
| /// m_BackendOption.emplace_back( |
| /// BackendOptions{"GpuAcc", |
| /// { |
| /// {"TuningLevel", 2}, |
| /// {"TuningFile", filename} |
| /// {"MemoryOptimizerStrategy", strategyname} |
| /// } |
| /// }); |
| /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| /// Execute representative workloads through the runtime to generate tuning data. |
| /// The tuning file is written once the runtime is destroyed |
| |
| /// To execute with the tuning data, start up with just the tuning file specified. |
| /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp |
| /// m_BackendOption.emplace_back( |
| /// BackendOptions{"GpuAcc", |
| /// { |
| /// {"TuningFile", filename} |
| /// } |
| /// }); |
| /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| |
| /// The following backend options are available: |
| /// AllBackends: |
| /// "MemoryOptimizerStrategy" : string [stategynameString] |
| /// (Existing Memory Optimizer Strategies: ConstantMemoryStrategy) |
| /// GpuAcc: |
| /// "TuningLevel" : int [0..3] (0=UseOnly(default) | 1=RapidTuning | 2=NormalTuning | 3=ExhaustiveTuning) |
| /// "TuningFile" : string [filenameString] |
| /// "KernelProfilingEnabled" : bool [true | false] |
| std::vector<BackendOptions> m_BackendOptions; |
| }; |
| |
| static IRuntime* CreateRaw(const CreationOptions& options); |
| static IRuntimePtr Create(const CreationOptions& options); |
| static void Destroy(IRuntime* runtime); |
| |
| /// Loads a complete network into the IRuntime. |
| /// @param [out] networkIdOut - Unique identifier for the network is returned in this reference. |
| /// @param [in] network - Complete network to load into the IRuntime. |
| /// The runtime takes ownership of the network once passed in. |
| /// @return armnn::Status |
| Status LoadNetwork(NetworkId& networkIdOut, IOptimizedNetworkPtr network); |
| |
| /// Load a complete network into the IRuntime. |
| /// @param [out] networkIdOut Unique identifier for the network is returned in this reference. |
| /// @param [in] network Complete network to load into the IRuntime. |
| /// @param [out] errorMessage Error message if there were any errors. |
| /// The runtime takes ownership of the network once passed in. |
| /// @return armnn::Status |
| Status LoadNetwork(NetworkId& networkIdOut, |
| IOptimizedNetworkPtr network, |
| std::string& errorMessage); |
| |
| Status LoadNetwork(NetworkId& networkIdOut, |
| IOptimizedNetworkPtr network, |
| std::string& errorMessage, |
| const INetworkProperties& networkProperties); |
| |
| TensorInfo GetInputTensorInfo(NetworkId networkId, LayerBindingId layerId) const; |
| TensorInfo GetOutputTensorInfo(NetworkId networkId, LayerBindingId layerId) const; |
| |
| /// ImportInputs separates the importing and mapping of InputTensors from network execution. |
| /// Allowing for a set of InputTensors to be imported and mapped once, but used in execution many times. |
| /// This function is not thread safe and must not be used while other threads are calling Execute(). |
| /// Only compatible with AsyncEnabled networks and aligned memory import |
| std::vector<ImportedInputId> ImportInputs(NetworkId networkId, const InputTensors& inputTensors, |
| MemorySource forceImportMemorySource = MemorySource::Undefined); |
| |
| /// ImportOutputs separates the importing and mapping of OutputTensors from network execution. |
| /// Allowing for a set of OutputTensors to be imported and mapped once, but used in execution many times. |
| /// This function is not thread safe and must not be used while other threads are calling Execute(). |
| /// Only compatible with AsyncEnabled networks and aligned memory import |
| std::vector<ImportedOutputId> ImportOutputs(NetworkId networkId, const OutputTensors& outputTensors, |
| MemorySource forceImportMemorySource = MemorySource::Undefined); |
| |
| /// Un-import and delete the imported InputTensor/s |
| /// This function is not thread safe and must not be used while other threads are calling Execute(). |
| /// Only compatible with AsyncEnabled networks |
| void ClearImportedInputs(NetworkId networkId, const std::vector<ImportedInputId> inputIds); |
| |
| /// Un-import and delete the imported OutputTensor/s |
| /// This function is not thread safe and must not be used while other threads are calling Execute(). |
| /// Only compatible with AsyncEnabled networks |
| void ClearImportedOutputs(NetworkId networkId, const std::vector<ImportedOutputId> outputIds); |
| |
| /// Evaluates a network using input in inputTensors and outputs filled into outputTensors |
| Status EnqueueWorkload(NetworkId networkId, |
| const InputTensors& inputTensors, |
| const OutputTensors& outputTensors, |
| std::vector<ImportedInputId> preImportedInputIds = {}, |
| std::vector<ImportedOutputId> preImportedOutputIds = {}); |
| |
| /// This is an experimental function. |
| /// Evaluates a network using input in inputTensors and outputs filled into outputTensors. |
| /// This function performs a thread safe execution of the network. Returns once execution is complete. |
| /// Will block until this and any other thread using the same workingMem object completes. |
| Status Execute(IWorkingMemHandle& workingMemHandle, |
| const InputTensors& inputTensors, |
| const OutputTensors& outputTensors, |
| std::vector<ImportedInputId> preImportedInputs = {}, |
| std::vector<ImportedOutputId> preImportedOutputs = {}); |
| |
| /// Unloads a network from the IRuntime. |
| /// At the moment this only removes the network from the m_Impl->m_Network. |
| /// This might need more work in the future to be AndroidNN compliant. |
| /// @param [in] networkId - Unique identifier for the network to be unloaded. Generated in LoadNetwork(). |
| /// @return armnn::Status |
| Status UnloadNetwork(NetworkId networkId); |
| |
| const IDeviceSpec& GetDeviceSpec() const; |
| |
| /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have |
| /// overlapped Execution by calling this function from different threads. |
| std::unique_ptr<IWorkingMemHandle> CreateWorkingMemHandle(NetworkId networkId); |
| |
| /// Gets the profiler corresponding to the given network id. |
| /// @param networkId The id of the network for which to get the profile. |
| /// @return A pointer to the requested profiler, or nullptr if not found. |
| const std::shared_ptr<IProfiler> GetProfiler(NetworkId networkId) const; |
| |
| /// Registers a callback function to debug layers performing custom computations on intermediate tensors. |
| /// @param networkId The id of the network to register the callback. |
| /// @param func callback function to pass to the debug layer. |
| void RegisterDebugCallback(NetworkId networkId, const DebugCallbackFunction& func); |
| |
| protected: |
| IRuntime(); |
| IRuntime(const IRuntime::CreationOptions& options); |
| ~IRuntime(); |
| |
| std::unique_ptr<RuntimeImpl> pRuntimeImpl; |
| }; |
| |
| |
// The following API is replaced by the backend options API.

// Redeclared here so the alias below is valid on its own, without relying on
// an earlier forward declaration elsewhere in the header.
class IGpuAccTunedParameters;

/// Shared-ownership handle to an IGpuAccTunedParameters.
using IGpuAccTunedParametersPtr = std::shared_ptr<IGpuAccTunedParameters>;

/// Manages a set of GpuAcc parameters which have been tuned for maximum performance.
/// Pass an instance of this object to the IRuntime::Create() method (via IRuntime::CreationOptions) to use it
/// for all GPU workload execution.
///
/// Can be created in two modes:
///     - In UseTunedParameters mode, the parameters stored in this object are used to execute GPU workloads.
///     - In UpdateTunedParameters mode, additionally, whenever a GPU workload is executed for the first time, the
///       optimum parameters will be found and stored in this object. WARNING - This tuning can be slow.
///
/// The parameters can be loaded from and saved to a file so that you can first run a slow initial read-write
/// execution, save the parameters for later and then run fast read-only executions using the optimised parameters.
class IGpuAccTunedParameters
{
public:
    /// Whether stored parameters are only used, or also updated during execution.
    enum class Mode
    {
        UseTunedParameters,
        UpdateTunedParameters
    };

    /// Tuning level passed to the factory functions.
    enum class TuningLevel
    {
        Rapid = 1,
        Normal = 2,
        Exhaustive = 3
    };

    /// Creates an IGpuAccTunedParameters with the given mode.
    /// @{
    static IGpuAccTunedParameters* CreateRaw(Mode mode, TuningLevel tunerMode);
    static IGpuAccTunedParametersPtr Create(Mode mode, TuningLevel tunerMode);
    /// @}

    /// Counterpart of CreateRaw() for releasing an instance; the destructor itself is protected.
    static void Destroy(IGpuAccTunedParameters* params);

    /// Loads an existing set of tuned parameters from the given file.
    /// If there is an error loading the file, an armnn::Exception is thrown.
    virtual void Load(const char* filename) = 0;

    /// Saves the current set of tuned parameters to the given file.
    /// If there is an error saving to the file, an armnn::Exception is thrown.
    virtual void Save(const char* filename) const = 0;

protected:
    /// Protected so that instances cannot be deleted directly through this interface.
    virtual ~IGpuAccTunedParameters() = default;
};
| |
| } // namespace armnn |