| // |
| // Copyright © 2021 Arm Ltd and Contributors. All rights reserved. |
| // SPDX-License-Identifier: MIT |
| // |
| |
| #include "AsyncNetwork.hpp" |
| #include "Graph.hpp" |
| #include "Layer.hpp" |
| #include "Profiling.hpp" |
| |
| #include <armnn/BackendRegistry.hpp> |
| #include <armnn/Logging.hpp> |
| #include <armnn/utility/Assert.hpp> |
| |
| #include <armnn/backends/IMemoryManager.hpp> |
| #include <backendsCommon/CpuTensorHandle.hpp> |
| #include <backendsCommon/WorkloadData.hpp> |
| #include <backendsCommon/MemCopyWorkload.hpp> |
| #include <LabelsAndEventClasses.hpp> |
| |
| #include <fmt/format.h> |
| |
| namespace armnn |
| { |
| |
| namespace experimental |
| { |
| |
| IAsyncNetwork::IAsyncNetwork(std::unique_ptr<IOptimizedNetwork> net, |
| const INetworkProperties& networkProperties, |
| profiling::ProfilingService& profilingService) |
| : pAsyncNetworkImpl( new AsyncNetworkImpl(std::move(net), networkProperties, profilingService)) {}; |
| |
| IAsyncNetwork::~IAsyncNetwork() = default; |
| |
| TensorInfo IAsyncNetwork::GetInputTensorInfo(LayerBindingId layerId) const |
| { |
| return pAsyncNetworkImpl->GetInputTensorInfo(layerId); |
| } |
| |
| TensorInfo IAsyncNetwork::GetOutputTensorInfo(LayerBindingId layerId) const |
| { |
| return pAsyncNetworkImpl->GetOutputTensorInfo(layerId); |
| } |
| |
| Status IAsyncNetwork::Execute(const InputTensors& inputTensors, |
| const OutputTensors& outputTensors, |
| IWorkingMemHandle& workingMemHandle) |
| { |
| return pAsyncNetworkImpl->Execute(inputTensors, outputTensors, workingMemHandle); |
| } |
| |
| std::unique_ptr<IWorkingMemHandle> IAsyncNetwork::CreateWorkingMemHandle() |
| { |
| return pAsyncNetworkImpl->CreateWorkingMemHandle(); |
| } |
| |
| std::shared_ptr<IProfiler> IAsyncNetwork::GetProfiler() const |
| { |
| return pAsyncNetworkImpl->GetProfiler(); |
| } |
| |
| void IAsyncNetwork::RegisterDebugCallback(const DebugCallbackFunction& func) |
| { |
| pAsyncNetworkImpl->RegisterDebugCallback(func); |
| } |
| |
| void AddLayerStructure(std::unique_ptr<profiling::TimelineUtilityMethods>& timelineUtils, |
| const Layer& layer, |
| profiling::ProfilingGuid networkGuid) |
| { |
| // Add layer to the post-optimisation network structure |
| std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr(); |
| timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(), |
| networkGuid, |
| layerName, |
| profiling::LabelsAndEventClasses::LAYER_GUID); |
| for (auto&& input : layer.GetInputSlots()) |
| { |
| const IOutputSlot* source = input.GetConnectedOutputSlot(); |
| ARMNN_ASSERT(source != NULL); |
| timelineUtils->CreateConnectionRelationship(profiling::ProfilingRelationshipType::RetentionLink, |
| source->GetOwningLayerGuid(), |
| layer.GetGuid()); |
| } |
| } |
| |
| void AddWorkloadStructure(std::unique_ptr<profiling::TimelineUtilityMethods>& timelineUtils, |
| std::unique_ptr<IWorkload>& workload, |
| const Layer& layer) |
| { |
| // Add workload to the post-optimisation network structure |
| timelineUtils->CreateTypedEntity(workload->GetGuid(), profiling::LabelsAndEventClasses::WORKLOAD_GUID); |
| timelineUtils->MarkEntityWithLabel(workload->GetGuid(), |
| layer.GetBackendId().Get(), |
| profiling::LabelsAndEventClasses::BACKENDID_GUID); |
| |
| // Link the workload to the layer |
| timelineUtils->CreateRelationship(profiling::ProfilingRelationshipType::RetentionLink, |
| layer.GetGuid(), |
| workload->GetGuid(), |
| profiling::LabelsAndEventClasses::CHILD_GUID); |
| } |
| |
| TensorInfo AsyncNetworkImpl::GetInputTensorInfo(LayerBindingId layerId) const |
| { |
| for (auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers()) |
| { |
| ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot"); |
| if (inputLayer->GetBindingId() == layerId) |
| { |
| return inputLayer->GetOutputSlot(0).GetTensorInfo(); |
| } |
| } |
| |
| throw InvalidArgumentException(fmt::format("No input layer is associated with id {0}}", layerId)); |
| } |
| |
| TensorInfo AsyncNetworkImpl::GetOutputTensorInfo(LayerBindingId layerId) const |
| { |
| for (auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers()) |
| { |
| ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot"); |
| ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected"); |
| if (outputLayer->GetBindingId() == layerId) |
| { |
| return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo(); |
| } |
| } |
| |
| throw InvalidArgumentException(fmt::format("No output layer is associated with id {0}}", layerId)); |
| } |
| |
| // Need something like the collectors to get the correct tensors for the inputs |
| void AsyncNetworkImpl::CollectInputTensorHandles( |
| std::unordered_map<LayerGuid, std::vector<ITensorHandle*> >& tensorHandles, |
| std::vector<ITensorHandle*>& inputs, |
| const armnn::Layer* layer, |
| const TensorHandleFactoryRegistry& registry, |
| const bool isMemoryManaged) |
| { |
| for (auto&& inputSlot : layer->GetInputSlots()) |
| { |
| // The graph must be well-formed at this point. |
| ARMNN_ASSERT(inputSlot.GetConnection()); |
| auto outputSlot = inputSlot.GetConnectedOutputSlot(); |
| auto key = outputSlot->GetOwningLayer().GetGuid(); |
| auto search = tensorHandles.find(key); |
| |
| if (search == tensorHandles.end()) |
| { |
| ITensorHandleFactory::FactoryId factoryId = outputSlot->GetTensorHandleFactoryId(); |
| const TensorInfo& tensorInfo = outputSlot->GetTensorInfo(); |
| |
| ARMNN_ASSERT(factoryId != ITensorHandleFactory::LegacyFactoryId); |
| ITensorHandleFactory* handleFactory = registry.GetFactory(factoryId); |
| ARMNN_ASSERT(handleFactory); |
| std::unique_ptr<ITensorHandle> tensor = handleFactory->CreateTensorHandle(tensorInfo, isMemoryManaged); |
| ITensorHandle* tensorPtr = tensor.release(); |
| inputs.push_back(tensorPtr); |
| } |
| else |
| { |
| unsigned int index = outputSlot->CalculateIndexOnOwner(); |
| inputs.push_back(search->second[index]); |
| } |
| } |
| } |
| |
| void AsyncNetworkImpl::CreateOutputTensorHandles( |
| std::unordered_map<LayerGuid, std::vector<ITensorHandle*> >& tensorHandles, |
| std::vector<ITensorHandle*>& outputs, |
| const armnn::Layer* layer, |
| const TensorHandleFactoryRegistry& registry, |
| const bool isMemoryManaged) |
| { |
| auto guid = layer->GetGuid(); |
| std::vector<ITensorHandle*> tensorHandleVectors; |
| tensorHandleVectors.reserve(layer->GetNumOutputSlots()); |
| |
| for (unsigned int idx=0; idx < layer->GetNumOutputSlots(); idx++) |
| { |
| const OutputSlot& slot = layer->GetOutputSlot(idx); |
| ITensorHandleFactory::FactoryId factoryId = slot.GetTensorHandleFactoryId(); |
| const TensorInfo& tensorInfo = slot.GetTensorInfo(); |
| |
| ARMNN_ASSERT(factoryId != ITensorHandleFactory::LegacyFactoryId); |
| ITensorHandleFactory* handleFactory = registry.GetFactory(factoryId); |
| ARMNN_ASSERT(handleFactory); |
| std::unique_ptr<ITensorHandle> tensor = handleFactory->CreateTensorHandle(tensorInfo, isMemoryManaged); |
| ITensorHandle* tensorPtr = tensor.release(); |
| outputs.push_back(tensorPtr); |
| tensorHandleVectors.push_back(tensorPtr); |
| } |
| tensorHandles.insert({guid, tensorHandleVectors}); |
| } |
| |
| const IWorkloadFactory& AsyncNetworkImpl::GetWorkloadFactory(const Layer& layer) const |
| { |
| const IWorkloadFactory* workloadFactory = nullptr; |
| |
| auto it = m_WorkloadFactories.find(layer.GetBackendId()); |
| if (it == m_WorkloadFactories.end()) |
| { |
| throw RuntimeException( |
| fmt::format("No workload factory for {0} to be used for layer: {1}}", |
| layer.GetBackendId().Get(), |
| layer.GetNameStr()), |
| CHECK_LOCATION()); |
| } |
| |
| workloadFactory = it->second.first.get(); |
| |
| ARMNN_ASSERT_MSG(workloadFactory, "No workload factory"); |
| |
| std::string reasonIfUnsupported; |
| ARMNN_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer, {}, reasonIfUnsupported), |
| "Factory does not support layer"); |
| IgnoreUnused(reasonIfUnsupported); |
| return *workloadFactory; |
| } |
| |
| void AsyncNetworkImpl::EnqueueInput(const BindableLayer& layer, |
| const ConstTensor& inputTensor, |
| WorkingMemHandle& context) |
| { |
| if (layer.GetType() != LayerType::Input) |
| { |
| throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer"); |
| } |
| LayerGuid id = layer.GetOutputSlot(0).GetConnection(0)->GetOwningLayer().GetGuid(); |
| WorkingMemDescriptor descriptor = context.GetWorkingMemDescriptor(id); |
| ARMNN_ASSERT_MSG(descriptor.m_Outputs.size() == 1, "Can only handle Input Layer with one output"); |
| |
| MemorySourceFlags importFlags = descriptor.m_Outputs[0]->GetImportFlags(); |
| if (m_NetworkProperties.m_ImportEnabled) // Try import the input tensor |
| { |
| if (CheckFlag(importFlags, MemorySource::Malloc) ) |
| { |
| // This assumes a CPU Tensor handle |
| std::unique_ptr<ITensorHandle> tensorHandle = |
| std::make_unique<ConstPassthroughCpuTensorHandle>(inputTensor.GetInfo(), |
| inputTensor.GetMemoryArea()); |
| |
| void* mem = tensorHandle->Map(false); |
| if (descriptor.m_Outputs[0]->Import(mem, MemorySource::Malloc)) |
| { |
| tensorHandle->Unmap(); |
| return; |
| } |
| tensorHandle->Unmap(); |
| throw MemoryImportException("EnqueueInput: Memory Import failed"); |
| } |
| else |
| { |
| throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import"); |
| } |
| } |
| else |
| { |
| std::unique_ptr<ITensorHandle> tensorHandle = |
| std::make_unique<ConstPassthroughCpuTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea()); |
| |
| auto copyFunc = [](void* dst, const void* src, size_t size) |
| { |
| memcpy(dst, src, size); |
| }; |
| |
| for (const auto& input : descriptor.m_Inputs) |
| { |
| CopyTensorContentsGeneric(tensorHandle.get(), input, copyFunc); |
| } |
| } |
| } |
| |
| void AsyncNetworkImpl::EnqueueOutput(const BindableLayer& layer, const Tensor& outputTensor, WorkingMemHandle& handle) |
| { |
| if (layer.GetType() != LayerType::Output) |
| { |
| throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer"); |
| } |
| ARMNN_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input."); |
| |
| LayerGuid id = layer.GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayerGuid(); |
| WorkingMemDescriptor descriptor = handle.GetWorkingMemDescriptor(id); |
| |
| ITensorHandle* inputTensorHandle = descriptor.m_Inputs[0]; |
| ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated."); |
| |
| // Try import the output tensor. |
| // Note: We can only import the output pointer if all of the following hold true: |
| // a) The imported pointer is aligned sufficiently |
| // b) The tensor has zero padding |
| // c) There is only one connection to the OutputSlot and it is to an OutputLayer. |
| // d) The output pointer is allocated via malloc. (Other types will be supported in a later release) |
| // e) m_IsExportEnabled must be set to true |
| if (m_NetworkProperties.m_ExportEnabled && |
| (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1)) |
| { |
| if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input) |
| { |
| MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags(); |
| if (CheckFlag(importFlags, MemorySource::Malloc)) |
| { |
| std::unique_ptr<ITensorHandle> tensorHandle = |
| std::make_unique<PassthroughCpuTensorHandle>(outputTensor.GetInfo(), |
| outputTensor.GetMemoryArea()); |
| |
| void* mem = tensorHandle->Map(false); |
| bool importOk = inputTensorHandle->Import(mem, MemorySource::Malloc); |
| tensorHandle->Unmap(); |
| |
| if (importOk) |
| { |
| ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "SyncMemGeneric_Execute"); |
| descriptor.m_Inputs[0]->Map(true); |
| descriptor.m_Inputs[0]->Unmap(); |
| } |
| else |
| { |
| throw MemoryExportException("EnqueueOutput: Memory Export failed"); |
| } |
| } |
| else |
| { |
| throw MemoryExportException("EnqueueOutput: Memory Export failed, backend does not support Export"); |
| } |
| } |
| else |
| { |
| throw MemoryExportException("EnqueueOutput: Memory Export failed, attempting to export Input Layer"); |
| } |
| } |
| else |
| { |
| auto copyFunc = [](void* dst, const void* src, size_t size) |
| { |
| memcpy(dst, src, size); |
| }; |
| |
| std::unique_ptr<ITensorHandle> tensorHandle = |
| std::make_unique<PassthroughCpuTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea()); |
| |
| CopyTensorContentsGeneric(descriptor.m_Outputs[0], tensorHandle.get(), copyFunc); |
| } |
| } |
| |
| AsyncNetworkImpl::AsyncNetworkImpl(std::unique_ptr<IOptimizedNetwork> net, |
| const INetworkProperties& networkProperties, |
| profiling::ProfilingService& profilingService) : |
| m_OptimizedNetwork(std::move(net)), |
| m_NetworkProperties(networkProperties), |
| m_ProfilingService(profilingService) |
| { |
| // Create a profiler and register it for the current thread. |
| m_Profiler = std::make_shared<IProfiler>(); |
| ProfilerManager::GetInstance().RegisterProfiler(m_Profiler.get()); |
| |
| Graph &order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort(); |
| |
| //First create tensor handlers, backends and workload factories. |
| //Handlers are created before workloads are. |
| //Because workload creation can modify some of the handlers, |
| //(for example the splitter and concat layers). |
| for (auto &&layer : order) |
| { |
| auto const &backendId = layer->GetBackendId(); |
| if (m_Backends.count(backendId) == 0) |
| { |
| auto createBackend = BackendRegistryInstance().GetFactory(backendId); |
| auto it = m_Backends.emplace(std::make_pair(backendId, createBackend())); |
| |
| IBackendInternal* backend = it.first->second.get(); |
| |
| if (backend->SupportsTensorAllocatorAPI()) |
| { |
| backend->RegisterTensorHandleFactories(m_TensorHandleFactoryRegistry); |
| |
| auto workloadFactory = backend->CreateWorkloadFactory(m_TensorHandleFactoryRegistry); |
| m_WorkloadFactories.emplace( |
| std::make_pair(backendId, std::make_pair(std::move(workloadFactory), nullptr))); |
| } |
| else |
| { |
| IBackendInternal::IMemoryManagerSharedPtr memoryManager = backend->CreateMemoryManager(); |
| auto workloadFactory = backend->CreateWorkloadFactory(memoryManager); |
| |
| m_WorkloadFactories.emplace( |
| std::make_pair(backendId, std::make_pair(std::move(workloadFactory), memoryManager))); |
| } |
| } |
| } |
| |
| profiling::ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid(); |
| std::unique_ptr<profiling::TimelineUtilityMethods> timelineUtils = |
| profiling::TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService); |
| if (timelineUtils) |
| { |
| timelineUtils->CreateTypedEntity(networkGuid, profiling::LabelsAndEventClasses::NETWORK_GUID); |
| } |
| |
| //Then create workloads. |
| for (auto &&layer : order) |
| { |
| if (timelineUtils) |
| { |
| // Add layer to the post-optimisation network structure |
| AddLayerStructure(timelineUtils, *layer, networkGuid); |
| } |
| |
| const IWorkloadFactory &workloadFactory = GetWorkloadFactory(*layer); |
| |
| switch (layer->GetType()) |
| { |
| case LayerType::Input: |
| case LayerType::Output: |
| { |
| // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput(). |
| break; |
| } |
| default: |
| { |
| auto workload = layer->CreateWorkload(workloadFactory); |
| |
| if (!workload) |
| { |
| const char* const layerName = |
| layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>"; |
| throw InvalidArgumentException( |
| fmt::format("No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')", |
| layerName, |
| static_cast<int>(layer->GetType()), |
| layer->GetBackendId().Get() |
| )); |
| } |
| |
| if (timelineUtils) |
| { |
| // Add workload to the post-optimisation network structure |
| AddWorkloadStructure(timelineUtils, workload, *layer); |
| } |
| |
| m_WorkloadQueue.push_back(move(workload)); |
| // release the constant data in the layer.. |
| layer->ReleaseConstantData(); |
| break; |
| } |
| } |
| } |
| |
| if (timelineUtils) |
| { |
| // Commit to send the post-optimisation network structure |
| timelineUtils->Commit(); |
| } |
| |
| // Now that the intermediate tensor memory has been set-up, do any post allocation configuration for each workload. |
| // PostAllocationConfiguure will now need to be handled in the ExecuteOn(WorkingMemDescriptor) |
| for (auto &workload : m_WorkloadQueue) |
| { |
| workload->PostAllocationConfigure(); |
| } |
| } |
| |
| Status AsyncNetworkImpl::Execute(const InputTensors& inputTensors, |
| const OutputTensors& outputTensors, |
| IWorkingMemHandle& iWorkingMemHandle) |
| { |
| const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph(); |
| |
| // Walk graph to determine the order of execution. |
| if (graph.GetNumLayers() < 2) |
| { |
| ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph"; |
| return Status::Failure; |
| } |
| |
| if (graph.GetNumInputs() != inputTensors.size()) |
| { |
| throw InvalidArgumentException("Number of inputs provided does not match network."); |
| } |
| |
| std::unique_ptr<profiling::TimelineUtilityMethods> timelineUtils = |
| profiling::TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService); |
| profiling::ProfilingGuid inferenceGuid = m_ProfilingService.GetNextGuid(); |
| if (timelineUtils) |
| { |
| // Add inference timeline trace if profiling is enabled. |
| profiling::ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid(); |
| timelineUtils->CreateTypedEntity(inferenceGuid, profiling::LabelsAndEventClasses::INFERENCE_GUID); |
| timelineUtils->CreateRelationship(profiling::ProfilingRelationshipType::RetentionLink, |
| networkGuid, |
| inferenceGuid, |
| profiling::LabelsAndEventClasses::EXECUTION_OF_GUID); |
| timelineUtils->RecordEvent(inferenceGuid, profiling::LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS); |
| } |
| |
| bool executionSucceeded = true; |
| |
| if (timelineUtils) |
| { |
| // Add end of life of the inference timeline if profiling is enabled. |
| timelineUtils->RecordEvent(inferenceGuid, profiling::LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS); |
| timelineUtils->Commit(); |
| } |
| WorkingMemHandle& workingMemHandle = dynamic_cast<WorkingMemHandle&>(iWorkingMemHandle); |
| std::lock_guard<std::mutex> lockGuard(workingMemHandle.GetMutex()); |
| |
| if (!workingMemHandle.IsAllocated()) |
| { |
| workingMemHandle.Allocate(); |
| } |
| |
| { |
| ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs"); |
| unsigned int i = 0; |
| |
| for (const BindableLayer* inputLayer : graph.GetInputLayers()) |
| { |
| EnqueueInput(*inputLayer, inputTensors[i].second, workingMemHandle); |
| ++i; |
| } |
| } |
| |
| auto Fail = [&](const std::exception& error) |
| { |
| ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what(); |
| executionSucceeded = false; |
| }; |
| profiling::ProfilingDynamicGuid workloadInferenceID(0); |
| |
| try |
| { |
| for (unsigned int i = 0; i < m_WorkloadQueue.size(); ++i) |
| { |
| auto& workload = m_WorkloadQueue[i]; |
| if (timelineUtils) |
| { |
| workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(), |
| inferenceGuid); |
| } |
| workload->ExecuteAsync(workingMemHandle.GetWorkingMemDescriptorAt(i)); |
| |
| if (timelineUtils) |
| { |
| timelineUtils->RecordEndOfLifeEvent(workloadInferenceID); |
| } |
| } |
| } |
| catch (const RuntimeException& error) |
| { |
| Fail(error); |
| } |
| catch (const std::runtime_error& error) |
| { |
| Fail(error); |
| } |
| // For each output to the network, call EnqueueOutput with the data passed by the user. |
| { |
| ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs"); |
| unsigned int i = static_cast<unsigned int>(m_WorkloadQueue.size() - graph.GetNumOutputs()); |
| |
| for (const BindableLayer* outputLayer : graph.GetOutputLayers()) |
| { |
| EnqueueOutput(*outputLayer, outputTensors[i].second, workingMemHandle); |
| ++i; |
| } |
| } |
| return executionSucceeded ? Status::Success : Status::Failure; |
| } |
| |
| /// Get the profiler used for this network |
| std::shared_ptr<IProfiler> AsyncNetworkImpl::GetProfiler() const |
| { |
| return m_Profiler; |
| } |
| |
| void AsyncNetworkImpl::RegisterDebugCallback(const DebugCallbackFunction& func) |
| { |
| for (auto&& workloadPtr: m_WorkloadQueue) |
| { |
| workloadPtr.get()->RegisterDebugCallback(func); |
| } |
| } |
| |
| /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have |
| /// overlapped Execution by calling this function from different threads. |
| std::unique_ptr<IWorkingMemHandle> AsyncNetworkImpl::CreateWorkingMemHandle() |
| { |
| Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph(); |
| std::unordered_map<LayerGuid, std::vector<ITensorHandle*> > tensorHandles; |
| std::vector<WorkingMemDescriptor> workingMemDescriptors; |
| std::unordered_map<LayerGuid, WorkingMemDescriptor> workingMemDescriptorMap; |
| |
| for (auto&& layer : order) |
| { |
| if (layer->GetType() == LayerType::Input || layer->GetType() == LayerType::Output) |
| { |
| continue; |
| } |
| WorkingMemDescriptor workingMemDescriptor; |
| // Look for the layer with 1 OutputSlot which has 1 connection and that connection is an Output Layer |
| // If Export is enabled disable memory management so we can export, otherwise we do a copy |
| if((layer->GetNumOutputSlots() == 1) && |
| (layer->GetOutputSlots()[0].GetNumConnections() == 1) && |
| (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output)) |
| { |
| CollectInputTensorHandles(tensorHandles, |
| workingMemDescriptor.m_Inputs, |
| layer, |
| m_TensorHandleFactoryRegistry, |
| !m_NetworkProperties.m_ExportEnabled); |
| CreateOutputTensorHandles(tensorHandles, |
| workingMemDescriptor.m_Outputs, |
| layer, |
| m_TensorHandleFactoryRegistry, |
| !m_NetworkProperties.m_ExportEnabled); |
| } |
| else |
| { |
| CollectInputTensorHandles(tensorHandles, |
| workingMemDescriptor.m_Inputs, |
| layer, |
| m_TensorHandleFactoryRegistry); |
| CreateOutputTensorHandles(tensorHandles, |
| workingMemDescriptor.m_Outputs, |
| layer, |
| m_TensorHandleFactoryRegistry); |
| } |
| workingMemDescriptorMap.insert({layer->GetGuid(), workingMemDescriptor}); |
| workingMemDescriptors.push_back(workingMemDescriptor); |
| } |
| return std::make_unique<WorkingMemHandle>(workingMemDescriptors, workingMemDescriptorMap); |
| } |
| |
| void AsyncNetworkImpl::FreeWorkingMemory() |
| { |
| // Informs the memory managers to release memory in it's respective memory group |
| for (auto&& workloadFactory : m_WorkloadFactories) |
| { |
| IBackendInternal::IMemoryManagerSharedPtr memoryManager = workloadFactory.second.second; |
| if (memoryManager) |
| { |
| memoryManager->Release(); |
| } |
| } |
| m_TensorHandleFactoryRegistry.ReleaseMemory(); |
| } |
| |
| } // end experimental namespace |
| |
| } // end armnn namespace |