//
// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "LoadedNetwork.hpp"
#include "Layer.hpp"
#include "Graph.hpp"
#include <Processes.hpp>
#include "Profiling.hpp"
#include "HeapProfiling.hpp"
#include "WorkingMemHandle.hpp"

#include <armnn/BackendRegistry.hpp>
#include <armnn/Logging.hpp>
#include <armnn/utility/Assert.hpp>

#include <armnn/backends/TensorHandle.hpp>
#include <armnn/backends/IMemoryManager.hpp>
#include <armnn/backends/MemCopyWorkload.hpp>
#include <backendsCommon/MemSyncWorkload.hpp>
#include <armnn/BackendHelper.hpp>

#include <fmt/format.h>

namespace armnn
{

using namespace std;
using namespace arm::pipe;

namespace
{

template <typename ExceptionType>
std::string ToErrorMessage(const char* prefix, const ExceptionType& error)
{
    std::stringstream ss;
    ss << prefix << " " << error.what();
    return ss.str();
}

void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                       const Layer& layer,
                       ProfilingGuid networkGuid)
{
    // Add layer to the post-optimisation network structure
    std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
    timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
                                               networkGuid,
                                               layerName,
                                               LabelsAndEventClasses::LAYER_GUID);
    for (auto&& input : layer.GetInputSlots())
    {
        const IOutputSlot* source = input.GetConnectedOutputSlot();
        ARMNN_ASSERT(source != nullptr);
        timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
                                                    source->GetOwningLayerGuid(),
                                                    layer.GetGuid());
    }
}

void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                          std::unique_ptr<IWorkload>& workload,
                          const Layer& layer)
{
    // Add workload to the post-optimisation network structure
    timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
    timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
                                       layer.GetBackendId().Get(),
                                       LabelsAndEventClasses::BACKENDID_GUID);

    // Link the workload to the layer
    timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                      layer.GetGuid(),
                                      workload->GetGuid(),
                                      LabelsAndEventClasses::CHILD_GUID);
}

} // anonymous namespace

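// MakeLoadedNetwork is the factory entry point used by the runtime: it wraps the LoadedNetwork
// constructor, translates any exception into errorMessage and returns a null pointer on failure.
// A minimal usage sketch (illustrative only; the INetworkProperties constructor form and the
// origin of optNet / profilingService are assumptions, not code from this file):
//
//     std::string errorMessage;
//     INetworkProperties properties(/*asyncEnabled=*/false,
//                                   MemorySource::Undefined,   // input source
//                                   MemorySource::Undefined);  // output source
//     std::unique_ptr<LoadedNetwork> network = LoadedNetwork::MakeLoadedNetwork(
//         std::move(optNet), errorMessage, properties, profilingService);
//     if (!network)
//     {
//         ARMNN_LOG(error) << errorMessage; // reason the workloads could not be prepared
//     }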
std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                                                                std::string& errorMessage,
                                                                const INetworkProperties& networkProperties,
                                                                ProfilingService& profilingService)
{
    std::unique_ptr<LoadedNetwork> loadedNetwork;

    auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
    {
        errorMessage = ToErrorMessage("An error occurred when preparing the network workloads: ", error);
        ARMNN_LOG(error) << errorMessage;

        return std::unique_ptr<LoadedNetwork>();
    };

    try
    {
        loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties, profilingService));
    }
    catch (const armnn::RuntimeException& error)
    {
        return Fail(error);
    }
    catch (const armnn::Exception& error)
    {
        return Fail(error);
    }
    catch (const std::runtime_error& error)
    {
        return Fail(error);
    }

    return loadedNetwork;
}

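// The constructor below performs the full load sequence. In outline (derived from the code that follows):
//   1. Create each backend once, plus a workload factory per backend, validating the
//      AsyncExecution / ExternallyManagedMemory capabilities up front.
//   2. For synchronous networks, create tensor handles for every layer, disabling internal
//      memory management wherever import/export or an external memory manager owns the buffers.
//   3. Emit the post-optimisation network structure to the profiling timeline.
//   4. Create one workload per layer (constants are held separately when the network is
//      async or externally managed).
//   5. Pre-create importable input/output tensor handles where every connected workload
//      supports tensor handle replacement.
//   6. Build memory profiles, then allocate and execute the constant workloads.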
LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                             const INetworkProperties& networkProperties,
                             ProfilingService& profilingService) :
                             m_OptimizedNetwork(std::move(net)),
                             m_NetworkProperties(networkProperties),
                             m_TensorHandleFactoryRegistry(),
                             m_ProfilingService(profilingService)
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadedNetwork");
    // Get the profiler and register it for the current thread.
    const std::shared_ptr<IProfiler>& profiler = m_OptimizedNetwork->GetProfiler();
    ProfilerManager::GetInstance().RegisterProfiler(profiler.get());

    profiler->EnableProfiling(networkProperties.m_ProfilingEnabled);

    profiler->EnableNetworkDetailsToStdOut(networkProperties.m_OutputNetworkDetailsMethod);

    // First create tensor handlers, backends and workload factories.
    // Handlers are created before workloads because workload creation can modify
    // some of the handlers (for example the splitter and concat layers).

    bool useExternalMemoryManager = false;
    bool useInternalMemoryManager = false;
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

    if (!networkProperties.m_AsyncEnabled)
    {
        m_IsInputImported = std::vector<bool>(order.GetNumInputs(), false);
        m_IsOutputImported = std::vector<bool>(order.GetNumOutputs(), false);
    }

    for (auto&& layer : order)
    {
        auto const& backendId = layer->GetBackendId();
        if (m_Backends.count(backendId) == 0)
        {
            auto createBackend = BackendRegistryInstance().GetFactory(backendId);
            auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));

            IBackendInternal* backend = it.first->second.get();

            if (networkProperties.m_AsyncEnabled &&
                !HasCapability(BackendOptions::BackendOption{"AsyncExecution", true}, backend->GetCapabilities()))
            {
                std::string er = backend->GetId();
                er += " does not support AsyncExecution";
                throw BackendCapabilityException(er);
            }

            if (networkProperties.m_AsyncEnabled &&
                !HasCapability(BackendOptions::BackendOption{"ExternallyManagedMemory", true},
                               backend->GetCapabilities()))
            {
                std::string er = backend->GetId();
                er += " does not support ExternallyManagedMemory\n";
                er += "AsyncEnabled networks require all backends to support ExternallyManagedMemory";
                throw BackendCapabilityException(er);
            }

            if (HasCapability(BackendOptions::BackendOption{"ExternallyManagedMemory", true},
                              backend->GetCapabilities()) &&
                (m_NetworkProperties.m_ExternalMemoryManagementEnabled || m_NetworkProperties.m_AsyncEnabled))
            {
                m_SupportsExternallyManagedMemory[backend->GetId()] = true;
                useExternalMemoryManager = true;
            }
            else
            {
                m_SupportsExternallyManagedMemory[backend->GetId()] = false;
                useInternalMemoryManager = true;
            }

            IBackendInternal::IWorkloadFactoryPtr workloadFactory;
            if (backend->SupportsTensorAllocatorAPI())
            {
                workloadFactory = backend->CreateWorkloadFactory(
                    m_TensorHandleFactoryRegistry,
                    m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
                    static_cast<MemorySourceFlags>(m_NetworkProperties.m_InputSource),
                    static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource));
            }
            else
            {
                m_BackendMemoryMangers.emplace_back(backend->CreateMemoryManager());
                workloadFactory = backend->CreateWorkloadFactory(
                    m_BackendMemoryMangers.back(), m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
            }
            m_WorkloadFactories[backendId] = std::move(workloadFactory);
        }
    }

    if (!networkProperties.m_AsyncEnabled)
    {
        for (auto&& layer : order)
        {
            auto& workloadFactory = GetWorkloadFactory(*layer);
            bool supportsExternalManager = m_SupportsExternallyManagedMemory[layer->GetBackendId()];

            switch (layer->GetType())
            {
                case LayerType::Input:
                case LayerType::MemImport:
                {
                    // If IsImportEnabled is true then we need to set IsMemoryManaged
                    // to false when creating TensorHandles
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                               workloadFactory,
                                               !supportsExternalManager && !m_NetworkProperties.m_ImportEnabled);
                    break;
                }
                case LayerType::Constant:
                {
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, true);
                    break;
                }
                default:
                {
                    // Look for a layer with 1 OutputSlot which has 1 connection and that connection is an Output layer.
                    // If Export is enabled disable memory management so we can export, otherwise we do a copy.
                    if ((layer->GetNumOutputSlots() == 1) &&
                        (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
                        (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
                    {
                        layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                                   workloadFactory,
                                                   !supportsExternalManager && !m_NetworkProperties.m_ExportEnabled);
                    }
                    else
                    {
                        layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                                   workloadFactory,
                                                   !supportsExternalManager);
                    }
                }
            }
        }
    }

    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
    if (timelineUtils)
    {
        timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
        // Mark the network with a start of life event
        timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
        // and with the process ID
        int processID = armnnUtils::Processes::GetCurrentId();
        std::stringstream ss;
        ss << processID;
        timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
    }

    // Then create workloads.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_CreateWorkloads");
        for (auto&& layer : order)
        {
            if (timelineUtils)
            {
                // Add layer to the post-optimisation network structure
                AddLayerStructure(timelineUtils, *layer, networkGuid);
            }

            const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);

            switch (layer->GetType())
            {
                case LayerType::Input:
                case LayerType::Output:
                {
                    // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
                    break;
                }
                default:
                {
                    auto workload = layer->CreateWorkload(workloadFactory);

                    if (!workload)
                    {
                        const char* const layerName =
                            layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
                        throw InvalidArgumentException(
                            fmt::format("No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
                                        layerName, static_cast<int>(layer->GetType()), layer->GetBackendId().Get()
                            ));
                    }

                    if (timelineUtils)
                    {
                        // Add workload to the post-optimisation network structure
                        AddWorkloadStructure(timelineUtils, workload, *layer);
                    }

                    // For async networks ConstantWorkloads are managed exclusively by LoadedNetwork
                    // and are separated out from the other workloads
                    if ((networkProperties.m_AsyncEnabled || useExternalMemoryManager) &&
                        layer->GetType() == LayerType::Constant)
                    {
                        m_ConstantTensorHandles[layer->GetGuid()] =
                            layer->GetOutputSlot(0).GetOutputHandler().GetData();
                        m_ConstantWorkloads[layer->GetGuid()] = std::move(workload);
                    }
                    else
                    {
                        m_WorkloadQueue.push_back(std::move(workload));
                    }

                    // Release the constant data in the layer.
                    layer->ReleaseConstantData();
                    break;
                }
            }
        }
    }

    // Gather information about workloads for inputs & outputs
    if (!networkProperties.m_AsyncEnabled && m_WorkloadQueue.size() != 0)
    {
        const int noOfInputs = armnn::numeric_cast<int>(order.GetNumInputs());

        // Get indices of all workloads connected to each input and
        // check if they support tensor handle replacement
        for (const BindableLayer* layer : order.GetInputLayers())
        {
            const auto bindingId = layer->GetBindingId();

            bool supportsReplacement = true;

            for (const auto inputSlot : layer->GetOutputSlot(0).GetConnections())
            {
                auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(inputSlot->GetOwningLayer()));
                workloadIndex -= noOfInputs;

                m_InputWorkloadSlotPairs[bindingId].emplace_back(WorkloadIndices{
                    armnn::numeric_cast<unsigned int>(workloadIndex), inputSlot->GetSlotIndex()});

                auto workload = m_WorkloadQueue[m_InputWorkloadSlotPairs[bindingId].back().m_WorkloadIndex].get();
                supportsReplacement &= workload->SupportsTensorHandleReplacement();
            }

            ITensorHandleFactory::FactoryId factoryId = layer->GetOutputSlot(0).GetTensorHandleFactoryId();
            // Get matching import factory Id
            ITensorHandleFactory::FactoryId importFactoryId =
                m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);

            ITensorHandleFactory* importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);

            if (supportsReplacement && importFactory)
            {
                m_PreImportedInputHandles.emplace_back(
                    bindingId, importFactory->CreateTensorHandle(layer->GetOutputSlot(0).GetTensorInfo(), false));
            }
            else
            {
                m_PreImportedInputHandles.emplace_back(bindingId, nullptr);
            }
        }

        // Get indices of all workloads connected to each output and
        // check if they support tensor handle replacement
        for (const BindableLayer* layer : order.GetOutputLayers())
        {
            const auto bindingId = layer->GetBindingId();

            const auto outputSlot = layer->GetInputSlot(0).GetConnectedOutputSlot();
            auto& indices = m_OutputWorkloadSlotPairs[bindingId];

            auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(outputSlot->GetOwningLayer()));
            workloadIndex -= noOfInputs;

            indices.m_OutputSlotIndices = WorkloadIndices{numeric_cast<unsigned int>(workloadIndex),
                                                          outputSlot->CalculateIndexOnOwner()};

            bool supportsReplacement = true;
            auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
            supportsReplacement &= outputWorkload->SupportsTensorHandleReplacement();

            for (auto& inputSlot : outputSlot->GetConnections())
            {
                if (inputSlot->GetOwningLayer().GetType() != LayerType::Output)
                {
                    auto inWorkloadIndex = std::distance(order.begin(),
                                                         order.GetPosInGraph(inputSlot->GetOwningLayer()));
                    inWorkloadIndex -= noOfInputs;
                    indices.m_InputSlotIndices.emplace_back(
                        WorkloadIndices{numeric_cast<unsigned int>(inWorkloadIndex), inputSlot->GetSlotIndex()});
                    auto inputWorkload = m_WorkloadQueue[indices.m_InputSlotIndices.back().m_WorkloadIndex].get();
                    supportsReplacement &= inputWorkload->SupportsTensorHandleReplacement();
                }
            }

            ITensorHandleFactory::FactoryId factoryId = outputSlot->GetTensorHandleFactoryId();
            // Get matching import factory Id
            ITensorHandleFactory::FactoryId importFactoryId =
                m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
            ITensorHandleFactory* importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);

            if (supportsReplacement && importFactory)
            {
                m_PreImportedOutputHandles.emplace_back(
                    bindingId, importFactory->CreateTensorHandle(outputSlot->GetTensorInfo(), false));
            }
            else
            {
                m_PreImportedOutputHandles.emplace_back(bindingId, nullptr);
            }
        }
    }

    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        workloadFactory.second->AfterWorkloadsCreated();
    }

    if (timelineUtils)
    {
        // Commit to send the post-optimisation network structure
        timelineUtils->Commit();
    }

    if (useExternalMemoryManager)
    {
        if (networkProperties.m_AsyncEnabled)
        {
            CreateMemoryProfileAsync();
        }
        else
        {
            CreateMemoryProfile();
        }

        auto backendStrategyMap = BackendRegistryInstance().GetMemoryOptimizerStrategies();
        for (auto& backendMemoryProfile : m_MemBlockMap)
        {
            const BackendId& backendId = backendMemoryProfile.first;
            if (backendStrategyMap.find(backendId) != backendStrategyMap.end())
            {
                m_MemBinMap[backendId] = backendStrategyMap[backendId]->Optimize(backendMemoryProfile.second);
            }
            else
            {
                m_MemBinMap[backendId] = m_ConstantStrategy->Optimize(backendMemoryProfile.second);
            }
        }

        if (!networkProperties.m_AsyncEnabled)
        {
            m_ExternalMemoryManager = CreateExternalMemoryManger(m_TensorMemory);

            // Sort m_TensorMemory so its order matches m_Tensorhandles
            std::sort(m_TensorMemory.begin(), m_TensorMemory.end(),
                      [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
                         const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
                      {
                          return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
                      });
        }
    }

    // Now that the intermediate tensor memory has been set up,
    // do any post allocation configuration for each workload.
    if (!networkProperties.m_AsyncEnabled)
    {
        if (useInternalMemoryManager)
        {
            // Set up memory.
            m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().AllocateDynamicBuffers();
        }

        for (auto& workload : m_WorkloadQueue)
        {
            workload->PostAllocationConfigure();
        }
    }

    if (useExternalMemoryManager)
    {
        if (!networkProperties.m_AsyncEnabled)
        {
            AllocateAndExecuteConstantWorkloads();
        }
        else
        {
            AllocateAndExecuteConstantWorkloadsAsync();
        }
    }
}

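// Constant layers back the weights and biases of the network. When memory is externally
// managed, their handles are allocated here and each constant workload is executed once
// up front so the data is resident before any inference runs.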
void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_AllocateAndExecuteConstants");
    for (auto& pair : m_ConstantWorkloads)
    {
        auto tensorHandle = m_ConstantTensorHandles[pair.first];
        tensorHandle->Allocate();
        pair.second->Execute();
    }
}

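// Async variant: constant tensor handles are created on demand from the graph (the legacy
// factory is never expected here, as the assert below enforces), and each constant workload
// is executed once through the ExecuteAsync path with a WorkingMemDescriptor that carries
// only the output handle.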
void LoadedNetwork::AllocateAndExecuteConstantWorkloadsAsync()
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_AllocateAndExecuteConstants");
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
    for (auto&& layer : order)
    {
        if (layer->GetType() == LayerType::Constant)
        {
            const auto& outSlot = layer->GetOutputSlots()[0];
            const auto factoryId = outSlot.GetTensorHandleFactoryId();
            ARMNN_ASSERT(factoryId != ITensorHandleFactory::LegacyFactoryId);
            auto& workloadFactory = GetWorkloadFactory(*layer);

            layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
            ITensorHandle* tensorHandle = outSlot.GetOutputHandler().GetData();

            m_ConstantTensorHandles[layer->GetGuid()] = tensorHandle;
            tensorHandle->Allocate();

            WorkingMemDescriptor memDesc;
            memDesc.m_Outputs.push_back(tensorHandle);
            m_ConstantWorkloads[layer->GetGuid()]->ExecuteAsync(memDesc);
        }
    }
}

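// Re-emits the post-optimisation network structure (network entity, layers and workloads)
// to the profiling timeline, so the structure can still be reported if profiling is
// enabled after the network has already been loaded.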
void LoadedNetwork::SendNetworkStructure()
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_SendNetworkStructure");
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);

    timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);

    for (auto&& layer : order)
    {
        // Add layer to the post-optimisation network structure
        AddLayerStructure(timelineUtils, *layer, networkGuid);
        switch (layer->GetType())
        {
            case LayerType::Input:
            case LayerType::Output:
            {
                // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
                break;
            }
            default:
            {
                for (auto& workload : m_WorkloadQueue)
                {
                    // Add workload to the post-optimisation network structure
                    AddWorkloadStructure(timelineUtils, workload, *layer);
                }
                break;
            }
        }
    }
    // Commit to send the post-optimisation network structure
    timelineUtils->Commit();
}

ProfilingGuid LoadedNetwork::GetNetworkGuid()
{
    return m_OptimizedNetwork->GetGuid();
}

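// The two lookups below resolve a user-facing LayerBindingId to the TensorInfo of the bound
// input or output layer, throwing if no layer carries that id. Illustrative use (the binding
// id 0 is hypothetical):
//
//     TensorInfo inputInfo  = loadedNetwork->GetInputTensorInfo(0);
//     TensorInfo outputInfo = loadedNetwork->GetOutputTensorInfo(0);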
TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
    {
        ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
        if (inputLayer->GetBindingId() == layerId)
        {
            return inputLayer->GetOutputSlot(0).GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No input layer is associated with id {}", layerId));
}

TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
    {
        ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot");
        ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected");
        if (outputLayer->GetBindingId() == layerId)
        {
            return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No output layer is associated with id {}", layerId));
}

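// Looks up the workload factory registered for the layer's backend. A factory must exist
// (one is created per backend in the constructor) and the layer must be supported by it;
// both conditions are enforced below.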
const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
{
    const IWorkloadFactory* workloadFactory = nullptr;

    auto it = m_WorkloadFactories.find(layer.GetBackendId());
    if (it == m_WorkloadFactories.end())
    {
        throw RuntimeException(fmt::format("No workload factory for {0} to be used for layer: {1}",
                                           layer.GetBackendId().Get(),
                                           layer.GetNameStr()),
                               CHECK_LOCATION());
    }

    workloadFactory = it->second.get();

    ARMNN_ASSERT_MSG(workloadFactory, "No workload factory");

    std::string reasonIfUnsupported;
    ARMNN_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer,
                                                        {},
                                                        reasonIfUnsupported,
                                                        m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions()),
                     "Factory does not support layer");
    IgnoreUnused(reasonIfUnsupported);
    return *workloadFactory;
}

namespace
{

// Non-copyable class owning accelerator-specific tensor data.
class TensorPin
{
public:
    TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
        : m_TensorHandle(std::move(handle))
        , m_TensorInfo(info)
        , m_Id(id)
    {
    }

    ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
    const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
    LayerBindingId GetBindingId() const { return m_Id; }

private:
    std::unique_ptr<ITensorHandle> m_TensorHandle;
    TensorInfo m_TensorInfo;
    LayerBindingId m_Id;
};

static const TensorPin& GetTensorPin(LayerBindingId id,
                                     const std::vector<TensorPin>& pins,
                                     char const* bindingPointDesc)
{
    auto it = std::find_if(pins.begin(), pins.end(),
                           [id](const TensorPin& pin)
                           {
                               return pin.GetBindingId() == id;
                           });

    if (it != pins.end())
    {
        return *it;
    }
    else
    {
        throw InvalidArgumentException(fmt::format("No tensor supplied for {0} {1}", bindingPointDesc, id));
    }
}

// Stores data that needs to be kept accessible for the entire execution of a workload.
class WorkloadData
{
public:
    WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
    {
        m_InputTensorPins.reserve(inputTensors.size());
        m_OutputTensorPins.reserve(outputTensors.size());

        for (auto inputTensorPair : inputTensors)
        {
            auto inputTensor = inputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
            LayerBindingId layerId = inputTensorPair.first;

            m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
        }

        for (auto outputTensorPair : outputTensors)
        {
            auto outputTensor = outputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
            LayerBindingId layerId = outputTensorPair.first;

            m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
        }
    }

    const TensorPin& GetInputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_InputTensorPins, "input");
    }

    const TensorPin& GetOutputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_OutputTensorPins, "output");
    }

private:

    std::vector<TensorPin> m_InputTensorPins;
    std::vector<TensorPin> m_OutputTensorPins;
};

} // anonymous namespace

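// EnqueueWorkload drives one synchronous inference: it binds the user's buffers, swaps in any
// pre-imported tensor handles, and then runs the input, workload and output queues. A minimal
// calling sketch (illustrative only; the binding ids, buffer sizes and data vectors are
// hypothetical, and the two pre-imported id lists are passed empty):
//
//     std::vector<float> inputData(inputSize);
//     std::vector<float> outputData(outputSize);
//     InputTensors  inputs{{0, ConstTensor(loadedNetwork->GetInputTensorInfo(0), inputData.data())}};
//     OutputTensors outputs{{0, Tensor(loadedNetwork->GetOutputTensorInfo(0), outputData.data())}};
//     Status status = loadedNetwork->EnqueueWorkload(inputs, outputs, {}, {});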
Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
                                      const OutputTensors& outputTensors,
                                      std::vector<ImportedInputId> preImportedInputIds,
                                      std::vector<ImportedOutputId> preImportedOutputIds)
{
    const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();

    // Walk graph to determine the order of execution.
    if (graph.GetNumLayers() < 2)
    {
        ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
        return Status::Failure;
    }

    // Data that must be kept alive for the entire execution of the workload.
    WorkloadData workloadData(inputTensors, outputTensors);

    if (graph.GetNumInputs() != inputTensors.size())
    {
        throw InvalidArgumentException("Number of inputs provided does not match network.");
    }

    // For each input to the network, call EnqueueInput with the data passed by the user.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs");
        m_InputQueue.clear();
        m_InputQueue.reserve(graph.GetNumInputs());

        if (preImportedInputIds.size() > graph.GetNumInputs())
        {
            throw InvalidArgumentException("Invalid number of preImportedInputIds");
        }

        unsigned int inputIndex = 0;
        unsigned int importedInputIdIndex = 0;
        std::sort(preImportedInputIds.begin(), preImportedInputIds.end());
        for (const BindableLayer* inputLayer : graph.GetInputLayers())
        {
            if (importedInputIdIndex < preImportedInputIds.size() &&
                inputIndex == preImportedInputIds[importedInputIdIndex])
            {
                // Only replace tensor handles if they have not already been replaced
                if (!m_IsInputImported[inputIndex])
                {
                    auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();

                    for (const auto& workloadInfo : m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
                    {
                        auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        workload->ReplaceInputTensorHandle(outputTensorHandle, workloadInfo.m_SlotIndex);
                    }
                    m_IsInputImported[inputIndex] = true;
                }
                importedInputIdIndex++;
            }
            else
            {
                if (m_IsInputImported[inputIndex])
                {
                    OutputHandler& handler = const_cast<OutputHandler&>(inputLayer->GetOutputHandler(0));

                    for (const auto& workloadInfo : m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
                    {
                        auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        workload->ReplaceInputTensorHandle(handler.GetData(), workloadInfo.m_SlotIndex);
                    }

                    m_IsInputImported[inputIndex] = false;
                }

                // InputTensorHandle is not imported yet, so enqueue the input normally
                const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
                EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
            }
            inputIndex++;
        }
    }
    // For each output to the network, call EnqueueOutput with the data passed by the user.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs");
        m_OutputQueue.clear();
        m_OutputQueue.reserve(graph.GetNumOutputs());

        if (preImportedOutputIds.size() > graph.GetNumOutputs())
        {
            throw InvalidArgumentException("Invalid number of preImportedOutputIds");
        }

        unsigned int outputIndex = 0;
        unsigned int importedOutputIdIndex = 0;
        std::sort(preImportedOutputIds.begin(), preImportedOutputIds.end());
        for (const BindableLayer* outputLayer : graph.GetOutputLayers())
        {
            if (importedOutputIdIndex < preImportedOutputIds.size() &&
                outputIndex == preImportedOutputIds[importedOutputIdIndex])
            {
                // Only replace tensor handles if they have not already been replaced
                ITensorHandle* inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();

                if (!m_IsOutputImported[outputIndex])
                {
                    const auto bindingId = outputLayer->GetBindingId();
                    const auto& indices = m_OutputWorkloadSlotPairs[bindingId];

                    auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();

                    outputWorkload->ReplaceOutputTensorHandle(inputTensorHandle,
                                                              indices.m_OutputSlotIndices.m_SlotIndex);

                    for (const auto& workloadInfo : indices.m_InputSlotIndices)
                    {
                        auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        inputWorkload->ReplaceInputTensorHandle(inputTensorHandle, workloadInfo.m_SlotIndex);
                    }
                    m_IsOutputImported[outputIndex] = true;
                }

                ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");
                MemSyncQueueDescriptor syncDesc;
                syncDesc.m_Inputs.push_back(inputTensorHandle);
                WorkloadInfo info;
                info.m_InputTensorInfos.push_back(
                    outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo());
                auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
                ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
                m_OutputQueue.push_back(move(syncWorkload));
                importedOutputIdIndex++;
            }
            else
            {
                if (m_IsOutputImported[outputIndex])
                {
                    const auto bindingId = outputLayer->GetBindingId();
                    const auto& indices = m_OutputWorkloadSlotPairs[bindingId];

                    auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
                    const OutputHandler& outputHandler =
                        outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOutputHandler();

                    outputWorkload->ReplaceOutputTensorHandle(
                        outputHandler.GetData(), indices.m_OutputSlotIndices.m_SlotIndex);

                    for (const auto& workloadInfo : indices.m_InputSlotIndices)
                    {
                        auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        inputWorkload->ReplaceInputTensorHandle(outputHandler.GetData(), workloadInfo.m_SlotIndex);
                    }
                    m_IsOutputImported[outputIndex] = false;
                }

                const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
                // OutputTensorHandle is not imported yet, so enqueue the output normally
                EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
            }
            outputIndex++;
        }
    }

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
    ProfilingGuid inferenceGuid = m_ProfilingService.GetNextGuid();
    if (timelineUtils)
    {
        // Add inference timeline trace if profiling is enabled.
        ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
        timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
        timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                          networkGuid,
                                          inferenceGuid,
                                          LabelsAndEventClasses::EXECUTION_OF_GUID);
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
    }

    bool executionSucceeded = true;

    {
        if (m_ProfilingService.IsProfilingEnabled())
        {
            m_ProfilingService.IncrementCounterValue(INFERENCES_RUN);
        }
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Execute");
        ARMNN_SCOPED_HEAP_PROFILING("Executing");
        executionSucceeded = Execute(timelineUtils, inferenceGuid);
    }

    if (timelineUtils)
    {
        // Add end of life of the inference timeline if profiling is enabled.
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
        timelineUtils->Commit();
    }

    return executionSucceeded ? Status::Success : Status::Failure;
}

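// Queues the handling of one network input. If import is enabled and the backend handle
// accepts the configured input memory source, the user buffer is mapped and imported
// (zero copy); otherwise a CopyMemGenericWorkload is queued to copy the data in.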
void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Input)
    {
        throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
    }

    InputQueueDescriptor inputQueueDescriptor;
    WorkloadInfo info;

    inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
    info.m_InputTensorInfos.push_back(tensorInfo);

    ARMNN_ASSERT_MSG(layer.GetNumOutputSlots() == 1, "Can only handle Input Layer with one output");
    const OutputHandler& handler = layer.GetOutputHandler();
    const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
    ITensorHandle* outputTensorHandle = handler.GetData();
    ARMNN_ASSERT_MSG(outputTensorHandle != nullptr,
                     "Data should have been allocated.");
    inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
    info.m_OutputTensorInfos.push_back(outputTensorInfo);

    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    bool needMemCopy = true;
    if (m_NetworkProperties.m_ImportEnabled) // Try to import the input tensor
    {
        if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
        {
            needMemCopy = false;
            // This assumes a CPU Tensor handle
            void* mem = tensorHandle->Map(false);
            if (outputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
            {
                tensorHandle->Unmap();
                return; // No need for a workload since the import has been done.
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
    }
    if (needMemCopy)
    {
        // Create a mem copy workload for input since we did not import
        std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);

        ARMNN_ASSERT_MSG(inputWorkload, "No input workload created");

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
        if (timelineUtils)
        {
            // Add Input Workload to the post-optimisation network structure
            AddWorkloadStructure(timelineUtils, inputWorkload, layer);
            timelineUtils->Commit();
        }

        m_InputQueue.push_back(move(inputWorkload));
    }
}

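// Queues the handling of one network output. Export is attempted only when all of the
// conditions (a)-(e) listed inside hold; a successful export then needs just a
// SyncMemGenericWorkload to make the result visible, while any other case falls back to a
// CopyMemGenericWorkload that copies the result into the user buffer.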
void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Output)
    {
        throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
    }

    OutputQueueDescriptor outputQueueDescriptor;
    WorkloadInfo info;

    outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
    info.m_OutputTensorInfos.push_back(tensorInfo);

    ARMNN_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input.");

    // Gets the output handler from the previous node.
    const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();

    const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
    ITensorHandle* inputTensorHandle = outputHandler.GetData();
    ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");

    // Try to import the output tensor.
    // Note: We can only import the output pointer if all of the following hold true:
    // a) The imported pointer is aligned sufficiently
    // b) The tensor has zero padding
    // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
    // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
    // e) m_ExportEnabled must be set to true
    bool needMemCopy = true;
    if (m_NetworkProperties.m_ExportEnabled &&
        (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
    {
        if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
        {
            MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
            if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
            {
                needMemCopy = false;
                void* mem = tensorHandle->Map(false);
                bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
                tensorHandle->Unmap();

                if (importOk)
                {
                    // Insert synchronization workload
                    MemSyncQueueDescriptor syncDesc;
                    syncDesc.m_Inputs.push_back(inputTensorHandle);
                    info.m_InputTensorInfos.push_back(inputTensorInfo);
                    auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
                    ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
                    m_OutputQueue.push_back(move(syncWorkload));
                }
                else
                {
                    throw MemoryExportException("EnqueueOutput: Memory Export failed");
                }
            }
        }
    }
    if (needMemCopy)
    {
        // If we got here then we didn't export the memory, so add an output workload which performs a memcopy.
        outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
        info.m_InputTensorInfos.push_back(inputTensorInfo);

        std::unique_ptr<IWorkload> outputWorkload =
            std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
        ARMNN_ASSERT_MSG(outputWorkload, "No output workload created");

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
        if (timelineUtils)
        {
            // Add Output Workload to the post-optimisation network structure
            AddWorkloadStructure(timelineUtils, outputWorkload, layer);
            timelineUtils->Commit();
        }

        m_OutputQueue.push_back(move(outputWorkload));
    }
}

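// Acquires all inter-layer working memory. The lock_guard parameter is deliberately unused:
// it forces every caller to prove that m_WorkingMemMutex is already held, as Execute() does:
//
//     std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
//     AllocateWorkingMemory(lockGuard);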
void LoadedNetwork::AllocateWorkingMemory(std::lock_guard<std::mutex>& lock)
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Working Memory Allocation");

    // This unused parameter makes sure we can only call this function with a valid lock
    IgnoreUnused(lock);

    if (m_IsWorkingMemAllocated)
    {
        return;
    }

    if (m_ExternalMemoryManager)
    {
        m_ExternalMemoryManager->Allocate();

        for (unsigned int i = 0; i < m_TensorMemory.size(); ++i)
        {
            m_Tensorhandles[i]->Import(m_TensorMemory[i].first->m_Data, m_TensorMemory[i].second);
        }
    }

    for (auto&& memoryManager : m_BackendMemoryMangers)
    {
        if (memoryManager)
        {
            memoryManager->Acquire();
        }
    }
    m_TensorHandleFactoryRegistry.AquireMemory();
    m_IsWorkingMemAllocated = true;
}

void LoadedNetwork::FreeWorkingMemory()
{
    std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);

    if (!m_IsWorkingMemAllocated)
    {
        return;
    }

    if (m_ExternalMemoryManager)
    {
        m_ExternalMemoryManager->Deallocate();
    }

    // Informs the memory managers to release memory in their respective memory groups
    for (auto&& memoryManager : m_BackendMemoryMangers)
    {
        if (memoryManager)
        {
            memoryManager->Release();
        }
    }
    m_TensorHandleFactoryRegistry.ReleaseMemory();
    m_IsWorkingMemAllocated = false;
}

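// Runs one synchronous inference under m_WorkingMemMutex: working memory is acquired lazily,
// then the input, workload and output queues execute in order. When profiling is active each
// workload gets its own inference-scoped start/end of life events. Exceptions are logged and
// converted into a false return value.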
David Monahan6198fe02019-12-02 08:35:43 +00001144bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
Cathal Corbett5aa9fd72022-02-25 15:33:28 +00001145 ProfilingGuid inferenceGuid)
telsoa014fcda012018-03-09 14:13:49 +00001146{
1147 bool success = true;
1148
Aron Virginas-Tara8e06ed2018-10-19 16:46:15 +01001149 auto Fail = [&](const std::exception& error)
1150 {
Derek Lamberti08446972019-11-26 16:38:31 +00001151 ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
Aron Virginas-Tara8e06ed2018-10-19 16:46:15 +01001152 success = false;
1153 };
1154
telsoa014fcda012018-03-09 14:13:49 +00001155 try
1156 {
Matthew Bentham2a326b52019-03-19 10:11:01 +00001157 std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
Derek Lambertia08d29b2020-06-19 14:33:05 +01001158 AllocateWorkingMemory(lockGuard);
Derek Lamberti03614f62018-10-02 15:52:46 +01001159
David Monahan6198fe02019-12-02 08:35:43 +00001160 ProfilingDynamicGuid workloadInferenceID(0);
Derek Lambertia08d29b2020-06-19 14:33:05 +01001161 auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](WorkloadQueue& queue)
telsoa014fcda012018-03-09 14:13:49 +00001162 {
Derek Lambertia08d29b2020-06-19 14:33:05 +01001163 for (auto& workload : queue)
David Monahan6198fe02019-12-02 08:35:43 +00001164 {
Derek Lambertia08d29b2020-06-19 14:33:05 +01001165 if(timelineUtils)
1166 {
1167 workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1168 inferenceGuid);
1169 }
1170 workload->Execute();
1171 if(timelineUtils)
1172 {
1173 timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1174 }
David Monahan6198fe02019-12-02 08:35:43 +00001175 }
Derek Lambertia08d29b2020-06-19 14:33:05 +01001176 };
Derek Lamberti03614f62018-10-02 15:52:46 +01001177
Derek Lambertia08d29b2020-06-19 14:33:05 +01001178 ExecuteQueue(m_InputQueue);
1179 ExecuteQueue(m_WorkloadQueue);
1180 ExecuteQueue(m_OutputQueue);
telsoa014fcda012018-03-09 14:13:49 +00001181 }
Aron Virginas-Tara8e06ed2018-10-19 16:46:15 +01001182 catch (const RuntimeException& error)
telsoa014fcda012018-03-09 14:13:49 +00001183 {
Aron Virginas-Tara8e06ed2018-10-19 16:46:15 +01001184 Fail(error);
telsoa014fcda012018-03-09 14:13:49 +00001185 }
telsoa014fcda012018-03-09 14:13:49 +00001186 catch (const std::runtime_error& error)
1187 {
Aron Virginas-Tara8e06ed2018-10-19 16:46:15 +01001188 Fail(error);
telsoa014fcda012018-03-09 14:13:49 +00001189 }
1190
telsoa014fcda012018-03-09 14:13:49 +00001191 return success;
1192}
1193
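// Execute() above is driven by the public IRuntime::EnqueueWorkload call. A hedged usage
// sketch of that flow (assumes an optimized network 'optNet' and pre-sized data vectors;
// binding ids and names are illustrative, not taken from this file):
//
//     armnn::IRuntime::CreationOptions options;
//     armnn::IRuntimePtr runtime = armnn::IRuntime::Create(options);
//     armnn::NetworkId netId;
//     runtime->LoadNetwork(netId, std::move(optNet));
//
//     armnn::InputTensors inputs
//     {
//         {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
//     };
//     armnn::OutputTensors outputs
//     {
//         {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
//     };
//     runtime->EnqueueWorkload(netId, inputs, outputs);
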
void LoadedNetwork::EnqueueInput(const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle)
{
    if (m_NetworkProperties.m_ImportEnabled) // Try to import the input tensor
    {
        MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
        if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
        {
            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),
                                                               inputTensor.GetMemoryArea());
            void* mem = tensorHandle->Map(false);

            if (inputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
            {
                tensorHandle->Unmap();
                return;
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
        else
        {
            throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import");
        }
    }
    else
    {
        std::unique_ptr<ITensorHandle> tensorHandle =
            std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());

        auto copyFunc = [](void* dst, const void* src, size_t size)
        {
            memcpy(dst, src, size);
        };

        CopyTensorContentsGeneric(tensorHandle.get(), inputTensorHandle, copyFunc);
    }
}
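
// Whether EnqueueInput imports or copies is decided by the INetworkProperties that the
// network was loaded with. A hedged sketch of opting in to zero-copy input import
// (constructor arguments vary between ArmNN versions; 'runtime' and 'optNet' as in the
// earlier sketch):
//
//     armnn::INetworkProperties props(/*asyncEnabled=*/false,
//                                     /*inputSource=*/armnn::MemorySource::Malloc,
//                                     /*outputSource=*/armnn::MemorySource::Undefined);
//     std::string errorMessage;
//     runtime->LoadNetwork(netId, std::move(optNet), errorMessage, props);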

// Note: We can only import the output pointer if all of the following hold true:
// a) The imported pointer is aligned sufficiently
// b) The tensor has zero padding
// c) There is only one connection to the OutputSlot and it is to an OutputLayer.
// d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
// e) m_IsExportEnabled must be set to true
void LoadedNetwork::ImportOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
{
    ARMNN_ASSERT_MSG(outputTensorHandle != nullptr, "Data should have been allocated.");
    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
    {
        std::unique_ptr<ITensorHandle> tensorHandle =
            std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
                                                      outputTensor.GetMemoryArea());

        void* mem = tensorHandle->Map(false);
        bool importOk = outputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
        tensorHandle->Unmap();

        if (!importOk)
        {
            throw MemoryExportException("ImportOutputTensor: Memory Export failed");
        }
    }
    else
    {
        throw MemoryExportException("ImportOutputTensor: Memory Export failed, backend does not support Export");
    }
}
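
// A hedged sketch of an output buffer that satisfies the conditions listed above:
// heap-allocated and sized from the bound output's TensorInfo, so the backend can write
// results into it directly instead of copying (names are illustrative):
//
//     armnn::TensorInfo outInfo = runtime->GetOutputTensorInfo(netId, 0);
//     std::vector<float> outBuffer(outInfo.GetNumElements()); // malloc-backed, float-aligned
//     armnn::OutputTensors outputs{ {0, armnn::Tensor(outInfo, outBuffer.data())} };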

void CopyToOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
{
    auto copyFunc = [](void* dst, const void* src, size_t size)
    {
        memcpy(dst, src, size);
    };

    std::unique_ptr<ITensorHandle> tensorHandle =
        std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
                                                  outputTensor.GetMemoryArea());

    CopyTensorContentsGeneric(outputTensorHandle, tensorHandle.get(), copyFunc);
}

const armnn::ConstTensor GetInputTensor(const LayerBindingId layerId, const InputTensors& inputTensors)
{
    for (const auto& inputTensorPair : inputTensors)
    {
        LayerBindingId id = inputTensorPair.first;
        if (id == layerId)
        {
            return inputTensorPair.second;
        }
    }
    throw InvalidArgumentException("Input does not exist.");
}

const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTensors& outputTensors)
{
    for (const auto& outputTensorPair : outputTensors)
    {
        LayerBindingId id = outputTensorPair.first;
        if (id == layerId)
        {
            return outputTensorPair.second;
        }
    }
    throw InvalidArgumentException("Output does not exist.");
}

std::vector<ImportedInputId> LoadedNetwork::ImportInputs(const InputTensors& inputTensors,
                                                         MemorySource forceImportMemorySource)
{
    if (!m_NetworkProperties.m_AsyncEnabled)
    {
        // Cannot import if import is not enabled and forceImportMemorySource is undefined
        if (forceImportMemorySource == MemorySource::Undefined)
        {
            throw MemoryImportException("ImportInputs: Memory Import failed, forceImportMemorySource is "
                                        "undefined and the network was not loaded with import enabled");
        }
        if (inputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumInputs())
        {
            throw MemoryImportException("ImportInputs: Force Import failed, incorrect number of tensors");
        }

        std::vector<ImportedInputId> importedInputs;
        Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
        unsigned int inputIndex = 0;
        for (const BindableLayer* inputLayer : graph.GetInputLayers())
        {
            auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();

            if (!outputTensorHandle)
            {
                inputIndex++;
                continue;
            }

            auto layerBindingId = inputLayer->GetBindingId();
            auto it = std::find_if(inputTensors.begin(), inputTensors.end(), [=](const auto& inputTensor)
            {
                return inputTensor.first == layerBindingId;
            });

            if (it == inputTensors.end())
            {
                inputIndex++;
                continue;
            }

            const auto& inputTensor = *it;
            std::unique_ptr<ITensorHandle> passThroughTensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
                                                               inputTensor.second.GetMemoryArea());

            if (outputTensorHandle->CanBeImported(passThroughTensorHandle->Map(), forceImportMemorySource)
                && (outputTensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource)))
            {
                importedInputs.push_back(inputIndex);
            }
            passThroughTensorHandle->Unmap();

            inputIndex++;
        }

        return importedInputs;
    }
    else
    {
        // Import when import is enabled in the network properties
        std::vector<ImportedInputId> importedInputs;
        Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

        for (auto inputTensor : inputTensors)
        {
            auto layerBindingId = inputTensor.first;
            auto it = std::find_if(graph.GetInputLayers().begin(), graph.GetInputLayers().end(), [=](auto* layer)
            {
                return layer->GetBindingId() == layerBindingId;
            });

            if (it == graph.GetInputLayers().end())
            {
                throw MemoryImportException(fmt::format(
                    "ImportInputs: Memory Import failed, unknown LayerBindingId: {}", layerBindingId));
            }

            const Layer* layer = *it;
            if (layer->GetType() != LayerType::Input)
            {
                throw InvalidArgumentException("ImportInputs: given layer not an InputLayer");
            }

            auto& backend = m_Backends.at(layer->GetBackendId());
            if (!HasCapability(BackendOptions::BackendOption{"PreImportIOTensors", true}, backend->GetCapabilities()))
            {
                std::string er = backend->GetId();
                er += " does not have PreImportIOTensors capability";
                throw BackendCapabilityException(er);
            }

            const OutputSlot& outputSlot = layer->GetOutputSlots()[0];

            ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
            const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();

            ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
            ARMNN_ASSERT(handleFactory);

            ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
                                                            handleFactory->CreateTensorHandle(tensorInfo, false)};

            ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();

            if (!CheckFlag(tensorHandle->GetImportFlags(), m_NetworkProperties.m_InputSource))
            {
                throw MemoryImportException(
                    fmt::format("ImportInputs: Memory Import failed, backend: "
                                "{} does not support importing from source {}",
                                factoryId, m_NetworkProperties.m_InputSource));
            }

            std::unique_ptr<ITensorHandle> passThroughTensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
                                                               inputTensor.second.GetMemoryArea());

            if (tensorHandle->Import(passThroughTensorHandle->Map(), m_NetworkProperties.m_InputSource))
            {
                importedInputs.push_back(m_CurImportedInputId++);
                passThroughTensorHandle->Unmap();
            }
            else
            {
                passThroughTensorHandle->Unmap();
                throw MemoryImportException("ImportInputs: Memory Import failed");
            }

            m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin));
        }
        return importedInputs;
    }
}
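
// A hedged sketch of pre-importing inputs once and reusing them across async executions
// (IRuntime forwards to the functions above; exact signatures vary between ArmNN versions,
// and buffer and handle names are illustrative):
//
//     auto memHandle = runtime->CreateWorkingMemHandle(netId);
//     std::vector<armnn::ImportedInputId> importedIds =
//         runtime->ImportInputs(netId, inputs); // 'inputs' must alias long-lived buffers
//     // Later inferences can skip the per-call input copy:
//     runtime->Execute(*memHandle, {}, outputs, importedIds);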

std::vector<ImportedOutputId> LoadedNetwork::ImportOutputs(const OutputTensors& outputTensors,
                                                           MemorySource forceImportMemorySource)
{
    if (!m_NetworkProperties.m_AsyncEnabled)
    {
        // Cannot import if import is not enabled and forceImportMemorySource is undefined
        if (forceImportMemorySource == MemorySource::Undefined)
        {
            throw MemoryImportException("ImportOutputs: Memory Import failed, forceImportMemorySource is "
                                        "undefined and the network was not loaded with import enabled");
        }
        // If forceImportMemorySource is defined, try to import if the memory is aligned
        if (outputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumOutputs())
        {
            throw MemoryImportException("ImportOutputs: Force Import failed, incorrect number of tensors");
        }
        std::vector<ImportedOutputId> importedOutputs;
        Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

        unsigned int outputIndex = 0;
        for (const BindableLayer* const outputLayer : graph.GetOutputLayers())
        {
            auto inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();

            if (!inputTensorHandle)
            {
                outputIndex++;
                continue;
            }

            auto layerBindingId = outputLayer->GetBindingId();
            auto it = std::find_if(outputTensors.begin(), outputTensors.end(), [=](const auto& outputTensor)
            {
                return outputTensor.first == layerBindingId;
            });

            if (it == outputTensors.end())
            {
                outputIndex++;
                continue;
            }

            const auto outputTensor = *it;
            // Check if the output memory can be imported
            if (inputTensorHandle->CanBeImported(outputTensor.second.GetMemoryArea(), forceImportMemorySource)
                && inputTensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
            {
                importedOutputs.push_back(outputIndex);
            }
            outputIndex++;
        }
        return importedOutputs;
    }

    std::vector<ImportedOutputId> importedOutputs;
    Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

    for (const auto& outputTensor : outputTensors)
    {
        auto layerBindingId = outputTensor.first;
        auto it = std::find_if(graph.GetOutputLayers().begin(), graph.GetOutputLayers().end(), [=](auto* layer)
        {
            return layer->GetBindingId() == layerBindingId;
        });

        if (it == graph.GetOutputLayers().end())
        {
            throw MemoryImportException(fmt::format("ImportOutputs: Memory Import failed, unknown LayerBindingId: {}",
                                                    layerBindingId));
        }

        const Layer* layer = *it;
        if (layer->GetType() != LayerType::Output)
        {
            throw InvalidArgumentException("ImportOutputs: given layer not an OutputLayer");
        }

        auto& backend = m_Backends.at(layer->GetBackendId());
        if (!HasCapability(BackendOptions::BackendOption{"PreImportIOTensors", true}, backend->GetCapabilities()))
        {
            std::string er = backend->GetId();
            er += " does not have PreImportIOTensors capability";
            throw BackendCapabilityException(er);
        }

        const InputSlot& inputSlot = layer->GetInputSlots()[0];
        ITensorHandleFactory::FactoryId factoryId = inputSlot.GetConnectedOutputSlot()->GetTensorHandleFactoryId();
        const TensorInfo& tensorInfo = inputSlot.GetConnectedOutputSlot()->GetTensorInfo();

        ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
        ARMNN_ASSERT(handleFactory);

        ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
                                                        handleFactory->CreateTensorHandle(tensorInfo, false)};

        ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();

        if (!CheckFlag(tensorHandle->GetImportFlags(), m_NetworkProperties.m_OutputSource))
        {
            throw MemoryImportException(fmt::format("ImportOutputs: Memory Import failed, backend: "
                                                    "{} does not support importing from source {}",
                                                    factoryId, m_NetworkProperties.m_OutputSource));
        }

        if (tensorHandle->Import(outputTensor.second.GetMemoryArea(), m_NetworkProperties.m_OutputSource))
        {
            importedOutputs.push_back(m_CurImportedOutputId++);
        }
        else
        {
            throw MemoryImportException("ImportOutputs: Memory Import failed");
        }

        m_PreImportedOutputHandles.push_back(std::move(importedTensorHandlePin));
    }

    return importedOutputs;
}

void LoadedNetwork::ClearImportedInputs(const std::vector<ImportedInputId> inputIds)
{
    for (auto id : inputIds)
    {
        if (id >= m_PreImportedInputHandles.size())
        {
            throw InvalidArgumentException(fmt::format("ClearImportedInputs::Unknown ImportedInputId: {}", id));
        }

        auto& importedTensorHandle = m_PreImportedInputHandles[id].m_TensorHandle;
        if (!importedTensorHandle)
        {
            throw InvalidArgumentException(
                fmt::format("ClearImportedInputs::ImportedInput with id: {} has already been deleted", id));
        }
        // Call Unimport then destroy the tensorHandle
        importedTensorHandle->Unimport();
        importedTensorHandle = {};
    }
}

void LoadedNetwork::ClearImportedOutputs(const std::vector<ImportedOutputId> outputIds)
{
    for (auto id : outputIds)
    {
        if (id >= m_PreImportedOutputHandles.size())
        {
            throw InvalidArgumentException(fmt::format("ClearImportedOutputs::Unknown ImportedOutputId: {}", id));
        }

        auto& importedTensorHandle = m_PreImportedOutputHandles[id].m_TensorHandle;
        if (!importedTensorHandle)
        {
            throw InvalidArgumentException(
                fmt::format("ClearImportedOutputs::ImportedOutput with id: {} has already been deleted", id));
        }
        // Call Unimport then destroy the tensorHandle
        importedTensorHandle->Unimport();
        importedTensorHandle = {};
    }
}
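
// A hedged sketch of the pre-import lifecycle: imported ids stay valid until they are
// explicitly cleared here, after which reusing an id throws InvalidArgumentException
// (runtime-level calls shown; names illustrative):
//
//     auto inIds  = runtime->ImportInputs(netId, inputs);
//     auto outIds = runtime->ImportOutputs(netId, outputs);
//     runtime->Execute(*memHandle, {}, {}, inIds, outIds);
//     runtime->ClearImportedInputs(netId, inIds);   // Unimport() and release the handles
//     runtime->ClearImportedOutputs(netId, outIds);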

Status LoadedNetwork::Execute(const InputTensors& inputTensors,
                              const OutputTensors& outputTensors,
                              IWorkingMemHandle& iWorkingMemHandle,
                              std::vector<ImportedInputId> preImportedInputs,
                              std::vector<ImportedOutputId> preImportedOutputs)
{
    const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();

    if (inputTensors.size() + preImportedInputs.size() != graph.GetNumInputs())
    {
        if (preImportedInputs.empty())
        {
            throw InvalidArgumentException("LoadedNetwork::Execute: "
                                           "Number of inputs provided does not match network.");
        }
        else
        {
            throw InvalidArgumentException("LoadedNetwork::Execute: "
                                           "Number of inputs + preImportedInputs provided does not match network.");
        }
    }

    if (outputTensors.size() + preImportedOutputs.size() != graph.GetNumOutputs())
    {
        if (preImportedOutputs.empty())
        {
            throw InvalidArgumentException("LoadedNetwork::Execute: "
                                           "Number of outputs provided does not match network.");
        }
        else
        {
            throw InvalidArgumentException("LoadedNetwork::Execute: "
                                           "Number of outputs + preImportedOutputs provided does not match network.");
        }
    }

    WorkingMemHandle& workingMemHandle = dynamic_cast<WorkingMemHandle&>(iWorkingMemHandle);
    // Collect all the given LayerBindingIds and check them for duplicates and unknowns.
    std::vector<LayerBindingId>& bindingIds = workingMemHandle.GetBindingIdVector();
    unsigned int index = 0;
    for (auto pair : inputTensors)
    {
        bindingIds[index++] = pair.first;
    }
    for (ImportedInputId id : preImportedInputs)
    {
        bindingIds[index++] = ValidateImportedInputID(id);
    }
    for (auto pair : outputTensors)
    {
        bindingIds[index++] = pair.first;
    }
    for (ImportedOutputId id : preImportedOutputs)
    {
        bindingIds[index++] = ValidateImportedOutputID(id);
    }

    workingMemHandle.ValidateBindingIds();

    auto resetMemHandle = [&]()
    {
        for (ImportedInputId id : preImportedInputs)
        {
            const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;

            auto inputHandle = workingMemHandle.GetInputHandle(layerBindingId);
            auto inputConnections = workingMemHandle.GetInputConnections(layerBindingId);
            for (auto it : inputConnections)
            {
                *it = inputHandle;
            }
        }

        for (ImportedOutputId id : preImportedOutputs)
        {
            const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;

            auto outputHandle = workingMemHandle.GetOutputHandle(layerBindingId);
            auto outputConnections = workingMemHandle.GetOutputConnection(layerBindingId);

            for (auto it : outputConnections)
            {
                *it = outputHandle;
            }
        }
    };

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
    ProfilingGuid inferenceGuid = m_ProfilingService.GetNextGuid();
    if (timelineUtils)
    {
        // Add inference timeline trace if profiling is enabled.
        ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
        timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
        timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                          networkGuid,
                                          inferenceGuid,
                                          LabelsAndEventClasses::EXECUTION_OF_GUID);
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
    }

    bool executionSucceeded = true;

    if (timelineUtils)
    {
        // Add end of life of the inference timeline if profiling is enabled.
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
        timelineUtils->Commit();
    }

    if (!workingMemHandle.IsAllocated())
    {
        workingMemHandle.Allocate();
    }

    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs");
        for (auto pair : inputTensors)
        {
            EnqueueInput(pair.second, workingMemHandle.GetInputHandle(pair.first));
        }

        // Swap in the pre-imported inputs if any
        for (ImportedInputId id : preImportedInputs)
        {
            const ImportedTensorHandlePin& importedInputPin = m_PreImportedInputHandles[id];
            const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
            const auto& preimportedHandle = importedInputPin.m_TensorHandle;

            auto inputConnections = workingMemHandle.GetInputConnections(layerBindingId);
            for (auto it : inputConnections)
            {
                *it = preimportedHandle.get();
            }
        }
    }
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs");
        if (m_NetworkProperties.m_ExportEnabled)
        {
            for (auto pair : outputTensors)
            {
                ImportOutputTensor(pair.second, workingMemHandle.GetOutputHandle(pair.first));
            }
        }

        for (ImportedOutputId id : preImportedOutputs)
        {
            const ImportedTensorHandlePin& importedOutputPin = m_PreImportedOutputHandles[id];
            const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
            const auto& preimportedHandle = importedOutputPin.m_TensorHandle;

            auto outputConnections = workingMemHandle.GetOutputConnection(layerBindingId);

            for (auto it : outputConnections)
            {
                *it = preimportedHandle.get();
            }
        }
    }

    auto Fail = [&](const std::exception& error)
    {
        ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
        executionSucceeded = false;
    };
    ProfilingDynamicGuid workloadInferenceID(0);

    try
    {
        for (unsigned int i = 0; i < m_WorkloadQueue.size(); ++i)
        {
            auto& workload = m_WorkloadQueue[i];
            if (timelineUtils)
            {
                workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(
                    workload->GetGuid(), inferenceGuid);
            }
            workload->ExecuteAsync(workingMemHandle.GetWorkingMemDescriptorAt(i));

            if (timelineUtils)
            {
                timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
            }
        }
    }
    catch (const RuntimeException& error)
    {
        resetMemHandle();
        Fail(error);
    }
    catch (const std::runtime_error& error)
    {
        resetMemHandle();
        Fail(error);
    }
    catch (...)
    {
        resetMemHandle();
        throw;
    }

    if (!m_NetworkProperties.m_ExportEnabled)
    {
        for (auto pair : outputTensors)
        {
            CopyToOutputTensor(pair.second, workingMemHandle.GetOutputHandle(pair.first));
        }
    }
    else
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "SyncMemGeneric_Execute");
        workingMemHandle.MemSyncOutputs();
    }

    resetMemHandle();

    return executionSucceeded ? Status::Success : Status::Failure;
}

/// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
/// overlapped Executions; each thread should obtain its own handle via this function and pass
/// it to Execute. (A hedged usage sketch follows the function body below.)
std::unique_ptr<IWorkingMemHandle> LoadedNetwork::CreateWorkingMemHandle(NetworkId networkId)
{
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();

    // Tensors that will need to be allocated internally within armnn
    std::vector<std::unique_ptr<ITensorHandle>> managedTensorHandles;
    // Tensors that will be allocated externally by the user
    std::vector<std::unique_ptr<ITensorHandle>> unmanagedTensorHandles;

    std::vector<WorkingMemDescriptor> workingMemDescriptors;
    std::unordered_map<LayerGuid, WorkingMemDescriptor> workingMemDescriptorMap;

    auto GetTensorHandle = [&](Layer* layer, const OutputSlot& outputSlot)
    {
        ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
        const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();

        if (factoryId == ITensorHandleFactory::LegacyFactoryId)
        {
            BackendId id = layer->GetBackendId();
            ARMNN_NO_DEPRECATE_WARN_BEGIN
            return m_WorkloadFactories.at(id)->CreateTensorHandle(tensorInfo, false);
            ARMNN_NO_DEPRECATE_WARN_END
        }
        else
        {
            ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
            ARMNN_ASSERT(handleFactory);
            return handleFactory->CreateTensorHandle(tensorInfo, false);
        }
    };

    struct HandleInfo
    {
        ITensorHandle* m_TensorHandle;

        bool m_IsInputLayerHandle = false;
        bool m_IsOutputLayerHandle = false;

        WorkingMemHandle::InputMemDescriptorCoords m_InputMemDescriptorCoords;
        WorkingMemHandle::OutputMemDescriptorCoords m_OutputMemDescriptorCoords;
    };

    std::unordered_map<const OutputSlot*, HandleInfo> outputToHandleInfoMap;

    unsigned int layerIndex = 0;
    for (auto&& layer : order)
    {
        // Constant layers execution and management is handled during loaded network construction
        if (layer->GetType() == LayerType::Constant)
        {
            continue;
        }

        WorkingMemDescriptor workingMemDescriptor;

        bool isMemoryManaged = true;
        bool isInputLayer = false;
        bool isOutputLayer = false;
        bool isConnectedToOutputLayer = false;

        if (layer->GetType() == LayerType::Input || layer->GetType() == LayerType::MemImport)
        {
            // Input layers/workloads will not be executed, so the descriptor is not added to
            // workingMemDescriptors. However we will still need to manage the tensorHandle.
            isInputLayer = true;
            isMemoryManaged = !m_NetworkProperties.m_ImportEnabled;
        }
        else if (layer->GetType() == LayerType::Output)
        {
            isOutputLayer = true;
        }

        unsigned int slotIndex = 0;
        // Create a tensor handle for each output slot of a layer.
        // Once we create it, we start managing its lifetime.
        for (auto& slot : layer->GetOutputSlots())
        {
            for (unsigned int i = 0; i < slot.GetNumConnections(); ++i)
            {
                if ((slot.GetConnection(i)->GetOwningLayer().GetType() == LayerType::Output))
                {
                    if (!isConnectedToOutputLayer)
                    {
                        isConnectedToOutputLayer = true;
                        // If Export is enabled disable memory management, so we can export; otherwise we do a copy
                        isMemoryManaged = !m_NetworkProperties.m_ExportEnabled;
                    }
                    else
                    {
                        // Importing in this case would likely cause unexpected behaviour, so we disallow it.
                        ARMNN_LOG(warning) <<
                            fmt::format("Layer name: '{0}' guid: '{1}' has two or more OutputLayers connected to it. "
                                        "This will prevent importing on the connected OutputLayers.",
                                        layer->GetName(), layer->GetGuid());
                        isMemoryManaged = true;
                    }
                }
            }

            ITensorHandle* tensorHandle;
            if (isMemoryManaged)
            {
                managedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
                tensorHandle = managedTensorHandles.back().get();
            }
            else
            {
                unmanagedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
                tensorHandle = unmanagedTensorHandles.back().get();
            }

            workingMemDescriptor.m_Outputs.push_back(tensorHandle);

            HandleInfo& handleInfo = outputToHandleInfoMap[&slot];
            handleInfo.m_TensorHandle = tensorHandle;

            // Store the coordinates of the current layer's OutputSlot that is connected to the OutputLayer
            if (isConnectedToOutputLayer)
            {
                handleInfo.m_IsOutputLayerHandle = true;
                handleInfo.m_OutputMemDescriptorCoords.m_OutputSlotCoords = {layerIndex, slotIndex};
            }
            // Store the LayerBindingId of the InputLayer
            if (isInputLayer)
            {
                handleInfo.m_IsInputLayerHandle = true;
                LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
                handleInfo.m_InputMemDescriptorCoords.m_LayerBindingId = bindingId;
            }
            slotIndex++;
        }
1953 // Loop through the input slots in the same layer and decrement the reference counter associated
1954 // to each tensor handle we encounter.
Finn Williams8636bc72021-10-02 15:06:39 +01001955 // Once it reaches zero, the lifetime of the tensor handle has ended, and we mark its memory as available
1956 // so that the next tensor handle with a non overlapping lifetime can share its memory.
Finn Williams01097942021-04-26 12:06:34 +01001957 for (auto& slot : layer->GetInputSlots())
1958 {
1959 ARMNN_ASSERT(slot.GetConnection());
1960 auto outputSlot = slot.GetConnectedOutputSlot();
1961 auto key = outputSlot->GetOwningLayer().GetGuid();
1962
1963 // Constant layers execution and management is handled during loaded network construction
1964 auto found = m_ConstantTensorHandles.find(key);
1965 if (found != m_ConstantTensorHandles.end())
1966 {
Finn Williamsea0712e2021-10-14 11:22:19 +01001967 ITensorHandle* tensorHandle = found->second;
1968 workingMemDescriptor.m_Inputs.push_back(tensorHandle);
1969
1970 // Odd case where a constant layer is connected to an output layer
1971 // We will need to create a HandleInfo to track it
1972 if (isOutputLayer)
1973 {
1974 LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
1975
Finn Williamsb1aad422021-10-28 19:07:32 +01001976 HandleInfo& handleInfo = outputToHandleInfoMap[outputSlot];
1977 handleInfo.m_TensorHandle = tensorHandle;
1978 handleInfo.m_IsOutputLayerHandle = true;
Finn Williamsea0712e2021-10-14 11:22:19 +01001979 handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
1980 handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
1981 }
Finn Williams01097942021-04-26 12:06:34 +01001982 continue;
1983 }
1984
Finn Williamsb1aad422021-10-28 19:07:32 +01001985 HandleInfo& handleInfo = outputToHandleInfoMap.at(outputSlot);
Finn Williamsf37b9702021-09-01 18:06:04 +01001986
Finn Williamsb1aad422021-10-28 19:07:32 +01001987 ITensorHandle* inputTensorHandle = handleInfo.m_TensorHandle;
1988 workingMemDescriptor.m_Inputs.push_back(inputTensorHandle);
Finn Williamsf37b9702021-09-01 18:06:04 +01001989
Finn Williams8636bc72021-10-02 15:06:39 +01001990 // Store the LayerBindingId of the OutputLayer
1991 if (isOutputLayer)
Finn Williamsf37b9702021-09-01 18:06:04 +01001992 {
Finn Williams8636bc72021-10-02 15:06:39 +01001993 LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
1994 handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
Finn Williamsea0712e2021-10-14 11:22:19 +01001995 handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
Finn Williams8636bc72021-10-02 15:06:39 +01001996 }
Finn Williamsea0712e2021-10-14 11:22:19 +01001997 // In this case the layer is not an Output Layer but shares its input tensorhandle with an OutputLayer
Finn Williams8636bc72021-10-02 15:06:39 +01001998 // It will need to be updated as well, if we swap out the tensorhandle
Finn Williamsb1aad422021-10-28 19:07:32 +01001999 else if (handleInfo.m_IsOutputLayerHandle)
Finn Williams8636bc72021-10-02 15:06:39 +01002000 {
2001 handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, slot.GetSlotIndex()});
2002 }
2003
2004 // Store the coordinates of the InputSlots connected to the InputLayer
2005 // There can be more than one InputSlot connected to an InputLayer, so we use a vector
Finn Williamsb1aad422021-10-28 19:07:32 +01002006 if (handleInfo.m_IsInputLayerHandle)
Finn Williams8636bc72021-10-02 15:06:39 +01002007 {
2008 std::pair<LayerGuid, unsigned int> connectionLocation{layerIndex, slot.GetSlotIndex()};
2009 handleInfo.m_InputMemDescriptorCoords.m_InputSlotCoords.emplace_back(connectionLocation);
Finn Williamsf37b9702021-09-01 18:06:04 +01002010 }
Mike Kelly55a8ffd2021-04-07 20:10:49 +01002011 }
        workingMemDescriptorMap.insert({layer->GetGuid(), workingMemDescriptor});

        // Input layers/workloads will not be executed, so their descriptors are not added to
        // workingMemDescriptors. However we will still need to manage the tensorHandle.
        if (!isInputLayer)
        {
            workingMemDescriptors.push_back(workingMemDescriptor);
            layerIndex++;
        }
    }

    std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>> tensorMemory;

    auto externalMemoryManager = CreateExternalMemoryManger(tensorMemory);

    // Sort tensorMemory so that its order matches the outputSlot order
    std::sort(tensorMemory.begin(), tensorMemory.end(),
              [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
                 const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
              {
                  return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
              });

    std::vector<WorkingMemHandle::InputMemDescriptorCoords> inputConnectionsInfo;
    std::vector<WorkingMemHandle::OutputMemDescriptorCoords> outputConnectionsInfo;

    for (const auto& handleInfo : outputToHandleInfoMap)
    {
        if (handleInfo.second.m_IsOutputLayerHandle)
        {
            outputConnectionsInfo.emplace_back(handleInfo.second.m_OutputMemDescriptorCoords);
        }

        if (handleInfo.second.m_IsInputLayerHandle)
        {
            inputConnectionsInfo.emplace_back(handleInfo.second.m_InputMemDescriptorCoords);
        }
    }

    return std::make_unique<WorkingMemHandle>(networkId,
                                              inputConnectionsInfo,
                                              outputConnectionsInfo,
                                              workingMemDescriptors,
                                              workingMemDescriptorMap,
                                              std::move(externalMemoryManager),
                                              std::move(tensorMemory),
                                              std::move(managedTensorHandles),
                                              std::move(unmanagedTensorHandles));
}
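
// A hedged sketch of overlapped execution with per-thread working memory, as described in
// the comment above CreateWorkingMemHandle (error handling omitted; names illustrative):
//
//     auto RunInference = [&](const armnn::InputTensors& in, const armnn::OutputTensors& out)
//     {
//         // One handle per thread; a handle must not be shared between concurrent Executes.
//         std::unique_ptr<armnn::experimental::IWorkingMemHandle> handle =
//             runtime->CreateWorkingMemHandle(netId);
//         runtime->Execute(*handle, in, out);
//     };
//     std::thread t1(RunInference, inputsA, outputsA);
//     std::thread t2(RunInference, inputsB, outputsB);
//     t1.join();
//     t2.join();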

void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func)
{
    for (auto&& workloadPtr : m_WorkloadQueue)
    {
        workloadPtr.get()->RegisterDebugCallback(func);
    }
}
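
// A hedged sketch of registering a debug callback through the public runtime API; the
// callback fires per intermediate tensor when the network was optimized with debug enabled
// (exact behaviour depends on the backends in use):
//
//     auto callback = [](armnn::LayerGuid guid, unsigned int slotIndex, armnn::ITensorHandle* tensor)
//     {
//         std::cout << "Layer " << guid << " slot " << slotIndex
//                   << " -> " << tensor->GetShape().GetNumElements() << " elements\n";
//     };
//     runtime->RegisterDebugCallback(netId, callback);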

void LoadedNetwork::CreateMemoryProfileAsync()
{
    struct PartialBlock
    {
        unsigned int m_StartOfLife;
        unsigned int m_Lifetime;

        size_t m_MemSize;
        unsigned int m_Index;

        BackendId m_BackendId;
    };

    auto align = [](size_t numToAlign)
    {
        const size_t alignment = sizeof(float);
        return ((numToAlign + alignment - 1) / alignment) * alignment;
    };

    std::unordered_map<const OutputSlot*, PartialBlock> memBlockTrackerMap;

    const bool inputImportingEnabled = m_NetworkProperties.m_InputSource != MemorySource::Undefined;
    const bool outputImportingEnabled = m_NetworkProperties.m_OutputSource != MemorySource::Undefined;

    unsigned int timestep = 0;
    unsigned int outputIndex = 0;
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

    for (auto&& layer : order)
    {
        const LayerType& layerType = layer->GetType();
        // Don't manage memory if importing.
        if (layerType == LayerType::Input && inputImportingEnabled)
        {
            continue;
        }
        // Don't manage memory if importing.
        if (layerType == LayerType::Output && outputImportingEnabled
            && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
        {
            continue;
        }
        // Because constant layer memory cannot be shared, it must persist for the lifetime of execution;
        // its management is handled separately.
        if (layerType == LayerType::Constant)
        {
            continue;
        }

        BackendId backendId = layer->GetBackendId();
        for (auto& outputSlot : layer->GetOutputSlots())
        {
            if (!m_SupportsExternallyManagedMemory[backendId])
            {
                continue;
            }

            PartialBlock partialBlock;

            partialBlock.m_StartOfLife = timestep;

            size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
            partialBlock.m_MemSize = alignedSize;
            partialBlock.m_Index = outputIndex++;
            partialBlock.m_Lifetime = outputSlot.GetNumConnections();
            partialBlock.m_BackendId = backendId;

            if (partialBlock.m_Lifetime == 0)
            {
                m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
                                                                     partialBlock.m_StartOfLife,
                                                                     partialBlock.m_MemSize,
                                                                     0,
                                                                     partialBlock.m_Index);
            }
            else
            {
                memBlockTrackerMap[&outputSlot] = partialBlock;
            }
        }

        for (auto& inputSlot : layer->GetInputSlots())
        {
            const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
            const LayerType& owningLayerType = connectedInputLayer.GetType();

            if (owningLayerType == LayerType::Constant)
            {
                continue;
            }
            if (inputImportingEnabled && owningLayerType == LayerType::Input)
            {
                continue;
            }

            auto outputSlot = inputSlot.GetConnectedOutputSlot();

            PartialBlock& partialBlock = memBlockTrackerMap.at(outputSlot);

            auto& lifetime = partialBlock.m_Lifetime;
            --lifetime;

            if (lifetime == 0)
            {
                m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
                                                                     timestep,
                                                                     partialBlock.m_MemSize,
                                                                     0,
                                                                     partialBlock.m_Index);
            }
        }
        ++timestep;
    }
}

void LoadedNetwork::CreateMemoryProfile()
{
    // Finds the first TensorHandle ancestor of a SubTensorHandle. If the ITensorHandle provided
    // is a TensorHandle, the function just returns it.
    auto TraceSubTensorHandleAncestry = [](ITensorHandle* const subTensorHandle)
    {
        ITensorHandle* ancestor = subTensorHandle;
        while (ancestor && ancestor->GetParent())
        {
            ancestor = ancestor->GetParent();
        }
        return ancestor;
    };

    struct PartialBlock
    {
        unsigned int m_StartOfLife;
        unsigned int m_Lifetime;

        size_t m_MemSize;
        unsigned int m_Index;

        BackendId m_BackendId;
    };

    auto align = [](size_t numToAlign)
    {
        const size_t alignment = sizeof(float);
        return ((numToAlign + alignment - 1) / alignment) * alignment;
    };

    std::unordered_map<ITensorHandle*, PartialBlock> memBlockTrackerMap;

    const bool inputImportingEnabled = m_NetworkProperties.m_InputSource != MemorySource::Undefined;
    const bool outputImportingEnabled = m_NetworkProperties.m_OutputSource != MemorySource::Undefined;

    unsigned int timestep = 0;
    unsigned int outputIndex = 0;
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

    for (auto&& layer : order)
    {
        const LayerType& layerType = layer->GetType();
        // Don't manage memory if importing.
        if (layerType == LayerType::Input && inputImportingEnabled)
        {
            continue;
        }
        // Don't manage memory if importing.
        if (layerType == LayerType::Output && outputImportingEnabled
            && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
        {
            continue;
        }
        // Because constant layer memory cannot be shared, it must persist for the lifetime of execution;
        // its management is handled separately.
        if (layerType == LayerType::Constant)
        {
            continue;
        }

        BackendId backendId = layer->GetBackendId();
        for (auto& outputSlot : layer->GetOutputSlots())
        {
            if (!m_SupportsExternallyManagedMemory[backendId])
            {
                continue;
            }

            ITensorHandle* tensorHandle = outputSlot.GetOutputHandler().GetData();
            tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);

            if (memBlockTrackerMap.find(tensorHandle) == memBlockTrackerMap.end())
            {
                PartialBlock partialBlock;

                partialBlock.m_StartOfLife = timestep;

                size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
                partialBlock.m_MemSize = alignedSize;
                partialBlock.m_Index = outputIndex++;
                partialBlock.m_Lifetime = outputSlot.GetNumConnections();
                partialBlock.m_BackendId = backendId;

                if (partialBlock.m_Lifetime == 0)
                {
                    m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
                                                                         partialBlock.m_StartOfLife,
                                                                         partialBlock.m_MemSize,
                                                                         0,
                                                                         partialBlock.m_Index);
                }
                else
                {
                    memBlockTrackerMap[tensorHandle] = partialBlock;
                }
                m_Tensorhandles.push_back(tensorHandle);
            }
            else
            {
                memBlockTrackerMap.at(tensorHandle).m_Lifetime += outputSlot.GetNumConnections();
            }
        }

        for (auto& inputSlot : layer->GetInputSlots())
        {
            const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
            const LayerType& owningLayerType = connectedInputLayer.GetType();

            if (owningLayerType == LayerType::Constant)
            {
                continue;
            }
            if (inputImportingEnabled && owningLayerType == LayerType::Input)
            {
                continue;
            }
            if (!m_SupportsExternallyManagedMemory[connectedInputLayer.GetBackendId()])
            {
                continue;
            }

            auto outputSlot = inputSlot.GetConnectedOutputSlot();

            ITensorHandle* tensorHandle = outputSlot->GetOutputHandler().GetData();
            tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);

            PartialBlock& partialBlock = memBlockTrackerMap.at(tensorHandle);

            auto& lifetime = partialBlock.m_Lifetime;
            --lifetime;

            if (lifetime == 0)
            {
                m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
                                                                     timestep,
                                                                     partialBlock.m_MemSize,
                                                                     0,
                                                                     partialBlock.m_Index);
            }
        }
        ++timestep;
    }
}
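
// A worked sketch of the lifetime bookkeeping above (layer names are made up): for a
// chain A -> B -> C visited at timesteps 0, 1 and 2, A's output slot has one connection,
// so its block is born at timestep 0 and retired when B consumes it at timestep 1,
// producing a MemBlock spanning [0, 1]; B's output spans [1, 2]. Blocks whose lifetimes
// do not overlap can later be binned onto the same underlying allocation by the memory
// optimizer strategy, e.g. a block spanning [0, 1] and another spanning [2, 2] may share
// one buffer, while [1, 2] overlaps both and needs its own.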

std::unique_ptr<MemoryManager> LoadedNetwork::CreateExternalMemoryManger(
    std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>>& tensorMemoryVec)
{
    std::unique_ptr<MemoryManager> memoryManager = std::make_unique<MemoryManager>();
    auto allocatorMap = BackendRegistryInstance().GetAllocators();

    for (auto& backend : m_MemBinMap)
    {
        std::vector<BufferStorage> bufferStorageVec;

        std::shared_ptr<ICustomAllocator> backendAllocator;
        if (allocatorMap.find(backend.first) != allocatorMap.end())
        {
            backendAllocator = allocatorMap[backend.first];
        }
        else
        {
            backendAllocator = m_Backends[backend.first]->GetDefaultAllocator();
        }

        for (auto& memBin : backend.second)
        {
            BufferStorage bufferStorage;
            bufferStorage.m_BufferSize = memBin.m_MemSize;
            bufferStorage.m_TensorMemoryVector.reserve(memBin.m_MemBlocks.size());

            for (auto& memBlock : memBin.m_MemBlocks)
            {
                auto tensorMemory = std::make_shared<TensorMemory>(TensorMemory{memBlock.m_Offset, memBlock.m_Index});

                tensorMemoryVec.emplace_back(tensorMemory, backendAllocator->GetMemorySourceType());
                bufferStorage.m_TensorMemoryVector.emplace_back(tensorMemory);
            }

            bufferStorageVec.emplace_back(std::move(bufferStorage));
        }

        memoryManager->StoreMemToAllocate(bufferStorageVec, backendAllocator, 4);
    }

    return memoryManager;
}

LayerBindingId LoadedNetwork::ValidateImportedInputID(ImportedInputId id)
{
    try
    {
        const auto& importedTensorHandlePin = m_PreImportedInputHandles.at(id);
        if (!importedTensorHandlePin.m_TensorHandle)
        {
            throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: "
                                                       "PreImportedInput: {} has been deleted", id));
        }
        return importedTensorHandlePin.m_LayerBindingId;
    }
    catch (const std::out_of_range&)
    {
        throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedInputId: {}", id));
    }
}

LayerBindingId LoadedNetwork::ValidateImportedOutputID(ImportedOutputId id)
{
    try
    {
        const auto& importedTensorHandlePin = m_PreImportedOutputHandles.at(id);
        if (!importedTensorHandlePin.m_TensorHandle)
        {
            throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: "
                                                       "PreImportedOutput: {} has been deleted", id));
        }
        return importedTensorHandlePin.m_LayerBindingId;
    }
    catch (const std::out_of_range&)
    {
        throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedOutputId: {}", id));
    }
}

} // namespace armnn