blob: b5a1b392b46edce66ffbc446e510d275755bb85a [file] [log] [blame]
Laurent Carlier749294b2020-06-01 09:03:17 +01001//
Jim Flynn6398a982020-05-27 17:05:21 +01002// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
David Beckecb56cd2018-09-05 12:52:57 +01003// SPDX-License-Identifier: MIT
telsoa014fcda012018-03-09 14:13:49 +00004//
5
6#include "LoadedNetwork.hpp"
7#include "Layer.hpp"
telsoa014fcda012018-03-09 14:13:49 +00008#include "Graph.hpp"
9#include "Network.hpp"
Jim Flynnf7713212020-07-14 09:50:59 +010010#include <Processes.hpp>
telsoa014fcda012018-03-09 14:13:49 +000011#include "Profiling.hpp"
surmeh013537c2c2018-05-18 16:31:43 +010012#include "HeapProfiling.hpp"
telsoa014fcda012018-03-09 14:13:49 +000013
Matteo Martincighc601aa62019-10-29 15:03:22 +000014#include <armnn/BackendRegistry.hpp>
Matthew Benthamf48afc62020-01-15 17:55:08 +000015#include <armnn/Logging.hpp>
Narumol Prangnawaratac2770a2020-04-01 16:51:23 +010016#include <armnn/utility/Assert.hpp>
Matteo Martincighc601aa62019-10-29 15:03:22 +000017
Aron Virginas-Tarc9cc8042018-11-01 16:15:57 +000018#include <backendsCommon/CpuTensorHandle.hpp>
Matteo Martincighe5b8eb92019-11-28 15:45:42 +000019#include <armnn/backends/IMemoryManager.hpp>
Derek Lambertif674aa02019-08-01 15:56:25 +010020#include <backendsCommon/MemCopyWorkload.hpp>
21#include <backendsCommon/MemSyncWorkload.hpp>
Matteo Martincighe5b8eb92019-11-28 15:45:42 +000022
Narumol Prangnawaratdf31cfe2019-11-22 11:26:06 +000023#include <LabelsAndEventClasses.hpp>
telsoa014fcda012018-03-09 14:13:49 +000024
Colm Donelan5b5c2222020-09-09 12:48:16 +010025#include <fmt/format.h>
telsoa014fcda012018-03-09 14:13:49 +000026
27namespace armnn
28{
29
30using namespace std;
Narumol Prangnawaratdf31cfe2019-11-22 11:26:06 +000031using namespace armnn::profiling;
telsoa014fcda012018-03-09 14:13:49 +000032
telsoa01c577f2c2018-08-31 09:22:23 +010033namespace
34{
35
/// Builds a human-readable error string of the form "<prefix> <error.what()>".
/// @param prefix  Text placed before the exception message.
/// @param error   Any exception-like object exposing what().
/// @return The concatenated message.
template <typename ExceptionType>
std::string ToErrorMessage(const char * prefix, const ExceptionType & error)
{
    // Plain string concatenation; equivalent to streaming the prefix,
    // a single space and the exception text into a stringstream.
    std::string message(prefix);
    message += " ";
    message += error.what();
    return message;
}
43
Narumol Prangnawaratdf31cfe2019-11-22 11:26:06 +000044void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
45 const Layer& layer,
46 ProfilingGuid networkGuid)
47{
48 // Add layer to the post-optimisation network structure
49 std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
50 timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
51 networkGuid,
52 layerName,
53 LabelsAndEventClasses::LAYER_GUID);
54 for (auto&& input : layer.GetInputSlots())
55 {
56 const IOutputSlot* source = input.GetConnectedOutputSlot();
Narumol Prangnawaratac2770a2020-04-01 16:51:23 +010057 ARMNN_ASSERT(source != NULL);
Narumol Prangnawaratdf31cfe2019-11-22 11:26:06 +000058 timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
59 source->GetOwningLayerGuid(),
60 layer.GetGuid());
61 }
62}
63
64void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
65 std::unique_ptr<IWorkload>& workload,
66 const Layer& layer)
67{
68 // Add workload to the post-optimisation network structure
69 timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
70 timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
71 layer.GetBackendId().Get(),
72 LabelsAndEventClasses::BACKENDID_GUID);
73
74 // Link the workload to the layer
75 timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
76 layer.GetGuid(),
Jim Flynn6398a982020-05-27 17:05:21 +010077 workload->GetGuid(),
78 LabelsAndEventClasses::CHILD_GUID);
Narumol Prangnawaratdf31cfe2019-11-22 11:26:06 +000079}
80
telsoa01c577f2c2018-08-31 09:22:23 +010081} // anonymous
82
telsoa014fcda012018-03-09 14:13:49 +000083std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
David Monahan4f1e8e42019-09-04 09:22:10 +010084 std::string& errorMessage,
Sadik Armagan3184c902020-03-18 10:57:30 +000085 const INetworkProperties& networkProperties,
86 profiling::ProfilingService& profilingService)
telsoa014fcda012018-03-09 14:13:49 +000087{
88 std::unique_ptr<LoadedNetwork> loadedNetwork;
89
Aron Virginas-Tara8e06ed2018-10-19 16:46:15 +010090 auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
91 {
92 errorMessage = ToErrorMessage("An error occurred when preparing the network workloads: ", error);
Derek Lamberti08446972019-11-26 16:38:31 +000093 ARMNN_LOG(error) << errorMessage;
Aron Virginas-Tara8e06ed2018-10-19 16:46:15 +010094
95 return std::unique_ptr<LoadedNetwork>();
96 };
97
telsoa014fcda012018-03-09 14:13:49 +000098 try
99 {
Sadik Armagan3184c902020-03-18 10:57:30 +0000100 loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties, profilingService));
telsoa014fcda012018-03-09 14:13:49 +0000101 }
Aron Virginas-Tara8e06ed2018-10-19 16:46:15 +0100102 catch (const armnn::RuntimeException& error)
telsoa014fcda012018-03-09 14:13:49 +0000103 {
Aron Virginas-Tara8e06ed2018-10-19 16:46:15 +0100104 return Fail(error);
telsoa014fcda012018-03-09 14:13:49 +0000105 }
106 catch (const armnn::Exception& error)
107 {
Aron Virginas-Tara8e06ed2018-10-19 16:46:15 +0100108 return Fail(error);
telsoa014fcda012018-03-09 14:13:49 +0000109 }
Aron Virginas-Tara8e06ed2018-10-19 16:46:15 +0100110 catch (const std::runtime_error& error)
telsoa014fcda012018-03-09 14:13:49 +0000111 {
Aron Virginas-Tara8e06ed2018-10-19 16:46:15 +0100112 return Fail(error);
telsoa014fcda012018-03-09 14:13:49 +0000113 }
telsoa014fcda012018-03-09 14:13:49 +0000114
115 return loadedNetwork;
116}
117
David Monahan4f1e8e42019-09-04 09:22:10 +0100118LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
Sadik Armagan3184c902020-03-18 10:57:30 +0000119 const INetworkProperties& networkProperties,
120 profiling::ProfilingService& profilingService) :
David Monahan4f1e8e42019-09-04 09:22:10 +0100121 m_OptimizedNetwork(std::move(net)),
122 m_IsImportEnabled(networkProperties.m_ImportEnabled),
Sadik Armagan3184c902020-03-18 10:57:30 +0000123 m_IsExportEnabled(networkProperties.m_ExportEnabled),
Narumol Prangnawarat549cb7a2020-07-10 17:50:53 +0100124 m_TensorHandleFactoryRegistry(),
Sadik Armagan3184c902020-03-18 10:57:30 +0000125 m_ProfilingService(profilingService)
telsoa014fcda012018-03-09 14:13:49 +0000126{
telsoa01c577f2c2018-08-31 09:22:23 +0100127 // Create a profiler and register it for the current thread.
128 m_Profiler = std::make_shared<Profiler>();
129 ProfilerManager::GetInstance().RegisterProfiler(m_Profiler.get());
130
telsoa014fcda012018-03-09 14:13:49 +0000131 Graph& order = m_OptimizedNetwork->GetGraph().TopologicalSort();
David Beck29c75de2018-10-23 13:35:58 +0100132 //First create tensor handlers, backends and workload factories.
telsoa01c577f2c2018-08-31 09:22:23 +0100133 //Handlers are created before workloads are.
134 //Because workload creation can modify some of the handlers,
Jim Flynne242f2d2019-05-22 14:24:13 +0100135 //(for example the splitter and concat layers).
telsoa014fcda012018-03-09 14:13:49 +0000136 for (auto&& layer : order)
137 {
Derek Lamberti84da38b2019-06-13 11:40:08 +0100138 auto const& backendId = layer->GetBackendId();
139 if (m_Backends.count(backendId) == 0)
David Beck29c75de2018-10-23 13:35:58 +0100140 {
Derek Lamberti84da38b2019-06-13 11:40:08 +0100141 auto createBackend = BackendRegistryInstance().GetFactory(backendId);
142 auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));
Aron Virginas-Tar56055192018-11-12 18:10:43 +0000143
Derek Lamberti84da38b2019-06-13 11:40:08 +0100144 IBackendInternal* backend = it.first->second.get();
Aron Virginas-Tar56055192018-11-12 18:10:43 +0000145
Derek Lamberti84da38b2019-06-13 11:40:08 +0100146 if (backend->SupportsTensorAllocatorAPI())
147 {
Sadik Armagan04a72972020-09-14 15:44:18 +0100148 auto workloadFactory = backend->CreateWorkloadFactory(
149 m_TensorHandleFactoryRegistry, m_OptimizedNetwork->GetModelOptions());
Derek Lamberti84da38b2019-06-13 11:40:08 +0100150 m_WorkloadFactories.emplace(
151 std::make_pair(backendId, std::make_pair(std::move(workloadFactory), nullptr)));
152 }
153 else
154 {
155 IBackendInternal::IMemoryManagerSharedPtr memoryManager = backend->CreateMemoryManager();
Sadik Armagan04a72972020-09-14 15:44:18 +0100156 auto workloadFactory = backend->CreateWorkloadFactory(
157 memoryManager, m_OptimizedNetwork->GetModelOptions());
Derek Lamberti84da38b2019-06-13 11:40:08 +0100158
159 m_WorkloadFactories.emplace(
160 std::make_pair(backendId, std::make_pair(std::move(workloadFactory), memoryManager)));
161 }
David Beck29c75de2018-10-23 13:35:58 +0100162 }
Derek Lamberti84da38b2019-06-13 11:40:08 +0100163 }
164
165 for (auto&& layer : order)
166 {
David Monahan3fb7e102019-08-20 11:25:29 +0100167 auto& workloadFactory = GetWorkloadFactory(*layer);
168
169 switch (layer->GetType())
170 {
171 case LayerType::Input:
Narumol Prangnawaratb8d771a2020-08-14 11:51:12 +0100172 case LayerType::MemImport:
David Monahan3fb7e102019-08-20 11:25:29 +0100173 {
174 // If IsImportEnabled is true then we need to set IsMemoryManaged to false when creating TensorHandles
175 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, !m_IsImportEnabled);
176 break;
177 }
178 default:
179 {
Ferran Balaguer83239f92019-09-19 11:49:25 +0100180 // Look for the layer with 1 OutputSlot which has 1 connection and that connection is an Output Layer
181 // If Export is enabled disable memory management so we can export, otherwise we do a copy
182 if((layer->GetNumOutputSlots() == 1) &&
183 (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
184 (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
185 {
186 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, !m_IsExportEnabled);
187 }
188 else
189 {
190 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
191 }
David Monahan3fb7e102019-08-20 11:25:29 +0100192 }
193 }
telsoa014fcda012018-03-09 14:13:49 +0000194 }
195
Narumol Prangnawaratdf31cfe2019-11-22 11:26:06 +0000196 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
Sadik Armagan3184c902020-03-18 10:57:30 +0000197 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
198 TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
Narumol Prangnawaratdf31cfe2019-11-22 11:26:06 +0000199 if (timelineUtils)
200 {
201 timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
Jim Flynnf7713212020-07-14 09:50:59 +0100202 // Mark the network with a start of life event
203 timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
204 // and with the process ID
205 int processID = armnnUtils::Processes::GetCurrentId();
206 std::stringstream ss;
207 ss << processID;
208 timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
Narumol Prangnawaratdf31cfe2019-11-22 11:26:06 +0000209 }
210
telsoa01c577f2c2018-08-31 09:22:23 +0100211 //Then create workloads.
telsoa014fcda012018-03-09 14:13:49 +0000212 for (auto&& layer : order)
213 {
Narumol Prangnawaratdf31cfe2019-11-22 11:26:06 +0000214 if (timelineUtils)
215 {
216 // Add layer to the post-optimisation network structure
217 AddLayerStructure(timelineUtils, *layer, networkGuid);
218 }
219
surmeh013537c2c2018-05-18 16:31:43 +0100220 const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);
telsoa014fcda012018-03-09 14:13:49 +0000221
222 switch (layer->GetType())
223 {
224 case LayerType::Input:
225 case LayerType::Output:
226 {
telsoa01c577f2c2018-08-31 09:22:23 +0100227 // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
telsoa014fcda012018-03-09 14:13:49 +0000228 break;
229 }
230 default:
231 {
Derek Lamberti94a88d22019-12-10 21:12:59 +0000232 auto workload = layer->CreateWorkload(workloadFactory);
telsoa014fcda012018-03-09 14:13:49 +0000233
234 if (!workload)
235 {
Narumol Prangnawaratdf31cfe2019-11-22 11:26:06 +0000236 const char* const layerName =
237 layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
Colm Donelan5b5c2222020-09-09 12:48:16 +0100238 throw InvalidArgumentException(
239 fmt::format("No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
240 layerName, static_cast<int>(layer->GetType()), layer->GetBackendId().Get()
telsoa014fcda012018-03-09 14:13:49 +0000241 ));
242 }
243
Narumol Prangnawaratdf31cfe2019-11-22 11:26:06 +0000244 if (timelineUtils)
245 {
246 // Add workload to the post-optimisation network structure
247 AddWorkloadStructure(timelineUtils, workload, *layer);
248 }
249
telsoa014fcda012018-03-09 14:13:49 +0000250 m_WorkloadQueue.push_back(move(workload));
telsoa01c577f2c2018-08-31 09:22:23 +0100251 // release the constant data in the layer..
252 layer->ReleaseConstantData();
telsoa014fcda012018-03-09 14:13:49 +0000253 break;
254 }
255 }
256 }
257
Narumol Prangnawaratdf31cfe2019-11-22 11:26:06 +0000258 if (timelineUtils)
259 {
260 // Commit to send the post-optimisation network structure
261 timelineUtils->Commit();
262 }
263
telsoa01c577f2c2018-08-31 09:22:23 +0100264 // Set up memory.
telsoa014fcda012018-03-09 14:13:49 +0000265 m_OptimizedNetwork->GetGraph().AllocateDynamicBuffers();
Derek Lambertif30f7d32019-04-09 10:25:02 +0100266
267 // Now that the intermediate tensor memory has been set-up, do any post allocation configuration for each workload.
268 for (auto& workload : m_WorkloadQueue)
269 {
270 workload->PostAllocationConfigure();
271 }
telsoa014fcda012018-03-09 14:13:49 +0000272}
273
Keith Davis33ed2212020-03-30 10:43:41 +0100274void LoadedNetwork::SendNetworkStructure()
275{
276 Graph& order = m_OptimizedNetwork->GetGraph().TopologicalSort();
277 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
278
279 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
280 TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
281
282 timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
283
284 for (auto&& layer : order)
285 {
286 // Add layer to the post-optimisation network structure
287 AddLayerStructure(timelineUtils, *layer, networkGuid);
288 switch (layer->GetType())
289 {
290 case LayerType::Input:
291 case LayerType::Output:
292 {
293 // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
294 break;
295 }
296 default:
297 {
298 for (auto& workload : m_WorkloadQueue)
299 {
300 // Add workload to the post-optimisation network structure
301 AddWorkloadStructure(timelineUtils, workload, *layer);
302 }
303 break;
304 }
305 }
306 }
307 // Commit to send the post-optimisation network structure
308 timelineUtils->Commit();
309}
310
Jim Flynnf7713212020-07-14 09:50:59 +0100311profiling::ProfilingGuid LoadedNetwork::GetNetworkGuid()
312{
313 return m_OptimizedNetwork->GetGuid();
314}
315
telsoa014fcda012018-03-09 14:13:49 +0000316TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
317{
318 for (auto&& inputLayer : m_OptimizedNetwork->GetGraph().GetInputLayers())
319 {
Narumol Prangnawaratac2770a2020-04-01 16:51:23 +0100320 ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
telsoa014fcda012018-03-09 14:13:49 +0000321 if (inputLayer->GetBindingId() == layerId)
322 {
323 return inputLayer->GetOutputSlot(0).GetTensorInfo();
324 }
325 }
326
Colm Donelan5b5c2222020-09-09 12:48:16 +0100327 throw InvalidArgumentException(fmt::format("No input layer is associated with id {}", layerId));
telsoa014fcda012018-03-09 14:13:49 +0000328}
329
330TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
331{
332 for (auto&& outputLayer : m_OptimizedNetwork->GetGraph().GetOutputLayers())
333 {
Narumol Prangnawaratac2770a2020-04-01 16:51:23 +0100334 ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot");
335 ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected");
telsoa014fcda012018-03-09 14:13:49 +0000336 if (outputLayer->GetBindingId() == layerId)
337 {
338 return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
339 }
340 }
341
Colm Donelan5b5c2222020-09-09 12:48:16 +0100342 throw InvalidArgumentException(fmt::format("No output layer is associated with id {}", layerId));
telsoa014fcda012018-03-09 14:13:49 +0000343}
344
surmeh013537c2c2018-05-18 16:31:43 +0100345const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
telsoa014fcda012018-03-09 14:13:49 +0000346{
surmeh013537c2c2018-05-18 16:31:43 +0100347 const IWorkloadFactory* workloadFactory = nullptr;
telsoa014fcda012018-03-09 14:13:49 +0000348
David Beck29c75de2018-10-23 13:35:58 +0100349 auto it = m_WorkloadFactories.find(layer.GetBackendId());
350 if (it == m_WorkloadFactories.end())
telsoa014fcda012018-03-09 14:13:49 +0000351 {
Colm Donelan5b5c2222020-09-09 12:48:16 +0100352 throw RuntimeException(fmt::format("No workload factory for {0} to be used for layer: {1}",
353 layer.GetBackendId().Get(),
354 layer.GetNameStr()),
355 CHECK_LOCATION());
David Beck33f0ae02018-10-18 15:13:56 +0100356 }
David Beck29c75de2018-10-23 13:35:58 +0100357
Aron Virginas-Tar5caf9072018-11-14 18:35:18 +0000358 workloadFactory = it->second.first.get();
telsoa014fcda012018-03-09 14:13:49 +0000359
Narumol Prangnawaratac2770a2020-04-01 16:51:23 +0100360 ARMNN_ASSERT_MSG(workloadFactory, "No workload factory");
telsoa014fcda012018-03-09 14:13:49 +0000361
362 std::string reasonIfUnsupported;
Sadik Armagan04a72972020-09-14 15:44:18 +0100363 ARMNN_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer,
364 {},
365 reasonIfUnsupported,
366 m_OptimizedNetwork->GetModelOptions()),
David Beck29c75de2018-10-23 13:35:58 +0100367 "Factory does not support layer");
Jan Eilers8eb25602020-03-09 12:13:48 +0000368 IgnoreUnused(reasonIfUnsupported);
surmeh013537c2c2018-05-18 16:31:43 +0100369 return *workloadFactory;
telsoa014fcda012018-03-09 14:13:49 +0000370}
371
372namespace {
373
374// Non-copyable class owning accelerator-specific tensor data.
375class TensorPin
376{
377public:
378 TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
379 : m_TensorHandle(std::move(handle))
380 , m_TensorInfo(info)
381 , m_Id(id)
382 {
383 }
384
385 ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
386 const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
387 LayerBindingId GetBindingId() const { return m_Id; }
388
389private:
390 std::unique_ptr<ITensorHandle> m_TensorHandle;
391 TensorInfo m_TensorInfo;
392 LayerBindingId m_Id;
393};
394
395static const TensorPin& GetTensorPin(LayerBindingId id,
396 const std::vector<TensorPin>& pins,
397 char const* bindingPointDesc)
398{
399 auto it = std::find_if(pins.begin(), pins.end(),
400 [id](const TensorPin& pin)
401 {
402 return pin.GetBindingId() == id;
403 });
404
405 if (it != pins.end())
406 {
407 return *it;
408 }
409 else
410 {
Colm Donelan5b5c2222020-09-09 12:48:16 +0100411 throw InvalidArgumentException(fmt::format("No tensor supplied for {0} {1}", bindingPointDesc, id));
telsoa014fcda012018-03-09 14:13:49 +0000412 }
413}
414
415// Stores data that needs to be kept accessible for the entire execution of a workload.
416class WorkloadData
417{
418public:
419 WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
420 {
421 m_InputTensorPins.reserve(inputTensors.size());
422 m_OutputTensorPins.reserve(outputTensors.size());
423
424 for (auto inputTensorPair : inputTensors)
425 {
426 auto inputTensor = inputTensorPair.second;
427
428 std::unique_ptr<ITensorHandle> tensorHandle =
429 std::make_unique<ConstPassthroughCpuTensorHandle>(inputTensor.GetInfo(),inputTensor.GetMemoryArea());
430 LayerBindingId layerId = inputTensorPair.first;
431
432 m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
433 }
434
435 for (auto outputTensorPair : outputTensors)
436 {
437 auto outputTensor = outputTensorPair.second;
438
439 std::unique_ptr<ITensorHandle> tensorHandle =
440 std::make_unique<PassthroughCpuTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
441 LayerBindingId layerId = outputTensorPair.first;
442
443 m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
444 }
445 }
446
447 const TensorPin& GetInputTensorPin(LayerBindingId id) const
448 {
449 return GetTensorPin(id, m_InputTensorPins, "input");
450 }
451
452 const TensorPin& GetOutputTensorPin(LayerBindingId id) const
453 {
454 return GetTensorPin(id, m_OutputTensorPins, "output");
455 }
456
457private:
458
459 std::vector<TensorPin> m_InputTensorPins;
460 std::vector<TensorPin> m_OutputTensorPins;
461};
462
463}
464
/// Runs one inference: binds the user-supplied input/output tensors, records
/// the inference in the profiling timeline when enabled, and executes the
/// workload queue.
/// @return Status::Success if execution succeeded, Status::Failure otherwise.
Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
                                      const OutputTensors& outputTensors)
{
    const Graph& graph = m_OptimizedNetwork->GetGraph();

    // Walk graph to determine the order of execution.
    if (graph.GetNumLayers() < 2)
    {
        // A runnable network needs at least an input and an output layer.
        ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
        return Status::Failure;
    }

    // Data that must be kept alive for the entire execution of the workload.
    WorkloadData workloadData(inputTensors, outputTensors);

    if (graph.GetNumInputs() != inputTensors.size())
    {
        throw InvalidArgumentException("Number of inputs provided does not match network.");
    }

    // For each input to the network, call EnqueueInput with the data passed by the user.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs");
        m_InputQueue.clear();
        m_InputQueue.reserve(graph.GetNumInputs());
        for (const BindableLayer* inputLayer : graph.GetInputLayers())
        {
            // Throws if the user did not supply a tensor for this binding id.
            const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
            EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
        }
    }

    // For each output to the network, call EnqueueOutput with the data passed by the user.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs");
        m_OutputQueue.clear();
        m_OutputQueue.reserve(graph.GetNumOutputs());
        for (const BindableLayer* outputLayer : graph.GetOutputLayers())
        {
            const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
            EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
        }
    }

    // timelineUtils is null when profiling is disabled; guarded below.
    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
    ProfilingGuid inferenceGuid = m_ProfilingService.GetNextGuid();
    if (timelineUtils)
    {
        // Add inference timeline trace if profiling is enabled.
        ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
        timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
        timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                          networkGuid,
                                          inferenceGuid,
                                          LabelsAndEventClasses::EXECUTION_OF_GUID);
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
    }

    bool executionSucceeded = true;

    {
        if (m_ProfilingService.IsProfilingEnabled())
        {
            m_ProfilingService.IncrementCounterValue(armnn::profiling::INFERENCES_RUN);
        }
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Execute");
        ARMNN_SCOPED_HEAP_PROFILING("Executing");
        executionSucceeded = Execute(timelineUtils, inferenceGuid);
    }

    if (timelineUtils)
    {
        // Add end of life of the inference timeline if profiling is enabled.
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
        timelineUtils->Commit();
    }
    return executionSucceeded ? Status::Success : Status::Failure;
}
544
/// Binds one user input tensor to the corresponding input layer. When import
/// is enabled and the backend handle accepts Malloc memory, the user buffer
/// is imported directly (zero copy); otherwise a mem-copy workload is queued.
/// @throws InvalidArgumentException for a non-input layer or null handle.
/// @throws MemoryImportException if import was attempted but failed.
void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Input)
    {
        throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
    }

    InputQueueDescriptor inputQueueDescriptor;
    WorkloadInfo info;

    // The user-supplied tensor is the input of the (potential) copy workload...
    inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
    info.m_InputTensorInfos.push_back(tensorInfo);

    ARMNN_ASSERT_MSG(layer.GetNumOutputSlots() == 1, "Can only handle Input Layer with one output");
    // ...and the input layer's own output handle is its destination.
    const OutputHandler& handler = layer.GetOutputHandler();
    const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
    ITensorHandle* outputTensorHandle = handler.GetData();
    ARMNN_ASSERT_MSG(outputTensorHandle != nullptr,
                     "Data should have been allocated.");
    inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
    info.m_OutputTensorInfos.push_back(outputTensorInfo);

    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    bool needMemCopy = true;
    if (m_IsImportEnabled)  // Try import the input tensor
    {
        if(CheckFlag(importFlags, MemorySource::Malloc) )
        {
            needMemCopy = false;
            // This assumes a CPU Tensor handle
            void* mem = tensorHandle->Map(false);
            if (outputTensorHandle->Import(mem, MemorySource::Malloc))
            {
                tensorHandle->Unmap();
                return; // No need for a workload since the import has been done.
            }
            // Unmap before throwing so the handle is not left mapped.
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
    }
    if (needMemCopy)
    {
        // Create a mem copy workload for input since we did not import
        std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);

        ARMNN_ASSERT_MSG(inputWorkload, "No input workload created");

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
        if (timelineUtils)
        {
            // Add Input Workload to the post-optimisation network structure
            AddWorkloadStructure(timelineUtils, inputWorkload, layer);
            timelineUtils->Commit();
        }

        m_InputQueue.push_back(move(inputWorkload));
    }
}
609
surmeh013537c2c2018-05-18 16:31:43 +0100610void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
telsoa014fcda012018-03-09 14:13:49 +0000611{
612 if (layer.GetType() != LayerType::Output)
613 {
614 throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
615 }
616
617 if (tensorHandle == nullptr)
618 {
619 throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
620 }
621
622 OutputQueueDescriptor outputQueueDescriptor;
623 WorkloadInfo info;
624
625 outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
626 info.m_OutputTensorInfos.push_back(tensorInfo);
627
Narumol Prangnawaratac2770a2020-04-01 16:51:23 +0100628 ARMNN_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input.");
telsoa014fcda012018-03-09 14:13:49 +0000629
telsoa01c577f2c2018-08-31 09:22:23 +0100630 // Gets the output handler from the previous node.
telsoa014fcda012018-03-09 14:13:49 +0000631 const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();
632
633 const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
634 ITensorHandle* inputTensorHandle = outputHandler.GetData();
Narumol Prangnawaratac2770a2020-04-01 16:51:23 +0100635 ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");
telsoa014fcda012018-03-09 14:13:49 +0000636
Derek Lambertif674aa02019-08-01 15:56:25 +0100637 // Try import the output tensor.
638 // Note: We can only import the output pointer if all of the following hold true:
639 // a) The imported pointer is aligned sufficiently
640 // b) The tensor has zero padding
641 // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
642 // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
Ferran Balaguer83239f92019-09-19 11:49:25 +0100643 // e) m_IsExportEnabled must be set to true
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000644 bool needMemCopy = true;
Ferran Balaguer83239f92019-09-19 11:49:25 +0100645 if (m_IsExportEnabled && (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
Derek Lambertif674aa02019-08-01 15:56:25 +0100646 {
Ferran Balaguer83239f92019-09-19 11:49:25 +0100647 if(layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
Derek Lambertif674aa02019-08-01 15:56:25 +0100648 {
Ferran Balaguerbfeb2712019-08-07 15:14:56 +0100649 MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
650 if (CheckFlag(importFlags, MemorySource::Malloc))
Derek Lambertif674aa02019-08-01 15:56:25 +0100651 {
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000652 needMemCopy = false;
Ferran Balaguerbfeb2712019-08-07 15:14:56 +0100653 void *mem = tensorHandle->Map(false);
654 bool importOk = inputTensorHandle->Import(mem, MemorySource::Malloc);
655 tensorHandle->Unmap();
Derek Lambertif674aa02019-08-01 15:56:25 +0100656
Ferran Balaguerbfeb2712019-08-07 15:14:56 +0100657 if (importOk)
658 {
659 // Insert synchronization workload
660 MemSyncQueueDescriptor syncDesc;
661 syncDesc.m_Inputs.push_back(inputTensorHandle);
662 info.m_InputTensorInfos.push_back(inputTensorInfo);
663 auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
Narumol Prangnawaratac2770a2020-04-01 16:51:23 +0100664 ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
Ferran Balaguerbfeb2712019-08-07 15:14:56 +0100665 m_OutputQueue.push_back(move(syncWorkload));
Ferran Balaguerbfeb2712019-08-07 15:14:56 +0100666 }
David Monahan4f1e8e42019-09-04 09:22:10 +0100667 else
668 {
669 throw MemoryExportException("EnqueueOutput: Memory Export failed");
670 }
Derek Lambertif674aa02019-08-01 15:56:25 +0100671 }
672 }
673 }
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000674 if (needMemCopy)
David Monahan4f1e8e42019-09-04 09:22:10 +0100675 {
Sadik Armagan890bf652020-09-29 15:12:36 +0100676 const Layer& connectedLayer = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer();
677 // Do not add MemCopy Layer if OutputLayer is already connected the MemCopy Layer
678 if (connectedLayer.GetType() != LayerType::MemCopy)
Narumol Prangnawarataa68e012019-11-29 17:17:43 +0000679 {
Sadik Armagan890bf652020-09-29 15:12:36 +0100680 // If we got here then we didn't export the memory, so add an output workload which performs a memcopy.
681 outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
682 info.m_InputTensorInfos.push_back(inputTensorInfo);
Narumol Prangnawarataa68e012019-11-29 17:17:43 +0000683
Sadik Armagan890bf652020-09-29 15:12:36 +0100684 std::unique_ptr<IWorkload> outputWorkload =
685 std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
686 ARMNN_ASSERT_MSG(outputWorkload, "No output workload created");
687
688 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
689 TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
690 if (timelineUtils)
691 {
692 // Add Output Workload to the post-optimisation network structure
693 AddWorkloadStructure(timelineUtils, outputWorkload, layer);
694 timelineUtils->Commit();
695 }
696
697 m_OutputQueue.push_back(move(outputWorkload));
698 }
David Monahan4f1e8e42019-09-04 09:22:10 +0100699 }
Derek Lamberti03614f62018-10-02 15:52:46 +0100700}
701
Derek Lambertia08d29b2020-06-19 14:33:05 +0100702void LoadedNetwork::AllocateWorkingMemory(std::lock_guard<std::mutex>& lock)
Derek Lamberti03614f62018-10-02 15:52:46 +0100703{
Derek Lambertia08d29b2020-06-19 14:33:05 +0100704 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Working Memory Allocation");
705
706 // this unused parameter makes sure we can only call this function with a valid lock
707 IgnoreUnused(lock);
708
Derek Lamberti03614f62018-10-02 15:52:46 +0100709 if (m_IsWorkingMemAllocated)
710 {
711 return;
712 }
David Beck29c75de2018-10-23 13:35:58 +0100713 for (auto&& workloadFactory : m_WorkloadFactories)
714 {
Aron Virginas-Tar5caf9072018-11-14 18:35:18 +0000715 IBackendInternal::IMemoryManagerSharedPtr memoryManager = workloadFactory.second.second;
716 if (memoryManager)
717 {
718 memoryManager->Acquire();
719 }
David Beck29c75de2018-10-23 13:35:58 +0100720 }
Narumol Prangnawarat11bd2612019-08-13 10:26:53 +0100721 m_TensorHandleFactoryRegistry.AquireMemory();
Derek Lamberti03614f62018-10-02 15:52:46 +0100722 m_IsWorkingMemAllocated = true;
723}
724
725void LoadedNetwork::FreeWorkingMemory()
726{
Matthew Bentham2a326b52019-03-19 10:11:01 +0000727 std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
Derek Lamberti03614f62018-10-02 15:52:46 +0100728 if (!m_IsWorkingMemAllocated)
729 {
730 return;
731 }
732 // Informs the memory managers to release memory in it's respective memory group
David Beck29c75de2018-10-23 13:35:58 +0100733 for (auto&& workloadFactory : m_WorkloadFactories)
734 {
Aron Virginas-Tar5caf9072018-11-14 18:35:18 +0000735 IBackendInternal::IMemoryManagerSharedPtr memoryManager = workloadFactory.second.second;
736 if (memoryManager)
737 {
738 memoryManager->Release();
739 }
David Beck29c75de2018-10-23 13:35:58 +0100740 }
Narumol Prangnawarat11bd2612019-08-13 10:26:53 +0100741 m_TensorHandleFactoryRegistry.ReleaseMemory();
Derek Lamberti03614f62018-10-02 15:52:46 +0100742 m_IsWorkingMemAllocated = false;
telsoa014fcda012018-03-09 14:13:49 +0000743}
744
David Monahan6198fe02019-12-02 08:35:43 +0000745bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
746 profiling::ProfilingGuid inferenceGuid)
telsoa014fcda012018-03-09 14:13:49 +0000747{
748 bool success = true;
749
Aron Virginas-Tara8e06ed2018-10-19 16:46:15 +0100750 auto Fail = [&](const std::exception& error)
751 {
Derek Lamberti08446972019-11-26 16:38:31 +0000752 ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
Aron Virginas-Tara8e06ed2018-10-19 16:46:15 +0100753 success = false;
754 };
755
telsoa014fcda012018-03-09 14:13:49 +0000756 try
757 {
Matthew Bentham2a326b52019-03-19 10:11:01 +0000758 std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
Derek Lambertia08d29b2020-06-19 14:33:05 +0100759 AllocateWorkingMemory(lockGuard);
Derek Lamberti03614f62018-10-02 15:52:46 +0100760
David Monahan6198fe02019-12-02 08:35:43 +0000761 ProfilingDynamicGuid workloadInferenceID(0);
Derek Lambertia08d29b2020-06-19 14:33:05 +0100762 auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](WorkloadQueue& queue)
telsoa014fcda012018-03-09 14:13:49 +0000763 {
Derek Lambertia08d29b2020-06-19 14:33:05 +0100764 for (auto& workload : queue)
David Monahan6198fe02019-12-02 08:35:43 +0000765 {
Derek Lambertia08d29b2020-06-19 14:33:05 +0100766 if(timelineUtils)
767 {
768 workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
769 inferenceGuid);
770 }
771 workload->Execute();
772 if(timelineUtils)
773 {
774 timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
775 }
David Monahan6198fe02019-12-02 08:35:43 +0000776 }
Derek Lambertia08d29b2020-06-19 14:33:05 +0100777 };
Derek Lamberti03614f62018-10-02 15:52:46 +0100778
Derek Lambertia08d29b2020-06-19 14:33:05 +0100779 ExecuteQueue(m_InputQueue);
780 ExecuteQueue(m_WorkloadQueue);
781 ExecuteQueue(m_OutputQueue);
telsoa014fcda012018-03-09 14:13:49 +0000782 }
Aron Virginas-Tara8e06ed2018-10-19 16:46:15 +0100783 catch (const RuntimeException& error)
telsoa014fcda012018-03-09 14:13:49 +0000784 {
Aron Virginas-Tara8e06ed2018-10-19 16:46:15 +0100785 Fail(error);
telsoa014fcda012018-03-09 14:13:49 +0000786 }
telsoa014fcda012018-03-09 14:13:49 +0000787 catch (const std::runtime_error& error)
788 {
Aron Virginas-Tara8e06ed2018-10-19 16:46:15 +0100789 Fail(error);
telsoa014fcda012018-03-09 14:13:49 +0000790 }
791
telsoa014fcda012018-03-09 14:13:49 +0000792 return success;
793}
794
Nattapat Chaimanowong6e948202019-03-22 14:01:46 +0000795void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func)
796{
797 for (auto&& workloadPtr: m_WorkloadQueue)
798 {
799 workloadPtr.get()->RegisterDebugCallback(func);
800 }
801}
802
telsoa014fcda012018-03-09 14:13:49 +0000803}