//
// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "LoadedNetwork.hpp"
#include "Layer.hpp"
#include "Graph.hpp"
#include "Network.hpp"
#include "Runtime.hpp"
#include "Profiling.hpp"
#include "HeapProfiling.hpp"

#include <armnn/BackendRegistry.hpp>
#include <armnn/Logging.hpp>
#include <armnn/utility/Assert.hpp>

#include <backendsCommon/CpuTensorHandle.hpp>
#include <armnn/backends/IMemoryManager.hpp>
#include <backendsCommon/MemCopyWorkload.hpp>
#include <backendsCommon/MemSyncWorkload.hpp>

#include <LabelsAndEventClasses.hpp>

#include <boost/format.hpp>

namespace armnn
{

using namespace std;
using namespace armnn::profiling;

namespace
{

template <typename ExceptionType>
std::string ToErrorMessage(const char* prefix, const ExceptionType& error)
{
    std::stringstream ss;
    ss << prefix << " " << error.what();
    return ss.str();
}

void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                       const Layer& layer,
                       ProfilingGuid networkGuid)
{
    // Add the layer to the post-optimisation network structure
    std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
    timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
                                               networkGuid,
                                               layerName,
                                               LabelsAndEventClasses::LAYER_GUID);
    for (auto&& input : layer.GetInputSlots())
    {
        const IOutputSlot* source = input.GetConnectedOutputSlot();
        ARMNN_ASSERT(source != nullptr);
        timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
                                                    source->GetOwningLayerGuid(),
                                                    layer.GetGuid());
    }
}

void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                          std::unique_ptr<IWorkload>& workload,
                          const Layer& layer)
{
    // Add the workload to the post-optimisation network structure
    timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
    timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
                                       layer.GetBackendId().Get(),
                                       LabelsAndEventClasses::BACKENDID_GUID);

    // Link the workload to the layer
    timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                      layer.GetGuid(),
                                      workload->GetGuid(),
                                      LabelsAndEventClasses::CHILD_GUID);
}

} // anonymous namespace

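// A minimal usage sketch (hypothetical caller shown for illustration; in practice
// the Runtime drives this, and the INetworkProperties flag values are assumptions):
//
//     std::string errorMsg;
//     INetworkProperties props(/*importEnabled=*/false, /*exportEnabled=*/false);
//     std::unique_ptr<LoadedNetwork> loaded =
//         LoadedNetwork::MakeLoadedNetwork(std::move(optNet), errorMsg, props, profilingService);
//     if (!loaded) { /* errorMsg describes the failure; no exception escapes */ }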
std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
                                                                std::string& errorMessage,
                                                                const INetworkProperties& networkProperties,
                                                                profiling::ProfilingService& profilingService)
{
    std::unique_ptr<LoadedNetwork> loadedNetwork;

    auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
    {
        errorMessage = ToErrorMessage("An error occurred when preparing the network workloads:", error);
        ARMNN_LOG(error) << errorMessage;

        return std::unique_ptr<LoadedNetwork>();
    };

    try
    {
        loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties, profilingService));
    }
    catch (const armnn::RuntimeException& error)
    {
        return Fail(error);
    }
    catch (const armnn::Exception& error)
    {
        return Fail(error);
    }
    catch (const std::runtime_error& error)
    {
        return Fail(error);
    }

    return loadedNetwork;
}

117
David Monahan4f1e8e42019-09-04 09:22:10 +0100118LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
Sadik Armagan3184c902020-03-18 10:57:30 +0000119 const INetworkProperties& networkProperties,
120 profiling::ProfilingService& profilingService) :
David Monahan4f1e8e42019-09-04 09:22:10 +0100121 m_OptimizedNetwork(std::move(net)),
122 m_IsImportEnabled(networkProperties.m_ImportEnabled),
Sadik Armagan3184c902020-03-18 10:57:30 +0000123 m_IsExportEnabled(networkProperties.m_ExportEnabled),
Narumol Prangnawarat549cb7a2020-07-10 17:50:53 +0100124 m_TensorHandleFactoryRegistry(),
Sadik Armagan3184c902020-03-18 10:57:30 +0000125 m_ProfilingService(profilingService)
telsoa014fcda012018-03-09 14:13:49 +0000126{
telsoa01c577f2c2018-08-31 09:22:23 +0100127 // Create a profiler and register it for the current thread.
128 m_Profiler = std::make_shared<Profiler>();
129 ProfilerManager::GetInstance().RegisterProfiler(m_Profiler.get());
130
telsoa014fcda012018-03-09 14:13:49 +0000131 Graph& order = m_OptimizedNetwork->GetGraph().TopologicalSort();
David Beck29c75de2018-10-23 13:35:58 +0100132 //First create tensor handlers, backends and workload factories.
telsoa01c577f2c2018-08-31 09:22:23 +0100133 //Handlers are created before workloads are.
134 //Because workload creation can modify some of the handlers,
Jim Flynne242f2d2019-05-22 14:24:13 +0100135 //(for example the splitter and concat layers).
telsoa014fcda012018-03-09 14:13:49 +0000136 for (auto&& layer : order)
137 {
Derek Lamberti84da38b2019-06-13 11:40:08 +0100138 auto const& backendId = layer->GetBackendId();
139 if (m_Backends.count(backendId) == 0)
David Beck29c75de2018-10-23 13:35:58 +0100140 {
Derek Lamberti84da38b2019-06-13 11:40:08 +0100141 auto createBackend = BackendRegistryInstance().GetFactory(backendId);
142 auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));
Aron Virginas-Tar56055192018-11-12 18:10:43 +0000143
Derek Lamberti84da38b2019-06-13 11:40:08 +0100144 IBackendInternal* backend = it.first->second.get();
Aron Virginas-Tar56055192018-11-12 18:10:43 +0000145
Derek Lamberti84da38b2019-06-13 11:40:08 +0100146 if (backend->SupportsTensorAllocatorAPI())
147 {
Narumol Prangnawarat11bd2612019-08-13 10:26:53 +0100148 auto workloadFactory = backend->CreateWorkloadFactory(m_TensorHandleFactoryRegistry);
Derek Lamberti84da38b2019-06-13 11:40:08 +0100149 m_WorkloadFactories.emplace(
150 std::make_pair(backendId, std::make_pair(std::move(workloadFactory), nullptr)));
151 }
152 else
153 {
154 IBackendInternal::IMemoryManagerSharedPtr memoryManager = backend->CreateMemoryManager();
155 auto workloadFactory = backend->CreateWorkloadFactory(memoryManager);
156
157 m_WorkloadFactories.emplace(
158 std::make_pair(backendId, std::make_pair(std::move(workloadFactory), memoryManager)));
159 }
David Beck29c75de2018-10-23 13:35:58 +0100160 }
Derek Lamberti84da38b2019-06-13 11:40:08 +0100161 }
162
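    // Second pass: create the ITensorHandles for every output slot, using the
    // factories registered above. A handle's memory management is disabled when
    // the user has asked to import (inputs) or export (outputs) that tensor, so
    // that the backend does not allocate memory shadowing the user's own buffer.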
    for (auto&& layer : order)
    {
        auto& workloadFactory = GetWorkloadFactory(*layer);

        switch (layer->GetType())
        {
            case LayerType::Input:
            {
                // If IsImportEnabled is true then we need to set IsMemoryManaged
                // to false when creating TensorHandles.
                layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, !m_IsImportEnabled);
                break;
            }
            default:
            {
                // Look for a layer with a single output slot whose only connection
                // is to an Output layer. If export is enabled, disable memory
                // management so the tensor can be exported; otherwise fall back to a copy.
                if ((layer->GetNumOutputSlots() == 1) &&
                    (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
                    (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
                {
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, !m_IsExportEnabled);
                }
                else
                {
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
                }
            }
        }
    }

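    // If the profiling service is active, create a timeline entity for the network
    // itself; the layers and workloads added below are linked to it as children.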
    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
    if (timelineUtils)
    {
        timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
    }

    // Then create the workloads.
    for (auto&& layer : order)
    {
        if (timelineUtils)
        {
            // Add the layer to the post-optimisation network structure
            AddLayerStructure(timelineUtils, *layer, networkGuid);
        }

        const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);

        switch (layer->GetType())
        {
            case LayerType::Input:
            case LayerType::Output:
            {
                // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
                break;
            }
            default:
            {
                auto workload = layer->CreateWorkload(workloadFactory);

                if (!workload)
                {
                    const char* const layerName =
                        layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
                    throw InvalidArgumentException(boost::str(
                        boost::format("No workload created for layer (name: '%1%' type: '%2%') (compute '%3%')")
                        % layerName % static_cast<int>(layer->GetType()) % layer->GetBackendId().Get()
                    ));
                }

                if (timelineUtils)
                {
                    // Add the workload to the post-optimisation network structure
                    AddWorkloadStructure(timelineUtils, workload, *layer);
                }

                m_WorkloadQueue.push_back(move(workload));
                // Release the constant data in the layer.
                layer->ReleaseConstantData();
                break;
            }
        }
    }

    if (timelineUtils)
    {
        // Commit to send the post-optimisation network structure
        timelineUtils->Commit();
    }

    // Set up memory.
    m_OptimizedNetwork->GetGraph().AllocateDynamicBuffers();

    // Now that the intermediate tensor memory has been set up, do any post-allocation
    // configuration for each workload.
    for (auto& workload : m_WorkloadQueue)
    {
        workload->PostAllocationConfigure();
    }
}

void LoadedNetwork::SendNetworkStructure()
{
    Graph& order = m_OptimizedNetwork->GetGraph().TopologicalSort();
    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);

    timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);

    for (auto&& layer : order)
    {
        // Add the layer to the post-optimisation network structure
        AddLayerStructure(timelineUtils, *layer, networkGuid);
        switch (layer->GetType())
        {
            case LayerType::Input:
            case LayerType::Output:
            {
                // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
                break;
            }
            default:
            {
                for (auto& workload : m_WorkloadQueue)
                {
                    // Add the workload to the post-optimisation network structure
                    AddWorkloadStructure(timelineUtils, workload, *layer);
                }
                break;
            }
        }
    }
    // Commit to send the post-optimisation network structure
    timelineUtils->Commit();
}

TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& inputLayer : m_OptimizedNetwork->GetGraph().GetInputLayers())
    {
        ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
        if (inputLayer->GetBindingId() == layerId)
        {
            return inputLayer->GetOutputSlot(0).GetTensorInfo();
        }
    }

    throw InvalidArgumentException(boost::str(boost::format("No input layer is associated with id %1%") % layerId));
}

TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& outputLayer : m_OptimizedNetwork->GetGraph().GetOutputLayers())
    {
        ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot");
        ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected");
        if (outputLayer->GetBindingId() == layerId)
        {
            return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
        }
    }

    throw InvalidArgumentException(boost::str(boost::format("No output layer is associated with id %1%") % layerId));
}

const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
{
    const IWorkloadFactory* workloadFactory = nullptr;

    auto it = m_WorkloadFactories.find(layer.GetBackendId());
    if (it == m_WorkloadFactories.end())
    {
        throw RuntimeException(
            boost::str(
                boost::format("No workload factory for %1% to be used for layer: %2%")
                % layer.GetBackendId().Get()
                % layer.GetNameStr()),
            CHECK_LOCATION());
    }

    workloadFactory = it->second.first.get();

    ARMNN_ASSERT_MSG(workloadFactory, "No workload factory");

    std::string reasonIfUnsupported;
    ARMNN_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer, {}, reasonIfUnsupported),
                     "Factory does not support layer");
    IgnoreUnused(reasonIfUnsupported);
    return *workloadFactory;
}

namespace {

// Non-copyable class owning accelerator-specific tensor data.
class TensorPin
{
public:
    TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
        : m_TensorHandle(std::move(handle))
        , m_TensorInfo(info)
        , m_Id(id)
    {
    }

    ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
    const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
    LayerBindingId GetBindingId() const { return m_Id; }

private:
    std::unique_ptr<ITensorHandle> m_TensorHandle;
    TensorInfo m_TensorInfo;
    LayerBindingId m_Id;
};

static const TensorPin& GetTensorPin(LayerBindingId id,
                                     const std::vector<TensorPin>& pins,
                                     char const* bindingPointDesc)
{
    auto it = std::find_if(pins.begin(), pins.end(),
                           [id](const TensorPin& pin)
                           {
                               return pin.GetBindingId() == id;
                           });

    if (it != pins.end())
    {
        return *it;
    }
    else
    {
        throw InvalidArgumentException(boost::str(
            boost::format("No tensor supplied for %1% %2%") % bindingPointDesc % id));
    }
}

// Stores data that needs to be kept accessible for the entire execution of a workload.
class WorkloadData
{
public:
    WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
    {
        m_InputTensorPins.reserve(inputTensors.size());
        m_OutputTensorPins.reserve(outputTensors.size());

        for (auto inputTensorPair : inputTensors)
        {
            auto inputTensor = inputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughCpuTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
            LayerBindingId layerId = inputTensorPair.first;

            m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
        }

        for (auto outputTensorPair : outputTensors)
        {
            auto outputTensor = outputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<PassthroughCpuTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
            LayerBindingId layerId = outputTensorPair.first;

            m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
        }
    }

    const TensorPin& GetInputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_InputTensorPins, "input");
    }

    const TensorPin& GetOutputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_OutputTensorPins, "output");
    }

private:

    std::vector<TensorPin> m_InputTensorPins;
    std::vector<TensorPin> m_OutputTensorPins;
};

} // anonymous namespace

Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
                                      const OutputTensors& outputTensors)
{
    const Graph& graph = m_OptimizedNetwork->GetGraph();

    // Walk the graph to determine the order of execution.
    if (graph.GetNumLayers() < 2)
    {
        ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload(): Less than two nodes in graph";
        return Status::Failure;
    }

    // Data that must be kept alive for the entire execution of the workload.
    WorkloadData workloadData(inputTensors, outputTensors);

    if (graph.GetNumInputs() != inputTensors.size())
    {
        throw InvalidArgumentException("Number of inputs provided does not match network.");
    }

    // For each input to the network, call EnqueueInput with the data passed by the user.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs");
        m_InputQueue.clear();
        m_InputQueue.reserve(graph.GetNumInputs());
        for (const BindableLayer* inputLayer : graph.GetInputLayers())
        {
            const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
            EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
        }
    }

    // For each output of the network, call EnqueueOutput with the data passed by the user.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs");
        m_OutputQueue.clear();
        m_OutputQueue.reserve(graph.GetNumOutputs());
        for (const BindableLayer* outputLayer : graph.GetOutputLayers())
        {
            const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
            EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
        }
    }

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
    ProfilingGuid inferenceGuid = m_ProfilingService.GetNextGuid();
    if (timelineUtils)
    {
        // Add an inference timeline trace if profiling is enabled.
        ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
        timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
        timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                          networkGuid,
                                          inferenceGuid,
                                          LabelsAndEventClasses::EXECUTION_OF_GUID);
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
    }

    bool executionSucceeded = true;

    {
        if (m_ProfilingService.IsProfilingEnabled())
        {
            m_ProfilingService.IncrementCounterValue(armnn::profiling::INFERENCES_RUN);
        }
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Execute");
        ARMNN_SCOPED_HEAP_PROFILING("Executing");
        executionSucceeded = Execute(timelineUtils, inferenceGuid);
    }

    if (timelineUtils)
    {
        // Mark the end of life of the inference timeline if profiling is enabled.
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
        timelineUtils->Commit();
    }
    return executionSucceeded ? Status::Success : Status::Failure;
}

void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Input)
    {
        throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
    }

    InputQueueDescriptor inputQueueDescriptor;
    WorkloadInfo info;

    inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
    info.m_InputTensorInfos.push_back(tensorInfo);

    ARMNN_ASSERT_MSG(layer.GetNumOutputSlots() == 1, "Can only handle Input Layer with one output");
    const OutputHandler& handler = layer.GetOutputHandler();
    const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
    ITensorHandle* outputTensorHandle = handler.GetData();
    ARMNN_ASSERT_MSG(outputTensorHandle != nullptr,
                     "Data should have been allocated.");
    inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
    info.m_OutputTensorInfos.push_back(outputTensorInfo);

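    // When import is enabled, attempt a zero-copy path: map the user's input buffer
    // and import the pointer directly into the backend tensor handle, so no memcopy
    // workload is needed. Failures throw rather than silently falling back to a copy.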
    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    if (m_IsImportEnabled)  // Try to import the input tensor
    {
        if (CheckFlag(importFlags, MemorySource::Malloc))
        {
            // This assumes a CPU tensor handle
            void* mem = tensorHandle->Map(false);
            if (outputTensorHandle->Import(mem, MemorySource::Malloc))
            {
                tensorHandle->Unmap();
                return; // No need for a workload since the import has been done.
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
        else
        {
            throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import");
        }
    }
    else
    {
        // Create a memcopy workload for the input since we did not import
        std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);

        ARMNN_ASSERT_MSG(inputWorkload, "No input workload created");

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
        if (timelineUtils)
        {
            // Add the input workload to the post-optimisation network structure
            AddWorkloadStructure(timelineUtils, inputWorkload, layer);
            timelineUtils->Commit();
        }

        m_InputQueue.push_back(move(inputWorkload));
    }
}

void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Output)
    {
        throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
    }

    OutputQueueDescriptor outputQueueDescriptor;
    WorkloadInfo info;

    outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
    info.m_OutputTensorInfos.push_back(tensorInfo);

    ARMNN_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input.");

    // Get the output handler from the previous node.
    const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();

    const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
    ITensorHandle* inputTensorHandle = outputHandler.GetData();
    ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");

    // Try to export the output tensor.
    // Note: We can only export the output pointer if all of the following hold true:
    // a) The imported pointer is aligned sufficiently
    // b) The tensor has zero padding
    // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
    // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
    // e) m_IsExportEnabled must be set to true
    if (m_IsExportEnabled && (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
    {
        if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
        {
            MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
            if (CheckFlag(importFlags, MemorySource::Malloc))
            {
                void* mem = tensorHandle->Map(false);
                bool importOk = inputTensorHandle->Import(mem, MemorySource::Malloc);
                tensorHandle->Unmap();

                if (importOk)
                {
                    // Insert a synchronization workload
                    MemSyncQueueDescriptor syncDesc;
                    syncDesc.m_Inputs.push_back(inputTensorHandle);
                    info.m_InputTensorInfos.push_back(inputTensorInfo);
                    auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
                    ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
                    m_OutputQueue.push_back(move(syncWorkload));
                }
                else
                {
                    throw MemoryExportException("EnqueueOutput: Memory Export failed");
                }
            }
            else
            {
                throw MemoryExportException("EnqueueOutput: Memory Export failed, backend does not support Export");
            }
        }
        else
        {
            throw MemoryExportException("EnqueueOutput: Memory Export failed, attempting to export Input Layer");
        }
    }
    else
    {
        // If we got here then we didn't export the memory, so add an output workload which performs a memcopy.
        outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
        info.m_InputTensorInfos.push_back(inputTensorInfo);

        std::unique_ptr<IWorkload> outputWorkload =
            std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
        ARMNN_ASSERT_MSG(outputWorkload, "No output workload created");

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
        if (timelineUtils)
        {
            // Add the output workload to the post-optimisation network structure
            AddWorkloadStructure(timelineUtils, outputWorkload, layer);
            timelineUtils->Commit();
        }

        m_OutputQueue.push_back(move(outputWorkload));
    }
}

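// Acquire backend memory managers and tensor handle factory memory ahead of execution.
// The caller must hold m_WorkingMemMutex (the lock_guard parameter proves this at
// compile time); a no-op if working memory is already allocated.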
void LoadedNetwork::AllocateWorkingMemory(std::lock_guard<std::mutex>& lock)
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Working Memory Allocation");

    // This unused parameter makes sure we can only call this function with a valid lock.
    IgnoreUnused(lock);

    if (m_IsWorkingMemAllocated)
    {
        return;
    }
    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        IBackendInternal::IMemoryManagerSharedPtr memoryManager = workloadFactory.second.second;
        if (memoryManager)
        {
            memoryManager->Acquire();
        }
    }
    m_TensorHandleFactoryRegistry.AquireMemory();
    m_IsWorkingMemAllocated = true;
}

void LoadedNetwork::FreeWorkingMemory()
{
    std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
    if (!m_IsWorkingMemAllocated)
    {
        return;
    }
    // Inform the memory managers to release memory in their respective memory groups
    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        IBackendInternal::IMemoryManagerSharedPtr memoryManager = workloadFactory.second.second;
        if (memoryManager)
        {
            memoryManager->Release();
        }
    }
    m_TensorHandleFactoryRegistry.ReleaseMemory();
    m_IsWorkingMemAllocated = false;
}

bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                            profiling::ProfilingGuid inferenceGuid)
{
    bool success = true;

    auto Fail = [&](const std::exception& error)
    {
        ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
        success = false;
    };

    try
    {
        std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
        AllocateWorkingMemory(lockGuard);

        ProfilingDynamicGuid workloadInferenceID(0);
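        // Run the three queues in order (inputs, main workloads, outputs). When
        // profiling is enabled, each workload execution is bracketed by
        // start-of-life and end-of-life timeline events tied to this inference.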
        auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](WorkloadQueue& queue)
        {
            for (auto& workload : queue)
            {
                if (timelineUtils)
                {
                    workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
                                                                                                    inferenceGuid);
                }
                workload->Execute();
                if (timelineUtils)
                {
                    timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
                }
            }
        };

        ExecuteQueue(m_InputQueue);
        ExecuteQueue(m_WorkloadQueue);
        ExecuteQueue(m_OutputQueue);
    }
    catch (const RuntimeException& error)
    {
        Fail(error);
    }
    catch (const std::runtime_error& error)
    {
        Fail(error);
    }

    return success;
}

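// Attach a user-supplied debug callback to every workload in the main queue, so that
// intermediate tensor data can be inspected during execution (for example when the
// optimizer has inserted Debug layers).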
void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func)
{
    for (auto&& workloadPtr : m_WorkloadQueue)
    {
        workloadPtr.get()->RegisterDebugCallback(func);
    }
}

} // namespace armnn