//
// Copyright © 2017-2023 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ClBackend.hpp"
#include "ClBackendContext.hpp"
#include "ClBackendDefaultAllocator.hpp"
#include "ClBackendId.hpp"
#include "ClBackendModelContext.hpp"
#include "ClImportTensorHandleFactory.hpp"
#include "ClLayerSupport.hpp"
#include "ClTensorHandleFactory.hpp"
#include "ClWorkloadFactory.hpp"

#include <armnn/BackendRegistry.hpp>
#include <armnn/Descriptors.hpp>

#include <aclCommon/ArmComputeSubgraphUtils.hpp>
#include <aclCommon/ArmComputeUtils.hpp>
#include <aclCommon/BaseMemoryManager.hpp>

#include <armnn/backends/IBackendContext.hpp>
#include <armnn/backends/IMemoryManager.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>

#include "workloads/ClAdditionWorkload.hpp"
#include "workloads/ClBatchNormalizationFloatWorkload.hpp"
#include "workloads/ClConvolution2dWorkload.hpp"
#include "workloads/ClDepthwiseConvolutionWorkload.hpp"
#include "workloads/ClDivisionWorkload.hpp"
#include "workloads/ClFullyConnectedWorkload.hpp"
#include "workloads/ClMultiplicationWorkload.hpp"
#include "workloads/ClReduceWorkload.hpp"
#include "workloads/ClSubtractionWorkload.hpp"

#include <Optimizer.hpp>

#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/CL/CLBufferAllocator.h>

namespace armnn
{

const BackendId& ClBackend::GetIdStatic()
{
    static const BackendId s_Id{ClBackendId()};
    return s_Id;
}

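// Creates the backend memory manager, backed by the registered custom allocator if one has been
// set, otherwise by Arm Compute Library's CLBufferAllocator.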
IBackendInternal::IMemoryManagerUniquePtr ClBackend::CreateMemoryManager() const
{
    if (m_UsingCustomAllocator)
    {
        return std::make_unique<ClMemoryManager>(m_CustomAllocator);
    }
    return std::make_unique<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
}

IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const
{
    return std::make_unique<ClWorkloadFactory>(
        PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
}

IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
{
    return std::make_unique<ClWorkloadFactory>(
        PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
}

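// The registry-based overloads below also create the backend's tensor handle factories: a copy
// factory (ClTensorHandleFactory) and an import factory (ClImportTensorHandleFactory), registered
// as a copy/import pair in both directions.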
IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    TensorHandleFactoryRegistry& registry) const
{
    std::shared_ptr<ClMemoryManager> memoryManager;
    if (m_UsingCustomAllocator)
    {
        memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
    }
    else
    {
        memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
    }

    std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
    std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
        static_cast<MemorySourceFlags>(MemorySource::Malloc), static_cast<MemorySourceFlags>(MemorySource::Malloc));

    registry.RegisterCopyAndImportFactoryPair(factory->GetId(), importFactory->GetId());
    registry.RegisterCopyAndImportFactoryPair(importFactory->GetId(), factory->GetId());

    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::move(factory));
    registry.RegisterFactory(std::move(importFactory));

    return std::make_unique<ClWorkloadFactory>(
        PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
}

IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    TensorHandleFactoryRegistry& registry, const ModelOptions& modelOptions) const
{
    std::shared_ptr<ClMemoryManager> memoryManager;
    if (m_UsingCustomAllocator)
    {
        memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
    }
    else
    {
        memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
    }

    std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
    std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
        static_cast<MemorySourceFlags>(MemorySource::Malloc), static_cast<MemorySourceFlags>(MemorySource::Malloc));

    registry.RegisterCopyAndImportFactoryPair(factory->GetId(), importFactory->GetId());
    registry.RegisterCopyAndImportFactoryPair(importFactory->GetId(), factory->GetId());

    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::move(factory));
    registry.RegisterFactory(std::move(importFactory));

    return std::make_unique<ClWorkloadFactory>(
        PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
}

IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    TensorHandleFactoryRegistry& registry,
    const ModelOptions& modelOptions,
    MemorySourceFlags inputFlags,
    MemorySourceFlags outputFlags) const
{
    // To allow forced import when inputFlags/outputFlags are Undefined, set them to Malloc.
    if (inputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined))
    {
        inputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);
    }
    if (outputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined))
    {
        outputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);
    }
    std::shared_ptr<ClMemoryManager> memoryManager;
    if (m_UsingCustomAllocator)
    {
        memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
    }
    else
    {
        memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
    }

    std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
    std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
        inputFlags, outputFlags);

    registry.RegisterCopyAndImportFactoryPair(factory->GetId(), importFactory->GetId());
    registry.RegisterCopyAndImportFactoryPair(importFactory->GetId(), factory->GetId());

    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::move(factory));
    registry.RegisterFactory(std::move(importFactory));

    return std::make_unique<ClWorkloadFactory>(
        PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
}

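// Handle factories are returned in order of preference: the standard copy factory first, then the import factory.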
std::vector<ITensorHandleFactory::FactoryId> ClBackend::GetHandleFactoryPreferences() const
{
    return std::vector<ITensorHandleFactory::FactoryId> {ClTensorHandleFactory::GetIdStatic(),
                                                         ClImportTensorHandleFactory::GetIdStatic()};
}

void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry)
{
    std::shared_ptr<ClMemoryManager> memoryManager;
    if (m_UsingCustomAllocator)
    {
        memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
    }
    else
    {
        memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
    }

    std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
    std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
        static_cast<MemorySourceFlags>(MemorySource::Malloc), static_cast<MemorySourceFlags>(MemorySource::Malloc));

    registry.RegisterCopyAndImportFactoryPair(factory->GetId(), importFactory->GetId());
    registry.RegisterCopyAndImportFactoryPair(importFactory->GetId(), factory->GetId());

    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::move(factory));
    registry.RegisterFactory(std::move(importFactory));
}

void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry,
                                              MemorySourceFlags inputFlags,
                                              MemorySourceFlags outputFlags)
{
    // To allow forced import when inputFlags/outputFlags are Undefined, set them to Malloc.
    if (inputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined))
    {
        inputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);
    }
    if (outputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined))
    {
        outputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);
    }
    std::shared_ptr<ClMemoryManager> memoryManager;
    if (m_UsingCustomAllocator)
    {
        memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
    }
    else
    {
        memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
    }

    std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
    std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
        inputFlags, outputFlags);

    registry.RegisterCopyAndImportFactoryPair(factory->GetId(), importFactory->GetId());
    registry.RegisterCopyAndImportFactoryPair(importFactory->GetId(), factory->GetId());

    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::move(factory));
    registry.RegisterFactory(std::move(importFactory));
}

IBackendInternal::IBackendContextPtr ClBackend::CreateBackendContext(const IRuntime::CreationOptions& options) const
{
    return IBackendContextPtr{new ClBackendContext{options}};
}

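// The CL backend does not provide a backend-specific profiling context.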
IBackendInternal::IBackendProfilingContextPtr ClBackend::CreateBackendProfilingContext(
    const IRuntime::CreationOptions&, IBackendProfilingPtr&)
{
    return IBackendProfilingContextPtr{};
}

IBackendInternal::IBackendSpecificModelContextPtr ClBackend::CreateBackendSpecificModelContext(
    const ModelOptions& modelOptions) const
{
    return IBackendSpecificModelContextPtr{new ClBackendModelContext{modelOptions}};
}

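// The layer support object is a function-local static, so it is created on first use and shared
// by subsequent calls.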
IBackendInternal::ILayerSupportSharedPtr ClBackend::GetLayerSupport() const
{
    static ILayerSupportSharedPtr layerSupport
    {
        new ClLayerSupport(IBackendInternal::IBackendSpecificModelContextPtr{})
    };
    return layerSupport;
}

IBackendInternal::ILayerSupportSharedPtr ClBackend::GetLayerSupport(const ModelOptions& modelOptions) const
{
    static ILayerSupportSharedPtr layerSupport
    {
        new ClLayerSupport(CreateBackendSpecificModelContext(modelOptions))
    };
    return layerSupport;
}

std::unique_ptr<ICustomAllocator> ClBackend::GetDefaultAllocator() const
{
    return std::make_unique<ClBackendDefaultAllocator>();
}

BackendCapabilities ClBackend::GetCapabilities() const
{
    // Add new capabilities here.
    return BackendCapabilities("GpuAcc",
                               {
                                   {"NonConstWeights", true},
                                   {"AsyncExecution", false},
                                   {"ProtectedContentAllocation", true},
                                   {"ConstantTensorsAsInputs", true},
                                   {"PreImportIOTensors", false},
                                   {"ExternallyManagedMemory", true},
                                   {"MultiAxisPacking", false},
                                   {"SingleAxisPacking", true},
                                   {"HasFp16", arm_compute::CLKernelLibrary::get().fp16_supported()}
                               });
}

OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
                                                  const ModelOptions& modelOptions) const
{
    OptimizationViews optimizationViews(modelOptions);

    auto it = subgraph.end();
    bool isFastMathEnabled = false;
    std::map<LayerGuid, Layer*> untouched;

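    // Record every layer in the subgraph as untouched; layers are removed from this map as they
    // get fused or replaced below.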
    while (it != subgraph.begin())
    {
        --it;
        Layer& base = *(PolymorphicDowncast<Layer*>(*it));
        untouched.insert({base.GetGuid(), &base});
    }

    it = subgraph.end();
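    // When the CL backend is compiled in, read the FastMath option from the backend-specific model options.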
#if defined(ARMCOMPUTECL_ENABLED)
    IBackendInternal::IBackendSpecificModelContextPtr modelContextPtr = CreateBackendSpecificModelContext(modelOptions);

    if (modelContextPtr)
    {
        auto clModelOptions = dynamic_cast<ClBackendModelContext*>(modelContextPtr.get());
        if (clModelOptions)
        {
            isFastMathEnabled = clModelOptions->IsFastMathEnabled();
        }
    }
#endif
    while (it != subgraph.begin())
    {
        --it;
        Layer& base = *(PolymorphicDowncast<Layer*>(*it));

        // Fuse activation into previous layer if supported by backend
        if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
            || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
            || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
            || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division
            || base.GetType() == LayerType::ElementwiseBinary)
            && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
        {
            for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
            {
                if (output->GetNumConnections() == 1)
                {
                    for (auto&& childInput : output->GetConnections())
                    {
                        if ((childInput->GetOwningLayer().GetType() == LayerType::Activation) &&
                            (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
                        {
                            Layer& child = childInput->GetOwningLayer();

                            auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);

                            const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") +
                                    base.GetName();

                            // Get params from activation layer
                            ActivationDescriptor activationDesc = activationLayer->GetParameters();

                            if (base.GetType() == LayerType::Convolution2d)
                            {
                                Convolution2dLayer* baseLayer = PolymorphicDowncast<Convolution2dLayer*>(&base);

                                Optional<TensorInfo> biases;

                                if (baseLayer->GetParameters().m_BiasEnabled)
                                {
                                    biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
                                }

                                arm_compute::Status status = ClConvolution2dWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetParameters(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        biases,
                                        isFastMathEnabled,
                                        &activationDesc);

                                if (status)
                                {
                                    FuseConvolution2dLayer<Convolution2dLayer>(optimizationViews,
                                                                               baseLayer,
                                                                               activationLayer,
                                                                               activationDesc,
                                                                               name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::DepthwiseConvolution2d)
                            {
                                DepthwiseConvolution2dLayer* baseLayer =
                                        PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);

                                Optional<TensorInfo> biases;

                                if (baseLayer->GetParameters().m_BiasEnabled)
                                {
                                    biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
                                }

                                arm_compute::Status status = ClDepthwiseConvolutionWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetParameters(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        biases,
                                        &activationDesc);

                                if (status)
                                {
                                    FuseDepthwiseConvolution2dLayer<DepthwiseConvolution2dLayer>(optimizationViews,
                                                                                                 baseLayer,
                                                                                                 activationLayer,
                                                                                                 activationDesc,
                                                                                                 name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::FullyConnected)
                            {
                                FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);
                                FullyConnectedDescriptor descriptor = baseLayer->GetParameters();

                                // As bias is optional, only try to get its TensorInfo from the input if bias is enabled.
                                Optional<TensorInfo> biases;
                                if (descriptor.m_BiasEnabled)
                                {
                                    biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
                                }

                                arm_compute::Status status = ClFullyConnectedWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        biases,
                                        baseLayer->GetParameters(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseFullyConnectedLayer<FullyConnectedLayer>(optimizationViews,
                                                                                 baseLayer,
                                                                                 activationLayer,
                                                                                 activationDesc,
                                                                                 name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::BatchNormalization)
                            {
                                BatchNormalizationLayer* baseLayer =
                                        PolymorphicDowncast<BatchNormalizationLayer*>(&base);

                                arm_compute::Status status = ClBatchNormalizationValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->m_Mean->GetTensorInfo(),
                                        baseLayer->m_Variance->GetTensorInfo(),
                                        baseLayer->m_Beta->GetTensorInfo(),
                                        baseLayer->m_Gamma->GetTensorInfo(),
                                        baseLayer->GetParameters(),
                                        &activationDesc);

                                if (status)
                                {
                                    BatchNormalizationLayer* replacementLayer =
                                            FuseBatchNormalizationLayer<BatchNormalizationLayer>(optimizationViews,
                                                                                                 baseLayer,
                                                                                                 activationLayer,
                                                                                                 activationDesc,
                                                                                                 name);

                                    replacementLayer->m_Beta = std::move(baseLayer->m_Beta);
                                    replacementLayer->m_Gamma = std::move(baseLayer->m_Gamma);
                                    replacementLayer->m_Mean = std::move(baseLayer->m_Mean);
                                    replacementLayer->m_Variance = std::move(baseLayer->m_Variance);

                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Addition)
                            {
                                AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);

                                arm_compute::Status status = ClAdditionValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseAdditionLayer<AdditionLayer>(optimizationViews,
                                                                     baseLayer,
                                                                     activationLayer,
                                                                     activationDesc,
                                                                     name);

                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Division)
                            {
                                DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);

                                arm_compute::Status status = ClDivisionWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseDivisionLayer<DivisionLayer>(optimizationViews,
                                                                     baseLayer,
                                                                     activationLayer,
                                                                     activationDesc,
                                                                     name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Multiplication)
                            {
                                MultiplicationLayer* baseLayer = PolymorphicDowncast<MultiplicationLayer*>(&base);

                                arm_compute::Status status = ClMultiplicationWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseMultiplicationLayer<MultiplicationLayer>(optimizationViews,
                                                                                 baseLayer,
                                                                                 activationLayer,
                                                                                 activationDesc,
                                                                                 name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Subtraction)
                            {
                                SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);

                                arm_compute::Status status = ClSubtractionValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseSubtractionLayer<SubtractionLayer>(optimizationViews,
                                                                           baseLayer,
                                                                           activationLayer,
                                                                           activationDesc,
                                                                           name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::ElementwiseBinary)
                            {
                                ElementwiseBinaryLayer* baseLayer = PolymorphicDowncast<ElementwiseBinaryLayer*>(&base);

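                                // Dispatch on the binary operation to the matching CL validate and fuse path.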
                                if (baseLayer->GetParameters().m_Operation == BinaryOperation::Add)
                                {
                                    arm_compute::Status status = ClAdditionValidate(
                                            baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                            activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            &activationDesc);

                                    if (status)
                                    {
                                        FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
                                                                                           baseLayer,
                                                                                           activationLayer,
                                                                                           activationDesc,
                                                                                           BinaryOperation::Add,
                                                                                           name);
                                        untouched.erase(baseLayer->GetGuid());
                                        untouched.erase(activationLayer->GetGuid());
                                    }
                                }
                                else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Div)
                                {
                                    arm_compute::Status status = ClDivisionWorkloadValidate(
                                            baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                            activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            &activationDesc);

                                    if (status)
                                    {
                                        FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
                                                                                           baseLayer,
                                                                                           activationLayer,
                                                                                           activationDesc,
                                                                                           BinaryOperation::Div,
                                                                                           name);
                                        untouched.erase(baseLayer->GetGuid());
                                        untouched.erase(activationLayer->GetGuid());
                                    }
                                }
                                else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Mul)
                                {
                                    arm_compute::Status status = ClMultiplicationWorkloadValidate(
                                            baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                            activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            &activationDesc);

                                    if (status)
                                    {
                                        FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
                                                                                           baseLayer,
                                                                                           activationLayer,
                                                                                           activationDesc,
                                                                                           BinaryOperation::Mul,
                                                                                           name);
                                        untouched.erase(baseLayer->GetGuid());
                                        untouched.erase(activationLayer->GetGuid());
                                    }
                                }
                                else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Sub)
                                {
                                    arm_compute::Status status = ClSubtractionValidate(
                                            baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                            activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            &activationDesc);

                                    if (status)
                                    {
                                        FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
                                                                                           baseLayer,
                                                                                           activationLayer,
                                                                                           activationDesc,
                                                                                           BinaryOperation::Sub,
                                                                                           name);
                                        untouched.erase(baseLayer->GetGuid());
                                        untouched.erase(activationLayer->GetGuid());
                                    }
                                }
                                // No fusion available for other BinaryOperations
                            }
                        }
                    }
                }
            }
        }

        // Separate reduce layer with multiple axes into multiple reduce layers with 1 axis.
        if (base.GetType() == LayerType::Reduce)
        {
            ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base);
            ReduceDescriptor reduceDescriptor = baseLayer->GetParameters();

            if (!reduceDescriptor.m_vAxis.empty() && reduceDescriptor.m_vAxis.size() > 1)
            {
                // Add new layers to the graph and connect them.
                std::vector<IConnectableLayer*> layers = ChainReduceLayers<ReduceLayer>(optimizationViews,
                                                                                        baseLayer,
                                                                                        reduceDescriptor);

                // Replace existing baselayer with new subgraph.
                ReplaceLayers<ReduceLayer>(optimizationViews, baseLayer, layers);
                untouched.erase(baseLayer->GetGuid());
            }
        }

        // Remove Reshape where possible
        if (base.GetType() == LayerType::Reshape)
        {
            ReshapeLayer* baseLayer = PolymorphicDowncast<ReshapeLayer*>(&base);

            // Cannot remove a Reshape if it's connected to any layer that has an NCHW layout
            if (ConnectedToLayerWithNCHW(baseLayer))
            {
                continue;
            }
            RemoveReshapeLayer(baseLayer, untouched, optimizationViews);
        }

        // Special case to fuse padding into average pooling 2d for quantized datatype.
        // Required to be done as a backend specific optimization as Neon does not support this special case.
        if (base.GetType() == LayerType::Pooling2d)
        {
            Pooling2dLayer* baseLayer = PolymorphicDowncast<Pooling2dLayer*>(&base);
            Pooling2dDescriptor poolingDescriptor = baseLayer->GetParameters();

            if (baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer().GetType() == LayerType::Pad)
            {
                PadLayer* padLayer = PolymorphicDowncast<PadLayer*>(
                        &baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer());
                if (padLayer->GetOutputSlot(0).GetNumConnections() == 1 &&
                    optimizations::pad_fold::TryFoldPadIntoLayer2d(padLayer->GetParameters(),
                                                                   poolingDescriptor,
                                                                   padLayer->GetOutputSlot().GetTensorInfo(),
                                                                   true))
                {
                    FoldPadIntoAveragePool2d<Pooling2dLayer>(optimizationViews, baseLayer,
                                                             poolingDescriptor, padLayer);
                    untouched.erase(baseLayer->GetGuid());
                    untouched.erase(padLayer->GetGuid());
                }
            }
        }
    }

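    // If nothing was substituted or deleted, report the whole subgraph as untouched; otherwise
    // report only the layers that were left untouched.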
    if (optimizationViews.GetSubstitutions().empty() && optimizationViews.GetDeletedSubgraphs().empty())
    {
        optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
    }
    else
    {
        ReportUntouchedLayers(optimizationViews, untouched);
    }

    return optimizationViews;
}

} // namespace armnn