blob: ebce1a69dea116d5d7531c458b2c1f01da4cfe80 [file] [log] [blame]
arovir014424b0a2018-10-04 10:46:04 +01001//
Mike Kelly3ec30772023-03-08 13:47:17 +00002// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
arovir014424b0a2018-10-04 10:46:04 +01003// SPDX-License-Identifier: MIT
4//
5
6#include "NeonBackend.hpp"
David Beck3e9e1152018-10-17 14:17:50 +01007#include "NeonBackendId.hpp"
Sadik Armagan045f6be2020-09-10 13:37:32 +01008#include "NeonBackendModelContext.hpp"
arovir01a0944792018-10-11 15:00:58 +01009#include "NeonWorkloadFactory.hpp"
David Beck111b5d92018-11-12 14:59:37 +000010#include "NeonLayerSupport.hpp"
Narumol Prangnawarat4e3e8182019-08-14 12:25:50 +010011#include "NeonTensorHandleFactory.hpp"
Tracy Narine6440ce82023-09-20 14:19:07 +010012#include "NeonBackendOptimizationUtils.hpp"
arovir01a0944792018-10-11 15:00:58 +010013
Matteo Martincighc601aa62019-10-29 15:03:22 +000014#include <armnn/BackendRegistry.hpp>
Mike Kelly07810fc2020-11-12 10:58:48 +000015#include <armnn/Descriptors.hpp>
Matteo Martincighc601aa62019-10-29 15:03:22 +000016
Mike Kelly07810fc2020-11-12 10:58:48 +000017#include <aclCommon/ArmComputeSubgraphUtils.hpp>
18#include <aclCommon/ArmComputeUtils.hpp>
Aron Virginas-Tar56055192018-11-12 18:10:43 +000019#include <aclCommon/BaseMemoryManager.hpp>
20
Matteo Martincighe5b8eb92019-11-28 15:45:42 +000021#include <armnn/backends/IBackendContext.hpp>
22#include <armnn/backends/IMemoryManager.hpp>
Aron Virginas-Tar56055192018-11-12 18:10:43 +000023
Jan Eilers3c9e0452020-04-10 13:00:44 +010024#include <armnn/utility/PolymorphicDowncast.hpp>
25
Francis Murtaghe8d7ccb2021-10-14 17:30:24 +010026#include <neon/workloads/NeonAdditionWorkload.hpp>
27#include <neon/workloads/NeonBatchNormalizationWorkload.hpp>
28#include <neon/workloads/NeonConvolution2dWorkload.hpp>
29#include <neon/workloads/NeonDepthwiseConvolutionWorkload.hpp>
30#include <neon/workloads/NeonDivisionWorkload.hpp>
31#include <neon/workloads/NeonFullyConnectedWorkload.hpp>
Tracy Narine6440ce82023-09-20 14:19:07 +010032#include <neon/workloads/NeonFusedWorkload.hpp>
Francis Murtaghe8d7ccb2021-10-14 17:30:24 +010033#include <neon/workloads/NeonMultiplicationWorkload.hpp>
34#include <neon/workloads/NeonReduceWorkload.hpp>
35#include <neon/workloads/NeonSubtractionWorkload.hpp>
36#include <backendsCommon/DefaultAllocator.hpp>
Mike Kelly07810fc2020-11-12 10:58:48 +000037
David Beck263e3492018-11-09 14:46:40 +000038#include <Optimizer.hpp>
arovir01a0944792018-10-11 15:00:58 +010039
Mike Kelly07810fc2020-11-12 10:58:48 +000040#include <arm_compute/core/Types.h>
Aron Virginas-Tar56055192018-11-12 18:10:43 +000041#include <arm_compute/runtime/Allocator.h>
42
arovir014424b0a2018-10-04 10:46:04 +010043namespace armnn
44{
45
David Beck3cc9a622018-10-12 10:38:31 +010046const BackendId& NeonBackend::GetIdStatic()
arovir014424b0a2018-10-04 10:46:04 +010047{
David Beck3e9e1152018-10-17 14:17:50 +010048 static const BackendId s_Id{NeonBackendId()};
arovir014424b0a2018-10-04 10:46:04 +010049 return s_Id;
50}
51
Aron Virginas-Tar56055192018-11-12 18:10:43 +000052IBackendInternal::IMemoryManagerUniquePtr NeonBackend::CreateMemoryManager() const
arovir014424b0a2018-10-04 10:46:04 +010053{
Aron Virginas-Tar56055192018-11-12 18:10:43 +000054 return std::make_unique<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
Sadik Armagan13a9fa62019-04-26 16:04:34 +010055 BaseMemoryManager::MemoryAffinity::Offset);
Aron Virginas-Tar56055192018-11-12 18:10:43 +000056}
57
58IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
59 const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const
60{
61 return std::make_unique<NeonWorkloadFactory>(
Jan Eilers3c9e0452020-04-10 13:00:44 +010062 PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
arovir014424b0a2018-10-04 10:46:04 +010063}
64
Narumol Prangnawarat4e3e8182019-08-14 12:25:50 +010065IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
Sadik Armagan04a72972020-09-14 15:44:18 +010066 const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
67{
68 return std::make_unique<NeonWorkloadFactory>(
69 PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
70}
71
72IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
Narumol Prangnawarat4e3e8182019-08-14 12:25:50 +010073 class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const
74{
75 auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
76 BaseMemoryManager::MemoryAffinity::Offset);
77
78 tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager);
Narumol Prangnawarat77400452022-01-13 17:43:41 +000079
80 auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
81 // Register copy and import factory pair
82 tensorHandleFactoryRegistry.RegisterCopyAndImportFactoryPair(factory->GetId(), factory->GetId());
83 // Register the factory
84 tensorHandleFactoryRegistry.RegisterFactory(std::move(factory));
85
Narumol Prangnawarat549cb7a2020-07-10 17:50:53 +010086
Narumol Prangnawarat4e3e8182019-08-14 12:25:50 +010087 return std::make_unique<NeonWorkloadFactory>(
Jan Eilers3c9e0452020-04-10 13:00:44 +010088 PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
Narumol Prangnawarat4e3e8182019-08-14 12:25:50 +010089}
90
Sadik Armagan04a72972020-09-14 15:44:18 +010091IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
92 TensorHandleFactoryRegistry& tensorHandleFactoryRegistry, const ModelOptions& modelOptions) const
93{
94 auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
95 BaseMemoryManager::MemoryAffinity::Offset);
96
97 tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager);
Narumol Prangnawarat77400452022-01-13 17:43:41 +000098
99 auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
100 // Register copy and import factory pair
101 tensorHandleFactoryRegistry.RegisterCopyAndImportFactoryPair(factory->GetId(), factory->GetId());
102 // Register the factory
103 tensorHandleFactoryRegistry.RegisterFactory(std::move(factory));
Sadik Armagan04a72972020-09-14 15:44:18 +0100104
105 return std::make_unique<NeonWorkloadFactory>(
106 PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
107}
108
David Beck263e3492018-11-09 14:46:40 +0000109IBackendInternal::IBackendContextPtr NeonBackend::CreateBackendContext(const IRuntime::CreationOptions&) const
110{
111 return IBackendContextPtr{};
112}
113
Colm Donelane49755b2020-01-29 15:22:43 +0000114IBackendInternal::IBackendProfilingContextPtr NeonBackend::CreateBackendProfilingContext(
Colm Donelan1aff3932020-02-05 17:48:59 +0000115 const IRuntime::CreationOptions&, IBackendProfilingPtr&)
Colm Donelane49755b2020-01-29 15:22:43 +0000116{
117 return IBackendProfilingContextPtr{};
118}
119
Sadik Armagan045f6be2020-09-10 13:37:32 +0100120IBackendInternal::IBackendSpecificModelContextPtr NeonBackend::CreateBackendSpecificModelContext(
121 const ModelOptions& modelOptions) const
122{
123 return IBackendSpecificModelContextPtr{new NeonBackendModelContext{modelOptions}};
124}
125
David Beck111b5d92018-11-12 14:59:37 +0000126IBackendInternal::ILayerSupportSharedPtr NeonBackend::GetLayerSupport() const
127{
Sadik Armagan045f6be2020-09-10 13:37:32 +0100128 static ILayerSupportSharedPtr layerSupport
129 {
130 new NeonLayerSupport(IBackendInternal::IBackendSpecificModelContextPtr{})
131 };
132 return layerSupport;
133}
134
135IBackendInternal::ILayerSupportSharedPtr NeonBackend::GetLayerSupport(const ModelOptions& modelOptions) const
136{
137 static ILayerSupportSharedPtr layerSupport
138 {
139 new NeonLayerSupport(CreateBackendSpecificModelContext(modelOptions))
140 };
David Beck111b5d92018-11-12 14:59:37 +0000141 return layerSupport;
142}
143
// Backend-specific graph optimisation pass for Neon (CPU/ACL).
// Walks the subgraph backwards and, where the Arm Compute Library validation
// succeeds, performs:
//   1. Fusion of an Activation layer into a preceding compute layer
//      (Conv2d, DepthwiseConv2d, FullyConnected, BatchNorm, Add/Mul/Sub/Div,
//      ElementwiseBinary with Add/Div/Mul/Sub operations).
//   2. Splitting of a multi-axis Reduce into a chain of single-axis Reduces.
//   3. Removal of redundant Reshape layers.
//   4. Replacement of an Add/Mul/Add sequence (optionally followed by
//      ReLu/BoundedReLu) with a single fused AddMulAdd kernel.
// Layers left alone are tracked in 'untouched' and reported back so the
// optimizer knows they still need scheduling.
OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
                                                    const ModelOptions& modelOptions) const
{
    OptimizationViews optimizationViews(modelOptions);

    auto it = subgraph.end();
    // Guid -> layer map of everything in the subgraph; entries are erased as
    // layers get fused/replaced, so what remains at the end is truly untouched.
    std::map<LayerGuid, Layer*> untouched;

    // First reverse pass: record every layer as untouched.
    while (it != subgraph.begin())
    {
        --it;
        Layer& base = *(PolymorphicDowncast<Layer*>(*it));
        untouched.insert({base.GetGuid(), &base});
    }

    // Second reverse pass: attempt the rewrites.
    it = subgraph.end();
    while (it != subgraph.begin())
    {
        --it;
        Layer& base = *(PolymorphicDowncast<Layer*>(*it));

        // Fuse activation into previous layer if supported by backend.
        // Only considered when the base layer has no activation already fused
        // (GetAdditionalInformation<ActivationDescriptor>() == nullptr).
        if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
             || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
             || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
             || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division
             || base.GetType() == LayerType::ElementwiseBinary)
            && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
        {
            for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
            {
                // Fusing is only safe when the output feeds exactly one consumer.
                if (output->GetNumConnections() == 1)
                {
                    for (auto&& childInput : output->GetConnections())
                    {
                        if ((childInput->GetOwningLayer().GetType() == LayerType::Activation) &&
                            (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
                        {
                            Layer& child = childInput->GetOwningLayer();

                            auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);

                            // Name records what was fused into what, for debugging.
                            const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") +
                                                     base.GetName();

                            // Get params from activation layer
                            ActivationDescriptor activationDesc = activationLayer->GetParameters();

                            if (base.GetType() == LayerType::Convolution2d)
                            {
                                Convolution2dLayer* baseLayer = PolymorphicDowncast<Convolution2dLayer*>(&base);

                                // Bias is optional: input slot 2 only exists/connects when enabled.
                                Optional<TensorInfo> biases;

                                if (baseLayer->GetParameters().m_BiasEnabled)
                                {
                                    biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
                                }

                                // Ask ACL whether conv+activation is supported for these tensors.
                                arm_compute::Status status = NeonConvolution2dWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetParameters(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        biases,
                                        false,
                                        &activationDesc);

                                if (status)
                                {
                                    FuseConvolution2dLayer<Convolution2dLayer>(optimizationViews,
                                                                               baseLayer,
                                                                               activationLayer,
                                                                               activationDesc,
                                                                               name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::DepthwiseConvolution2d)
                            {
                                DepthwiseConvolution2dLayer* baseLayer =
                                        PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);

                                // Bias is optional: input slot 2 only exists/connects when enabled.
                                Optional<TensorInfo> biases;

                                if (baseLayer->GetParameters().m_BiasEnabled)
                                {
                                    biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
                                }

                                arm_compute::Status status = NeonDepthwiseConvolutionWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetParameters(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        biases,
                                        &activationDesc);

                                if (status)
                                {
                                    FuseDepthwiseConvolution2dLayer<DepthwiseConvolution2dLayer>(optimizationViews,
                                                                                                 baseLayer,
                                                                                                 activationLayer,
                                                                                                 activationDesc,
                                                                                                 name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::FullyConnected)
                            {
                                FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);
                                FullyConnectedDescriptor descriptor = baseLayer->GetParameters();

                                // As bias is optional only try to get TensorInfo from input if bias is enabled.
                                Optional<TensorInfo> biases;
                                if (descriptor.m_BiasEnabled)
                                {
                                    biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
                                }

                                arm_compute::Status status = NeonFullyConnectedWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        biases,
                                        baseLayer->GetParameters(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseFullyConnectedLayer<FullyConnectedLayer>(optimizationViews,
                                                                                 baseLayer,
                                                                                 activationLayer,
                                                                                 activationDesc,
                                                                                 name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::BatchNormalization)
                            {
                                BatchNormalizationLayer* baseLayer =
                                        PolymorphicDowncast<BatchNormalizationLayer*>(&base);

                                arm_compute::Status status = NeonBatchNormalizationValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->m_Mean->GetTensorInfo(),
                                        baseLayer->m_Variance->GetTensorInfo(),
                                        baseLayer->m_Beta->GetTensorInfo(),
                                        baseLayer->m_Gamma->GetTensorInfo(),
                                        baseLayer->GetParameters(),
                                        &activationDesc);

                                if (status)
                                {
                                    BatchNormalizationLayer* replacementLayer =
                                            FuseBatchNormalizationLayer<BatchNormalizationLayer>(optimizationViews,
                                                                                                 baseLayer,
                                                                                                 activationLayer,
                                                                                                 activationDesc,
                                                                                                 name);

                                    // The fused layer takes ownership of the original
                                    // batch-norm constant tensors.
                                    replacementLayer->m_Beta = std::move(baseLayer->m_Beta);
                                    replacementLayer->m_Gamma = std::move(baseLayer->m_Gamma);
                                    replacementLayer->m_Mean = std::move(baseLayer->m_Mean);
                                    replacementLayer->m_Variance = std::move(baseLayer->m_Variance);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Addition)
                            {
                                AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);

                                arm_compute::Status status = NeonAdditionWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseAdditionLayer<AdditionLayer>(optimizationViews,
                                                                     baseLayer,
                                                                     activationLayer,
                                                                     activationDesc,
                                                                     name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Division)
                            {
                                DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);

                                arm_compute::Status status = NeonDivisionWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseDivisionLayer<DivisionLayer>(optimizationViews,
                                                                     baseLayer,
                                                                     activationLayer,
                                                                     activationDesc,
                                                                     name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Multiplication)
                            {
                                MultiplicationLayer* baseLayer = PolymorphicDowncast<MultiplicationLayer*>(&base);

                                arm_compute::Status status = NeonMultiplicationWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseMultiplicationLayer<MultiplicationLayer>(optimizationViews,
                                                                                 baseLayer,
                                                                                 activationLayer,
                                                                                 activationDesc,
                                                                                 name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Subtraction)
                            {
                                SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);

                                arm_compute::Status status = NeonSubtractionWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseSubtractionLayer<SubtractionLayer>(optimizationViews,
                                                                           baseLayer,
                                                                           activationLayer,
                                                                           activationDesc,
                                                                           name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::ElementwiseBinary)
                            {
                                ElementwiseBinaryLayer* baseLayer = PolymorphicDowncast<ElementwiseBinaryLayer*>(&base);

                                // Dispatch on the binary operation; each case validates
                                // against the corresponding ACL workload before fusing.
                                if (baseLayer->GetParameters().m_Operation == BinaryOperation::Add)
                                {
                                    arm_compute::Status status = NeonAdditionWorkloadValidate(
                                            baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                            activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            &activationDesc);

                                    if (status)
                                    {
                                        FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
                                                                                           baseLayer,
                                                                                           activationLayer,
                                                                                           activationDesc,
                                                                                           BinaryOperation::Add,
                                                                                           name);
                                        untouched.erase(baseLayer->GetGuid());
                                        untouched.erase(activationLayer->GetGuid());
                                    }
                                }
                                else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Div)
                                {
                                    arm_compute::Status status = NeonDivisionWorkloadValidate(
                                            baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                            activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            &activationDesc);

                                    if (status)
                                    {
                                        FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
                                                                                           baseLayer,
                                                                                           activationLayer,
                                                                                           activationDesc,
                                                                                           BinaryOperation::Div,
                                                                                           name);
                                        untouched.erase(baseLayer->GetGuid());
                                        untouched.erase(activationLayer->GetGuid());
                                    }
                                }
                                else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Mul)
                                {
                                    arm_compute::Status status = NeonMultiplicationWorkloadValidate(
                                            baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                            activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            &activationDesc);

                                    if (status)
                                    {
                                        FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
                                                                                           baseLayer,
                                                                                           activationLayer,
                                                                                           activationDesc,
                                                                                           BinaryOperation::Mul,
                                                                                           name);
                                        untouched.erase(baseLayer->GetGuid());
                                        untouched.erase(activationLayer->GetGuid());
                                    }
                                }
                                else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Sub)
                                {
                                    arm_compute::Status status = NeonSubtractionWorkloadValidate(
                                            baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                            activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            &activationDesc);

                                    if (status)
                                    {
                                        FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
                                                                                           baseLayer,
                                                                                           activationLayer,
                                                                                           activationDesc,
                                                                                           BinaryOperation::Sub,
                                                                                           name);
                                        untouched.erase(baseLayer->GetGuid());
                                        untouched.erase(activationLayer->GetGuid());
                                    }
                                }
                                // No fusion available for other BinaryOperations
                            }
                        }
                    }
                }
            }
        }

        // Separate reduce layer with multiple axes into multiple reduce layers with 1 axis.
        if (base.GetType() == LayerType::Reduce)
        {
            ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base);
            ReduceDescriptor reduceDescriptor = baseLayer->GetParameters();

            if (!reduceDescriptor.m_vAxis.empty() && reduceDescriptor.m_vAxis.size() > 1)
            {
                // Add new layers to the graph and connect them.
                std::vector<IConnectableLayer*> layers = ChainReduceLayers<ReduceLayer>(optimizationViews,
                                                                                        baseLayer,
                                                                                        reduceDescriptor);

                // Replace existing baselayer with new subgraph.
                ReplaceLayers<ReduceLayer>(optimizationViews, baseLayer, layers);
                untouched.erase(baseLayer->GetGuid());
            }
        }

        // Remove Reshape where possible
        if (base.GetType() == LayerType::Reshape)
        {
            ReshapeLayer* baseLayer = PolymorphicDowncast<ReshapeLayer*>(&base);

            // Cannot remove a Reshape if it's connected to any layer that has an NCHW layout
            if (ConnectedToLayerWithNCHW(baseLayer))
            {
                continue;
            }
            RemoveReshapeLayer(baseLayer, untouched, optimizationViews);
        }

        // Replace Add/Mul/Add where possible
        // layerList collects the matched sequence: Add, Mul, Add, and optionally
        // a trailing ReLu/BoundedReLu activation.
        Layer* layerList[4] = {nullptr, nullptr, nullptr, nullptr};
        const std::vector<ActivationFunction> validActivates = { ActivationFunction::ReLu,
                                                                 ActivationFunction::BoundedReLu };
        if (IsLayerSequence<BinaryOperation>(base,
                                             BinaryOperation::Add, BinaryOperation::Mul, BinaryOperation::Add,
                                             layerList,
                                             true, // handleValidActivates
                                             validActivates))
        {
            bool fuseReLu = false;
            unsigned int numInputs = 0;
            unsigned int numOutputs = 0;
            std::vector<TensorInfo> inputInfos;
            std::vector<TensorInfo> outputInfos;
            const ActivationDescriptor* activationDescriptor = nullptr;

            if (BuildAddMulAddTensorInfoLists<Layer>(layerList,
                                                     numInputs,
                                                     numOutputs,
                                                     inputInfos,
                                                     outputInfos,
                                                     activationDescriptor,
                                                     fuseReLu))
            {
                // Create the new Add/Mul/Add layer and set the Relu activation function
                FusedDescriptor fusedDescriptor(numInputs, numOutputs, FusedKernelType::AddMulAdd);
                arm_compute::Status status = NeonFusedWorkloadValidate({inputInfos.begin(), inputInfos.end()},
                                                                       {outputInfos.begin(), outputInfos.end()},
                                                                       fusedDescriptor,
                                                                       activationDescriptor);
                if (status)
                {
                    std::string fusedName;
                    GetFusedName(layerList, fusedName);

                    IConnectableLayer* addMulAddLayer =
                            optimizationViews.GetINetwork()->AddFusedLayer(fusedDescriptor, fusedName.c_str());

                    if (fuseReLu)
                    {
                        // Carry the activation over as additional info on the fused layer.
                        FusedLayer* addMulAddFusedLayer = PolymorphicDowncast<FusedLayer*>(addMulAddLayer);
                        addMulAddFusedLayer->SetAdditionalInfoForObject(
                                std::make_shared<ActivationDescriptor>(*activationDescriptor));
                    }

                    // Update the graph
                    std::vector<IConnectableLayer*> originalLayers;
                    for (unsigned int layerIdx = 0; layerIdx < 4; ++layerIdx)
                    {
                        if (layerList[layerIdx])
                        {
                            originalLayers.push_back(layerList[layerIdx]);
                        }
                    }

                    std::vector<SlotList> inputLayersSlotLists, outputLayersSlotLists;
                    BuildAddMulAddSlotLists<SlotList>(fuseReLu,
                                                      outputInfos.size() > 1,
                                                      inputLayersSlotLists,
                                                      outputLayersSlotLists);

                    ReplaceMultipleLayers<FusedLayer>(optimizationViews,
                                                      originalLayers,
                                                      PolymorphicDowncast<FusedLayer*>(addMulAddLayer),
                                                      inputLayersSlotLists,
                                                      outputLayersSlotLists);

                    // Remove unused layers
                    for (unsigned int layerIdx = 0; layerIdx < 4; ++layerIdx)
                    {
                        if (layerList[layerIdx])
                        {
                            untouched.erase(layerList[layerIdx]->GetGuid());
                        }
                    }
                }
            }
        }
    }

    // If nothing was rewritten, hand the whole subgraph back untouched;
    // otherwise report the layers that survived all rewrites.
    if (optimizationViews.GetSubstitutions().empty() && optimizationViews.GetDeletedSubgraphs().empty())
    {
        optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
    }
    else
    {
        ReportUntouchedLayers(optimizationViews, untouched);
    }

    return optimizationViews;
}
617
Narumol Prangnawarat4e3e8182019-08-14 12:25:50 +0100618std::vector<ITensorHandleFactory::FactoryId> NeonBackend::GetHandleFactoryPreferences() const
619{
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000620 return std::vector<ITensorHandleFactory::FactoryId>() = { NeonTensorHandleFactory::GetIdStatic() };
Narumol Prangnawarat4e3e8182019-08-14 12:25:50 +0100621}
622
623void NeonBackend::RegisterTensorHandleFactories(class TensorHandleFactoryRegistry& registry)
624{
625 auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
626 BaseMemoryManager::MemoryAffinity::Offset);
627
628 registry.RegisterMemoryManager(memoryManager);
Narumol Prangnawarat77400452022-01-13 17:43:41 +0000629
630 auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
631 // Register copy and import factory pair
632 registry.RegisterCopyAndImportFactoryPair(factory->GetId(), factory->GetId());
633 // Register the factory
634 registry.RegisterFactory(std::move(factory));
Narumol Prangnawarat4e3e8182019-08-14 12:25:50 +0100635}
636
Francis Murtaghe8d7ccb2021-10-14 17:30:24 +0100637std::unique_ptr<ICustomAllocator> NeonBackend::GetDefaultAllocator() const
638{
639 return std::make_unique<DefaultAllocator>();
640}
641
642
Matthew Bentham42bad952018-12-17 09:23:36 +0000643} // namespace armnn