//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
5
6#include "ClBackend.hpp"
David Beck3e9e1152018-10-17 14:17:50 +01007#include "ClBackendId.hpp"
Sadik Armagan045f6be2020-09-10 13:37:32 +01008#include "ClBackendModelContext.hpp"
arovir01a0944792018-10-11 15:00:58 +01009#include "ClWorkloadFactory.hpp"
David Beck1b61be52018-11-08 09:19:14 +000010#include "ClBackendContext.hpp"
David Beck111b5d92018-11-12 14:59:37 +000011#include "ClLayerSupport.hpp"
Jan Eilerse9f0f0f2019-08-16 10:28:37 +010012#include "ClTensorHandleFactory.hpp"
arovir01a0944792018-10-11 15:00:58 +010013
Matteo Martincighc601aa62019-10-29 15:03:22 +000014#include <armnn/BackendRegistry.hpp>
Mike Kelly07810fc2020-11-12 10:58:48 +000015#include <armnn/Descriptors.hpp>
Matteo Martincighc601aa62019-10-29 15:03:22 +000016
Mike Kelly07810fc2020-11-12 10:58:48 +000017#include <aclCommon/ArmComputeSubgraphUtils.hpp>
18#include <aclCommon/ArmComputeUtils.hpp>
Aron Virginas-Tar56055192018-11-12 18:10:43 +000019#include <aclCommon/BaseMemoryManager.hpp>
20
Matteo Martincighe5b8eb92019-11-28 15:45:42 +000021#include <armnn/backends/IBackendContext.hpp>
22#include <armnn/backends/IMemoryManager.hpp>
Jan Eilers3c9e0452020-04-10 13:00:44 +010023#include <armnn/utility/PolymorphicDowncast.hpp>
24
Mike Kelly07810fc2020-11-12 10:58:48 +000025#include "workloads/ClAdditionWorkload.hpp"
26#include "workloads/ClBatchNormalizationFloatWorkload.hpp"
27#include "workloads/ClConvolution2dWorkload.hpp"
28#include "workloads/ClDepthwiseConvolutionWorkload.hpp"
29#include "workloads/ClDivisionFloatWorkload.hpp"
30#include "workloads/ClFullyConnectedWorkload.hpp"
31#include "workloads/ClMultiplicationWorkload.hpp"
32#include "workloads/ClSubtractionWorkload.hpp"
33
David Beck263e3492018-11-09 14:46:40 +000034#include <Optimizer.hpp>
arovir014424b0a2018-10-04 10:46:04 +010035
Mike Kelly07810fc2020-11-12 10:58:48 +000036#include <arm_compute/core/Types.h>
Aron Virginas-Tar56055192018-11-12 18:10:43 +000037#include <arm_compute/runtime/CL/CLBufferAllocator.h>
38
arovir014424b0a2018-10-04 10:46:04 +010039namespace armnn
40{
41
David Beck3cc9a622018-10-12 10:38:31 +010042const BackendId& ClBackend::GetIdStatic()
arovir014424b0a2018-10-04 10:46:04 +010043{
David Beck3e9e1152018-10-17 14:17:50 +010044 static const BackendId s_Id{ClBackendId()};
arovir014424b0a2018-10-04 10:46:04 +010045 return s_Id;
46}
47
Aron Virginas-Tar56055192018-11-12 18:10:43 +000048IBackendInternal::IMemoryManagerUniquePtr ClBackend::CreateMemoryManager() const
arovir014424b0a2018-10-04 10:46:04 +010049{
Aron Virginas-Tar56055192018-11-12 18:10:43 +000050 return std::make_unique<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
51}
52
53IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
54 const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const
55{
56 return std::make_unique<ClWorkloadFactory>(
Jan Eilers3c9e0452020-04-10 13:00:44 +010057 PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
arovir014424b0a2018-10-04 10:46:04 +010058}
59
Jan Eilerse9f0f0f2019-08-16 10:28:37 +010060IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
Sadik Armagan04a72972020-09-14 15:44:18 +010061 const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
62{
63 return std::make_unique<ClWorkloadFactory>(
64 PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
65}
66
67IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
Jan Eilerse9f0f0f2019-08-16 10:28:37 +010068 TensorHandleFactoryRegistry& registry) const
69{
70 auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
71
72 registry.RegisterMemoryManager(memoryManager);
Narumol Prangnawarat549cb7a2020-07-10 17:50:53 +010073 registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
Jan Eilerse9f0f0f2019-08-16 10:28:37 +010074
75 return std::make_unique<ClWorkloadFactory>(
Jan Eilers3c9e0452020-04-10 13:00:44 +010076 PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
Jan Eilerse9f0f0f2019-08-16 10:28:37 +010077}
78
Sadik Armagan04a72972020-09-14 15:44:18 +010079IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
80 TensorHandleFactoryRegistry& registry, const ModelOptions& modelOptions) const
81{
82 auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
83
84 registry.RegisterMemoryManager(memoryManager);
85 registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
86
87 return std::make_unique<ClWorkloadFactory>(
88 PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
89}
90
Jan Eilerse9f0f0f2019-08-16 10:28:37 +010091std::vector<ITensorHandleFactory::FactoryId> ClBackend::GetHandleFactoryPreferences() const
92{
93 return std::vector<ITensorHandleFactory::FactoryId> {ClTensorHandleFactory::GetIdStatic()};
94}
95
96void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry)
97{
98 auto mgr = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
99
100 registry.RegisterMemoryManager(mgr);
101 registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(mgr));
102}
103
Sadik Armagan045f6be2020-09-10 13:37:32 +0100104IBackendInternal::IBackendContextPtr ClBackend::CreateBackendContext(const IRuntime::CreationOptions& options) const
David Beck1b61be52018-11-08 09:19:14 +0000105{
106 return IBackendContextPtr{new ClBackendContext{options}};
107}
108
Colm Donelane49755b2020-01-29 15:22:43 +0000109IBackendInternal::IBackendProfilingContextPtr ClBackend::CreateBackendProfilingContext(
Colm Donelan1aff3932020-02-05 17:48:59 +0000110 const IRuntime::CreationOptions&, IBackendProfilingPtr&)
Colm Donelane49755b2020-01-29 15:22:43 +0000111{
112 return IBackendProfilingContextPtr{};
113}
114
David Beck263e3492018-11-09 14:46:40 +0000115IBackendInternal::Optimizations ClBackend::GetOptimizations() const
116{
117 return Optimizations{};
118}
David Beck1b61be52018-11-08 09:19:14 +0000119
Sadik Armagan045f6be2020-09-10 13:37:32 +0100120IBackendInternal::IBackendSpecificModelContextPtr ClBackend::CreateBackendSpecificModelContext(
121 const ModelOptions& modelOptions) const
122{
123 return IBackendSpecificModelContextPtr{new ClBackendModelContext{modelOptions}};
124}
125
David Beck111b5d92018-11-12 14:59:37 +0000126IBackendInternal::ILayerSupportSharedPtr ClBackend::GetLayerSupport() const
127{
Sadik Armagan045f6be2020-09-10 13:37:32 +0100128 static ILayerSupportSharedPtr layerSupport
129 {
130 new ClLayerSupport(IBackendInternal::IBackendSpecificModelContextPtr{})
131 };
132 return layerSupport;
133}
134
135IBackendInternal::ILayerSupportSharedPtr ClBackend::GetLayerSupport(const ModelOptions& modelOptions) const
136{
137 static ILayerSupportSharedPtr layerSupport
138 {
139 new ClLayerSupport(CreateBackendSpecificModelContext(modelOptions))
140 };
David Beck111b5d92018-11-12 14:59:37 +0000141 return layerSupport;
142}
143
Mike Kelly07810fc2020-11-12 10:58:48 +0000144OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
145 const ModelOptions& modelOptions) const
Matteo Martincighadddddb2019-01-24 14:06:23 +0000146{
Matteo Martincighc3ba50e2019-05-22 14:28:16 +0100147 OptimizationViews optimizationViews;
Matteo Martincighadddddb2019-01-24 14:06:23 +0000148
Mike Kelly07810fc2020-11-12 10:58:48 +0000149 auto it = subgraph.end();
150 bool isFastMathEnabled = false;
Mike Kelly1ac690a2020-11-17 11:41:38 +0000151 std::map<LayerGuid, Layer*> untouched;
Mike Kelly07810fc2020-11-12 10:58:48 +0000152
Mike Kelly1ac690a2020-11-17 11:41:38 +0000153 while (it != subgraph.begin())
154 {
155 --it;
156 Layer& base = **it;
157 untouched.insert({base.GetGuid(), &base});
158 }
159
160 it = subgraph.end();
Mike Kelly07810fc2020-11-12 10:58:48 +0000161#if defined(ARMCOMPUTECL_ENABLED)
162 IBackendInternal::IBackendSpecificModelContextPtr modelContextPtr = CreateBackendSpecificModelContext(modelOptions);
163
164 if (modelContextPtr)
165 {
166 auto clModelOptions = dynamic_cast<ClBackendModelContext*>(modelContextPtr.get());
167 if (clModelOptions)
168 {
169 isFastMathEnabled = clModelOptions->IsFastMathEnabled();
170 }
171 }
172#endif
Mike Kelly07810fc2020-11-12 10:58:48 +0000173 while (it != subgraph.begin())
174 {
175 --it;
176 Layer& base = **it;
177
178 if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
179 || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
180 || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
181 || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division)
182 && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
183 {
184 for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
185 {
186 if (output->GetNumConnections() == 1)
187 {
188 for (auto&& childInput : output->GetConnections())
189 {
Teresa Charlind672f5d2021-01-18 18:07:57 +0000190 if ((childInput->GetOwningLayer().GetType() == LayerType::Activation) &&
191 (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
Mike Kelly07810fc2020-11-12 10:58:48 +0000192 {
193 Layer& child = childInput->GetOwningLayer();
194
195 auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);
196
197 const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") +
198 base.GetName();
199
200 // Get params from activation layer
201 ActivationDescriptor activationDesc = activationLayer->GetParameters();
202
203 if (base.GetType() == LayerType::Convolution2d)
204 {
205 Convolution2dLayer* baseLayer = PolymorphicDowncast<Convolution2dLayer*>(&base);
206
207 Optional<TensorInfo> biases;
208
209 if (baseLayer->GetParameters().m_BiasEnabled)
210 {
Mike Kelly1ac690a2020-11-17 11:41:38 +0000211 biases = baseLayer->m_Bias->GetTensorInfo();
Mike Kelly07810fc2020-11-12 10:58:48 +0000212 }
213
214 arm_compute::Status status = ClConvolution2dWorkloadValidate(
215 baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
216 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
217 baseLayer->GetParameters(),
218 baseLayer->m_Weight->GetTensorInfo(),
219 biases,
220 isFastMathEnabled,
221 &activationDesc);
222
223 if (status)
224 {
225 FuseLayerWithWeightsAndBiases<Convolution2dLayer>(optimizationViews,
226 baseLayer,
227 activationLayer,
228 activationDesc,
229 name);
Mike Kelly1ac690a2020-11-17 11:41:38 +0000230 untouched.erase(baseLayer->GetGuid());
231 untouched.erase(activationLayer->GetGuid());
Mike Kelly07810fc2020-11-12 10:58:48 +0000232 }
233 }
234 else if (base.GetType() == LayerType::DepthwiseConvolution2d)
235 {
236 DepthwiseConvolution2dLayer* baseLayer =
237 PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);
238
239 Optional<TensorInfo> biases;
240
241 if (baseLayer->GetParameters().m_BiasEnabled)
242 {
Mike Kelly1ac690a2020-11-17 11:41:38 +0000243 biases = baseLayer->m_Bias->GetTensorInfo();
Mike Kelly07810fc2020-11-12 10:58:48 +0000244 }
245
246 arm_compute::Status status = ClDepthwiseConvolutionWorkloadValidate(
247 baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
248 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
249 baseLayer->GetParameters(),
250 baseLayer->m_Weight->GetTensorInfo(),
251 biases,
252 &activationDesc);
253
254 if (status)
255 {
256 FuseLayerWithWeightsAndBiases<DepthwiseConvolution2dLayer>(optimizationViews,
257 baseLayer,
258 activationLayer,
259 activationDesc,
260 name);
Mike Kelly1ac690a2020-11-17 11:41:38 +0000261 untouched.erase(baseLayer->GetGuid());
262 untouched.erase(activationLayer->GetGuid());
Mike Kelly07810fc2020-11-12 10:58:48 +0000263 }
264 }
265 else if (base.GetType() == LayerType::FullyConnected)
266 {
267 FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);
268
269 arm_compute::Status status = ClFullyConnectedWorkloadValidate(
270 baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
271 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
272 baseLayer->m_Weight->GetTensorInfo(),
273 baseLayer->m_Bias->GetTensorInfo(),
274 baseLayer->GetParameters(),
275 &activationDesc);
276
277 if (status)
278 {
279 FuseLayerWithWeightsAndBiases<FullyConnectedLayer>(optimizationViews,
280 baseLayer,
281 activationLayer,
282 activationDesc,
283 name);
Mike Kelly1ac690a2020-11-17 11:41:38 +0000284 untouched.erase(baseLayer->GetGuid());
285 untouched.erase(activationLayer->GetGuid());
Mike Kelly07810fc2020-11-12 10:58:48 +0000286 }
287 }
288 else if (base.GetType() == LayerType::BatchNormalization)
289 {
290 BatchNormalizationLayer* baseLayer =
291 PolymorphicDowncast<BatchNormalizationLayer*>(&base);
292
293 arm_compute::Status status = ClBatchNormalizationValidate(
294 baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
295 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
296 baseLayer->m_Mean->GetTensorInfo(),
297 baseLayer->m_Variance->GetTensorInfo(),
298 baseLayer->m_Beta->GetTensorInfo(),
299 baseLayer->m_Gamma->GetTensorInfo(),
300 baseLayer->GetParameters(),
301 &activationDesc);
302
303 if (status)
304 {
305 BatchNormalizationLayer* replacementLayer =
306 FuseLayerWithParameters<BatchNormalizationLayer>(optimizationViews,
307 baseLayer,
308 activationLayer,
309 activationDesc,
310 name);
311
312 replacementLayer->m_Beta = std::move(baseLayer->m_Beta);
313 replacementLayer->m_Gamma = std::move(baseLayer->m_Gamma);
314 replacementLayer->m_Mean = std::move(baseLayer->m_Mean);
315 replacementLayer->m_Variance = std::move(baseLayer->m_Variance);
Mike Kelly1ac690a2020-11-17 11:41:38 +0000316 untouched.erase(baseLayer->GetGuid());
317 untouched.erase(activationLayer->GetGuid());
Mike Kelly07810fc2020-11-12 10:58:48 +0000318 }
319 }
320 else if (base.GetType() == LayerType::Addition)
321 {
322 AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);
323
324 arm_compute::Status status = ClAdditionValidate(
325 baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
326 baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
327 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
328 &activationDesc);
329
330 if (status)
331 {
332 FuseLayerWithoutParameters<AdditionLayer>(optimizationViews,
333 baseLayer,
334 activationLayer,
335 activationDesc,
336 name);
Mike Kelly1ac690a2020-11-17 11:41:38 +0000337 untouched.erase(baseLayer->GetGuid());
338 untouched.erase(activationLayer->GetGuid());
Mike Kelly07810fc2020-11-12 10:58:48 +0000339 }
340 }
341 else if (base.GetType() == LayerType::Division)
342 {
343 DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);
344
345 arm_compute::Status status = ClDivisionWorkloadValidate(
346 baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
347 baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
348 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
349 &activationDesc);
350
351 if (status)
352 {
353 FuseLayerWithoutParameters<DivisionLayer>(optimizationViews,
354 baseLayer,
355 activationLayer,
356 activationDesc,
357 name);
Mike Kelly1ac690a2020-11-17 11:41:38 +0000358 untouched.erase(baseLayer->GetGuid());
359 untouched.erase(activationLayer->GetGuid());
Mike Kelly07810fc2020-11-12 10:58:48 +0000360 }
361 }
362 else if (base.GetType() == LayerType::Multiplication)
363 {
364 MultiplicationLayer* baseLayer = PolymorphicDowncast<MultiplicationLayer*>(&base);
365
366 arm_compute::Status status = ClMultiplicationWorkloadValidate(
367 baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
368 baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
369 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
370 &activationDesc);
371
372 if (status)
373 {
374 FuseLayerWithoutParameters<MultiplicationLayer>(optimizationViews,
375 baseLayer,
376 activationLayer,
377 activationDesc,
378 name);
Mike Kelly1ac690a2020-11-17 11:41:38 +0000379 untouched.erase(baseLayer->GetGuid());
380 untouched.erase(activationLayer->GetGuid());
Mike Kelly07810fc2020-11-12 10:58:48 +0000381 }
382 }
383 else if (base.GetType() == LayerType::Subtraction)
384 {
385 SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);
386
387 arm_compute::Status status = ClSubtractionValidate(
388 baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
389 baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
390 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
391 &activationDesc);
392
393 if (status)
394 {
395 FuseLayerWithoutParameters<SubtractionLayer>(optimizationViews,
396 baseLayer,
397 activationLayer,
398 activationDesc,
399 name);
Mike Kelly1ac690a2020-11-17 11:41:38 +0000400 untouched.erase(baseLayer->GetGuid());
401 untouched.erase(activationLayer->GetGuid());
Mike Kelly07810fc2020-11-12 10:58:48 +0000402 }
403 }
404 }
405 }
406 }
407 }
408 }
409 }
Mike Kelly1ac690a2020-11-17 11:41:38 +0000410
Mike Kelly07810fc2020-11-12 10:58:48 +0000411 if (optimizationViews.GetSubstitutions().empty())
412 {
413 optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
414 }
Mike Kelly1ac690a2020-11-17 11:41:38 +0000415 else
416 {
417 ReportUntouchedLayers(optimizationViews, untouched);
418 }
Matteo Martincighc3ba50e2019-05-22 14:28:16 +0100419
420 return optimizationViews;
Matteo Martincighadddddb2019-01-24 14:06:23 +0000421}
422
David Beck9efb57d2018-11-05 13:40:33 +0000423} // namespace armnn