blob: ebce1a69dea116d5d7531c458b2c1f01da4cfe80 [file] [log] [blame]
arovir014424b0a2018-10-04 10:46:04 +01001//
Mike Kelly3ec30772023-03-08 13:47:17 +00002// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
arovir014424b0a2018-10-04 10:46:04 +01003// SPDX-License-Identifier: MIT
4//
5
6#include "NeonBackend.hpp"
David Beck3e9e1152018-10-17 14:17:50 +01007#include "NeonBackendId.hpp"
Sadik Armagan045f6be2020-09-10 13:37:32 +01008#include "NeonBackendModelContext.hpp"
arovir01a0944792018-10-11 15:00:58 +01009#include "NeonWorkloadFactory.hpp"
David Beck111b5d92018-11-12 14:59:37 +000010#include "NeonLayerSupport.hpp"
Narumol Prangnawarat4e3e8182019-08-14 12:25:50 +010011#include "NeonTensorHandleFactory.hpp"
Tracy Narine6440ce82023-09-20 14:19:07 +010012#include "NeonBackendOptimizationUtils.hpp"
arovir01a0944792018-10-11 15:00:58 +010013
Matteo Martincighc601aa62019-10-29 15:03:22 +000014#include <armnn/BackendRegistry.hpp>
Mike Kelly07810fc2020-11-12 10:58:48 +000015#include <armnn/Descriptors.hpp>
Matteo Martincighc601aa62019-10-29 15:03:22 +000016
Mike Kelly07810fc2020-11-12 10:58:48 +000017#include <aclCommon/ArmComputeSubgraphUtils.hpp>
18#include <aclCommon/ArmComputeUtils.hpp>
Aron Virginas-Tar56055192018-11-12 18:10:43 +000019#include <aclCommon/BaseMemoryManager.hpp>
20
Matteo Martincighe5b8eb92019-11-28 15:45:42 +000021#include <armnn/backends/IBackendContext.hpp>
22#include <armnn/backends/IMemoryManager.hpp>
Aron Virginas-Tar56055192018-11-12 18:10:43 +000023
Jan Eilers3c9e0452020-04-10 13:00:44 +010024#include <armnn/utility/PolymorphicDowncast.hpp>
25
Francis Murtaghe8d7ccb2021-10-14 17:30:24 +010026#include <neon/workloads/NeonAdditionWorkload.hpp>
27#include <neon/workloads/NeonBatchNormalizationWorkload.hpp>
28#include <neon/workloads/NeonConvolution2dWorkload.hpp>
29#include <neon/workloads/NeonDepthwiseConvolutionWorkload.hpp>
30#include <neon/workloads/NeonDivisionWorkload.hpp>
31#include <neon/workloads/NeonFullyConnectedWorkload.hpp>
Tracy Narine6440ce82023-09-20 14:19:07 +010032#include <neon/workloads/NeonFusedWorkload.hpp>
Francis Murtaghe8d7ccb2021-10-14 17:30:24 +010033#include <neon/workloads/NeonMultiplicationWorkload.hpp>
34#include <neon/workloads/NeonReduceWorkload.hpp>
35#include <neon/workloads/NeonSubtractionWorkload.hpp>
36#include <backendsCommon/DefaultAllocator.hpp>
Mike Kelly07810fc2020-11-12 10:58:48 +000037
David Beck263e3492018-11-09 14:46:40 +000038#include <Optimizer.hpp>
arovir01a0944792018-10-11 15:00:58 +010039
Mike Kelly07810fc2020-11-12 10:58:48 +000040#include <arm_compute/core/Types.h>
Aron Virginas-Tar56055192018-11-12 18:10:43 +000041#include <arm_compute/runtime/Allocator.h>
42
arovir014424b0a2018-10-04 10:46:04 +010043namespace armnn
44{
45
David Beck3cc9a622018-10-12 10:38:31 +010046const BackendId& NeonBackend::GetIdStatic()
arovir014424b0a2018-10-04 10:46:04 +010047{
David Beck3e9e1152018-10-17 14:17:50 +010048 static const BackendId s_Id{NeonBackendId()};
arovir014424b0a2018-10-04 10:46:04 +010049 return s_Id;
50}
51
Aron Virginas-Tar56055192018-11-12 18:10:43 +000052IBackendInternal::IMemoryManagerUniquePtr NeonBackend::CreateMemoryManager() const
arovir014424b0a2018-10-04 10:46:04 +010053{
Aron Virginas-Tar56055192018-11-12 18:10:43 +000054 return std::make_unique<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
Sadik Armagan13a9fa62019-04-26 16:04:34 +010055 BaseMemoryManager::MemoryAffinity::Offset);
Aron Virginas-Tar56055192018-11-12 18:10:43 +000056}
57
58IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
59 const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const
60{
61 return std::make_unique<NeonWorkloadFactory>(
Jan Eilers3c9e0452020-04-10 13:00:44 +010062 PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
arovir014424b0a2018-10-04 10:46:04 +010063}
64
Narumol Prangnawarat4e3e8182019-08-14 12:25:50 +010065IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
Sadik Armagan04a72972020-09-14 15:44:18 +010066 const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
67{
68 return std::make_unique<NeonWorkloadFactory>(
69 PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
70}
71
72IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
Narumol Prangnawarat4e3e8182019-08-14 12:25:50 +010073 class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const
74{
75 auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
76 BaseMemoryManager::MemoryAffinity::Offset);
77
78 tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager);
Narumol Prangnawarat77400452022-01-13 17:43:41 +000079
80 auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
81 // Register copy and import factory pair
82 tensorHandleFactoryRegistry.RegisterCopyAndImportFactoryPair(factory->GetId(), factory->GetId());
83 // Register the factory
84 tensorHandleFactoryRegistry.RegisterFactory(std::move(factory));
85
Narumol Prangnawarat549cb7a2020-07-10 17:50:53 +010086
Narumol Prangnawarat4e3e8182019-08-14 12:25:50 +010087 return std::make_unique<NeonWorkloadFactory>(
Jan Eilers3c9e0452020-04-10 13:00:44 +010088 PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
Narumol Prangnawarat4e3e8182019-08-14 12:25:50 +010089}
90
Sadik Armagan04a72972020-09-14 15:44:18 +010091IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
92 TensorHandleFactoryRegistry& tensorHandleFactoryRegistry, const ModelOptions& modelOptions) const
93{
94 auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
95 BaseMemoryManager::MemoryAffinity::Offset);
96
97 tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager);
Narumol Prangnawarat77400452022-01-13 17:43:41 +000098
99 auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
100 // Register copy and import factory pair
101 tensorHandleFactoryRegistry.RegisterCopyAndImportFactoryPair(factory->GetId(), factory->GetId());
102 // Register the factory
103 tensorHandleFactoryRegistry.RegisterFactory(std::move(factory));
Sadik Armagan04a72972020-09-14 15:44:18 +0100104
105 return std::make_unique<NeonWorkloadFactory>(
106 PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
107}
108
David Beck263e3492018-11-09 14:46:40 +0000109IBackendInternal::IBackendContextPtr NeonBackend::CreateBackendContext(const IRuntime::CreationOptions&) const
110{
111 return IBackendContextPtr{};
112}
113
Colm Donelane49755b2020-01-29 15:22:43 +0000114IBackendInternal::IBackendProfilingContextPtr NeonBackend::CreateBackendProfilingContext(
Colm Donelan1aff3932020-02-05 17:48:59 +0000115 const IRuntime::CreationOptions&, IBackendProfilingPtr&)
Colm Donelane49755b2020-01-29 15:22:43 +0000116{
117 return IBackendProfilingContextPtr{};
118}
119
Sadik Armagan045f6be2020-09-10 13:37:32 +0100120IBackendInternal::IBackendSpecificModelContextPtr NeonBackend::CreateBackendSpecificModelContext(
121 const ModelOptions& modelOptions) const
122{
123 return IBackendSpecificModelContextPtr{new NeonBackendModelContext{modelOptions}};
124}
125
David Beck111b5d92018-11-12 14:59:37 +0000126IBackendInternal::ILayerSupportSharedPtr NeonBackend::GetLayerSupport() const
127{
Sadik Armagan045f6be2020-09-10 13:37:32 +0100128 static ILayerSupportSharedPtr layerSupport
129 {
130 new NeonLayerSupport(IBackendInternal::IBackendSpecificModelContextPtr{})
131 };
132 return layerSupport;
133}
134
135IBackendInternal::ILayerSupportSharedPtr NeonBackend::GetLayerSupport(const ModelOptions& modelOptions) const
136{
137 static ILayerSupportSharedPtr layerSupport
138 {
139 new NeonLayerSupport(CreateBackendSpecificModelContext(modelOptions))
140 };
David Beck111b5d92018-11-12 14:59:37 +0000141 return layerSupport;
142}
143
// Backend-specific graph optimisation pass for Neon (CPU/ACL).
// Walks the subgraph backwards and, where the Arm Compute Library validation
// succeeds, performs:
//   1. Fusion of an Activation layer into a preceding compute layer
//      (Conv2d, DepthwiseConv2d, FullyConnected, BatchNorm, Add/Mul/Sub/Div,
//      ElementwiseBinary with Add/Div/Mul/Sub operations).
//   2. Splitting of a multi-axis Reduce into a chain of single-axis Reduces.
//   3. Removal of redundant Reshape layers.
//   4. Replacement of an Add/Mul/Add sequence (optionally followed by
//      ReLu/BoundedReLu) with a single fused AddMulAdd kernel.
// Layers left alone are tracked in 'untouched' and reported back so the
// optimizer knows they still need scheduling.
OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
                                                    const ModelOptions& modelOptions) const
{
    OptimizationViews optimizationViews(modelOptions);

    auto it = subgraph.end();
    // Guid -> layer map of everything in the subgraph; entries are erased as
    // layers get fused/replaced, so what remains at the end is truly untouched.
    std::map<LayerGuid, Layer*> untouched;

    // First reverse pass: record every layer as untouched.
    while (it != subgraph.begin())
    {
        --it;
        Layer& base = *(PolymorphicDowncast<Layer*>(*it));
        untouched.insert({base.GetGuid(), &base});
    }

    // Second reverse pass: attempt the rewrites.
    it = subgraph.end();
    while (it != subgraph.begin())
    {
        --it;
        Layer& base = *(PolymorphicDowncast<Layer*>(*it));

        // Fuse activation into previous layer if supported by backend.
        // Only considered when the base layer has no activation already fused
        // (GetAdditionalInformation<ActivationDescriptor>() == nullptr).
        if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
             || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
             || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
             || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division
             || base.GetType() == LayerType::ElementwiseBinary)
            && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
        {
            for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
            {
                // Fusing is only safe when the output feeds exactly one consumer.
                if (output->GetNumConnections() == 1)
                {
                    for (auto&& childInput : output->GetConnections())
                    {
                        if ((childInput->GetOwningLayer().GetType() == LayerType::Activation) &&
                            (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
                        {
                            Layer& child = childInput->GetOwningLayer();

                            auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);

                            // Name records what was fused into what, for debugging.
                            const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") +
                                                     base.GetName();

                            // Get params from activation layer
                            ActivationDescriptor activationDesc = activationLayer->GetParameters();

                            if (base.GetType() == LayerType::Convolution2d)
                            {
                                Convolution2dLayer* baseLayer = PolymorphicDowncast<Convolution2dLayer*>(&base);

                                // Bias is optional: input slot 2 only exists/connects when enabled.
                                Optional<TensorInfo> biases;

                                if (baseLayer->GetParameters().m_BiasEnabled)
                                {
                                    biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
                                }

                                // Ask ACL whether conv+activation is supported for these tensors.
                                arm_compute::Status status = NeonConvolution2dWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetParameters(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        biases,
                                        false,
                                        &activationDesc);

                                if (status)
                                {
                                    FuseConvolution2dLayer<Convolution2dLayer>(optimizationViews,
                                                                               baseLayer,
                                                                               activationLayer,
                                                                               activationDesc,
                                                                               name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::DepthwiseConvolution2d)
                            {
                                DepthwiseConvolution2dLayer* baseLayer =
                                        PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);

                                // Bias is optional: input slot 2 only exists/connects when enabled.
                                Optional<TensorInfo> biases;

                                if (baseLayer->GetParameters().m_BiasEnabled)
                                {
                                    biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
                                }

                                arm_compute::Status status = NeonDepthwiseConvolutionWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetParameters(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        biases,
                                        &activationDesc);

                                if (status)
                                {
                                    FuseDepthwiseConvolution2dLayer<DepthwiseConvolution2dLayer>(optimizationViews,
                                                                                                 baseLayer,
                                                                                                 activationLayer,
                                                                                                 activationDesc,
                                                                                                 name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::FullyConnected)
                            {
                                FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);
                                FullyConnectedDescriptor descriptor = baseLayer->GetParameters();

                                // As bias is optional only try to get TensorInfo from input if bias is enabled.
                                Optional<TensorInfo> biases;
                                if (descriptor.m_BiasEnabled)
                                {
                                    biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
                                }

                                arm_compute::Status status = NeonFullyConnectedWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        biases,
                                        baseLayer->GetParameters(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseFullyConnectedLayer<FullyConnectedLayer>(optimizationViews,
                                                                                 baseLayer,
                                                                                 activationLayer,
                                                                                 activationDesc,
                                                                                 name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::BatchNormalization)
                            {
                                BatchNormalizationLayer* baseLayer =
                                        PolymorphicDowncast<BatchNormalizationLayer*>(&base);

                                arm_compute::Status status = NeonBatchNormalizationValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->m_Mean->GetTensorInfo(),
                                        baseLayer->m_Variance->GetTensorInfo(),
                                        baseLayer->m_Beta->GetTensorInfo(),
                                        baseLayer->m_Gamma->GetTensorInfo(),
                                        baseLayer->GetParameters(),
                                        &activationDesc);

                                if (status)
                                {
                                    BatchNormalizationLayer* replacementLayer =
                                            FuseBatchNormalizationLayer<BatchNormalizationLayer>(optimizationViews,
                                                                                                 baseLayer,
                                                                                                 activationLayer,
                                                                                                 activationDesc,
                                                                                                 name);

                                    // The fused layer takes ownership of the original
                                    // batch-norm constant tensors.
                                    replacementLayer->m_Beta = std::move(baseLayer->m_Beta);
                                    replacementLayer->m_Gamma = std::move(baseLayer->m_Gamma);
                                    replacementLayer->m_Mean = std::move(baseLayer->m_Mean);
                                    replacementLayer->m_Variance = std::move(baseLayer->m_Variance);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Addition)
                            {
                                AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);

                                arm_compute::Status status = NeonAdditionWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseAdditionLayer<AdditionLayer>(optimizationViews,
                                                                     baseLayer,
                                                                     activationLayer,
                                                                     activationDesc,
                                                                     name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Division)
                            {
                                DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);

                                arm_compute::Status status = NeonDivisionWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseDivisionLayer<DivisionLayer>(optimizationViews,
                                                                     baseLayer,
                                                                     activationLayer,
                                                                     activationDesc,
                                                                     name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Multiplication)
                            {
                                MultiplicationLayer* baseLayer = PolymorphicDowncast<MultiplicationLayer*>(&base);

                                arm_compute::Status status = NeonMultiplicationWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseMultiplicationLayer<MultiplicationLayer>(optimizationViews,
                                                                                 baseLayer,
                                                                                 activationLayer,
                                                                                 activationDesc,
                                                                                 name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Subtraction)
                            {
                                SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);

                                arm_compute::Status status = NeonSubtractionWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseSubtractionLayer<SubtractionLayer>(optimizationViews,
                                                                           baseLayer,
                                                                           activationLayer,
                                                                           activationDesc,
                                                                           name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::ElementwiseBinary)
                            {
                                ElementwiseBinaryLayer* baseLayer = PolymorphicDowncast<ElementwiseBinaryLayer*>(&base);

                                // Dispatch on the binary operation; each case validates
                                // against the corresponding ACL workload before fusing.
                                if (baseLayer->GetParameters().m_Operation == BinaryOperation::Add)
                                {
                                    arm_compute::Status status = NeonAdditionWorkloadValidate(
                                            baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                            activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            &activationDesc);

                                    if (status)
                                    {
                                        FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
                                                                                           baseLayer,
                                                                                           activationLayer,
                                                                                           activationDesc,
                                                                                           BinaryOperation::Add,
                                                                                           name);
                                        untouched.erase(baseLayer->GetGuid());
                                        untouched.erase(activationLayer->GetGuid());
                                    }
                                }
                                else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Div)
                                {
                                    arm_compute::Status status = NeonDivisionWorkloadValidate(
                                            baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                            activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            &activationDesc);

                                    if (status)
                                    {
                                        FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
                                                                                           baseLayer,
                                                                                           activationLayer,
                                                                                           activationDesc,
                                                                                           BinaryOperation::Div,
                                                                                           name);
                                        untouched.erase(baseLayer->GetGuid());
                                        untouched.erase(activationLayer->GetGuid());
                                    }
                                }
                                else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Mul)
                                {
                                    arm_compute::Status status = NeonMultiplicationWorkloadValidate(
                                            baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                            activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            &activationDesc);

                                    if (status)
                                    {
                                        FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
                                                                                           baseLayer,
                                                                                           activationLayer,
                                                                                           activationDesc,
                                                                                           BinaryOperation::Mul,
                                                                                           name);
                                        untouched.erase(baseLayer->GetGuid());
                                        untouched.erase(activationLayer->GetGuid());
                                    }
                                }
                                else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Sub)
                                {
                                    arm_compute::Status status = NeonSubtractionWorkloadValidate(
                                            baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                            activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                            &activationDesc);

                                    if (status)
                                    {
                                        FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
                                                                                           baseLayer,
                                                                                           activationLayer,
                                                                                           activationDesc,
                                                                                           BinaryOperation::Sub,
                                                                                           name);
                                        untouched.erase(baseLayer->GetGuid());
                                        untouched.erase(activationLayer->GetGuid());
                                    }
                                }
                                // No fusion available for other BinaryOperations
                            }
                        }
                    }
                }
            }
        }

        // Separate reduce layer with multiple axes into multiple reduce layers with 1 axis.
        if (base.GetType() == LayerType::Reduce)
        {
            ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base);
            ReduceDescriptor reduceDescriptor = baseLayer->GetParameters();

            if (!reduceDescriptor.m_vAxis.empty() && reduceDescriptor.m_vAxis.size() > 1)
            {
                // Add new layers to the graph and connect them.
                std::vector<IConnectableLayer*> layers = ChainReduceLayers<ReduceLayer>(optimizationViews,
                                                                                        baseLayer,
                                                                                        reduceDescriptor);

                // Replace existing baselayer with new subgraph.
                ReplaceLayers<ReduceLayer>(optimizationViews, baseLayer, layers);
                untouched.erase(baseLayer->GetGuid());
            }
        }

        // Remove Reshape where possible
        if (base.GetType() == LayerType::Reshape)
        {
            ReshapeLayer* baseLayer = PolymorphicDowncast<ReshapeLayer*>(&base);

            // Cannot remove a Reshape if it's connected to any layer that has an NCHW layout
            if (ConnectedToLayerWithNCHW(baseLayer))
            {
                continue;
            }
            RemoveReshapeLayer(baseLayer, untouched, optimizationViews);
        }

        // Replace Add/Mul/Add where possible
        // layerList collects the matched sequence: Add, Mul, Add, and optionally
        // a trailing ReLu/BoundedReLu activation.
        Layer* layerList[4] = {nullptr, nullptr, nullptr, nullptr};
        const std::vector<ActivationFunction> validActivates = { ActivationFunction::ReLu,
                                                                 ActivationFunction::BoundedReLu };
        if (IsLayerSequence<BinaryOperation>(base,
                                             BinaryOperation::Add, BinaryOperation::Mul, BinaryOperation::Add,
                                             layerList,
                                             true, // handleValidActivates
                                             validActivates))
        {
            bool fuseReLu = false;
            unsigned int numInputs = 0;
            unsigned int numOutputs = 0;
            std::vector<TensorInfo> inputInfos;
            std::vector<TensorInfo> outputInfos;
            const ActivationDescriptor* activationDescriptor = nullptr;

            if (BuildAddMulAddTensorInfoLists<Layer>(layerList,
                                                     numInputs,
                                                     numOutputs,
                                                     inputInfos,
                                                     outputInfos,
                                                     activationDescriptor,
                                                     fuseReLu))
            {
                // Create the new Add/Mul/Add layer and set the Relu activation function
                FusedDescriptor fusedDescriptor(numInputs, numOutputs, FusedKernelType::AddMulAdd);
                arm_compute::Status status = NeonFusedWorkloadValidate({inputInfos.begin(), inputInfos.end()},
                                                                       {outputInfos.begin(), outputInfos.end()},
                                                                       fusedDescriptor,
                                                                       activationDescriptor);
                if (status)
                {
                    std::string fusedName;
                    GetFusedName(layerList, fusedName);

                    IConnectableLayer* addMulAddLayer =
                            optimizationViews.GetINetwork()->AddFusedLayer(fusedDescriptor, fusedName.c_str());

                    if (fuseReLu)
                    {
                        // Carry the activation over as additional info on the fused layer.
                        FusedLayer* addMulAddFusedLayer = PolymorphicDowncast<FusedLayer*>(addMulAddLayer);
                        addMulAddFusedLayer->SetAdditionalInfoForObject(
                                std::make_shared<ActivationDescriptor>(*activationDescriptor));
                    }

                    // Update the graph
                    std::vector<IConnectableLayer*> originalLayers;
                    for (unsigned int layerIdx = 0; layerIdx < 4; ++layerIdx)
                    {
                        if (layerList[layerIdx])
                        {
                            originalLayers.push_back(layerList[layerIdx]);
                        }
                    }

                    std::vector<SlotList> inputLayersSlotLists, outputLayersSlotLists;
                    BuildAddMulAddSlotLists<SlotList>(fuseReLu,
                                                      outputInfos.size() > 1,
                                                      inputLayersSlotLists,
                                                      outputLayersSlotLists);

                    ReplaceMultipleLayers<FusedLayer>(optimizationViews,
                                                      originalLayers,
                                                      PolymorphicDowncast<FusedLayer*>(addMulAddLayer),
                                                      inputLayersSlotLists,
                                                      outputLayersSlotLists);

                    // Remove unused layers
                    for (unsigned int layerIdx = 0; layerIdx < 4; ++layerIdx)
                    {
                        if (layerList[layerIdx])
                        {
                            untouched.erase(layerList[layerIdx]->GetGuid());
                        }
                    }
                }
            }
        }
    }

    // If nothing was rewritten, hand the whole subgraph back untouched;
    // otherwise report the layers that survived all rewrites.
    if (optimizationViews.GetSubstitutions().empty() && optimizationViews.GetDeletedSubgraphs().empty())
    {
        optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
    }
    else
    {
        ReportUntouchedLayers(optimizationViews, untouched);
    }

    return optimizationViews;
}
617
Narumol Prangnawarat4e3e8182019-08-14 12:25:50 +0100618std::vector<ITensorHandleFactory::FactoryId> NeonBackend::GetHandleFactoryPreferences() const
619{
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000620 return std::vector<ITensorHandleFactory::FactoryId>() = { NeonTensorHandleFactory::GetIdStatic() };
Narumol Prangnawarat4e3e8182019-08-14 12:25:50 +0100621}
622
623void NeonBackend::RegisterTensorHandleFactories(class TensorHandleFactoryRegistry& registry)
624{
625 auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
626 BaseMemoryManager::MemoryAffinity::Offset);
627
628 registry.RegisterMemoryManager(memoryManager);
Narumol Prangnawarat77400452022-01-13 17:43:41 +0000629
630 auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
631 // Register copy and import factory pair
632 registry.RegisterCopyAndImportFactoryPair(factory->GetId(), factory->GetId());
633 // Register the factory
634 registry.RegisterFactory(std::move(factory));
Narumol Prangnawarat4e3e8182019-08-14 12:25:50 +0100635}
636
Francis Murtaghe8d7ccb2021-10-14 17:30:24 +0100637std::unique_ptr<ICustomAllocator> NeonBackend::GetDefaultAllocator() const
638{
639 return std::make_unique<DefaultAllocator>();
640}
641
642
Matthew Bentham42bad952018-12-17 09:23:36 +0000643} // namespace armnn