/*
 * Copyright (c) 2018-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/graph/mutators/NodeFusionMutator.h"

#include "arm_compute/graph/GraphBuilder.h"
#include "arm_compute/graph/Logger.h"
#include "arm_compute/graph/Utils.h"
#include "arm_compute/graph/backends/BackendRegistry.h"
#include "arm_compute/graph/nodes/FusedConvolutionBatchNormalizationNode.h"
#include "arm_compute/graph/nodes/FusedConvolutionBatchNormalizationWithPostOpsNode.h"
#include "arm_compute/graph/nodes/FusedConvolutionWithPostOpNode.h"
#include "arm_compute/graph/nodes/Nodes.h"

#include "src/graph/mutators/MutatorUtils.h"

#include "support/Cast.h"

#include <algorithm> // for std::find_if, used by the post op fusion below
#include <list>
#include <set>

namespace arm_compute
{
namespace graph
{
namespace detail
{
void transfer_driving_nodes_and_remove_old_node(Graph &g, INode *new_node, INode *old_node, bool add_output_tensor)
{
    if(new_node == nullptr || old_node == nullptr)
    {
        return;
    }

    // Get driving nodes of last fusable node
    std::vector<NodeIdxPair> last_driving_nodes = get_driving_nodes(*old_node);

    // Extract last fusable node accessor if any
    if(old_node->output(0) == nullptr)
    {
        return;
    }
    auto old_node_accessor = old_node->output(0)->extract_accessor();

    // Remove node
    g.remove_node(old_node->id());

    // Update fused node outputs
    for(auto &driving_node : last_driving_nodes)
    {
        g.add_connection(new_node->id(), 0, driving_node.node_id, driving_node.index);
        if(add_output_tensor)
        {
            configure_tensor(new_node->output(0));
        }
    }

    // Update accessor to fused node
    new_node->output(0)->set_accessor(std::move(old_node_accessor));
}

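// Illustrative sketch of transfer_driving_nodes_and_remove_old_node (assuming a
// simple linear graph; the "driving nodes" are the consumers of old_node's output):
//
//   before: ... -> old_node -> consumers
//   after : ... -> new_node -> consumers
//
// old_node's output accessor, if any, is moved onto new_node's output tensor and
// old_node is removed from the graph.
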
void fuse_convolution_with_batch_normalization(Graph &g, const Edge *output_edge)
{
    ARM_COMPUTE_ERROR_ON(output_edge == nullptr);

    auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(output_edge->producer());
    auto *bn_node   = arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->consumer());

    // Not fusing if number of groups is greater than 1
    if(conv_node->num_groups() > 1)
    {
        return;
    }

    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing convolution node with ID : " << output_edge->producer_id()
                                  << " with BatchNormalization Layer node with ID : " << output_edge->consumer_id() << std::endl);

    // Prevent fusion if fused node has an output accessor
    if(conv_node->output(0)->accessor() == nullptr)
    {
        const Target assigned_target = conv_node->assigned_target();

        // Extract conv inputs
        const auto   conv_input_id   = conv_node->input_edge(0)->producer_id();
        const auto   conv_weights_id = conv_node->input_edge(1)->producer_id();
        const auto   conv_info       = conv_node->convolution_info();
        const auto   conv_method     = conv_node->convolution_method();
        const auto   num_groups      = conv_node->num_groups();
        const auto   act_info        = bn_node->fused_activation();
        FastMathHint fast_math_hint  = conv_node->fast_math_hint();

        // Extract bn inputs
        const auto bn_mean_id = bn_node->input_edge(1)->producer_id();
        const auto bn_var_id  = bn_node->input_edge(2)->producer_id();

        const auto epsilon = bn_node->epsilon();

        // Create the fused node
        const NodeID fused_id = g.add_node<FusedConvolutionBatchNormalizationNode>(epsilon, conv_info, num_groups, conv_method, fast_math_hint, act_info);

        if(conv_node->input_edge(2) != nullptr)
        {
            auto conv_bias_id = conv_node->input_edge(2)->producer_id();
            g.add_connection(conv_bias_id, 0, fused_id, 2);
        }

        // Add connections from the conv/batch_norm inputs to the fused node
        g.add_connection(conv_input_id, 0, fused_id, 0);
        g.add_connection(conv_weights_id, 0, fused_id, 1);
        g.add_connection(bn_mean_id, 0, fused_id, 3);
        g.add_connection(bn_var_id, 0, fused_id, 4);

        if(bn_node->input_edge(3) != nullptr)
        {
            const auto bn_beta_id = bn_node->input_edge(3)->producer_id();
            g.add_connection(bn_beta_id, 0, fused_id, 5);
        }

        if(bn_node->input_edge(4) != nullptr)
        {
            const auto bn_gamma_id = bn_node->input_edge(4)->producer_id();
            g.add_connection(bn_gamma_id, 0, fused_id, 6);
        }

        auto fused_node   = g.node(fused_id);
        auto bn_node_name = bn_node->name();

        transfer_driving_nodes_and_remove_old_node(g, fused_node, bn_node, true);

        fused_node->set_assigned_target(assigned_target);
        fused_node->set_common_node_parameters(NodeParams{ conv_node->name() + "+" + bn_node_name, assigned_target });

        // Remove convolution node
        g.remove_node(conv_node->id());
    }
    else
    {
        ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of convolution with batch normalization due to the presence of an output accessor\n");
    }
}

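// Sketch of the arithmetic the backend later performs for the fused node created
// above (the folding itself happens in the backend FusedConvolutionBatchNormalization
// function, not in this mutator; this is the standard conv + batch-norm identity):
//
//   w' = w * gamma / sqrt(var + epsilon)
//   b' = (b - mean) * gamma / sqrt(var + epsilon) + beta
//
// The fused node's input ports, as wired above, are: 0: input, 1: weights,
// 2: bias (optional), 3: mean, 4: var, 5: beta (optional), 6: gamma (optional).
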
void fuse_depthwise_convolution_with_batch_normalization(Graph &g, const Edge *output_edge)
{
    ARM_COMPUTE_ERROR_ON(output_edge == nullptr);

    auto *depth_conv_node = arm_compute::utils::cast::polymorphic_downcast<DepthwiseConvolutionLayerNode *>(output_edge->producer());
    auto *bn_node         = arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->consumer());

    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing depthwise convolution node with ID : " << output_edge->producer_id()
                                  << " with BatchNormalization Layer node with ID : " << output_edge->consumer_id() << std::endl);

    // Prevent fusion if fused node has an output accessor
    if(depth_conv_node->output(0)->accessor() == nullptr)
    {
        const Target assigned_target = depth_conv_node->assigned_target();

        // Extract conv inputs
        const auto depth_conv_input_id = depth_conv_node->input_edge(0)->producer_id();
        const auto conv_weights_id     = depth_conv_node->input_edge(1)->producer_id();
        const auto conv_info           = depth_conv_node->convolution_info();
        const auto depth_conv_method   = depth_conv_node->depthwise_convolution_method();
        const auto depth_multiplier    = depth_conv_node->depth_multiplier();
        const auto act_info            = bn_node->fused_activation();

        // Extract bn inputs
        const auto bn_mean_id  = bn_node->input_edge(1)->producer_id();
        const auto bn_var_id   = bn_node->input_edge(2)->producer_id();
        const auto bn_beta_id  = bn_node->input_edge(3)->producer_id();
        const auto bn_gamma_id = bn_node->input_edge(4)->producer_id();
        const auto epsilon     = bn_node->epsilon();

        // Create the fused node
        const NodeID fused_id = g.add_node<FusedDepthwiseConvolutionBatchNormalizationNode>(epsilon, conv_info, depth_multiplier, depth_conv_method, act_info);

        if(depth_conv_node->input_edge(2) != nullptr)
        {
            const auto conv_bias_id = depth_conv_node->input_edge(2)->producer_id();
            g.add_connection(conv_bias_id, 0, fused_id, 2);
        }

        // Add connections from the conv/batch_norm inputs to the fused node
        g.add_connection(depth_conv_input_id, 0, fused_id, 0);
        g.add_connection(conv_weights_id, 0, fused_id, 1);
        g.add_connection(bn_mean_id, 0, fused_id, 3);
        g.add_connection(bn_var_id, 0, fused_id, 4);
        g.add_connection(bn_beta_id, 0, fused_id, 5);
        g.add_connection(bn_gamma_id, 0, fused_id, 6);

        auto fused_node   = g.node(fused_id);
        auto bn_node_name = bn_node->name();

        transfer_driving_nodes_and_remove_old_node(g, fused_node, bn_node, true);

        fused_node->set_assigned_target(assigned_target);
        fused_node->set_common_node_parameters(NodeParams{ depth_conv_node->name() + "+" + bn_node_name, assigned_target });

        // Remove convolution node
        g.remove_node(depth_conv_node->id());
    }
    else
    {
        ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of depthwise convolution with batch normalization due to the presence of an output accessor\n");
    }
}

template <typename N>
void fuse_node_with_activation(Graph &g, const Edge *output_edge, const std::set<Activation> &supported_fused_activations)
{
    ARM_COMPUTE_ERROR_ON(output_edge == nullptr);

    auto *n_node   = arm_compute::utils::cast::polymorphic_downcast<N *>(output_edge->producer());
    auto *act_node = arm_compute::utils::cast::polymorphic_downcast<ActivationLayerNode *>(output_edge->consumer());

    ARM_COMPUTE_ERROR_ON(act_node->output(0) == nullptr || n_node->output(0) == nullptr);

    // Check if activation is supported for fusion
    if(supported_fused_activations.count(act_node->activation_info().activation()) == 0)
    {
        return;
    }

    // EltwiseLayerNode can only be fused when the data type is float
    if(n_node->type() == NodeType::EltwiseLayer && !is_data_type_float(n_node->output(0)->desc().data_type))
    {
        return;
    }

    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing node with ID : " << output_edge->producer_id()
                                  << " with Activation Layer node with ID : " << output_edge->consumer_id() << std::endl);

    // Prevent fusion if fused node has an output accessor
    if(n_node->output(0)->accessor() == nullptr)
    {
        // Set activation info to fused node
        n_node->set_fused_activation(act_node->activation_info());

        transfer_driving_nodes_and_remove_old_node(g, n_node, act_node, false);
    }
    else
    {
        ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of node with activation due to the presence of an output accessor\n");
    }
}

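// Note on fuse_node_with_activation: the activation does not survive as a separate
// node. The producer (convolution, batch normalization, eltwise, ...) records the
// ActivationLayerInfo and the backend applies it as part of the main kernel.
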
template <typename N>
void fuse_pad_with_convolution(Graph &g, const Edge *output_edge)
{
    auto *pad_node  = arm_compute::utils::cast::polymorphic_downcast<PadLayerNode *>(output_edge->producer());
    auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<N *>(output_edge->consumer());

    const Edge *input_edge = pad_node->input_edge(0);
    if(input_edge != nullptr && input_edge->tensor() != nullptr && pad_node->output(0)->accessor() == nullptr
       && pad_node->pad_value().get<float>() == 0.0)
    {
        const DataLayout  layout       = input_edge->tensor()->desc().layout;
        const PaddingList padding_list = pad_node->padding();

        const unsigned int height_index = get_dimension_idx(layout, DataLayoutDimension::HEIGHT);
        const unsigned int width_index  = get_dimension_idx(layout, DataLayoutDimension::WIDTH);

        const PaddingInfo pad_w = width_index < padding_list.size() ? padding_list[width_index] : PaddingInfo(0, 0);
        const PaddingInfo pad_h = height_index < padding_list.size() ? padding_list[height_index] : PaddingInfo(0, 0);

        if(is_padding_in_height_or_width(layout, padding_list))
        {
            // Add paddings to the convolution node
            const PadStrideInfo conv_info = conv_node->convolution_info();
            const PadStrideInfo new_conv_info(
                conv_info.stride().first,
                conv_info.stride().second,
                conv_info.pad_left() + pad_w.first,
                conv_info.pad_right() + pad_w.second,
                conv_info.pad_top() + pad_h.first,
                conv_info.pad_bottom() + pad_h.second,
                conv_info.round());
            conv_node->set_convolution_info(new_conv_info);

            // Update drivers of the convolution node
            std::vector<NodeIdxPair> pad_driver_nodes = get_driver_nodes(*pad_node);
            g.remove_node(pad_node->id());

            // Update fused node inputs
            for(auto &driver_node : pad_driver_nodes)
            {
                g.add_connection(driver_node.node_id, driver_node.index, conv_node->id(), 0);
            }
        }
    }
}

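// Worked example for fuse_pad_with_convolution (assuming NHWC, where dimension
// index 1 is width and 2 is height): a zero-value PadLayerNode with
// padding_list = { { 0, 0 }, { 1, 1 }, { 1, 1 } } feeding a convolution with no
// padding is removed, and the convolution's PadStrideInfo becomes
// pad_left = pad_right = pad_top = pad_bottom = 1.
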
template <typename N1, typename N2, typename F, typename... Args>
void fuse_layer(Graph &g, std::function<bool(INode &)> const &prec, const F fuse_fcn, Args &&... optional_arguments)
{
    // Note that fused nodes may be added to the end of the node list.
    // Instead of only looping over the original list of nodes, we loop over the current node list which could be growing.
    // This is intentional as it probes the newly added fused nodes for further fusing opportunities.
    for(unsigned int i = 0; i < g.nodes().size(); ++i)
    {
        auto node = g.node(i);
        // Check if the node is of type N1 and not a branching node
        if(node && node->type() == N1::node_type && node->output_edges().size() == 1)
        {
            const auto output_edge_id = *node->output_edges().begin();
            const auto output_edge    = g.edge(output_edge_id);

            // Check if the following node is of type N2
            if((output_edge != nullptr) && (output_edge->consumer() != nullptr) && (output_edge->consumer()->type() == N2::node_type) && prec(*output_edge->producer()))
            {
                fuse_fcn(g, output_edge, optional_arguments...);
            }
        }
    }
}

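// Illustrative instantiation of the pairwise fuse_layer overload (this exact call
// appears in NodeFusionMutator::mutate below):
//
//   detail::fuse_layer<ConvolutionLayerNode, ActivationLayerNode>(
//       g, empty_prec, detail::fuse_node_with_activation<ConvolutionLayerNode>, supported_fused_activations);
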
/** Check valid combinations:
 *
 * | Main operator | Post operators  |
 * |:--------------|:----------------|
 * | conv          | add             |
 * | conv          | act + add       |
 * | conv          | add + act       |
 * | conv          | act + add + act |
 *
*/
#define MAX_VALID_COMBINATION 4
#define MAX_POST_OP_NUM 3
NodeType valid_post_op_type[MAX_VALID_COMBINATION][MAX_POST_OP_NUM] = { { EltwiseLayerNode::node_type },
    { EltwiseLayerNode::node_type, ActivationLayerNode::node_type },
    { ActivationLayerNode::node_type, EltwiseLayerNode::node_type },
    { ActivationLayerNode::node_type, EltwiseLayerNode::node_type, ActivationLayerNode::node_type }
};

bool check_post_op_type(NodeType *post_op_type, int len)
{
    if(len > MAX_POST_OP_NUM || len <= 0)
    {
        return false;
    }

    bool found = false;
    for(int i = 0; i < MAX_VALID_COMBINATION; ++i)
    {
        for(int j = 0; j < len; ++j)
        {
            if(post_op_type[j] != valid_post_op_type[i][j])
            {
                found = false;
                break;
            }
            found = true;
        }
        if(found)
            break;
    }

    return found;
}

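// Illustrative results of check_post_op_type (hypothetical sequences, not taken
// from a real graph):
//
//   NodeType seq_add[]     = { EltwiseLayerNode::node_type };                                    // add
//   NodeType seq_act_add[] = { ActivationLayerNode::node_type, EltwiseLayerNode::node_type };    // act + add
//   NodeType seq_act_act[] = { ActivationLayerNode::node_type, ActivationLayerNode::node_type }; // act + act
//
//   check_post_op_type(seq_add, 1);     // true  ("add" combination)
//   check_post_op_type(seq_act_add, 2); // true  ("act + add" combination)
//   check_post_op_type(seq_act_act, 2); // false (no matching combination)
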
void fuse_convolution_with_post_op(Graph &g, INode *fused_node, std::list<INode *> post_op_node_list, int prev_op_dst_pos)
{
    unsigned int op_idx = 0;
    // Fuse post operators with conv
    for(const auto &post_op : post_op_node_list)
    {
        switch(post_op->type())
        {
            case EltwiseLayerNode::node_type:
            {
                auto *eltwise_node = arm_compute::utils::cast::polymorphic_downcast<EltwiseLayerNode *>(post_op);
                ARM_COMPUTE_ERROR_ON(eltwise_node->output(0) == nullptr);

                fused_node->post_op_info_list().push_back(std::make_unique<ConvPostOpInfoEltwiseAdd>(prev_op_dst_pos, eltwise_node->convert_policy()));
                ARM_COMPUTE_LOG_GRAPH_VERBOSE(" with Elementwise Layer node with ID : " << post_op->id());
                break;
            }
            case ActivationLayerNode::node_type:
            {
                auto *act_node = arm_compute::utils::cast::polymorphic_downcast<ActivationLayerNode *>(post_op);
                ARM_COMPUTE_ERROR_ON(act_node->output(0) == nullptr);

                fused_node->post_op_info_list().push_back(std::make_unique<ConvPostOpInfoActivation>(act_node->activation_info()));
                ARM_COMPUTE_LOG_GRAPH_VERBOSE(" with Activation Layer node with ID : " << post_op->id());
                break;
            }
            default:
            {
                break;
            }
        }

        if(op_idx == post_op_node_list.size() - 1) // last fusable node
        {
            transfer_driving_nodes_and_remove_old_node(g, fused_node, post_op, true);
        }
        else
        {
            // Remove node
            g.remove_node(post_op->id());
        }
        op_idx++;
    }
}

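// Note on fuse_convolution_with_post_op: only the last post op hands its driving
// nodes and output accessor over to the fused node; the intermediate post ops are
// simply removed, as their outputs become internal to the fused kernel.
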
std::list<INode *> get_post_op_list(Graph &g, int &eltwise_operand_id, int &prev_op_dst_pos, unsigned int conv_node_id, const std::set<Activation> &supported_fused_activations)
{
    std::list<INode *> post_op_node_list    = {};
    NodeID             prev_op_dst_id       = conv_node_id;
    NodeType           post_op_type_list[3] = { NodeType::Dummy, NodeType::Dummy, NodeType::Dummy };
    int                post_op_idx          = 0;

    // Get list of the connected nodes
    auto current_node = g.node(conv_node_id);

    while(post_op_node_list.size() < 3)
    {
        // This convolution node must have only one output edge, otherwise this function would not have been called

        auto current_output_edge_id = current_node->output_edges().begin();
        auto current_output_edge    = g.edge(*current_output_edge_id);
        auto post_op_node           = current_output_edge->consumer();

        bool fusable_post_op = false;
        if(post_op_node != nullptr && post_op_node->output_edges().size() > 0)
        {
            switch(post_op_node->type())
            {
                case EltwiseLayerNode::node_type:
                {
                    auto *eltwise_node = arm_compute::utils::cast::polymorphic_downcast<EltwiseLayerNode *>(post_op_node);
                    ARM_COMPUTE_ERROR_ON(eltwise_node->output(0) == nullptr);
                    if(eltwise_node->output(0)->accessor() == nullptr)
                    {
                        post_op_node_list.push_back(post_op_node);
                        fusable_post_op                  = true;
                        post_op_type_list[post_op_idx++] = eltwise_node->type();

                        // Extract elementwise inputs
                        const auto eltwise_input_id_0 = eltwise_node->input_edge(0)->producer_id();
                        const auto eltwise_input_id_1 = eltwise_node->input_edge(1)->producer_id();
                        if(eltwise_input_id_0 == prev_op_dst_id)
                        {
                            eltwise_operand_id = eltwise_input_id_1;
                            prev_op_dst_pos    = 0;
                        }
                        else if(eltwise_input_id_1 == prev_op_dst_id)
                        {
                            eltwise_operand_id = eltwise_input_id_0;
                            prev_op_dst_pos    = 1;
                        }
                    }
                    else
                    {
                        ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of convolution node with elementwise due to the presence of an output accessor\n");
                    }
                    break;
                }
                case ActivationLayerNode::node_type:
                {
                    auto *act_node = arm_compute::utils::cast::polymorphic_downcast<ActivationLayerNode *>(post_op_node);
                    ARM_COMPUTE_ERROR_ON(act_node->output(0) == nullptr);
                    // Check if activation is supported for fusion
                    if(supported_fused_activations.count(act_node->activation_info().activation()) == 0)
                    {
                        break;
                    }
                    if(act_node->output(0)->accessor() == nullptr)
                    {
                        post_op_node_list.push_back(post_op_node);
                        fusable_post_op                  = true;
                        post_op_type_list[post_op_idx++] = act_node->type();
                        prev_op_dst_id                   = act_node->id();
                    }
                    else
                    {
                        ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of convolution node with post ops due to the presence of an output accessor\n");
                    }
                    break;
                }
                default:
                {
                    break;
                }
            }

            // Check if the node is not a branching node and the current node is fusable
            if(post_op_node->output_edges().size() == 1 && fusable_post_op == true)
            {
                current_node = post_op_node;
            }
            else
            {
                break;
            }
        }
        else
        {
            // No consumer to probe; stop searching to avoid spinning on the same node forever
            break;
        }
    }

    // Check whether it's a valid post op list
    if(post_op_node_list.size() > 0)
    {
        bool fuse_with_post_op = check_post_op_type(post_op_type_list, post_op_node_list.size());
        if(!fuse_with_post_op)
        {
            post_op_node_list.clear();
        }
    }

    return post_op_node_list;
}

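// Illustrative traversal of get_post_op_list (hypothetical node IDs; assuming no
// output accessors and no branching along the chain): for
//
//   conv(0) -> act(1) -> add(2) -> act(3) -> next
//
// the walk collects { act(1), add(2), act(3) }, records the add's other operand in
// eltwise_operand_id, and check_post_op_type accepts the sequence as the
// "act + add + act" combination.
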
/** Fuse the following operator sequences:
 *
 * | Main operator | Post operators  |
 * |:--------------|:----------------|
 * | conv          | add             |
 * | conv          | act + add       |
 * | conv          | add + act       |
 * | conv          | act + add + act |
 *
 * Note: currently, only GEMM supports fusion with post operators
*/
void fuse_convolution_with_post_ops(Graph &g, const Edge *output_edge, unsigned int conv_node_id, const std::set<Activation> &supported_fused_activations)
{
    ARM_COMPUTE_ERROR_ON(output_edge == nullptr);

    auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(output_edge->producer());
    ARM_COMPUTE_ERROR_ON(conv_node->output(0) == nullptr);

    const ConvolutionMethod conv_algorithm = conv_node->convolution_method();
    if(conv_algorithm != ConvolutionMethod::GEMM)
    {
        ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of convolution node with post ops due to non GEMM convolution\n");
        return;
    }

    // Prevent fusion if fused node has an output accessor
    if(conv_node->output(0)->accessor() == nullptr)
    {
        // If the data type is FP32/FP16, the data layout is NHWC, and the filter size is 1x1, fuse the convolution with post ops, as Conv1x1 always leads to GEMM.
        const Edge *input_edge = conv_node->input_edge(1);
        if(input_edge != nullptr && input_edge->tensor() != nullptr)
        {
            const DataLayout  data_layout  = input_edge->tensor()->desc().layout;
            const DataType    data_type    = input_edge->tensor()->desc().data_type;
            const TensorShape tensor_shape = input_edge->tensor()->desc().shape;
            if((data_layout != DataLayout::NHWC) || (is_data_type_float(data_type) == false) || (tensor_shape.y() != 1) || (tensor_shape.z() != 1))
            {
                ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of convolution node with post ops due to non GEMM convolution\n");
                return;
            }
        }
        else
        {
            return;
        }

        // Get post op list
        int                eltwise_operand_id = 0;
        int                prev_op_dst_pos    = 0; // Previous operator dst's position in the current operator
        std::list<INode *> post_op_node_list  = get_post_op_list(g, eltwise_operand_id, prev_op_dst_pos, conv_node_id, supported_fused_activations);

        if(post_op_node_list.size() == 0)
        {
            return;
        }
        else // Do convolution fusion with post ops if there is one (elementwise) or more operators
        {
            const Target assigned_target = conv_node->assigned_target();

            // Extract conv inputs
            const auto   conv_input_id   = conv_node->input_edge(0)->producer_id();
            const auto   conv_weights_id = conv_node->input_edge(1)->producer_id();
            const auto   conv_info       = conv_node->convolution_info();
            const auto   conv_method     = conv_node->convolution_method();
            const auto   num_groups      = conv_node->num_groups();
            FastMathHint fast_math_hint  = conv_node->fast_math_hint();

            // Create the fused node
            const NodeID fused_id = g.add_node<FusedConvolutionWithPostOpNode>(conv_info, num_groups, conv_method, fast_math_hint);
            ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing convolution node with ID : " << conv_node->id());

            // Add connections from the conv inputs to the fused node
            g.add_connection(conv_input_id, 0, fused_id, 0);
            g.add_connection(conv_weights_id, 0, fused_id, 1);
            if(conv_node->input_edge(2) != nullptr)
            {
                auto conv_bias_id = conv_node->input_edge(2)->producer_id();
                g.add_connection(conv_bias_id, 0, fused_id, 2);
            }
            // Add the elementwise operand in case one of the post ops is an elementwise operation
            auto it = std::find_if(post_op_node_list.begin(),
                                   post_op_node_list.end(),
                                   [&](const INode * nd)
            {
                return (nd->type() == graph::NodeType::EltwiseLayer);
            });

            if(it != post_op_node_list.end())
            {
                g.add_connection(eltwise_operand_id, 0, fused_id, 3);
            }
            g.remove_node(conv_node->id());

            // Update fused node outputs
            auto fused_node = g.node(fused_id);
            fused_node->set_assigned_target(assigned_target);

            // Fuse convolution with post ops
            fuse_convolution_with_post_op(g, fused_node, post_op_node_list, prev_op_dst_pos);

            post_op_node_list.clear();
            ARM_COMPUTE_LOG_GRAPH_VERBOSE(std::endl);
        }
    }
    else
    {
        ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of convolution node with post ops due to the presence of an output accessor\n");
    }
}

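// Input port layout of the FusedConvolutionWithPostOpNode built above: 0: input,
// 1: weights, 2: bias (optional), 3: elementwise operand (only wired when one of
// the post ops is an elementwise addition).
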
void fuse_convolution_batch_normalization_with_post_ops(Graph &g, const Edge *output_edge, unsigned int conv_node_id, const std::set<Activation> &supported_fused_activations)
{
    ARM_COMPUTE_ERROR_ON(output_edge == nullptr);

    auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<FusedConvolutionBatchNormalizationNode *>(output_edge->producer());
    ARM_COMPUTE_ERROR_ON(conv_node->output(0) == nullptr);
    const ConvolutionMethod conv_algorithm = conv_node->convolution_method();
    if(conv_algorithm != ConvolutionMethod::GEMM)
    {
        ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of convolution node with post ops due to non GEMM convolution\n");
        return;
    }

    // Prevent fusion if fused node has an output accessor
    if(conv_node->output(0)->accessor() == nullptr)
    {
        // If the data type is FP32/FP16, the data layout is NHWC, and the filter size is 1x1, fuse the convolution with post ops, as Conv1x1 always leads to GEMM.
        const Edge *input_edge = conv_node->input_edge(1);
        if(input_edge != nullptr && input_edge->tensor() != nullptr)
        {
            const DataLayout  data_layout  = input_edge->tensor()->desc().layout;
            const DataType    data_type    = input_edge->tensor()->desc().data_type;
            const TensorShape tensor_shape = input_edge->tensor()->desc().shape;
            if((data_layout != DataLayout::NHWC) || (is_data_type_float(data_type) == false) || (tensor_shape.y() != 1) || (tensor_shape.z() != 1))
            {
                ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of convolution node with post ops due to non GEMM convolution\n");
                return;
            }
        }
        else
        {
            return;
        }

        // Get post op list
        int                eltwise_operand_id = 0;
        int                prev_op_dst_pos    = 0; // Previous operator dst's position in the current operator
        std::list<INode *> post_op_node_list  = get_post_op_list(g, eltwise_operand_id, prev_op_dst_pos, conv_node_id, supported_fused_activations);

        if(post_op_node_list.size() == 0)
        {
            return;
        }
        else // Do convolution fusion with post ops if there is one (elementwise) or more operators
        {
            const Target assigned_target = conv_node->assigned_target();

            // Extract conv inputs
            const auto   conv_input_id   = conv_node->input_edge(0)->producer_id();
            const auto   conv_weights_id = conv_node->input_edge(1)->producer_id();
            const auto   bn_mean_id      = conv_node->input_edge(3)->producer_id();
            const auto   bn_var_id       = conv_node->input_edge(4)->producer_id();
            const auto   conv_info       = conv_node->convolution_info();
            const auto   conv_method     = conv_node->convolution_method();
            const auto   num_groups      = conv_node->num_groups();
            FastMathHint fast_math_hint  = conv_node->fast_math_hint();

            // Create the fused node
            const float  epsilon  = conv_node->epsilon();
            const NodeID fused_id = g.add_node<FusedConvolutionBatchNormalizationWithPostOpsNode>(epsilon, conv_info, num_groups, conv_method, fast_math_hint);

            ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing FusedConvolutionBatchNormalization node with ID : " << conv_node->id());

            // Add connections from the conv inputs to the fused node
            g.add_connection(conv_input_id, 0, fused_id, 0);
            g.add_connection(conv_weights_id, 0, fused_id, 1);

            if(conv_node->input_edge(2) != nullptr)
            {
                auto conv_bias_id = conv_node->input_edge(2)->producer_id();
                g.add_connection(conv_bias_id, 0, fused_id, 2);
            }
            g.add_connection(bn_mean_id, 0, fused_id, 3);
            g.add_connection(bn_var_id, 0, fused_id, 4);

            // Move connections of old FusedConvolutionBatchNormalization to the fused node
            if(conv_node->input_edge(5) != nullptr)
            {
                const auto bn_beta_id = conv_node->input_edge(5)->producer_id();
                g.add_connection(bn_beta_id, 0, fused_id, 5);
            }

            if(conv_node->input_edge(6) != nullptr)
            {
                const auto bn_gamma_id = conv_node->input_edge(6)->producer_id();
                g.add_connection(bn_gamma_id, 0, fused_id, 6);
            }

            // Add the elementwise operand in case one of the post ops is an elementwise operation
            auto it = std::find_if(post_op_node_list.begin(),
                                   post_op_node_list.end(),
                                   [&](const INode * nd)
            {
                return (nd->type() == graph::NodeType::EltwiseLayer);
            });

            if(it != post_op_node_list.end())
            {
                g.add_connection(eltwise_operand_id, 0, fused_id, 7);
            }

            // Update fused node outputs
            auto fused_node = g.node(fused_id);
            fused_node->set_assigned_target(assigned_target);

            auto conv_node_name = conv_node->name();

            // Collect the post op names
            std::string post_ops_name = "";
            for(auto &post_op : post_op_node_list)
            {
                post_ops_name += post_op->name();
            }
            fused_node->set_common_node_parameters(NodeParams{ conv_node_name + "+" + post_ops_name, assigned_target });

            // Fuse convolution with post ops
            fuse_convolution_with_post_op(g, fused_node, post_op_node_list, prev_op_dst_pos);

            post_op_node_list.clear();
            g.remove_node(conv_node->id());
            ARM_COMPUTE_LOG_GRAPH_VERBOSE(std::endl);
        }
    }
    else
    {
        ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of convolution node with post ops due to the presence of an output accessor\n");
    }
}

template <typename N1, typename F, typename... Args>
void fuse_layer(Graph &g, std::function<bool(INode &)> const &prec, const F fuse_fcn, Args &&... optional_arguments)
{
    // Note that fused nodes may be added to the end of the node list.
    // Instead of only looping over the original list of nodes, we loop over the current node list which could be growing.
    // This is intentional as it probes the newly added fused nodes for further fusing opportunities.
    for(unsigned int i = 0; i < g.nodes().size(); ++i)
    {
        auto node = g.node(i);
        // Check if the node is of type N1 and not a branching node
        if(node && node->type() == N1::node_type && node->output_edges().size() == 1)
        {
            const auto output_edge_id = *node->output_edges().begin();
            const auto output_edge    = g.edge(output_edge_id);

            // Check that the edge is valid and the precondition (e.g. the assigned target) holds
            if((output_edge != nullptr) && (output_edge->consumer() != nullptr) && prec(*output_edge->producer()))
            {
                fuse_fcn(g, output_edge, i, optional_arguments...);
            }
        }
    }
}
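// Illustrative instantiation of the single-type fuse_layer overload (this exact
// call appears in NodeFusionMutator::mutate below); the fusion function receives
// the node index so it can walk the chain of post ops itself:
//
//   detail::fuse_layer<ConvolutionLayerNode>(
//       g, cl_target_prec, detail::fuse_convolution_with_post_ops, supported_fused_activations);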
} // namespace detail

const char *NodeFusionMutator::name()
{
    return "NodeFusionMutator";
}

IGraphMutator::MutationType NodeFusionMutator::type() const
{
    return IGraphMutator::MutationType::Backend;
}

void NodeFusionMutator::mutate(Graph &g)
{
    // Supported activations when fusing
    const std::set<Activation> supported_fused_activations = { Activation::ABS, Activation::BOUNDED_RELU, Activation::ELU,
                                                               Activation::HARD_SWISH, Activation::IDENTITY, Activation::LEAKY_RELU,
                                                               Activation::LINEAR, Activation::LOGISTIC, Activation::LU_BOUNDED_RELU,
                                                               Activation::RELU, Activation::SOFT_RELU, Activation::SQRT,
                                                               Activation::SQUARE, Activation::TANH
                                                             };

    // Preconditions
    auto empty_prec = [](INode &)
    {
        return true;
    };
    auto cl_target_prec = [](INode & n)
    {
        return n.assigned_target() == Target::CL;
    };
    auto qs8_prec = [&g](INode & n)
    {
        ARM_COMPUTE_ERROR_ON(n.output(0) == nullptr);

        const auto output_edge_id = *n.output_edges().begin();
        const auto output_edge    = g.edge(output_edge_id);
        // To perform fusion the two nodes must have the same output quantization information
        const bool same_qinfo     = n.output(0)->desc().quant_info == output_edge->producer()->output(0)->desc().quant_info;
        const bool output_qasymm8 = n.output(0)->desc().data_type == DataType::QASYMM8;

        return (output_qasymm8 && same_qinfo) || !output_qasymm8;
    };

    // Fusion mutations

    detail::fuse_layer<PadLayerNode, ConvolutionLayerNode>(g, empty_prec, detail::fuse_pad_with_convolution<ConvolutionLayerNode>);
    detail::fuse_layer<PadLayerNode, DepthwiseConvolutionLayerNode>(g, empty_prec, detail::fuse_pad_with_convolution<DepthwiseConvolutionLayerNode>);
    // The fusion of post ops into ConvolutionLayer:
    // - must occur after the fusion of PadLayer into ConvolutionLayer
    // - must occur before the fusion of a plain ActivationLayer into ConvolutionLayer, as it takes precedence
    detail::fuse_layer<ConvolutionLayerNode>(g, cl_target_prec, detail::fuse_convolution_with_post_ops, supported_fused_activations);
    detail::fuse_layer<BatchNormalizationLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<BatchNormalizationLayerNode>, supported_fused_activations);
    detail::fuse_layer<ConvolutionLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<ConvolutionLayerNode>, supported_fused_activations);
    detail::fuse_layer<DepthwiseConvolutionLayerNode, ActivationLayerNode>(g, qs8_prec, detail::fuse_node_with_activation<DepthwiseConvolutionLayerNode>, supported_fused_activations);
    detail::fuse_layer<FullyConnectedLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<FullyConnectedLayerNode>, supported_fused_activations);
    detail::fuse_layer<EltwiseLayerNode, ActivationLayerNode>(g, cl_target_prec, detail::fuse_node_with_activation<EltwiseLayerNode>, supported_fused_activations);
    // The fusion of BatchNormalizationLayer must occur after the fusion of ActivationLayer, because
    // FusedConvolutionBatchNormalizationNode assumes the batch normalization is already fused with an activation, if any
    detail::fuse_layer<ConvolutionLayerNode, BatchNormalizationLayerNode>(g, empty_prec, detail::fuse_convolution_with_batch_normalization);
    detail::fuse_layer<DepthwiseConvolutionLayerNode, BatchNormalizationLayerNode>(g, empty_prec, detail::fuse_depthwise_convolution_with_batch_normalization);
    detail::fuse_layer<FusedConvolutionBatchNormalizationNode>(g, cl_target_prec, detail::fuse_convolution_batch_normalization_with_post_ops, supported_fused_activations);
}
} // namespace graph
} // namespace arm_compute