Blame - src/runtime/CPP/CPPScheduler.cpp - ml/ComputeLibrary

2017-09-04 18:44:23 +0100

[diff] [blame]

367

{

368

ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel");

369

Michalis Spyrou

2020-05-21 15:02:36 +0100

[diff] [blame^]

370

const Window &max_window = kernel->window();

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

371

Joseph Dobson

2020-02-11 19:32:11 +0000

[diff] [blame]

372

if(hints.split_dimension() == IScheduler::split_dimensions_all)

Moritz Pflanzer

2fd5d95

2017-09-24 12:10:46 +0100

[diff] [blame]

373

{

Joseph Dobson

2020-02-11 19:32:11 +0000

[diff] [blame]

374

/*

375

* If the split dimension equals IScheduler::split_dimensions_all (size_t max), this signals that we should parallelise over

376

* all dimensions

377

*/

378

const std::size_t m = max_window.num_iterations(Window::DimX);

379

const std::size_t n = max_window.num_iterations(Window::DimY);

Moritz Pflanzer

2fd5d95

2017-09-24 12:10:46 +0100

[diff] [blame]

380

Michalis Spyrou

2020-05-21 15:02:36 +0100

[diff] [blame^]

381

// In C++17 this can be replaced with a structured binding: auto [m_threads, n_threads] = split_2d(...)

Joseph Dobson

2020-02-11 19:32:11 +0000

[diff] [blame]

382

unsigned m_threads, n_threads;

383

std::tie(m_threads, n_threads) = split_2d(_impl->_num_threads, m, n);

384

385

std::vector<IScheduler::Workload> workloads;

Michalis Spyrou

2020-05-21 15:02:36 +0100

[diff] [blame^]

386

for(unsigned int ni = 0; ni != n_threads; ++ni)

Joseph Dobson

2020-02-11 19:32:11 +0000

[diff] [blame]

387

{

Michalis Spyrou

2020-05-21 15:02:36 +0100

[diff] [blame^]

388

for(unsigned int mi = 0; mi != m_threads; ++mi)

Joseph Dobson

2020-02-11 19:32:11 +0000

[diff] [blame]

389

{

390

workloads.push_back(

Michalis Spyrou

2020-05-21 15:02:36 +0100

[diff] [blame^]

391

[ni, mi, m_threads, n_threads, &max_window, &kernel](const ThreadInfo & info)

392

{

393

//narrow the window to our mi-ni workload

394

Window win = max_window.split_window(Window::DimX, mi, m_threads)

395

.split_window(Window::DimY, ni, n_threads);

Joseph Dobson

2020-02-11 19:32:11 +0000

[diff] [blame]

396

Michalis Spyrou

2020-05-21 15:02:36 +0100

[diff] [blame^]

397

win.validate();

Joseph Dobson

2020-02-11 19:32:11 +0000

[diff] [blame]

398

Michalis Spyrou

2020-05-21 15:02:36 +0100

[diff] [blame^]

399

Window thread_locator;

400

thread_locator.set(Window::DimX, Window::Dimension(mi, m_threads));

401

thread_locator.set(Window::DimY, Window::Dimension(ni, n_threads));

Joseph Dobson

2020-02-11 19:32:11 +0000

[diff] [blame]

402

Michalis Spyrou

2020-05-21 15:02:36 +0100

[diff] [blame^]

403

thread_locator.validate();

Joseph Dobson

2020-02-11 19:32:11 +0000

[diff] [blame]

404

Michalis Spyrou

2020-05-21 15:02:36 +0100

[diff] [blame^]

405

kernel->run_nd(win, info, thread_locator);

406

});

Joseph Dobson

2020-02-11 19:32:11 +0000

[diff] [blame]

407

}

408

}

409

run_workloads(workloads);

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

410

}

411

else

412

{

Joseph Dobson

2020-02-11 19:32:11 +0000

[diff] [blame]

413

const unsigned int num_iterations = max_window.num_iterations(hints.split_dimension());

414

const unsigned int num_threads = std::min(num_iterations, _impl->_num_threads);

415

416

if(num_iterations == 0)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

417

{

Joseph Dobson

2020-02-11 19:32:11 +0000

[diff] [blame]

return;

}

if(!kernel->is_parallelisable() || num_threads == 1)

422

{

423

ThreadInfo info;

424

info.cpu_info = &_cpu_info;

Michalis Spyrou

2020-05-21 15:02:36 +0100

[diff] [blame^]

425

if(inputs.empty())

426

{

427

kernel->run(max_window, info);

}

else

{

kernel->run_op(inputs, outputs, max_window, info);

432

}

Joseph Dobson

2020-02-11 19:32:11 +0000

[diff] [blame]

}

else

{

unsigned int num_windows = 0;

437

switch(hints.strategy())

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

438

{

Joseph Dobson

2020-02-11 19:32:11 +0000

[diff] [blame]

439

case StrategyHint::STATIC:

440

num_windows = num_threads;

441

break;

442

case StrategyHint::DYNAMIC:

443

{

444

const unsigned int granule_threshold = (hints.threshold() <= 0) ? num_threads : static_cast<unsigned int>(hints.threshold());

445

// Make sure we don't use some windows which are too small as this might create some contention on the ThreadFeeder

446

num_windows = num_iterations > granule_threshold ? granule_threshold : num_iterations;

break;

}

default:

ARM_COMPUTE_ERROR("Unknown strategy");

Anthony Barbier

376c85f

2018-05-25 14:17:21 +0100

[diff] [blame]

451

}

Joseph Dobson

2020-02-11 19:32:11 +0000

[diff] [blame]

452

std::vector<IScheduler::Workload> workloads(num_windows);

453

for(unsigned int t = 0; t < num_windows; t++)

Anthony Barbier

376c85f

2018-05-25 14:17:21 +0100

[diff] [blame]

454

{

Joseph Dobson

2020-02-11 19:32:11 +0000

[diff] [blame]

455

//Capture 't' by copy, all the other variables by reference:

Michalis Spyrou

2020-05-21 15:02:36 +0100

[diff] [blame^]

456

workloads[t] = [t, &hints, &max_window, &num_windows, &kernel, &inputs, &outputs](const ThreadInfo & info)

Joseph Dobson

2020-02-11 19:32:11 +0000

[diff] [blame]

457

{

458

Window win = max_window.split_window(hints.split_dimension(), t, num_windows);

459

win.validate();

Michalis Spyrou

2020-05-21 15:02:36 +0100

[diff] [blame^]

if(inputs.empty())

{

kernel->run(win, info);

}

else

{

kernel->run_op(inputs, outputs, win, info);

468

}

Joseph Dobson

2020-02-11 19:32:11 +0000

[diff] [blame]

469

};

470

}

471

run_workloads(workloads);

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

472

}

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

473

}

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

474

}

Michalis Spyrou