Blame - src/runtime/CPP/CPPScheduler.cpp - ml/ComputeLibrary

2017-09-04 18:44:23 +0100

[diff] [blame]

1

/*

Joseph Dobson

2020-02-11 19:32:11 +0000

[diff] [blame]

2

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

3

*

4

* SPDX-License-Identifier: MIT

5

*

6

* Permission is hereby granted, free of charge, to any person obtaining a copy

7

* of this software and associated documentation files (the "Software"), to

8

* deal in the Software without restriction, including without limitation the

9

* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or

10

* sell copies of the Software, and to permit persons to whom the Software is

11

* furnished to do so, subject to the following conditions:

12

*

13

* The above copyright notice and this permission notice shall be included in all

14

* copies or substantial portions of the Software.

15

*

16

* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

17

* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

18

* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

19

* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

20

* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

21

* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

22

* SOFTWARE.

23

*/

24

#include "arm_compute/runtime/CPP/CPPScheduler.h"

25

26

#include "arm_compute/core/CPP/ICPPKernel.h"

27

#include "arm_compute/core/Error.h"

28

#include "arm_compute/core/Helpers.h"

29

#include "arm_compute/core/Utils.h"

Pablo Tello

7fad9b1

2018-03-14 17:55:27 +0000

[diff] [blame]

30

#include "arm_compute/runtime/CPUUtils.h"

Pablo Tello

2019-09-19 16:39:04 +0100

[diff] [blame]

31

#include "support/Mutex.h"

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

32

Anthony Barbier

2018-06-28 13:39:35 +0100

[diff] [blame]

33

#include <atomic>

Moritz Pflanzer

2017-09-08 13:48:23 +0100

[diff] [blame]

34

#include <condition_variable>

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

35

#include <iostream>

Georgios Pinitas

2019-07-25 13:31:10 +0100

[diff] [blame]

36

#include <list>

Moritz Pflanzer

2017-09-08 13:48:23 +0100

[diff] [blame]

37

#include <mutex>

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

38

#include <system_error>

39

#include <thread>

40

Moritz Pflanzer

2017-09-08 13:48:23 +0100

[diff] [blame]

41

namespace arm_compute

42

{

Anthony Barbier

2018-05-25 13:32:10 +0100

[diff] [blame]

namespace

{

/** Hands out consecutive indices from the half-open range [start, end) to concurrent callers. */
class ThreadFeeder
{
public:
    /** Constructor
     *
     * @param[in] start First value that will be returned by the feeder
     * @param[in] end   End condition (the last in-range value returned by get_next() is end - 1)
     */
    explicit ThreadFeeder(unsigned int start = 0, unsigned int end = 0)
        : _atomic_counter(start), _end(end)
    {
    }

    /** Fetch the next index and report whether it is still inside the range.
     *
     * The internal counter is atomically incremented on every call, including calls made
     * after the range has been exhausted.
     *
     * @param[out] next Receives the counter value read before the increment. It is always
     *                  overwritten, but is only a usable index when true is returned.
     *
     * @return True if @p next is below the end of the range, false otherwise.
     */
    bool get_next(unsigned int &next)
    {
        // Relaxed ordering: only the atomicity of the increment is relied upon here
        const unsigned int fetched = _atomic_counter.fetch_add(1u, std::memory_order_relaxed);
        next                       = fetched;
        return fetched < _end;
    }

private:
    std::atomic_uint   _atomic_counter; /**< Next index to hand out */
    const unsigned int _end;            /**< One past the last valid index */
};

73

Joseph Dobson

2020-02-11 19:32:11 +0000

[diff] [blame]

74

/** Given two dimensions and a maximum number of threads to utilise, calculate the best
 * combination of threads that fit in (multiplied together) max_threads.
 *
 * This algorithm assumes that work in either of the dimensions is equally difficult
 * to compute
 *
 * @param[in] max_threads Maximum number of threads that may be used in total
 * @param[in] m           Problem size in the first dimension
 * @param[in] n           Problem size in the second dimension
 *
 * @returns [m_nthreads, n_nthreads] A pair of the threads that should be used in each dimension.
 *          When no factorisation is found (or n is zero) all threads are given to the larger
 *          dimension, capped by that dimension's size.
 */
std::pair<unsigned, unsigned> split_2d(unsigned max_threads, std::size_t m, std::size_t n)
{
    /*
     * We want the same ratio of threads in M & N to the ratio of m and n problem size
     *
     * Therefore:    mt/nt == m/n    where mt*nt == max_threads
     *
     *          nt = max_threads/mt    =>    mt / (max_threads/mt) = m/n
     *          mt^2 = max_threads * (m/n)
     *          mt = sqrt( max_threads * (m/n) )
     */
    // With n == 0 the ratio would be a division by zero (inf or NaN) and converting that to an
    // unsigned value is undefined behaviour, so go straight to the fallback in that case.
    if(n != 0)
    {
        //ratio of m to n in problem dimensions
        const double ratio = m / static_cast<double>(n);

        // mt = sqrt(max_threads * (m / n) )
        // Clamp to max_threads before converting: no value above max_threads can divide it, so the
        // search below would walk down to max_threads anyway. Clamping keeps the conversion in
        // range and the loop short without changing the result.
        const double   ideal    = std::min(std::sqrt(max_threads * ratio), static_cast<double>(max_threads));
        const unsigned adjusted = static_cast<unsigned>(std::round(ideal));

        //find the nearest factor of max_threads
        for(unsigned i = 0; i != adjusted; ++i)
        {
            //try down
            const unsigned adj_down = adjusted - i;
            if(max_threads % adj_down == 0)
            {
                return { adj_down, max_threads / adj_down };
            }

            //try up
            const unsigned adj_up = adjusted + i;
            if(max_threads % adj_up == 0)
            {
                return { adj_up, max_threads / adj_up };
            }
        }
    }

    //we didn't find anything so lets bail out with maxes biased to the largest dimension
    // Compare as std::size_t so a dimension larger than UINT_MAX is not truncated before the min
    if(m > n)
    {
        return { static_cast<unsigned>(std::min<std::size_t>(m, max_threads)), 1 };
    }
    else
    {
        return { 1, static_cast<unsigned>(std::min<std::size_t>(n, max_threads)) };
    }
}

Anthony Barbier

2018-05-25 13:32:10 +0100

[diff] [blame]

129

/** Execute workloads[info.thread_id] first, then call the feeder to get the index of the next workload to run.

130

*

131

* Will run workloads until the feeder reaches the end of its range.

132

*

133

* @param[in] workloads The array of workloads

134

* @param[in,out] feeder The feeder indicating which workload to execute next.

135

* @param[in] info Threading and CPU info.

136

*/

137

void process_workloads(std::vector<IScheduler::Workload> &workloads, ThreadFeeder &feeder, const ThreadInfo &info)

138

{

139

unsigned int workload_index = info.thread_id;

140

do

141

{

142

ARM_COMPUTE_ERROR_ON(workload_index >= workloads.size());

143

workloads[workload_index](info);

144

}

145

while(feeder.get_next(workload_index));

}

} //namespace

Pablo Tello

2019-09-19 16:39:04 +0100

[diff] [blame]

150

struct CPPScheduler::Impl final

Georgios Pinitas

2019-07-25 13:31:10 +0100

[diff] [blame]

151

{

Pablo Tello

2019-09-19 16:39:04 +0100

[diff] [blame]

152

explicit Impl(unsigned int thread_hint)

Georgios Pinitas

2019-07-25 13:31:10 +0100

[diff] [blame]

153

: _num_threads(thread_hint), _threads(_num_threads - 1)

154

{

155

}

156

void set_num_threads(unsigned int num_threads, unsigned int thead_hint)

157

{

158

_num_threads = num_threads == 0 ? thead_hint : num_threads;

159

_threads.resize(_num_threads - 1);

160

}

161

unsigned int num_threads() const

{

return _num_threads;

}

void run_workloads(std::vector<IScheduler::Workload> &workloads);

class Thread;

Pablo Tello

2019-09-19 16:39:04 +0100

[diff] [blame]

170

unsigned int _num_threads;

171

std::list<Thread> _threads;

172

arm_compute::Mutex _run_workloads_mutex{};

Georgios Pinitas

2019-07-25 13:31:10 +0100

[diff] [blame]

173

};

174

Pablo Tello

2019-09-19 16:39:04 +0100

[diff] [blame]

175

class CPPScheduler::Impl::Thread final

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

176

{

177

public:

Moritz Pflanzer

2017-09-08 13:48:23 +0100

[diff] [blame]

178

/** Start a new thread. */

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

179

Thread();

Moritz Pflanzer

2017-09-08 13:48:23 +0100

[diff] [blame]

180

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

181

Thread(const Thread &) = delete;

182

Thread &operator=(const Thread &) = delete;

183

Thread(Thread &&) = delete;

184

Thread &operator=(Thread &&) = delete;

Moritz Pflanzer

2017-09-08 13:48:23 +0100

[diff] [blame]

185

186

/** Destructor. Make the thread join. */

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

187

~Thread();

Moritz Pflanzer

2017-09-08 13:48:23 +0100

[diff] [blame]

188

Anthony Barbier

2018-05-25 13:32:10 +0100

[diff] [blame]

189

/** Request the worker thread to start executing workloads.

190

*

191

* The thread will start by executing workloads[info.thread_id] and will then call the feeder to

192

* get the index of the following workload to run.

193

*

194

* @note This function will return as soon as the workloads have been sent to the worker thread.

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

195

* wait() needs to be called to ensure the execution is complete.

196

*/

Anthony Barbier

2018-05-25 13:32:10 +0100

[diff] [blame]

197

void start(std::vector<IScheduler::Workload> *workloads, ThreadFeeder &feeder, const ThreadInfo &info);

Moritz Pflanzer

2017-09-08 13:48:23 +0100

[diff] [blame]

198

199

/** Wait for the current kernel execution to complete. */

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

200

void wait();

Moritz Pflanzer

2017-09-08 13:48:23 +0100

[diff] [blame]

201

202

/** Function ran by the worker thread. */

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

203

void worker_thread();

204

205

private:

Anthony Barbier

2018-05-25 13:32:10 +0100

[diff] [blame]

206

std::thread _thread{};

207

ThreadInfo _info{};

208

std::vector<IScheduler::Workload> *_workloads{ nullptr };

209

ThreadFeeder *_feeder{ nullptr };

210

std::mutex _m{};

211

std::condition_variable _cv{};

212

bool _wait_for_work{ false };

213

bool _job_complete{ true };

214

std::exception_ptr _current_exception{ nullptr };

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

215

};

216

Georgios Pinitas

2019-07-25 13:31:10 +0100

[diff] [blame]

217

/** Launch the worker thread, which immediately enters worker_thread(). */
CPPScheduler::Impl::Thread::Thread()
{
    // Assigned in the body rather than the initialiser list: _thread is the first member, and
    // the mutex, condition variable and flags must exist before the new thread touches them.
    _thread = std::thread([this] { worker_thread(); });
}

221

Georgios Pinitas

2019-07-25 13:31:10 +0100

[diff] [blame]

222

/** Ask the worker thread to exit and join it. */
CPPScheduler::Impl::Thread::~Thread()
{
    // Make sure worker thread has ended
    if(!_thread.joinable())
    {
        return;
    }

    // A null workloads pointer is the exit request understood by worker_thread();
    // the feeder is only a placeholder to satisfy start()'s signature.
    ThreadFeeder placeholder_feeder;
    start(nullptr, placeholder_feeder, ThreadInfo());
    _thread.join();
}

232

Georgios Pinitas

2019-07-25 13:31:10 +0100

[diff] [blame]

233

/** Hand a job to the worker thread and wake it up.
 *
 * @param[in]     workloads Workloads to run; a null pointer asks the worker thread to exit.
 * @param[in,out] feeder    Feeder giving the index of the next workload to run.
 * @param[in]     info      Threading and CPU info passed to the workloads.
 */
void CPPScheduler::Impl::Thread::start(std::vector<IScheduler::Workload> *workloads, ThreadFeeder &feeder, const ThreadInfo &info)
{
    // Store the job description before raising the flag the worker is waiting on
    _info      = info;
    _feeder    = &feeder;
    _workloads = workloads;

    std::unique_lock<std::mutex> guard(_m);
    _job_complete  = false;
    _wait_for_work = true;
    guard.unlock();

    // Notify after releasing the mutex
    _cv.notify_one();
}

245

Georgios Pinitas

2019-07-25 13:31:10 +0100

[diff] [blame]

246

void CPPScheduler::Impl::Thread::wait()

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

247

{

Moritz Pflanzer

2017-09-08 13:48:23 +0100

[diff] [blame]

248

{

249

std::unique_lock<std::mutex> lock(_m);

250

_cv.wait(lock, [&] { return _job_complete; });

251

}

252

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

253

if(_current_exception)

254

{

255

std::rethrow_exception(_current_exception);

}

}

Georgios Pinitas

2019-07-25 13:31:10 +0100

[diff] [blame]

259

void CPPScheduler::Impl::Thread::worker_thread()

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

260

{

Moritz Pflanzer

2017-09-08 13:48:23 +0100

[diff] [blame]

261

while(true)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

262

{

Moritz Pflanzer

2017-09-08 13:48:23 +0100

[diff] [blame]

263

std::unique_lock<std::mutex> lock(_m);

264

_cv.wait(lock, [&] { return _wait_for_work; });

265

_wait_for_work = false;

266

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

267

_current_exception = nullptr;

Moritz Pflanzer

2017-09-08 13:48:23 +0100

[diff] [blame]

268

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

269

// Time to exit

Anthony Barbier

2018-05-25 13:32:10 +0100

[diff] [blame]

270

if(_workloads == nullptr)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

{

return;

}

Michalis Spyrou

2018-11-30 16:30:43 +0000

[diff] [blame]

275

#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

276

try

277

{

Michalis Spyrou

2018-11-30 16:30:43 +0000

[diff] [blame]

278

#endif /* ARM_COMPUTE_EXCEPTIONS_ENABLED */

Anthony Barbier

2018-05-25 13:32:10 +0100

[diff] [blame]

279

process_workloads(*_workloads, *_feeder, _info);

Michalis Spyrou

2018-11-30 16:30:43 +0000

[diff] [blame]

280

281

#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

}

catch(...)

{

_current_exception = std::current_exception();

286

}

Michalis Spyrou

2018-11-30 16:30:43 +0000

[diff] [blame]

287

#endif /* ARM_COMPUTE_EXCEPTIONS_DISABLED */

Moritz Pflanzer

2017-09-08 13:48:23 +0100

[diff] [blame]

288

_job_complete = true;

289

lock.unlock();

290

_cv.notify_one();

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

291

}

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

292

}

293

Georgios Pinitas

2019-07-25 13:31:10 +0100

[diff] [blame]

294

/*

295

* This singleton has been deprecated and will be removed in the next release

296

*/

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

297

CPPScheduler &CPPScheduler::get()

298

{

299

static CPPScheduler scheduler;

return scheduler;

}

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

303

/** Default constructor: creates the implementation with the thread count given by num_threads_hint(). */
CPPScheduler::CPPScheduler()
    : _impl(support::cpp14::make_unique<Impl>(num_threads_hint()))
{
}

/** Destructor, defaulted in this source file. */
CPPScheduler::~CPPScheduler() = default;

309

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

310

void CPPScheduler::set_num_threads(unsigned int num_threads)

311

{

Pablo Tello

2019-09-19 16:39:04 +0100

[diff] [blame]

312

// No changes in the number of threads while current workloads are running

313

arm_compute::lock_guard<std::mutex> lock(_impl->_run_workloads_mutex);

Georgios Pinitas

2019-07-25 13:31:10 +0100

[diff] [blame]

314

_impl->set_num_threads(num_threads, num_threads_hint());

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

315

}

316

Moritz Pflanzer

d929b9c

2017-06-28 10:15:48 +0100

[diff] [blame]

317

unsigned int CPPScheduler::num_threads() const

318

{

Georgios Pinitas

2019-07-25 13:31:10 +0100

[diff] [blame]

319

return _impl->num_threads();

Moritz Pflanzer

d929b9c

2017-06-28 10:15:48 +0100

[diff] [blame]

320

}

321

Vidhya Sudhan Loganathan

d646ae1

2018-11-19 15:18:20 +0000

[diff] [blame]

322

#ifndef DOXYGEN_SKIP_THIS

Anthony Barbier

2018-05-25 13:32:10 +0100

[diff] [blame]

323

/** Execute a set of workloads on the calling thread together with the pool workers.
 *
 * The number of threads used is the smaller of the configured thread count and the number of
 * workloads. Each participating thread starts with the workload at its own thread id; the
 * remaining workloads are distributed through a shared ThreadFeeder. The function only returns
 * (or throws) once every worker has finished, because the workers hold pointers to the
 * stack-local feeder and to @p workloads.
 *
 * A std::system_error reported by a worker is logged to std::cerr and swallowed. Any other
 * exception (from a worker or from the calling thread's own workloads) is rethrown after all
 * workers have been waited for; if there are several, the first one encountered wins.
 *
 * @param[in] workloads Workloads to execute. Nothing is done when it is empty.
 */
void CPPScheduler::run_workloads(std::vector<IScheduler::Workload> &workloads)
{
    // Mutex to ensure other threads won't interfere with the setup of the current thread's workloads
    // Other thread's workloads will be scheduled after the current thread's workloads have finished
    // This is not great because different threads workloads won't run in parallel but at least they
    // won't interfere each other and deadlock.
    arm_compute::lock_guard<std::mutex> lock(_impl->_run_workloads_mutex);
    const unsigned int num_threads = std::min(_impl->num_threads(), static_cast<unsigned int>(workloads.size()));
    if(num_threads < 1)
    {
        return;
    }
    // Indices [0, num_threads) are the initial workloads; the feeder hands out the rest
    ThreadFeeder feeder(num_threads, workloads.size());
    ThreadInfo   info;
    info.cpu_info          = &_cpu_info;
    info.num_threads       = num_threads;
    unsigned int t         = 0;
    auto         thread_it = _impl->_threads.begin();
    for(; t < num_threads - 1; ++t, ++thread_it)
    {
        info.thread_id = t;
        thread_it->start(&workloads, feeder, info);
    }

    // The calling thread acts as the last participant
    info.thread_id = t;
#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
    // First exception that still has to be propagated once all workers are done
    std::exception_ptr pending_error{ nullptr };
    try
    {
#endif /* ARM_COMPUTE_EXCEPTIONS_DISABLED */
        process_workloads(workloads, feeder, info);
#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
    }
    catch(...)
    {
        // Do not unwind yet: the workers still reference feeder and workloads
        pending_error = std::current_exception();
    }
#endif /* ARM_COMPUTE_EXCEPTIONS_DISABLED */

    for(auto &thread : _impl->_threads)
    {
#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
        try
        {
#endif /* ARM_COMPUTE_EXCEPTIONS_DISABLED */
            thread.wait();
#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
        }
        catch(const std::system_error &e)
        {
            std::cerr << "Caught system_error with code " << e.code() << " meaning " << e.what() << '\n';
        }
        catch(...)
        {
            // Keep waiting for the remaining workers before propagating
            if(!pending_error)
            {
                pending_error = std::current_exception();
            }
        }
#endif /* ARM_COMPUTE_EXCEPTIONS_DISABLED */
    }

#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
    if(pending_error)
    {
        std::rethrow_exception(pending_error);
    }
#endif /* ARM_COMPUTE_EXCEPTIONS_DISABLED */
}

Vidhya Sudhan Loganathan

d646ae1

2018-11-19 15:18:20 +0000

[diff] [blame]

365

#endif /* DOXYGEN_SKIP_THIS */

Anthony Barbier

2018-05-25 13:32:10 +0100

[diff] [blame]

366

Anthony Barbier

376c85f

2018-05-25 14:17:21 +0100

[diff] [blame]

367

/** Run a kernel, splitting its window across the scheduler's threads as requested by the hints.
 *
 * When the split dimension is IScheduler::split_dimensions_all the window is divided along both
 * DimX and DimY and the kernel is run through run_nd(); otherwise it is divided along the single
 * hinted dimension and run through run(). An empty window results in no work being scheduled.
 *
 * @param[in] kernel Kernel to execute. Must not be null.
 * @param[in] hints  Scheduling hints: split dimension, strategy and threshold.
 */
void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints)
{
    ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel");

    const Window &max_window = kernel->window();

    if(hints.split_dimension() == IScheduler::split_dimensions_all)
    {
        /*
         * if the split dim is size_t max then this signals we should parallelise over
         * all dimensions
         */
        const std::size_t m = max_window.num_iterations(Window::DimX);
        const std::size_t n = max_window.num_iterations(Window::DimY);

        // Nothing to run on an empty window. This mirrors the zero-iteration check of the
        // single-dimension path and keeps split_2d() from dividing m by a zero n.
        if(m == 0 || n == 0)
        {
            return;
        }

        //in c++17 this can be swapped for   auto [ m_threads, n_threads ] = split_2d(...
        unsigned m_threads, n_threads;
        std::tie(m_threads, n_threads) = split_2d(_impl->_num_threads, m, n);

        std::vector<IScheduler::Workload> workloads;
        // One workload per (mi, ni) tile
        workloads.reserve(static_cast<std::size_t>(m_threads) * n_threads);
        for(unsigned int ni = 0; ni != n_threads; ++ni)
        {
            for(unsigned int mi = 0; mi != m_threads; ++mi)
            {
                workloads.push_back(
                    [ni, mi, m_threads, n_threads, &max_window, &kernel](const ThreadInfo &info)
                {
                    //narrow the window to our mi-ni workload
                    Window win = max_window.split_window(Window::DimX, mi, m_threads)
                                 .split_window(Window::DimY, ni, n_threads);

                    win.validate();

                    // Describes where this tile sits in the thread grid
                    Window thread_locator;
                    thread_locator.set(Window::DimX, Window::Dimension(mi, m_threads));
                    thread_locator.set(Window::DimY, Window::Dimension(ni, n_threads));

                    thread_locator.validate();

                    kernel->run_nd(win, info, thread_locator);
                });
            }
        }
        run_workloads(workloads);
    }
    else
    {
        const unsigned int num_iterations = max_window.num_iterations(hints.split_dimension());
        const unsigned int num_threads    = std::min(num_iterations, _impl->_num_threads);

        if(num_iterations == 0)
        {
            return;
        }

        if(!kernel->is_parallelisable() || num_threads == 1)
        {
            // Run the whole window directly on the calling thread
            ThreadInfo info;
            info.cpu_info = &_cpu_info;
            kernel->run(max_window, info);
        }
        else
        {
            unsigned int num_windows = 0;
            switch(hints.strategy())
            {
                case StrategyHint::STATIC:
                    num_windows = num_threads;
                    break;
                case StrategyHint::DYNAMIC:
                {
                    const unsigned int granule_threshold = (hints.threshold() <= 0) ? num_threads : static_cast<unsigned int>(hints.threshold());
                    // Make sure we don't use some windows which are too small as this might create some contention on the ThreadFeeder
                    num_windows = num_iterations > granule_threshold ? granule_threshold : num_iterations;
                    break;
                }
                default:
                    ARM_COMPUTE_ERROR("Unknown strategy");
            }
            std::vector<IScheduler::Workload> workloads(num_windows);
            for(unsigned int t = 0; t < num_windows; t++)
            {
                //Capture 't' by copy, all the other variables by reference:
                workloads[t] = [t, &hints, &max_window, &num_windows, &kernel](const ThreadInfo &info)
                {
                    Window win = max_window.split_window(hints.split_dimension(), t, num_windows);
                    win.validate();
                    kernel->run(win, info);
                };
            }
            run_workloads(workloads);
        }
    }
}

Moritz Pflanzer