/*
 * Copyright (c) 2016-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/CPP/CPPScheduler.h"

#include "arm_compute/core/CPP/ICPPKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/runtime/CPUUtils.h"
#include "support/Mutex.h"

#include <atomic>
#include <cmath> // std::sqrt / std::round, used by split_2d() below
#include <condition_variable>
#include <iostream>
#include <list>
#include <mutex>
#include <system_error>
#include <thread>

namespace arm_compute
{
namespace
{
class ThreadFeeder
{
public:
    /** Constructor
     *
     * @param[in] start First value that will be returned by the feeder
     * @param[in] end   End condition (The last value returned by get_next() will be end - 1)
     */
    explicit ThreadFeeder(unsigned int start = 0, unsigned int end = 0)
        : _atomic_counter(start), _end(end)
    {
    }
    /** Return the next element in the range if there is one.
     *
     * @param[out] next Will contain the next element if there is one.
     *
     * @return False if the end of the range has been reached and next wasn't set.
     */
    bool get_next(unsigned int &next)
    {
        next = atomic_fetch_add_explicit(&_atomic_counter, 1u, std::memory_order_relaxed);
        return next < _end;
    }

private:
    std::atomic_uint   _atomic_counter;
    const unsigned int _end;
};
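
/* Usage sketch (editorial illustration, not part of the library): a feeder
 * constructed as ThreadFeeder(2, 5) hands out the indices 2, 3 and 4 across
 * however many threads call get_next(), then reports exhaustion:
 *
 *   ThreadFeeder feeder(2, 5);
 *   unsigned int idx = 0;
 *   while(feeder.get_next(idx)) // thread-safe: a relaxed atomic fetch-add
 *   {
 *       // ... run workloads[idx] ...
 *   }
 */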

/** Given two dimensions and a maximum number of threads to utilise, calculate the best
 * combination of threads that fit in (multiplied together) max_threads.
 *
 * This algorithm assumes that work in either of the dimensions is equally difficult
 * to compute
 *
 * @returns [m_nthreads, n_nthreads] A pair of the threads that should be used in each dimension
 */
std::pair<unsigned, unsigned> split_2d(unsigned max_threads, std::size_t m, std::size_t n)
{
    /*
     * We want the same ratio of threads in M & N as the ratio of the m and n problem sizes
     *
     * Therefore:    mt/nt == m/n    where mt*nt == max_threads
     *
     * max_threads/mt = nt    &    (max_threads/mt) * (m/n) = mt
     * mt^2 = max_threads * (m/n)
     * mt = sqrt( max_threads * (m/n) )
     */
    //ratio of m to n in problem dimensions
    double ratio = m / static_cast<double>(n);

    // mt = sqrt(max_threads * (m / n) )
    const unsigned adjusted = std::round(
                                  std::sqrt(max_threads * ratio));

    //find the nearest factor of max_threads
    for(unsigned i = 0; i != adjusted; ++i)
    {
        //try down
        const unsigned adj_down = adjusted - i;
        if(max_threads % adj_down == 0)
        {
            return { adj_down, max_threads / adj_down };
        }

        //try up
        const unsigned adj_up = adjusted + i;
        if(max_threads % adj_up == 0)
        {
            return { adj_up, max_threads / adj_up };
        }
    }

    //we didn't find anything so let's bail out with maxes biased to the largest dimension
    if(m > n)
    {
        return { std::min<unsigned>(m, max_threads), 1 };
    }
    else
    {
        return { 1, std::min<unsigned>(n, max_threads) };
    }
}
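
/* Worked example (editorial illustration): split_2d(8, 400, 100)
 *   ratio    = 400 / 100            = 4.0
 *   adjusted = round(sqrt(8 * 4.0)) = 6
 *   i = 0: 8 % 6 != 0 (down and up both test 6)
 *   i = 1: 8 % 5 != 0, 8 % 7 != 0
 *   i = 2: 8 % 4 == 0 -> returns { 4, 2 }: 4 threads along M, 2 along N,
 *   the closest factorisation of 8 to the ideal 4:1 shape of the problem.
 */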

/** Execute workloads[info.thread_id] first, then call the feeder to get the index of the next workload to run.
 *
 * Will run workloads until the feeder reaches the end of its range.
 *
 * @param[in]     workloads The array of workloads
 * @param[in,out] feeder    The feeder indicating which workload to execute next.
 * @param[in]     info      Threading and CPU info.
 */
void process_workloads(std::vector<IScheduler::Workload> &workloads, ThreadFeeder &feeder, const ThreadInfo &info)
{
    unsigned int workload_index = info.thread_id;
    do
    {
        ARM_COMPUTE_ERROR_ON(workload_index >= workloads.size());
        workloads[workload_index](info);
    }
    while(feeder.get_next(workload_index));
}
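
/* Trace (editorial illustration): with 3 threads and 8 workloads, and the feeder
 * constructed as ThreadFeeder(3, 8) in run_workloads() below, thread t first runs
 * workloads[t] for t = 0, 1, 2; the remaining indices 3..7 are then claimed one
 * at a time via feeder.get_next() by whichever thread finishes first.
 */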

void set_thread_affinity(int core_id)
{
    if(core_id < 0)
    {
        return;
    }

    cpu_set_t set;
    CPU_ZERO(&set);
    CPU_SET(core_id, &set);
    ARM_COMPUTE_EXIT_ON_MSG(sched_setaffinity(0, sizeof(set), &set),
                            "Error setting thread affinity");
}
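
/* Example (editorial illustration): set_thread_affinity(2) pins the calling thread
 * to core 2 through the Linux sched_setaffinity() call; a negative core_id is a
 * no-op, which is how unpinned threads are expressed throughout this file.
 */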

class Thread final
{
public:
    /** Start a new thread
     *
     * Thread will be pinned to a given core id if value is non-negative
     *
     * @param[in] core_pin Core id to pin the thread on. If negative no thread pinning will take place
     */
    explicit Thread(int core_pin = -1);

    Thread(const Thread &) = delete;
    Thread &operator=(const Thread &) = delete;
    Thread(Thread &&)                 = delete;
    Thread &operator=(Thread &&) = delete;

    /** Destructor. Make the thread join. */
    ~Thread();

    /** Request the worker thread to start executing workloads.
     *
     * The thread will start by executing workloads[info.thread_id] and will then call the feeder to
     * get the index of the following workload to run.
     *
     * @note This function will return as soon as the workloads have been sent to the worker thread.
     * wait() needs to be called to ensure the execution is complete.
     */
    void start(std::vector<IScheduler::Workload> *workloads, ThreadFeeder &feeder, const ThreadInfo &info);

    /** Wait for the current kernel execution to complete. */
    void wait();

    /** Function run by the worker thread. */
    void worker_thread();

private:
    std::thread                        _thread{};
    ThreadInfo                         _info{};
    std::vector<IScheduler::Workload> *_workloads{ nullptr };
    ThreadFeeder                      *_feeder{ nullptr };
    std::mutex                         _m{};
    std::condition_variable            _cv{};
    bool                               _wait_for_work{ false };
    bool                               _job_complete{ true };
    std::exception_ptr                 _current_exception{ nullptr };
    int                                _core_pin{ -1 };
};
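
/* Lifecycle sketch (editorial illustration; `workloads`, `feeder` and `info` are
 * assumed to be in scope, as in run_workloads() below):
 *
 *   Thread worker(1);                       // spawns worker_thread(), pinned to core 1
 *   worker.start(&workloads, feeder, info); // returns immediately, work runs asynchronously
 *   worker.wait();                          // blocks until done; rethrows any captured exception
 *
 * The destructor sends a nullptr workload list, which worker_thread() treats as the
 * signal to return, and then joins the std::thread.
 */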

Thread::Thread(int core_pin)
    : _core_pin(core_pin)
{
    _thread = std::thread(&Thread::worker_thread, this);
}

Thread::~Thread()
{
    // Make sure worker thread has ended
    if(_thread.joinable())
    {
        ThreadFeeder feeder;
        start(nullptr, feeder, ThreadInfo());
        _thread.join();
    }
}

void Thread::start(std::vector<IScheduler::Workload> *workloads, ThreadFeeder &feeder, const ThreadInfo &info)
{
    _workloads = workloads;
    _feeder    = &feeder;
    _info      = info;
    {
        std::lock_guard<std::mutex> lock(_m);
        _wait_for_work = true;
        _job_complete  = false;
    }
    _cv.notify_one();
}

void Thread::wait()
{
    {
        std::unique_lock<std::mutex> lock(_m);
        _cv.wait(lock, [&] { return _job_complete; });
    }

    if(_current_exception)
    {
        std::rethrow_exception(_current_exception);
    }
}

void Thread::worker_thread()
{
    set_thread_affinity(_core_pin);

    while(true)
    {
        std::unique_lock<std::mutex> lock(_m);
        _cv.wait(lock, [&] { return _wait_for_work; });
        _wait_for_work = false;

        _current_exception = nullptr;

        // Time to exit
        if(_workloads == nullptr)
        {
            return;
        }

#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
        try
        {
#endif /* ARM_COMPUTE_EXCEPTIONS_DISABLED */
            process_workloads(*_workloads, *_feeder, _info);

#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
        }
        catch(...)
        {
            _current_exception = std::current_exception();
        }
#endif /* ARM_COMPUTE_EXCEPTIONS_DISABLED */
        _job_complete = true;
        lock.unlock();
        _cv.notify_one();
    }
}
} //namespace

struct CPPScheduler::Impl final
{
    explicit Impl(unsigned int thread_hint)
        : _num_threads(thread_hint), _threads(_num_threads - 1)
    {
    }
    void set_num_threads(unsigned int num_threads, unsigned int thread_hint)
    {
        _num_threads = num_threads == 0 ? thread_hint : num_threads;
        _threads.resize(_num_threads - 1);
    }
    void set_num_threads_with_affinity(unsigned int num_threads, unsigned int thread_hint, BindFunc func)
    {
        _num_threads = num_threads == 0 ? thread_hint : num_threads;

        // Set affinity on main thread
        set_thread_affinity(func(0, thread_hint));

        // Set affinity on worker threads
        _threads.clear();
        for(auto i = 1U; i < _num_threads; ++i)
        {
            _threads.emplace_back(func(i, thread_hint));
        }
    }
    unsigned int num_threads() const
    {
        return _num_threads;
    }

    void run_workloads(std::vector<IScheduler::Workload> &workloads);

    unsigned int       _num_threads;
    std::list<Thread>  _threads;
    arm_compute::Mutex _run_workloads_mutex{};
};
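
/* Note (editorial): _threads holds num_threads - 1 workers because the thread that
 * calls run_workloads() below executes the final share of the workloads itself.
 */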

/*
 * This singleton has been deprecated and will be removed in the next release
 */
CPPScheduler &CPPScheduler::get()
{
    static CPPScheduler scheduler;
    return scheduler;
}

CPPScheduler::CPPScheduler()
    : _impl(support::cpp14::make_unique<Impl>(num_threads_hint()))
{
}

CPPScheduler::~CPPScheduler() = default;

void CPPScheduler::set_num_threads(unsigned int num_threads)
{
    // No changes in the number of threads while current workloads are running
    arm_compute::lock_guard<std::mutex> lock(_impl->_run_workloads_mutex);
    _impl->set_num_threads(num_threads, num_threads_hint());
}

void CPPScheduler::set_num_threads_with_affinity(unsigned int num_threads, BindFunc func)
{
    // No changes in the number of threads while current workloads are running
    arm_compute::lock_guard<std::mutex> lock(_impl->_run_workloads_mutex);
    _impl->set_num_threads_with_affinity(num_threads, num_threads_hint(), func);
}
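
/* Illustrative call (editorial; BindFunc is inferred from its use above to be
 * callable as func(thread_index, thread_hint) returning a core id):
 *
 *   CPPScheduler::get().set_num_threads_with_affinity(4,
 *       [](int thread_index, int num_cpus) { return thread_index % num_cpus; });
 */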

unsigned int CPPScheduler::num_threads() const
{
    return _impl->num_threads();
}

#ifndef DOXYGEN_SKIP_THIS
void CPPScheduler::run_workloads(std::vector<IScheduler::Workload> &workloads)
{
    // Mutex to ensure other threads won't interfere with the setup of the current thread's workloads
    // Other threads' workloads will be scheduled after the current thread's workloads have finished
    // This is not great because different threads' workloads won't run in parallel, but at least they
    // won't interfere with each other and deadlock.
    arm_compute::lock_guard<std::mutex> lock(_impl->_run_workloads_mutex);
    const unsigned int num_threads = std::min(_impl->num_threads(), static_cast<unsigned int>(workloads.size()));
    if(num_threads < 1)
    {
        return;
    }
    ThreadFeeder feeder(num_threads, workloads.size());
    ThreadInfo   info;
    info.cpu_info    = &_cpu_info;
    info.num_threads = num_threads;
    unsigned int t         = 0;
    auto         thread_it = _impl->_threads.begin();
    for(; t < num_threads - 1; ++t, ++thread_it)
    {
        info.thread_id = t;
        thread_it->start(&workloads, feeder, info);
    }

    info.thread_id = t;
    process_workloads(workloads, feeder, info);
#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
    try
    {
#endif /* ARM_COMPUTE_EXCEPTIONS_DISABLED */
        for(auto &thread : _impl->_threads)
        {
            thread.wait();
        }
#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
    }
    catch(const std::system_error &e)
    {
        std::cerr << "Caught system_error with code " << e.code() << " meaning " << e.what() << '\n';
    }
#endif /* ARM_COMPUTE_EXCEPTIONS_DISABLED */
}
#endif /* DOXYGEN_SKIP_THIS */

void CPPScheduler::schedule_common(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs)
{
    ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel");

    const Window &max_window = kernel->window();

    if(hints.split_dimension() == IScheduler::split_dimensions_all)
    {
        /*
         * if the split dim is size_t max then this signals we should parallelise over
         * all dimensions
         */
        const std::size_t m = max_window.num_iterations(Window::DimX);
        const std::size_t n = max_window.num_iterations(Window::DimY);

        //in c++17 this can be swapped for   auto [ m_threads, n_threads ] = split_2d(...
        unsigned m_threads, n_threads;
        std::tie(m_threads, n_threads) = split_2d(_impl->_num_threads, m, n);

        std::vector<IScheduler::Workload> workloads;
        for(unsigned int ni = 0; ni != n_threads; ++ni)
        {
            for(unsigned int mi = 0; mi != m_threads; ++mi)
            {
                workloads.push_back(
                    [ni, mi, m_threads, n_threads, &max_window, &kernel](const ThreadInfo & info)
                {
                    //narrow the window to our mi-ni workload
                    Window win = max_window.split_window(Window::DimX, mi, m_threads)
                                 .split_window(Window::DimY, ni, n_threads);

                    win.validate();

                    Window thread_locator;
                    thread_locator.set(Window::DimX, Window::Dimension(mi, m_threads));
                    thread_locator.set(Window::DimY, Window::Dimension(ni, n_threads));

                    thread_locator.validate();

                    kernel->run_nd(win, info, thread_locator);
                });
            }
        }
        run_workloads(workloads);
    }
    else
    {
        const unsigned int num_iterations = max_window.num_iterations(hints.split_dimension());
        const unsigned int num_threads    = std::min(num_iterations, _impl->_num_threads);

        if(num_iterations == 0)
        {
            return;
        }

        if(!kernel->is_parallelisable() || num_threads == 1)
        {
            ThreadInfo info;
            info.cpu_info = &_cpu_info;
            if(inputs.empty())
            {
                kernel->run(max_window, info);
            }
            else
            {
                kernel->run_op(inputs, outputs, max_window, info);
            }
        }
        else
        {
            unsigned int num_windows = 0;
            switch(hints.strategy())
            {
                case StrategyHint::STATIC:
                    num_windows = num_threads;
                    break;
                case StrategyHint::DYNAMIC:
                {
                    const unsigned int granule_threshold = (hints.threshold() <= 0) ? num_threads : static_cast<unsigned int>(hints.threshold());
                    // Make sure we don't use some windows which are too small as this might create some contention on the ThreadFeeder
                    num_windows = num_iterations > granule_threshold ? granule_threshold : num_iterations;
                    break;
                }
                default:
                    ARM_COMPUTE_ERROR("Unknown strategy");
            }
            std::vector<IScheduler::Workload> workloads(num_windows);
            for(unsigned int t = 0; t < num_windows; t++)
            {
                //Capture 't' by copy, all the other variables by reference:
                workloads[t] = [t, &hints, &max_window, &num_windows, &kernel, &inputs, &outputs](const ThreadInfo & info)
                {
                    Window win = max_window.split_window(hints.split_dimension(), t, num_windows);
                    win.validate();

                    if(inputs.empty())
                    {
                        kernel->run(win, info);
                    }
                    else
                    {
                        kernel->run_op(inputs, outputs, win, info);
                    }
                };
            }
            run_workloads(workloads);
        }
    }
}
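
/* Usage sketch (editorial illustration; the Hints constructor signature and the
 * default STATIC strategy are assumptions based on their use above):
 *
 *   // Split along Y, one window per thread (StrategyHint::STATIC):
 *   CPPScheduler::get().schedule(&kernel, IScheduler::Hints(Window::DimY));
 *
 * With StrategyHint::DYNAMIC and a positive threshold, up to `threshold` windows
 * are created and handed out through the shared ThreadFeeder, so faster threads
 * pick up more of the work.
 */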

void CPPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs)
{
    schedule_common(kernel, hints, inputs, outputs);
}

void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints)
{
    const InputTensorMap inputs;
    OutputTensorMap      outputs;
    schedule_common(kernel, hints, inputs, outputs);
}
} // namespace arm_compute
Moritz Pflanzerff06f202017-09-08 13:48:23 +0100529} // namespace arm_compute