/*
 * Copyright (c) 2016-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/CPP/CPPScheduler.h"

#include "arm_compute/core/CPP/ICPPKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/runtime/CPUUtils.h"

#include <condition_variable>
#include <iostream>
#include <mutex>
#include <system_error>
#include <thread>

namespace arm_compute
{
namespace
{
class ThreadFeeder
{
public:
    /** Constructor
     *
     * @param[in] start First value that will be returned by the feeder
     * @param[in] end   End condition (The last value returned by get_next() will be end - 1)
     */
    explicit ThreadFeeder(unsigned int start = 0, unsigned int end = 0)
        : _current(start), _end(end), _m()
    {
    }
    /** Return the next element in the range if there is one.
     *
     * @param[out] next Will contain the next element if there is one.
     *
     * @return False if the end of the range has been reached and next wasn't set.
     */
    bool get_next(unsigned int &next)
    {
        std::lock_guard<std::mutex> lock(_m);
        if(_current < _end)
        {
            next = _current;
            _current++;
            return true;
        }
        return false;
    }

private:
    unsigned int       _current;
    const unsigned int _end;
    std::mutex         _m;
};
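// The feeder is shared by all threads taking part in a run: get_next() is the only entry
// point and is protected by the internal mutex, which is what makes the dynamic
// distribution of the remaining workload indices safe.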

/** Execute workloads[info.thread_id] first, then call the feeder to get the index of the next workload to run.
 *
 * Will run workloads until the feeder reaches the end of its range.
 *
 * @param[in]     workloads The array of workloads
 * @param[in,out] feeder    The feeder indicating which workload to execute next.
 * @param[in]     info      Threading and CPU info.
 */
void process_workloads(std::vector<IScheduler::Workload> &workloads, ThreadFeeder &feeder, const ThreadInfo &info)
{
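    // Run the workload statically assigned to this thread first, then keep pulling further
    // workload indices from the shared feeder until the range is exhausted.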
    unsigned int workload_index = info.thread_id;
    do
    {
        ARM_COMPUTE_ERROR_ON(workload_index >= workloads.size());
        workloads[workload_index](info);
    }
    while(feeder.get_next(workload_index));
}

} // namespace

class CPPScheduler::Thread
{
public:
    /** Start a new thread. */
    Thread();

    Thread(const Thread &) = delete;
    Thread &operator=(const Thread &) = delete;
    Thread(Thread &&) = delete;
    Thread &operator=(Thread &&) = delete;

    /** Destructor. Make the thread join. */
    ~Thread();

    /** Request the worker thread to start executing workloads.
     *
     * The thread will start by executing workloads[info.thread_id] and will then call the feeder to
     * get the index of the following workload to run.
     *
     * @note This function will return as soon as the workloads have been sent to the worker thread.
     * wait() needs to be called to ensure the execution is complete.
     */
    void start(std::vector<IScheduler::Workload> *workloads, ThreadFeeder &feeder, const ThreadInfo &info);

    /** Wait for the current kernel execution to complete. */
    void wait();

    /** Function run by the worker thread. */
    void worker_thread();

private:
    std::thread                        _thread{};
    ThreadInfo                         _info{};
    std::vector<IScheduler::Workload> *_workloads{ nullptr };
    ThreadFeeder                      *_feeder{ nullptr };
    std::mutex                         _m{};
    std::condition_variable            _cv{};
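    // Handshake flags used as condition-variable predicates (guarding against spurious and
    // missed wakeups): _wait_for_work is set by start() and consumed by worker_thread(),
    // _job_complete is set by worker_thread() and waited on by wait().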
    bool                               _wait_for_work{ false };
    bool                               _job_complete{ true };
    std::exception_ptr                 _current_exception{ nullptr };
};

CPPScheduler::Thread::Thread()
{
    _thread = std::thread(&Thread::worker_thread, this);
}

CPPScheduler::Thread::~Thread()
{
    // Make sure worker thread has ended
    if(_thread.joinable())
    {
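        // Passing a null workload list is the exit signal: worker_thread() returns when it
        // wakes up and finds _workloads == nullptr.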
        ThreadFeeder feeder;
        start(nullptr, feeder, ThreadInfo());
        _thread.join();
    }
}

void CPPScheduler::Thread::start(std::vector<IScheduler::Workload> *workloads, ThreadFeeder &feeder, const ThreadInfo &info)
{
    _workloads = workloads;
    _feeder    = &feeder;
    _info      = info;
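    // Update the predicates under the lock so the worker cannot observe a stale state;
    // the notification itself is sent after the lock has been released.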
    {
        std::lock_guard<std::mutex> lock(_m);
        _wait_for_work = true;
        _job_complete  = false;
    }
    _cv.notify_one();
}

void CPPScheduler::Thread::wait()
{
    {
        std::unique_lock<std::mutex> lock(_m);
        _cv.wait(lock, [&] { return _job_complete; });
    }

    if(_current_exception)
    {
        std::rethrow_exception(_current_exception);
    }
}

void CPPScheduler::Thread::worker_thread()
{
    while(true)
    {
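        // Sleep until start() signals new work; the predicate makes the wait robust against
        // spurious wakeups.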
        std::unique_lock<std::mutex> lock(_m);
        _cv.wait(lock, [&] { return _wait_for_work; });
        _wait_for_work = false;

        _current_exception = nullptr;

        // Time to exit
        if(_workloads == nullptr)
        {
            return;
        }

        try
        {
            process_workloads(*_workloads, *_feeder, _info);
        }
        catch(...)
        {
            _current_exception = std::current_exception();
        }

        _job_complete = true;
        lock.unlock();
        _cv.notify_one();
    }
}

CPPScheduler &CPPScheduler::get()
{
    static CPPScheduler scheduler;
    return scheduler;
}
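
// Minimal usage sketch (illustrative only; `kernel` stands for any configured ICPPKernel
// instance and is not defined in this file):
//
//   CPPScheduler::get().set_num_threads(4);
//   CPPScheduler::get().schedule(&kernel, IScheduler::Hints(Window::DimY));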

CPPScheduler::CPPScheduler()
    : _num_threads(num_threads_hint()),
      _threads(_num_threads - 1)
{
    get_cpu_configuration(_cpu_info);
}

void CPPScheduler::set_num_threads(unsigned int num_threads)
{
    _num_threads = num_threads == 0 ? num_threads_hint() : num_threads;
    _threads.resize(_num_threads - 1);
}

unsigned int CPPScheduler::num_threads() const
{
    return _num_threads;
}

void CPPScheduler::run_workloads(std::vector<IScheduler::Workload> &workloads)
{
    const unsigned int num_threads = std::min(_num_threads, static_cast<unsigned int>(workloads.size()));
    if(num_threads < 1)
    {
        return;
    }
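    // The feeder starts at num_threads: indices [0, num_threads) are dispatched statically
    // below through info.thread_id, so the feeder only hands out the remaining workloads.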
    ThreadFeeder feeder(num_threads, workloads.size());
    ThreadInfo   info;
    info.cpu_info    = &_cpu_info;
    info.num_threads = num_threads;
    unsigned int t         = 0;
    auto         thread_it = _threads.begin();
    for(; t < num_threads - 1; ++t, ++thread_it)
    {
        info.thread_id = t;
        thread_it->start(&workloads, feeder, info);
    }

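    // The calling thread takes part in the work as well, acting as the last "thread": it
    // runs its share in place instead of waiting idle.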
    info.thread_id = t;
    process_workloads(workloads, feeder, info);

    try
    {
        for(auto &thread : _threads)
        {
            thread.wait();
        }
    }
    catch(const std::system_error &e)
    {
        std::cerr << "Caught system_error with code " << e.code() << " meaning " << e.what() << '\n';
    }
}

void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints)
{
    ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel");

    const Window      &max_window     = kernel->window();
    const unsigned int num_iterations = max_window.num_iterations(hints.split_dimension());
    const unsigned int num_threads    = std::min(num_iterations, _num_threads);

    if(num_iterations == 0)
    {
        return;
    }

    if(!kernel->is_parallelisable() || num_threads == 1)
    {
        ThreadInfo info;
        info.cpu_info = &_cpu_info;
        kernel->run(max_window, info);
    }
    else
    {
        unsigned int num_windows = 0;
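        // STATIC: exactly one window per thread. DYNAMIC: create more (and smaller) windows
        // than threads, so faster threads can pick up extra work through the ThreadFeeder.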
        switch(hints.strategy())
        {
            case StrategyHint::STATIC:
                num_windows = num_threads;
                break;
            case StrategyHint::DYNAMIC:
            {
                // Cap the number of windows so they don't become too small, which would create contention on the ThreadFeeder
                const unsigned int max_iterations = static_cast<unsigned int>(_num_threads) * 3;
                num_windows                       = num_iterations > max_iterations ? max_iterations : num_iterations;
                break;
            }
            default:
                ARM_COMPUTE_ERROR("Unknown strategy");
        }
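        // One workload per window: workload t runs the kernel on the t-th split of the
        // kernel's maximum execution window.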
        std::vector<IScheduler::Workload> workloads(num_windows);
        for(unsigned int t = 0; t < num_windows; t++)
        {
            // Capture 't' by copy, all the other variables by reference:
            workloads[t] = [t, &hints, &max_window, &num_windows, &kernel](const ThreadInfo & info)
            {
                Window win = max_window.split_window(hints.split_dimension(), t, num_windows);
                win.validate();
                kernel->run(win, info);
            };
        }
        run_workloads(workloads);
    }
}
} // namespace arm_compute