blob: 5916bb46fde1ffee189abc8fdb3df437c5a31f7f [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
Georgios Pinitas53d12272018-02-01 20:23:25 +00002 * Copyright (c) 2016-2018 ARM Limited.
Anthony Barbier6ff3b192017-09-04 18:44:23 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/runtime/CPP/CPPScheduler.h"
25
26#include "arm_compute/core/CPP/ICPPKernel.h"
27#include "arm_compute/core/Error.h"
28#include "arm_compute/core/Helpers.h"
29#include "arm_compute/core/Utils.h"
Pablo Tello7fad9b12018-03-14 17:55:27 +000030#include "arm_compute/runtime/CPUUtils.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010031
Anthony Barbierd89940e2018-06-28 13:39:35 +010032#include <atomic>
Moritz Pflanzerff06f202017-09-08 13:48:23 +010033#include <condition_variable>
Anthony Barbier6ff3b192017-09-04 18:44:23 +010034#include <iostream>
Moritz Pflanzerff06f202017-09-08 13:48:23 +010035#include <mutex>
Anthony Barbier6ff3b192017-09-04 18:44:23 +010036#include <system_error>
37#include <thread>
38
Moritz Pflanzerff06f202017-09-08 13:48:23 +010039namespace arm_compute
40{
Anthony Barbier52ecb062018-05-25 13:32:10 +010041namespace
42{
/** Hands out consecutive workload indices to several threads.
 *
 * Every call to get_next() atomically takes the current counter value and
 * increments the counter, so no two callers receive the same index.
 */
class ThreadFeeder
{
public:
    /** Constructor
     *
     * @param[in] start First value that will be handed out by get_next()
     * @param[in] end   One past the last valid value (the last valid value is end - 1)
     */
    explicit ThreadFeeder(unsigned int start = 0, unsigned int end = 0)
        : _atomic_counter(start), _end(end)
    {
    }

    /** Fetch the next value of the range.
     *
     * @param[out] next Receives the value taken from the counter. It is always overwritten,
     *                  but is only a valid index when the function returns true.
     *
     * @return True if @p next is inside the range, false once the range is exhausted.
     */
    bool get_next(unsigned int &next)
    {
        // Relaxed ordering: only the atomicity of the increment is requested here.
        const unsigned int claimed = _atomic_counter.fetch_add(1u, std::memory_order_relaxed);
        next                       = claimed;
        return claimed < _end;
    }

private:
    std::atomic<unsigned int> _atomic_counter; /**< Next value to hand out */
    const unsigned int        _end;            /**< Exclusive upper bound of the range */
};
71
72/** Execute workloads[info.thread_id] first, then call the feeder to get the index of the next workload to run.
73 *
74 * Will run workloads until the feeder reaches the end of its range.
75 *
76 * @param[in] workloads The array of workloads
77 * @param[in,out] feeder The feeder indicating which workload to execute next.
78 * @param[in] info Threading and CPU info.
79 */
80void process_workloads(std::vector<IScheduler::Workload> &workloads, ThreadFeeder &feeder, const ThreadInfo &info)
81{
82 unsigned int workload_index = info.thread_id;
83 do
84 {
85 ARM_COMPUTE_ERROR_ON(workload_index >= workloads.size());
86 workloads[workload_index](info);
87 }
88 while(feeder.get_next(workload_index));
89}
90
91} //namespace
92
93class CPPScheduler::Thread
Anthony Barbier6ff3b192017-09-04 18:44:23 +010094{
95public:
Moritz Pflanzerff06f202017-09-08 13:48:23 +010096 /** Start a new thread. */
Anthony Barbier6ff3b192017-09-04 18:44:23 +010097 Thread();
Moritz Pflanzerff06f202017-09-08 13:48:23 +010098
Anthony Barbier6ff3b192017-09-04 18:44:23 +010099 Thread(const Thread &) = delete;
100 Thread &operator=(const Thread &) = delete;
101 Thread(Thread &&) = delete;
102 Thread &operator=(Thread &&) = delete;
Moritz Pflanzerff06f202017-09-08 13:48:23 +0100103
104 /** Destructor. Make the thread join. */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100105 ~Thread();
Moritz Pflanzerff06f202017-09-08 13:48:23 +0100106
Anthony Barbier52ecb062018-05-25 13:32:10 +0100107 /** Request the worker thread to start executing workloads.
108 *
109 * The thread will start by executing workloads[info.thread_id] and will then call the feeder to
110 * get the index of the following workload to run.
111 *
112 * @note This function will return as soon as the workloads have been sent to the worker thread.
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100113 * wait() needs to be called to ensure the execution is complete.
114 */
Anthony Barbier52ecb062018-05-25 13:32:10 +0100115 void start(std::vector<IScheduler::Workload> *workloads, ThreadFeeder &feeder, const ThreadInfo &info);
Moritz Pflanzerff06f202017-09-08 13:48:23 +0100116
117 /** Wait for the current kernel execution to complete. */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100118 void wait();
Moritz Pflanzerff06f202017-09-08 13:48:23 +0100119
120 /** Function ran by the worker thread. */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100121 void worker_thread();
122
123private:
Anthony Barbier52ecb062018-05-25 13:32:10 +0100124 std::thread _thread{};
125 ThreadInfo _info{};
126 std::vector<IScheduler::Workload> *_workloads{ nullptr };
127 ThreadFeeder *_feeder{ nullptr };
128 std::mutex _m{};
129 std::condition_variable _cv{};
130 bool _wait_for_work{ false };
131 bool _job_complete{ true };
132 std::exception_ptr _current_exception{ nullptr };
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100133};
134
Anthony Barbier52ecb062018-05-25 13:32:10 +0100135CPPScheduler::Thread::Thread()
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100136{
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100137 _thread = std::thread(&Thread::worker_thread, this);
138}
139
Anthony Barbier52ecb062018-05-25 13:32:10 +0100140CPPScheduler::Thread::~Thread()
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100141{
Moritz Pflanzerff06f202017-09-08 13:48:23 +0100142 // Make sure worker thread has ended
143 if(_thread.joinable())
144 {
Anthony Barbier52ecb062018-05-25 13:32:10 +0100145 ThreadFeeder feeder;
146 start(nullptr, feeder, ThreadInfo());
Moritz Pflanzerff06f202017-09-08 13:48:23 +0100147 _thread.join();
148 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100149}
150
Anthony Barbier52ecb062018-05-25 13:32:10 +0100151void CPPScheduler::Thread::start(std::vector<IScheduler::Workload> *workloads, ThreadFeeder &feeder, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100152{
Anthony Barbier52ecb062018-05-25 13:32:10 +0100153 _workloads = workloads;
154 _feeder = &feeder;
155 _info = info;
Moritz Pflanzerff06f202017-09-08 13:48:23 +0100156 {
157 std::lock_guard<std::mutex> lock(_m);
158 _wait_for_work = true;
159 _job_complete = false;
160 }
161 _cv.notify_one();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100162}
163
Anthony Barbier52ecb062018-05-25 13:32:10 +0100164void CPPScheduler::Thread::wait()
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100165{
Moritz Pflanzerff06f202017-09-08 13:48:23 +0100166 {
167 std::unique_lock<std::mutex> lock(_m);
168 _cv.wait(lock, [&] { return _job_complete; });
169 }
170
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100171 if(_current_exception)
172 {
173 std::rethrow_exception(_current_exception);
174 }
175}
176
Anthony Barbier52ecb062018-05-25 13:32:10 +0100177void CPPScheduler::Thread::worker_thread()
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100178{
Moritz Pflanzerff06f202017-09-08 13:48:23 +0100179 while(true)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100180 {
Moritz Pflanzerff06f202017-09-08 13:48:23 +0100181 std::unique_lock<std::mutex> lock(_m);
182 _cv.wait(lock, [&] { return _wait_for_work; });
183 _wait_for_work = false;
184
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100185 _current_exception = nullptr;
Moritz Pflanzerff06f202017-09-08 13:48:23 +0100186
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100187 // Time to exit
Anthony Barbier52ecb062018-05-25 13:32:10 +0100188 if(_workloads == nullptr)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100189 {
190 return;
191 }
192
Michalis Spyrou323ce0f2018-11-30 16:30:43 +0000193#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100194 try
195 {
Michalis Spyrou323ce0f2018-11-30 16:30:43 +0000196#endif /* ARM_COMPUTE_EXCEPTIONS_ENABLED */
Anthony Barbier52ecb062018-05-25 13:32:10 +0100197 process_workloads(*_workloads, *_feeder, _info);
Michalis Spyrou323ce0f2018-11-30 16:30:43 +0000198
199#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100200 }
201 catch(...)
202 {
203 _current_exception = std::current_exception();
204 }
Michalis Spyrou323ce0f2018-11-30 16:30:43 +0000205#endif /* ARM_COMPUTE_EXCEPTIONS_DISABLED */
Moritz Pflanzerff06f202017-09-08 13:48:23 +0100206 _job_complete = true;
207 lock.unlock();
208 _cv.notify_one();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100209 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100210}
211
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100212CPPScheduler &CPPScheduler::get()
213{
214 static CPPScheduler scheduler;
215 return scheduler;
216}
217
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100218CPPScheduler::CPPScheduler()
Georgios Pinitas53d12272018-02-01 20:23:25 +0000219 : _num_threads(num_threads_hint()),
Moritz Pflanzerff06f202017-09-08 13:48:23 +0100220 _threads(_num_threads - 1)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100221{
222}
223
224void CPPScheduler::set_num_threads(unsigned int num_threads)
225{
Anthony Barbierce876122018-02-22 12:44:15 +0000226 _num_threads = num_threads == 0 ? num_threads_hint() : num_threads;
Moritz Pflanzerff06f202017-09-08 13:48:23 +0100227 _threads.resize(_num_threads - 1);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100228}
229
Moritz Pflanzerd929b9c2017-06-28 10:15:48 +0100230unsigned int CPPScheduler::num_threads() const
231{
232 return _num_threads;
233}
234
Vidhya Sudhan Loganathand646ae12018-11-19 15:18:20 +0000235#ifndef DOXYGEN_SKIP_THIS
Anthony Barbier52ecb062018-05-25 13:32:10 +0100236void CPPScheduler::run_workloads(std::vector<IScheduler::Workload> &workloads)
237{
238 const unsigned int num_threads = std::min(_num_threads, static_cast<unsigned int>(workloads.size()));
239 if(num_threads < 1)
240 {
241 return;
242 }
243 ThreadFeeder feeder(num_threads, workloads.size());
244 ThreadInfo info;
245 info.cpu_info = &_cpu_info;
246 info.num_threads = num_threads;
247 unsigned int t = 0;
248 auto thread_it = _threads.begin();
249 for(; t < num_threads - 1; ++t, ++thread_it)
250 {
251 info.thread_id = t;
252 thread_it->start(&workloads, feeder, info);
253 }
254
255 info.thread_id = t;
256 process_workloads(workloads, feeder, info);
Michalis Spyrou323ce0f2018-11-30 16:30:43 +0000257#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
Anthony Barbier52ecb062018-05-25 13:32:10 +0100258 try
259 {
Michalis Spyrou323ce0f2018-11-30 16:30:43 +0000260#endif /* ARM_COMPUTE_EXCEPTIONS_DISABLED */
Anthony Barbier52ecb062018-05-25 13:32:10 +0100261 for(auto &thread : _threads)
262 {
263 thread.wait();
264 }
Michalis Spyrou323ce0f2018-11-30 16:30:43 +0000265#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
Anthony Barbier52ecb062018-05-25 13:32:10 +0100266 }
267 catch(const std::system_error &e)
268 {
269 std::cerr << "Caught system_error with code " << e.code() << " meaning " << e.what() << '\n';
270 }
Michalis Spyrou323ce0f2018-11-30 16:30:43 +0000271#endif /* ARM_COMPUTE_EXCEPTIONS_DISABLED */
Anthony Barbier52ecb062018-05-25 13:32:10 +0100272}
Vidhya Sudhan Loganathand646ae12018-11-19 15:18:20 +0000273#endif /* DOXYGEN_SKIP_THIS */
Anthony Barbier52ecb062018-05-25 13:32:10 +0100274
Anthony Barbier376c85f2018-05-25 14:17:21 +0100275void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100276{
277 ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel");
278
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100279 const Window &max_window = kernel->window();
Anthony Barbier376c85f2018-05-25 14:17:21 +0100280 const unsigned int num_iterations = max_window.num_iterations(hints.split_dimension());
Anthony Barbier52ecb062018-05-25 13:32:10 +0100281 const unsigned int num_threads = std::min(num_iterations, _num_threads);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100282
Moritz Pflanzer2fd5d952017-09-24 12:10:46 +0100283 if(num_iterations == 0)
284 {
285 return;
286 }
287
Anthony Barbier52ecb062018-05-25 13:32:10 +0100288 if(!kernel->is_parallelisable() || num_threads == 1)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100289 {
Anthony Barbier52ecb062018-05-25 13:32:10 +0100290 ThreadInfo info;
291 info.cpu_info = &_cpu_info;
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100292 kernel->run(max_window, info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100293 }
294 else
295 {
Anthony Barbier376c85f2018-05-25 14:17:21 +0100296 unsigned int num_windows = 0;
297 switch(hints.strategy())
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100298 {
Anthony Barbier376c85f2018-05-25 14:17:21 +0100299 case StrategyHint::STATIC:
300 num_windows = num_threads;
301 break;
302 case StrategyHint::DYNAMIC:
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100303 {
Anthony Barbier376c85f2018-05-25 14:17:21 +0100304 // Make sure we don't use some windows which are too small as this might create some contention on the ThreadFeeder
305 const unsigned int max_iterations = static_cast<unsigned int>(_num_threads) * 3;
306 num_windows = num_iterations > max_iterations ? max_iterations : num_iterations;
307 break;
308 }
309 default:
310 ARM_COMPUTE_ERROR("Unknown strategy");
311 }
312 std::vector<IScheduler::Workload> workloads(num_windows);
313 for(unsigned int t = 0; t < num_windows; t++)
314 {
315 //Capture 't' by copy, all the other variables by reference:
316 workloads[t] = [t, &hints, &max_window, &num_windows, &kernel](const ThreadInfo & info)
317 {
318 Window win = max_window.split_window(hints.split_dimension(), t, num_windows);
Anthony Barbier52ecb062018-05-25 13:32:10 +0100319 win.validate();
320 kernel->run(win, info);
321 };
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100322 }
Anthony Barbier52ecb062018-05-25 13:32:10 +0100323 run_workloads(workloads);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100324 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100325}
Moritz Pflanzerff06f202017-09-08 13:48:23 +0100326} // namespace arm_compute