/*
 * Copyright (c) 2016-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/CPP/CPPScheduler.h"

#include "arm_compute/core/CPP/ICPPKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/runtime/CPUUtils.h"

#include <atomic>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <system_error>
#include <thread>

namespace arm_compute
{
namespace
{
class ThreadFeeder
{
public:
    /** Constructor
     *
     * @param[in] start First value that will be returned by the feeder
     * @param[in] end   End condition (The last value returned by get_next() will be end - 1)
     */
    explicit ThreadFeeder(unsigned int start = 0, unsigned int end = 0)
        : _atomic_counter(start), _end(end)
    {
    }
    /** Return the next element in the range if there is one.
     *
     * @param[out] next Will contain the next element if there is one.
     *
     * @return False if the end of the range has been reached and next wasn't set.
     */
    bool get_next(unsigned int &next)
    {
        next = atomic_fetch_add_explicit(&_atomic_counter, 1u, std::memory_order_relaxed);
        return next < _end;
    }

private:
    std::atomic_uint   _atomic_counter;
    const unsigned int _end;
};
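
/* Illustrative usage sketch (not part of the library): a ThreadFeeder hands out each
 * index in [start, end) exactly once across all callers. For example, assuming indices
 * 0 and 1 were already dispatched statically to two threads:
 *
 *     ThreadFeeder feeder(2, 5);
 *     unsigned int idx = 0;
 *     while(feeder.get_next(idx)) // yields 2, 3 and 4 (interleaved across threads), then stops
 *     {
 *         // run workload idx
 *     }
 */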

/** Execute workloads[info.thread_id] first, then call the feeder to get the index of the next workload to run.
 *
 * Will run workloads until the feeder reaches the end of its range.
 *
 * @param[in]     workloads The array of workloads
 * @param[in,out] feeder    The feeder indicating which workload to execute next.
 * @param[in]     info      Threading and CPU info.
 */
void process_workloads(std::vector<IScheduler::Workload> &workloads, ThreadFeeder &feeder, const ThreadInfo &info)
{
    unsigned int workload_index = info.thread_id;
    do
    {
        ARM_COMPUTE_ERROR_ON(workload_index >= workloads.size());
        workloads[workload_index](info);
    }
    while(feeder.get_next(workload_index));
}
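
/* Worked example (illustrative): with 3 threads, 8 workloads and a feeder constructed
 * as ThreadFeeder(3, 8), the threads with thread_id 0, 1 and 2 first run workloads 0,
 * 1 and 2 respectively, and workloads 3..7 are then claimed through feeder.get_next()
 * by whichever thread becomes free first.
 */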

} //namespace

class CPPScheduler::Thread
{
public:
    /** Start a new thread. */
    Thread();

    Thread(const Thread &) = delete;
    Thread &operator=(const Thread &) = delete;
    Thread(Thread &&) = delete;
    Thread &operator=(Thread &&) = delete;

    /** Destructor. Make the thread join. */
    ~Thread();

    /** Request the worker thread to start executing workloads.
     *
     * The thread will start by executing workloads[info.thread_id] and will then call the feeder to
     * get the index of the following workload to run.
     *
     * @note This function will return as soon as the workloads have been sent to the worker thread.
     * wait() needs to be called to ensure the execution is complete.
     */
    void start(std::vector<IScheduler::Workload> *workloads, ThreadFeeder &feeder, const ThreadInfo &info);

    /** Wait for the current kernel execution to complete. */
    void wait();

    /** Function run by the worker thread. */
    void worker_thread();

private:
    std::thread                        _thread{};
    ThreadInfo                         _info{};
    std::vector<IScheduler::Workload> *_workloads{ nullptr };
    ThreadFeeder                      *_feeder{ nullptr };
    std::mutex                         _m{};
    std::condition_variable            _cv{};
    bool                               _wait_for_work{ false };
    bool                               _job_complete{ true };
    std::exception_ptr                 _current_exception{ nullptr };
};
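
/* Minimal sketch of the start()/wait() protocol above (illustrative only, mirroring what
 * run_workloads() does with one pooled thread plus the calling thread):
 *
 *     CPPScheduler::Thread worker;                  // spawns the worker thread
 *     ThreadFeeder         feeder(2, workloads.size());
 *     ThreadInfo           info;
 *     info.thread_id = 0;
 *     worker.start(&workloads, feeder, info);       // returns as soon as the work is handed over
 *     info.thread_id = 1;
 *     process_workloads(workloads, feeder, info);   // the caller processes its own share
 *     worker.wait();                                // blocks until the worker is idle; rethrows its exception
 */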

CPPScheduler::Thread::Thread()
{
    _thread = std::thread(&Thread::worker_thread, this);
}

CPPScheduler::Thread::~Thread()
{
    // Make sure worker thread has ended
    if(_thread.joinable())
    {
        ThreadFeeder feeder;
        start(nullptr, feeder, ThreadInfo());
        _thread.join();
    }
}

void CPPScheduler::Thread::start(std::vector<IScheduler::Workload> *workloads, ThreadFeeder &feeder, const ThreadInfo &info)
{
    _workloads = workloads;
    _feeder    = &feeder;
    _info      = info;
    {
        std::lock_guard<std::mutex> lock(_m);
        _wait_for_work = true;
        _job_complete  = false;
    }
    _cv.notify_one();
}

void CPPScheduler::Thread::wait()
{
    {
        std::unique_lock<std::mutex> lock(_m);
        _cv.wait(lock, [&] { return _job_complete; });
    }

    if(_current_exception)
    {
        std::rethrow_exception(_current_exception);
    }
}

void CPPScheduler::Thread::worker_thread()
{
    while(true)
    {
        std::unique_lock<std::mutex> lock(_m);
        _cv.wait(lock, [&] { return _wait_for_work; });
        _wait_for_work = false;

        _current_exception = nullptr;

        // Time to exit
        if(_workloads == nullptr)
        {
            return;
        }

        try
        {
            process_workloads(*_workloads, *_feeder, _info);
        }
        catch(...)
        {
            _current_exception = std::current_exception();
        }

        _job_complete = true;
        lock.unlock();
        _cv.notify_one();
    }
}
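
/* Note on the handshake above: start() publishes the work and flips _wait_for_work under
 * the mutex before notifying; the worker wakes, clears the flag, runs its share of the
 * workloads, then sets _job_complete and notifies wait(). Passing a nullptr workload
 * vector (as the destructor does) is the signal for the worker loop to return.
 */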

CPPScheduler &CPPScheduler::get()
{
    static CPPScheduler scheduler;
    return scheduler;
}

CPPScheduler::CPPScheduler()
    : _num_threads(num_threads_hint()),
      _threads(_num_threads - 1)
{
}

void CPPScheduler::set_num_threads(unsigned int num_threads)
{
    _num_threads = num_threads == 0 ? num_threads_hint() : num_threads;
    _threads.resize(_num_threads - 1);
}

unsigned int CPPScheduler::num_threads() const
{
    return _num_threads;
}

#ifndef DOXYGEN_SKIP_THIS
void CPPScheduler::run_workloads(std::vector<IScheduler::Workload> &workloads)
{
    const unsigned int num_threads = std::min(_num_threads, static_cast<unsigned int>(workloads.size()));
    if(num_threads < 1)
    {
        return;
    }
    ThreadFeeder feeder(num_threads, workloads.size());
    ThreadInfo   info;
    info.cpu_info    = &_cpu_info;
    info.num_threads = num_threads;
    unsigned int t         = 0;
    auto         thread_it = _threads.begin();
    for(; t < num_threads - 1; ++t, ++thread_it)
    {
        info.thread_id = t;
        thread_it->start(&workloads, feeder, info);
    }

    info.thread_id = t;
    process_workloads(workloads, feeder, info);

    try
    {
        for(auto &thread : _threads)
        {
            thread.wait();
        }
    }
    catch(const std::system_error &e)
    {
        std::cerr << "Caught system_error with code " << e.code() << " meaning " << e.what() << '\n';
    }
}
#endif /* DOXYGEN_SKIP_THIS */
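
/* Dispatch pattern of run_workloads() (illustrative): with _num_threads == 4 and 10
 * workloads, the feeder is constructed as ThreadFeeder(4, 10); the three pooled threads
 * start on workloads 0, 1 and 2, the calling thread runs workload 3, and workloads 4..9
 * are handed out dynamically to whichever of the four threads asks first.
 */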

void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints)
{
    ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel");

    const Window      &max_window     = kernel->window();
    const unsigned int num_iterations = max_window.num_iterations(hints.split_dimension());
    const unsigned int num_threads    = std::min(num_iterations, _num_threads);

    if(num_iterations == 0)
    {
        return;
    }

    if(!kernel->is_parallelisable() || num_threads == 1)
    {
        ThreadInfo info;
        info.cpu_info = &_cpu_info;
        kernel->run(max_window, info);
    }
    else
    {
        unsigned int num_windows = 0;
        switch(hints.strategy())
        {
            case StrategyHint::STATIC:
                num_windows = num_threads;
                break;
            case StrategyHint::DYNAMIC:
            {
                // Make sure the windows are not too small, as very small windows would create contention on the ThreadFeeder
                const unsigned int max_iterations = static_cast<unsigned int>(_num_threads) * 3;
                num_windows = num_iterations > max_iterations ? max_iterations : num_iterations;
                break;
            }
            default:
                ARM_COMPUTE_ERROR("Unknown strategy");
        }
        std::vector<IScheduler::Workload> workloads(num_windows);
        for(unsigned int t = 0; t < num_windows; t++)
        {
            // Capture 't' by copy, all the other variables by reference:
            workloads[t] = [t, &hints, &max_window, &num_windows, &kernel](const ThreadInfo & info)
            {
                Window win = max_window.split_window(hints.split_dimension(), t, num_windows);
                win.validate();
                kernel->run(win, info);
            };
        }
        run_workloads(workloads);
    }
}
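
/* Example of the strategy split above (illustrative): for a parallelisable kernel with
 * num_iterations == 64 along the split dimension and _num_threads == 4, StrategyHint::STATIC
 * creates 4 windows (one per thread), while StrategyHint::DYNAMIC creates
 * min(64, 4 * 3) == 12 smaller windows that the ThreadFeeder balances across the 4 threads
 * at run time.
 */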
} // namespace arm_compute