blob: 03f099d57b5d419a935d7cee95f318588664c555 [file] [log] [blame]
Pablo Telloeb82fd22018-02-23 13:43:50 +00001/*
2 * Copyright (c) 2017-2018 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#pragma once
25
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <memory>
#include <vector>

#ifndef NO_MULTI_THREADING
#include <atomic>
#include <mutex>

#define USE_SEMAPHORE

#ifdef USE_SEMAPHORE
#include <condition_variable>
#endif

#endif
40
Anthony Barbier5f707732018-07-03 16:22:02 +010041namespace arm_gemm {
42
Pablo Telloeb82fd22018-02-23 13:43:50 +000043#ifndef NO_MULTI_THREADING
/* Life cycle of a Buffer:
 *
 *   IDLE       -> POPULATING   a thread has claimed the buffer (done under the lock)
 *   POPULATING -> BUSY         the claiming thread has finished filling the storage
 *   BUSY       -> IDLE         the last user has called release()
 */
enum class BufferStatus {
    IDLE,
    POPULATING,
    BUSY
};

/* One block of storage shared between several threads.
 *
 * For a given index, exactly one thread runs the populating function; the
 * others either return straight away (try_populate) or wait until the
 * content is ready (get).  Every user must call release() once it has
 * finished with the content; the buffer becomes IDLE again when the user
 * count drops to zero.
 */
class Buffer {
private:
    const int    _maxusers; // Maximum permissible threads.
    void * const _storage;  // Storage for buffer content.

    int _numusers; // Actual number of threads (might be lower).

    /* _status and _index are deliberately read without holding the lock on
     * the fast paths while other threads may be writing them, so they have
     * to be atomics: 'volatile' gives no inter-thread ordering guarantees
     * and unsynchronised access would be a data race.  The default
     * (sequentially consistent) ordering also makes the writes done by the
     * populating function visible to any thread that observes BUSY. */
    std::atomic<BufferStatus> _status{BufferStatus::IDLE}; // Status
    std::atomic_int           _users{0};                   // How many users are still using the buffer.
    std::atomic_int           _index{0};                   // Which block of data currently resides in the buffer.

    std::mutex _lock{};
#ifdef USE_SEMAPHORE
    std::condition_variable _cv{};
#endif

    /* True if the buffer holds ready content for some index other than
     * 'index', i.e. it cannot be reused until its users release it. */
    bool busy_with_other_index(const int index) const {
        return (_status.load() == BufferStatus::BUSY) && (_index.load() != index);
    }

    /* Attempt the IDLE -> POPULATING transition on behalf of 'index'.
     *
     * The unlocked check keeps the common "not idle" case cheap; the check
     * is repeated under the lock so that only one thread can win.  Returns
     * true if the caller now owns the buffer and must populate it. */
    bool try_claim(const int index) {
        if (_status.load() != BufferStatus::IDLE) {
            return false;
        }

        std::lock_guard<std::mutex> guard(_lock);

        if (_status.load() != BufferStatus::IDLE) {
            return false;
        }

        _status.store(BufferStatus::POPULATING);
        _index.store(index);
        _users.store(_numusers);
        return true;
    }

    /* Run 'func' on the storage, then publish the buffer as BUSY (ready). */
    template <typename T>
    void populate_buffer(T func) {
        func(_storage);

        /* Now mark it as ready. */
#ifdef USE_SEMAPHORE
        {
            std::unique_lock<std::mutex> ul(_lock);
            _status.store(BufferStatus::BUSY);
            _cv.notify_all();
        }
#else
        _status.store(BufferStatus::BUSY);
#endif
    }

public:
    Buffer(const Buffer &) = delete;
    Buffer &operator=(const Buffer &) = delete;

    /* storage:  memory that populating functions write into (not owned).
     * maxusers: upper bound on, and initial value of, the number of users
     *           that must release() the buffer before it can be reused. */
    Buffer(void *storage, int maxusers) : _maxusers(maxusers), _storage(storage), _numusers(maxusers) {
    }

    /* Try and populate the given index.
     * Wait if the buffer is busy with previous index, then:
     *
     * If the buffer is idle, grab it and populate it.
     * If it's already being populated by another thread or is ready, return.
     *
     * 'func' is invoked as func(void *) with the buffer storage.
     */
    template <typename T>
    void try_populate(const int index, T func) {
        for (;;) {
#ifdef USE_SEMAPHORE
            /* If it's busy with a previous index, wait on the semaphore.
             * The condition is re-tested under the lock so that a
             * notification cannot slip in between the test and the wait;
             * spurious wake-ups are absorbed by the enclosing loop. */
            if (busy_with_other_index(index)) {
                std::unique_lock<std::mutex> ul(_lock);

                if (busy_with_other_index(index)) {
                    _cv.wait(ul);
                }
            }
#endif
            /* Return if another thread is populating it already, or if the
             * requested content is already there. */
            if (_index.load() == index) {
                const BufferStatus status = _status.load();

                if ((status == BufferStatus::POPULATING) || (status == BufferStatus::BUSY)) {
                    return;
                }
            }

            /* If the buffer is idle, try to grab it. */
            if (try_claim(index)) {
                break;
            }
        }

        /* If we get here, fill in the buffer. */
        populate_buffer(func);
    }

    /* Return the storage once it holds the content for 'index', populating
     * it with func(void *) if nobody else has, and waiting for as long as
     * the buffer is still occupied by another index or is being populated.
     */
    template <typename T>
    void *get(const int index, T func) {
        // Loop until we achieve something.
        for (;;) {
            // If the index is correct and the buffer status is busy then we can
            // just return the content.  No locking is needed here as the index
            // cannot change (and status cannot change from BUSY) until all
            // users have finished.
            if ((_index.load() == index) && (_status.load() == BufferStatus::BUSY)) {
                return _storage;
            }

            /* If the buffer still has some previous content, or is being
             * populated, we can wait with the semaphore. */
#ifdef USE_SEMAPHORE
            if (busy_with_other_index(index) || (_status.load() == BufferStatus::POPULATING)) {
                std::unique_lock<std::mutex> ul(_lock);

                if (busy_with_other_index(index) || (_status.load() == BufferStatus::POPULATING)) {
                    _cv.wait(ul);
                }
            }
#endif

            // If it's idle, we need to populate it.  If the claim fails we
            // go round the loop again and do something else next time.
            if (try_claim(index)) {
                break;
            }
        }

        /* If we get here we need to populate the buffer. */
        populate_buffer(func);

        return _storage;
    }

    /* Threads call this when they have finished processing a buffer.  We
     * simply (atomically) decrement the user count, and if it's hit zero we
     * flag the buffer as idle.
     */
    void release(void) {
        if (--_users == 0) {
#ifdef USE_SEMAPHORE
            std::unique_lock<std::mutex> ul(_lock);
            _status.store(BufferStatus::IDLE);
            /* We notify all waiters as we expect one to do the populating
             * and any others to go and process an earlier block. */
            _cv.notify_all();
#else
            _status.store(BufferStatus::IDLE);
#endif
        }
    }

    /* This is called to change the number of users (clamped to the maximum
     * given at construction).  The new value takes effect the next time the
     * buffer is claimed.
     * NOTE(review): _numusers itself is not synchronised - presumably this
     * is only called while no thread is using the buffer; confirm. */
    void set_numusers(int numusers) {
        _numusers = std::min(numusers, _maxusers);
    }
};
203
Anthony Barbier5f707732018-07-03 16:22:02 +0100204
205class BufferManager {
Pablo Telloeb82fd22018-02-23 13:43:50 +0000206private:
207 /* This has to be a vector of Buffer *, because a Buffer cannot be moved
208 * or copied due to atomic members. */
Anthony Barbier5f707732018-07-03 16:22:02 +0100209 std::vector<Buffer *> _buffers = { };
210 const int _maxthreads;
211 void * const _storage;
Pablo Telloeb82fd22018-02-23 13:43:50 +0000212
213public:
214 BufferManager(BufferManager &) = delete;
Anthony Barbier5f707732018-07-03 16:22:02 +0100215 BufferManager & operator=(BufferManager &) = delete;
Pablo Telloeb82fd22018-02-23 13:43:50 +0000216
217 // Say how much storage is needed.
Anthony Barbier5f707732018-07-03 16:22:02 +0100218 static inline size_t get_storage_requirement(const int maxthreads, const size_t buffersize) {
Pablo Telloeb82fd22018-02-23 13:43:50 +0000219 return buffersize * ((maxthreads == 1) ? 1 : 3);
220 }
221
Anthony Barbier5f707732018-07-03 16:22:02 +0100222 BufferManager(const int maxthreads, const size_t buffersize, void *storage) : _maxthreads(maxthreads), _storage(storage) {
Pablo Telloeb82fd22018-02-23 13:43:50 +0000223 const int numbuffers = (maxthreads == 1) ? 1 : 3;
224
225 /* We don't need any Buffer objects in single thread mode. */
Anthony Barbier5f707732018-07-03 16:22:02 +0100226 if (_maxthreads == 1) {
Pablo Telloeb82fd22018-02-23 13:43:50 +0000227 return;
228 }
229
230 /* Use intptr_t to avoid performing arithmetic on a void * */
231 intptr_t storage_int = reinterpret_cast<intptr_t>(_storage);
232
Anthony Barbier5f707732018-07-03 16:22:02 +0100233 for (int i=0; i<numbuffers; i++) {
Pablo Telloeb82fd22018-02-23 13:43:50 +0000234 _buffers.push_back(new Buffer(reinterpret_cast<void *>(storage_int), _maxthreads));
235 storage_int += buffersize;
236 }
237 }
238
Anthony Barbier5f707732018-07-03 16:22:02 +0100239 ~BufferManager() {
240 while (_buffers.size()) {
Pablo Telloeb82fd22018-02-23 13:43:50 +0000241 delete _buffers.back();
242 _buffers.pop_back();
243 }
244 }
245
246 template <typename T>
Anthony Barbier5f707732018-07-03 16:22:02 +0100247 void *get(const int index, T func) {
Pablo Telloeb82fd22018-02-23 13:43:50 +0000248 /* In single thread mode, we just directly call the populating
249 * function on the (single) buffer, otherwise forward to the
250 * relevant Buffer. */
Anthony Barbier5f707732018-07-03 16:22:02 +0100251 if (_maxthreads==1) {
Pablo Telloeb82fd22018-02-23 13:43:50 +0000252 func(_storage);
253 return _storage;
Anthony Barbier5f707732018-07-03 16:22:02 +0100254 } else {
Pablo Telloeb82fd22018-02-23 13:43:50 +0000255 return _buffers[index % _buffers.size()]->get(index, func);
256 }
257 }
258
259 template <typename T>
Anthony Barbier5f707732018-07-03 16:22:02 +0100260 void try_populate(const int index, T func) {
Pablo Telloeb82fd22018-02-23 13:43:50 +0000261 /* No need for this in single thread mode. */
Anthony Barbier5f707732018-07-03 16:22:02 +0100262 if (_maxthreads==1) {
Pablo Telloeb82fd22018-02-23 13:43:50 +0000263 return;
264 }
265
266 _buffers[index % _buffers.size()]->try_populate(index, func);
267 }
268
Anthony Barbier5f707732018-07-03 16:22:02 +0100269 void release(const int index) {
Pablo Telloeb82fd22018-02-23 13:43:50 +0000270 /* No need for this in single thread mode. */
Anthony Barbier5f707732018-07-03 16:22:02 +0100271 if (_maxthreads==1) {
Pablo Telloeb82fd22018-02-23 13:43:50 +0000272 return;
273 }
274
275 _buffers[index % _buffers.size()]->release();
276 }
277
Anthony Barbier5f707732018-07-03 16:22:02 +0100278 void set_nthreads(int threads) {
279 if (_maxthreads==1) {
Pablo Telloeb82fd22018-02-23 13:43:50 +0000280 return;
281 }
282
Anthony Barbier5f707732018-07-03 16:22:02 +0100283 for(unsigned int i=0; i<_buffers.size(); i++) {
Pablo Telloeb82fd22018-02-23 13:43:50 +0000284 _buffers[i]->set_numusers(threads);
285 }
286 }
287};
288
289#else
290
291/* Trivial implementation if threading is disabled at compile time.
292 *
293 * Here, we only need storage for a single buffer. The 'get' method needs
294 * to call the supplied function to populate the buffer and then return it.
295 * All the other methods do nothing.
296 */
297
/* Buffer manager used when threading is compiled out.
 *
 * There is exactly one buffer: the storage handed to the constructor.
 * get() fills it on every call; the remaining operations are no-ops that
 * exist only so callers can use the same interface as the threaded build.
 */
class BufferManager {
public:
    /* Not copyable. */
    BufferManager(BufferManager &) = delete;
    BufferManager & operator=(BufferManager &) = delete;

    /* Only 'storage' is used; the thread count and buffer size are accepted
     * for interface compatibility. */
    BufferManager(const int maxthreads, const size_t buffersize, void *storage) : _storage(storage) {
        static_cast<void>(maxthreads);
        static_cast<void>(buffersize);
    }

    ~BufferManager() { }

    /* A single buffer is all that is ever required, whatever the thread count. */
    static inline size_t get_storage_requirement(const int maxthreads, const size_t buffersize) {
        static_cast<void>(maxthreads);
        return buffersize;
    }

    /* Run the populating function on the buffer and hand the buffer back. */
    template <typename T>
    void *get(const int index, T func) {
        static_cast<void>(index);

        void * const block = _storage;
        func(block);
        return block;
    }

    /* Nothing to prefetch: get() always populates. */
    template <typename T>
    void try_populate(const int index, T func) {
        static_cast<void>(index);
        static_cast<void>(func);
    }

    /* Nothing to release. */
    void release(const int index) {
        static_cast<void>(index);
    }

    /* Thread count is irrelevant here. */
    void set_nthreads(int) { }

private:
    void * const _storage; // The one and only buffer (not owned).
};
328
329#endif
330
331} // namespace arm_gemm