diff --git a/docs/combininglock.md b/docs/combininglock.md index c15aa991f..a609d5d11 100644 --- a/docs/combininglock.md +++ b/docs/combininglock.md @@ -13,7 +13,7 @@ To further improve the startup time, we introduced a new [_combining lock_](../s The core idea of the combining lock is that it holds a queue of operations that need to be executed while holding the lock, and then the thread holding the lock can execute multiple threads' operations in a single lock acquisition. This can reduce the amount of cache misses as a single thread performs multiple updates, -and thus overall the system will have fewer cache misses. +and thus overall the system may have fewer cache misses. The rest of this document will describe the combining lock in more detail. @@ -274,12 +274,14 @@ inline void with(CombiningLock& lock, F&& f) while (curr->next.load(std::memory_order_relaxed) == nullptr) ; - // Notify next thread to execute the remaining work. + // Read the next thread auto next = curr->next.load(std::memory_order_acquire); - next->status.store(LockStatus::HEAD, std::memory_order_release); // Notify the current thread that its work has been completed. curr->status.store(LockStatus::DONE, std::memory_order_release); + // Notify the next thread it is the head of the queue, and should + // perform operations from the queue. + next->status.store(LockStatus::HEAD, std::memory_order_release); return; } ``` diff --git a/src/snmalloc/ds/combininglock.h b/src/snmalloc/ds/combininglock.h index fa91cdbc2..338c94b47 100644 --- a/src/snmalloc/ds/combininglock.h +++ b/src/snmalloc/ds/combininglock.h @@ -110,7 +110,7 @@ namespace snmalloc // We could set // status = LockStatus::HEAD - // However, the subsequent state assumes it is Ready, and + // However, the subsequent state assumes it is HEAD, and // nothing would read it. } @@ -119,11 +119,15 @@ namespace snmalloc auto curr = this; while (true) { + // Start pulling in the next element of the queue + auto n = curr->next.load(std::memory_order_acquire); + Aal::prefetch(n); + // Perform work for head of the queue curr->f_raw(curr); // Determine if there are more elements. - auto n = curr->next.load(std::memory_order_acquire); + n = curr->next.load(std::memory_order_acquire); if (n == nullptr) break; // Signal this work was completed and move on to @@ -153,14 +157,16 @@ namespace snmalloc while (curr->next.load(std::memory_order_relaxed) == nullptr) Aal::pause(); + auto n = curr->next.load(std::memory_order_acquire); + // As we had to wait, give the job to the next thread // to carry on performing the work. - auto n = curr->next.load(std::memory_order_acquire); n->set_status(LockStatus::HEAD); // Notify the thread that we completed its work. - // Note that this needs to be done last, as we can't read - // curr->next after setting curr->status + // Note that this needs to be before setting curr->status, + // as after the status is set the thread may deallocate the + // queue node. curr->set_status(LockStatus::DONE); return; }