diff --git a/docs/combininglock.md b/docs/combininglock.md
index c15aa991f..a609d5d11 100644
--- a/docs/combininglock.md
+++ b/docs/combininglock.md
@@ -13,7 +13,7 @@ To further improve the startup time, we introduced a new [_combining lock_](../s
 The core idea of the combining lock is that it holds a queue of operations that need to be executed while holding the lock,
 and then the thread holding the lock can execute multiple threads' operations in a single lock acquisition.
 This can reduce the amount of cache misses as a single thread performs multiple updates,
-and thus overall the system will have fewer cache misses.
+and thus overall the system may have fewer cache misses.
 
 The rest of this document will describe the combining lock in more detail.
 
@@ -274,12 +274,14 @@ inline void with(CombiningLock& lock, F&& f)
   while (curr->next.load(std::memory_order_relaxed) == nullptr)
     ;
 
-  // Notify next thread to execute the remaining work.
+  // Read the next thread
   auto next = curr->next.load(std::memory_order_acquire);
-  next->status.store(LockStatus::HEAD, std::memory_order_release);
 
   // Notify the current thread that its work has been completed.
   curr->status.store(LockStatus::DONE, std::memory_order_release);
+  // Notify the next thread it is the head of the queue, and should
+  // perform operations from the queue.
+  next->status.store(LockStatus::HEAD, std::memory_order_release);
   return;
 }
 ```
diff --git a/src/snmalloc/ds/combininglock.h b/src/snmalloc/ds/combininglock.h
index fa91cdbc2..338c94b47 100644
--- a/src/snmalloc/ds/combininglock.h
+++ b/src/snmalloc/ds/combininglock.h
@@ -110,7 +110,7 @@ namespace snmalloc
 
         // We could set
         //    status = LockStatus::HEAD
-        // However, the subsequent state assumes it is Ready, and
+        // However, the subsequent state assumes it is HEAD, and
         // nothing would read it.
       }
 
@@ -119,11 +119,15 @@ namespace snmalloc
       auto curr = this;
       while (true)
       {
+        // Start pulling in the next element of the queue
+        auto n = curr->next.load(std::memory_order_acquire);
+        Aal::prefetch(n);
+
         // Perform work for head of the queue
         curr->f_raw(curr);
 
         // Determine if there are more elements.
-        auto n = curr->next.load(std::memory_order_acquire);
+        n = curr->next.load(std::memory_order_acquire);
         if (n == nullptr)
           break;
         // Signal this work was completed and move on to
@@ -153,14 +157,16 @@ namespace snmalloc
       while (curr->next.load(std::memory_order_relaxed) == nullptr)
         Aal::pause();
 
+      auto n = curr->next.load(std::memory_order_acquire);
+
       // As we had to wait, give the job to the next thread
       // to carry on performing the work.
-      auto n = curr->next.load(std::memory_order_acquire);
       n->set_status(LockStatus::HEAD);
 
       // Notify the thread that we completed its work.
-      // Note that this needs to be done last, as we can't read
-      // curr->next after setting curr->status
+      // Note that this needs to be before setting curr->status,
+      // as after the status is set the thread may deallocate the
+      // queue node.
       curr->set_status(LockStatus::DONE);
       return;
     }