From 8798151bc06dbe3305531dc194e13099970db5b5 Mon Sep 17 00:00:00 2001 From: Eddy Ashton Date: Mon, 13 Jan 2025 11:18:23 +0000 Subject: [PATCH] Upgrade snmalloc from 0.6.2 to 0.7.0 (#6746) --- 3rdparty/exported/snmalloc/CMakeLists.txt | 67 ++- 3rdparty/exported/snmalloc/README.md | 3 +- .../exported/snmalloc/src/snmalloc/aal/aal.h | 43 +- .../snmalloc/src/snmalloc/aal/aal_arm.h | 5 +- .../snmalloc/src/snmalloc/aal/aal_cheri.h | 10 +- .../snmalloc/src/snmalloc/aal/aal_concept.h | 120 +++-- .../snmalloc/src/snmalloc/aal/aal_x86.h | 6 +- .../snmalloc/src/snmalloc/backend/backend.h | 52 ++- .../src/snmalloc/backend/fixedglobalconfig.h | 13 +- .../src/snmalloc/backend/globalconfig.h | 72 ++- .../snmalloc/backend/meta_protected_range.h | 13 +- .../backend_helpers/backend_helpers.h | 1 + .../src/snmalloc/backend_helpers/buddy.h | 127 +++-- .../snmalloc/backend_helpers/commonconfig.h | 34 ++ .../backend_helpers/defaultpagemapentry.h | 9 +- .../backend_helpers/largebuddyrange.h | 6 +- .../src/snmalloc/backend_helpers/lockrange.h | 14 +- .../src/snmalloc/backend_helpers/noprange.h | 36 ++ .../snmalloc/backend_helpers/range_helpers.h | 1 - .../backend_helpers/smallbuddyrange.h | 2 +- .../exported/snmalloc/src/snmalloc/ds/aba.h | 7 +- .../snmalloc/src/snmalloc/ds/allocconfig.h | 101 +++- .../snmalloc/src/snmalloc/ds/combininglock.h | 296 ++++++++++++ .../exported/snmalloc/src/snmalloc/ds/ds.h | 1 + .../snmalloc/src/snmalloc/ds/flaglock.h | 10 +- .../snmalloc/src/snmalloc/ds/mpmcstack.h | 6 - .../snmalloc/src/snmalloc/ds/pagemap.h | 18 + .../snmalloc/src/snmalloc/ds/singleton.h | 19 +- .../snmalloc/src/snmalloc/ds_core/bits.h | 55 ++- .../snmalloc/src/snmalloc/ds_core/defines.h | 42 +- .../snmalloc/src/snmalloc/ds_core/helpers.h | 7 +- .../src/snmalloc/ds_core/mitigations.h | 8 +- .../src/snmalloc/ds_core/redblacktree.h | 111 ++--- .../snmalloc/src/snmalloc/ds_core/seqset.h | 3 + .../snmalloc/src/snmalloc/global/global.h | 1 + .../snmalloc/src/snmalloc/global/libc.h | 191 ++++++++ .../snmalloc/src/snmalloc/global/memcpy.h | 6 +- .../src/snmalloc/global/scopedalloc.h | 1 - .../src/snmalloc/global/threadalloc.h | 2 - .../src/snmalloc/mem/backend_concept.h | 212 ++++----- .../snmalloc/src/snmalloc/mem/corealloc.h | 434 ++++++++++++------ .../snmalloc/src/snmalloc/mem/entropy.h | 31 +- .../src/snmalloc/mem/external_alloc.h | 3 + .../snmalloc/src/snmalloc/mem/freelist.h | 328 +++++++++---- .../src/snmalloc/mem/freelist_queue.h | 193 ++++++++ .../snmalloc/src/snmalloc/mem/localalloc.h | 110 ++++- .../snmalloc/src/snmalloc/mem/localcache.h | 17 +- .../snmalloc/src/snmalloc/mem/metadata.h | 146 +++++- .../exported/snmalloc/src/snmalloc/mem/pool.h | 156 +++---- .../snmalloc/src/snmalloc/mem/pooled.h | 26 +- .../src/snmalloc/mem/remoteallocator.h | 424 ++++++++++++----- .../snmalloc/src/snmalloc/mem/remotecache.h | 236 +++++++++- .../src/snmalloc/mem/sizeclasstable.h | 76 ++- .../src/snmalloc/override/jemalloc_compat.cc | 7 +- .../src/snmalloc/override/malloc-extensions.h | 1 + .../snmalloc/src/snmalloc/override/malloc.cc | 176 ++----- .../snmalloc/src/snmalloc/override/memcpy.cc | 2 +- .../snmalloc/src/snmalloc/override/new.cc | 66 ++- .../snmalloc/src/snmalloc/override/override.h | 2 +- .../snmalloc/src/snmalloc/override/rust.cc | 20 +- .../snmalloc/src/snmalloc/pal/pal_apple.h | 103 ++++- .../snmalloc/src/snmalloc/pal/pal_concept.h | 149 +++--- .../snmalloc/src/snmalloc/pal/pal_consts.h | 12 +- .../snmalloc/src/snmalloc/pal/pal_ds.h | 1 - .../snmalloc/src/snmalloc/pal/pal_freebsd.h | 82 +++- 
.../snmalloc/src/snmalloc/pal/pal_haiku.h | 9 - .../snmalloc/src/snmalloc/pal/pal_linux.h | 73 ++- .../snmalloc/src/snmalloc/pal/pal_netbsd.h | 1 + .../snmalloc/src/snmalloc/pal/pal_noalloc.h | 2 +- .../src/snmalloc/pal/pal_open_enclave.h | 1 + .../snmalloc/src/snmalloc/pal/pal_posix.h | 14 +- .../src/snmalloc/pal/pal_timer_default.h | 2 - .../snmalloc/src/snmalloc/pal/pal_windows.h | 30 ++ .../exported/snmalloc/src/snmalloc/snmalloc.h | 18 +- .../snmalloc/src/test/func/cheri/cheri.cc | 3 +- .../src/test/func/client_meta/client_meta.cc | 69 +++ .../test/func/domestication/domestication.cc | 13 +- .../snmalloc/src/test/func/malloc/malloc.cc | 2 +- .../src/test/func/memcpy/func-memcpy.cc | 9 +- .../snmalloc/src/test/func/memory/memory.cc | 14 +- .../src/test/func/miracle_ptr/miracle_ptr.cc | 204 ++++++++ .../snmalloc/src/test/func/pagemap/pagemap.cc | 3 + .../snmalloc/src/test/func/pool/pool.cc | 24 +- .../src/test/func/redblack/redblack.cc | 9 +- .../snmalloc/src/test/func/sandbox/sandbox.cc | 5 +- .../src/test/func/sizeclass/sizeclass.cc | 38 +- .../src/test/func/statistics/stats.cc | 14 +- .../thread_alloc_external.cc | 3 +- .../src/test/func/two_alloc_types/alloc1.cc | 1 + .../src/test/func/two_alloc_types/main.cc | 1 + .../src/test/perf/contention/contention.cc | 4 +- .../perf/external_pointer/externalpointer.cc | 2 +- .../snmalloc/src/test/perf/memcpy/memcpy.cc | 3 +- .../snmalloc/src/test/perf/msgpass/msgpass.cc | 307 +++++++++++++ .../test/perf/singlethread/singlethread.cc | 4 +- .../snmalloc/src/test/perf/startup/startup.cc | 96 ++++ 3rdparty/exported/snmalloc/src/test/setup.h | 2 + CMakeLists.txt | 5 +- cgmanifest.json | 2 +- src/host/snmalloc.cpp | 2 + 100 files changed, 4156 insertions(+), 1365 deletions(-) create mode 100644 3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/noprange.h create mode 100644 3rdparty/exported/snmalloc/src/snmalloc/ds/combininglock.h create mode 100644 3rdparty/exported/snmalloc/src/snmalloc/global/libc.h create mode 100644 3rdparty/exported/snmalloc/src/snmalloc/mem/freelist_queue.h create mode 100644 3rdparty/exported/snmalloc/src/test/func/client_meta/client_meta.cc create mode 100644 3rdparty/exported/snmalloc/src/test/func/miracle_ptr/miracle_ptr.cc create mode 100644 3rdparty/exported/snmalloc/src/test/perf/msgpass/msgpass.cc create mode 100644 3rdparty/exported/snmalloc/src/test/perf/startup/startup.cc diff --git a/3rdparty/exported/snmalloc/CMakeLists.txt b/3rdparty/exported/snmalloc/CMakeLists.txt index 1d9cbb6d1bcf..2948e56db461 100644 --- a/3rdparty/exported/snmalloc/CMakeLists.txt +++ b/3rdparty/exported/snmalloc/CMakeLists.txt @@ -26,6 +26,9 @@ option(SNMALLOC_NO_REALLOCARR "Build without reallocarr exported" ON) option(SNMALLOC_LINK_ICF "Link with Identical Code Folding" ON) option(SNMALLOC_IPO "Link with IPO/LTO support" OFF) option(SNMALLOC_BENCHMARK_INDIVIDUAL_MITIGATIONS "Build tests and ld_preload for individual mitigations" OFF) +option(SNMALLOC_ENABLE_DYNAMIC_LOADING "Build such that snmalloc can be dynamically loaded. This is not required for LD_PRELOAD, and will harm performance if enabled." 
OFF) +option(SNMALLOC_ENABLE_WAIT_ON_ADDRESS "Use wait on address backoff strategy if it is available" ON) +option(SNMALLOC_ENABLE_FUZZING "Enable fuzzing instrumentation tests" OFF) # Options that apply only if we're not building the header-only library cmake_dependent_option(SNMALLOC_RUST_SUPPORT "Build static library for rust" OFF "NOT SNMALLOC_HEADER_ONLY_LIBRARY" OFF) cmake_dependent_option(SNMALLOC_STATIC_LIBRARY "Build static libraries" ON "NOT SNMALLOC_HEADER_ONLY_LIBRARY" OFF) @@ -61,6 +64,18 @@ if (SNMALLOC_SANITIZER) message(STATUS "Using sanitizer=${SNMALLOC_SANITIZER}") endif() +set(SNMALLOC_MIN_ALLOC_SIZE "" CACHE STRING "Minimum allocation bytes (power of 2)") +set(SNMALLOC_MIN_ALLOC_STEP_SIZE "" CACHE STRING "Minimum allocation step (power of 2)") + +set(SNMALLOC_PAGESIZE "" CACHE STRING "Page size in bytes") + +set(SNMALLOC_DEALLOC_BATCH_RING_ASSOC "" CACHE STRING "Associativity of deallocation batch cache; 0 to disable") +set(SNMALLOC_DEALLOC_BATCH_RING_SET_BITS "" CACHE STRING "Logarithm of number of deallocation batch cache associativity sets") + +if(MSVC AND SNMALLOC_STATIC_LIBRARY AND (SNMALLOC_STATIC_LIBRARY_PREFIX STREQUAL "")) + message(FATAL_ERROR "Empty static library prefix not supported on MSVC") +endif() + # If CheckLinkerFlag doesn't exist then provide a dummy implementation that # always fails. The fallback can be removed when we move to CMake 3.18 as the # baseline. @@ -121,6 +136,9 @@ int main() { # this is why we check its existence here CHECK_INCLUDE_FILE_CXX(linux/random.h SNMALLOC_HAS_LINUX_RANDOM_H) +# check if futex.h is available +CHECK_INCLUDE_FILE_CXX(linux/futex.h SNMALLOC_HAS_LINUX_FUTEX_H) + # Provide as function so other projects can reuse # FIXME: This modifies some variables that may or may not be the ones that # provide flags and so is broken by design. It should be removed once Verona @@ -150,7 +168,7 @@ function(clangformat_targets) # tool. It does not work with older versions as AfterCaseLabel is not supported # in earlier versions. find_program(CLANG_FORMAT NAMES - clang-format90 clang-format-9) + clang-format150 clang-format-15) # If we've found a clang-format tool, generate a target for it, otherwise emit # a warning. @@ -176,12 +194,27 @@ endfunction() add_library(snmalloc INTERFACE) if(SNMALLOC_USE_CXX17) - target_compile_definitions(snmalloc INTERFACE -DSNMALLOC_USE_CXX17) target_compile_features(snmalloc INTERFACE cxx_std_17) else() target_compile_features(snmalloc INTERFACE cxx_std_20) endif() +if(SNMALLOC_ENABLE_WAIT_ON_ADDRESS) + target_compile_definitions(snmalloc INTERFACE SNMALLOC_USE_WAIT_ON_ADDRESS=1) +else() + target_compile_definitions(snmalloc INTERFACE SNMALLOC_USE_WAIT_ON_ADDRESS=0) +endif() + +# https://learn.microsoft.com/en-us/cpp/build/reference/zc-cplusplus +if(MSVC) + target_compile_options(snmalloc INTERFACE "/Zc:__cplusplus") +endif() + +if (CMAKE_SYSTEM_NAME STREQUAL NetBSD) + target_include_directories(snmalloc INTERFACE /usr/pkg/include) + target_link_directories(snmalloc INTERFACE /usr/pkg/lib) +endif() + # Add header paths. 
target_include_directories(snmalloc INTERFACE @@ -221,18 +254,30 @@ endif() function(add_as_define FLAG) target_compile_definitions(snmalloc INTERFACE $<$:${FLAG}>) endfunction() +function(add_as_define_value KEY) + if (NOT ${${KEY}} STREQUAL "") + target_compile_definitions(snmalloc INTERFACE ${KEY}=${${KEY}}) + endif () +endfunction() add_as_define(SNMALLOC_QEMU_WORKAROUND) add_as_define(SNMALLOC_TRACING) add_as_define(SNMALLOC_CI_BUILD) add_as_define(SNMALLOC_PLATFORM_HAS_GETENTROPY) add_as_define(SNMALLOC_HAS_LINUX_RANDOM_H) +add_as_define(SNMALLOC_HAS_LINUX_FUTEX_H) if (SNMALLOC_NO_REALLOCARRAY) add_as_define(SNMALLOC_NO_REALLOCARRAY) endif() if (SNMALLOC_NO_REALLOCARR) add_as_define(SNMALLOC_NO_REALLOCARR) endif() +add_as_define_value(SNMALLOC_MIN_ALLOC_SIZE) +add_as_define_value(SNMALLOC_MIN_ALLOC_STEP_SIZE) +add_as_define_value(SNMALLOC_DEALLOC_BATCH_RING_ASSOC) +add_as_define_value(SNMALLOC_DEALLOC_BATCH_RING_SET_BITS) + +add_as_define_value(SNMALLOC_PAGESIZE) target_compile_definitions(snmalloc INTERFACE $<$:MALLOC_USABLE_SIZE_QUALIFIER=const>) @@ -316,6 +361,9 @@ if(NOT SNMALLOC_HEADER_ONLY_LIBRARY) if(SNMALLOC_SANITIZER) target_compile_options(${TESTNAME} PRIVATE -g -fsanitize=${SNMALLOC_SANITIZER} -fno-omit-frame-pointer) target_link_libraries(${TESTNAME} -fsanitize=${SNMALLOC_SANITIZER}) + if (${SNMALLOC_SANITIZER} MATCHES "thread") + target_compile_definitions(${TESTNAME} PRIVATE SNMALLOC_THREAD_SANITIZER_ENABLED) + endif() endif() add_warning_flags(${TESTNAME}) @@ -386,8 +434,14 @@ if(NOT SNMALLOC_HEADER_ONLY_LIBRARY) target_compile_definitions(${name} PRIVATE "SNMALLOC_EXPORT=__attribute__((visibility(\"default\")))") target_compile_options(${name} PRIVATE -fomit-frame-pointer -ffunction-sections) + + check_cxx_compiler_flag("-Werror -Wextra -Wall -mprfchw" SUPPORT_PREFETCH_WRITE) + if (SUPPORT_PREFETCH_WRITE) + target_compile_options(${name} PRIVATE -mprfchw) + endif() # Static TLS model is unsupported on Haiku. - if (NOT CMAKE_SYSTEM_NAME STREQUAL "Haiku") + if ((NOT CMAKE_SYSTEM_NAME STREQUAL "Haiku") AND (NOT SNMALLOC_ENABLE_DYNAMIC_LOADING)) + message(STATUS "snmalloc: Using static TLS model") target_compile_options(${name} PRIVATE -ftls-model=initial-exec) target_compile_options(${name} PRIVATE $<$:-g>) endif() @@ -426,9 +480,11 @@ if(NOT SNMALLOC_HEADER_ONLY_LIBRARY) endfunction() - set(SHIM_FILES src/snmalloc/override/new.cc) + set(SHIM_FILES src/snmalloc/override/malloc.cc src/snmalloc/override/new.cc) set(SHIM_FILES_MEMCPY src/snmalloc/override/memcpy.cc) + add_shim(snmalloc-new-override STATIC src/snmalloc/override/new.cc) + if (SNMALLOC_STATIC_LIBRARY) add_shim(snmallocshim-static STATIC ${SHIM_FILES}) target_compile_definitions(snmallocshim-static PRIVATE @@ -549,3 +605,6 @@ install(EXPORT snmallocConfig DESTINATION "share/snmalloc" ) +if (SNMALLOC_ENABLE_FUZZING) + add_subdirectory(fuzzing) +endif() diff --git a/3rdparty/exported/snmalloc/README.md b/3rdparty/exported/snmalloc/README.md index 8dcd9d119955..ff4d97f05b88 100644 --- a/3rdparty/exported/snmalloc/README.md +++ b/3rdparty/exported/snmalloc/README.md @@ -34,7 +34,8 @@ The mechanism for returning memory to remote threads has remained, but most of t We recommend you read [docs/security](./docs/security/README.md) to find out about the current design, and if you want to dive into the code [docs/AddressSpace.md](./docs/AddressSpace.md) provides a good overview of the allocation and deallocation paths. 
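The CMakeLists.txt hunks earlier in this patch surface several new tuning knobs (SNMALLOC_MIN_ALLOC_SIZE, SNMALLOC_MIN_ALLOC_STEP_SIZE, SNMALLOC_PAGESIZE and the SNMALLOC_DEALLOC_BATCH_RING_* values) as plain compile definitions via the new add_as_define_value helper, and always define SNMALLOC_USE_WAIT_ON_ADDRESS to 0 or 1. A hedged sketch of how a header-only consumer might set the same knobs directly, without going through the CMake cache; the chosen values and the include path are illustrative assumptions, not taken from this patch:

    // Illustrative values only; every macro here is optional and the defaults
    // in ds/allocconfig.h apply when a given macro is left undefined.
    #define SNMALLOC_MIN_ALLOC_SIZE 32      // minimum object size (power of 2)
    #define SNMALLOC_MIN_ALLOC_STEP_SIZE 16 // sizeclass granularity (power of 2)
    #define SNMALLOC_USE_WAIT_ON_ADDRESS 1  // the CMake target always defines this to 0 or 1
    #include "snmalloc/snmalloc.h"          // assumed include path; adjust to your build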
-[![snmalloc CI](https://github.com/microsoft/snmalloc/actions/workflows/main.yml/badge.svg?branch=master)](https://github.com/microsoft/snmalloc/actions/workflows/main.yml) +[![snmalloc CI](https://github.com/microsoft/snmalloc/actions/workflows/main.yml/badge.svg)](https://github.com/microsoft/snmalloc/actions/workflows/main.yml) +[![snmalloc CI for Morello](https://github.com/microsoft/snmalloc/actions/workflows/morello.yml/badge.svg)](https://github.com/microsoft/snmalloc/actions/workflows/morello.yml) # Hardening diff --git a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal.h b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal.h index 49b92da682cb..dae6231361ab 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal.h @@ -10,13 +10,24 @@ #include "aal_concept.h" #include "aal_consts.h" -#include +#if __has_include() +# include +# ifdef CLOCK_MONOTONIC +# define SNMALLOC_TICK_USE_CLOCK_GETTIME +# endif +#endif #include #include -#if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || \ +#ifndef SNMALLOC_TICK_USE_CLOCK_GETTIME +# include +#endif + +#if ( \ + defined(__i386__) || defined(_M_IX86) || defined(_X86_) || \ defined(__amd64__) || defined(__x86_64__) || defined(_M_X64) || \ - defined(_M_AMD64) + defined(_M_AMD64)) && \ + !defined(_M_ARM64EC) # if defined(SNMALLOC_SGX) # define PLATFORM_IS_X86_SGX # define SNMALLOC_NO_AAL_BUILTINS @@ -25,7 +36,8 @@ # endif #endif -#if defined(__arm__) || defined(__aarch64__) +#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM64) || \ + defined(_M_ARM64EC) # define PLATFORM_IS_ARM #endif @@ -53,7 +65,7 @@ namespace snmalloc { /* * Provide a default specification of address_t as uintptr_t for Arch-es - * that support IntegerPointers. Those Arch-es without IntegerPoihnters + * that support IntegerPointers. Those Arch-es without IntegerPointers * must explicitly give their address_t. * * This somewhat obtuse way of spelling the defaulting is necessary so @@ -147,7 +159,7 @@ namespace snmalloc static inline void prefetch(void* ptr) noexcept { #if __has_builtin(__builtin_prefetch) && !defined(SNMALLOC_NO_AAL_BUILTINS) - __builtin_prefetch(ptr); + __builtin_prefetch(ptr, 1, 3); #else Arch::prefetch(ptr); #endif @@ -166,11 +178,27 @@ namespace snmalloc if constexpr ( (Arch::aal_features & NoCpuCycleCounters) == NoCpuCycleCounters) { +#ifdef SNMALLOC_TICK_USE_CLOCK_GETTIME + // the buf is populated by clock_gettime + SNMALLOC_UNINITIALISED timespec buf; + // we can skip the error checking here: + // * EFAULT: for out-of-bound pointers (buf is always valid stack + // memory) + // * EINVAL: for invalid clock_id (we only use CLOCK_MONOTONIC enforced + // by POSIX.1) + // Notice that clock_gettime is a usually a vDSO call, so the overhead + // is minimal. 
+ ::clock_gettime(CLOCK_MONOTONIC, &buf); + return static_cast(buf.tv_sec) * 1000'000'000 + + static_cast(buf.tv_nsec); +# undef SNMALLOC_TICK_USE_CLOCK_GETTIME +#else auto tick = std::chrono::high_resolution_clock::now(); return static_cast( std::chrono::duration_cast( tick.time_since_epoch()) .count()); +#endif } else { @@ -204,9 +232,6 @@ namespace snmalloc static SNMALLOC_FAST_PATH CapPtr capptr_bound(CapPtr a, size_t size) noexcept { - static_assert( - BIn::spatial > capptr::dimension::Spatial::Alloc, - "Refusing to re-bound Spatial::Alloc CapPtr"); static_assert( capptr::is_spatial_refinement(), "capptr_bound must preserve non-spatial CapPtr dimensions"); diff --git a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_arm.h b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_arm.h index b6bae779e4de..11013c44c758 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_arm.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_arm.h @@ -1,6 +1,6 @@ #pragma once -#if defined(__aarch64__) +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) # define SNMALLOC_VA_BITS_64 # ifdef _MSC_VER # include @@ -13,6 +13,7 @@ #endif #include + namespace snmalloc { /** @@ -54,7 +55,7 @@ namespace snmalloc #elif __has_builtin(__builtin_prefetch) && !defined(SNMALLOC_NO_AAL_BUILTINS) __builtin_prefetch(ptr); #elif defined(SNMALLOC_VA_BITS_64) - __asm__ volatile("prfm pldl1keep, [%0]" : "=r"(ptr)); + __asm__ volatile("prfm pstl1keep, [%0]" : "=r"(ptr)); #else __asm__ volatile("pld\t[%0]" : "=r"(ptr)); #endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_cheri.h b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_cheri.h index 4a4acd379a04..84f11c038d7e 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_cheri.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_cheri.h @@ -69,9 +69,6 @@ namespace snmalloc static SNMALLOC_FAST_PATH CapPtr capptr_bound(CapPtr a, size_t size) noexcept { - static_assert( - BIn::spatial > capptr::dimension::Spatial::Alloc, - "Refusing to re-bound Spatial::Alloc CapPtr"); static_assert( capptr::is_spatial_refinement(), "capptr_bound must preserve non-spatial CapPtr dimensions"); @@ -87,8 +84,11 @@ namespace snmalloc void* pb = __builtin_cheri_bounds_set_exact(a.unsafe_ptr(), size); - SNMALLOC_ASSERT( - __builtin_cheri_tag_get(pb) && "capptr_bound exactness failed."); + SNMALLOC_ASSERT_MSG( + __builtin_cheri_tag_get(pb), + "capptr_bound exactness failed. {} of size {}", + a.unsafe_ptr(), + size); return CapPtr::unsafe_from(static_cast(pb)); } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_concept.h b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_concept.h index 3ce64a79ccc8..eac6ebc816e7 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_concept.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_concept.h @@ -14,87 +14,79 @@ namespace snmalloc * machine word size, and an upper bound on the address space size */ template - concept IsAAL_static_members = requires() - { - typename std::integral_constant; - typename std::integral_constant; - typename std::integral_constant; - typename std::integral_constant; - }; + concept IsAAL_static_members = + requires() { + typename std::integral_constant; + typename std::integral_constant; + typename std::integral_constant; + typename std::integral_constant; + }; /** * AALs provide a prefetch operation. 
*/ template - concept IsAAL_prefetch = requires(void* ptr) - { - { - AAL::prefetch(ptr) - } - noexcept->ConceptSame; - }; + concept IsAAL_prefetch = requires(void* ptr) { + { + AAL::prefetch(ptr) + } noexcept -> ConceptSame; + }; /** * AALs provide a notion of high-precision timing. */ template - concept IsAAL_tick = requires() - { - { - AAL::tick() - } - noexcept->ConceptSame; - }; + concept IsAAL_tick = requires() { + { + AAL::tick() + } noexcept -> ConceptSame; + }; template concept IsAAL_capptr_methods = - requires(capptr::Chunk auth, capptr::AllocFull ret, size_t sz) - { - /** - * Produce a pointer with reduced authority from a more privilged pointer. - * The resulting pointer will have base at auth's address and length of - * exactly sz. auth+sz must not exceed auth's limit. - */ - { - AAL::template capptr_bound(auth, sz) - } - noexcept->ConceptSame>; + requires(capptr::Chunk auth, capptr::AllocFull ret, size_t sz) { + /** + * Produce a pointer with reduced authority from a more privilged pointer. + * The resulting pointer will have base at auth's address and length of + * exactly sz. auth+sz must not exceed auth's limit. + */ + { + AAL::template capptr_bound(auth, sz) + } noexcept -> ConceptSame>; - /** - * "Amplify" by copying the address of one pointer into one of higher - * privilege. The resulting pointer differs from auth only in address. - */ - { - AAL::capptr_rebound(auth, ret) - } - noexcept->ConceptSame>; + /** + * "Amplify" by copying the address of one pointer into one of higher + * privilege. The resulting pointer differs from auth only in address. + */ + { + AAL::capptr_rebound(auth, ret) + } noexcept -> ConceptSame>; - /** - * Round up an allocation size to a size this architecture can represent. - * While there may also, in general, be alignment requirements for - * representability, in snmalloc so far we have not had reason to consider - * these explicitly: when we use our... - * - * - sizeclass machinery (for user-facing data), we assume that all - * sizeclasses describe architecturally representable aligned-and-sized - * regions - * - * - Range machinery (for internal meta-data), we always choose NAPOT - * regions big enough for the requested size (returning space above the - * allocation within such regions for use as smaller NAPOT regions). - * - * That is, capptr_size_round is not needed on the user-facing fast paths, - * merely internally for bootstrap and metadata management. - */ - { - AAL::capptr_size_round(sz) - } - noexcept->ConceptSame; - }; + /** + * Round up an allocation size to a size this architecture can represent. + * While there may also, in general, be alignment requirements for + * representability, in snmalloc so far we have not had reason to consider + * these explicitly: when we use our... + * + * - sizeclass machinery (for user-facing data), we assume that all + * sizeclasses describe architecturally representable aligned-and-sized + * regions + * + * - Range machinery (for internal meta-data), we always choose NAPOT + * regions big enough for the requested size (returning space above the + * allocation within such regions for use as smaller NAPOT regions). + * + * That is, capptr_size_round is not needed on the user-facing fast paths, + * merely internally for bootstrap and metadata management. 
+ */ + { + AAL::capptr_size_round(sz) + } noexcept -> ConceptSame; + }; template - concept IsAAL = IsAAL_static_members&& IsAAL_prefetch&& - IsAAL_tick&& IsAAL_capptr_methods; + concept IsAAL = IsAAL_static_members && IsAAL_prefetch && + IsAAL_tick && IsAAL_capptr_methods; } // namespace snmalloc #endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_x86.h b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_x86.h index cc20e777a008..150de26451ff 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_x86.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_x86.h @@ -78,7 +78,11 @@ namespace snmalloc */ static inline void prefetch(void* ptr) { - _mm_prefetch(reinterpret_cast(ptr), _MM_HINT_T0); +#if defined(_MSC_VER) + _m_prefetchw(ptr); +#else + _mm_prefetch(reinterpret_cast(ptr), _MM_HINT_ET0); +#endif } /** diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend/backend.h b/3rdparty/exported/snmalloc/src/snmalloc/backend/backend.h index d220a080a558..ee170c38f7b4 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend/backend.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend/backend.h @@ -23,9 +23,6 @@ namespace snmalloc using Pal = PAL; using SlabMetadata = typename PagemapEntry::SlabMetadata; - static constexpr size_t SizeofMetadata = - bits::next_pow2_const(sizeof(SlabMetadata)); - public: /** * Provide a block of meta-data with size and align. @@ -70,6 +67,17 @@ namespace snmalloc Aal::capptr_bound(p, size)); } + /** + * Returns unused meta-data to the system. This must have come from a call + * to alloc_meta_data, but can be a sub-range of the original allocation. + */ + static void dealloc_meta_data( + LocalState& local_state, capptr::Alloc p, size_t size) + { + auto arena = Authmap::amplify(p); + local_state.get_meta_range().dealloc_range(arena, size); + } + /** * Returns a chunk of memory with alignment and size of `size`, and a * block containing metadata about the slab. @@ -79,13 +87,26 @@ namespace snmalloc * (remote, sizeclass, slab_metadata) * where slab_metadata, is the second element of the pair return. */ - static std::pair, SlabMetadata*> - alloc_chunk(LocalState& local_state, size_t size, uintptr_t ras) + static std::pair, SlabMetadata*> alloc_chunk( + LocalState& local_state, + size_t size, + uintptr_t ras, + sizeclass_t sizeclass) { SNMALLOC_ASSERT(bits::is_pow2(size)); SNMALLOC_ASSERT(size >= MIN_CHUNK_SIZE); - auto meta_cap = local_state.get_meta_range().alloc_range(SizeofMetadata); + // Calculate the extra bytes required to store the client meta-data. 
+ size_t extra_bytes = SlabMetadata::get_extra_bytes(sizeclass); + + auto meta_size = bits::next_pow2(sizeof(SlabMetadata) + extra_bytes); + +#ifdef SNMALLOC_TRACING + message<1024>( + "Allocating metadata of size: {} ({})", meta_size, extra_bytes); +#endif + + auto meta_cap = local_state.get_meta_range().alloc_range(meta_size); auto meta = meta_cap.template as_reinterpret().unsafe_ptr(); @@ -102,7 +123,7 @@ namespace snmalloc #endif if (p == nullptr) { - local_state.get_meta_range().dealloc_range(meta_cap, SizeofMetadata); + local_state.get_meta_range().dealloc_range(meta_cap, meta_size); errno = ENOMEM; #ifdef SNMALLOC_TRACING message<1024>("Out of memory"); @@ -129,7 +150,8 @@ namespace snmalloc LocalState& local_state, SlabMetadata& slab_metadata, capptr::Alloc alloc, - size_t size) + size_t size, + sizeclass_t sizeclass) { /* * The backend takes possession of these chunks now, by disassociating @@ -156,12 +178,24 @@ namespace snmalloc */ capptr::Arena arena = Authmap::amplify(alloc); + // Calculate the extra bytes required to store the client meta-data. + size_t extra_bytes = SlabMetadata::get_extra_bytes(sizeclass); + + auto meta_size = bits::next_pow2(sizeof(SlabMetadata) + extra_bytes); local_state.get_meta_range().dealloc_range( - capptr::Arena::unsafe_from(&slab_metadata), SizeofMetadata); + capptr::Arena::unsafe_from(&slab_metadata), meta_size); local_state.get_object_range()->dealloc_range(arena, size); } + SNMALLOC_FAST_PATH static capptr::Alloc + capptr_rederive_alloc(capptr::Alloc a, size_t objsize) + { + return capptr_to_user_address_control( + Aal::capptr_bound( + Authmap::amplify(a), objsize)); + } + template SNMALLOC_FAST_PATH static const PagemapEntry& get_metaentry(address_t p) { diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend/fixedglobalconfig.h b/3rdparty/exported/snmalloc/src/snmalloc/backend/fixedglobalconfig.h index c6784e703779..83e1117470f2 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend/fixedglobalconfig.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend/fixedglobalconfig.h @@ -8,11 +8,14 @@ namespace snmalloc /** * A single fixed address range allocator configuration */ - template + template< + SNMALLOC_CONCEPT(IsPAL) PAL, + typename ClientMetaDataProvider = NoClientMetaDataProvider> class FixedRangeConfig final : public CommonConfig { public: - using PagemapEntry = DefaultPagemapEntry; + using PagemapEntry = DefaultPagemapEntry; + using ClientMeta = ClientMetaDataProvider; private: using ConcretePagemap = @@ -63,13 +66,11 @@ namespace snmalloc * C++, and not just its initializer fragment, to initialize a non-prefix * subset of the flags (in any order, at that). */ - static constexpr Flags Options = []() constexpr - { + static constexpr Flags Options = []() constexpr { Flags opts = {}; opts.HasDomesticate = true; return opts; - } - (); + }(); // This needs to be a forward reference as the // thread local state will need to know about this. diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend/globalconfig.h b/3rdparty/exported/snmalloc/src/snmalloc/backend/globalconfig.h index 525c77275c89..5d171a9b8710 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend/globalconfig.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend/globalconfig.h @@ -1,13 +1,9 @@ #pragma once -// If you define SNMALLOC_PROVIDE_OWN_CONFIG then you must provide your own -// definition of `snmalloc::Alloc` before including any files that include -// `snmalloc.h` or consume the global allocation APIs. 
-#ifndef SNMALLOC_PROVIDE_OWN_CONFIG -# include "../backend_helpers/backend_helpers.h" -# include "backend.h" -# include "meta_protected_range.h" -# include "standard_range.h" +#include "../backend_helpers/backend_helpers.h" +#include "backend.h" +#include "meta_protected_range.h" +#include "standard_range.h" namespace snmalloc { @@ -28,13 +24,16 @@ namespace snmalloc * The Configuration sets up a Pagemap for the backend to use, and the state * required to build new allocators (GlobalPoolState). */ - class StandardConfig final : public CommonConfig + template + class StandardConfigClientMeta final : public CommonConfig { - using GlobalPoolState = PoolState>; + using GlobalPoolState = PoolState< + CoreAllocator>>; public: using Pal = DefaultPal; - using PagemapEntry = DefaultPagemapEntry; + using PagemapEntry = DefaultPagemapEntry; + using ClientMeta = ClientMetaDataProvider; private: using ConcretePagemap = @@ -97,33 +96,38 @@ namespace snmalloc // of allocators. SNMALLOC_SLOW_PATH static void ensure_init_slow() { - FlagLock lock{initialisation_lock}; -# ifdef SNMALLOC_TRACING - message<1024>("Run init_impl"); -# endif - if (initialised) return; - LocalEntropy entropy; - entropy.init(); - // Initialise key for remote deallocation lists - RemoteAllocator::key_global = FreeListKey(entropy.get_free_list_key()); + with(initialisation_lock, [&]() { +#ifdef SNMALLOC_TRACING + message<1024>("Run init_impl"); +#endif + + if (initialised) + return; + + LocalEntropy entropy; + entropy.init(); + // Initialise key for remote deallocation lists + entropy.make_free_list_key(RemoteAllocator::key_global); + entropy.make_free_list_key(freelist::Object::key_root); - // Need to randomise pagemap location. If requested and not a - // StrictProvenance architecture, randomize its table's location within a - // significantly larger address space allocation. - static constexpr bool pagemap_randomize = - mitigations(random_pagemap) && !aal_supports; + // Need to randomise pagemap location. If requested and not a + // StrictProvenance architecture, randomize its table's location within + // a significantly larger address space allocation. + static constexpr bool pagemap_randomize = + mitigations(random_pagemap) && !aal_supports; - Pagemap::concretePagemap.template init(); + Pagemap::concretePagemap.template init(); - if constexpr (aal_supports) - { - Authmap::init(); - } + if constexpr (aal_supports) + { + Authmap::init(); + } - initialised.store(true, std::memory_order_release); + initialised.store(true, std::memory_order_release); + }); } public: @@ -162,10 +166,4 @@ namespace snmalloc snmalloc::register_clean_up(); } }; - - /** - * Create allocator type for this configuration. - */ - using Alloc = snmalloc::LocalAllocator; } // namespace snmalloc -#endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend/meta_protected_range.h b/3rdparty/exported/snmalloc/src/snmalloc/backend/meta_protected_range.h index 5c5795cc0589..b94968c9c657 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend/meta_protected_range.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend/meta_protected_range.h @@ -75,11 +75,14 @@ namespace snmalloc CommitRange, // In case of huge pages, we don't want to give each thread its own huge // page, so commit in the global range. 
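The globalconfig.h hunk above drops the fixed `using Alloc` alias and makes the standard configuration generic over a client meta-data provider (the NoClientMetaDataProvider and ArrayClientMetaDataProvider helpers appear in commonconfig.h further down in this patch). A hedged sketch of how an embedder might now spell its allocator types; the aliases below are illustrative and are not part of this patch:

    // Assumed wiring, for illustration only.
    using DefaultConfig =
      snmalloc::StandardConfigClientMeta<snmalloc::NoClientMetaDataProvider>;
    using Alloc = snmalloc::LocalAllocator<DefaultConfig>;

    // A configuration that reserves one std::atomic<size_t> of client
    // meta-data per allocated object (roughly what the new client_meta and
    // miracle_ptr tests exercise).
    using CountedConfig = snmalloc::StandardConfigClientMeta<
      snmalloc::ArrayClientMetaDataProvider<std::atomic<size_t>>>;
    using CountedAlloc = snmalloc::LocalAllocator<CountedConfig>;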
- LargeBuddyRange< - max_page_chunk_size_bits, - max_page_chunk_size_bits, - Pagemap, - page_size_bits>, + std::conditional_t< + (max_page_chunk_size_bits > MIN_CHUNK_BITS), + LargeBuddyRange< + max_page_chunk_size_bits, + max_page_chunk_size_bits, + Pagemap, + page_size_bits>, + NopRange>, LogRange<4>, GlobalRange, StatsRange>; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/backend_helpers.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/backend_helpers.h index 2104e681d53e..24e02b0530b1 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/backend_helpers.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/backend_helpers.h @@ -9,6 +9,7 @@ #include "indirectrange.h" #include "largebuddyrange.h" #include "logrange.h" +#include "noprange.h" #include "pagemap.h" #include "pagemapregisterrange.h" #include "palrange.h" diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/buddy.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/buddy.h index ff9416614dda..d7406468e543 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/buddy.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/buddy.h @@ -15,9 +15,17 @@ namespace snmalloc template class Buddy { - std::array, MAX_SIZE_BITS - MIN_SIZE_BITS> trees; + static_assert(MAX_SIZE_BITS > MIN_SIZE_BITS); + + struct Entry + { + typename Rep::Contents cache[3]; + RBTree tree{}; + }; + + std::array entries{}; // All RBtrees at or above this index should be empty. - size_t empty_at_or_above = 0; + size_t empty_at_or_above{0}; size_t to_index(size_t size) { @@ -42,15 +50,57 @@ namespace snmalloc void invariant() { #ifndef NDEBUG - for (size_t i = empty_at_or_above; i < trees.size(); i++) + for (size_t i = empty_at_or_above; i < entries.size(); i++) { - SNMALLOC_ASSERT(trees[i].is_empty()); + SNMALLOC_ASSERT(entries[i].tree.is_empty()); + // TODO check cache is empty } #endif } + bool remove_buddy(typename Rep::Contents addr, size_t size) + { + auto idx = to_index(size); + + // Empty at this range. + if (idx >= empty_at_or_above) + return false; + + auto buddy = Rep::buddy(addr, size); + + // Check local cache first + for (auto& e : entries[idx].cache) + { + if (Rep::equal(buddy, e)) + { + if (!Rep::can_consolidate(addr, size)) + return false; + + e = entries[idx].tree.remove_min(); + return true; + } + } + + auto path = entries[idx].tree.get_root_path(); + bool contains_buddy = entries[idx].tree.find(path, buddy); + + if (!contains_buddy) + return false; + + // Only check if we can consolidate after we know the buddy is in + // the buddy allocator. This is required to prevent possible segfaults + // from looking at the buddies meta-data, which we only know exists + // once we have found it in the red-black tree. + if (!Rep::can_consolidate(addr, size)) + return false; + + entries[idx].tree.remove_path(path); + return true; + } + public: constexpr Buddy() = default; + /** * Add a block to the buddy allocator. * @@ -63,48 +113,39 @@ namespace snmalloc */ typename Rep::Contents add_block(typename Rep::Contents addr, size_t size) { - auto idx = to_index(size); - empty_at_or_above = bits::max(empty_at_or_above, idx + 1); - validate_block(addr, size); - auto buddy = Rep::buddy(addr, size); + if (remove_buddy(addr, size)) + { + // Add to next level cache + size *= 2; + addr = Rep::align_down(addr, size); + if (size == bits::one_at_bit(MAX_SIZE_BITS)) + { + // Invariant should be checked on all non-tail return paths. 
+ // Holds trivially here with current design. + invariant(); + // Too big for this buddy allocator. + return addr; + } + return add_block(addr, size); + } - auto path = trees[idx].get_root_path(); - bool contains_buddy = trees[idx].find(path, buddy); + auto idx = to_index(size); + empty_at_or_above = bits::max(empty_at_or_above, idx + 1); - if (contains_buddy) + for (auto& e : entries[idx].cache) { - // Only check if we can consolidate after we know the buddy is in - // the buddy allocator. This is required to prevent possible segfaults - // from looking at the buddies meta-data, which we only know exists - // once we have found it in the red-black tree. - if (Rep::can_consolidate(addr, size)) + if (Rep::equal(Rep::null, e)) { - trees[idx].remove_path(path); - - // Add to next level cache - size *= 2; - addr = Rep::align_down(addr, size); - if (size == bits::one_at_bit(MAX_SIZE_BITS)) - { - // Invariant should be checked on all non-tail return paths. - // Holds trivially here with current design. - invariant(); - // Too big for this buddy allocator. - return addr; - } - return add_block(addr, size); + e = addr; + return Rep::null; } - - // Re-traverse as the path was to the buddy, - // but the representation says we cannot combine. - // We must find the correct place for this element. - // Something clever could be done here, but it's not worth it. - // path = trees[idx].get_root_path(); - trees[idx].find(path, addr); } - trees[idx].insert_path(path, addr); + + auto path = entries[idx].tree.get_root_path(); + entries[idx].tree.find(path, addr); + entries[idx].tree.insert_path(path, addr); invariant(); return Rep::null; } @@ -121,7 +162,15 @@ namespace snmalloc if (idx >= empty_at_or_above) return Rep::null; - auto addr = trees[idx].remove_min(); + auto addr = entries[idx].tree.remove_min(); + for (auto& e : entries[idx].cache) + { + if (Rep::equal(Rep::null, addr) || Rep::compare(e, addr)) + { + addr = std::exchange(e, addr); + } + } + if (addr != Rep::null) { validate_block(addr, size); diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/commonconfig.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/commonconfig.h index a69b6a3897d4..8ea020874c66 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/commonconfig.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/commonconfig.h @@ -95,6 +95,39 @@ namespace snmalloc bool HasDomesticate = false; }; + struct NoClientMetaDataProvider + { + using StorageType = Empty; + using DataRef = Empty&; + + static size_t required_count(size_t) + { + return 1; + } + + static DataRef get(StorageType* base, size_t) + { + return *base; + } + }; + + template + struct ArrayClientMetaDataProvider + { + using StorageType = T; + using DataRef = T&; + + static size_t required_count(size_t max_count) + { + return max_count; + } + + static DataRef get(StorageType* base, size_t index) + { + return base[index]; + } + }; + /** * Class containing definitions that are likely to be used by all except for * the most unusual back-end implementations. 
This can be subclassed as a @@ -126,4 +159,5 @@ namespace snmalloc } } } // namespace snmalloc + #include "../mem/remotecache.h" diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/defaultpagemapentry.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/defaultpagemapentry.h index 2083db30eb08..5e1f703d26be 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/defaultpagemapentry.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/defaultpagemapentry.h @@ -64,9 +64,14 @@ namespace snmalloc SNMALLOC_FAST_PATH DefaultPagemapEntryT() = default; }; - class DefaultSlabMetadata : public FrontendSlabMetadata + template + class DefaultSlabMetadata : public FrontendSlabMetadata< + DefaultSlabMetadata, + ClientMetaDataProvider> {}; - using DefaultPagemapEntry = DefaultPagemapEntryT; + template + using DefaultPagemapEntry = + DefaultPagemapEntryT>; } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/largebuddyrange.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/largebuddyrange.h index d1446d725fc2..803eb4844dcd 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/largebuddyrange.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/largebuddyrange.h @@ -6,8 +6,6 @@ #include "empty_range.h" #include "range_helpers.h" -#include - namespace snmalloc { /** @@ -354,7 +352,7 @@ namespace snmalloc SNMALLOC_ASSERT(size >= MIN_CHUNK_SIZE); SNMALLOC_ASSERT(bits::is_pow2(size)); - if (size >= (bits::one_at_bit(MAX_SIZE_BITS) - 1)) + if (size >= bits::mask_bits(MAX_SIZE_BITS)) { if (ParentRange::Aligned) return parent.alloc_range(size); @@ -378,7 +376,7 @@ namespace snmalloc if constexpr (MAX_SIZE_BITS != (bits::BITS - 1)) { - if (size >= (bits::one_at_bit(MAX_SIZE_BITS) - 1)) + if (size >= bits::mask_bits(MAX_SIZE_BITS)) { parent_dealloc_range(base, size); return; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/lockrange.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/lockrange.h index ce91711cce4c..2dc796ac696f 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/lockrange.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/lockrange.h @@ -22,7 +22,7 @@ namespace snmalloc * This is infrequently used code, a spin lock simplifies the code * considerably, and should never be on the fast path. 
*/ - FlagWord spin_lock{}; + CombiningLock spin_lock{}; public: static constexpr bool Aligned = ParentRange::Aligned; @@ -35,14 +35,18 @@ namespace snmalloc CapPtr alloc_range(size_t size) { - FlagLock lock(spin_lock); - return parent.alloc_range(size); + CapPtr result; + with(spin_lock, [&]() { + { + result = parent.alloc_range(size); + } + }); + return result; } void dealloc_range(CapPtr base, size_t size) { - FlagLock lock(spin_lock); - parent.dealloc_range(base, size); + with(spin_lock, [&]() { parent.dealloc_range(base, size); }); } }; }; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/noprange.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/noprange.h new file mode 100644 index 000000000000..45dcfdcf690d --- /dev/null +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/noprange.h @@ -0,0 +1,36 @@ +#pragma once +#include "range_helpers.h" + +namespace snmalloc +{ + struct NopRange + { + template + class Type : public ContainsParent + { + using ContainsParent::parent; + + public: + static constexpr bool Aligned = ParentRange::Aligned; + + static constexpr bool ConcurrencySafe = ParentRange::ConcurrencySafe; + + using ChunkBounds = typename ParentRange::ChunkBounds; + static_assert( + ChunkBounds::address_space_control == + capptr::dimension::AddressSpaceControl::Full); + + constexpr Type() = default; + + CapPtr alloc_range(size_t size) + { + return parent.alloc_range(size); + } + + void dealloc_range(CapPtr base, size_t size) + { + parent.dealloc_range(base, size); + } + }; + }; +} // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/range_helpers.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/range_helpers.h index 076b9fd74072..f1a82baf2ded 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/range_helpers.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/range_helpers.h @@ -160,5 +160,4 @@ namespace snmalloc } } }; - } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/smallbuddyrange.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/smallbuddyrange.h index 83796e1ecbe4..6f8400e83f1b 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/smallbuddyrange.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/smallbuddyrange.h @@ -31,6 +31,7 @@ namespace snmalloc static constexpr Contents root = nullptr; static constexpr address_t MASK = 1; + static void set(Handle ptr, Contents r) { SNMALLOC_ASSERT((address_cast(r) & MASK) == 0); @@ -244,7 +245,6 @@ namespace snmalloc void dealloc_range(CapPtr base, size_t size) { - SNMALLOC_ASSERT(bits::is_pow2(size)); add_range(base, size); } }; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds/aba.h b/3rdparty/exported/snmalloc/src/snmalloc/ds/aba.h index f14cc9ef685a..af75de9e0f73 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds/aba.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds/aba.h @@ -71,9 +71,10 @@ namespace snmalloc error("Only one inflight ABA operation at a time is allowed."); operation_in_flight = true; # endif - return Cmp{{independent.ptr.load(std::memory_order_relaxed), - independent.aba.load(std::memory_order_relaxed)}, - this}; + return Cmp{ + {independent.ptr.load(std::memory_order_relaxed), + independent.aba.load(std::memory_order_relaxed)}, + this}; } struct Cmp diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds/allocconfig.h b/3rdparty/exported/snmalloc/src/snmalloc/ds/allocconfig.h index 858940f05e50..78ea9f41a861 
100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds/allocconfig.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds/allocconfig.h @@ -20,10 +20,31 @@ namespace snmalloc // Used to isolate values on cache lines to prevent false sharing. static constexpr size_t CACHELINE_SIZE = 64; - // Minimum allocation size is space for two pointers. - static_assert(bits::next_pow2_const(sizeof(void*)) == sizeof(void*)); - static constexpr size_t MIN_ALLOC_SIZE = 2 * sizeof(void*); - static constexpr size_t MIN_ALLOC_BITS = bits::ctz_const(MIN_ALLOC_SIZE); + /// The "machine epsilon" for the small sizeclass machinery. + static constexpr size_t MIN_ALLOC_STEP_SIZE = +#if defined(SNMALLOC_MIN_ALLOC_STEP_SIZE) + SNMALLOC_MIN_ALLOC_STEP_SIZE; +#else + 2 * sizeof(void*); +#endif + + /// Derived from MIN_ALLOC_STEP_SIZE + static constexpr size_t MIN_ALLOC_STEP_BITS = + bits::ctz_const(MIN_ALLOC_STEP_SIZE); + static_assert(bits::is_pow2(MIN_ALLOC_STEP_SIZE)); + + /** + * Minimum allocation size is space for two pointers. If the small sizeclass + * machinery permits smaller values (that is, if MIN_ALLOC_STEP_SIZE is + * smaller than MIN_ALLOC_SIZE), which may be useful if MIN_ALLOC_SIZE must + * be large or not a power of two, those smaller size classes will be unused. + */ + static constexpr size_t MIN_ALLOC_SIZE = +#if defined(SNMALLOC_MIN_ALLOC_SIZE) + SNMALLOC_MIN_ALLOC_SIZE; +#else + 2 * sizeof(void*); +#endif // Minimum slab size. #if defined(SNMALLOC_QEMU_WORKAROUND) && defined(SNMALLOC_VA_BITS_64) @@ -72,24 +93,92 @@ namespace snmalloc MAX_SMALL_SIZECLASS_SIZE >= MIN_CHUNK_SIZE, "Large sizes need to be representable by as a multiple of MIN_CHUNK_SIZE"); + /** + * The number of bits needed to count the number of objects within a slab. + * + * Most likely, this is achieved by the smallest sizeclass, which will have + * many more than MIN_OBJECT_COUNT objects in its slab. But, just in case, + * it's defined here and checked when we compute the sizeclass table, since + * computing this number is potentially nontrivial. + */ +#if defined(SNMALLOC_QEMU_WORKAROUND) && defined(SNMALLOC_VA_BITS_64) + static constexpr size_t MAX_CAPACITY_BITS = 13; +#else + static constexpr size_t MAX_CAPACITY_BITS = 11; +#endif + + /** + * The maximum distance between the start of two objects in the same slab. + */ + static constexpr size_t MAX_SLAB_SPAN_SIZE = + (MIN_OBJECT_COUNT - 1) * MAX_SMALL_SIZECLASS_SIZE; + static constexpr size_t MAX_SLAB_SPAN_BITS = + bits::next_pow2_bits_const(MAX_SLAB_SPAN_SIZE); + // Number of slots for remote deallocation. static constexpr size_t REMOTE_SLOT_BITS = 8; static constexpr size_t REMOTE_SLOTS = 1 << REMOTE_SLOT_BITS; static constexpr size_t REMOTE_MASK = REMOTE_SLOTS - 1; +#if defined(SNMALLOC_DEALLOC_BATCH_RING_ASSOC) + static constexpr size_t DEALLOC_BATCH_RING_ASSOC = + SNMALLOC_DEALLOC_BATCH_RING_ASSOC; +#else +# if defined(__has_cpp_attribute) +# if ( \ + __has_cpp_attribute(msvc::no_unique_address) && \ + (__cplusplus >= 201803L || _MSVC_LANG >= 201803L)) || \ + __has_cpp_attribute(no_unique_address) + // For C++20 or later, we do have [[no_unique_address]] and so can also do + // batching if we aren't turning on the backward-pointer mitigations + static constexpr size_t DEALLOC_BATCH_MIN_ALLOC_WORDS = + mitigations(freelist_backward_edge) ? 
4 : 2; +# else + // For C++17, we don't have [[no_unique_address]] and so we always end up + // needing all four pointers' worth of space (because BatchedRemoteMessage has + // two freelist::Object::T<> links within, each of which will have two fields + // and will be padded to two pointers). + static constexpr size_t DEALLOC_BATCH_MIN_ALLOC_WORDS = 4; +# endif +# else + // If we don't even have the feature test macro, we're C++17 or earlier. + static constexpr size_t DEALLOC_BATCH_MIN_ALLOC_WORDS = 4; +# endif + + static constexpr size_t DEALLOC_BATCH_RING_ASSOC = + (MIN_ALLOC_SIZE >= (DEALLOC_BATCH_MIN_ALLOC_WORDS * sizeof(void*))) ? 2 : 0; +#endif + +#if defined(SNMALLOC_DEALLOC_BATCH_RING_SET_BITS) + static constexpr size_t DEALLOC_BATCH_RING_SET_BITS = + SNMALLOC_DEALLOC_BATCH_RING_SET_BITS; +#else + static constexpr size_t DEALLOC_BATCH_RING_SET_BITS = 3; +#endif + + static constexpr size_t DEALLOC_BATCH_RINGS = + DEALLOC_BATCH_RING_ASSOC * bits::one_at_bit(DEALLOC_BATCH_RING_SET_BITS); + static_assert( - INTERMEDIATE_BITS < MIN_ALLOC_BITS, + INTERMEDIATE_BITS < MIN_ALLOC_STEP_BITS, "INTERMEDIATE_BITS must be less than MIN_ALLOC_BITS"); static_assert( MIN_ALLOC_SIZE >= (sizeof(void*) * 2), "MIN_ALLOC_SIZE must be sufficient for two pointers"); + static_assert( + 1 << (INTERMEDIATE_BITS + MIN_ALLOC_STEP_BITS) >= + bits::next_pow2_const(MIN_ALLOC_SIZE), + "Entire sizeclass exponent is below MIN_ALLOC_SIZE; adjust STEP_SIZE"); + static_assert( + MIN_ALLOC_SIZE >= MIN_ALLOC_STEP_SIZE, + "Minimum alloc sizes below minimum step size; raise MIN_ALLOC_SIZE"); // Return remote small allocs when the local cache reaches this size. static constexpr int64_t REMOTE_CACHE = #ifdef USE_REMOTE_CACHE USE_REMOTE_CACHE #else - 1 << MIN_CHUNK_BITS + MIN_CHUNK_SIZE #endif ; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds/combininglock.h b/3rdparty/exported/snmalloc/src/snmalloc/ds/combininglock.h new file mode 100644 index 000000000000..89a4bc258e0b --- /dev/null +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds/combininglock.h @@ -0,0 +1,296 @@ +#pragma once + +#include "../aal/aal.h" +#include "../pal/pal.h" + +#include + +namespace snmalloc +{ + class CombiningLockNode; + + struct CombiningLock + { + // Fast path lock incase there is no contention. + std::atomic flag{false}; + + // MCS queue of work items + std::atomic last{nullptr}; + + void release() + { + flag.store(false, std::memory_order_release); + } + }; + + /** + * @brief Combinations of MCS queue lock with Flat Combining + * + * Each element in the queue has a pointer to a work item. + * This means when under contention the thread holding the lock + * can perform the work. + * + * As the work items are arbitrary lambdas there are no simplifications + * for combining related work items. I.e. original Flat Combining paper + * might sort a collection of inserts, and perform them in a single traversal. + * + * Note that, we should perhaps add a Futex/WakeOnAddress mode to improve + * performance in the contended case, rather than spinning. 
+ */ + class CombiningLockNode + { + template + static constexpr bool use_wait_on_address = + pal_supports && + SNMALLOC_USE_WAIT_ON_ADDRESS; + + template + struct WaitWordTypeSelect; + + template + struct WaitWordTypeSelect + { + using type = typename Pal::WaitingWord; + }; + + template + struct WaitWordTypeSelect + { + using type = int; + }; + + using WaitingWordType = + typename WaitWordTypeSelect, DefaultPal>:: + type; + + template + friend class CombiningLockNodeTempl; + + enum class LockStatus : WaitingWordType + { + // The work for this node has not been completed. + WAITING, + + // The work for this thread has been completed, and it is not the + // last element in the queue. + DONE, + + // The work for this thread has not been completed, and it is the + // head of the queue. + HEAD, + + // The waiter is currently sleeping. + SLEEPING + }; + + // Status of the queue, set by the thread at the head of the queue, + // When it makes the thread for this node either the head of the queue + // or completes its work. + std::atomic status{LockStatus::WAITING}; + + // Used to store the queue + std::atomic next{nullptr}; + + // Stores the C++ lambda associated with this node in the queue. + void (*f_raw)(CombiningLockNode*); + + constexpr CombiningLockNode(void (*f)(CombiningLockNode*)) : f_raw(f) {} + + void set_status(LockStatus s) + { + status.store(s, std::memory_order_release); + } + + template + static void wake(CombiningLockNode* node, LockStatus message) + { + if constexpr (!use_wait_on_address) + { + node->set_status(message); + } + else + { + if ( + node->status.exchange(message, std::memory_order_acq_rel) == + LockStatus::SLEEPING) + { + Pal::notify_one_on_address(node->status); + } + } + } + + template + void wait() + { + if constexpr (!use_wait_on_address) + { + while (status.load(std::memory_order_acquire) == LockStatus::WAITING) + Aal::pause(); + } + else + { + int remaining = 100; + while (remaining > 0) + { + if (status.load(std::memory_order_acquire) != LockStatus::WAITING) + return; + Aal::pause(); + remaining--; + } + LockStatus expected = LockStatus::WAITING; + if (status.compare_exchange_strong( + expected, LockStatus::SLEEPING, std::memory_order_acq_rel)) + { + Pal::wait_on_address(status, LockStatus::SLEEPING); + } + } + } + + SNMALLOC_SLOW_PATH void attach_slow(CombiningLock& lock) + { + // There is contention for the lock, we need to add our work to the + // queue of pending work + auto prev = lock.last.exchange(this, std::memory_order_acq_rel); + + if (prev != nullptr) + { + // If we aren't the head, link into predecessor + prev->next.store(this, std::memory_order_release); + + // Wait to for predecessor to complete + wait(); + + // Determine if another thread completed our work. + if (status.load(std::memory_order_acquire) == LockStatus::DONE) + return; + } + else + { + // We are the head of the queue. Spin until we acquire the fast path + // lock. As we are in the queue future requests shouldn't try to + // acquire the fast path lock, but stale views of the queue being empty + // could still be concurrent with this thread. + while (lock.flag.exchange(true, std::memory_order_acquire)) + { + while (lock.flag.load(std::memory_order_relaxed)) + { + Aal::pause(); + } + } + + // We could set + // status = LockStatus::HEAD + // However, the subsequent state assumes it is HEAD, and + // nothing would read it. + } + + // We are the head of the queue, and responsible for + // waking/performing our and subsequent work. 
+ auto curr = this; + while (true) + { + // Start pulling in the next element of the queue + auto n = curr->next.load(std::memory_order_acquire); + Aal::prefetch(n); + + // Perform work for head of the queue + curr->f_raw(curr); + + // Determine if there are more elements. + n = curr->next.load(std::memory_order_acquire); + if (n == nullptr) + break; + // Signal this work was completed and move on to + // next item. + wake(curr, LockStatus::DONE); + curr = n; + } + + // This could be the end of the queue, attempt to close the + // queue. + auto curr_c = curr; + if (lock.last.compare_exchange_strong( + curr_c, + nullptr, + std::memory_order_release, + std::memory_order_relaxed)) + { + // Queue was successfully closed. + // Notify last element the work was completed. + wake(curr, LockStatus::DONE); + lock.release(); + return; + } + + // Failed to close the queue wait for next thread to be + // added. + while (curr->next.load(std::memory_order_relaxed) == nullptr) + Aal::pause(); + + auto n = curr->next.load(std::memory_order_acquire); + + // As we had to wait, give the job to the next thread + // to carry on performing the work. + wake(n, LockStatus::HEAD); + + // Notify the thread that we completed its work. + // Note that this needs to be before setting curr->status, + // as after the status is set the thread may deallocate the + // queue node. + wake(curr, LockStatus::DONE); + return; + } + }; + + template + class CombiningLockNodeTempl : CombiningLockNode + { + template + friend void with(CombiningLock&, FF&&); + + // This holds the closure for the lambda + F f; + + CombiningLockNodeTempl(CombiningLock& lock, F&& f_) + : CombiningLockNode([](CombiningLockNode* self) { + CombiningLockNodeTempl* self_templ = + reinterpret_cast(self); + self_templ->f(); + }), + f(std::forward(f_)) + { + attach_slow(lock); + } + }; + + /** + * Lock primitive. This takes a reference to a Lock, and a thunk to + * call when the lock is available. The thunk should be independent of + * the current thread as the thunk may be executed by a different thread. + */ + template + inline void with(CombiningLock& lock, F&& f) + { + // Test if no one is waiting + if (SNMALLOC_LIKELY(lock.last.load(std::memory_order_relaxed) == nullptr)) + { + // No one was waiting so low contention. Attempt to acquire the flag + // lock. + if (SNMALLOC_LIKELY( + lock.flag.exchange(true, std::memory_order_acquire) == false)) + { + // We grabbed the lock. + // Execute the thunk. + f(); + + // Release the lock + lock.release(); + return; + } + } + + // There is contention for the lock, we need to take the slow path + // with the queue. 
+ CombiningLockNodeTempl node(lock, std::forward(f)); + } +} // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds/ds.h b/3rdparty/exported/snmalloc/src/snmalloc/ds/ds.h index 4cfa22b9b9d3..a26eb20dec9f 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds/ds.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds/ds.h @@ -6,6 +6,7 @@ #include "../pal/pal.h" #include "aba.h" #include "allocconfig.h" +#include "combininglock.h" #include "entropy.h" #include "flaglock.h" #include "mpmcstack.h" diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds/flaglock.h b/3rdparty/exported/snmalloc/src/snmalloc/ds/flaglock.h index 4a539e636078..5463504858f3 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds/flaglock.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds/flaglock.h @@ -4,7 +4,6 @@ #include "../pal/pal.h" #include -#include namespace snmalloc { @@ -93,7 +92,9 @@ namespace snmalloc {} void set_owner() {} + void clear_owner() {} + void assert_not_owned_by_current_thread() {} }; @@ -133,4 +134,11 @@ namespace snmalloc lock.flag.store(false, std::memory_order_release); } }; + + template + inline void with(FlagWord& lock, F&& f) + { + FlagLock l(lock); + f(); + } } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds/mpmcstack.h b/3rdparty/exported/snmalloc/src/snmalloc/ds/mpmcstack.h index cd005e9bf00a..e6a3b1d9f604 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds/mpmcstack.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds/mpmcstack.h @@ -4,12 +4,6 @@ #include "aba.h" #include "allocconfig.h" -#if defined(__has_feature) -# if __has_feature(thread_sanitizer) -# define SNMALLOC_THREAD_SANITIZER_ENABLED -# endif -#endif - namespace snmalloc { template diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds/pagemap.h b/3rdparty/exported/snmalloc/src/snmalloc/ds/pagemap.h index 267fe9a0b30c..d8636f67f011 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds/pagemap.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds/pagemap.h @@ -1,5 +1,7 @@ #pragma once +#include "../ds_core/ds_core.h" + namespace snmalloc { /** @@ -66,6 +68,10 @@ namespace snmalloc auto page_end = pointer_align_up(last); size_t using_size = pointer_diff(page_start, page_end); PAL::template notify_using(page_start, using_size); + if constexpr (pal_supports) + { + PAL::notify_do_dump(page_start, using_size); + } } constexpr FlatPagemap() = default; @@ -179,11 +185,23 @@ namespace snmalloc // Allocate a power of two extra to allow the placement of the // pagemap be difficult to guess if randomize_position set. size_t additional_size = +#ifdef SNMALLOC_THREAD_SANITIZER_ENABLED + // When running with TSAN we failed to allocate the very large range + // randomly + randomize_position ? bits::next_pow2(REQUIRED_SIZE) : 0; +#else randomize_position ? bits::next_pow2(REQUIRED_SIZE) * 4 : 0; +#endif size_t request_size = REQUIRED_SIZE + additional_size; auto new_body_untyped = PAL::reserve(request_size); + if constexpr (pal_supports) + { + // Pagemap should not be in core dump except where it is non-zero. 
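The new with(FlagWord&, ...) overload above gives flag locks the same thunk-based interface, and Singleton::get is reworked to use it for the classic double-checked initialisation. A standalone sketch of that pattern with illustrative stand-in types:

    #include <atomic>

    // Stand-in for FlagWord/with() above: a minimal test-and-set spin lock.
    struct SpinFlag
    {
      std::atomic<bool> flag{false};
    };

    template<typename F>
    void with(SpinFlag& lock, F&& f)
    {
      while (lock.flag.exchange(true, std::memory_order_acquire))
      {
        // spin until the holder releases
      }
      f();
      lock.flag.store(false, std::memory_order_release);
    }

    // Double-checked initialisation in the shape of Singleton::get above.
    struct Config
    {
      int value = 0;
    };

    SpinFlag config_lock{};
    std::atomic<bool> config_initialised{false};
    Config config{};

    Config& get_config()
    {
      if (!config_initialised.load(std::memory_order_acquire))
      {
        with(config_lock, []() {
          // Re-check under the lock: another thread may have won the race.
          if (!config_initialised.load(std::memory_order_relaxed))
          {
            config.value = 42; // one-time initialisation
            config_initialised.store(true, std::memory_order_release);
          }
        });
      }
      return config;
    }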
+ PAL::notify_do_not_dump(new_body_untyped, request_size); + } + if (new_body_untyped == nullptr) { PAL::error("Failed to initialise snmalloc."); diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds/singleton.h b/3rdparty/exported/snmalloc/src/snmalloc/ds/singleton.h index c85635d39f24..174128e77e67 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds/singleton.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds/singleton.h @@ -3,9 +3,7 @@ #include "../ds_core/ds_core.h" #include "flaglock.h" -#include #include -#include #include namespace snmalloc @@ -35,14 +33,15 @@ namespace snmalloc if (SNMALLOC_UNLIKELY(!initialised.load(std::memory_order_acquire))) { - FlagLock lock(flag); - if (!initialised) - { - init(&obj); - initialised.store(true, std::memory_order_release); - if (first != nullptr) - *first = true; - } + with(flag, [&]() { + if (!initialised) + { + init(&obj); + initialised.store(true, std::memory_order_release); + if (first != nullptr) + *first = true; + } + }); } return obj; } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/bits.h b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/bits.h index b82ee846e318..b192c8275239 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/bits.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/bits.h @@ -45,11 +45,12 @@ namespace snmalloc static constexpr size_t BITS = sizeof(size_t) * CHAR_BIT; /** - * Returns a value of type T that has a single bit set, + * Returns a value of type T that has a single bit set at the given index, + * with 0 being the least significant bit. * - * S is a template parameter because callers use either `int` or `size_t` - * and either is valid to represent a number in the range 0-63 (or 0-127 if - * we want to use `__uint128_t` as `T`). + * S, the type of the bit index, is a template parameter because callers + * use either `int` or `size_t` and either is valid to represent a number in + * the range 0-63 (or 0-127 if we want to use `__uint128_t` as `T`). */ template constexpr T one_at_bit(S shift) @@ -59,6 +60,19 @@ namespace snmalloc return (static_cast(1)) << shift; } + /** + * Returns a value of type T that has its n LSBs all set. + * + * S is a template parameter because callers use either `int` or `size_t` + * and either is valid to represent a number in the range 0-63 (or 0-127 if + * we want to use `__uint128_t` as `T`). + */ + template + constexpr T mask_bits(S n) + { + return one_at_bit(n) - 1; + } + inline SNMALLOC_FAST_PATH size_t clz(size_t x) { SNMALLOC_ASSERT(x != 0); // Calling with 0 is UB on some implementations @@ -158,7 +172,11 @@ namespace snmalloc SNMALLOC_ASSERT(x != 0); // Calling with 0 is UB on some implementations #if defined(_MSC_VER) && !defined(__clang__) -# ifdef _WIN64 +# if defined(_M_ARM64) || defined(_M_ARM64EC) + unsigned long n = 0; + _BitScanForward64(&n, static_cast(x)); + return static_cast(n); +# elif defined(_WIN64) return _tzcnt_u64(static_cast(x)); # else return _tzcnt_u32(static_cast(x)); @@ -203,7 +221,12 @@ namespace snmalloc overflow = __builtin_mul_overflow(x, y, &prod); return prod; #elif defined(_MSC_VER) -# ifdef _WIN64 +# if defined(_M_ARM64) || defined(_M_ARM64EC) + size_t high_prod = __umulh(x, y); + size_t prod = x * y; + overflow = high_prod != 0; + return prod; +# elif defined(_WIN64) size_t high_prod; size_t prod = _umul128(x, y, &high_prod); overflow = high_prod != 0; @@ -313,27 +336,11 @@ namespace snmalloc * * Does not work for value=0. 
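mask_bits(n), added above, is simply one_at_bit(n) - 1, a value with the n least significant bits set; later hunks in this patch replace several open-coded `one_at_bit(X) - 1` expressions with it. A standalone restatement:

    #include <cstddef>

    // Illustrative restatement of one_at_bit/mask_bits from bits.h above.
    template<typename T = size_t, typename S>
    constexpr T one_at_bit(S shift)
    {
      return static_cast<T>(1) << shift;
    }

    template<typename T = size_t, typename S>
    constexpr T mask_bits(S n)
    {
      return one_at_bit<T>(n) - 1;
    }

    static_assert(mask_bits(0) == 0);
    static_assert(mask_bits(4) == 0b1111);
    static_assert(mask_bits(12) == 0xfff);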
***********************************************/ - template - static size_t to_exp_mant(size_t value) - { - constexpr size_t LEADING_BIT = one_at_bit(MANTISSA_BITS + LOW_BITS) >> 1; - constexpr size_t MANTISSA_MASK = one_at_bit(MANTISSA_BITS) - 1; - - value = value - 1; - - size_t e = - bits::BITS - MANTISSA_BITS - LOW_BITS - clz(value | LEADING_BIT); - size_t b = (e == 0) ? 0 : 1; - size_t m = (value >> (LOW_BITS + e - b)) & MANTISSA_MASK; - - return (e << MANTISSA_BITS) + m; - } - template constexpr size_t to_exp_mant_const(size_t value) { constexpr size_t LEADING_BIT = one_at_bit(MANTISSA_BITS + LOW_BITS) >> 1; - constexpr size_t MANTISSA_MASK = one_at_bit(MANTISSA_BITS) - 1; + constexpr size_t MANTISSA_MASK = mask_bits(MANTISSA_BITS); value = value - 1; @@ -351,7 +358,7 @@ namespace snmalloc if (MANTISSA_BITS > 0) { m_e = m_e + 1; - constexpr size_t MANTISSA_MASK = one_at_bit(MANTISSA_BITS) - 1; + constexpr size_t MANTISSA_MASK = mask_bits(MANTISSA_BITS); size_t m = m_e & MANTISSA_MASK; size_t e = m_e >> MANTISSA_BITS; size_t b = e == 0 ? 0 : 1; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/defines.h b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/defines.h index 2de53be036e8..d50939ad00e4 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/defines.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/defines.h @@ -17,7 +17,7 @@ * `inline` and complains if you specify `SNMALLOC_FAST_PATH` and `inline`. */ # define SNMALLOC_FAST_PATH_INLINE ALWAYSINLINE -# if _MSC_VER >= 1927 && !defined(SNMALLOC_USE_CXX17) +# if _MSC_VER >= 1927 && _MSVC_LANG > 201703L # define SNMALLOC_FAST_PATH_LAMBDA [[msvc::forceinline]] # else # define SNMALLOC_FAST_PATH_LAMBDA @@ -27,11 +27,6 @@ # define SNMALLOC_REQUIRE_CONSTINIT # define SNMALLOC_UNUSED_FUNCTION # define SNMALLOC_USED_FUNCTION -# ifdef SNMALLOC_USE_CXX17 -# define SNMALLOC_NO_UNIQUE_ADDRESS -# else -# define SNMALLOC_NO_UNIQUE_ADDRESS [[msvc::no_unique_address]] -# endif #else # define SNMALLOC_FAST_FAIL() __builtin_trap() # define SNMALLOC_LIKELY(x) __builtin_expect(!!(x), 1) @@ -55,11 +50,6 @@ # define SNMALLOC_COLD __attribute__((cold)) # define SNMALLOC_UNUSED_FUNCTION __attribute((unused)) # define SNMALLOC_USED_FUNCTION __attribute((used)) -# ifdef SNMALLOC_USE_CXX17 -# define SNMALLOC_NO_UNIQUE_ADDRESS -# else -# define SNMALLOC_NO_UNIQUE_ADDRESS [[no_unique_address]] -# endif # ifdef __clang__ # define SNMALLOC_REQUIRE_CONSTINIT \ [[clang::require_constant_initialization]] @@ -68,6 +58,27 @@ # endif #endif +/* + * Try to find the right "no_unique_address" attribute for our use, assuming one + * exists. + * + * Different compiler versions and ABIs make this a right pain; see, for + * example, https://github.com/llvm/llvm-project/issues/49358 and + * https://devblogs.microsoft.com/cppblog/msvc-cpp20-and-the-std-cpp20-switch/ . 
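to_exp_mant_const above packs a size into a compact exponent/mantissa code; the separate non-constexpr to_exp_mant is removed and the open-coded mantissa mask becomes mask_bits. A self-contained sketch of the encoding with a worked value (the example assumes a 64-bit size_t):

    #include <climits>
    #include <cstddef>

    constexpr size_t BITS = sizeof(size_t) * CHAR_BIT;

    // Simple constexpr count-leading-zeros, standing in for bits::clz.
    constexpr size_t clz(size_t x)
    {
      size_t n = 0;
      for (size_t bit = static_cast<size_t>(1) << (BITS - 1);
           bit != 0 && (x & bit) == 0;
           bit >>= 1)
        n++;
      return n;
    }

    // Mirrors to_exp_mant_const above.
    template<size_t MANTISSA_BITS, size_t LOW_BITS>
    constexpr size_t to_exp_mant(size_t value)
    {
      constexpr size_t LEADING_BIT =
        (static_cast<size_t>(1) << (MANTISSA_BITS + LOW_BITS)) >> 1;
      constexpr size_t MANTISSA_MASK =
        (static_cast<size_t>(1) << MANTISSA_BITS) - 1;

      value = value - 1;
      size_t e = BITS - MANTISSA_BITS - LOW_BITS - clz(value | LEADING_BIT);
      size_t b = (e == 0) ? 0 : 1;
      size_t m = (value >> (LOW_BITS + e - b)) & MANTISSA_MASK;
      return (e << MANTISSA_BITS) + m;
    }

    // Worked example: 24 - 1 = 23 = 0b10111, so the exponent is 3 and the two
    // mantissa bits after the leading one are 0b01, giving (3 << 2) + 1 = 13.
    static_assert(BITS != 64 || to_exp_mant<2, 0>(24) == 13);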
+ */ +#if defined(__has_cpp_attribute) +# if __has_cpp_attribute(msvc::no_unique_address) && \ + (__cplusplus >= 201803L || _MSVC_LANG >= 201803L) +# define SNMALLOC_NO_UNIQUE_ADDRESS [[msvc::no_unique_address]] +# elif __has_cpp_attribute(no_unique_address) +# define SNMALLOC_NO_UNIQUE_ADDRESS [[no_unique_address]] +# else +# define SNMALLOC_NO_UNIQUE_ADDRESS +# endif +#else +# define SNMALLOC_NO_UNIQUE_ADDRESS +#endif + #if defined(__cpp_constinit) && __cpp_constinit >= 201907 # define SNMALLOC_CONSTINIT_FN constinit # define SNMALLOC_CONSTINIT_STATIC constinit const @@ -183,6 +194,15 @@ namespace snmalloc # endif #endif +// Used to suppress pattern filling for potentially unintialized variables with +// automatic storage duration. +// https://clang.llvm.org/docs/AttributeReference.html#uninitialized +#ifdef __clang__ +# define SNMALLOC_UNINITIALISED [[clang::uninitialized]] +#else +# define SNMALLOC_UNINITIALISED +#endif + namespace snmalloc { /** diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/helpers.h b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/helpers.h index 61fcee9545d6..aedb72f4b348 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/helpers.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/helpers.h @@ -4,8 +4,8 @@ #include #include -#include #include +#include #include namespace snmalloc @@ -96,6 +96,7 @@ namespace snmalloc */ template struct function_ref; + template struct function_ref { @@ -324,7 +325,7 @@ namespace snmalloc } std::array buf{{0}}; const char digits[] = "0123456789"; - for (long i = long(buf.size() - 1); i >= 0; i--) + for (long i = static_cast(buf.size() - 1); i >= 0; i--) { buf[static_cast(i)] = digits[s % 10]; s /= 10; @@ -356,7 +357,7 @@ namespace snmalloc const char hexdigits[] = "0123456789abcdef"; // Length of string including null terminator static_assert(sizeof(hexdigits) == 0x11); - for (long i = long(buf.size() - 1); i >= 0; i--) + for (long i = static_cast(buf.size() - 1); i >= 0; i--) { buf[static_cast(i)] = hexdigits[s & 0xf]; s >>= 4; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/mitigations.h b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/mitigations.h index 88547dcc7de4..2370f2951d0f 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/mitigations.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/mitigations.h @@ -247,10 +247,10 @@ namespace snmalloc */ full_checks + cheri_checks + clear_meta - freelist_forward_edge - pal_enforce_access : - /** - * clear_meta is important on CHERI to avoid leaking capabilities. - */ - sanity_checks + cheri_checks + clear_meta; + /** + * clear_meta is important on CHERI to avoid leaking capabilities. + */ + sanity_checks + cheri_checks + clear_meta; #else CHECK_CLIENT ? full_checks : no_checks; #endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/redblacktree.h b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/redblacktree.h index df1fb9410661..77ca6e50d4ef 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/redblacktree.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/redblacktree.h @@ -3,7 +3,6 @@ #include #include #include -#include namespace snmalloc { @@ -17,11 +16,10 @@ namespace snmalloc * ID. 
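The attribute probe above replaces the old SNMALLOC_USE_CXX17-driven definitions of SNMALLOC_NO_UNIQUE_ADDRESS. What the attribute buys, where it is honoured, is that empty policy members stop contributing to object size; a self-contained sketch reusing the same detection pattern:

    #include <cstddef>

    // Same detection pattern as the block above, reproduced so this sketch is
    // self-contained.
    #if defined(__has_cpp_attribute)
    #  if __has_cpp_attribute(msvc::no_unique_address) && \
        (__cplusplus >= 201803L || _MSVC_LANG >= 201803L)
    #    define NO_UNIQUE_ADDRESS [[msvc::no_unique_address]]
    #  elif __has_cpp_attribute(no_unique_address)
    #    define NO_UNIQUE_ADDRESS [[no_unique_address]]
    #  else
    #    define NO_UNIQUE_ADDRESS
    #  endif
    #else
    #  define NO_UNIQUE_ADDRESS
    #endif

    struct Empty
    {};

    struct Counter
    {
      NO_UNIQUE_ADDRESS Empty policy;
      size_t count = 0;
    };

    // Where the attribute is honoured, sizeof(Counter) is typically just
    // sizeof(size_t); with the empty fallback the Empty member still occupies
    // at least one byte plus padding.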
*/ template - concept RBRepTypes = requires() - { - typename Rep::Handle; - typename Rep::Contents; - }; + concept RBRepTypes = requires() { + typename Rep::Handle; + typename Rep::Contents; + }; /** * The representation must define operations on the holder and contents @@ -41,50 +39,38 @@ namespace snmalloc */ template concept RBRepMethods = - requires(typename Rep::Handle hp, typename Rep::Contents k, bool b) - { - { - Rep::get(hp) - } - ->ConceptSame; - { - Rep::set(hp, k) - } - ->ConceptSame; - { - Rep::is_red(k) - } - ->ConceptSame; - { - Rep::set_red(k, b) - } - ->ConceptSame; - { - Rep::ref(b, k) - } - ->ConceptSame; - { - Rep::null - } - ->ConceptSameModRef; - { - typename Rep::Handle + requires(typename Rep::Handle hp, typename Rep::Contents k, bool b) { + { + Rep::get(hp) + } -> ConceptSame; { - const_cast< + Rep::set(hp, k) + } -> ConceptSame; + { + Rep::is_red(k) + } -> ConceptSame; + { + Rep::set_red(k, b) + } -> ConceptSame; + { + Rep::ref(b, k) + } -> ConceptSame; + { + Rep::null + } -> ConceptSameModRef; + { + typename Rep::Handle{const_cast< std::remove_const_t>*>( - &Rep::root) - } - } - ->ConceptSame; - }; + &Rep::root)} + } -> ConceptSame; + }; template concept RBRep = // RBRepTypes // - && RBRepMethods // - && ConceptSame< - decltype(Rep::null), - std::add_const_t>; + && RBRepMethods // + && + ConceptSame>; #endif /** @@ -151,6 +137,7 @@ namespace snmalloc { return ptr != t.ptr; } + ///@} bool is_null() @@ -275,7 +262,7 @@ namespace snmalloc std::array path; size_t length = 0; - RBPath(typename Rep::Handle root) : path{} + RBPath(typename Rep::Handle root) { path[0].set(root, false); length = 1; @@ -452,9 +439,27 @@ namespace snmalloc depth); if (!(get_dir(true, curr).is_null() && get_dir(false, curr).is_null())) { - auto s_indent = std::string(indent); - print(get_dir(true, curr), (s_indent + "|").c_str(), depth + 1); - print(get_dir(false, curr), (s_indent + " ").c_str(), depth + 1); + // As the tree should be balanced, the depth should not exceed 128 if + // there are 2^64 elements in the tree. This is a debug feature, and + // it would be impossible to debug something of this size, so this is + // considerably larger than required. + // If there is a bug that leads to an unbalanced tree, this might be + // insufficient to accurately display the tree, but it will still be + // memory safe as the search code is bounded by the string size. 
+ static constexpr size_t max_depth = 128; + char s_indent[max_depth]; + size_t end = 0; + for (; end < max_depth - 1; end++) + { + if (indent[end] == 0) + break; + s_indent[end] = indent[end]; + } + s_indent[end] = '|'; + s_indent[end + 1] = 0; + print(get_dir(true, curr), s_indent, depth + 1); + s_indent[end] = ' '; + print(get_dir(false, curr), s_indent, depth + 1); } } } @@ -490,8 +495,7 @@ namespace snmalloc */ path.move(true); while (path.move(false)) - { - } + {} K curr = path.curr(); @@ -510,8 +514,8 @@ namespace snmalloc // If we had a left child, replace ourselves with the extracted value // from above Rep::set_red(curr, Rep::is_red(splice)); - get_dir(true, curr) = K(get_dir(true, splice)); - get_dir(false, curr) = K(get_dir(false, splice)); + get_dir(true, curr) = K{get_dir(true, splice)}; + get_dir(false, curr) = K{get_dir(false, splice)}; splice = curr; path.fixup(); } @@ -742,8 +746,7 @@ namespace snmalloc auto path = get_root_path(); while (path.move(true)) - { - } + {} K result = path.curr(); diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/seqset.h b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/seqset.h index 600ec07df0a5..e493fbba0b35 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/seqset.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/seqset.h @@ -34,6 +34,9 @@ namespace snmalloc constexpr Node(Node* next, Node* prev) : next(next), prev(prev) {} public: + /// Default constructor, creates an invalid node. + constexpr Node() : Node(nullptr, nullptr) {} + void invariant() { SNMALLOC_ASSERT(next != nullptr); diff --git a/3rdparty/exported/snmalloc/src/snmalloc/global/global.h b/3rdparty/exported/snmalloc/src/snmalloc/global/global.h index a2f1159a14ee..514d69b7c61b 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/global/global.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/global/global.h @@ -1,4 +1,5 @@ #include "bounds_checks.h" +#include "libc.h" #include "memcpy.h" #include "scopedalloc.h" #include "threadalloc.h" diff --git a/3rdparty/exported/snmalloc/src/snmalloc/global/libc.h b/3rdparty/exported/snmalloc/src/snmalloc/global/libc.h new file mode 100644 index 000000000000..2a6db38a3892 --- /dev/null +++ b/3rdparty/exported/snmalloc/src/snmalloc/global/libc.h @@ -0,0 +1,191 @@ +#pragma once + +#include "threadalloc.h" + +#include +#include + +namespace snmalloc::libc +{ + SNMALLOC_SLOW_PATH inline void* set_error(int err = ENOMEM) + { + errno = err; + return nullptr; + } + + SNMALLOC_SLOW_PATH inline int set_error_and_return(int err = ENOMEM) + { + errno = err; + return err; + } + + inline void* __malloc_end_pointer(void* ptr) + { + return ThreadAlloc::get().external_pointer(ptr); + } + + SNMALLOC_FAST_PATH_INLINE void* malloc(size_t size) + { + return ThreadAlloc::get().alloc(size); + } + + SNMALLOC_FAST_PATH_INLINE void free(void* ptr) + { + ThreadAlloc::get().dealloc(ptr); + } + + SNMALLOC_FAST_PATH_INLINE void free_sized(void* ptr, size_t size) + { + ThreadAlloc::get().dealloc(ptr, size); + } + + SNMALLOC_FAST_PATH_INLINE void* calloc(size_t nmemb, size_t size) + { + bool overflow = false; + size_t sz = bits::umul(size, nmemb, overflow); + if (SNMALLOC_UNLIKELY(overflow)) + { + return set_error(); + } + return ThreadAlloc::get().alloc(sz); + } + + SNMALLOC_FAST_PATH_INLINE void* realloc(void* ptr, size_t size) + { + auto& a = ThreadAlloc::get(); + size_t sz = a.alloc_size(ptr); + // Keep the current allocation if the given size is in the same sizeclass. 
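calloc above multiplies nmemb by size with an explicit overflow check (bits::umul) before allocating, returning nullptr with errno set rather than silently wrapping. A standalone sketch of the same guard, with the GCC/Clang builtin standing in for bits::umul:

    #include <cerrno>
    #include <cstddef>
    #include <cstdlib>

    // Overflow-checked multiply guarding the allocation size, in the style of
    // calloc above (__builtin_mul_overflow is a GCC/Clang builtin used here in
    // place of bits::umul).
    void* calloc_sketch(size_t nmemb, size_t size)
    {
      size_t bytes;
      if (__builtin_mul_overflow(nmemb, size, &bytes))
      {
        errno = ENOMEM;
        return nullptr;
      }
      // calloc must return zeroed memory; plain std::calloc is the closest
      // standard equivalent to snmalloc's zeroing allocation here.
      return std::calloc(1, bytes);
    }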
+ if (sz == round_size(size)) + { +#ifdef SNMALLOC_PASS_THROUGH + // snmallocs alignment guarantees can be broken by realloc in pass-through + // this is not exercised, by existing clients, but is tested. + if (pointer_align_up(ptr, natural_alignment(size)) == ptr) + return ptr; +#else + return ptr; +#endif + } + + void* p = a.alloc(size); + if (SNMALLOC_LIKELY(p != nullptr)) + { + sz = bits::min(size, sz); + // Guard memcpy as GCC is assuming not nullptr for ptr after the memcpy + // otherwise. + if (SNMALLOC_UNLIKELY(sz != 0)) + { + SNMALLOC_ASSUME(ptr != nullptr); + ::memcpy(p, ptr, sz); + } + a.dealloc(ptr); + } + else if (SNMALLOC_LIKELY(size == 0)) + { + a.dealloc(ptr); + } + else + { + return set_error(); + } + return p; + } + + inline size_t malloc_usable_size(const void* ptr) + { + return ThreadAlloc::get().alloc_size(ptr); + } + + inline void* reallocarray(void* ptr, size_t nmemb, size_t size) + { + bool overflow = false; + size_t sz = bits::umul(size, nmemb, overflow); + if (SNMALLOC_UNLIKELY(overflow)) + { + return set_error(); + } + return realloc(ptr, sz); + } + + inline int reallocarr(void* ptr_, size_t nmemb, size_t size) + { + int err = errno; + auto& a = ThreadAlloc::get(); + bool overflow = false; + size_t sz = bits::umul(size, nmemb, overflow); + if (SNMALLOC_UNLIKELY(sz == 0)) + { + errno = err; + return 0; + } + if (SNMALLOC_UNLIKELY(overflow)) + { + return set_error_and_return(EOVERFLOW); + } + + void** ptr = reinterpret_cast(ptr_); + void* p = a.alloc(sz); + if (SNMALLOC_UNLIKELY(p == nullptr)) + { + return set_error_and_return(ENOMEM); + } + + sz = bits::min(sz, a.alloc_size(*ptr)); + + SNMALLOC_ASSUME(*ptr != nullptr || sz == 0); + // Guard memcpy as GCC is assuming not nullptr for ptr after the memcpy + // otherwise. + if (SNMALLOC_UNLIKELY(sz != 0)) + ::memcpy(p, *ptr, sz); + errno = err; + a.dealloc(*ptr); + *ptr = p; + return 0; + } + + inline void* memalign(size_t alignment, size_t size) + { + if (SNMALLOC_UNLIKELY(alignment == 0 || !bits::is_pow2(alignment))) + { + return set_error(EINVAL); + } + + return malloc(aligned_size(alignment, size)); + } + + inline void* aligned_alloc(size_t alignment, size_t size) + { + return memalign(alignment, size); + } + + inline int posix_memalign(void** memptr, size_t alignment, size_t size) + { + if (SNMALLOC_UNLIKELY( + (alignment < sizeof(uintptr_t) || !bits::is_pow2(alignment)))) + { + return EINVAL; + } + + void* p = memalign(alignment, size); + if (SNMALLOC_UNLIKELY(p == nullptr)) + { + if (size != 0) + return ENOMEM; + } + *memptr = p; + return 0; + } + + inline typename snmalloc::Alloc::Config::ClientMeta::DataRef + get_client_meta_data(void* p) + { + return ThreadAlloc::get().get_client_meta_data(p); + } + + inline std::add_const_t + get_client_meta_data_const(void* p) + { + return ThreadAlloc::get().get_client_meta_data_const(p); + } + +} // namespace snmalloc::libc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/global/memcpy.h b/3rdparty/exported/snmalloc/src/snmalloc/global/memcpy.h index f4996f6097eb..51a87270aca4 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/global/memcpy.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/global/memcpy.h @@ -1,5 +1,4 @@ #pragma once -#include "../backend/globalconfig.h" #include "bounds_checks.h" namespace snmalloc @@ -23,6 +22,7 @@ namespace snmalloc { char data[Size]; }; + auto* d = static_cast(dst); auto* s = static_cast(src); *d = *s; @@ -192,7 +192,8 @@ namespace snmalloc * It's not entirely clear what we would do if this were not the case. 
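posix_memalign above validates the alignment up front (a power of two, at least sizeof(uintptr_t)) and returns EINVAL without touching errno or *memptr; ENOMEM is only reported when a non-zero request cannot be satisfied. A standalone sketch of that contract, deferring to the C library for the actual allocation:

    #include <cerrno>
    #include <cstdint>
    #include <cstdlib>

    // Alignment validation in the style of posix_memalign above.
    bool valid_alignment(size_t alignment)
    {
      return alignment >= sizeof(uintptr_t) &&
        (alignment & (alignment - 1)) == 0;
    }

    int posix_memalign_sketch(void** memptr, size_t alignment, size_t size)
    {
      if (!valid_alignment(alignment))
        return EINVAL; // *memptr left untouched, errno not set

      // snmalloc serves over-aligned requests by padding the size so the size
      // class's natural alignment covers the request (aligned_size above);
      // here we simply round up for std::aligned_alloc, which requires the
      // size to be a multiple of the alignment.
      size_t rounded = (size + alignment - 1) / alignment * alignment;
      void* p = std::aligned_alloc(alignment, rounded);
      if (p == nullptr && size != 0)
        return ENOMEM;

      *memptr = p;
      return 0;
    }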
* Best not think too hard about it now. */ - static_assert(alignof(void*) == sizeof(void*)); + static_assert( + alignof(void*) == sizeof(void*)); // NOLINT(misc-redundant-expression) static constexpr size_t LargestRegisterSize = 16; @@ -254,6 +255,7 @@ namespace snmalloc { void* p[2]; }; + if (sizeof(Ptr2) <= len) { auto dp = static_cast(dst); diff --git a/3rdparty/exported/snmalloc/src/snmalloc/global/scopedalloc.h b/3rdparty/exported/snmalloc/src/snmalloc/global/scopedalloc.h index cb9f0fc8b1c0..345635a70aeb 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/global/scopedalloc.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/global/scopedalloc.h @@ -1,5 +1,4 @@ #pragma once -#include "../backend/globalconfig.h" /** * This header requires that Alloc has been defined. diff --git a/3rdparty/exported/snmalloc/src/snmalloc/global/threadalloc.h b/3rdparty/exported/snmalloc/src/snmalloc/global/threadalloc.h index d900fb27249b..7ba8ddd79ad4 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/global/threadalloc.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/global/threadalloc.h @@ -1,7 +1,5 @@ #pragma once -#include "../backend/globalconfig.h" - #if defined(SNMALLOC_EXTERNAL_THREAD_ALLOC) # define SNMALLOC_THREAD_TEARDOWN_DEFINED #endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/backend_concept.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/backend_concept.h index f0ed3964df9d..c7e76a15f067 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/backend_concept.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/backend_concept.h @@ -2,8 +2,10 @@ #ifdef __cpp_concepts # include "../ds/ds.h" +# include "sizeclasstable.h" # include + namespace snmalloc { /** @@ -14,18 +16,15 @@ namespace snmalloc */ template concept IsReadablePagemap = - requires(address_t addr, size_t sz, const typename Pagemap::Entry& t) - { - { - Pagemap::template get_metaentry(addr) - } - ->ConceptSame; - - { - Pagemap::template get_metaentry(addr) - } - ->ConceptSame; - }; + requires(address_t addr, size_t sz, const typename Pagemap::Entry& t) { + { + Pagemap::template get_metaentry(addr) + } -> ConceptSame; + + { + Pagemap::template get_metaentry(addr) + } -> ConceptSame; + }; /** * The core of the static pagemap accessor interface: {get,set}_metadata. @@ -36,24 +35,20 @@ namespace snmalloc * set_metadata updates the entry in the pagemap. */ template - concept IsWritablePagemap = IsReadablePagemap&& requires( - address_t addr, size_t sz, const typename Pagemap::Entry& t) - { - { - Pagemap::template get_metaentry_mut(addr) - } - ->ConceptSame; - - { - Pagemap::template get_metaentry_mut(addr) - } - ->ConceptSame; - - { - Pagemap::set_metaentry(addr, sz, t) - } - ->ConceptSame; - }; + concept IsWritablePagemap = IsReadablePagemap && + requires(address_t addr, size_t sz, const typename Pagemap::Entry& t) { + { + Pagemap::template get_metaentry_mut(addr) + } -> ConceptSame; + + { + Pagemap::template get_metaentry_mut(addr) + } -> ConceptSame; + + { + Pagemap::set_metaentry(addr, sz, t) + } -> ConceptSame; + }; /** * The pagemap can also be told to commit backing storage for a range of @@ -63,13 +58,11 @@ namespace snmalloc * which combines this and the core concept, above. 
*/ template - concept IsPagemapWithRegister = requires(capptr::Arena p, size_t sz) - { - { - Pagemap::register_range(p, sz) - } - ->ConceptSame; - }; + concept IsPagemapWithRegister = requires(capptr::Arena p, size_t sz) { + { + Pagemap::register_range(p, sz) + } -> ConceptSame; + }; /** * The full pagemap accessor interface, with all of {get,set}_metadata and @@ -81,7 +74,7 @@ namespace snmalloc */ template concept IsWritablePagemapWithRegister = - IsWritablePagemap&& IsPagemapWithRegister; + IsWritablePagemap && IsPagemapWithRegister; /** * The configuration also defines domestication (that is, the difference @@ -91,62 +84,56 @@ namespace snmalloc */ template concept IsConfigDomestication = - requires(typename Config::LocalState* ls, capptr::AllocWild ptr) - { - { - Config::capptr_domesticate(ls, ptr) - } - ->ConceptSame>; - - { - Config::capptr_domesticate(ls, ptr.template as_static()) - } - ->ConceptSame>; - }; + requires(typename Config::LocalState* ls, capptr::AllocWild ptr) { + { + Config::capptr_domesticate(ls, ptr) + } -> ConceptSame>; + + { + Config::capptr_domesticate(ls, ptr.template as_static()) + } -> ConceptSame>; + }; class CommonConfig; struct Flags; template concept IsBackend = - requires(LocalState& local_state, size_t size, uintptr_t ras) - { - { - Backend::alloc_chunk(local_state, size, ras) - } - ->ConceptSame< - std::pair, typename Backend::SlabMetadata*>>; - } - &&requires(LocalState* local_state, size_t size) - { - { - Backend::template alloc_meta_data(local_state, size) - } - ->ConceptSame>; - } - &&requires( - LocalState& local_state, - typename Backend::SlabMetadata& slab_metadata, - capptr::Alloc alloc, - size_t size) - { - { - Backend::dealloc_chunk(local_state, slab_metadata, alloc, size) - } - ->ConceptSame; - } - &&requires(address_t p) - { - { - Backend::template get_metaentry(p) - } - ->ConceptSame; - - { - Backend::template get_metaentry(p) - } - ->ConceptSame; - }; + requires( + LocalState& local_state, + size_t size, + uintptr_t ras, + sizeclass_t sizeclass) { + { + Backend::alloc_chunk(local_state, size, ras, sizeclass) + } -> ConceptSame< + std::pair, typename Backend::SlabMetadata*>>; + } && + requires(LocalState* local_state, size_t size) { + { + Backend::template alloc_meta_data(local_state, size) + } -> ConceptSame>; + } && + requires( + LocalState& local_state, + typename Backend::SlabMetadata& slab_metadata, + capptr::Alloc alloc, + size_t size, + sizeclass_t sizeclass) { + { + Backend::dealloc_chunk( + local_state, slab_metadata, alloc, size, sizeclass) + } -> ConceptSame; + } && + requires(address_t p) { + { + Backend::template get_metaentry(p) + } -> ConceptSame; + + { + Backend::template get_metaentry(p) + } -> ConceptSame; + }; /** * Config objects of type T must obey a number of constraints. 
They @@ -161,38 +148,39 @@ namespace snmalloc * */ template - concept IsConfig = std::is_base_of::value&& - IsPAL&& IsBackend< - typename Config::LocalState, - typename Config::PagemapEntry, - typename Config::Backend>&& requires() - { - typename Config::LocalState; - typename Config::Backend; - typename Config::PagemapEntry; - - { - Config::Options - } - ->ConceptSameModRef; - } - &&( + concept IsConfig = std::is_base_of::value && + IsPAL && + IsBackend && requires() { - Config::Options.CoreAllocIsPoolAllocated == true; - typename Config::GlobalPoolState; + typename Config::LocalState; + typename Config::Backend; + typename Config::PagemapEntry; + { - Config::pool() - } - ->ConceptSame; - } || - requires() { Config::Options.CoreAllocIsPoolAllocated == false; }); + Config::Options + } -> ConceptSameModRef; + } && + ( + requires() { + Config::Options.CoreAllocIsPoolAllocated == true; + typename Config::GlobalPoolState; + { + Config::pool() + } -> ConceptSame; + } || + requires() { + Config::Options.CoreAllocIsPoolAllocated == false; + }); /** * The lazy version of the above; please see ds_core/concept.h and use * sparingly. */ template - concept IsConfigLazy = !is_type_complete_v || IsConfig; + concept IsConfigLazy = ! + is_type_complete_v || IsConfig; } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/corealloc.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/corealloc.h index c7fc79b72452..5b0381805839 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/corealloc.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/corealloc.h @@ -48,6 +48,7 @@ namespace snmalloc */ using BackendSlabMetadata = typename Config::Backend::SlabMetadata; using PagemapEntry = typename Config::PagemapEntry; + /// }@ /** @@ -104,7 +105,7 @@ namespace snmalloc * This is the thread local structure associated to this * allocator. */ - LocalCache* attached_cache; + LocalCache* attached_cache; /** * Ticker to query the clock regularly at a lower cost. @@ -186,7 +187,7 @@ namespace snmalloc { auto slab_end = pointer_offset(bumpptr, slab_size + 1 - rsize); - auto& key = entropy.get_free_list_key(); + auto key_tweak = meta->as_key_tweak(); auto& b = meta->free_queue; @@ -197,6 +198,7 @@ namespace snmalloc { capptr::AllocFull next; }; + // The following code implements Sattolo's algorithm for generating // random cyclic permutations. This implementation is in the opposite // direction, so that the original space does not need initialising. 
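alloc_new_list above cites Sattolo's algorithm: a single pass builds a uniformly random cyclic permutation, so following next pointers from any object visits every object in the slab exactly once, and (as the comment notes) the walk can be run in the direction that avoids initialising the space first. A standalone sketch of the classic algorithm over an index array:

    #include <cstddef>
    #include <numeric>
    #include <random>
    #include <vector>

    // Classic Sattolo shuffle: next[] ends up describing a single cycle that
    // covers all n elements.
    std::vector<size_t> sattolo_cycle(size_t n, std::mt19937_64& rng)
    {
      std::vector<size_t> next(n);
      std::iota(next.begin(), next.end(), size_t{0});

      if (n < 2)
        return next;

      for (size_t i = n - 1; i > 0; i--)
      {
        // j is drawn from [0, i), strictly below i (unlike Fisher-Yates),
        // which is what forces the result to be one cycle rather than an
        // arbitrary permutation.
        std::uniform_int_distribution<size_t> dist(0, i - 1);
        std::swap(next[i], next[dist(rng)]);
      }
      return next;
    }

In the slab code above the permutation is built directly over the objects' embedded next pointers rather than a separate index array.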
@@ -237,13 +239,15 @@ namespace snmalloc auto curr_ptr = start_ptr; do { + auto next_ptr = curr_ptr->next; b.add( // Here begins our treatment of the heap as containing Wild pointers freelist::Object::make( capptr_to_user_address_control(curr_ptr.as_void())), - key, + freelist::Object::key_root, + key_tweak, entropy); - curr_ptr = curr_ptr->next; + curr_ptr = next_ptr; } while (curr_ptr != start_ptr); } else @@ -257,7 +261,8 @@ namespace snmalloc capptr_to_user_address_control( Aal::capptr_bound( p.as_void(), rsize))), - key, + freelist::Object::key_root, + key_tweak, entropy); p = pointer_offset(p, rsize); } while (p < slab_end); @@ -269,17 +274,18 @@ namespace snmalloc capptr::Alloc clear_slab(BackendSlabMetadata* meta, smallsizeclass_t sizeclass) { - auto& key = entropy.get_free_list_key(); + auto key_tweak = meta->as_key_tweak(); freelist::Iter<> fl; - auto more = meta->free_queue.close(fl, key); + auto more = + meta->free_queue.close(fl, freelist::Object::key_root, key_tweak); UNUSED(more); auto local_state = backend_state_ptr(); auto domesticate = [local_state](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { return capptr_domesticate(local_state, p); }; - capptr::Alloc p = - finish_alloc_no_zero(fl.take(key, domesticate), sizeclass); + capptr::Alloc p = finish_alloc_no_zero( + fl.take(freelist::Object::key_root, domesticate), sizeclass); // If clear_meta is requested, we should also walk the free list to clear // it. @@ -293,7 +299,7 @@ namespace snmalloc size_t count = 1; // Already taken one above. while (!fl.empty()) { - fl.take(key, domesticate); + fl.take(freelist::Object::key_root, domesticate); count++; } // Check the list contains all the elements @@ -303,13 +309,14 @@ namespace snmalloc if (more > 0) { - auto no_more = meta->free_queue.close(fl, key); + auto no_more = + meta->free_queue.close(fl, freelist::Object::key_root, key_tweak); SNMALLOC_ASSERT(no_more == 0); UNUSED(no_more); while (!fl.empty()) { - fl.take(key, domesticate); + fl.take(freelist::Object::key_root, domesticate); count++; } } @@ -321,7 +328,7 @@ namespace snmalloc #ifdef SNMALLOC_TRACING message<1024>( - "Slab {} is unused, Object sizeclass {}", + "Slab {} is unused, Object sizeclass {}", start_of_slab.unsafe_ptr(), sizeclass); #endif @@ -348,7 +355,8 @@ namespace snmalloc { if (check_slabs) { - meta->free_queue.validate(entropy.get_free_list_key(), domesticate); + meta->free_queue.validate( + freelist::Object::key_root, meta->as_key_tweak(), domesticate); } return; } @@ -368,47 +376,25 @@ namespace snmalloc get_backend_local_state(), *meta, start, - sizeclass_to_slab_size(sizeclass)); + sizeclass_to_slab_size(sizeclass), + sizeclass_t::from_small_class(sizeclass)); }); } /** - * Slow path for deallocating an object locally. - * This is either waking up a slab that was not actively being used - * by this thread, or handling the final deallocation onto a slab, - * so it can be reused by other threads. + * Very slow path for object deallocation. + * + * The object has already been returned to the slab, so all that is left to + * do is update its metadata and, if that pushes us into having too many + * unused slabs in this size class, return some. + * + * Also while here, check the time. */ - SNMALLOC_SLOW_PATH void - dealloc_local_object_slow(capptr::Alloc p, const PagemapEntry& entry) + SNMALLOC_SLOW_PATH void dealloc_local_object_meta( + const PagemapEntry& entry, BackendSlabMetadata* meta) { - // TODO: Handle message queue on this path? 
- - auto* meta = entry.get_slab_metadata(); - - if (meta->is_large()) - { - // Handle large deallocation here. - size_t entry_sizeclass = entry.get_sizeclass().as_large(); - size_t size = bits::one_at_bit(entry_sizeclass); - -#ifdef SNMALLOC_TRACING - message<1024>("Large deallocation: {}", size); -#else - UNUSED(size); -#endif - - // Remove from set of fully used slabs. - meta->node.remove(); - - Config::Backend::dealloc_chunk( - get_backend_local_state(), *meta, p, size); - - return; - } - smallsizeclass_t sizeclass = entry.get_sizeclass().as_small(); - UNUSED(entropy); if (meta->is_sleeping()) { // Slab has been woken up add this to the list of slabs with free space. @@ -444,25 +430,76 @@ namespace snmalloc ticker.check_tick(); } + /** + * Slow path for deallocating an object locally. + * This is either waking up a slab that was not actively being used + * by this thread, or handling the final deallocation onto a slab, + * so it can be reused by other threads. + * + * Live large objects look like slabs that need attention when they become + * free; that attention is also given here. + */ + SNMALLOC_SLOW_PATH void dealloc_local_object_slow( + capptr::Alloc p, + const PagemapEntry& entry, + BackendSlabMetadata* meta) + { + // TODO: Handle message queue on this path? + + if (meta->is_large()) + { + // Handle large deallocation here. + + // XXX: because large objects have unique metadata associated with them, + // the ring size here is one. We should probably assert that. + + size_t entry_sizeclass = entry.get_sizeclass().as_large(); + size_t size = bits::one_at_bit(entry_sizeclass); + +#ifdef SNMALLOC_TRACING + message<1024>("Large deallocation: {}", size); +#else + UNUSED(size); +#endif + + // Remove from set of fully used slabs. + meta->node.remove(); + + Config::Backend::dealloc_chunk( + get_backend_local_state(), *meta, p, size, entry.get_sizeclass()); + + return; + } + + // Not a large object; update slab metadata + dealloc_local_object_meta(entry, meta); + } + /** * Check if this allocator has messages to deallocate blocks from another * thread */ SNMALLOC_FAST_PATH bool has_messages() { - auto domesticate = [local_state = backend_state_ptr()]( - freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { - if constexpr (Config::Options.QueueHeadsAreTame) - { - return freelist::HeadPtr::unsafe_from(p.unsafe_ptr()); - } - else - { + auto local_state = backend_state_ptr(); + auto domesticate_head = + [local_state](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { + if constexpr (Config::Options.QueueHeadsAreTame) + { + UNUSED(local_state); + return freelist::HeadPtr::unsafe_from(p.unsafe_ptr()); + } + else + { + return capptr_domesticate(local_state, p); + } + }; + auto domesticate_queue = + [local_state](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { return capptr_domesticate(local_state, p); - } - }; + }; - return !(message_queue().can_dequeue(domesticate)); + return message_queue().can_dequeue(domesticate_head, domesticate_queue); } /** @@ -478,20 +515,18 @@ namespace snmalloc SNMALLOC_FAST_PATH_LAMBDA { return capptr_domesticate(local_state, p); }; - auto cb = [this, - &need_post](freelist::HeadPtr msg) SNMALLOC_FAST_PATH_LAMBDA { -#ifdef SNMALLOC_TRACING - message<1024>("Handling remote"); -#endif - + auto cb = [this, domesticate, &need_post]( + capptr::Alloc msg) SNMALLOC_FAST_PATH_LAMBDA { auto& entry = - Config::Backend::template get_metaentry(snmalloc::address_cast(msg)); - - handle_dealloc_remote(entry, msg.as_void(), need_post); - + 
Config::Backend::get_metaentry(snmalloc::address_cast(msg)); + handle_dealloc_remote(entry, msg, need_post, domesticate); return true; }; +#ifdef SNMALLOC_TRACING + message<1024>("Handling remote queue before proceeding..."); +#endif + if constexpr (Config::Options.QueueHeadsAreTame) { /* @@ -523,10 +558,12 @@ namespace snmalloc * * need_post will be set to true, if capacity is exceeded. */ + template void handle_dealloc_remote( const PagemapEntry& entry, - CapPtr p, - bool& need_post) + capptr::Alloc msg, + bool& need_post, + Domesticator_queue domesticate) { // TODO this needs to not double count stats // TODO this needs to not double revoke if using MTE @@ -534,29 +571,53 @@ namespace snmalloc if (SNMALLOC_LIKELY(entry.get_remote() == public_state())) { - if (SNMALLOC_LIKELY( - dealloc_local_object_fast(entry, p.as_void(), entropy))) - return; + auto meta = entry.get_slab_metadata(); + + auto unreturned = + dealloc_local_objects_fast(msg, entry, meta, entropy, domesticate); + + /* + * dealloc_local_objects_fast has updated the free list but not updated + * the slab metadata; it falls to us to do so. It is UNLIKELY that we + * will need to take further steps, but we might. + */ + if (SNMALLOC_UNLIKELY(unreturned.template step())) + { + dealloc_local_object_slow(msg.as_void(), entry, meta); + + while (SNMALLOC_UNLIKELY(unreturned.template step())) + { + dealloc_local_object_meta(entry, meta); + } + } - dealloc_local_object_slow(p, entry); + return; } - else + + auto nelem = RemoteMessage::template ring_size( + msg, + freelist::Object::key_root, + entry.get_slab_metadata()->as_key_tweak(), + domesticate); + if ( + !need_post && + !attached_cache->remote_dealloc_cache.reserve_space(entry, nelem)) { - if ( - !need_post && - !attached_cache->remote_dealloc_cache.reserve_space(entry)) - need_post = true; - attached_cache->remote_dealloc_cache - .template dealloc( - entry.get_remote()->trunc_id(), p.as_void()); + need_post = true; } + attached_cache->remote_dealloc_cache + .template forward( + entry.get_remote()->trunc_id(), msg); } /** * Initialiser, shared code between the constructors for different * configurations. + * + * spare is the amount of space directly after the allocator that is + * reserved as meta-data, but is not required by this CoreAllocator. */ - void init() + void init(Range& spare) { #ifdef SNMALLOC_TRACING message<1024>("Making an allocator."); @@ -566,6 +627,20 @@ namespace snmalloc // This must occur before any freelists are constructed. entropy.init(); + if (spare.length != 0) + { + /* + * Seed this frontend's private metadata allocation cache with any + * excess space from the metadata allocation holding the frontend + * Allocator object itself. This alleviates thundering herd + * contention on the backend during startup: each slab opened now + * makes one trip to the backend, for the slab itself, rather than + * two, for the slab and its metadata. + */ + Config::Backend::dealloc_meta_data( + get_backend_local_state(), spare.base, spare.length); + } + // Ignoring stats for now. 
// stats().start(); @@ -574,49 +649,41 @@ namespace snmalloc init_message_queue(); message_queue().invariant(); } - - if constexpr (DEBUG) - { - for (smallsizeclass_t i = 0; i < NUM_SMALL_SIZECLASSES; i++) - { - size_t size = sizeclass_to_size(i); - smallsizeclass_t sc1 = size_to_sizeclass(size); - smallsizeclass_t sc2 = size_to_sizeclass_const(size); - size_t size1 = sizeclass_to_size(sc1); - size_t size2 = sizeclass_to_size(sc2); - - SNMALLOC_CHECK(sc1 == i); - SNMALLOC_CHECK(sc1 == sc2); - SNMALLOC_CHECK(size1 == size); - SNMALLOC_CHECK(size1 == size2); - } - } } public: /** * Constructor for the case that the core allocator owns the local state. * SFINAE disabled if the allocator does not own the local state. + * + * spare is the amount of space directly after the allocator that is + * reserved as meta-data, but is not required by this CoreAllocator. */ template< typename Config_ = Config, typename = std::enable_if_t> - CoreAllocator(LocalCache* cache) : attached_cache(cache) + CoreAllocator(Range& spare) { - init(); + init(spare); } /** * Constructor for the case that the core allocator does not owns the local * state. SFINAE disabled if the allocator does own the local state. + * + * spare is the amount of space directly after the allocator that is + * reserved as meta-data, but is not required by this CoreAllocator. */ template< typename Config_ = Config, typename = std::enable_if_t> - CoreAllocator(LocalCache* cache, LocalState* backend = nullptr) + CoreAllocator( + Range& spare, + LocalCache* cache, + LocalState* backend = nullptr) : backend_state(backend), attached_cache(cache) { - init(); + init(spare); } /** @@ -642,7 +709,7 @@ namespace snmalloc // stats().remote_post(); // TODO queue not in line! bool sent_something = attached_cache->remote_dealloc_cache - .post( + .template post( backend_state_ptr(), public_state()->trunc_id()); return sent_something; @@ -661,26 +728,33 @@ namespace snmalloc return handle_message_queue_inner(action, args...); } + SNMALLOC_FAST_PATH void dealloc_local_object( + CapPtr p, + const typename Config::PagemapEntry& entry) + { + auto meta = entry.get_slab_metadata(); + + if (SNMALLOC_LIKELY(dealloc_local_object_fast(p, entry, meta, entropy))) + return; + + dealloc_local_object_slow(p, entry, meta); + } + SNMALLOC_FAST_PATH void dealloc_local_object(CapPtr p) { // PagemapEntry-s seen here are expected to have meaningful Remote // pointers - auto& entry = - Config::Backend::template get_metaentry(snmalloc::address_cast(p)); - if (SNMALLOC_LIKELY(dealloc_local_object_fast(entry, p, entropy))) - return; - - dealloc_local_object_slow(p, entry); + dealloc_local_object( + p, Config::Backend::get_metaentry(snmalloc::address_cast(p))); } SNMALLOC_FAST_PATH static bool dealloc_local_object_fast( - const PagemapEntry& entry, CapPtr p, + const PagemapEntry& entry, + BackendSlabMetadata* meta, LocalEntropy& entropy) { - auto meta = entry.get_slab_metadata(); - SNMALLOC_ASSERT(!meta->is_unused()); snmalloc_check_client( @@ -690,14 +764,49 @@ namespace snmalloc auto cp = p.as_static>(); - auto& key = entropy.get_free_list_key(); - // Update the head and the next pointer in the free list. 
- meta->free_queue.add(cp, key, entropy); + meta->free_queue.add( + cp, freelist::Object::key_root, meta->as_key_tweak(), entropy); return SNMALLOC_LIKELY(!meta->return_object()); } + template + SNMALLOC_FAST_PATH static auto dealloc_local_objects_fast( + capptr::Alloc msg, + const PagemapEntry& entry, + BackendSlabMetadata* meta, + LocalEntropy& entropy, + Domesticator domesticate) + { + SNMALLOC_ASSERT(!meta->is_unused()); + + snmalloc_check_client( + mitigations(sanity_checks), + is_start_of_object(entry.get_sizeclass(), address_cast(msg)), + "Not deallocating start of an object"); + + size_t objsize = sizeclass_full_to_size(entry.get_sizeclass()); + + auto [curr, length] = RemoteMessage::template open_free_ring( + msg, + objsize, + freelist::Object::key_root, + meta->as_key_tweak(), + domesticate); + + // Update the head and the next pointer in the free list. + meta->free_queue.append_segment( + curr, + msg.template as_reinterpret>(), + length, + freelist::Object::key_root, + meta->as_key_tweak(), + entropy); + + return meta->return_objects(length); + } + template SNMALLOC_SLOW_PATH capptr::Alloc small_alloc(smallsizeclass_t sizeclass, freelist::Iter<>& fast_free_list) @@ -781,7 +890,8 @@ namespace snmalloc get_backend_local_state(), slab_size, PagemapEntry::encode( - public_state(), sizeclass_t::from_small_class(sizeclass))); + public_state(), sizeclass_t::from_small_class(sizeclass)), + sizeclass_t::from_small_class(sizeclass)); if (slab == nullptr) { @@ -790,7 +900,7 @@ namespace snmalloc // Set meta slab to empty. meta->initialise( - sizeclass, address_cast(slab), entropy.get_free_list_key()); + sizeclass, address_cast(slab), freelist::Object::key_root); // Build a free list for the slab alloc_new_list(slab, meta, rsize, slab_size, entropy); @@ -832,19 +942,14 @@ namespace snmalloc if (destroy_queue) { - auto p_wild = message_queue().destroy(); - auto p_tame = domesticate(p_wild); - - while (p_tame != nullptr) - { + auto cb = [this, domesticate](capptr::Alloc m) { bool need_post = true; // Always going to post, so ignore. - auto n_tame = - p_tame->atomic_read_next(RemoteAllocator::key_global, domesticate); const PagemapEntry& entry = - Config::Backend::get_metaentry(snmalloc::address_cast(p_tame)); - handle_dealloc_remote(entry, p_tame.as_void(), need_post); - p_tame = n_tame; - } + Config::Backend::get_metaentry(snmalloc::address_cast(m)); + handle_dealloc_remote(entry, m, need_post, domesticate); + }; + + message_queue().destroy_and_iterate(domesticate, cb); } else { @@ -854,7 +959,7 @@ namespace snmalloc handle_message_queue([]() {}); } - auto posted = attached_cache->flush( + auto posted = attached_cache->template flush( backend_state_ptr(), [&](capptr::Alloc p) { dealloc_local_object(p); }); @@ -865,20 +970,21 @@ namespace snmalloc dealloc_local_slabs(sizeclass); } - laden.iterate([this, domesticate]( - BackendSlabMetadata* meta) SNMALLOC_FAST_PATH_LAMBDA { - if (!meta->is_large()) - { - meta->free_queue.validate(entropy.get_free_list_key(), domesticate); - } - }); + laden.iterate( + [domesticate](BackendSlabMetadata* meta) SNMALLOC_FAST_PATH_LAMBDA { + if (!meta->is_large()) + { + meta->free_queue.validate( + freelist::Object::key_root, meta->as_key_tweak(), domesticate); + } + }); return posted; } // This allows the caching layer to be attached to an underlying // allocator instance. 
- void attach(LocalCache* c) + void attach(LocalCache* c) { #ifdef SNMALLOC_TRACING message<1024>("Attach cache to {}", this); @@ -901,10 +1007,9 @@ namespace snmalloc */ bool debug_is_empty_impl(bool* result) { - auto& key = entropy.get_free_list_key(); - - auto error = [&result, &key](auto slab_metadata) { - auto slab_interior = slab_metadata->get_slab_interior(key); + auto error = [&result](auto slab_metadata) { + auto slab_interior = + slab_metadata->get_slab_interior(freelist::Object::key_root); const PagemapEntry& entry = Config::Backend::get_metaentry(slab_interior); SNMALLOC_ASSERT(slab_metadata == entry.get_slab_metadata()); @@ -917,9 +1022,11 @@ namespace snmalloc else report_fatal_error( "debug_is_empty: found non-empty allocator: size={} on " - "slab_start {}", + "slab_start {} meta {} entry {}", sizeclass_full_to_size(size_class), - slab_start); + slab_start, + address_cast(slab_metadata), + address_cast(&entry)); }; auto test = [&error](auto& queue) { @@ -971,7 +1078,7 @@ namespace snmalloc { // We need a cache to perform some operations, so set one up // temporarily - LocalCache temp(public_state()); + LocalCache temp(public_state()); attach(&temp); #ifdef SNMALLOC_TRACING message<1024>("debug_is_empty - attach a cache"); @@ -988,9 +1095,42 @@ namespace snmalloc } }; + template + class ConstructCoreAlloc + { + using CA = CoreAllocator; + + public: + static capptr::Alloc make() + { + size_t size = sizeof(CA); + size_t round_sizeof = Aal::capptr_size_round(size); + size_t request_size = bits::next_pow2(round_sizeof); + size_t spare = request_size - round_sizeof; + + auto raw = + Config::Backend::template alloc_meta_data(nullptr, request_size); + + if (raw == nullptr) + { + Config::Pal::error("Failed to initialise thread local allocator."); + } + + capptr::Alloc spare_start = pointer_offset(raw, round_sizeof); + Range r{spare_start, spare}; + + auto p = capptr::Alloc::unsafe_from(new (raw.unsafe_ptr()) CA(r)); + + // Remove excess from the bounds. + p = Aal::capptr_bound(p, round_sizeof); + return p; + } + }; + /** * Use this alias to access the pool of allocators throughout snmalloc. */ template - using AllocPool = Pool, Config, Config::pool>; + using AllocPool = + Pool, ConstructCoreAlloc, Config::pool>; } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/entropy.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/entropy.h index 2e63b68bfa00..c6f2c85ffe6d 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/entropy.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/entropy.h @@ -28,7 +28,6 @@ namespace snmalloc uint64_t local_counter{0}; uint64_t fresh_bits{0}; uint64_t count{0}; - FreeListKey key{0, 0, 0}; public: constexpr LocalEntropy() = default; @@ -38,18 +37,6 @@ namespace snmalloc { local_key = get_entropy64(); local_counter = get_entropy64(); - if constexpr (bits::BITS == 64) - { - key.key1 = get_next(); - key.key2 = get_next(); - key.key_next = get_next(); - } - else - { - key.key1 = get_next() & 0xffff'ffff; - key.key2 = get_next() & 0xffff'ffff; - key.key_next = get_next() & 0xffff'ffff; - } bit_source = get_next(); } @@ -70,9 +57,20 @@ namespace snmalloc /** * A key for the free lists for this thread. 
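ConstructCoreAlloc::make above rounds the metadata request for the allocator object up to a power of two and hands the slack back as a Range of spare space, which init() then donates to the frontend's metadata cache. A sketch of just the sizing arithmetic (ignoring the CHERI capptr_size_round step):

    #include <cstddef>

    constexpr size_t next_pow2(size_t x)
    {
      size_t p = 1;
      while (p < x)
        p <<= 1;
      return p;
    }

    struct SpareRange
    {
      size_t offset; // start of the spare space within the allocation
      size_t length;
    };

    constexpr SpareRange spare_after(size_t object_size)
    {
      size_t request = next_pow2(object_size);
      return {object_size, request - object_size};
    }

    // A (hypothetical) 1344-byte allocator object needs a 2048-byte request,
    // leaving 704 bytes of spare metadata space; a 1024-byte object leaves
    // none.
    static_assert(spare_after(1344).length == 704);
    static_assert(spare_after(1024).length == 0);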
*/ - const FreeListKey& get_free_list_key() + void make_free_list_key(FreeListKey& key) { - return key; + if constexpr (bits::BITS == 64) + { + key.key1 = static_cast(get_next()); + key.key2 = static_cast(get_next()); + key.key_next = static_cast(get_next()); + } + else + { + key.key1 = static_cast(get_next() & 0xffff'ffff); + key.key2 = static_cast(get_next() & 0xffff'ffff); + key.key_next = static_cast(get_next() & 0xffff'ffff); + } } /** @@ -116,8 +114,7 @@ namespace snmalloc fresh_bits = get_next(); count = 64; } - uint16_t result = - static_cast(fresh_bits & (bits::one_at_bit(n) - 1)); + uint16_t result = static_cast(fresh_bits & bits::mask_bits(n)); fresh_bits >>= n; count -= n; return result; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/external_alloc.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/external_alloc.h index 250719766e43..56d9f9ac632a 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/external_alloc.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/external_alloc.h @@ -21,6 +21,7 @@ namespace snmalloc::external_alloc } # elif defined(__APPLE__) # include + namespace snmalloc::external_alloc { inline size_t malloc_usable_size(void* ptr) @@ -30,6 +31,7 @@ namespace snmalloc::external_alloc } # elif defined(__linux__) || defined(__HAIKU__) # include + namespace snmalloc::external_alloc { using ::malloc_usable_size; @@ -41,6 +43,7 @@ namespace snmalloc::external_alloc } # elif defined(__FreeBSD__) # include + namespace snmalloc::external_alloc { using ::malloc_usable_size; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/freelist.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/freelist.h index 49348d1d8a93..f49004d938eb 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/freelist.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/freelist.h @@ -40,22 +40,40 @@ namespace snmalloc { + class BatchedRemoteMessage; + + static constexpr address_t NO_KEY_TWEAK = 0; + /** * This function is used to sign back pointers in the free list. */ - inline static address_t - signed_prev(address_t curr, address_t next, const FreeListKey& key) + inline static address_t signed_prev( + address_t curr, address_t next, const FreeListKey& key, address_t tweak) { auto c = curr; auto n = next; - return (c + key.key1) * (n + key.key2); + return (c + key.key1) * (n + (key.key2 ^ tweak)); } namespace freelist { + template< + bool RANDOM, + bool TRACK_LENGTH = RANDOM, + SNMALLOC_CONCEPT(capptr::IsBound) BView = capptr::bounds::Alloc, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue = capptr::bounds::AllocWild> + class Builder; + class Object { public: + /** + * Shared key for slab free lists (but tweaked by metadata address). 
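signed_prev above now folds a per-slab tweak (derived from the slab metadata address) into the shared key, so a back-pointer signature captured on one slab does not validate on another. A standalone restatement of the signature:

    #include <cstdint>

    using address_t = uintptr_t;

    struct FreeListKey
    {
      address_t key1;
      address_t key2;
      address_t key_next;
    };

    // Same shape as signed_prev above: the signature binds the current node,
    // the next node, the global key and the per-slab tweak together.
    constexpr address_t signed_prev(
      address_t curr, address_t next, const FreeListKey& key, address_t tweak)
    {
      return (curr + key.key1) * (next + (key.key2 ^ tweak));
    }

    constexpr FreeListKey key{0xdeadbeef, 0xbeefdead, 0xdeadbeef};

    // The same edge signed under two different tweaks yields different values.
    static_assert(
      signed_prev(0x1000, 0x2000, key, 0x10) !=
      signed_prev(0x1000, 0x2000, key, 0x20));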
+ * + * XXX Maybe this belongs somewhere else + */ + inline static FreeListKey key_root{0xdeadbeef, 0xbeefdead, 0xdeadbeef}; + template< SNMALLOC_CONCEPT(capptr::IsBound) BQueue = capptr::bounds::AllocWild> class T; @@ -115,6 +133,7 @@ namespace snmalloc class T { template< + bool, bool, SNMALLOC_CONCEPT(capptr::IsBound), SNMALLOC_CONCEPT(capptr::IsBound)> @@ -122,6 +141,8 @@ namespace snmalloc friend class Object; + friend class ::snmalloc::BatchedRemoteMessage; + class Empty { public: @@ -171,23 +192,28 @@ namespace snmalloc SNMALLOC_CONCEPT(capptr::IsBound) BView = typename BQueue:: template with_wildness, typename Domesticator> - BHeadPtr - atomic_read_next(const FreeListKey& key, Domesticator domesticate) + BHeadPtr atomic_read_next( + const FreeListKey& key, address_t key_tweak, Domesticator domesticate) { auto n_wild = Object::decode_next( address_cast(&this->next_object), this->atomic_next_object.load(std::memory_order_acquire), - key); + key, + key_tweak); auto n_tame = domesticate(n_wild); if constexpr (mitigations(freelist_backward_edge)) { if (n_tame != nullptr) { - n_tame->prev.check_prev( - signed_prev(address_cast(this), address_cast(n_tame), key)); + n_tame->prev.check_prev(signed_prev( + address_cast(this), address_cast(n_tame), key, key_tweak)); } } - Aal::prefetch(&(n_tame->next_object)); + else + { + UNUSED(key_tweak); + } + Aal::prefetch(n_tame.unsafe_ptr()); return n_tame; } @@ -198,11 +224,14 @@ namespace snmalloc SNMALLOC_CONCEPT(capptr::IsBound) BView = typename BQueue:: template with_wildness, typename Domesticator> - BHeadPtr - read_next(const FreeListKey& key, Domesticator domesticate) + BHeadPtr read_next( + const FreeListKey& key, address_t key_tweak, Domesticator domesticate) { return domesticate(Object::decode_next( - address_cast(&this->next_object), this->next_object, key)); + address_cast(&this->next_object), + this->next_object, + key, + key_tweak)); } /** @@ -235,7 +264,8 @@ namespace snmalloc SNMALLOC_CONCEPT(capptr::IsBound) BView> static BHeadPtr make(CapPtr p) { - return p.template as_static>(); + return CapPtr, BView>::unsafe_from( + new (p.unsafe_ptr()) Object::T()); } /** @@ -253,8 +283,11 @@ namespace snmalloc * Involutive encryption with raw pointers */ template - inline static Object::T* - code_next(address_t curr, Object::T* next, const FreeListKey& key) + inline static Object::T* code_next( + address_t curr, + Object::T* next, + const FreeListKey& key, + address_t key_tweak) { // Note we can consider other encoding schemes here. // * XORing curr and next. This doesn't require any key material @@ -267,11 +300,13 @@ namespace snmalloc mitigations(freelist_forward_edge) && !aal_supports) { return unsafe_from_uintptr>( - unsafe_to_uintptr>(next) ^ key.key_next); + unsafe_to_uintptr>(next) ^ key.key_next ^ + key_tweak); } else { UNUSED(key); + UNUSED(key_tweak); return next; } } @@ -289,16 +324,19 @@ namespace snmalloc * though the result is likely not safe to dereference, being an * obfuscated bundle of bits (on non-CHERI architectures, anyway). That's * additional motivation to consider the result BQueue-bounded, as that - * is likely (but not necessarily) Wild. + * is likely (but not necessarily) Wild. 
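code_next above (used by encode_next and decode_next) is an involution: when the freelist_forward_edge mitigation applies, the stored next pointer is XORed with key_next ^ key_tweak, and otherwise it is stored as-is; decoding applies exactly the same XOR, so encode and decode share code_next. A standalone restatement:

    #include <cstdint>

    using address_t = uintptr_t;

    // Involutive obfuscation in the style of code_next above: applying the
    // same XOR twice recovers the original value.
    constexpr address_t code_next(
      address_t next, address_t key_next, address_t key_tweak)
    {
      return next ^ key_next ^ key_tweak;
    }

    static_assert(
      code_next(code_next(0x12345678, 0xdeadbeef, 0x42), 0xdeadbeef, 0x42) ==
      0x12345678);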
*/ template< SNMALLOC_CONCEPT(capptr::IsBound) BView, SNMALLOC_CONCEPT(capptr::IsBound) BQueue> inline static BQueuePtr encode_next( - address_t curr, BHeadPtr next, const FreeListKey& key) + address_t curr, + BHeadPtr next, + const FreeListKey& key, + address_t key_tweak) { return BQueuePtr::unsafe_from( - code_next(curr, next.unsafe_ptr(), key)); + code_next(curr, next.unsafe_ptr(), key, key_tweak)); } /** @@ -320,10 +358,13 @@ namespace snmalloc SNMALLOC_CONCEPT(capptr::IsBound) BView, SNMALLOC_CONCEPT(capptr::IsBound) BQueue> inline static BHeadPtr decode_next( - address_t curr, BHeadPtr next, const FreeListKey& key) + address_t curr, + BHeadPtr next, + const FreeListKey& key, + address_t key_tweak) { return BHeadPtr::unsafe_from( - code_next(curr, next.unsafe_ptr(), key)); + code_next(curr, next.unsafe_ptr(), key, key_tweak)); } template< @@ -343,6 +384,33 @@ namespace snmalloc "Free Object Queue bounds must match View bounds (but may be Wild)"); } + template< + SNMALLOC_CONCEPT(capptr::IsBound) BView, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue> + static void store_nextish( + BQueuePtr* curr, + BHeadPtr next, + const FreeListKey& key, + address_t key_tweak, + BHeadPtr next_value) + { + assert_view_queue_bounds(); + + if constexpr (mitigations(freelist_backward_edge)) + { + next->prev.set_prev(signed_prev( + address_cast(curr), address_cast(next), key, key_tweak)); + } + else + { + UNUSED(next); + UNUSED(key); + UNUSED(key_tweak); + } + + *curr = encode_next(address_cast(curr), next_value, key, key_tweak); + } + /** * Assign next_object and update its prev_encoded if * SNMALLOC_CHECK_CLIENT. Static so that it can be used on reference to a @@ -358,27 +426,19 @@ namespace snmalloc static BQueuePtr* store_next( BQueuePtr* curr, BHeadPtr next, - const FreeListKey& key) + const FreeListKey& key, + address_t key_tweak) { - assert_view_queue_bounds(); - - if constexpr (mitigations(freelist_backward_edge)) - { - next->prev.set_prev( - signed_prev(address_cast(curr), address_cast(next), key)); - } - else - UNUSED(key); - - *curr = encode_next(address_cast(curr), next, key); + store_nextish(curr, next, key, key_tweak, next); return &(next->next_object); } template - static void store_null(BQueuePtr* curr, const FreeListKey& key) + static void store_null( + BQueuePtr* curr, const FreeListKey& key, address_t key_tweak) { - *curr = - encode_next(address_cast(curr), BQueuePtr(nullptr), key); + *curr = encode_next( + address_cast(curr), BQueuePtr(nullptr), key, key_tweak); } /** @@ -392,36 +452,45 @@ namespace snmalloc static void atomic_store_next( BHeadPtr curr, BHeadPtr next, - const FreeListKey& key) + const FreeListKey& key, + address_t key_tweak) { static_assert(BView::wildness == capptr::dimension::Wildness::Tame); if constexpr (mitigations(freelist_backward_edge)) { - next->prev.set_prev( - signed_prev(address_cast(curr), address_cast(next), key)); + next->prev.set_prev(signed_prev( + address_cast(curr), address_cast(next), key, key_tweak)); } else + { UNUSED(key); + UNUSED(key_tweak); + } // Signature needs to be visible before item is linked in // so requires release semantics. 
curr->atomic_next_object.store( - encode_next(address_cast(&curr->next_object), next, key), + encode_next(address_cast(&curr->next_object), next, key, key_tweak), std::memory_order_release); } template< SNMALLOC_CONCEPT(capptr::IsBound) BView, SNMALLOC_CONCEPT(capptr::IsBound) BQueue> - static void - atomic_store_null(BHeadPtr curr, const FreeListKey& key) + static void atomic_store_null( + BHeadPtr curr, + const FreeListKey& key, + address_t key_tweak) { static_assert(BView::wildness == capptr::dimension::Wildness::Tame); curr->atomic_next_object.store( encode_next( - address_cast(&curr->next_object), BQueuePtr(nullptr), key), + address_cast(&curr->next_object), + BQueuePtr(nullptr), + key, + key_tweak), std::memory_order_relaxed); } }; @@ -459,6 +528,7 @@ namespace snmalloc protected: constexpr Prev(address_t prev) : prev(prev) {} + constexpr Prev() = default; address_t replace(address_t next) @@ -498,11 +568,50 @@ namespace snmalloc { Object::BHeadPtr curr{nullptr}; + struct KeyTweak + { + address_t key_tweak = 0; + + SNMALLOC_FAST_PATH address_t get() + { + return key_tweak; + } + + void set(address_t kt) + { + key_tweak = kt; + } + + constexpr KeyTweak() = default; + }; + + struct NoKeyTweak + { + SNMALLOC_FAST_PATH address_t get() + { + return 0; + } + + void set(address_t) {} + }; + + SNMALLOC_NO_UNIQUE_ADDRESS + std::conditional_t< + mitigations(freelist_forward_edge) || + mitigations(freelist_backward_edge), + KeyTweak, + NoKeyTweak> + key_tweak; + public: - constexpr Iter(Object::BHeadPtr head, address_t prev_value) + constexpr Iter( + Object::BHeadPtr head, + address_t prev_value, + address_t kt) : IterBase(prev_value), curr(head) { UNUSED(prev_value); + key_tweak.set(kt); } constexpr Iter() = default; @@ -531,15 +640,15 @@ namespace snmalloc take(const FreeListKey& key, Domesticator domesticate) { auto c = curr; - auto next = curr->read_next(key, domesticate); + auto next = curr->read_next(key, key_tweak.get(), domesticate); Aal::prefetch(next.unsafe_ptr()); curr = next; if constexpr (mitigations(freelist_backward_edge)) { - auto p = - replace(signed_prev(address_cast(c), address_cast(next), key)); + auto p = replace(signed_prev( + address_cast(c), address_cast(next), key, key_tweak.get())); c->check_prev(p); } else @@ -569,10 +678,13 @@ namespace snmalloc */ template< bool RANDOM, - SNMALLOC_CONCEPT(capptr::IsBound) BView = capptr::bounds::Alloc, - SNMALLOC_CONCEPT(capptr::IsBound) BQueue = capptr::bounds::AllocWild> + bool TRACK_LENGTH, + SNMALLOC_CONCEPT(capptr::IsBound) BView, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue> class Builder { + static_assert(!RANDOM || TRACK_LENGTH); + static constexpr size_t LENGTH = RANDOM ? 2 : 1; /* @@ -610,7 +722,8 @@ namespace snmalloc static_cast*>(head[ix])); } - SNMALLOC_NO_UNIQUE_ADDRESS std::array length{}; + SNMALLOC_NO_UNIQUE_ADDRESS + std::array length{}; public: constexpr Builder() = default; @@ -636,6 +749,7 @@ namespace snmalloc void add( Object::BHeadPtr n, const FreeListKey& key, + address_t key_tweak, LocalEntropy& entropy) { uint32_t index; @@ -644,8 +758,8 @@ namespace snmalloc else index = 0; - set_end(index, Object::store_next(cast_end(index), n, key)); - if constexpr (RANDOM) + set_end(index, Object::store_next(cast_end(index), n, key, key_tweak)); + if constexpr (TRACK_LENGTH) { length[index]++; } @@ -660,20 +774,26 @@ namespace snmalloc * lists, which will be randomised at the other end. 
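/*
 * Illustrative sketch (hypothetical types): the KeyTweak/NoKeyTweak pattern
 * above, i.e. paying for a field only when a compile-time mitigation needs
 * it, by choosing between a real member and an empty stand-in with
 * std::conditional_t and [[no_unique_address]].
 */
#include <cstdint>
#include <type_traits>

struct Tweak
{
  uintptr_t value = 0;
  uintptr_t get() const { return value; }
  void set(uintptr_t v) { value = v; }
};

struct NoTweak
{
  uintptr_t get() const { return 0; }
  void set(uintptr_t) {}
};

template<bool NeedTweak>
struct TweakHolder
{
  [[no_unique_address]] std::conditional_t<NeedTweak, Tweak, NoTweak> tweak;
};

// The disabled variant carries no extra payload.
static_assert(sizeof(TweakHolder<false>) <= sizeof(TweakHolder<true>));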
*/ template - std::enable_if_t - add(Object::BHeadPtr n, const FreeListKey& key) + std::enable_if_t add( + Object::BHeadPtr n, + const FreeListKey& key, + address_t key_tweak) { static_assert(RANDOM_ == RANDOM, "Don't set template parameter"); - set_end(0, Object::store_next(cast_end(0), n, key)); + set_end(0, Object::store_next(cast_end(0), n, key, key_tweak)); + if constexpr (TRACK_LENGTH) + { + length[0]++; + } } /** * Makes a terminator to a free list. */ - SNMALLOC_FAST_PATH void - terminate_list(uint32_t index, const FreeListKey& key) + SNMALLOC_FAST_PATH void terminate_list( + uint32_t index, const FreeListKey& key, address_t key_tweak) { - Object::store_null(cast_end(index), key); + Object::store_null(cast_end(index), key, key_tweak); } /** @@ -685,17 +805,21 @@ namespace snmalloc * and is thus subject to encoding if the next_object pointers * encoded. */ - [[nodiscard]] Object::BHeadPtr - read_head(uint32_t index, const FreeListKey& key) const + [[nodiscard]] Object::BHeadPtr read_head( + uint32_t index, const FreeListKey& key, address_t key_tweak) const { return Object::decode_next( - address_cast(&head[index]), cast_head(index), key); + address_cast(&head[index]), cast_head(index), key, key_tweak); } - address_t get_fake_signed_prev(uint32_t index, const FreeListKey& key) + address_t get_fake_signed_prev( + uint32_t index, const FreeListKey& key, address_t key_tweak) { return signed_prev( - address_cast(&head[index]), address_cast(read_head(index, key)), key); + address_cast(&head[index]), + address_cast(read_head(index, key, key_tweak)), + key, + key_tweak); } /** @@ -707,8 +831,8 @@ namespace snmalloc * The return value is how many entries are still contained in the * builder. */ - SNMALLOC_FAST_PATH uint16_t - close(Iter& fl, const FreeListKey& key) + SNMALLOC_FAST_PATH uint16_t close( + Iter& fl, const FreeListKey& key, address_t key_tweak) { uint32_t i; if constexpr (RANDOM) @@ -724,9 +848,12 @@ namespace snmalloc i = 0; } - terminate_list(i, key); + terminate_list(i, key, key_tweak); - fl = {read_head(i, key), get_fake_signed_prev(i, key)}; + fl = { + read_head(i, key, key_tweak), + get_fake_signed_prev(i, key, key_tweak), + key_tweak}; end[i] = &head[i]; @@ -744,12 +871,13 @@ namespace snmalloc /** * Set the builder to a not building state. */ - constexpr void init(address_t slab, const FreeListKey& key) + constexpr void + init(address_t slab, const FreeListKey& key, address_t key_tweak) { for (size_t i = 0; i < LENGTH; i++) { end[i] = &head[i]; - if constexpr (RANDOM) + if constexpr (TRACK_LENGTH) { length[i] = 0; } @@ -762,35 +890,71 @@ namespace snmalloc head[i] = Object::code_next( address_cast(&head[i]), useless_ptr_from_addr>(slab), - key); + key, + key_tweak); } } + template + std::enable_if_t extract_segment_length() + { + static_assert(RANDOM_ == RANDOM, "Don't set SFINAE parameter!"); + return length[0]; + } + template std::enable_if_t< !RANDOM_, std::pair< Object::BHeadPtr, Object::BHeadPtr>> - extract_segment(const FreeListKey& key) + extract_segment(const FreeListKey& key, address_t key_tweak) { static_assert(RANDOM_ == RANDOM, "Don't set SFINAE parameter!"); SNMALLOC_ASSERT(!empty()); - auto first = read_head(0, key); + auto first = read_head(0, key, key_tweak); // end[0] is pointing to the first field in the object, // this is doing a CONTAINING_RECORD like cast to get back // to the actual object. This isn't true if the builder is // empty, but you are not allowed to call this in the empty case. 
auto last = Object::BHeadPtr::unsafe_from( Object::from_next_ptr(cast_end(0))); - init(address_cast(head[0]), key); + init(address_cast(head[0]), key, key_tweak); return {first, last}; } + /** + * Put back an extracted segment from a builder using the same key. + * + * The caller must tell us how many elements are involved. + */ + void append_segment( + Object::BHeadPtr first, + Object::BHeadPtr last, + uint16_t size, + const FreeListKey& key, + address_t key_tweak, + LocalEntropy& entropy) + { + uint32_t index; + if constexpr (RANDOM) + index = entropy.next_bit(); + else + index = 0; + + if constexpr (TRACK_LENGTH) + length[index] += size; + else + UNUSED(size); + + Object::store_next(cast_end(index), first, key, key_tweak); + set_end(index, &(last->next_object)); + } + template - SNMALLOC_FAST_PATH void - validate(const FreeListKey& key, Domesticator domesticate) + SNMALLOC_FAST_PATH void validate( + const FreeListKey& key, address_t key_tweak, Domesticator domesticate) { if constexpr (mitigations(freelist_teardown_validate)) { @@ -798,29 +962,31 @@ namespace snmalloc { if (&head[i] == end[i]) { - SNMALLOC_CHECK(!RANDOM || (length[i] == 0)); + SNMALLOC_CHECK(!TRACK_LENGTH || (length[i] == 0)); continue; } size_t count = 1; - auto curr = read_head(i, key); - auto prev = get_fake_signed_prev(i, key); + auto curr = read_head(i, key, key_tweak); + auto prev = get_fake_signed_prev(i, key, key_tweak); while (true) { curr->check_prev(prev); if (address_cast(&(curr->next_object)) == address_cast(end[i])) break; count++; - auto next = curr->read_next(key, domesticate); - prev = signed_prev(address_cast(curr), address_cast(next), key); + auto next = curr->read_next(key, key_tweak, domesticate); + prev = signed_prev( + address_cast(curr), address_cast(next), key, key_tweak); curr = next; } - SNMALLOC_CHECK(!RANDOM || (count == length[i])); + SNMALLOC_CHECK(!TRACK_LENGTH || (count == length[i])); } } else { UNUSED(key); + UNUSED(key_tweak); UNUSED(domesticate); } } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/freelist_queue.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/freelist_queue.h new file mode 100644 index 000000000000..fb38f7c88de1 --- /dev/null +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/freelist_queue.h @@ -0,0 +1,193 @@ +#pragma once + +#include "../ds/ds.h" +#include "freelist.h" + +#include + +namespace snmalloc +{ + /** + * A FreeListMPSCQ is a chain of freed objects exposed as a MPSC append-only + * atomic queue that uses one xchg per append. + * + * The internal pointers are considered QueuePtr-s to support deployment + * scenarios in which the MPSCQ itself is exposed to the client. This is + * excessively paranoid in the common case that these metadata are as "hard" + * for the client to reach as the Pagemap, which we trust to store not just + * Tame CapPtr<>s but raw C++ pointers. + * + * Where necessary, methods expose two domesticator callbacks at the + * interface and are careful to use one for the front and back values and the + * other for pointers read from the queue itself. That's not ideal, but it + * lets the client condition its behavior appropriately and prevents us from + * accidentally following either of these pointers in generic code. + * Specifically, + * + * * `domesticate_head` is used for the MPSCQ pointers used to reach into + * the chain of objects + * + * * `domesticate_queue` is used to traverse links in that chain (and in + * fact, we traverse only the first). 
+ * + * In the case that the MPSCQ is not easily accessible to the client, + * `domesticate_head` can just be a type coersion, and `domesticate_queue` + * should perform actual validation. If the MPSCQ is exposed to the + * allocator client, both Domesticators should perform validation. + */ + template + struct alignas(REMOTE_MIN_ALIGN) FreeListMPSCQ + { + // Store the message queue on a separate cacheline. It is mutable data that + // is read by other threads. + alignas(CACHELINE_SIZE) freelist::AtomicQueuePtr back{nullptr}; + // Store the two ends on different cache lines as access by different + // threads. + alignas(CACHELINE_SIZE) freelist::AtomicQueuePtr front{nullptr}; + // Fake first entry + freelist::Object::T stub{}; + + constexpr FreeListMPSCQ() = default; + + void invariant() + { + SNMALLOC_ASSERT( + (address_cast(front.load()) == address_cast(&stub)) || + (back != nullptr)); + } + + void init() + { + freelist::HeadPtr stub_ptr = freelist::HeadPtr::unsafe_from(&stub); + freelist::Object::atomic_store_null(stub_ptr, Key, Key_tweak); + front.store(freelist::QueuePtr::unsafe_from(&stub)); + back.store(nullptr, std::memory_order_relaxed); + invariant(); + } + + freelist::QueuePtr destroy() + { + freelist::QueuePtr fnt = front.load(); + back.store(nullptr, std::memory_order_relaxed); + if (address_cast(front.load()) == address_cast(&stub)) + return nullptr; + return fnt; + } + + template + void destroy_and_iterate(Domesticator_queue domesticate, Cb cb) + { + auto p = domesticate(destroy()); + + while (p != nullptr) + { + auto n = p->atomic_read_next(Key, Key_tweak, domesticate); + cb(p); + p = n; + } + } + + template + inline bool can_dequeue( + Domesticator_head domesticate_head, Domesticator_queue domesticate_queue) + { + return domesticate_head(front.load()) + ->atomic_read_next(Key, Key_tweak, domesticate_queue) != nullptr; + } + + /** + * Pushes a list of messages to the queue. Each message from first to + * last should be linked together through their next pointers. + * + * The Domesticator here is used only on pointers read from the head. See + * the commentary on the class. + */ + template + void enqueue( + freelist::HeadPtr first, + freelist::HeadPtr last, + Domesticator_head domesticate_head) + { + invariant(); + freelist::Object::atomic_store_null(last, Key, Key_tweak); + + // Exchange needs to be acq_rel. + // * It needs to be a release, so nullptr in next is visible. + // * Needs to be acquire, so linking into the list does not race with + // the other threads nullptr init of the next field. + freelist::QueuePtr prev = + back.exchange(capptr_rewild(last), std::memory_order_acq_rel); + + if (SNMALLOC_LIKELY(prev != nullptr)) + { + freelist::Object::atomic_store_next( + domesticate_head(prev), first, Key, Key_tweak); + return; + } + + front.store(capptr_rewild(first)); + } + + /** + * Destructively iterate the queue. Each queue element is removed and fed + * to the callback in turn. The callback may return false to stop iteration + * early (but must have processed the element it was given!). + * + * Takes a domestication callback for each of "pointers read from head" and + * "pointers read from queue". See the commentary on the class. + */ + template< + typename Domesticator_head, + typename Domesticator_queue, + typename Cb> + void dequeue( + Domesticator_head domesticate_head, + Domesticator_queue domesticate_queue, + Cb cb) + { + invariant(); + SNMALLOC_ASSERT(front.load() != nullptr); + + // Use back to bound, so we don't handle new entries. 
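/*
 * Illustrative sketch (simplified, hypothetical node type; no capability
 * pointers or keys): the enqueue above in isolation. A multi-producer append
 * needs only one atomic exchange on the back pointer; the previous back (if
 * any) is then linked to the new chain, otherwise the chain becomes the new
 * front.
 */
#include <atomic>

struct Node
{
  std::atomic<Node*> next{nullptr};
};

struct MpscQueue
{
  std::atomic<Node*> back{nullptr};
  std::atomic<Node*> front{nullptr};

  void enqueue(Node* first, Node* last)
  {
    last->next.store(nullptr, std::memory_order_relaxed);
    // Release so the nullptr next is visible; acquire so we see the previous
    // tail's initialisation before writing through it.
    Node* prev = back.exchange(last, std::memory_order_acq_rel);
    if (prev != nullptr)
    {
      prev->next.store(first, std::memory_order_release);
      return;
    }
    front.store(first, std::memory_order_release);
  }
};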
+ auto b = back.load(std::memory_order_relaxed); + freelist::HeadPtr curr = domesticate_head(front.load()); + + while (address_cast(curr) != address_cast(b)) + { + freelist::HeadPtr next = + curr->atomic_read_next(Key, Key_tweak, domesticate_queue); + // We have observed a non-linearisable effect of the queue. + // Just go back to allocating normally. + if (SNMALLOC_UNLIKELY(next == nullptr)) + break; + // We want this element next, so start it loading. + Aal::prefetch(next.unsafe_ptr()); + if (SNMALLOC_UNLIKELY(!cb(curr))) + { + /* + * We've domesticate_queue-d next so that we can read through it, but + * we're storing it back into client-accessible memory in + * !QueueHeadsAreTame builds, so go ahead and consider it Wild again. + * On QueueHeadsAreTame builds, the subsequent domesticate_head call + * above will also be a type-level sleight of hand, but we can still + * justify it by the domesticate_queue that happened in this + * dequeue(). + */ + front = capptr_rewild(next); + invariant(); + return; + } + + curr = next; + } + + /* + * Here, we've hit the end of the queue: next is nullptr and curr has not + * been handed to the callback. The same considerations about Wildness + * above hold here. + */ + front = capptr_rewild(curr); + invariant(); + } + }; +} // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/localalloc.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/localalloc.h index c85d30b2b8f5..abf4e2e096aa 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/localalloc.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/localalloc.h @@ -22,6 +22,7 @@ #include #include + namespace snmalloc { enum Boundary @@ -78,7 +79,7 @@ namespace snmalloc // allocation on the fast path. This part of the code is inspired by // mimalloc. // Also contains remote deallocation cache. - LocalCache local_cache{&Config::unused_remote}; + LocalCache local_cache{&Config::unused_remote}; // Underlying allocator for most non-fast path operations. CoreAlloc* core_alloc{nullptr}; @@ -184,13 +185,21 @@ namespace snmalloc } return check_init([&](CoreAlloc* core_alloc) { + if (size > bits::one_at_bit(bits::BITS - 1)) + { + // Cannot allocate something that is more that half the size of the + // address space + errno = ENOMEM; + return capptr::Alloc{nullptr}; + } // Grab slab of correct size // Set remote as large allocator remote. auto [chunk, meta] = Config::Backend::alloc_chunk( core_alloc->get_backend_local_state(), large_size_to_chunk_size(size), PagemapEntry::encode( - core_alloc->public_state(), size_to_sizeclass_full(size))); + core_alloc->public_state(), size_to_sizeclass_full(size)), + size_to_sizeclass_full(size)); // set up meta data so sizeclass is correct, and hence alloc size, and // external pointer. #ifdef SNMALLOC_TRACING @@ -201,7 +210,7 @@ namespace snmalloc if (meta != nullptr) { meta->initialise_large( - address_cast(chunk), local_cache.entropy.get_free_list_key()); + address_cast(chunk), freelist::Object::key_root); core_alloc->laden.insert(meta); } @@ -245,8 +254,7 @@ namespace snmalloc sizeclass); }; - return local_cache.template alloc( - domesticate, size, slowpath); + return local_cache.template alloc(domesticate, size, slowpath); } /** @@ -266,20 +274,20 @@ namespace snmalloc * In the second case we need to recheck if this is a remote deallocation, * as we might acquire the originating allocator. 
*/ - SNMALLOC_SLOW_PATH void dealloc_remote_slow(capptr::Alloc p) + SNMALLOC_SLOW_PATH void + dealloc_remote_slow(const PagemapEntry& entry, capptr::Alloc p) { if (core_alloc != nullptr) { #ifdef SNMALLOC_TRACING message<1024>( - "Remote dealloc post {} ({})", + "Remote dealloc post {} ({}, {})", p.unsafe_ptr(), - alloc_size(p.unsafe_ptr())); + alloc_size(p.unsafe_ptr()), + address_cast(entry.get_slab_metadata())); #endif - const PagemapEntry& entry = - Config::Backend::template get_metaentry(address_cast(p)); local_cache.remote_dealloc_cache.template dealloc( - entry.get_remote()->trunc_id(), p); + entry.get_slab_metadata(), p, &local_cache.entropy); post_remote_cache(); return; } @@ -386,7 +394,7 @@ namespace snmalloc // Initialise the global allocator structures ensure_init(); // Grab an allocator for this thread. - init(AllocPool::acquire(&(this->local_cache))); + init(AllocPool::acquire()); } // Return all state in the fast allocator and release the underlying @@ -647,14 +655,16 @@ namespace snmalloc if (SNMALLOC_LIKELY(local_cache.remote_allocator == entry.get_remote())) { dealloc_cheri_checks(p_tame.unsafe_ptr()); - - if (SNMALLOC_LIKELY(CoreAlloc::dealloc_local_object_fast( - entry, p_tame, local_cache.entropy))) - return; - core_alloc->dealloc_local_object_slow(p_tame, entry); + core_alloc->dealloc_local_object(p_tame, entry); return; } + dealloc_remote(entry, p_tame); + } + + SNMALLOC_SLOW_PATH void + dealloc_remote(const PagemapEntry& entry, capptr::Alloc p_tame) + { RemoteAllocator* remote = entry.get_remote(); if (SNMALLOC_LIKELY(remote != nullptr)) { @@ -670,15 +680,18 @@ namespace snmalloc if (local_cache.remote_dealloc_cache.reserve_space(entry)) { local_cache.remote_dealloc_cache.template dealloc( - remote->trunc_id(), p_tame); + entry.get_slab_metadata(), p_tame, &local_cache.entropy); # ifdef SNMALLOC_TRACING message<1024>( - "Remote dealloc fast {} ({})", p_raw, alloc_size(p_raw)); + "Remote dealloc fast {} ({}, {})", + address_cast(p_tame), + alloc_size(p_tame.unsafe_ptr()), + address_cast(entry.get_slab_metadata())); # endif return; } - dealloc_remote_slow(p_tame); + dealloc_remote_slow(entry, p_tame); return; } @@ -712,7 +725,7 @@ namespace snmalloc auto pm_size = sizeclass_full_to_size(pm_sc); snmalloc_check_client( mitigations(sanity_checks), - sc == pm_sc, + (sc == pm_sc) || (p == nullptr), "Dealloc rounded size mismatch: {} != {}", rsize, pm_size); @@ -765,7 +778,7 @@ namespace snmalloc // entry for the first chunk of memory, that states it represents a // large object, so we can pull the check for null off the fast path. const PagemapEntry& entry = - Config::Backend::template get_metaentry(address_cast(p_raw)); + Config::Backend::get_metaentry(address_cast(p_raw)); return sizeclass_full_to_size(entry.get_sizeclass()); #endif @@ -809,6 +822,57 @@ namespace snmalloc } } + /** + * @brief Get the client meta data for the snmalloc allocation covering this + * pointer. 
+ */ + typename Config::ClientMeta::DataRef get_client_meta_data(void* p) + { + const PagemapEntry& entry = + Config::Backend::get_metaentry(address_cast(p)); + + size_t index = slab_index(entry.get_sizeclass(), address_cast(p)); + + auto* meta_slab = entry.get_slab_metadata(); + + if (SNMALLOC_UNLIKELY(entry.is_backend_owned())) + { + error("Cannot access meta-data for write for freed memory!"); + } + + if (SNMALLOC_UNLIKELY(meta_slab == nullptr)) + { + error( + "Cannot access meta-data for non-snmalloc object in writable form!"); + } + + return meta_slab->get_meta_for_object(index); + } + + /** + * @brief Get the client meta data for the snmalloc allocation covering this + * pointer. + */ + std::add_const_t + get_client_meta_data_const(void* p) + { + const PagemapEntry& entry = + Config::Backend::template get_metaentry(address_cast(p)); + + size_t index = slab_index(entry.get_sizeclass(), address_cast(p)); + + auto* meta_slab = entry.get_slab_metadata(); + + if (SNMALLOC_UNLIKELY( + (meta_slab == nullptr) || (entry.is_backend_owned()))) + { + static typename Config::ClientMeta::StorageType null_meta_store{}; + return Config::ClientMeta::get(&null_meta_store, 0); + } + + return meta_slab->get_meta_for_object(index); + } + /** * Returns the number of remaining bytes in an object. * @@ -862,7 +926,7 @@ namespace snmalloc * core allocator for use by this local allocator then it needs to access * this field. */ - LocalCache& get_local_cache() + LocalCache& get_local_cache() { return local_cache; } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/localcache.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/localcache.h index cfbbaa576f2f..5a63e281d910 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/localcache.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/localcache.h @@ -37,6 +37,7 @@ namespace snmalloc // This is defined on its own, so that it can be embedded in the // thread local fast allocator, but also referenced from the // thread local core allocator. + template struct LocalCache { // Free list per small size class. These are used for @@ -54,7 +55,7 @@ namespace snmalloc /** * Remote deallocations for other threads */ - RemoteDeallocCache remote_dealloc_cache; + RemoteDeallocCache remote_dealloc_cache; constexpr LocalCache(RemoteAllocator* remote_allocator) : remote_allocator(remote_allocator) @@ -63,10 +64,10 @@ namespace snmalloc /** * Return all the free lists to the allocator. Used during thread teardown. 
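/*
 * Illustrative sketch (hypothetical, heavily simplified; the real lists
 * carry encoded pointers and keys): the shape of the per-size-class fast
 * free lists above. Allocation pops the head of the matching list if one is
 * available and otherwise defers to a caller-supplied slow path.
 */
#include <array>
#include <cstddef>

struct FreeNode
{
  FreeNode* next;
};

constexpr size_t NUM_CLASSES = 64; // assumed

struct FastLists
{
  std::array<FreeNode*, NUM_CLASSES> heads{};

  template<typename Slow>
  void* alloc(size_t sizeclass, Slow slowpath)
  {
    FreeNode*& head = heads[sizeclass];
    if (head != nullptr)
    {
      FreeNode* p = head;
      head = p->next; // pop the first free object
      return p;
    }
    return slowpath(sizeclass); // refill / large / error path
  }
};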
*/ - template + template bool flush(typename Config::LocalState* local_state, DeallocFun dealloc) { - auto& key = entropy.get_free_list_key(); + auto& key = freelist::Object::key_root; auto domesticate = [local_state](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { return capptr_domesticate(local_state, p); @@ -85,19 +86,15 @@ namespace snmalloc } } - return remote_dealloc_cache.post( + return remote_dealloc_cache.template post( local_state, remote_allocator->trunc_id()); } - template< - ZeroMem zero_mem, - typename Config, - typename Slowpath, - typename Domesticator> + template SNMALLOC_FAST_PATH capptr::Alloc alloc(Domesticator domesticate, size_t size, Slowpath slowpath) { - auto& key = entropy.get_free_list_key(); + auto& key = freelist::Object::key_root; smallsizeclass_t sizeclass = size_to_sizeclass(size); auto& fl = small_fast_free_lists[sizeclass]; if (SNMALLOC_LIKELY(!fl.empty())) diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/metadata.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/metadata.h index 8b1314e2e17d..968902da8ed9 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/metadata.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/metadata.h @@ -189,6 +189,7 @@ namespace snmalloc { return meta &= ~META_BOUNDARY_BIT; } + ///@} /** @@ -368,21 +369,26 @@ namespace snmalloc class FrontendSlabMetadata_Trait { private: - template + template friend class FrontendSlabMetadata; // Can only be constructed by FrontendSlabMetadata - FrontendSlabMetadata_Trait() = default; + constexpr FrontendSlabMetadata_Trait() = default; }; /** * The FrontendSlabMetadata represent the metadata associated with a single * slab. */ - template + template class FrontendSlabMetadata : public FrontendSlabMetadata_Trait { public: + /** + * Type that encapsulates logic for accessing client meta-data. + */ + using ClientMeta = ClientMeta_; + /** * Used to link slab metadata together in various other data-structures. * This is used with `SeqSet` and so may actually hold a subclass of this @@ -424,6 +430,13 @@ namespace snmalloc */ bool large_ = false; + /** + * Stores client meta-data for this slab. This must be last element in the + * slab. The meta data will actually allocate multiple elements after this + * type, so that client_meta_[1] will work for the required meta-data size. + */ + SNMALLOC_NO_UNIQUE_ADDRESS typename ClientMeta::StorageType client_meta_{}; + uint16_t& needed() { return needed_; @@ -443,7 +456,7 @@ namespace snmalloc static_assert( std::is_base_of::value, "Template should be a subclass of FrontendSlabMetadata"); - free_queue.init(slab, key); + free_queue.init(slab, key, this->as_key_tweak()); // Set up meta data as if the entire slab has been turned into a free // list. This means we don't have to check for special cases where we have // returned all the elements, but this is a slab that is still being bump @@ -452,6 +465,9 @@ namespace snmalloc set_sleeping(sizeclass, 0); large_ = false; + + new (&client_meta_) + typename ClientMeta::StorageType[get_client_storage_count(sizeclass)]; } /** @@ -462,13 +478,15 @@ namespace snmalloc void initialise_large(address_t slab, const FreeListKey& key) { // We will push to this just to make the fast path clean. - free_queue.init(slab, key); + free_queue.init(slab, key, this->as_key_tweak()); // Flag to detect that it is a large alloc on the slow path large_ = true; // Jump to slow path on first deallocation. 
needed() = 1; + + new (&client_meta_) typename ClientMeta::StorageType(); } /** @@ -483,6 +501,59 @@ namespace snmalloc return (--needed()) == 0; } + class ReturnObjectsIterator + { + uint16_t _batch; + FrontendSlabMetadata* _meta; + + static_assert(sizeof(_batch) * 8 > MAX_CAPACITY_BITS); + + public: + ReturnObjectsIterator(uint16_t n, FrontendSlabMetadata* m) + : _batch(n), _meta(m) + {} + + template + SNMALLOC_FAST_PATH bool step() + { + // The first update must always return some positive number of objects. + SNMALLOC_ASSERT(!first || (_batch != 0)); + + /* + * Stop iteration when there are no more objects to return. Perform + * this test only on non-first steps to avoid a branch on the hot path. + */ + if (!first && _batch == 0) + return false; + + if (SNMALLOC_LIKELY(_batch < _meta->needed())) + { + // Will not hit threshold for state transition + _meta->needed() -= _batch; + return false; + } + + // Hit threshold for state transition, may yet hit another + _batch -= _meta->needed(); + _meta->needed() = 0; + return true; + } + }; + + /** + * A batch version of return_object. + * + * Returns an iterator that should have `.step<>()` called on it repeatedly + * until it returns `false`. The first step should invoke `.step()` + * while the rest should invoke `.step()`. After each + * true-returning `.step()`, the caller should run the slow-path code to + * update the rest of the metadata for this slab. + */ + ReturnObjectsIterator return_objects(uint16_t n) + { + return ReturnObjectsIterator(n, this); + } + bool is_unused() { return needed() == 0; @@ -556,10 +627,12 @@ namespace snmalloc LocalEntropy& entropy, smallsizeclass_t sizeclass) { - auto& key = entropy.get_free_list_key(); + auto& key = freelist::Object::key_root; std::remove_reference_t tmp_fl; - auto remaining = meta->free_queue.close(tmp_fl, key); + + auto remaining = + meta->free_queue.close(tmp_fl, key, meta->as_key_tweak()); auto p = tmp_fl.take(key, domesticate); fast_free_list = tmp_fl; @@ -581,7 +654,45 @@ namespace snmalloc // start of the slab. [[nodiscard]] address_t get_slab_interior(const FreeListKey& key) const { - return address_cast(free_queue.read_head(0, key)); + return address_cast(free_queue.read_head(0, key, this->as_key_tweak())); + } + + [[nodiscard]] SNMALLOC_FAST_PATH address_t as_key_tweak() const noexcept + { + return as_key_tweak(address_cast(this)); + } + + [[nodiscard]] SNMALLOC_FAST_PATH static address_t + as_key_tweak(address_t self) + { + return self / alignof(FrontendSlabMetadata); + } + + typename ClientMeta::DataRef get_meta_for_object(size_t index) + { + return ClientMeta::get(&client_meta_, index); + } + + static size_t get_client_storage_count(smallsizeclass_t sizeclass) + { + auto count = sizeclass_to_slab_object_count(sizeclass); + auto result = ClientMeta::required_count(count); + if (result == 0) + return 1; + return result; + } + + static size_t get_extra_bytes(sizeclass_t sizeclass) + { + if (sizeclass.is_small()) + // We remove one from the extra-bytes as there is one in the metadata to + // start with. + return (get_client_storage_count(sizeclass.as_small()) - 1) * + sizeof(typename ClientMeta::StorageType); + + // For large classes there is only a single entry, so this is covered by + // the existing entry in the metaslab, and further bytes are not required. + return 0; } }; @@ -589,19 +700,19 @@ namespace snmalloc * Entry stored in the pagemap. See docs/AddressSpace.md for the full * FrontendMetaEntry lifecycle. 
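/*
 * Illustrative sketch (hypothetical, simplified from return_objects()
 * above): draining a batch of n freed objects against a slab's "needed"
 * counter. Each time the counter reaches zero the caller must run the slab
 * state-transition slow path, which is assumed to leave "needed" at a new
 * positive threshold.
 */
#include <cassert>
#include <cstdint>

struct SlabCounter
{
  uint16_t needed;
};

template<typename OnThreshold>
void return_batch(SlabCounter& slab, uint32_t n, OnThreshold on_threshold)
{
  while (n >= slab.needed)
  {
    n -= slab.needed;
    slab.needed = 0;
    on_threshold(slab); // state transition; assumed to reset slab.needed
    if (n == 0)
      return;
    assert(slab.needed > 0);
  }
  slab.needed -= static_cast<uint16_t>(n);
}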
*/ - template + template class FrontendMetaEntry : public MetaEntryBase { /** * Ensure that the template parameter is valid. */ static_assert( - std::is_convertible_v, + std::is_convertible_v, "The front end requires that the back end provides slab metadata that is " "compatible with the front-end's structure"); public: - using SlabMetadata = BackendSlabMetadata; + using SlabMetadata = SlabMetadataType; constexpr FrontendMetaEntry() = default; @@ -612,9 +723,8 @@ namespace snmalloc * `get_remote_and_sizeclass`. */ SNMALLOC_FAST_PATH - FrontendMetaEntry(BackendSlabMetadata* meta, uintptr_t remote_and_sizeclass) - : MetaEntryBase( - unsafe_to_uintptr(meta), remote_and_sizeclass) + FrontendMetaEntry(SlabMetadata* meta, uintptr_t remote_and_sizeclass) + : MetaEntryBase(unsafe_to_uintptr(meta), remote_and_sizeclass) { SNMALLOC_ASSERT_MSG( (REMOTE_BACKEND_MARKER & remote_and_sizeclass) == 0, @@ -645,12 +755,10 @@ namespace snmalloc * guarded by an assert that this chunk is being used as a slab (i.e., has * an associated owning allocator). */ - [[nodiscard]] SNMALLOC_FAST_PATH BackendSlabMetadata* - get_slab_metadata() const + [[nodiscard]] SNMALLOC_FAST_PATH SlabMetadata* get_slab_metadata() const { - SNMALLOC_ASSERT(get_remote() != nullptr); - return unsafe_from_uintptr( - meta & ~META_BOUNDARY_BIT); + SNMALLOC_ASSERT(!is_backend_owned()); + return unsafe_from_uintptr(meta & ~META_BOUNDARY_BIT); } }; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/pool.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/pool.h index 36737207db2d..0497d1ad9f8f 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/pool.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/pool.h @@ -22,7 +22,7 @@ namespace snmalloc { template< typename TT, - SNMALLOC_CONCEPT(IsConfig) Config, + SNMALLOC_CONCEPT(Constructable) Construct, PoolState& get_state()> friend class Pool; @@ -45,50 +45,10 @@ namespace snmalloc * SingletonPoolState::pool is the default provider for the PoolState within * the Pool class. */ - template + template class SingletonPoolState { - /** - * SFINAE helper. Matched only if `T` implements `ensure_init`. Calls it - * if it exists. - */ - template - SNMALLOC_FAST_PATH static auto call_ensure_init(SharedStateHandle_*, int) - -> decltype(SharedStateHandle_::ensure_init()) - { - static_assert( - std::is_same::value, - "SFINAE parameter, should only be used with Config"); - SharedStateHandle_::ensure_init(); - } - - /** - * SFINAE helper. Matched only if `T` does not implement `ensure_init`. - * Does nothing if called. - */ - template - SNMALLOC_FAST_PATH static auto call_ensure_init(SharedStateHandle_*, long) - { - static_assert( - std::is_same::value, - "SFINAE parameter, should only be used with Config"); - } - - /** - * Call `Config::ensure_init()` if it is implemented, do nothing - * otherwise. - */ - SNMALLOC_FAST_PATH static void ensure_init() - { - call_ensure_init(nullptr, 0); - } - - static void make_pool(PoolState*) noexcept - { - ensure_init(); - // Default initializer already called on PoolState, no need to use - // placement new. - } + static void make_pool(PoolState*) noexcept {} public: /** @@ -101,6 +61,22 @@ namespace snmalloc } }; + /** + * @brief Default construct helper for the pool. Just uses `new`. This can't + * be used by the allocator pool as it has not created memory yet. 
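/*
 * Illustrative sketch: a lambda-based critical-section helper of the shape
 * used by the pool code below. snmalloc's real `with` is defined elsewhere
 * and may differ; this version just wraps any BasicLockable lock so the body
 * always runs under the lock and the lock is always released.
 */
#include <mutex>

template<typename Lock, typename F>
void with(Lock& lock, F&& body)
{
  std::lock_guard<Lock> guard(lock);
  body();
}

// Usage: with(pool.lock, [&]() { /* manipulate pool.front / pool.back */ });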
+ * + * @tparam T + */ + template + class DefaultConstruct + { + public: + static capptr::Alloc make() + { + return capptr::Alloc::unsafe_from(new T()); + } + }; + /** * Wrapper class to access a pool of a particular type of object. * @@ -116,17 +92,17 @@ namespace snmalloc */ template< typename T, - SNMALLOC_CONCEPT(IsConfig) Config, - PoolState& get_state() = SingletonPoolState::pool> + SNMALLOC_CONCEPT(Constructable) ConstructT = DefaultConstruct, + PoolState& get_state() = SingletonPoolState::pool> class Pool { public: - template - static T* acquire(Args&&... args) + static T* acquire() { PoolState& pool = get_state(); - { - FlagLock f(pool.lock); + + T* result{nullptr}; + with(pool.lock, [&]() { if (pool.front != nullptr) { auto p = pool.front; @@ -137,26 +113,21 @@ namespace snmalloc } pool.front = next; p->set_in_use(); - return p.unsafe_ptr(); + result = p.unsafe_ptr(); } - } + }); - auto raw = - Config::Backend::template alloc_meta_data(nullptr, sizeof(T)); + if (result != nullptr) + return result; - if (raw == nullptr) - { - Config::Pal::error("Failed to initialise thread local allocator."); - } + auto p = ConstructT::make(); - auto p = capptr::Alloc::unsafe_from(new (raw.unsafe_ptr()) - T(std::forward(args)...)); + with(pool.lock, [&]() { + p->list_next = pool.list; + pool.list = p; - FlagLock f(pool.lock); - p->list_next = pool.list; - pool.list = p; - - p->set_in_use(); + p->set_in_use(); + }); return p.unsafe_ptr(); } @@ -180,11 +151,13 @@ namespace snmalloc // Returns a linked list of all objects in the stack, emptying the stack. if (p == nullptr) { - FlagLock f(pool.lock); - auto result = pool.front; - pool.front = nullptr; - pool.back = nullptr; - return result.unsafe_ptr(); + T* result; + with(pool.lock, [&]() { + result = pool.front.unsafe_ptr(); + pool.front = nullptr; + pool.back = nullptr; + }); + return result; } return p->next.unsafe_ptr(); @@ -199,18 +172,18 @@ namespace snmalloc { PoolState& pool = get_state(); last->next = nullptr; - FlagLock f(pool.lock); - - if (pool.front == nullptr) - { - pool.front = capptr::Alloc::unsafe_from(first); - } - else - { - pool.back->next = capptr::Alloc::unsafe_from(first); - } + with(pool.lock, [&]() { + if (pool.front == nullptr) + { + pool.front = capptr::Alloc::unsafe_from(first); + } + else + { + pool.back->next = capptr::Alloc::unsafe_from(first); + } - pool.back = capptr::Alloc::unsafe_from(last); + pool.back = capptr::Alloc::unsafe_from(last); + }); } /** @@ -222,18 +195,19 @@ namespace snmalloc { PoolState& pool = get_state(); last->next = nullptr; - FlagLock f(pool.lock); - if (pool.front == nullptr) - { - pool.back = capptr::Alloc::unsafe_from(last); - } - else - { - last->next = pool.front; - pool.back->next = capptr::Alloc::unsafe_from(first); - } - pool.front = capptr::Alloc::unsafe_from(first); + with(pool.lock, [&]() { + if (pool.front == nullptr) + { + pool.back = capptr::Alloc::unsafe_from(last); + } + else + { + last->next = pool.front; + pool.back->next = capptr::Alloc::unsafe_from(first); + } + pool.front = capptr::Alloc::unsafe_from(first); + }); } static T* iterate(T* p = nullptr) diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/pooled.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/pooled.h index a812bc924cb1..4e7c76884166 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/pooled.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/pooled.h @@ -5,16 +5,40 @@ namespace snmalloc { + template + struct Range + { + CapPtr base; + size_t length; + }; + template class PoolState; +#ifdef 
__cpp_concepts + template + concept Constructable = requires() { + { + C::make() + } -> ConceptSame>; + }; +#endif // __cpp_concepts + + /** + * Required to be implemented by all types that are pooled. + * + * The constructor of any inherited type must take a Range& as its first + * argument. This represents the leftover from pool allocation rounding up to + * the nearest power of 2. It is valid to ignore this argument, but can be + * used to optimise meta-data usage at startup. + */ template class Pooled { public: template< typename TT, - SNMALLOC_CONCEPT(IsConfig) Config, + SNMALLOC_CONCEPT(Constructable) Construct, PoolState& get_state()> friend class Pool; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/remoteallocator.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/remoteallocator.h index 2d15e6d1119a..a207d12f235c 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/remoteallocator.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/remoteallocator.h @@ -1,43 +1,296 @@ #pragma once -#include "../ds/ds.h" -#include "freelist.h" -#include "metadata.h" -#include "sizeclasstable.h" +#include "freelist_queue.h" -#include -#include +#include namespace snmalloc { + class RemoteMessageAssertions; + /** + * Entries on a remote message queue. Logically, this is a pair of freelist + * linkages, together with some metadata: + * + * - a cyclic list ("ring") of free objects (atypically for rings, there is + * no sentinel node here: the message itself is a free object), * - * A RemoteAllocator is the message queue of freed objects. It exposes a MPSC - * append-only atomic queue that uses one xchg per append. + * - the length of that ring * - * The internal pointers are considered QueuePtr-s to support deployment - * scenarios in which the RemoteAllocator itself is exposed to the client. - * This is excessively paranoid in the common case that the RemoteAllocator-s - * are as "hard" for the client to reach as the Pagemap, which we trust to - * store not just Tame CapPtr<>s but raw C++ pointers. + * - the linkage for the message queue itself + * + * In practice, there is a fair bit more going on here: the ring of free + * objects is not entirely encoded as a freelist. While traversing the + * successor pointers in objects on the ring will eventually lead back to + * this RemoteMessage object, the successor pointer from this object is + * encoded as a relative displacement. This is guaranteed to be physically + * smaller than a full pointer (because slabs are smaller than the whole + * address space). This gives us enough room to pack in the length of the + * ring, without needing to grow the structure. + */ + class BatchedRemoteMessage + { + friend class BatchedRemoteMessageAssertions; + + freelist::Object::T<> free_ring; + freelist::Object::T<> message_link; + + static_assert( + sizeof(free_ring.next_object) >= sizeof(void*), + "BatchedRemoteMessage bitpacking needs sizeof(void*) in next_object"); + + public: + static auto emplace_in_alloc(capptr::Alloc alloc) + { + return CapPtr::unsafe_from( + new (alloc.unsafe_ptr()) BatchedRemoteMessage()); + } + + static auto mk_from_freelist_builder( + freelist::Builder& flb, + const FreeListKey& key, + address_t key_tweak) + { + size_t size = flb.extract_segment_length(); + + SNMALLOC_ASSERT(size < bits::one_at_bit(MAX_CAPACITY_BITS)); + + auto [first, last] = flb.extract_segment(key, key_tweak); + + /* + * Preserve the last node's backpointer and change its type. 
Because we + * use placement new to build our RemoteMessage atop the memory of a + * freelist::Object::T<> (to avoid UB) and the constructor may nullify + * the `prev` field, put it right back. Ideally the compiler is smart + * enough to see that this is a no-op. + */ + auto last_prev = last->prev; + auto self = + CapPtr::unsafe_from( + new (last.unsafe_ptr()) BatchedRemoteMessage()); + self->free_ring.prev = last_prev; + + // XXX On CHERI, we could do a fair bit better if we had a primitive for + // extracting and discarding the offset. That probably beats the dance + // done below, but it should work as it stands. + + auto n = freelist::HeadPtr::unsafe_from( + unsafe_from_uintptr>( + (static_cast(pointer_diff_signed(self, first)) + << MAX_CAPACITY_BITS) + + size)); + + // Close the ring, storing our bit-packed value in the next field. + freelist::Object::store_nextish( + &self->free_ring.next_object, first, key, key_tweak, n); + + return self; + } + + static freelist::HeadPtr + to_message_link(capptr::Alloc m) + { + return pointer_offset(m, offsetof(BatchedRemoteMessage, message_link)) + .as_reinterpret>(); + } + + static capptr::Alloc + from_message_link(freelist::HeadPtr chainPtr) + { + return pointer_offset_signed( + chainPtr, + -static_cast( + offsetof(BatchedRemoteMessage, message_link))) + .as_reinterpret(); + } + + template + SNMALLOC_FAST_PATH static std::pair + open_free_ring( + capptr::Alloc m, + size_t objsize, + const FreeListKey& key, + address_t key_tweak, + Domesticator_queue domesticate) + { + uintptr_t encoded = + m->free_ring.read_next(key, key_tweak, domesticate).unsafe_uintptr(); + + uint16_t decoded_size = + static_cast(encoded) & bits::mask_bits(MAX_CAPACITY_BITS); + static_assert(sizeof(decoded_size) * 8 > MAX_CAPACITY_BITS); + + /* + * Derive an out-of-bounds pointer to the next allocation, then use the + * authmap to reconstruct an in-bounds version, which we then immediately + * bound and rewild and then domesticate (how strange). + * + * XXX See above re: doing better on CHERI. + */ + auto next = domesticate( + capptr_rewild( + Config::Backend::capptr_rederive_alloc( + pointer_offset_signed( + m, static_cast(encoded) >> MAX_CAPACITY_BITS), + objsize)) + .template as_static>()); + + if constexpr (mitigations(freelist_backward_edge)) + { + next->check_prev( + signed_prev(address_cast(m), address_cast(next), key, key_tweak)); + } + else + { + UNUSED(key); + UNUSED(key_tweak); + } + + return {next.template as_static>(), decoded_size}; + } + + template + static uint16_t ring_size( + capptr::Alloc m, + const FreeListKey& key, + address_t key_tweak, + Domesticator_queue domesticate) + { + uintptr_t encoded = + m->free_ring.read_next(key, key_tweak, domesticate).unsafe_uintptr(); + + uint16_t decoded_size = + static_cast(encoded) & bits::mask_bits(MAX_CAPACITY_BITS); + static_assert(sizeof(decoded_size) * 8 > MAX_CAPACITY_BITS); + + if constexpr (mitigations(freelist_backward_edge)) + { + /* + * Like above, but we don't strictly need to rebound the pointer, + * since it's only used internally. Still, doesn't hurt to bound + * to the free list linkage. 
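/*
 * Illustrative sketch (hypothetical constants and names): packing a ring
 * length and a signed displacement to the ring's first element into one
 * pointer-sized word, relying on the displacement fitting in the bits above
 * CAPACITY_BITS because slabs are much smaller than the address space.
 */
#include <cassert>
#include <cstdint>

constexpr unsigned CAPACITY_BITS = 11; // assumed stand-in for MAX_CAPACITY_BITS
constexpr uintptr_t CAPACITY_MASK = (uintptr_t(1) << CAPACITY_BITS) - 1;

inline uintptr_t pack(intptr_t displacement, uint16_t size)
{
  return (static_cast<uintptr_t>(displacement) << CAPACITY_BITS) +
    (size & CAPACITY_MASK);
}

inline void unpack(uintptr_t encoded, intptr_t& displacement, uint16_t& size)
{
  size = static_cast<uint16_t>(encoded & CAPACITY_MASK);
  // Arithmetic shift recovers the sign-extended displacement.
  displacement = static_cast<intptr_t>(encoded) >> CAPACITY_BITS;
}

inline void pack_example()
{
  intptr_t d;
  uint16_t s;
  unpack(pack(-4096, 37), d, s);
  assert(d == -4096 && s == 37);
}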
+ */ + auto next = domesticate( + capptr_rewild( + Config::Backend::capptr_rederive_alloc( + pointer_offset_signed( + m, static_cast(encoded) >> MAX_CAPACITY_BITS), + sizeof(freelist::Object::T<>))) + .template as_static>()); + + next->check_prev( + signed_prev(address_cast(m), address_cast(next), key, key_tweak)); + } + else + { + UNUSED(key); + UNUSED(key_tweak); + UNUSED(domesticate); + } + + return decoded_size; + } + }; + + class BatchedRemoteMessageAssertions + { + static_assert( + (DEALLOC_BATCH_RINGS == 0) || + (sizeof(BatchedRemoteMessage) <= MIN_ALLOC_SIZE)); + static_assert(offsetof(BatchedRemoteMessage, free_ring) == 0); + + static_assert( + (DEALLOC_BATCH_RINGS == 0) || + (MAX_SLAB_SPAN_BITS + MAX_CAPACITY_BITS < 8 * sizeof(void*)), + "Ring bit-stuffing trick can't reach far enough to enclose a slab"); + }; + + /** The type of a remote message when we are not batching messages onto + * rings. * - * While we could try to condition the types used here on a flag in the - * backend's `struct Flags Options` value, we instead expose two domesticator - * callbacks at the interface and are careful to use one for the front and - * back values and the other for pointers read from the queue itself. That's - * not ideal, but it lets the client condition its behavior appropriately and - * prevents us from accidentally following either of these pointers in generic - * code. + * Relative to BatchRemoteMessage, this type is smaller, as it contains only + * a single linkage, to the next message. (And possibly a backref, if + * mitigations(freelist_backward_edge) is enabled.) + */ + class SingletonRemoteMessage + { + friend class SingletonRemoteMessageAssertions; + + freelist::Object::T<> message_link; + + public: + static auto emplace_in_alloc(capptr::Alloc alloc) + { + return CapPtr::unsafe_from( + new (alloc.unsafe_ptr()) SingletonRemoteMessage()); + } + + static freelist::HeadPtr + to_message_link(capptr::Alloc m) + { + return pointer_offset(m, offsetof(SingletonRemoteMessage, message_link)) + .as_reinterpret>(); + } + + static capptr::Alloc + from_message_link(freelist::HeadPtr chainPtr) + { + return pointer_offset_signed( + chainPtr, + -static_cast( + offsetof(SingletonRemoteMessage, message_link))) + .as_reinterpret(); + } + + template + SNMALLOC_FAST_PATH static std::pair + open_free_ring( + capptr::Alloc m, + size_t, + const FreeListKey&, + address_t, + Domesticator_queue) + { + return { + m.as_reinterpret>(), static_cast(1)}; + } + + template + static uint16_t ring_size( + capptr::Alloc, + const FreeListKey&, + address_t, + Domesticator_queue) + { + return 1; + } + }; + + class SingletonRemoteMessageAssertions + { + static_assert(sizeof(SingletonRemoteMessage) <= MIN_ALLOC_SIZE); + static_assert( + sizeof(SingletonRemoteMessage) == sizeof(freelist::Object::T<>)); + static_assert(offsetof(SingletonRemoteMessage, message_link) == 0); + }; + + using RemoteMessage = std::conditional_t< + (DEALLOC_BATCH_RINGS > 0), + BatchedRemoteMessage, + SingletonRemoteMessage>; + + static_assert(sizeof(RemoteMessage) <= MIN_ALLOC_SIZE); + + /** + * A RemoteAllocator is the message queue of freed objects. It builds on the + * FreeListMPSCQ but encapsulates knowledge that the objects are actually + * RemoteMessage-s and not just any freelist::object::T<>s. * - * `domesticate_head` is used for the pointer used to reach the of the queue, - * while `domesticate_queue` is used to traverse the first link in the queue - * itself. 
In the case that the RemoteAllocator is not easily accessible to - * the client, `domesticate_head` can just be a type coersion, and - * `domesticate_queue` should perform actual validation. If the - * RemoteAllocator is exposed to the client, both Domesticators should perform - * validation. + * RemoteAllocator-s may be exposed to client tampering. As a result, + * pointer domestication may be necessary. See the documentation for + * FreeListMPSCQ for details. */ - struct alignas(REMOTE_MIN_ALIGN) RemoteAllocator + struct RemoteAllocator { /** * Global key for all remote lists. @@ -49,49 +302,37 @@ namespace snmalloc */ inline static FreeListKey key_global{0xdeadbeef, 0xbeefdead, 0xdeadbeef}; - using alloc_id_t = address_t; + FreeListMPSCQ list; - // Store the message queue on a separate cacheline. It is mutable data that - // is read by other threads. - alignas(CACHELINE_SIZE) freelist::AtomicQueuePtr back{nullptr}; - // Store the two ends on different cache lines as access by different - // threads. - alignas(CACHELINE_SIZE) freelist::AtomicQueuePtr front{nullptr}; - // Fake first entry - freelist::Object::T stub{}; + using alloc_id_t = address_t; constexpr RemoteAllocator() = default; void invariant() { - SNMALLOC_ASSERT( - (address_cast(front.load()) == address_cast(&stub)) || - (back != nullptr)); + list.invariant(); } void init() { - freelist::HeadPtr stub_ptr = freelist::HeadPtr::unsafe_from(&stub); - freelist::Object::atomic_store_null(stub_ptr, key_global); - front.store(freelist::QueuePtr::unsafe_from(&stub)); - back.store(nullptr, std::memory_order_relaxed); - invariant(); + list.init(); } - freelist::QueuePtr destroy() + template + void destroy_and_iterate(Domesticator_queue domesticate, Cb cb) { - freelist::QueuePtr fnt = front.load(); - back.store(nullptr, std::memory_order_relaxed); - if (address_cast(front.load()) == address_cast(&stub)) - return nullptr; - return fnt; + auto cbwrap = [cb](freelist::HeadPtr p) SNMALLOC_FAST_PATH_LAMBDA { + cb(RemoteMessage::from_message_link(p)); + }; + + return list.destroy_and_iterate(domesticate, cbwrap); } - template - inline bool can_dequeue(Domesticator_head domesticate_head) + template + inline bool can_dequeue( + Domesticator_head domesticate_head, Domesticator_queue domesticate_queue) { - return domesticate_head(front.load()) - ->atomic_read_next(key_global, domesticate_head) == nullptr; + return list.can_dequeue(domesticate_head, domesticate_queue); } /** @@ -103,28 +344,14 @@ namespace snmalloc */ template void enqueue( - freelist::HeadPtr first, - freelist::HeadPtr last, + capptr::Alloc first, + capptr::Alloc last, Domesticator_head domesticate_head) { - invariant(); - freelist::Object::atomic_store_null(last, key_global); - - // Exchange needs to be acq_rel. - // * It needs to be a release, so nullptr in next is visible. - // * Needs to be acquire, so linking into the list does not race with - // the other threads nullptr init of the next field. 
- freelist::QueuePtr prev = - back.exchange(capptr_rewild(last), std::memory_order_acq_rel); - - if (SNMALLOC_LIKELY(prev != nullptr)) - { - freelist::Object::atomic_store_next( - domesticate_head(prev), first, key_global); - return; - } - - front.store(capptr_rewild(first)); + list.enqueue( + RemoteMessage::to_message_link(first), + RemoteMessage::to_message_link(last), + domesticate_head); } /** @@ -144,49 +371,10 @@ namespace snmalloc Domesticator_queue domesticate_queue, Cb cb) { - invariant(); - SNMALLOC_ASSERT(front.load() != nullptr); - - // Use back to bound, so we don't handle new entries. - auto b = back.load(std::memory_order_relaxed); - freelist::HeadPtr curr = domesticate_head(front.load()); - - while (address_cast(curr) != address_cast(b)) - { - freelist::HeadPtr next = - curr->atomic_read_next(key_global, domesticate_queue); - // We have observed a non-linearisable effect of the queue. - // Just go back to allocating normally. - if (SNMALLOC_UNLIKELY(next == nullptr)) - break; - // We want this element next, so start it loading. - Aal::prefetch(next.unsafe_ptr()); - if (SNMALLOC_UNLIKELY(!cb(curr))) - { - /* - * We've domesticate_queue-d next so that we can read through it, but - * we're storing it back into client-accessible memory in - * !QueueHeadsAreTame builds, so go ahead and consider it Wild again. - * On QueueHeadsAreTame builds, the subsequent domesticate_head call - * above will also be a type-level sleight of hand, but we can still - * justify it by the domesticate_queue that happened in this - * dequeue(). - */ - front = capptr_rewild(next); - invariant(); - return; - } - - curr = next; - } - - /* - * Here, we've hit the end of the queue: next is nullptr and curr has not - * been handed to the callback. The same considerations about Wildness - * above hold here. - */ - front = capptr_rewild(curr); - invariant(); + auto cbwrap = [cb](freelist::HeadPtr p) SNMALLOC_FAST_PATH_LAMBDA { + return cb(RemoteMessage::from_message_link(p)); + }; + list.dequeue(domesticate_head, domesticate_queue, cbwrap); } alloc_id_t trunc_id() diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/remotecache.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/remotecache.h index 96f5e09732a6..585fb9146276 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/remotecache.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/remotecache.h @@ -12,13 +12,189 @@ namespace snmalloc { + + /** + * Same-destination message batching. + * + * In addition to batching message sends (see below), we can also batch + * collections of messages destined for the same slab. This class handles + * collecting sufficiently temporally local messages destined to the same + * slab, collecting them with freelist::Builder(s), and then converting + * them to RemoteMessage rings when appropriate. + * + * In order that this class not need to know about the mechanics of actually + * pushing RemoteMessage-s around, the methods involved in "closing" rings + * -- that is, in converting freelist::Builder(s) to RemoteMessages -- take + * a callable, of template type Forward, which is given the destination + * slab('s metadata address) and the to-be-sent RemoteMessage. 
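/*
 * Illustrative sketch (hypothetical table geometry): mapping a slab's
 * metadata address to a small set of "open ring" slots, multiply-shift
 * hashing to pick the set and, on a miss, taking a free way or evicting the
 * fullest one. The hash constant follows https://github.com/skeeto/hash-prospector
 * as in the code below; everything else here is made up for illustration.
 */
#include <array>
#include <cstddef>
#include <cstdint>

constexpr size_t SET_BITS = 2; // 4 sets (assumed)
constexpr size_t ASSOC = 2;    // 2 ways per set (assumed)
constexpr size_t SLOTS = (size_t(1) << SET_BITS) * ASSOC;

struct Slot
{
  uintptr_t slab = 0; // 0 means empty
  size_t count = 0;
};

inline size_t set_index(uintptr_t slab_key)
{
  return ((slab_key * 0x7EFB352D) >> 16) & ((size_t(1) << SET_BITS) - 1);
}

// Returns the slot to use for this slab; on eviction the caller flushes the
// victim ring before reusing the slot.
inline size_t pick_slot(std::array<Slot, SLOTS>& table, uintptr_t slab_key)
{
  size_t base = set_index(slab_key) * ASSOC;

  // First pass: is a ring for this slab already open?
  for (size_t way = 0; way < ASSOC; way++)
    if (table[base + way].slab == slab_key)
      return base + way;

  // Miss: prefer a free way, otherwise the fullest (victim) way.
  size_t victim = base;
  for (size_t way = 0; way < ASSOC; way++)
  {
    size_t ix = base + way;
    if (table[ix].slab == 0)
      return ix;
    if (table[ix].count > table[victim].count)
      victim = ix;
  }
  return victim;
}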
+ */ + template + class RemoteDeallocCacheBatching + { + static_assert(RINGS > 0); + + std::array, RINGS> open_builder; + std::array open_meta = {0}; + + SNMALLOC_FAST_PATH size_t + ring_set(typename Config::PagemapEntry::SlabMetadata* meta) + { + // See https://github.com/skeeto/hash-prospector for choice of constant + return ((meta->as_key_tweak() * 0x7EFB352D) >> 16) & + bits::mask_bits(DEALLOC_BATCH_RING_SET_BITS); + } + + template + SNMALLOC_FAST_PATH void close_one_pending(Forward forward, size_t ix) + { + auto rmsg = BatchedRemoteMessage::mk_from_freelist_builder( + open_builder[ix], + freelist::Object::key_root, + Config::PagemapEntry::SlabMetadata::as_key_tweak(open_meta[ix])); + + auto& entry = Config::Backend::get_metaentry(address_cast(rmsg)); + + forward(entry.get_remote()->trunc_id(), rmsg); + + open_meta[ix] = 0; + } + + SNMALLOC_FAST_PATH void init_one_pending( + size_t ix, typename Config::PagemapEntry::SlabMetadata* meta) + { + open_builder[ix].init( + 0, + freelist::Object::key_root, + Config::PagemapEntry::SlabMetadata::as_key_tweak(open_meta[ix])); + open_meta[ix] = address_cast(meta); + } + + public: + template + SNMALLOC_FAST_PATH void dealloc( + typename Config::PagemapEntry::SlabMetadata* meta, + freelist::HeadPtr r, + LocalEntropy* entropy, + Forward forward) + { + size_t ix_set = ring_set(meta); + + for (size_t ix_way = 0; ix_way < DEALLOC_BATCH_RING_ASSOC; ix_way++) + { + size_t ix = ix_set + ix_way; + if (address_cast(meta) == open_meta[ix]) + { + open_builder[ix].add( + r, freelist::Object::key_root, meta->as_key_tweak()); + + if constexpr (mitigations(random_preserve)) + { + auto rand_limit = entropy->next_fresh_bits(MAX_CAPACITY_BITS); + if (open_builder[ix].extract_segment_length() >= rand_limit) + { + close_one_pending(forward, ix); + open_meta[ix] = 0; + } + } + else + { + UNUSED(entropy); + } + return; + } + } + + // No hit in cache, so find an available or victim line. 
+ + size_t victim_ix = ix_set; + size_t victim_size = 0; + for (size_t ix_way = 0; ix_way < DEALLOC_BATCH_RING_ASSOC; ix_way++) + { + size_t ix = ix_set + ix_way; + if (open_meta[ix] == 0) + { + victim_ix = ix; + break; + } + + size_t szix = open_builder[ix].extract_segment_length(); + if (szix > victim_size) + { + victim_size = szix; + victim_ix = ix; + } + } + + if (open_meta[victim_ix] != 0) + { + close_one_pending(forward, victim_ix); + } + init_one_pending(victim_ix, meta); + + open_builder[victim_ix].add( + r, freelist::Object::key_root, meta->as_key_tweak()); + } + + template + SNMALLOC_FAST_PATH void close_all(Forward forward) + { + for (size_t ix = 0; ix < RINGS; ix++) + { + if (open_meta[ix] != 0) + { + close_one_pending(forward, ix); + open_meta[ix] = 0; + } + } + } + + void init() + { + open_meta = {0}; + } + }; + + template + struct RemoteDeallocCacheNoBatching + { + void init() {} + + template + void close_all(Forward) + {} + + template + SNMALLOC_FAST_PATH void dealloc( + typename Config::PagemapEntry::SlabMetadata*, + freelist::HeadPtr r, + LocalEntropy* entropy, + Forward forward) + { + UNUSED(entropy); + + auto& entry = Config::Backend::get_metaentry(address_cast(r)); + forward( + entry.get_remote()->trunc_id(), + SingletonRemoteMessage::emplace_in_alloc(r.as_void())); + } + }; + + template + using RemoteDeallocCacheBatchingImpl = std::conditional_t< + (DEALLOC_BATCH_RINGS > 0), + RemoteDeallocCacheBatching, + RemoteDeallocCacheNoBatching>; + /** * Stores the remote deallocation to batch them before sending */ + template struct RemoteDeallocCache { std::array, REMOTE_SLOTS> list; + RemoteDeallocCacheBatchingImpl batching; + /** * The total amount of memory we are waiting for before we will dispatch * to other allocators. Zero can mean we have not initialised the allocator @@ -54,10 +230,12 @@ namespace snmalloc * This does not require initialisation to be safely called. 
*/ template - SNMALLOC_FAST_PATH bool reserve_space(const Entry& entry) + SNMALLOC_FAST_PATH bool reserve_space(const Entry& entry, uint16_t n = 1) { + static_assert(sizeof(n) * 8 > MAX_CAPACITY_BITS); + auto size = - static_cast(sizeclass_full_to_size(entry.get_sizeclass())); + n * static_cast(sizeclass_full_to_size(entry.get_sizeclass())); bool result = capacity > size; if (result) @@ -66,17 +244,37 @@ namespace snmalloc } template - SNMALLOC_FAST_PATH void - dealloc(RemoteAllocator::alloc_id_t target_id, capptr::Alloc p) + SNMALLOC_FAST_PATH void forward( + RemoteAllocator::alloc_id_t target_id, capptr::Alloc msg) + { + list[get_slot(target_id, 0)].add( + RemoteMessage::to_message_link(msg), + RemoteAllocator::key_global, + NO_KEY_TWEAK); + } + + template + SNMALLOC_FAST_PATH void dealloc( + typename Config::PagemapEntry::SlabMetadata* meta, + capptr::Alloc p, + LocalEntropy* entropy) { SNMALLOC_ASSERT(initialised); - auto r = p.template as_reinterpret>(); - list[get_slot(target_id, 0)].add( - r, RemoteAllocator::key_global); + auto r = freelist::Object::make(p); + + batching.dealloc( + meta, + r, + entropy, + [this]( + RemoteAllocator::alloc_id_t target_id, + capptr::Alloc msg) { + forward(target_id, msg); + }); } - template + template bool post( typename Config::LocalState* local_state, RemoteAllocator::alloc_id_t id) { @@ -91,6 +289,12 @@ namespace snmalloc return capptr_domesticate(local_state, p); }; + batching.close_all([this]( + RemoteAllocator::alloc_id_t target_id, + capptr::Alloc msg) { + forward(target_id, msg); + }); + while (true) { auto my_slot = get_slot(id, post_round); @@ -102,9 +306,11 @@ namespace snmalloc if (!list[i].empty()) { - auto [first, last] = list[i].extract_segment(key); + auto [first_, last_] = list[i].extract_segment(key, NO_KEY_TWEAK); + auto first = RemoteMessage::from_message_link(first_); + auto last = RemoteMessage::from_message_link(last_); const auto& entry = - Config::Backend::get_metaentry(address_cast(first)); + Config::Backend::get_metaentry(address_cast(first_)); auto remote = entry.get_remote(); // If the allocator is not correctly aligned, then the bit that is // set implies this is used by the backend, and we should not be @@ -135,7 +341,7 @@ namespace snmalloc // so take copy of the head, mark the last element, // and clear the original list. freelist::Iter<> resend; - list[my_slot].close(resend, key); + list[my_slot].close(resend, key, NO_KEY_TWEAK); post_round++; @@ -147,11 +353,11 @@ namespace snmalloc const auto& entry = Config::Backend::get_metaentry(address_cast(r)); auto i = entry.get_remote()->trunc_id(); size_t slot = get_slot(i, post_round); - list[slot].add(r, key); + list[slot].add(r, key, NO_KEY_TWEAK); } } - // Reset capacity as we have empty everything + // Reset capacity as we have emptied everything capacity = REMOTE_CACHE; return sent_something; @@ -175,9 +381,11 @@ namespace snmalloc { // We do not need to initialise with a particular slab, so pass // a null address. 
- l.init(0, RemoteAllocator::key_global); + l.init(0, RemoteAllocator::key_global, NO_KEY_TWEAK); } capacity = REMOTE_CACHE; + + batching.init(); } }; } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/sizeclasstable.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/sizeclasstable.h index 2037443223d2..4dd2eec0edd7 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/sizeclasstable.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/sizeclasstable.h @@ -24,7 +24,7 @@ namespace snmalloc // For example, 24 byte allocations can be // problematic for some data due to alignment issues. auto sc = static_cast( - bits::to_exp_mant_const(size)); + bits::to_exp_mant_const(size)); SNMALLOC_ASSERT(sc == static_cast(sc)); @@ -165,10 +165,12 @@ namespace snmalloc uint16_t waking; }; + static_assert(sizeof(sizeclass_data_slow::capacity) * 8 > MAX_CAPACITY_BITS); + struct SizeClassTable { - ModArray fast_; - ModArray slow_; + ModArray fast_{}; + ModArray slow_{}; size_t DIV_MULT_SHIFT{0}; @@ -203,7 +205,7 @@ namespace snmalloc return slow_[index.raw()]; } - constexpr SizeClassTable() : fast_(), slow_(), DIV_MULT_SHIFT() + constexpr SizeClassTable() { size_t max_capacity = 0; @@ -214,12 +216,13 @@ namespace snmalloc auto& meta = fast_small(sizeclass); size_t rsize = - bits::from_exp_mant(sizeclass); + bits::from_exp_mant( + sizeclass); meta.size = rsize; size_t slab_bits = bits::max( bits::next_pow2_bits_const(MIN_OBJECT_COUNT * rsize), MIN_CHUNK_BITS); - meta.slab_mask = bits::one_at_bit(slab_bits) - 1; + meta.slab_mask = bits::mask_bits(slab_bits); auto& meta_slow = slow(sizeclass_t::from_small_class(sizeclass)); meta_slow.capacity = @@ -244,8 +247,7 @@ namespace snmalloc { // Calculate reciprocal division constant. auto& meta = fast_small(sizeclass); - meta.div_mult = - ((bits::one_at_bit(DIV_MULT_SHIFT) - 1) / meta.size) + 1; + meta.div_mult = (bits::mask_bits(DIV_MULT_SHIFT) / meta.size) + 1; size_t zero = 0; meta.mod_zero_mult = (~zero / meta.size) + 1; @@ -269,6 +271,9 @@ namespace snmalloc constexpr SizeClassTable sizeclass_metadata = SizeClassTable(); + static_assert( + bits::BITS - sizeclass_metadata.DIV_MULT_SHIFT <= MAX_CAPACITY_BITS); + constexpr size_t DIV_MULT_SHIFT = sizeclass_metadata.DIV_MULT_SHIFT; constexpr size_t sizeclass_to_size(smallsizeclass_t sizeclass) @@ -332,14 +337,11 @@ namespace snmalloc .capacity; } - constexpr address_t start_of_object(sizeclass_t sc, address_t addr) + SNMALLOC_FAST_PATH constexpr size_t slab_index(sizeclass_t sc, address_t addr) { auto meta = sizeclass_metadata.fast(sc); - address_t slab_start = addr & ~meta.slab_mask; size_t offset = addr & meta.slab_mask; - size_t size = meta.size; - - if constexpr (sizeof(addr) >= 8) + if constexpr (sizeof(offset) >= 8) { // Only works for 64 bit multiplication, as the following will overflow in // 32bit. @@ -350,17 +352,27 @@ namespace snmalloc // the slab_mask by making the `div_mult` zero. The link uses 128 bit // multiplication, we have shrunk the range of the calculation to remove // this dependency. 
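// Worked example with illustrative numbers (the real constants come from the
// SizeClassTable constructor above): take size = 24 and a shift of 16, so
//   div_mult = (0xFFFF / 24) + 1 = 2731.
// Then offset = 47 gives (47 * 2731) >> 16 = 128357 >> 16 = 1 == 47 / 24,
// and offset = 48 gives (48 * 2731) >> 16 = 131088 >> 16 = 2 == 48 / 24.
// DIV_MULT_SHIFT is chosen large enough that this stays exact for every
// offset that can appear under slab_mask.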
- size_t offset_start = ((offset * meta.div_mult) >> DIV_MULT_SHIFT) * size; - return slab_start + offset_start; + size_t index = ((offset * meta.div_mult) >> DIV_MULT_SHIFT); + return index; } else { + size_t size = meta.size; if (size == 0) return 0; - return slab_start + (offset / size) * size; + return offset / size; } } + SNMALLOC_FAST_PATH constexpr address_t + start_of_object(sizeclass_t sc, address_t addr) + { + auto meta = sizeclass_metadata.fast(sc); + address_t slab_start = addr & ~meta.slab_mask; + size_t index = slab_index(sc, addr); + return slab_start + (index * meta.size); + } + constexpr size_t index_in_object(sizeclass_t sc, address_t addr) { return addr - start_of_object(sc, addr); @@ -405,7 +417,7 @@ namespace snmalloc { // We subtract and shift to reduce the size of the table, i.e. we don't have // to store a value for every size. - return (s - 1) >> MIN_ALLOC_BITS; + return (s - 1) >> MIN_ALLOC_STEP_BITS; } constexpr size_t sizeclass_lookup_size = @@ -421,13 +433,29 @@ namespace snmalloc constexpr SizeClassLookup() { + constexpr sizeclass_compress_t minimum_class = + static_cast( + size_to_sizeclass_const(MIN_ALLOC_SIZE)); + + /* Some unused sizeclasses is OK, but keep it within reason! */ + static_assert(minimum_class < sizeclass_lookup_size); + size_t curr = 1; - for (sizeclass_compress_t sizeclass = 0; - sizeclass < NUM_SMALL_SIZECLASSES; - sizeclass++) + + sizeclass_compress_t sizeclass = 0; + for (; sizeclass < minimum_class; sizeclass++) + { + for (; curr <= sizeclass_metadata.fast_small(sizeclass).size; + curr += MIN_ALLOC_STEP_SIZE) + { + table[sizeclass_lookup_index(curr)] = minimum_class; + } + } + + for (; sizeclass < NUM_SMALL_SIZECLASSES; sizeclass++) { for (; curr <= sizeclass_metadata.fast_small(sizeclass).size; - curr += 1 << MIN_ALLOC_BITS) + curr += MIN_ALLOC_STEP_SIZE) { auto i = sizeclass_lookup_index(curr); if (i == sizeclass_lookup_size) @@ -478,6 +506,12 @@ namespace snmalloc { if (size > sizeclass_to_size(NUM_SMALL_SIZECLASSES - 1)) { + if (size > bits::one_at_bit(bits::BITS - 1)) + { + // This size is too large, no rounding should occur as will result in a + // failed allocation later. + return size; + } return bits::next_pow2(size); } // If realloc(ptr, 0) returns nullptr, some consumers treat this as a diff --git a/3rdparty/exported/snmalloc/src/snmalloc/override/jemalloc_compat.cc b/3rdparty/exported/snmalloc/src/snmalloc/override/jemalloc_compat.cc index a65554630691..79206fed7d03 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/override/jemalloc_compat.cc +++ b/3rdparty/exported/snmalloc/src/snmalloc/override/jemalloc_compat.cc @@ -4,6 +4,7 @@ #include using namespace snmalloc; + namespace { /** @@ -88,7 +89,9 @@ extern "C" // statistics on fork if built with statistics. SNMALLOC_EXPORT SNMALLOC_USED_FUNCTION inline void _malloc_prefork(void) {} + SNMALLOC_EXPORT SNMALLOC_USED_FUNCTION inline void _malloc_postfork(void) {} + SNMALLOC_EXPORT SNMALLOC_USED_FUNCTION inline void _malloc_first_thread(void) {} @@ -116,7 +119,7 @@ extern "C" * now, this is always implemented to return an error. */ SNMALLOC_EXPORT int - SNMALLOC_NAME_MANGLE(mallctl)(const char*, void*, size_t*, void*, size_t) + SNMALLOC_NAME_MANGLE(mallctl)(const char*, void*, size_t*, void*, size_t) { return ENOENT; } @@ -265,7 +268,7 @@ extern "C" * controlling the thread cache and arena are ignored. 
*/ SNMALLOC_EXPORT void* - SNMALLOC_NAME_MANGLE(rallocx)(void* ptr, size_t size, int flags) + SNMALLOC_NAME_MANGLE(rallocx)(void* ptr, size_t size, int flags) { auto f = JEMallocFlags(flags); size = f.aligned_size(size); diff --git a/3rdparty/exported/snmalloc/src/snmalloc/override/malloc-extensions.h b/3rdparty/exported/snmalloc/src/snmalloc/override/malloc-extensions.h index f7429cd11f3d..1c0f5c8d99f6 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/override/malloc-extensions.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/override/malloc-extensions.h @@ -1,4 +1,5 @@ #pragma once + /** * Malloc extensions * diff --git a/3rdparty/exported/snmalloc/src/snmalloc/override/malloc.cc b/3rdparty/exported/snmalloc/src/snmalloc/override/malloc.cc index 512ba3dceb38..cbef9428c953 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/override/malloc.cc +++ b/3rdparty/exported/snmalloc/src/snmalloc/override/malloc.cc @@ -1,8 +1,5 @@ #include "override.h" -#include -#include - using namespace snmalloc; #ifndef MALLOC_USABLE_SIZE_QUALIFIER @@ -13,54 +10,44 @@ extern "C" { SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(__malloc_end_pointer)(void* ptr) { - return ThreadAlloc::get().external_pointer(ptr); + return snmalloc::libc::__malloc_end_pointer(ptr); } SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(malloc)(size_t size) { - return ThreadAlloc::get().alloc(size); + return snmalloc::libc::malloc(size); } SNMALLOC_EXPORT void SNMALLOC_NAME_MANGLE(free)(void* ptr) { - ThreadAlloc::get().dealloc(ptr); + snmalloc::libc::free(ptr); } SNMALLOC_EXPORT void SNMALLOC_NAME_MANGLE(cfree)(void* ptr) { - ThreadAlloc::get().dealloc(ptr); - } - - /** - * Clang was helpfully inlining the constant return value, and - * thus converting from a tail call to an ordinary call. - */ - SNMALLOC_EXPORT inline void* snmalloc_not_allocated = nullptr; - - static SNMALLOC_SLOW_PATH void* SNMALLOC_NAME_MANGLE(snmalloc_set_error)() - { - errno = ENOMEM; - return snmalloc_not_allocated; + snmalloc::libc::free(ptr); } SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(calloc)(size_t nmemb, size_t size) { - bool overflow = false; - size_t sz = bits::umul(size, nmemb, overflow); - if (SNMALLOC_UNLIKELY(overflow)) - { - return SNMALLOC_NAME_MANGLE(snmalloc_set_error)(); - } - return ThreadAlloc::get().alloc(sz); + return snmalloc::libc::calloc(nmemb, size); } SNMALLOC_EXPORT size_t SNMALLOC_NAME_MANGLE(malloc_usable_size)( MALLOC_USABLE_SIZE_QUALIFIER void* ptr) { - return ThreadAlloc::get().alloc_size(ptr); + return snmalloc::libc::malloc_usable_size(ptr); } +#ifdef _WIN32 + SNMALLOC_EXPORT + size_t SNMALLOC_NAME_MANGLE(_msize)(MALLOC_USABLE_SIZE_QUALIFIER void* ptr) + { + return snmalloc::libc::malloc_usable_size(ptr); + } +#endif + SNMALLOC_EXPORT size_t SNMALLOC_NAME_MANGLE(malloc_good_size)(size_t size) { @@ -69,162 +56,53 @@ extern "C" SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(realloc)(void* ptr, size_t size) { - auto& a = ThreadAlloc::get(); - size_t sz = a.alloc_size(ptr); - // Keep the current allocation if the given size is in the same sizeclass. - if (sz == round_size(size)) - { -#ifdef SNMALLOC_PASS_THROUGH - // snmallocs alignment guarantees can be broken by realloc in pass-through - // this is not exercised, by existing clients, but is tested. 
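// Illustrative sketch of the fast path above, using a typical default
// sizeclass table (the concrete sizes are assumptions, not taken from this
// patch):
//   void* p = malloc(100);  // rounds up to a 112-byte sizeclass
//   p = realloc(p, 110);    // round_size(110) is also 112, so p is returned
//                           // unchanged and nothing is copied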
- if (pointer_align_up(ptr, natural_alignment(size)) == ptr) - return ptr; -#else - return ptr; -#endif - } - - if (size == (size_t)-1) - { - errno = ENOMEM; - return nullptr; - } - - void* p = a.alloc(size); - if (SNMALLOC_LIKELY(p != nullptr)) - { - sz = bits::min(size, sz); - // Guard memcpy as GCC is assuming not nullptr for ptr after the memcpy - // otherwise. - if (sz != 0) - memcpy(p, ptr, sz); - a.dealloc(ptr); - } - else if (SNMALLOC_LIKELY(size == 0)) - { - a.dealloc(ptr); - } - else - { - errno = ENOMEM; - } - return p; + return snmalloc::libc::realloc(ptr, size); } #if !defined(SNMALLOC_NO_REALLOCARRAY) SNMALLOC_EXPORT void* - SNMALLOC_NAME_MANGLE(reallocarray)(void* ptr, size_t nmemb, size_t size) + SNMALLOC_NAME_MANGLE(reallocarray)(void* ptr, size_t nmemb, size_t size) { - bool overflow = false; - size_t sz = bits::umul(size, nmemb, overflow); - if (overflow) - { - errno = ENOMEM; - return nullptr; - } - return SNMALLOC_NAME_MANGLE(realloc)(ptr, sz); + return snmalloc::libc::reallocarray(ptr, nmemb, size); } #endif #if !defined(SNMALLOC_NO_REALLOCARR) SNMALLOC_EXPORT int - SNMALLOC_NAME_MANGLE(reallocarr)(void* ptr_, size_t nmemb, size_t size) - { - int err = errno; - auto& a = ThreadAlloc::get(); - bool overflow = false; - size_t sz = bits::umul(size, nmemb, overflow); - if (sz == 0) - { - errno = err; - return 0; - } - if (overflow) - { - errno = err; - return EOVERFLOW; - } - - void** ptr = reinterpret_cast(ptr_); - void* p = a.alloc(sz); - if (p == nullptr) - { - errno = ENOMEM; - return ENOMEM; - } - - sz = bits::min(sz, a.alloc_size(*ptr)); - - SNMALLOC_ASSUME(*ptr != nullptr || sz == 0); - // Guard memcpy as GCC is assuming not nullptr for ptr after the memcpy - // otherwise. - if (sz != 0) - memcpy(p, *ptr, sz); - errno = err; - a.dealloc(*ptr); - *ptr = p; - return 0; + SNMALLOC_NAME_MANGLE(reallocarr)(void* ptr, size_t nmemb, size_t size) + { + return snmalloc::libc::reallocarr(ptr, nmemb, size); } #endif SNMALLOC_EXPORT void* - SNMALLOC_NAME_MANGLE(memalign)(size_t alignment, size_t size) + SNMALLOC_NAME_MANGLE(memalign)(size_t alignment, size_t size) { - if ((alignment == 0) || (alignment == size_t(-1))) - { - errno = EINVAL; - return nullptr; - } - - if ((size + alignment) < size) - { - errno = ENOMEM; - return nullptr; - } - - return SNMALLOC_NAME_MANGLE(malloc)(aligned_size(alignment, size)); + return snmalloc::libc::memalign(alignment, size); } SNMALLOC_EXPORT void* - SNMALLOC_NAME_MANGLE(aligned_alloc)(size_t alignment, size_t size) + SNMALLOC_NAME_MANGLE(aligned_alloc)(size_t alignment, size_t size) { - SNMALLOC_ASSERT((size % alignment) == 0); - return SNMALLOC_NAME_MANGLE(memalign)(alignment, size); + return snmalloc::libc::aligned_alloc(alignment, size); } SNMALLOC_EXPORT int SNMALLOC_NAME_MANGLE(posix_memalign)( void** memptr, size_t alignment, size_t size) { - if ((alignment < sizeof(uintptr_t) || ((alignment & (alignment - 1)) != 0))) - { - return EINVAL; - } - - void* p = SNMALLOC_NAME_MANGLE(memalign)(alignment, size); - if (SNMALLOC_UNLIKELY(p == nullptr)) - { - if (size != 0) - return ENOMEM; - } - *memptr = p; - return 0; + return snmalloc::libc::posix_memalign(memptr, alignment, size); } #if !defined(__FreeBSD__) && !defined(__OpenBSD__) SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(valloc)(size_t size) { - return SNMALLOC_NAME_MANGLE(memalign)(OS_PAGE_SIZE, size); + return snmalloc::libc::memalign(OS_PAGE_SIZE, size); } #endif SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(pvalloc)(size_t size) { - if (size == size_t(-1)) - { - errno = ENOMEM; - 
return nullptr; - } - return SNMALLOC_NAME_MANGLE(memalign)( + return snmalloc::libc::memalign( OS_PAGE_SIZE, (size + OS_PAGE_SIZE - 1) & ~(OS_PAGE_SIZE - 1)); } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/override/memcpy.cc b/3rdparty/exported/snmalloc/src/snmalloc/override/memcpy.cc index c2283ec1e327..c6053ae02abc 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/override/memcpy.cc +++ b/3rdparty/exported/snmalloc/src/snmalloc/override/memcpy.cc @@ -6,7 +6,7 @@ extern "C" * Snmalloc checked memcpy. */ SNMALLOC_EXPORT void* - SNMALLOC_NAME_MANGLE(memcpy)(void* dst, const void* src, size_t len) + SNMALLOC_NAME_MANGLE(memcpy)(void* dst, const void* src, size_t len) { return snmalloc::memcpy(dst, src, len); } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/override/new.cc b/3rdparty/exported/snmalloc/src/snmalloc/override/new.cc index 29372a7b231d..19aa9f58c963 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/override/new.cc +++ b/3rdparty/exported/snmalloc/src/snmalloc/override/new.cc @@ -1,4 +1,4 @@ -#include "malloc.cc" +#include "snmalloc/snmalloc.h" #ifdef _WIN32 # ifdef __clang__ @@ -16,106 +16,98 @@ # endif #endif -using namespace snmalloc; - void* operator new(size_t size) { - return ThreadAlloc::get().alloc(size); + return snmalloc::libc::malloc(size); } void* operator new[](size_t size) { - return ThreadAlloc::get().alloc(size); + return snmalloc::libc::malloc(size); } void* operator new(size_t size, std::nothrow_t&) { - return ThreadAlloc::get().alloc(size); + return snmalloc::libc::malloc(size); } void* operator new[](size_t size, std::nothrow_t&) { - return ThreadAlloc::get().alloc(size); + return snmalloc::libc::malloc(size); } -void operator delete(void* p)EXCEPTSPEC +void operator delete(void* p) EXCEPTSPEC { - ThreadAlloc::get().dealloc(p); + snmalloc::libc::free(p); } -void operator delete(void* p, size_t size)EXCEPTSPEC +void operator delete(void* p, size_t size) EXCEPTSPEC { - if (p == nullptr) - return; - ThreadAlloc::get().dealloc(p, size); + snmalloc::libc::free_sized(p, size); } void operator delete(void* p, std::nothrow_t&) { - ThreadAlloc::get().dealloc(p); + snmalloc::libc::free(p); } void operator delete[](void* p) EXCEPTSPEC { - ThreadAlloc::get().dealloc(p); + snmalloc::libc::free(p); } void operator delete[](void* p, size_t size) EXCEPTSPEC { - if (p == nullptr) - return; - ThreadAlloc::get().dealloc(p, size); + snmalloc::libc::free_sized(p, size); } void operator delete[](void* p, std::nothrow_t&) { - ThreadAlloc::get().dealloc(p); + snmalloc::libc::free(p); } void* operator new(size_t size, std::align_val_t val) { - size = aligned_size(size_t(val), size); - return ThreadAlloc::get().alloc(size); + size = snmalloc::aligned_size(size_t(val), size); + return snmalloc::libc::malloc(size); } void* operator new[](size_t size, std::align_val_t val) { - size = aligned_size(size_t(val), size); - return ThreadAlloc::get().alloc(size); + size = snmalloc::aligned_size(size_t(val), size); + return snmalloc::libc::malloc(size); } void* operator new(size_t size, std::align_val_t val, std::nothrow_t&) { - size = aligned_size(size_t(val), size); - return ThreadAlloc::get().alloc(size); + size = snmalloc::aligned_size(size_t(val), size); + return snmalloc::libc::malloc(size); } void* operator new[](size_t size, std::align_val_t val, std::nothrow_t&) { - size = aligned_size(size_t(val), size); - return ThreadAlloc::get().alloc(size); + size = snmalloc::aligned_size(size_t(val), size); + return snmalloc::libc::malloc(size); } -void operator delete(void* p, 
std::align_val_t)EXCEPTSPEC +void operator delete(void* p, std::align_val_t) EXCEPTSPEC { - ThreadAlloc::get().dealloc(p); + snmalloc::libc::free(p); } void operator delete[](void* p, std::align_val_t) EXCEPTSPEC { - ThreadAlloc::get().dealloc(p); + snmalloc::libc::free(p); } -void operator delete(void* p, size_t size, std::align_val_t val)EXCEPTSPEC +void operator delete(void* p, size_t size, std::align_val_t val) EXCEPTSPEC { - size = aligned_size(size_t(val), size); - ThreadAlloc::get().dealloc(p, size); + size = snmalloc::aligned_size(size_t(val), size); + snmalloc::libc::free_sized(p, size); } void operator delete[](void* p, size_t size, std::align_val_t val) EXCEPTSPEC { - if (p == nullptr) - return; - size = aligned_size(size_t(val), size); - ThreadAlloc::get().dealloc(p, size); + size = snmalloc::aligned_size(size_t(val), size); + snmalloc::libc::free_sized(p, size); } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/override/override.h b/3rdparty/exported/snmalloc/src/snmalloc/override/override.h index 0ca70bc11a38..5dda309c0b74 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/override/override.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/override/override.h @@ -1,6 +1,6 @@ #pragma once -#include "../global/global.h" +#include "snmalloc/snmalloc.h" #ifndef SNMALLOC_EXPORT # define SNMALLOC_EXPORT diff --git a/3rdparty/exported/snmalloc/src/snmalloc/override/rust.cc b/3rdparty/exported/snmalloc/src/snmalloc/override/rust.cc index 64da984ca71e..4a5dcdaf94f9 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/override/rust.cc +++ b/3rdparty/exported/snmalloc/src/snmalloc/override/rust.cc @@ -1,5 +1,5 @@ #define SNMALLOC_NAME_MANGLE(a) sn_##a -#include "malloc.cc" +#include "snmalloc/snmalloc.h" #include @@ -10,19 +10,19 @@ using namespace snmalloc; extern "C" SNMALLOC_EXPORT void* - SNMALLOC_NAME_MANGLE(rust_alloc)(size_t alignment, size_t size) +SNMALLOC_NAME_MANGLE(rust_alloc)(size_t alignment, size_t size) { return ThreadAlloc::get().alloc(aligned_size(alignment, size)); } extern "C" SNMALLOC_EXPORT void* - SNMALLOC_NAME_MANGLE(rust_alloc_zeroed)(size_t alignment, size_t size) +SNMALLOC_NAME_MANGLE(rust_alloc_zeroed)(size_t alignment, size_t size) { return ThreadAlloc::get().alloc(aligned_size(alignment, size)); } extern "C" SNMALLOC_EXPORT void - SNMALLOC_NAME_MANGLE(rust_dealloc)(void* ptr, size_t alignment, size_t size) +SNMALLOC_NAME_MANGLE(rust_dealloc)(void* ptr, size_t alignment, size_t size) { ThreadAlloc::get().dealloc(ptr, aligned_size(alignment, size)); } @@ -48,6 +48,12 @@ extern "C" SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(rust_realloc)( extern "C" SNMALLOC_EXPORT void SNMALLOC_NAME_MANGLE(rust_statistics)( size_t* current_memory_usage, size_t* peak_memory_usage) { - *current_memory_usage = StandardConfig::Backend::get_current_usage(); - *peak_memory_usage = StandardConfig::Backend::get_peak_usage(); -} \ No newline at end of file + *current_memory_usage = Alloc::Config::Backend::get_current_usage(); + *peak_memory_usage = Alloc::Config::Backend::get_peak_usage(); +} + +extern "C" SNMALLOC_EXPORT size_t +SNMALLOC_NAME_MANGLE(rust_usable_size)(const void* ptr) +{ + return ThreadAlloc::get().alloc_size(ptr); +} diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_apple.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_apple.h index f023e195af76..f6a7f1a2d600 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_apple.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_apple.h @@ -15,8 +15,39 @@ # include # include +# if __has_include() 
&& __has_include() +# include +# include +# if defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && \ + defined(MAC_OS_X_VERSION_14_4) +# if __MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_14_4 +# define SNMALLOC_APPLE_HAS_OS_SYNC_WAIT_ON_ADDRESS +# endif +# endif +# endif + namespace snmalloc { +# ifdef SNMALLOC_APPLE_HAS_OS_SYNC_WAIT_ON_ADDRESS + // For macos 14.4+, we use os_sync_wait_on_address and friends. It is + // available as a part of stable API, and the usage is more straightforward. + extern "C" int os_sync_wait_on_address( + void* addr, uint64_t value, size_t size, uint32_t flags); + + extern "C" int + os_sync_wake_by_address_any(void* addr, size_t size, uint32_t flags); + + extern "C" int + os_sync_wake_by_address_all(void* addr, size_t size, uint32_t flags); +# else + // For platforms before macos 14.4, we use __ulock_wait and friends. It is + // available since macos 10.12. + extern "C" int + __ulock_wait(uint32_t lock_type, void* addr, uint64_t value, uint32_t); + + extern "C" int __ulock_wake(uint32_t lock_type, void* addr, uint64_t); +# endif + /** * PAL implementation for Apple systems (macOS, iOS, watchOS, tvOS...). */ @@ -28,7 +59,7 @@ namespace snmalloc * The features exported by this PAL. */ static constexpr uint64_t pal_features = - AlignedAllocation | LazyCommit | Entropy | Time; + AlignedAllocation | LazyCommit | Entropy | Time | WaitOnAddress; /* * `page_size` @@ -281,6 +312,76 @@ namespace snmalloc return result; } + + using WaitingWord = uint32_t; +# ifndef SNMALLOC_APPLE_HAS_OS_SYNC_WAIT_ON_ADDRESS + static constexpr uint32_t UL_COMPARE_AND_WAIT = 0x0000'0001; + static constexpr uint32_t ULF_NO_ERRNO = 0x0100'0000; + static constexpr uint32_t ULF_WAKE_ALL = 0x0000'0100; +# endif + + template + static void wait_on_address(std::atomic& addr, T expected) + { + [[maybe_unused]] int errno_backup = errno; + while (addr.load(std::memory_order_relaxed) == expected) + { +# ifdef SNMALLOC_APPLE_HAS_OS_SYNC_WAIT_ON_ADDRESS + if ( + os_sync_wait_on_address( + &addr, static_cast(expected), sizeof(T), 0) != -1) + { + errno = errno_backup; + return; + } +# else + if ( + __ulock_wait( + UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, + &addr, + static_cast(expected), + 0) != -1) + { + return; + } +# endif + } + } + + template + static void notify_one_on_address(std::atomic& addr) + { +# ifdef SNMALLOC_APPLE_HAS_OS_SYNC_WAIT_ON_ADDRESS + os_sync_wake_by_address_any(&addr, sizeof(T), 0); +# else + // __ulock_wake can get interrupted, so retry until either waking up a + // waiter or failing because there are no waiters (ENOENT). + for (;;) + { + int ret = __ulock_wake(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, &addr, 0); + if (ret >= 0 || ret == -ENOENT) + return; + } +# endif + } + + template + static void notify_all_on_address(std::atomic& addr) + { +# ifdef SNMALLOC_APPLE_HAS_OS_SYNC_WAIT_ON_ADDRESS + os_sync_wake_by_address_all(&addr, sizeof(T), 0); +# else + // __ulock_wake can get interrupted, so retry until either waking up a + // waiter or failing because there are no waiters (ENOENT). 
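// Illustrative analogy (not from the upstream sources): the protocol these
// PAL hooks implement matches C++20 std::atomic<T>::wait / notify_one, e.g.
//   std::atomic<uint32_t> flag{0};
//   // waiter:   Pal::wait_on_address(flag, 0u);  blocks while flag == 0
//   // notifier: flag.store(1); Pal::notify_one_on_address(flag);
// Spurious wake-ups are allowed, which is why wait_on_address re-checks the
// value in a loop before sleeping again.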
+ for (;;) + { + int ret = __ulock_wake( + UL_COMPARE_AND_WAIT | ULF_NO_ERRNO | ULF_WAKE_ALL, &addr, 0); + if (ret >= 0 || ret == -ENOENT) + return; + } +# endif + } }; } // namespace snmalloc #endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_concept.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_concept.h index 44dec410a000..7efbd08b7d4c 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_concept.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_concept.h @@ -19,62 +19,54 @@ namespace snmalloc * PALs must advertize the bit vector of their supported features. */ template - concept IsPAL_static_features = requires() - { - typename std::integral_constant; - }; + concept IsPAL_static_features = + requires() { + typename std::integral_constant; + }; /** * PALs must advertise the size of the address space and their page size */ template - concept IsPAL_static_sizes = requires() - { - typename std::integral_constant; - typename std::integral_constant; - }; + concept IsPAL_static_sizes = + requires() { + typename std::integral_constant; + typename std::integral_constant; + }; /** * PALs expose an error reporting function which takes a const C string. */ template - concept IsPAL_error = requires(const char* const str) - { - { - PAL::error(str) - } - ->ConceptSame; - }; + concept IsPAL_error = requires(const char* const str) { + { + PAL::error(str) + } -> ConceptSame; + }; /** * PALs expose a basic library of memory operations. */ template - concept IsPAL_memops = requires(void* vp, std::size_t sz) - { - { - PAL::notify_not_using(vp, sz) - } - noexcept->ConceptSame; - - { - PAL::template notify_using(vp, sz) - } - noexcept->ConceptSame; - { - PAL::template notify_using(vp, sz) - } - noexcept->ConceptSame; - - { - PAL::template zero(vp, sz) - } - noexcept->ConceptSame; - { - PAL::template zero(vp, sz) - } - noexcept->ConceptSame; - }; + concept IsPAL_memops = requires(void* vp, std::size_t sz) { + { + PAL::notify_not_using(vp, sz) + } noexcept -> ConceptSame; + + { + PAL::template notify_using(vp, sz) + } noexcept -> ConceptSame; + { + PAL::template notify_using(vp, sz) + } noexcept -> ConceptSame; + + { + PAL::template zero(vp, sz) + } noexcept -> ConceptSame; + { + PAL::template zero(vp, sz) + } noexcept -> ConceptSame; + }; /** * The Pal must provide a thread id for debugging. It should not return @@ -82,66 +74,55 @@ namespace snmalloc * places. */ template - concept IsPAL_tid = requires() - { - { - PAL::get_tid() - } - noexcept->ConceptSame; - }; + concept IsPAL_tid = + requires() { + { + PAL::get_tid() + } noexcept -> ConceptSame; + }; /** * Absent any feature flags, the PAL must support a crude primitive allocator */ template - concept IsPAL_reserve = requires(PAL p, std::size_t sz) - { - { - PAL::reserve(sz) - } - noexcept->ConceptSame; - }; + concept IsPAL_reserve = requires(PAL p, std::size_t sz) { + { + PAL::reserve(sz) + } noexcept -> ConceptSame; + }; /** * Some PALs expose a richer allocator which understands aligned allocations */ template - concept IsPAL_reserve_aligned = requires(std::size_t sz) - { - { - PAL::template reserve_aligned(sz) - } - noexcept->ConceptSame; - { - PAL::template reserve_aligned(sz) - } - noexcept->ConceptSame; - }; + concept IsPAL_reserve_aligned = requires(std::size_t sz) { + { + PAL::template reserve_aligned(sz) + } noexcept -> ConceptSame; + { + PAL::template reserve_aligned(sz) + } noexcept -> ConceptSame; + }; /** * Some PALs can provide memory pressure callbacks. 
*/ template - concept IsPAL_mem_low_notify = requires(PalNotificationObject* pno) - { - { - PAL::expensive_low_memory_check() - } - ->ConceptSame; - { - PAL::register_for_low_memory_callback(pno) - } - ->ConceptSame; - }; + concept IsPAL_mem_low_notify = requires(PalNotificationObject* pno) { + { + PAL::expensive_low_memory_check() + } -> ConceptSame; + { + PAL::register_for_low_memory_callback(pno) + } -> ConceptSame; + }; template - concept IsPAL_get_entropy64 = requires() - { - { - PAL::get_entropy64() - } - ->ConceptSame; - }; + concept IsPAL_get_entropy64 = requires() { + { + PAL::get_entropy64() + } -> ConceptSame; + }; /** * PALs ascribe to the conjunction of several concepts. These are broken diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_consts.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_consts.h index 83aa52ef2f95..c4c4c25a244a 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_consts.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_consts.h @@ -3,7 +3,6 @@ #include "../ds_core/ds_core.h" #include -#include namespace snmalloc { @@ -55,6 +54,17 @@ namespace snmalloc * This Pal provides a millisecond time source */ Time = (1 << 5), + + /** + * This Pal provides selective core dumps, so + * modify which parts get dumped. + */ + CoreDump = (1 << 6), + + /** + * This Pal provides a way for parking threads at a specific address. + */ + WaitOnAddress = (1 << 7), }; /** diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_ds.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_ds.h index 3da37cf46159..008d1f2c2210 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_ds.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_ds.h @@ -3,7 +3,6 @@ #include "../ds_core/ds_core.h" #include -#include namespace snmalloc { diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_freebsd.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_freebsd.h index 86a6576e49d1..d967dc1b566e 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_freebsd.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_freebsd.h @@ -13,6 +13,8 @@ # endif # endif +# include + /** * Direct system-call wrappers so that we can skip libthr interception, which * won't work if malloc is broken. @@ -20,6 +22,7 @@ */ extern "C" ssize_t __sys_writev(int fd, const struct iovec* iov, int iovcnt); extern "C" int __sys_fsync(int fd); + /// @} namespace snmalloc @@ -43,7 +46,8 @@ namespace snmalloc * field is declared explicitly to remind anyone modifying this class to * add new features that they should add any required feature flags. */ - static constexpr uint64_t pal_features = PALBSD_Aligned::pal_features; + static constexpr uint64_t pal_features = + PALBSD_Aligned::pal_features | CoreDump | WaitOnAddress; /** * FreeBSD uses atypically small address spaces on its 64 bit RISC machines. @@ -54,6 +58,7 @@ namespace snmalloc static constexpr size_t address_bits = (Aal::bits == 32) ? Aal::address_bits : (Aal::aal_name == RISCV ? 38 : Aal::address_bits); + // TODO, if we ever backport to MIPS, this should yield 39 there. /** @@ -68,10 +73,8 @@ namespace snmalloc /** * Notify platform that we will not be using these pages. * - * We use the `MADV_FREE` and `NADV_NOCORE` flags to `madvise`. The first - * allows the system to discard the page and replace it with a CoW mapping - * of the zero page. The second prevents this mapping from appearing in - * core files. + * We use the `MADV_FREE` flag to `madvise`. 
This allows the system to + * discard the page and replace it with a CoW mapping of the zero page. */ static void notify_not_using(void* p, size_t size) noexcept { @@ -80,7 +83,6 @@ namespace snmalloc if constexpr (DEBUG) memset(p, 0x5a, size); - madvise(p, size, MADV_NOCORE); madvise(p, size, MADV_FREE); if constexpr (mitigations(pal_enforce_access)) @@ -90,28 +92,19 @@ namespace snmalloc } /** - * Notify platform that we will be using these pages for reading. - * - * This is used only for pages full of zeroes and so we exclude them from - * core dumps. + * Notify platform that these pages should be included in a core dump. */ - static void notify_using_readonly(void* p, size_t size) noexcept + static void notify_do_dump(void* p, size_t size) noexcept { - PALBSD_Aligned::notify_using_readonly(p, size); - madvise(p, size, MADV_NOCORE); + madvise(p, size, MADV_CORE); } /** - * Notify platform that we will be using these pages. - * - * We may have previously marked this memory as not being included in core - * files, so mark it for inclusion again. + * Notify platform that these pages should not be included in a core dump. */ - template - static void notify_using(void* p, size_t size) noexcept + static void notify_do_not_dump(void* p, size_t size) noexcept { - PALBSD_Aligned::notify_using(p, size); - madvise(p, size, MADV_CORE); + madvise(p, size, MADV_NOCORE); } # if defined(__CHERI_PURE_CAPABILITY__) @@ -140,6 +133,53 @@ namespace snmalloc p.unsafe_ptr(), ~static_cast(CHERI_PERM_SW_VMEM))); } # endif + + using WaitingWord = unsigned int; + + template + static void wait_on_address(std::atomic& addr, T expected) + { + static_assert( + sizeof(T) == sizeof(WaitingWord) && alignof(T) == alignof(WaitingWord), + "T must be the same size and alignment as WaitingWord"); + int backup = errno; + while (addr.load(std::memory_order_relaxed) == expected) + { + int ret = _umtx_op( + &addr, + UMTX_OP_WAIT_UINT_PRIVATE, + static_cast(expected), + nullptr, + nullptr); + + if (ret == 0) + break; + } + errno = backup; + } + + template + static void notify_one_on_address(std::atomic& addr) + { + static_assert( + sizeof(T) == sizeof(WaitingWord) && alignof(T) == alignof(WaitingWord), + "T must be the same size and alignment as WaitingWord"); + _umtx_op(&addr, UMTX_OP_WAKE_PRIVATE, 1, nullptr, nullptr); + } + + template + static void notify_all_on_address(std::atomic& addr) + { + static_assert( + sizeof(T) == sizeof(WaitingWord) && alignof(T) == alignof(WaitingWord), + "T must be the same size and alignment as WaitingWord"); + _umtx_op( + &addr, + UMTX_OP_WAKE_PRIVATE, + static_cast(INT_MAX), + nullptr, + nullptr); + } }; } // namespace snmalloc #endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_haiku.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_haiku.h index bafe23c48781..bbc9e077c0a0 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_haiku.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_haiku.h @@ -37,15 +37,6 @@ namespace snmalloc SNMALLOC_ASSERT(is_aligned_block(p, size)); posix_madvise(p, size, POSIX_MADV_DONTNEED); } - - /** - * Hopefully a temporary workaround until the kernel random feature - * is exposed properly in the userspace ? 
- */ - static uint64_t get_entropy64() - { - return PALPOSIX::dev_urandom(); - } }; } // namespace snmalloc #endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_linux.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_linux.h index 6f131b0cc6fd..e1774fbd3dc4 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_linux.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_linux.h @@ -14,6 +14,10 @@ # include # endif +# if defined(SNMALLOC_HAS_LINUX_FUTEX_H) +# include +# endif + extern "C" int puts(const char* str); namespace snmalloc @@ -27,7 +31,12 @@ namespace snmalloc * * We always make sure that linux has entropy support. */ - static constexpr uint64_t pal_features = PALPOSIX::pal_features | Entropy; + static constexpr uint64_t pal_features = PALPOSIX::pal_features | Entropy | + CoreDump +# ifdef SNMALLOC_HAS_LINUX_FUTEX_H + | WaitOnAddress +# endif + ; static constexpr size_t page_size = Aal::aal_name == PowerPC ? 0x10000 : PALPOSIX::page_size; @@ -58,7 +67,6 @@ namespace snmalloc void* p = PALPOSIX::reserve(size); if (p) { - madvise(p, size, MADV_DONTDUMP); # ifdef SNMALLOC_PAGEID # ifndef PR_SET_VMA # define PR_SET_VMA 0x53564d41 @@ -125,7 +133,6 @@ namespace snmalloc if constexpr (DEBUG) memset(p, 0x5a, size); - madvise(p, size, MADV_DONTDUMP); madvise(p, size, madvise_free_flags); if constexpr (mitigations(pal_enforce_access)) @@ -135,25 +142,19 @@ namespace snmalloc } /** - * Notify platform that we will be using these pages for reading. - * - * This is used only for pages full of zeroes and so we exclude them from - * core dumps. + * Notify platform that these pages should be included in a core dump. */ - static void notify_using_readonly(void* p, size_t size) noexcept + static void notify_do_dump(void* p, size_t size) noexcept { - PALPOSIX::notify_using_readonly(p, size); - madvise(p, size, MADV_DONTDUMP); + madvise(p, size, MADV_DODUMP); } /** - * Notify platform that we will be using these pages. + * Notify platform that these pages should not be included in a core dump. */ - template - static void notify_using(void* p, size_t size) noexcept + static void notify_do_not_dump(void* p, size_t size) noexcept { - PALPOSIX::notify_using(p, size); - madvise(p, size, MADV_DODUMP); + madvise(p, size, MADV_DONTDUMP); } static uint64_t get_entropy64() @@ -171,6 +172,7 @@ namespace snmalloc uint64_t result; char buffer[sizeof(uint64_t)]; }; + ssize_t ret; // give a try to SYS_getrandom @@ -239,6 +241,47 @@ namespace snmalloc // its APIs are not exception-free. 
return dev_urandom(); } + +# ifdef SNMALLOC_HAS_LINUX_FUTEX_H + using WaitingWord = int; + + template + static void wait_on_address(std::atomic& addr, T expected) + { + int backup = errno; + static_assert( + sizeof(T) == sizeof(WaitingWord) && alignof(T) == alignof(WaitingWord), + "T must be the same size and alignment as WaitingWord"); + while (addr.load(std::memory_order_relaxed) == expected) + { + long ret = syscall( + SYS_futex, &addr, FUTEX_WAIT_PRIVATE, expected, nullptr, nullptr, 0); + + if (ret == 0) + break; + } + errno = backup; + } + + template + static void notify_one_on_address(std::atomic& addr) + { + static_assert( + sizeof(T) == sizeof(WaitingWord) && alignof(T) == alignof(WaitingWord), + "T must be the same size and alignment as WaitingWord"); + syscall(SYS_futex, &addr, FUTEX_WAKE_PRIVATE, 1, nullptr, nullptr, 0); + } + + template + static void notify_all_on_address(std::atomic& addr) + { + static_assert( + sizeof(T) == sizeof(WaitingWord) && alignof(T) == alignof(WaitingWord), + "T must be the same size and alignment as WaitingWord"); + syscall( + SYS_futex, &addr, FUTEX_WAKE_PRIVATE, INT_MAX, nullptr, nullptr, 0); + } +# endif }; } // namespace snmalloc #endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_netbsd.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_netbsd.h index 6e91d98bf266..250826423e52 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_netbsd.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_netbsd.h @@ -14,6 +14,7 @@ */ extern "C" ssize_t _sys_writev(int fd, const struct iovec* iov, int iovcnt); extern "C" int _sys_fsync(int fd); + /// @} namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_noalloc.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_noalloc.h index 94bc61e020b5..49b0d4b6a482 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_noalloc.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_noalloc.h @@ -17,7 +17,7 @@ namespace snmalloc * The minimal subset of a PAL that we need for delegation */ template - concept PALNoAllocBase = IsPAL_static_sizes&& IsPAL_error; + concept PALNoAllocBase = IsPAL_static_sizes && IsPAL_error; #endif /** diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_open_enclave.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_open_enclave.h index be0f141beca4..4966eccc7d56 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_open_enclave.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_open_enclave.h @@ -20,6 +20,7 @@ namespace snmalloc UNUSED(str); oe_abort(); } + static constexpr size_t address_bits = Aal::address_bits; static constexpr size_t page_size = Aal::smallest_page_size; }; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_posix.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_posix.h index 6c9ae05e85af..1214ff327030 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_posix.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_posix.h @@ -6,10 +6,11 @@ #if defined(SNMALLOC_BACKTRACE_HEADER) # include SNMALLOC_BACKTRACE_HEADER #endif +#include #include #include +#include #include -#include #include #include #include @@ -130,8 +131,16 @@ namespace snmalloc | Entropy #endif ; - +#ifdef SNMALLOC_PAGESIZE + static_assert( + bits::is_pow2(SNMALLOC_PAGESIZE), "Page size must be a power of 2"); + static constexpr size_t page_size = SNMALLOC_PAGESIZE; +#elif defined(PAGESIZE) + static constexpr size_t page_size = + bits::max(Aal::smallest_page_size, static_cast(PAGESIZE)); +#else static 
constexpr size_t page_size = Aal::smallest_page_size; +#endif /** * Address bits are potentially mediated by some POSIX OSes, but generally @@ -399,6 +408,7 @@ namespace snmalloc uint64_t result; char buffer[sizeof(uint64_t)]; }; + ssize_t ret; int flags = O_RDONLY; #if defined(O_CLOEXEC) diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_timer_default.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_timer_default.h index c7761effe57e..d70abd5262be 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_timer_default.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_timer_default.h @@ -4,8 +4,6 @@ #include "pal_consts.h" #include "pal_ds.h" -#include - namespace snmalloc { template diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_windows.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_windows.h index 2ab0bfc1ff26..d025b1bebb36 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_windows.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_windows.h @@ -20,6 +20,7 @@ # if (NTDDI_VERSION >= NTDDI_WIN10_RS5) && \ (WINVER >= _WIN32_WINNT_WIN10) && !defined(USE_SYSTEMATIC_TESTING) # define PLATFORM_HAS_VIRTUALALLOC2 +# define PLATFORM_HAS_WAITONADDRESS # endif # endif @@ -60,6 +61,9 @@ namespace snmalloc Time # if defined(PLATFORM_HAS_VIRTUALALLOC2) && !defined(USE_SYSTEMATIC_TESTING) | AlignedAllocation +# endif +# if defined(PLATFORM_HAS_WAITONADDRESS) + | WaitOnAddress # endif ; @@ -231,6 +235,32 @@ namespace snmalloc std::chrono::steady_clock::now().time_since_epoch()) .count()); } + +# ifdef PLATFORM_HAS_WAITONADDRESS + using WaitingWord = char; + + template + static void wait_on_address(std::atomic& addr, T expected) + { + while (addr.load(std::memory_order_relaxed) == expected) + { + if (::WaitOnAddress(&addr, &expected, sizeof(T), INFINITE)) + break; + } + } + + template + static void notify_one_on_address(std::atomic& addr) + { + ::WakeByAddressSingle(&addr); + } + + template + static void notify_all_on_address(std::atomic& addr) + { + ::WakeByAddressAll(&addr); + } +# endif }; } #endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/snmalloc.h b/3rdparty/exported/snmalloc/src/snmalloc/snmalloc.h index 47bd6e78a30d..b05b1a330f34 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/snmalloc.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/snmalloc.h @@ -3,8 +3,22 @@ // Core implementation of snmalloc independent of the configuration mode #include "snmalloc_core.h" -// If the user has defined SNMALLOC_PROVIDE_OWN_CONFIG, this include does -// nothing. Otherwise, it provide a default configuration of snmalloc::Alloc. +// Provides the global configuration for the snmalloc implementation. #include "backend/globalconfig.h" + +// If you define SNMALLOC_PROVIDE_OWN_CONFIG then you must provide your own +// definition of `snmalloc::Alloc` before including any files that include +// `snmalloc.h` or consume the global allocation APIs. +#ifndef SNMALLOC_PROVIDE_OWN_CONFIG +namespace snmalloc +{ + /** + * Create allocator type for this configuration. + */ + using Alloc = snmalloc::LocalAllocator< + snmalloc::StandardConfigClientMeta>; +} // namespace snmalloc +#endif + // User facing API surface, needs to know what `Alloc` is. 
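// For embedders that do define SNMALLOC_PROVIDE_OWN_CONFIG, a minimal
// client-side sketch looks like the following; the include path and the
// exact template arguments are assumptions modelled on the tests later in
// this patch, not part of this header:
//
//   #include <snmalloc/backend/globalconfig.h>
//   namespace snmalloc
//   {
//     using Alloc = LocalAllocator<
//       StandardConfigClientMeta<NoClientMetaDataProvider>>;
//   }
//   #define SNMALLOC_PROVIDE_OWN_CONFIG
//   #include <snmalloc/snmalloc.h>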
#include "snmalloc_front.h" diff --git a/3rdparty/exported/snmalloc/src/test/func/cheri/cheri.cc b/3rdparty/exported/snmalloc/src/test/func/cheri/cheri.cc index 4efaec6b7a42..cde8be071af3 100644 --- a/3rdparty/exported/snmalloc/src/test/func/cheri/cheri.cc +++ b/3rdparty/exported/snmalloc/src/test/func/cheri/cheri.cc @@ -133,8 +133,7 @@ int main() static_assert( std::is_same_v>); - LocalCache lc{&StandardConfig::unused_remote}; - auto* ca = AllocPool::acquire(&lc); + auto* ca = AllocPool::acquire(); SNMALLOC_CHECK(cap_len_is(ca, sizeof(*ca))); SNMALLOC_CHECK(cap_vmem_perm_is(ca, false)); diff --git a/3rdparty/exported/snmalloc/src/test/func/client_meta/client_meta.cc b/3rdparty/exported/snmalloc/src/test/func/client_meta/client_meta.cc new file mode 100644 index 000000000000..0359666bb0d6 --- /dev/null +++ b/3rdparty/exported/snmalloc/src/test/func/client_meta/client_meta.cc @@ -0,0 +1,69 @@ +/** + * This test performs a very simple use of the client_meta data feature in + * snmalloc. + */ + +#include "test/setup.h" + +#include +#include +#include +#include + +namespace snmalloc +{ + // Create an allocator that stores an std::atomic> per allocation. + using Alloc = snmalloc::LocalAllocator>>>; +} + +#define SNMALLOC_PROVIDE_OWN_CONFIG +#include + +int main() +{ +#ifdef SNMALLOC_PASS_THROUGH + // This test does not make sense in pass-through + return 0; +#else + // Allocate a bunch of objects, and store the index into the meta-data. + std::vector ptrs; + for (size_t i = 0; i < 10000; i++) + { + auto p = snmalloc::libc::malloc(1024); + auto& meta = snmalloc::libc::get_client_meta_data(p); + meta = i; + ptrs.push_back(p); + memset(p, (uint8_t)i, 1024); + } + + // Check meta-data contains expected value, and that the memory contains + // the expected pattern. + for (size_t i = 0; i < 10000; i++) + { + auto p = ptrs[i]; + auto& meta = snmalloc::libc::get_client_meta_data(p); + if (meta != i) + { + std::cout << "Failed at index " << i << std::endl; + abort(); + } + for (size_t j = 0; j < 1024; j++) + { + if (reinterpret_cast(p)[j] != (uint8_t)i) + { + std::cout << "Failed at index " << i << " byte " << j << std::endl; + abort(); + } + } + snmalloc::libc::free(p); + } + + // Access in a read-only way meta-data associated with the stack. + // This would fail if it was accessed for write. + auto& meta = snmalloc::libc::get_client_meta_data_const(&ptrs); + std::cout << "meta for stack" << meta << std::endl; + + return 0; +#endif +} diff --git a/3rdparty/exported/snmalloc/src/test/func/domestication/domestication.cc b/3rdparty/exported/snmalloc/src/test/func/domestication/domestication.cc index 03cc9ba3bdf6..390bba741757 100644 --- a/3rdparty/exported/snmalloc/src/test/func/domestication/domestication.cc +++ b/3rdparty/exported/snmalloc/src/test/func/domestication/domestication.cc @@ -17,13 +17,15 @@ int main() // Specify type of allocator # define SNMALLOC_PROVIDE_OWN_CONFIG + namespace snmalloc { class CustomConfig : public CommonConfig { public: using Pal = DefaultPal; - using PagemapEntry = DefaultPagemapEntry; + using PagemapEntry = DefaultPagemapEntry; + using ClientMeta = NoClientMetaDataProvider; private: using ConcretePagemap = @@ -62,14 +64,12 @@ namespace snmalloc * C++, and not just its initializer fragment, to initialize a non-prefix * subset of the flags (in any order, at that). 
*/ - static constexpr Flags Options = []() constexpr - { + static constexpr Flags Options = []() constexpr { Flags opts = {}; opts.QueueHeadsAreTame = false; opts.HasDomesticate = true; return opts; - } - (); + }(); static GlobalPoolState& pool() { @@ -138,7 +138,8 @@ int main() LocalEntropy entropy; entropy.init(); - RemoteAllocator::key_global = FreeListKey(entropy.get_free_list_key()); + entropy.make_free_list_key(RemoteAllocator::key_global); + entropy.make_free_list_key(freelist::Object::key_root); auto alloc1 = new Alloc(); diff --git a/3rdparty/exported/snmalloc/src/test/func/malloc/malloc.cc b/3rdparty/exported/snmalloc/src/test/func/malloc/malloc.cc index 1d4c31da9e07..6549e583435a 100644 --- a/3rdparty/exported/snmalloc/src/test/func/malloc/malloc.cc +++ b/3rdparty/exported/snmalloc/src/test/func/malloc/malloc.cc @@ -375,6 +375,6 @@ int main(int argc, char** argv) our_malloc_usable_size(nullptr) == 0, "malloc_usable_size(nullptr) should be zero"); - snmalloc::debug_check_empty(); + snmalloc::debug_check_empty(); return 0; } diff --git a/3rdparty/exported/snmalloc/src/test/func/memcpy/func-memcpy.cc b/3rdparty/exported/snmalloc/src/test/func/memcpy/func-memcpy.cc index ff1856fac007..f435b457266e 100644 --- a/3rdparty/exported/snmalloc/src/test/func/memcpy/func-memcpy.cc +++ b/3rdparty/exported/snmalloc/src/test/func/memcpy/func-memcpy.cc @@ -57,6 +57,9 @@ extern "C" void abort() { longjmp(jmp, 1); } +# if __has_builtin(__builtin_trap) + __builtin_trap(); +# endif exit(-1); } @@ -152,7 +155,11 @@ int main() // Some sizes to check for out-of-bounds access. As we are only able to // catch overflows past the end of the sizeclass-padded allocation, make // sure we don't try to test on smaller allocations. - std::initializer_list sizes = {MIN_ALLOC_SIZE, 1024, 2 * 1024 * 1024}; + + static constexpr size_t min_class_size = + sizeclass_to_size(size_to_sizeclass(MIN_ALLOC_SIZE)); + + std::initializer_list sizes = {min_class_size, 1024, 2 * 1024 * 1024}; static_assert( MIN_ALLOC_SIZE < 1024, "Can't detect overflow except at sizeclass boundaries"); diff --git a/3rdparty/exported/snmalloc/src/test/func/memory/memory.cc b/3rdparty/exported/snmalloc/src/test/func/memory/memory.cc index 2a2ada2eef07..7d176f43de01 100644 --- a/3rdparty/exported/snmalloc/src/test/func/memory/memory.cc +++ b/3rdparty/exported/snmalloc/src/test/func/memory/memory.cc @@ -184,7 +184,7 @@ void test_calloc() alloc.dealloc(p, size); } - snmalloc::debug_check_empty(); + snmalloc::debug_check_empty(); } void test_double_alloc() @@ -229,7 +229,7 @@ void test_double_alloc() } } } - snmalloc::debug_check_empty(); + snmalloc::debug_check_empty(); } void test_external_pointer() @@ -237,7 +237,9 @@ void test_external_pointer() // Malloc does not have an external pointer querying mechanism. 
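// Sketch of the property exercised below (the Boundary enumerator names are
// assumed from the allocator headers; they are not shown in this patch): for
// a pointer q anywhere inside an object that starts at p,
//   alloc.external_pointer<Start>(q)      == p
//   alloc.external_pointer<OnePastEnd>(q) == one byte past the object's end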
auto& alloc = ThreadAlloc::get(); - for (uint8_t sc = 0; sc < NUM_SMALL_SIZECLASSES; sc++) + for (snmalloc::smallsizeclass_t sc = size_to_sizeclass(MIN_ALLOC_SIZE); + sc < NUM_SMALL_SIZECLASSES; + sc++) { size_t size = sizeclass_to_size(sc); void* p1 = alloc.alloc(size); @@ -273,7 +275,7 @@ void test_external_pointer() alloc.dealloc(p1, size); } - snmalloc::debug_check_empty(); + snmalloc::debug_check_empty(); }; void check_offset(void* base, void* interior) @@ -470,7 +472,9 @@ void test_static_sized_allocs() void test_remaining_bytes() { auto& alloc = ThreadAlloc::get(); - for (size_t sc = 0; sc < NUM_SMALL_SIZECLASSES; sc++) + for (snmalloc::smallsizeclass_t sc = size_to_sizeclass(MIN_ALLOC_SIZE); + sc < NUM_SMALL_SIZECLASSES; + sc++) { auto size = sizeclass_to_size(sc); char* p = (char*)alloc.alloc(size); diff --git a/3rdparty/exported/snmalloc/src/test/func/miracle_ptr/miracle_ptr.cc b/3rdparty/exported/snmalloc/src/test/func/miracle_ptr/miracle_ptr.cc new file mode 100644 index 000000000000..c4e4783bbd15 --- /dev/null +++ b/3rdparty/exported/snmalloc/src/test/func/miracle_ptr/miracle_ptr.cc @@ -0,0 +1,204 @@ +/** + * This file demonstrates how the snmalloc library could be implemented to + * provide a miracle pointer like feature. This is not a hardened + * implementation and is purely for illustrative purposes. + * + * Do not use as is. + */ + +#ifdef SNMALLOC_THREAD_SANITIZER_ENABLED +int main() +{ + return 0; +} +#else + +# include "test/setup.h" + +# include +# include +# include +# include + +namespace snmalloc +{ + // Instantiate the allocator with a client meta data provider that uses an + // atomic size_t to store the reference count. + using Alloc = snmalloc::LocalAllocator>>>; +} + +# define SNMALLOC_PROVIDE_OWN_CONFIG +# include + +SNMALLOC_SLOW_PATH void error(std::string msg) +{ + std::cout << msg << std::endl; + abort(); +} + +SNMALLOC_FAST_PATH_INLINE void check(bool b, std::string msg) +{ + if (SNMALLOC_UNLIKELY(!b)) + error(msg); +} + +namespace snmalloc::miracle +{ + // snmalloc meta-data representation + // * 2n + 1: Represents an object that has not been deallocated with n + // additional references to it + // * 2n : Represents a deallocated object that + // has n additional references to it + + inline void* malloc(size_t size) + { + auto p = snmalloc::libc::malloc(size); + if (SNMALLOC_UNLIKELY(p == nullptr)) + return nullptr; + + snmalloc::libc::get_client_meta_data(p) = 1; + return p; + } + + inline void free(void* ptr) + { + if (ptr == nullptr) + return; + + // TODO could build a check into this that it is the start of the object? + auto previous = + snmalloc::libc::get_client_meta_data(ptr).fetch_add((size_t)-1); + + if (SNMALLOC_LIKELY(previous == 1)) + { + std::cout << "Freeing " << ptr << std::endl; + snmalloc::libc::free(ptr); + return; + } + + check((previous & 1) == 1, "Double free detected"); + + // We have additional references to this object. + // We should not free it. + // TOOD this assumes this is not an internal pointer. + memset(ptr, 0, snmalloc::libc::malloc_usable_size(ptr)); + } + + inline void acquire(void* p) + { + auto previous = + snmalloc::libc::get_client_meta_data(p).fetch_add((size_t)2); + + // Can we take new pointers to a deallocated object? 
+ check((previous & 1) == 1, "Acquiring a deallocated object"); + } + + inline void release(void* p) + { + auto previous = + snmalloc::libc::get_client_meta_data(p).fetch_add((size_t)-2); + + if (previous > 2) + return; + + check(previous == 2, "Releasing an object with insufficient references"); + + std::cout << "Freeing from release " << p << std::endl; + snmalloc::libc::free(p); + } + + /** + * This class can be used to replace a raw pointer. It will automatically use + * the underlying backup reference counting design from the miracle pointer + * docs. + */ + template + class raw_ptr + { + T* p; + + public: + raw_ptr() : p(nullptr) {} + + raw_ptr(T* p) : p(p) + { + snmalloc::miracle::acquire(p); + } + + T& operator*() + { + return *p; + } + + ~raw_ptr() + { + if (p == nullptr) + return; + snmalloc::miracle::release(p); + } + + raw_ptr(const raw_ptr& rp) : p(rp.p) + { + snmalloc::miracle::acquire(p); + } + + raw_ptr& operator=(const raw_ptr& other) + { + p = other.p; + snmalloc::miracle::acquire(other.p); + return *this; + } + + raw_ptr(raw_ptr&& other) : p(other.p) + { + other.p = nullptr; + } + + raw_ptr& operator=(raw_ptr&& other) + { + p = other.p; + other.p = nullptr; + return *this; + } + }; +} // namespace snmalloc::miracle + +/** + * Overload new and delete to use the "miracle pointer" implementation. + */ +void* operator new(size_t size) +{ + return snmalloc::miracle::malloc(size); +} + +void operator delete(void* p) +{ + snmalloc::miracle::free(p); +} + +void operator delete(void* p, size_t) +{ + snmalloc::miracle::free(p); +} + +int main() +{ +# ifndef SNMALLOC_PASS_THROUGH + snmalloc::miracle::raw_ptr p; + { + auto up1 = std::make_unique(41); + auto up = std::make_unique(42); + auto up2 = std::make_unique(40); + auto up3 = std::make_unique(39); + p = up.get(); + check(*p == 42, "Failed to set p"); + } + // Still safe to access here. The unique_ptr has been destroyed, but the + // raw_ptr has kept the memory live. + // Current implementation zeros the memory when the unique_ptr is destroyed. 
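// Illustrative trace of the reference count for `up`'s allocation under the
// 2n+1 / 2n encoding above: operator new stores 1, `p = up.get()` acquires
// and makes it 3, destroying the unique_ptr drops it to 2 (even, so the
// memory is only zeroed, not freed), and p's destructor finally releases
// 2 -> 0, at which point snmalloc::libc::free is actually called.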
+ check(*p == 0, "Failed to keep memory live"); +# endif + return 0; +} +#endif \ No newline at end of file diff --git a/3rdparty/exported/snmalloc/src/test/func/pagemap/pagemap.cc b/3rdparty/exported/snmalloc/src/test/func/pagemap/pagemap.cc index dca7bf382990..a0b53689f61e 100644 --- a/3rdparty/exported/snmalloc/src/test/func/pagemap/pagemap.cc +++ b/3rdparty/exported/snmalloc/src/test/func/pagemap/pagemap.cc @@ -12,10 +12,13 @@ using namespace snmalloc; static constexpr size_t GRANULARITY_BITS = 20; + struct T { size_t v = 99; + T(size_t v) : v(v) {} + T() {} }; diff --git a/3rdparty/exported/snmalloc/src/test/func/pool/pool.cc b/3rdparty/exported/snmalloc/src/test/func/pool/pool.cc index 7eeff87438cb..2ed960a454a4 100644 --- a/3rdparty/exported/snmalloc/src/test/func/pool/pool.cc +++ b/3rdparty/exported/snmalloc/src/test/func/pool/pool.cc @@ -14,17 +14,16 @@ struct PoolAEntry : Pooled PoolAEntry() : field(1){}; }; -using PoolA = Pool; +using PoolA = Pool; struct PoolBEntry : Pooled { int field; PoolBEntry() : field(0){}; - PoolBEntry(int f) : field(f){}; }; -using PoolB = Pool; +using PoolB = Pool; struct PoolLargeEntry : Pooled { @@ -41,18 +40,18 @@ struct PoolLargeEntry : Pooled }; }; -using PoolLarge = Pool; +using PoolLarge = Pool; template struct PoolSortEntry : Pooled> { int field; - PoolSortEntry(int f) : field(f){}; + PoolSortEntry() : field(1){}; }; template -using PoolSort = Pool, Alloc::Config>; +using PoolSort = Pool>; void test_alloc() { @@ -73,13 +72,8 @@ void test_constructor() SNMALLOC_CHECK(ptr2 != nullptr); SNMALLOC_CHECK(ptr2->field == 0); - auto ptr3 = PoolB::acquire(1); - SNMALLOC_CHECK(ptr3 != nullptr); - SNMALLOC_CHECK(ptr3->field == 1); - PoolA::release(ptr1); PoolB::release(ptr2); - PoolB::release(ptr3); } void test_alloc_many() @@ -181,8 +175,8 @@ void test_sort() // This test checks that `sort` puts the elements in the right order, // so it is the same as if they had been allocated in that order. 
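// Note the API shape exercised here: with this patch, pool entries are
// default-constructed, so a typical use is
//   using MyPool = snmalloc::Pool<MyEntry>;  // MyEntry derives from Pooled<MyEntry>
//   auto* e = MyPool::acquire();             // no constructor arguments any more
//   MyPool::release(e);
// (MyEntry / MyPool are illustrative names, not part of this patch.)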
- auto a1 = PoolSort::acquire(1); - auto a2 = PoolSort::acquire(1); + auto a1 = PoolSort::acquire(); + auto a2 = PoolSort::acquire(); auto position1 = position(a1); auto position2 = position(a2); @@ -201,8 +195,8 @@ void test_sort() PoolSort::sort(); - auto b1 = PoolSort::acquire(1); - auto b2 = PoolSort::acquire(1); + auto b1 = PoolSort::acquire(); + auto b2 = PoolSort::acquire(); SNMALLOC_CHECK(position1 == position(b1)); SNMALLOC_CHECK(position2 == position(b2)); diff --git a/3rdparty/exported/snmalloc/src/test/func/redblack/redblack.cc b/3rdparty/exported/snmalloc/src/test/func/redblack/redblack.cc index f13c72ebb99e..164a5978f034 100644 --- a/3rdparty/exported/snmalloc/src/test/func/redblack/redblack.cc +++ b/3rdparty/exported/snmalloc/src/test/func/redblack/redblack.cc @@ -4,7 +4,6 @@ #include "test/xoroshiro.h" #include -#include #include #include @@ -23,28 +22,36 @@ struct NodeRef static constexpr size_t offset = 10000; size_t* ptr; + constexpr NodeRef(size_t* p) : ptr(p) {} + constexpr NodeRef() : ptr(nullptr) {} + constexpr NodeRef(const NodeRef& other) : ptr(other.ptr) {} + constexpr NodeRef(NodeRef&& other) : ptr(other.ptr) {} bool operator!=(const NodeRef& other) const { return ptr != other.ptr; } + NodeRef& operator=(const NodeRef& other) { ptr = other.ptr; return *this; } + void set(uint16_t val) { *ptr = ((size_t(val) + offset) << 1) + (*ptr & 1); } + explicit operator uint16_t() { return uint16_t((*ptr >> 1) - offset); } + explicit operator size_t*() { return ptr; diff --git a/3rdparty/exported/snmalloc/src/test/func/sandbox/sandbox.cc b/3rdparty/exported/snmalloc/src/test/func/sandbox/sandbox.cc index 519488070084..69ce99cfae5a 100644 --- a/3rdparty/exported/snmalloc/src/test/func/sandbox/sandbox.cc +++ b/3rdparty/exported/snmalloc/src/test/func/sandbox/sandbox.cc @@ -25,6 +25,7 @@ namespace { SNMALLOC_CHECK(0 && "Should never be called!"); } + /** * Sandbox class. Allocates a memory region and an allocator that can * allocate into this from the outside. @@ -172,7 +173,7 @@ namespace * sandbox but allocates memory inside. */ struct RemoteAllocator queue; - } * shared_state; + }* shared_state; /** * The memory provider for this sandbox. 
@@ -195,7 +196,7 @@ namespace Sandbox(size_t sb_size) : start(alloc_sandbox_heap(sb_size)), top(pointer_offset(start, sb_size)), - shared_state(new (start) SharedState()), + shared_state(new(start) SharedState()), state( pointer_offset(CapPtr(start), sizeof(SharedState)), sb_size - sizeof(SharedState)), diff --git a/3rdparty/exported/snmalloc/src/test/func/sizeclass/sizeclass.cc b/3rdparty/exported/snmalloc/src/test/func/sizeclass/sizeclass.cc index d42794e44845..836c62111f9b 100644 --- a/3rdparty/exported/snmalloc/src/test/func/sizeclass/sizeclass.cc +++ b/3rdparty/exported/snmalloc/src/test/func/sizeclass/sizeclass.cc @@ -8,6 +8,9 @@ snmalloc::smallsizeclass_t size_to_sizeclass(size_t size) return snmalloc::size_to_sizeclass(size); } +static constexpr snmalloc::smallsizeclass_t minimum_sizeclass = + snmalloc::size_to_sizeclass_const(snmalloc::MIN_ALLOC_SIZE); + void test_align_size() { bool failed = false; @@ -72,6 +75,10 @@ int main(int, char**) bool failed = false; size_t size_low = 0; + std::cout << "Configured with minimum allocation size " + << snmalloc::MIN_ALLOC_SIZE << " and step size " + << snmalloc::MIN_ALLOC_STEP_SIZE << std::endl; + std::cout << "0 has sizeclass: " << (size_t)snmalloc::size_to_sizeclass(0) << std::endl; @@ -86,12 +93,14 @@ int main(int, char**) slab_size != snmalloc::sizeclass_to_slab_size(sz)) { slab_size = snmalloc::sizeclass_to_slab_size(sz); - std::cout << std::endl; + std::cout << std::endl << "slab size: " << slab_size << std::endl; } size_t size = snmalloc::sizeclass_to_size(sz); std::cout << (size_t)sz << " |-> " - << "[" << size_low + 1 << ", " << size << "]" << std::endl; + << "[" << size_low + 1 << ", " << size << "]" + << (sz == minimum_sizeclass ? " is minimum class" : "") + << std::endl; if (size < size_low) { @@ -102,7 +111,30 @@ int main(int, char**) for (size_t i = size_low + 1; i <= size; i++) { - if (size_to_sizeclass(i) != sz) + /* All sizes should, via bit-math, come back to their class value */ + if (snmalloc::size_to_sizeclass_const(i) != sz) + { + std::cout << "Size " << i << " has _const sizeclass " + << (size_t)snmalloc::size_to_sizeclass_const(i) + << " but expected sizeclass " << (size_t)sz << std::endl; + failed = true; + } + + if (size < snmalloc::MIN_ALLOC_SIZE) + { + /* + * It is expected that these sizes have the "wrong" class from tabular + * lookup: they will have been clipped up to the minimum class. 
+ */ + if (size_to_sizeclass(i) != minimum_sizeclass) + { + std::cout << "Size " << i << " below minimum size; sizeclass " + << (size_t)size_to_sizeclass(i) << " not expected minimum " + << (size_t)minimum_sizeclass << std::endl; + failed = true; + } + } + else if (size_to_sizeclass(i) != sz) { std::cout << "Size " << i << " has sizeclass " << (size_t)size_to_sizeclass(i) << " but expected sizeclass " diff --git a/3rdparty/exported/snmalloc/src/test/func/statistics/stats.cc b/3rdparty/exported/snmalloc/src/test/func/statistics/stats.cc index c8db1cad762a..214a0bcf3cce 100644 --- a/3rdparty/exported/snmalloc/src/test/func/statistics/stats.cc +++ b/3rdparty/exported/snmalloc/src/test/func/statistics/stats.cc @@ -17,7 +17,7 @@ void debug_check_empty_1() auto r = a.alloc(size); - snmalloc::debug_check_empty(&result); + snmalloc::debug_check_empty(&result); if (result != false) { std::cout << "debug_check_empty failed to detect leaked memory:" << size @@ -27,7 +27,7 @@ void debug_check_empty_1() a.dealloc(r); - snmalloc::debug_check_empty(&result); + snmalloc::debug_check_empty(&result); if (result != true) { std::cout << "debug_check_empty failed to say empty:" << size << std::endl; @@ -36,7 +36,7 @@ void debug_check_empty_1() r = a.alloc(size); - snmalloc::debug_check_empty(&result); + snmalloc::debug_check_empty(&result); if (result != false) { std::cout << "debug_check_empty failed to detect leaked memory:" << size @@ -46,7 +46,7 @@ void debug_check_empty_1() a.dealloc(r); - snmalloc::debug_check_empty(&result); + snmalloc::debug_check_empty(&result); if (result != true) { std::cout << "debug_check_empty failed to say empty:" << size << std::endl; @@ -72,7 +72,7 @@ void debug_check_empty_2() } auto r = a.alloc(size); allocs.push_back(r); - snmalloc::debug_check_empty(&result); + snmalloc::debug_check_empty(&result); if (result != false) { std::cout << "False empty after " << i << " allocations of " << size @@ -88,7 +88,7 @@ void debug_check_empty_2() { std::cout << "." 
<< std::flush; } - snmalloc::debug_check_empty(&result); + snmalloc::debug_check_empty(&result); if (result != false) { std::cout << "False empty after " << i << " deallocations of " << size @@ -98,7 +98,7 @@ void debug_check_empty_2() a.dealloc(allocs[i]); } std::cout << std::endl; - snmalloc::debug_check_empty(); + snmalloc::debug_check_empty(); } int main() diff --git a/3rdparty/exported/snmalloc/src/test/func/thread_alloc_external/thread_alloc_external.cc b/3rdparty/exported/snmalloc/src/test/func/thread_alloc_external/thread_alloc_external.cc index 2b10ed8cbcd2..686c08dc487a 100644 --- a/3rdparty/exported/snmalloc/src/test/func/thread_alloc_external/thread_alloc_external.cc +++ b/3rdparty/exported/snmalloc/src/test/func/thread_alloc_external/thread_alloc_external.cc @@ -12,7 +12,8 @@ namespace snmalloc { - using Alloc = snmalloc::LocalAllocator; + using Alloc = snmalloc::LocalAllocator< + snmalloc::StandardConfigClientMeta>; } using namespace snmalloc; diff --git a/3rdparty/exported/snmalloc/src/test/func/two_alloc_types/alloc1.cc b/3rdparty/exported/snmalloc/src/test/func/two_alloc_types/alloc1.cc index 74996b5178b1..b4e0ae32af1b 100644 --- a/3rdparty/exported/snmalloc/src/test/func/two_alloc_types/alloc1.cc +++ b/3rdparty/exported/snmalloc/src/test/func/two_alloc_types/alloc1.cc @@ -10,6 +10,7 @@ // Specify type of allocator #define SNMALLOC_PROVIDE_OWN_CONFIG + namespace snmalloc { using CustomGlobals = FixedRangeConfig>; diff --git a/3rdparty/exported/snmalloc/src/test/func/two_alloc_types/main.cc b/3rdparty/exported/snmalloc/src/test/func/two_alloc_types/main.cc index b7f6ded9e7a4..74d0338dcc62 100644 --- a/3rdparty/exported/snmalloc/src/test/func/two_alloc_types/main.cc +++ b/3rdparty/exported/snmalloc/src/test/func/two_alloc_types/main.cc @@ -30,6 +30,7 @@ extern "C" void* enclave_malloc(size_t); extern "C" void enclave_free(void*); using namespace snmalloc; + int main() { setup(); diff --git a/3rdparty/exported/snmalloc/src/test/perf/contention/contention.cc b/3rdparty/exported/snmalloc/src/test/perf/contention/contention.cc index e266f0491020..c2cfd8f85c74 100644 --- a/3rdparty/exported/snmalloc/src/test/perf/contention/contention.cc +++ b/3rdparty/exported/snmalloc/src/test/perf/contention/contention.cc @@ -137,7 +137,7 @@ void test_tasks(size_t num_tasks, size_t count, size_t size) ParallelTest test(num_tasks); std::cout << "Task test, " << num_tasks << " threads, " << count - << " swaps per thread " << test.time() << "ticks" << std::endl; + << " swaps per thread " << test.time() << " ticks" << std::endl; for (size_t n = 0; n < swapsize; n++) { @@ -154,7 +154,7 @@ void test_tasks(size_t num_tasks, size_t count, size_t size) } #ifndef NDEBUG - snmalloc::debug_check_empty(); + snmalloc::debug_check_empty(); #endif }; diff --git a/3rdparty/exported/snmalloc/src/test/perf/external_pointer/externalpointer.cc b/3rdparty/exported/snmalloc/src/test/perf/external_pointer/externalpointer.cc index be3306cba00b..96d46582076a 100644 --- a/3rdparty/exported/snmalloc/src/test/perf/external_pointer/externalpointer.cc +++ b/3rdparty/exported/snmalloc/src/test/perf/external_pointer/externalpointer.cc @@ -47,7 +47,7 @@ namespace test alloc.dealloc(objects[i]); } - snmalloc::debug_check_empty(); + snmalloc::debug_check_empty(); } void test_external_pointer(xoroshiro::p128r64& r) diff --git a/3rdparty/exported/snmalloc/src/test/perf/memcpy/memcpy.cc b/3rdparty/exported/snmalloc/src/test/perf/memcpy/memcpy.cc index e3bee7d2c784..763dcd72e95d 100644 --- 
a/3rdparty/exported/snmalloc/src/test/perf/memcpy/memcpy.cc +++ b/3rdparty/exported/snmalloc/src/test/perf/memcpy/memcpy.cc @@ -1,5 +1,4 @@ -#include "snmalloc/global/memcpy.h" - +#include #include #include #include diff --git a/3rdparty/exported/snmalloc/src/test/perf/msgpass/msgpass.cc b/3rdparty/exported/snmalloc/src/test/perf/msgpass/msgpass.cc new file mode 100644 index 000000000000..7e639a02b829 --- /dev/null +++ b/3rdparty/exported/snmalloc/src/test/perf/msgpass/msgpass.cc @@ -0,0 +1,307 @@ +/** + * A simulation of a message-passing application workload for snmalloc. + * + * - N_PRODUCER producer threads allocate and queue spans of messages randomly, + * - to N_CONSUMER consumer threads, which dequeue messages and free() them. + * + * Optionally, N_PROXY threads act as both producers and consumers, forwarding + * received messages back to another queue rather than freeing them. + */ + +#include "test/opt.h" +#include "test/setup.h" +#include "test/usage.h" +#include "test/xoroshiro.h" + +constexpr static bool be_chatty = false; + +#include +#include +#include +#include +#include +#include +#include + +using namespace snmalloc; + +void chatty(const char* p, ...) +{ + if constexpr (be_chatty) + { + va_list va; + va_start(va, p); + vfprintf(stderr, p, va); + va_end(va); + } +} + +/* + * Interpret SNMALLOC_PASS_THROUGH ourselves to make this a bit more fair of a + * comparison, since relying of snmalloc itself to do the passing through + * results in it imposing its own idea of alignment onto the underlying + * allocator, which might result in it taking less optimized paths. + */ +#ifdef SNMALLOC_PASS_THROUGH +struct MyAlloc +{ + MyAlloc() {} + + void* alloc(size_t sz) + { + return malloc(sz); + } + + void dealloc(void* p) + { + free(p); + } +}; +#else +struct MyAlloc +{ + snmalloc::Alloc& a; + + MyAlloc() : a(ThreadAlloc::get()) {} + + void* alloc(size_t sz) + { + return a.alloc(sz); + } + + void dealloc(void* p) + { + a.dealloc(p); + } +}; +#endif + +/* + * FreeListMPSCQ make for convenient MPSC queues, so we use those for sending + * "messages". Each consumer or proxy has its own (source) queue. 
+ */ +static FreeListKey msgqueue_key{0xab2acada, 0xb2a01234, 0x56789abc}; +static constexpr address_t msgqueue_key_tweak = 0xfedc'ba98; + +struct params +{ + size_t N_PRODUCER; + size_t N_CONSUMER; + size_t N_PROXY; + size_t N_QUEUE; + size_t N_PRODUCER_BATCH; + size_t N_MAX_OUTSTANDING; + size_t N_MAX_BATCH_SIZE; + FreeListMPSCQ* msgqueue; // [N_QUEUE] +}; + +std::atomic producers_live; +std::atomic queue_gate; +std::atomic messages_outstanding; + +freelist::HeadPtr domesticate_nop(freelist::QueuePtr p) +{ + return freelist::HeadPtr::unsafe_from(p.unsafe_ptr()); +}; + +void consumer(const struct params* param, size_t qix) +{ + MyAlloc a{}; + auto& myq = param->msgqueue[qix]; + + chatty("Cl %zu q is %p\n", qix, &myq); + + do + { + size_t reap = 0; + + if (myq.can_dequeue(domesticate_nop, domesticate_nop)) + { + myq.dequeue( + domesticate_nop, + domesticate_nop, + [qix, &a, &reap](freelist::HeadPtr o) { + UNUSED(qix); + auto p = o.as_void().unsafe_ptr(); + chatty("Cl %zu free %p\n", qix, p); + a.dealloc(p); + reap++; + return true; + }); + } + + messages_outstanding -= reap; + + if (reap == 0) + { + std::this_thread::yield(); + } + else + { + chatty("Cl %zu reap %zu\n", qix, reap); + } + + } while (myq.can_dequeue(domesticate_nop, domesticate_nop) || + producers_live || (queue_gate > param->N_CONSUMER)); + + chatty("Cl %zu fini\n", qix); + a.dealloc(myq.destroy().unsafe_ptr()); +} + +void proxy(const struct params* param, size_t qix) +{ + auto& myq = param->msgqueue[qix]; + auto& qs = param->msgqueue; + + chatty("Px %zu q is %p\n", qix, &myq); + + xoroshiro::p128r32 r(1234 + qix, qix); + do + { + if (myq.can_dequeue(domesticate_nop, domesticate_nop)) + { + myq.dequeue( + domesticate_nop, domesticate_nop, [qs, qix, &r](freelist::HeadPtr o) { + auto rcptqix = r.next() % qix; + + chatty( + "Px %zu send %p to %zu\n", qix, o.as_void().unsafe_ptr(), rcptqix); + + qs[rcptqix].enqueue(o, o, domesticate_nop); + return true; + }); + } + + std::this_thread::yield(); + } while (myq.can_dequeue(domesticate_nop, domesticate_nop) || + producers_live || (queue_gate > qix + 1)); + + chatty("Px %zu fini\n", qix); + + MyAlloc().dealloc(myq.destroy().unsafe_ptr()); + queue_gate--; +} + +void producer(const struct params* param, size_t pix) +{ + MyAlloc a{}; + static constexpr size_t msgsizes[] = {48, 64, 96, 128}; + static constexpr size_t nmsgsizes = sizeof(msgsizes) / sizeof(msgsizes[0]); + + xoroshiro::p128r32 r(5489 + pix, pix); + + freelist::Builder batch; + batch.init(0, msgqueue_key, msgqueue_key_tweak); + + for (size_t batchix = param->N_PRODUCER_BATCH; batchix > 0; batchix--) + { + while (messages_outstanding >= param->N_MAX_OUTSTANDING) + { + std::this_thread::yield(); + } + + size_t nmsg = (r.next() & 15) + 1; + size_t msgsize = msgsizes[r.next() % nmsgsizes]; + + /* Allocate batch and form list */ + for (size_t msgix = 0; msgix < nmsg; msgix++) + { + auto msg = a.alloc(msgsize); + chatty("Pd %zu make %p\n", pix, msg); + + auto msgc = capptr::Alloc::unsafe_from(msg) + .template as_reinterpret>(); + batch.add(msgc, msgqueue_key, msgqueue_key_tweak); + } + + /* Post to random queue */ + auto [bfirst, blast] = + batch.extract_segment(msgqueue_key, msgqueue_key_tweak); + auto rcptqix = r.next() % param->N_QUEUE; + param->msgqueue[rcptqix].enqueue(bfirst, blast, domesticate_nop); + messages_outstanding += nmsg; + + chatty("Pd %zu send %zu to %zu\n", pix, nmsg, rcptqix); + + /* Occasionally yield the CPU */ + if ((batchix & 0xF) == 1) + std::this_thread::yield(); + } + + chatty("Pd %zu fini\n", pix); +} 
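/*
 * Editorial sketch, not part of the upstream patch: the smallest send/receive
 * round trip over a FreeListMPSCQ, distilled from producer() and consumer()
 * above. It reuses MyAlloc, msgqueue_key, msgqueue_key_tweak and
 * domesticate_nop from this file; template arguments (stripped throughout
 * this diff) are assumed to match the msgqueue and batch declarations above,
 * so this is illustrative only.
 *
 *   MyAlloc a{};
 *   FreeListMPSCQ q;            // same instantiation as param.msgqueue above
 *   q.init();
 *
 *   // Send: wrap one allocation as a freelist segment and enqueue it.
 *   freelist::Builder b;        // as in producer()
 *   b.init(0, msgqueue_key, msgqueue_key_tweak);
 *   auto msg = capptr::Alloc::unsafe_from(a.alloc(64))
 *                .template as_reinterpret();   // target type as in producer()
 *   b.add(msg, msgqueue_key, msgqueue_key_tweak);
 *   auto [first, last] = b.extract_segment(msgqueue_key, msgqueue_key_tweak);
 *   q.enqueue(first, last, domesticate_nop);
 *
 *   // Receive: drain the queue, freeing each message; returning true from
 *   // the callback keeps the drain going.
 *   if (q.can_dequeue(domesticate_nop, domesticate_nop))
 *     q.dequeue(domesticate_nop, domesticate_nop, [&](freelist::HeadPtr o) {
 *       a.dealloc(o.as_void().unsafe_ptr());
 *       return true;
 *     });
 *
 *   a.dealloc(q.destroy().unsafe_ptr());
 */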
+ +int main(int argc, char** argv) +{ + struct params param; + + opt::Opt opt(argc, argv); + param.N_PRODUCER = opt.is("--producers", 3); + param.N_CONSUMER = opt.is("--consumers", 3); + param.N_PROXY = opt.is("--proxies", 2); + param.N_PRODUCER_BATCH = opt.is("--batches", 1024 * 1024); + param.N_MAX_OUTSTANDING = opt.is("--max-out", 4 * 1024); + param.N_MAX_BATCH_SIZE = opt.is("--max-batch", 16); + + std::cout << "msgpass --producers=" << param.N_PRODUCER + << " --consumers=" << param.N_CONSUMER + << " --proxies=" << param.N_PROXY + << " --batches=" << param.N_PRODUCER_BATCH + << " --max-out=" << param.N_MAX_OUTSTANDING + << " --max-batch=" << param.N_MAX_BATCH_SIZE << std::endl; + + param.N_QUEUE = param.N_CONSUMER + param.N_PROXY; + param.msgqueue = + new FreeListMPSCQ[param.N_QUEUE]; + + auto* producer_threads = new std::thread[param.N_PRODUCER]; + auto* queue_threads = new std::thread[param.N_QUEUE]; + + for (size_t i = 0; i < param.N_QUEUE; i++) + { + param.msgqueue[i].init(); + } + + producers_live = true; + queue_gate = param.N_QUEUE; + messages_outstanding = 0; + + /* Spawn consumers */ + for (size_t i = 0; i < param.N_CONSUMER; i++) + { + queue_threads[i] = std::thread(consumer, ¶m, i); + } + + /* Spawn proxies */ + for (size_t i = param.N_CONSUMER; i < param.N_QUEUE; i++) + { + queue_threads[i] = std::thread(proxy, ¶m, i); + } + + /* Spawn producers */ + for (size_t i = 0; i < param.N_PRODUCER; i++) + { + producer_threads[i] = std::thread(producer, ¶m, i); + } + + /* Wait for producers to finish */ + for (size_t i = 0; i < param.N_PRODUCER; i++) + { + producer_threads[i].join(); + } + producers_live = false; + + /* Wait for proxies and consumers to finish */ + for (size_t i = 0; i < param.N_QUEUE; i++) + { + queue_threads[param.N_QUEUE - 1 - i].join(); + } + + delete[] producer_threads; + delete[] queue_threads; + + /* Ensure that we have not lost any allocations */ + debug_check_empty(); + + return 0; +} diff --git a/3rdparty/exported/snmalloc/src/test/perf/singlethread/singlethread.cc b/3rdparty/exported/snmalloc/src/test/perf/singlethread/singlethread.cc index b93dcd428a2a..431d40d2494e 100644 --- a/3rdparty/exported/snmalloc/src/test/perf/singlethread/singlethread.cc +++ b/3rdparty/exported/snmalloc/src/test/perf/singlethread/singlethread.cc @@ -34,9 +34,9 @@ void test_alloc_dealloc(size_t count, size_t size, bool write) { auto it = set.begin(); void* p = *it; - alloc.dealloc(p, size); set.erase(it); SNMALLOC_CHECK(set.find(p) == set.end()); + alloc.dealloc(p, size); } // alloc 1x objects @@ -60,7 +60,7 @@ void test_alloc_dealloc(size_t count, size_t size, bool write) } } - snmalloc::debug_check_empty(); + snmalloc::debug_check_empty(); } int main(int, char**) diff --git a/3rdparty/exported/snmalloc/src/test/perf/startup/startup.cc b/3rdparty/exported/snmalloc/src/test/perf/startup/startup.cc new file mode 100644 index 000000000000..46e18f90d432 --- /dev/null +++ b/3rdparty/exported/snmalloc/src/test/perf/startup/startup.cc @@ -0,0 +1,96 @@ +#include "test/opt.h" +#include "test/setup.h" +#include "test/usage.h" +#include "test/xoroshiro.h" + +#include +#include +#include +#include +#include + +using namespace snmalloc; + +std::vector counters{}; + +template +class ParallelTest +{ +private: + std::atomic flag = false; + std::atomic ready = 0; + uint64_t start; + uint64_t end; + std::atomic complete = 0; + size_t cores; + F f; + + void run(size_t id) + { + auto prev = ready.fetch_add(1); + if (prev + 1 == cores) + { + start = Aal::tick(); + flag = true; + } + while (!flag) + 
Aal::pause(); + + f(id); + + prev = complete.fetch_add(1); + if (prev + 1 == cores) + { + end = Aal::tick(); + } + } + +public: + ParallelTest(F&& f, size_t cores) : cores(cores), f(std::forward(f)) + { + std::thread* t = new std::thread[cores]; + + for (size_t i = 0; i < cores; i++) + { + t[i] = std::thread(&ParallelTest::run, this, i); + } + // Wait for all the threads. + for (size_t i = 0; i < cores; i++) + { + t[i].join(); + } + + delete[] t; + } + + uint64_t time() + { + return end - start; + } +}; + +int main() +{ + auto nthreads = std::thread::hardware_concurrency(); + counters.resize(nthreads); + + ParallelTest test( + [](size_t id) { + auto start = Aal::tick(); + auto& alloc = snmalloc::ThreadAlloc::get(); + alloc.dealloc(alloc.alloc(1)); + auto end = Aal::tick(); + counters[id] = end - start; + }, + nthreads); + + std::cout << "Taken: " << test.time() << std::endl; + std::sort(counters.begin(), counters.end()); + uint64_t start = 0; + for (auto counter : counters) + { + std::cout << "Thread time " << counter << " (" << counter - start << ")" + << std::endl; + start = counter; + } +} \ No newline at end of file diff --git a/3rdparty/exported/snmalloc/src/test/setup.h b/3rdparty/exported/snmalloc/src/test/setup.h index 61f9a991c178..deb903de6819 100644 --- a/3rdparty/exported/snmalloc/src/test/setup.h +++ b/3rdparty/exported/snmalloc/src/test/setup.h @@ -97,12 +97,14 @@ void setup() } # else # include + void error_handle(int signal) { snmalloc::UNUSED(signal); snmalloc::error("Seg Fault"); _exit(1); } + void setup() { signal(SIGSEGV, error_handle); diff --git a/CMakeLists.txt b/CMakeLists.txt index 4cbe3559e194..b2142ca5fea7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -229,6 +229,7 @@ else() set(SNMALLOC_HEADER_ONLY_LIBRARY ON) add_subdirectory(3rdparty/exported/snmalloc EXCLUDE_FROM_ALL) set(SNMALLOC_COMPILE_OPTIONS "-mcx16") + set(SNMALLOC_INCLUDE_DIRS 3rdparty/exported/snmalloc/src) list(APPEND CCHOST_SOURCES src/host/snmalloc.cpp) endif() @@ -244,7 +245,9 @@ add_san(cchost) target_compile_options( cchost PRIVATE ${COMPILE_LIBCXX} ${SNMALLOC_COMPILE_OPTIONS} ) -target_include_directories(cchost PRIVATE ${CCF_GENERATED_DIR}) +target_include_directories( + cchost PRIVATE ${CCF_GENERATED_DIR} ${SNMALLOC_INCLUDE_DIRS} +) if(COMPILE_TARGET STREQUAL "snp") target_compile_definitions(cchost PUBLIC PLATFORM_SNP) diff --git a/cgmanifest.json b/cgmanifest.json index efca9ac007dd..6c6ef7ff602b 100644 --- a/cgmanifest.json +++ b/cgmanifest.json @@ -60,7 +60,7 @@ "type": "git", "git": { "repositoryUrl": "https://github.com/microsoft/snmalloc", - "commitHash": "dc1268886a5d49d38a54e5d1402b5924a71fee0b" + "commitHash": "564c88b07c53728ec90a88d7d34d0f74d3b0bfff" } } }, diff --git a/src/host/snmalloc.cpp b/src/host/snmalloc.cpp index 6e5742950d8c..92ec79a6735e 100644 --- a/src/host/snmalloc.cpp +++ b/src/host/snmalloc.cpp @@ -2,9 +2,11 @@ // Licensed under the Apache 2.0 License. #define NO_BOOTSTRAP_ALLOCATOR +#define SNMALLOC_USE_WAIT_ON_ADDRESS 0 #ifndef NDEBUG # define NDEBUG #endif +#include "snmalloc/src/snmalloc/override/malloc.cc" #include "snmalloc/src/snmalloc/override/new.cc" \ No newline at end of file
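
Editorial note on the final hunk: reconstructed from the context lines of that hunk, the CCF host shim src/host/snmalloc.cpp reads as follows after this patch. It now compiles snmalloc's malloc/free overrides alongside the existing operator new/delete overrides, and defines SNMALLOC_USE_WAIT_ON_ADDRESS as 0, which appears to opt out of the wait-on-address based locking introduced in snmalloc 0.7.0. The first line of the file lies outside the hunk and is shown here as a placeholder.

// (Copyright line unchanged; it sits outside the hunk above.)
// Licensed under the Apache 2.0 License.

#define NO_BOOTSTRAP_ALLOCATOR
#define SNMALLOC_USE_WAIT_ON_ADDRESS 0
#ifndef NDEBUG
#  define NDEBUG
#endif
#include "snmalloc/src/snmalloc/override/malloc.cc"
#include "snmalloc/src/snmalloc/override/new.cc"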