diff --git a/src/snmalloc/aal/aal.h b/src/snmalloc/aal/aal.h
index dcdc55d98..5014a7296 100644
--- a/src/snmalloc/aal/aal.h
+++ b/src/snmalloc/aal/aal.h
@@ -204,9 +204,6 @@ namespace snmalloc
     static SNMALLOC_FAST_PATH CapPtr<T, BOut>
     capptr_bound(CapPtr<U, BIn> a, size_t size) noexcept
     {
-      static_assert(
-        BIn::spatial > capptr::dimension::Spatial::Alloc,
-        "Refusing to re-bound Spatial::Alloc CapPtr");
       static_assert(
         capptr::is_spatial_refinement<BIn, BOut>(),
         "capptr_bound must preserve non-spatial CapPtr dimensions");
diff --git a/src/snmalloc/aal/aal_cheri.h b/src/snmalloc/aal/aal_cheri.h
index 4a4acd379..84f11c038 100644
--- a/src/snmalloc/aal/aal_cheri.h
+++ b/src/snmalloc/aal/aal_cheri.h
@@ -69,9 +69,6 @@ namespace snmalloc
     static SNMALLOC_FAST_PATH CapPtr<T, BOut>
     capptr_bound(CapPtr<U, BIn> a, size_t size) noexcept
     {
-      static_assert(
-        BIn::spatial > capptr::dimension::Spatial::Alloc,
-        "Refusing to re-bound Spatial::Alloc CapPtr");
       static_assert(
         capptr::is_spatial_refinement<BIn, BOut>(),
         "capptr_bound must preserve non-spatial CapPtr dimensions");
@@ -87,8 +84,11 @@ namespace snmalloc
       void* pb = __builtin_cheri_bounds_set_exact(a.unsafe_ptr(), size);
 
-      SNMALLOC_ASSERT(
-        __builtin_cheri_tag_get(pb) && "capptr_bound exactness failed.");
+      SNMALLOC_ASSERT_MSG(
+        __builtin_cheri_tag_get(pb),
+        "capptr_bound exactness failed. {} of size {}",
+        a.unsafe_ptr(),
+        size);
 
       return CapPtr<T, BOut>::unsafe_from(static_cast<T*>(pb));
     }
diff --git a/src/snmalloc/backend/backend.h b/src/snmalloc/backend/backend.h
index d220a080a..ce5e757ed 100644
--- a/src/snmalloc/backend/backend.h
+++ b/src/snmalloc/backend/backend.h
@@ -70,6 +70,17 @@ namespace snmalloc
         Aal::capptr_bound<void, capptr::bounds::AllocFull>(p, size));
     }
 
+    /**
+     * Returns unused meta-data to the system. This must have come from a call
+     * to alloc_meta_data, but can be a sub-range of the original allocation.
+     */
+    static void dealloc_meta_data(
+      LocalState& local_state, capptr::Alloc<void> p, size_t size)
+    {
+      auto arena = Authmap::amplify(p);
+      local_state.get_meta_range().dealloc_range(arena, size);
+    }
+
     /**
      * Returns a chunk of memory with alignment and size of `size`, and a
      * block containing metadata about the slab.
diff --git a/src/snmalloc/backend_helpers/range_helpers.h b/src/snmalloc/backend_helpers/range_helpers.h
index 076b9fd74..f1a82baf2 100644
--- a/src/snmalloc/backend_helpers/range_helpers.h
+++ b/src/snmalloc/backend_helpers/range_helpers.h
@@ -160,5 +160,4 @@ namespace snmalloc
       }
     }
   };
-
 } // namespace snmalloc
diff --git a/src/snmalloc/backend_helpers/smallbuddyrange.h b/src/snmalloc/backend_helpers/smallbuddyrange.h
index 83796e1ec..2a3f3a34c 100644
--- a/src/snmalloc/backend_helpers/smallbuddyrange.h
+++ b/src/snmalloc/backend_helpers/smallbuddyrange.h
@@ -244,7 +244,6 @@ namespace snmalloc
 
     void dealloc_range(CapPtr<void, ChunkBounds> base, size_t size)
     {
-      SNMALLOC_ASSERT(bits::is_pow2(size));
       add_range(base, size);
     }
   };
diff --git a/src/snmalloc/mem/corealloc.h b/src/snmalloc/mem/corealloc.h
index c7fc79b72..cb70f5d77 100644
--- a/src/snmalloc/mem/corealloc.h
+++ b/src/snmalloc/mem/corealloc.h
@@ -555,8 +555,11 @@ namespace snmalloc
     /**
      * Initialiser, shared code between the constructors for different
      * configurations.
+     *
+     * spare is the amount of space directly after the allocator that is
+     * reserved as meta-data, but is not required by this CoreAllocator.
      */
-    void init()
+    void init(Range<capptr::bounds::Alloc>& spare)
     {
 #ifdef SNMALLOC_TRACING
       message<1024>("Making an allocator.");
 #endif
@@ -566,6 +569,20 @@ namespace snmalloc
       // Entropy must be first, so that all data-structures can use the key
      // it generates.
      // This must occur before any freelists are constructed.
       entropy.init<typename Config::Pal>();
 
+      if (spare.length != 0)
+      {
+        /*
+         * Seed this frontend's private metadata allocation cache with any
+         * excess space from the metadata allocation holding the frontend
+         * Allocator object itself. This alleviates thundering herd
+         * contention on the backend during startup: each slab opened now
+         * makes one trip to the backend, for the slab itself, rather than
+         * two, for the slab and its metadata.
+         */
+        Config::Backend::dealloc_meta_data(
+          get_backend_local_state(), spare.base, spare.length);
+      }
+
       // Ignoring stats for now.
       // stats().start();
@@ -597,26 +614,36 @@ namespace snmalloc
     /**
      * Constructor for the case that the core allocator owns the local state.
      * SFINAE disabled if the allocator does not own the local state.
+     *
+     * spare is the amount of space directly after the allocator that is
+     * reserved as meta-data, but is not required by this CoreAllocator.
      */
     template<
       typename Config_ = Config,
       typename = std::enable_if_t<Config_::Options.CoreAllocOwnsLocalState>>
-    CoreAllocator(LocalCache* cache) : attached_cache(cache)
+    CoreAllocator(Range<capptr::bounds::Alloc>& spare, LocalCache* cache)
+    : attached_cache(cache)
     {
-      init();
+      init(spare);
     }
 
     /**
      * Constructor for the case that the core allocator does not owns the local
      * state. SFINAE disabled if the allocator does own the local state.
+     *
+     * spare is the amount of space directly after the allocator that is
+     * reserved as meta-data, but is not required by this CoreAllocator.
      */
     template<
       typename Config_ = Config,
       typename = std::enable_if_t<!Config_::Options.CoreAllocOwnsLocalState>>
-    CoreAllocator(LocalCache* cache, LocalState* backend = nullptr)
+    CoreAllocator(
+      Range<capptr::bounds::Alloc>& spare,
+      LocalCache* cache,
+      LocalState* backend = nullptr)
     : backend_state(backend), attached_cache(cache)
     {
-      init();
+      init(spare);
     }
 
     /**
diff --git a/src/snmalloc/mem/pool.h b/src/snmalloc/mem/pool.h
index 36737207d..cbcbdb12d 100644
--- a/src/snmalloc/mem/pool.h
+++ b/src/snmalloc/mem/pool.h
@@ -141,16 +141,26 @@ namespace snmalloc
         }
       }
 
+      size_t request_size = bits::next_pow2(sizeof(T));
+      size_t round_sizeof = Aal::capptr_size_round(sizeof(T));
+      size_t spare = request_size - round_sizeof;
+
       auto raw =
-        Config::Backend::template alloc_meta_data<T>(nullptr, sizeof(T));
+        Config::Backend::template alloc_meta_data<T>(nullptr, request_size);
 
       if (raw == nullptr)
       {
         Config::Pal::error("Failed to initialise thread local allocator.");
       }
 
-      auto p = capptr::Alloc<T>::unsafe_from(new (raw.unsafe_ptr())
-                                               T(std::forward<Args>(args)...));
+      capptr::Alloc<void> spare_start = pointer_offset(raw, round_sizeof);
+      Range<capptr::bounds::Alloc> r{spare_start, spare};
+
+      auto p = capptr::Alloc<T>::unsafe_from(
+        new (raw.unsafe_ptr()) T(r, std::forward<Args>(args)...));
+
+      // Remove excess from the permissions.
+      p = Aal::capptr_bound<T, capptr::bounds::Alloc>(p, round_sizeof);
 
       FlagLock f(pool.lock);
       p->list_next = pool.list;
diff --git a/src/snmalloc/mem/pooled.h b/src/snmalloc/mem/pooled.h
index a812bc924..7fb0ce33e 100644
--- a/src/snmalloc/mem/pooled.h
+++ b/src/snmalloc/mem/pooled.h
@@ -5,9 +5,24 @@
 namespace snmalloc
 {
+  template<SNMALLOC_CONCEPT(capptr::IsBound) B>
+  struct Range
+  {
+    CapPtr<void, B> base;
+    size_t length;
+  };
+
   template<typename T>
   class PoolState;
 
+  /**
+   * Required to be implemented by all types that are pooled.
+   *
+   * The constructor of any inherited type must take a Range& as its first
+   * argument. This represents the leftover from pool allocation rounding up to
+   * the nearest power of 2. It is valid to ignore this argument, but can be
+   * used to optimise meta-data usage at startup.
+   */
   template<typename T>
   class Pooled
   {
diff --git a/src/test/func/pool/pool.cc b/src/test/func/pool/pool.cc
index 7eeff8743..600118d58 100644
--- a/src/test/func/pool/pool.cc
+++ b/src/test/func/pool/pool.cc
@@ -11,7 +11,7 @@ struct PoolAEntry : Pooled<PoolAEntry>
 {
   int field;
 
-  PoolAEntry() : field(1){};
+  PoolAEntry(Range<capptr::bounds::Alloc>&) : field(1){};
 };
 
 using PoolA = Pool<PoolAEntry>;
@@ -20,8 +20,8 @@ struct PoolBEntry : Pooled<PoolBEntry>
 {
   int field;
 
-  PoolBEntry() : field(0){};
-  PoolBEntry(int f) : field(f){};
+  PoolBEntry(Range<capptr::bounds::Alloc>&) : field(0){};
+  PoolBEntry(Range<capptr::bounds::Alloc>&, int f) : field(f){};
 };
 
 using PoolB = Pool<PoolBEntry>;
@@ -30,7 +30,7 @@ struct PoolLargeEntry : Pooled<PoolLargeEntry>
 {
   std::array<int, 2'000'000> payload;
 
-  PoolLargeEntry()
+  PoolLargeEntry(Range<capptr::bounds::Alloc>&)
   {
     printf(".");
     fflush(stdout);
@@ -48,7 +48,7 @@ struct PoolSortEntry : Pooled<PoolSortEntry<order>>
 {
   int field;
 
-  PoolSortEntry(int f) : field(f){};
+  PoolSortEntry(Range<capptr::bounds::Alloc>&, int f) : field(f){};
 };
 
 template
diff --git a/src/test/perf/startup/startup.cc b/src/test/perf/startup/startup.cc
new file mode 100644
index 000000000..d1e999a2f
--- /dev/null
+++ b/src/test/perf/startup/startup.cc
@@ -0,0 +1,94 @@
+#include "test/opt.h"
+#include "test/setup.h"
+#include "test/usage.h"
+#include "test/xoroshiro.h"
+
+#include <iostream>
+#include <snmalloc/snmalloc.h>
+#include <thread>
+#include <vector>
+
+using namespace snmalloc;
+
+std::vector<uint64_t> counters{};
+
+template<typename F>
+class ParallelTest
+{
+private:
+  std::atomic<bool> flag = false;
+  std::atomic<size_t> ready = 0;
+  uint64_t start;
+  uint64_t end;
+  std::atomic<size_t> complete = 0;
+  size_t cores;
+  F f;
+
+  void run(size_t id)
+  {
+    auto prev = ready.fetch_add(1);
+    if (prev + 1 == cores)
+    {
+      start = Aal::tick();
+      flag = true;
+    }
+    while (!flag)
+      Aal::pause();
+
+    f(id);
+
+    prev = complete.fetch_add(1);
+    if (prev + 1 == cores)
+    {
+      end = Aal::tick();
+    }
+  }
+
+public:
+  ParallelTest(F&& f, size_t cores) : cores(cores), f(std::forward<F>(f))
+  {
+    std::thread* t = new std::thread[cores];
+
+    for (size_t i = 0; i < cores; i++)
+    {
+      t[i] = std::thread(&ParallelTest::run, this, i);
+    }
+    // Wait for all the threads.
+    for (size_t i = 0; i < cores; i++)
+    {
+      t[i].join();
+    }
+
+    delete[] t;
+  }
+
+  uint64_t time()
+  {
+    return end - start;
+  }
+};
+
+int main()
+{
+  counters.resize(std::thread::hardware_concurrency());
+
+  ParallelTest test(
+    [](size_t id) {
+      auto start = Aal::tick();
+      auto& alloc = snmalloc::ThreadAlloc::get();
+      alloc.dealloc(alloc.alloc(1));
+      auto end = Aal::tick();
+      counters[id] = end - start;
+    },
+    counters.size());
+
+  std::cout << "Taken: " << test.time() << std::endl;
+  std::sort(counters.begin(), counters.end());
+  uint64_t start = 0;
+  for (auto counter : counters)
+  {
+    std::cout << "Thread time " << counter << " (" << counter - start << ")"
+              << std::endl;
+    start = counter;
+  }
+}
\ No newline at end of file
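
Note on the new Pooled contract from the pooled.h hunk: the sketch below shows how a pooled type adapts to the Range& first constructor argument. DemoEntry is a hypothetical type invented for illustration; Range, Pooled and capptr::bounds::Alloc are the names used in this diff, with the Range template argument following the reconstruction above. A type may simply ignore the spare range, exactly as the updated pool.cc tests do.

// Sketch only: DemoEntry is hypothetical; Range and Pooled come from this diff.
#include <snmalloc/snmalloc.h>

using namespace snmalloc;

struct DemoEntry : Pooled<DemoEntry>
{
  int value;

  // The spare Range must be the first constructor parameter. It can be
  // ignored (as the pool.cc tests do), or handed back to the backend the
  // way CoreAllocator::init now does via dealloc_meta_data.
  DemoEntry(Range<capptr::bounds::Alloc>&, int v = 0) : value(v) {}
};

Construction still goes through the pool's acquire path (the pool.h hunk above), which now builds the entry as T(r, std::forward<Args>(args)...), so call sites are unchanged and only the entry types' constructors gain the extra parameter.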
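
For intuition about how much seed meta-data each frontend receives, here is a small worked sketch of the arithmetic added in the pool.h hunk. The 20 KiB object size is an assumption for illustration; on non-CHERI platforms Aal::capptr_size_round leaves the size unchanged, so the spare is simply the gap up to the next power of two.

// Standalone sketch of the request_size / round_sizeof / spare computation.
// The helper mirrors bits::next_pow2; the 20 KiB figure is illustrative only.
#include <cstddef>
#include <cstdio>

static size_t next_pow2(size_t x)
{
  size_t p = 1;
  while (p < x)
    p <<= 1;
  return p;
}

int main()
{
  size_t sizeof_T = 20 * 1024;                // assumed size of the pooled allocator object
  size_t round_sizeof = sizeof_T;             // capptr_size_round is the identity off CHERI
  size_t request_size = next_pow2(sizeof_T);  // 32768 bytes actually requested
  size_t spare = request_size - round_sizeof; // 12288 bytes passed on as the Range

  printf("request=%zu used=%zu spare=%zu\n", request_size, round_sizeof, spare);
  return 0;
}

That spare arrives in CoreAllocator::init as spare.length and is returned through dealloc_meta_data, so the first slabs opened by the new allocator can draw their metadata from it without an extra trip to the backend.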