diff --git a/src/snmalloc/aal/aal.h b/src/snmalloc/aal/aal.h
index dcdc55d98..5014a7296 100644
--- a/src/snmalloc/aal/aal.h
+++ b/src/snmalloc/aal/aal.h
@@ -204,9 +204,6 @@ namespace snmalloc
     static SNMALLOC_FAST_PATH CapPtr<T, BOut>
     capptr_bound(CapPtr<U, BIn> a, size_t size) noexcept
     {
-      static_assert(
-        BIn::spatial > capptr::dimension::Spatial::Alloc,
-        "Refusing to re-bound Spatial::Alloc CapPtr");
       static_assert(
         capptr::is_spatial_refinement<BIn, BOut>(),
         "capptr_bound must preserve non-spatial CapPtr dimensions");
diff --git a/src/snmalloc/aal/aal_cheri.h b/src/snmalloc/aal/aal_cheri.h
index 4a4acd379..84f11c038 100644
--- a/src/snmalloc/aal/aal_cheri.h
+++ b/src/snmalloc/aal/aal_cheri.h
@@ -69,9 +69,6 @@ namespace snmalloc
     static SNMALLOC_FAST_PATH CapPtr<T, BOut>
     capptr_bound(CapPtr<U, BIn> a, size_t size) noexcept
     {
-      static_assert(
-        BIn::spatial > capptr::dimension::Spatial::Alloc,
-        "Refusing to re-bound Spatial::Alloc CapPtr");
       static_assert(
         capptr::is_spatial_refinement<BIn, BOut>(),
         "capptr_bound must preserve non-spatial CapPtr dimensions");
@@ -87,8 +84,11 @@ namespace snmalloc
       void* pb = __builtin_cheri_bounds_set_exact(a.unsafe_ptr(), size);
 
-      SNMALLOC_ASSERT(
-        __builtin_cheri_tag_get(pb) && "capptr_bound exactness failed.");
+      SNMALLOC_ASSERT_MSG(
+        __builtin_cheri_tag_get(pb),
+        "capptr_bound exactness failed. {} of size {}",
+        a.unsafe_ptr(),
+        size);
 
       return CapPtr<T, BOut>::unsafe_from(static_cast<T*>(pb));
     }
diff --git a/src/snmalloc/backend/backend.h b/src/snmalloc/backend/backend.h
index d220a080a..ce5e757ed 100644
--- a/src/snmalloc/backend/backend.h
+++ b/src/snmalloc/backend/backend.h
@@ -70,6 +70,17 @@ namespace snmalloc
         Aal::capptr_bound<void, capptr::bounds::AllocFull>(p, size));
     }
 
+    /**
+     * Returns unused meta-data to the system. This must have come from a call
+     * to alloc_meta_data, but can be a sub-range of the original allocation.
+     */
+    static void dealloc_meta_data(
+      LocalState& local_state, capptr::Alloc<void> p, size_t size)
+    {
+      auto arena = Authmap::amplify(p);
+      local_state.get_meta_range().dealloc_range(arena, size);
+    }
+
     /**
      * Returns a chunk of memory with alignment and size of `size`, and a
      * block containing metadata about the slab.
diff --git a/src/snmalloc/backend_helpers/range_helpers.h b/src/snmalloc/backend_helpers/range_helpers.h
index 076b9fd74..f1a82baf2 100644
--- a/src/snmalloc/backend_helpers/range_helpers.h
+++ b/src/snmalloc/backend_helpers/range_helpers.h
@@ -160,5 +160,4 @@ namespace snmalloc
       }
     }
   };
-
 } // namespace snmalloc
diff --git a/src/snmalloc/backend_helpers/smallbuddyrange.h b/src/snmalloc/backend_helpers/smallbuddyrange.h
index 83796e1ec..2a3f3a34c 100644
--- a/src/snmalloc/backend_helpers/smallbuddyrange.h
+++ b/src/snmalloc/backend_helpers/smallbuddyrange.h
@@ -244,7 +244,6 @@ namespace snmalloc
 
     void dealloc_range(CapPtr<void, ChunkBounds> base, size_t size)
     {
-      SNMALLOC_ASSERT(bits::is_pow2(size));
       add_range(base, size);
     }
   };
diff --git a/src/snmalloc/mem/corealloc.h b/src/snmalloc/mem/corealloc.h
index c7fc79b72..cb70f5d77 100644
--- a/src/snmalloc/mem/corealloc.h
+++ b/src/snmalloc/mem/corealloc.h
@@ -555,8 +555,11 @@ namespace snmalloc
     /**
      * Initialiser, shared code between the constructors for different
      * configurations.
+     *
+     * spare is the amount of space directly after the allocator that is
+     * reserved as meta-data, but is not required by this CoreAllocator.
      */
-    void init()
+    void init(Range<capptr::bounds::Alloc>& spare)
     {
 #ifdef SNMALLOC_TRACING
       message<1024>("Making an allocator.");
 #endif
@@ -566,6 +569,20 @@ namespace snmalloc
       // Entropy must be first, so that all data-structures can use the key
      // it generates.
      // This must occur before any freelists are constructed.
       entropy.init<typename Config::Pal>();
 
+      if (spare.length != 0)
+      {
+        /*
+         * Seed this frontend's private metadata allocation cache with any
+         * excess space from the metadata allocation holding the frontend
+         * Allocator object itself. This alleviates thundering herd
+         * contention on the backend during startup: each slab opened now
+         * makes one trip to the backend, for the slab itself, rather than
+         * two, for the slab and its metadata.
+         */
+        Config::Backend::dealloc_meta_data(
+          get_backend_local_state(), spare.base, spare.length);
+      }
+
       // Ignoring stats for now.
       // stats().start();
@@ -597,26 +614,36 @@ namespace snmalloc
     /**
      * Constructor for the case that the core allocator owns the local state.
      * SFINAE disabled if the allocator does not own the local state.
+     *
+     * spare is the amount of space directly after the allocator that is
+     * reserved as meta-data, but is not required by this CoreAllocator.
      */
     template<
       typename Config_ = Config,
       typename = std::enable_if_t<Config_::Options.CoreAllocOwnsLocalState>>
-    CoreAllocator(LocalCache* cache) : attached_cache(cache)
+    CoreAllocator(Range<capptr::bounds::Alloc>& spare, LocalCache* cache)
+    : attached_cache(cache)
     {
-      init();
+      init(spare);
     }
 
     /**
      * Constructor for the case that the core allocator does not owns the local
      * state. SFINAE disabled if the allocator does own the local state.
+     *
+     * spare is the amount of space directly after the allocator that is
+     * reserved as meta-data, but is not required by this CoreAllocator.
      */
     template<
       typename Config_ = Config,
       typename = std::enable_if_t<!Config_::Options.CoreAllocOwnsLocalState>>
-    CoreAllocator(LocalCache* cache, LocalState* backend = nullptr)
+    CoreAllocator(
+      Range<capptr::bounds::Alloc>& spare,
+      LocalCache* cache,
+      LocalState* backend = nullptr)
     : backend_state(backend), attached_cache(cache)
     {
-      init();
+      init(spare);
     }
 
     /**
diff --git a/src/snmalloc/mem/pool.h b/src/snmalloc/mem/pool.h
index 36737207d..cbcbdb12d 100644
--- a/src/snmalloc/mem/pool.h
+++ b/src/snmalloc/mem/pool.h
@@ -141,16 +141,26 @@ namespace snmalloc
         }
       }
 
+      size_t request_size = bits::next_pow2(sizeof(T));
+      size_t round_sizeof = Aal::capptr_size_round(sizeof(T));
+      size_t spare = request_size - round_sizeof;
+
       auto raw =
-        Config::Backend::template alloc_meta_data<T>(nullptr, sizeof(T));
+        Config::Backend::template alloc_meta_data<T>(nullptr, request_size);
 
       if (raw == nullptr)
       {
         Config::Pal::error("Failed to initialise thread local allocator.");
       }
 
-      auto p = capptr::Alloc<T>::unsafe_from(new (raw.unsafe_ptr())
-                                               T(std::forward<Args>(args)...));
+      capptr::Alloc<void> spare_start = pointer_offset(raw, round_sizeof);
+      Range<capptr::bounds::Alloc> r{spare_start, spare};
+
+      auto p = capptr::Alloc<T>::unsafe_from(
+        new (raw.unsafe_ptr()) T(r, std::forward<Args>(args)...));
+
+      // Remove excess from the permissions.
+      p = Aal::capptr_bound<T, capptr::bounds::Alloc>(p, round_sizeof);
 
       FlagLock f(pool.lock);
       p->list_next = pool.list;
diff --git a/src/snmalloc/mem/pooled.h b/src/snmalloc/mem/pooled.h
index a812bc924..7fb0ce33e 100644
--- a/src/snmalloc/mem/pooled.h
+++ b/src/snmalloc/mem/pooled.h
@@ -5,9 +5,24 @@
 namespace snmalloc
 {
+  template<SNMALLOC_CONCEPT(capptr::IsBound) B>
+  struct Range
+  {
+    CapPtr<void, B> base;
+    size_t length;
+  };
+
   template<typename T>
   class PoolState;
 
+  /**
+   * Required to be implemented by all types that are pooled.
+   *
+   * The constructor of any inherited type must take a Range& as its first
+   * argument. This represents the leftover from pool allocation rounding up to
+   * the nearest power of 2. It is valid to ignore this argument, but can be
+   * used to optimise meta-data usage at startup.
+   */
   template<typename T>
   class Pooled
   {
diff --git a/src/test/func/pool/pool.cc b/src/test/func/pool/pool.cc
index 7eeff8743..600118d58 100644
--- a/src/test/func/pool/pool.cc
+++ b/src/test/func/pool/pool.cc
@@ -11,7 +11,7 @@ struct PoolAEntry : Pooled<PoolAEntry>
 {
   int field;
 
-  PoolAEntry() : field(1){};
+  PoolAEntry(Range<capptr::bounds::Alloc>&) : field(1){};
 };
 
 using PoolA = Pool<PoolAEntry>;
@@ -20,8 +20,8 @@ struct PoolBEntry : Pooled<PoolBEntry>
 {
   int field;
 
-  PoolBEntry() : field(0){};
-  PoolBEntry(int f) : field(f){};
+  PoolBEntry(Range<capptr::bounds::Alloc>&) : field(0){};
+  PoolBEntry(Range<capptr::bounds::Alloc>&, int f) : field(f){};
 };
 
 using PoolB = Pool<PoolBEntry>;
@@ -30,7 +30,7 @@ struct PoolLargeEntry : Pooled<PoolLargeEntry>
 {
   std::array<int, 2'000'000> payload;
 
-  PoolLargeEntry()
+  PoolLargeEntry(Range<capptr::bounds::Alloc>&)
   {
     printf(".");
     fflush(stdout);
@@ -48,7 +48,7 @@ struct PoolSortEntry : Pooled<PoolSortEntry<order>>
 {
   int field;
 
-  PoolSortEntry(int f) : field(f){};
+  PoolSortEntry(Range<capptr::bounds::Alloc>&, int f) : field(f){};
 };
 
 template
diff --git a/src/test/perf/startup/startup.cc b/src/test/perf/startup/startup.cc
new file mode 100644
index 000000000..d1e999a2f
--- /dev/null
+++ b/src/test/perf/startup/startup.cc
@@ -0,0 +1,94 @@
+#include "test/opt.h"
+#include "test/setup.h"
+#include "test/usage.h"
+#include "test/xoroshiro.h"
+
+#include <iostream>
+#include <snmalloc/snmalloc.h>
+#include <thread>
+#include <vector>
+
+using namespace snmalloc;
+
+std::vector<uint64_t> counters{};
+
+template<typename F>
+class ParallelTest
+{
+private:
+  std::atomic<bool> flag = false;
+  std::atomic<size_t> ready = 0;
+  uint64_t start;
+  uint64_t end;
+  std::atomic<size_t> complete = 0;
+  size_t cores;
+  F f;
+
+  void run(size_t id)
+  {
+    auto prev = ready.fetch_add(1);
+    if (prev + 1 == cores)
+    {
+      start = Aal::tick();
+      flag = true;
+    }
+    while (!flag)
+      Aal::pause();
+
+    f(id);
+
+    prev = complete.fetch_add(1);
+    if (prev + 1 == cores)
+    {
+      end = Aal::tick();
+    }
+  }
+
+public:
+  ParallelTest(F&& f, size_t cores) : cores(cores), f(std::forward<F>(f))
+  {
+    std::thread* t = new std::thread[cores];
+
+    for (size_t i = 0; i < cores; i++)
+    {
+      t[i] = std::thread(&ParallelTest::run, this, i);
+    }
+    // Wait for all the threads.
+    for (size_t i = 0; i < cores; i++)
+    {
+      t[i].join();
+    }
+
+    delete[] t;
+  }
+
+  uint64_t time()
+  {
+    return end - start;
+  }
+};
+
+int main()
+{
+  counters.resize(std::thread::hardware_concurrency());
+
+  ParallelTest test(
+    [](size_t id) {
+      auto start = Aal::tick();
+      auto& alloc = snmalloc::ThreadAlloc::get();
+      alloc.dealloc(alloc.alloc(1));
+      auto end = Aal::tick();
+      counters[id] = end - start;
+    },
+    counters.size());
+
+  std::cout << "Taken: " << test.time() << std::endl;
+  std::sort(counters.begin(), counters.end());
+  uint64_t start = 0;
+  for (auto counter : counters)
+  {
+    std::cout << "Thread time " << counter << " (" << counter - start << ")"
+              << std::endl;
+    start = counter;
+  }
+}
\ No newline at end of file
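
Note on the new Pooled contract from the pooled.h hunk: the sketch below shows how a pooled type adapts to the Range& first constructor argument. DemoEntry is a hypothetical type invented for illustration; Range, Pooled and capptr::bounds::Alloc are the names used in this diff, with the Range template argument following the reconstruction above. A type may simply ignore the spare range, exactly as the updated pool.cc tests do.

// Sketch only: DemoEntry is hypothetical; Range and Pooled come from this diff.
#include <snmalloc/snmalloc.h>

using namespace snmalloc;

struct DemoEntry : Pooled<DemoEntry>
{
  int value;

  // The spare Range must be the first constructor parameter. It can be
  // ignored (as the pool.cc tests do), or handed back to the backend the
  // way CoreAllocator::init now does via dealloc_meta_data.
  DemoEntry(Range<capptr::bounds::Alloc>&, int v = 0) : value(v) {}
};

Construction still goes through the pool's acquire path (the pool.h hunk above), which now builds the entry as T(r, std::forward<Args>(args)...), so call sites are unchanged and only the entry types' constructors gain the extra parameter.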
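
For intuition about how much seed meta-data each frontend receives, here is a small worked sketch of the arithmetic added in the pool.h hunk. The 20 KiB object size is an assumption for illustration; on non-CHERI platforms Aal::capptr_size_round leaves the size unchanged, so the spare is simply the gap up to the next power of two.

// Standalone sketch of the request_size / round_sizeof / spare computation.
// The helper mirrors bits::next_pow2; the 20 KiB figure is illustrative only.
#include <cstddef>
#include <cstdio>

static size_t next_pow2(size_t x)
{
  size_t p = 1;
  while (p < x)
    p <<= 1;
  return p;
}

int main()
{
  size_t sizeof_T = 20 * 1024;                // assumed size of the pooled allocator object
  size_t round_sizeof = sizeof_T;             // capptr_size_round is the identity off CHERI
  size_t request_size = next_pow2(sizeof_T);  // 32768 bytes actually requested
  size_t spare = request_size - round_sizeof; // 12288 bytes passed on as the Range

  printf("request=%zu used=%zu spare=%zu\n", request_size, round_sizeof, spare);
  return 0;
}

That spare arrives in CoreAllocator::init as spare.length and is returned through dealloc_meta_data, so the first slabs opened by the new allocator can draw their metadata from it without an extra trip to the backend.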