Startup improvements (#639)
* Benchmark for testing startup performance.

* Make pool pass spare space to pooled item

The pool rounds its allocations up to a power of 2 because it does not
yet have local state when it is initially set up.

This commit passes the extra space to the constructor of the pooled
type so that it can be fed into the freshly created allocator (a sketch
of the idea follows the change summary below).

Co-authored-by: Nathaniel Wesley Filardo <nfilardo@microsoft.com>
mjp41 and nwf-msr authored Sep 28, 2023
1 parent 126e77f commit 5543347
Showing 10 changed files with 175 additions and 23 deletions.
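
To make the idea concrete before the per-file diffs, here is a standalone sketch of the pattern this commit introduces. `SpareRange`, `MetaBackend`, `PooledThing`, and `pool_acquire` are illustrative stand-ins, not the snmalloc types; the real versions are `Range`, the backend's `alloc_meta_data`, and `Pool::acquire` in the diffs below.

```cpp
// Standalone sketch of "pass spare space to the pooled item"; all names here
// are simplified stand-ins, not the real snmalloc interfaces.
#include <cstddef>
#include <cstdlib>
#include <new>

// Leftover space after the pooled object inside its power-of-two allocation.
struct SpareRange
{
  void* base;
  std::size_t length;
};

static std::size_t next_pow2(std::size_t x)
{
  std::size_t p = 1;
  while (p < x)
    p <<= 1;
  return p;
}

// Stand-in for the backend metadata allocator.
struct MetaBackend
{
  static void* alloc_meta(std::size_t size)
  {
    return std::malloc(size);
  }
};

// A pooled object that seeds a private cache with the spare space it is
// handed, instead of asking the backend again during startup.
struct PooledThing
{
  void* cache_base;
  std::size_t cache_length;

  explicit PooledThing(const SpareRange& spare)
  : cache_base(spare.base), cache_length(spare.length)
  {}
};

template<typename T>
T* pool_acquire()
{
  // The pool rounds the request up to a power of two...
  std::size_t request = next_pow2(sizeof(T));
  std::size_t spare_len = request - sizeof(T);

  void* raw = MetaBackend::alloc_meta(request);
  if (raw == nullptr)
    return nullptr;

  // ...and passes the unused tail to the pooled object's constructor.
  SpareRange spare{static_cast<char*>(raw) + sizeof(T), spare_len};
  return new (raw) T(spare);
}

int main()
{
  PooledThing* t = pool_acquire<PooledThing>();
  return t != nullptr ? 0 : 1;
}
```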
3 changes: 0 additions & 3 deletions src/snmalloc/aal/aal.h
@@ -204,9 +204,6 @@ namespace snmalloc
static SNMALLOC_FAST_PATH CapPtr<T, BOut>
capptr_bound(CapPtr<U, BIn> a, size_t size) noexcept
{
static_assert(
BIn::spatial > capptr::dimension::Spatial::Alloc,
"Refusing to re-bound Spatial::Alloc CapPtr");
static_assert(
capptr::is_spatial_refinement<BIn, BOut>(),
"capptr_bound must preserve non-spatial CapPtr dimensions");
10 changes: 5 additions & 5 deletions src/snmalloc/aal/aal_cheri.h
@@ -69,9 +69,6 @@ namespace snmalloc
static SNMALLOC_FAST_PATH CapPtr<T, BOut>
capptr_bound(CapPtr<U, BIn> a, size_t size) noexcept
{
static_assert(
BIn::spatial > capptr::dimension::Spatial::Alloc,
"Refusing to re-bound Spatial::Alloc CapPtr");
static_assert(
capptr::is_spatial_refinement<BIn, BOut>(),
"capptr_bound must preserve non-spatial CapPtr dimensions");
@@ -87,8 +84,11 @@

void* pb = __builtin_cheri_bounds_set_exact(a.unsafe_ptr(), size);

SNMALLOC_ASSERT(
__builtin_cheri_tag_get(pb) && "capptr_bound exactness failed.");
SNMALLOC_ASSERT_MSG(
__builtin_cheri_tag_get(pb),
"capptr_bound exactness failed. {} of size {}",
a.unsafe_ptr(),
size);

return CapPtr<T, BOut>::unsafe_from(static_cast<T*>(pb));
}
11 changes: 11 additions & 0 deletions src/snmalloc/backend/backend.h
@@ -70,6 +70,17 @@ namespace snmalloc
Aal::capptr_bound<void, capptr::bounds::AllocFull>(p, size));
}

/**
* Returns unused meta-data to the system. This must have come from a call
* to alloc_meta_data, but can be a sub-range of the original allocation.
*/
static void dealloc_meta_data(
LocalState& local_state, capptr::Alloc<void> p, size_t size)
{
auto arena = Authmap::amplify(p);
local_state.get_meta_range().dealloc_range(arena, size);
}

/**
* Returns a chunk of memory with alignment and size of `size`, and a
* block containing metadata about the slab.
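
The intent of the new `dealloc_meta_data` hook is that a frontend can hand back an unused sub-range of an earlier `alloc_meta_data` call and satisfy later metadata requests from it locally. Below is a minimal, self-contained sketch of that idea, using a plain vector of ranges instead of the real `LocalState::get_meta_range()` machinery; `ToyMetaCache` is a made-up name for illustration only.

```cpp
// Sketch only: a toy local metadata cache fed with returned sub-ranges.
// The real code routes this through LocalState::get_meta_range().
#include <cstddef>
#include <cstdlib>
#include <utility>
#include <vector>

class ToyMetaCache
{
  // (base, length) pairs previously returned via dealloc_meta_data.
  std::vector<std::pair<void*, std::size_t>> spare;

public:
  // Accept an unused sub-range of an earlier metadata allocation.
  void dealloc_meta_data(void* base, std::size_t length)
  {
    spare.push_back({base, length});
  }

  // Serve small metadata requests from the cache before hitting the backend.
  void* alloc_meta_data(std::size_t size)
  {
    for (auto& r : spare)
    {
      if (r.second >= size)
      {
        void* result = r.first;
        r.first = static_cast<char*>(r.first) + size;
        r.second -= size;
        return result;
      }
    }
    return std::malloc(size); // fall back to the "backend"
  }
};

int main()
{
  ToyMetaCache cache;
  void* block = std::malloc(1024);
  // Pretend the first 256 bytes were used; return the tail to the cache.
  cache.dealloc_meta_data(static_cast<char*>(block) + 256, 768);
  void* meta = cache.alloc_meta_data(64); // served from the cached tail
  return meta != nullptr ? 0 : 1;
}
```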
1 change: 0 additions & 1 deletion src/snmalloc/backend_helpers/range_helpers.h
@@ -160,5 +160,4 @@ namespace snmalloc
}
}
};

} // namespace snmalloc
1 change: 0 additions & 1 deletion src/snmalloc/backend_helpers/smallbuddyrange.h
@@ -244,7 +244,6 @@ namespace snmalloc

void dealloc_range(CapPtr<void, ChunkBounds> base, size_t size)
{
SNMALLOC_ASSERT(bits::is_pow2(size));
add_range(base, size);
}
};
37 changes: 32 additions & 5 deletions src/snmalloc/mem/corealloc.h
@@ -555,8 +555,11 @@ namespace snmalloc
/**
* Initialiser, shared code between the constructors for different
* configurations.
*
* spare is the amount of space directly after the allocator that is
* reserved as meta-data, but is not required by this CoreAllocator.
*/
void init()
void init(Range<capptr::bounds::Alloc>& spare)
{
#ifdef SNMALLOC_TRACING
message<1024>("Making an allocator.");
@@ -566,6 +569,20 @@
// This must occur before any freelists are constructed.
entropy.init<typename Config::Pal>();

if (spare.length != 0)
{
/*
* Seed this frontend's private metadata allocation cache with any
* excess space from the metadata allocation holding the frontend
* Allocator object itself. This alleviates thundering herd
* contention on the backend during startup: each slab opened now
* makes one trip to the backend, for the slab itself, rather than
* two, for the slab and its metadata.
*/
Config::Backend::dealloc_meta_data(
get_backend_local_state(), spare.base, spare.length);
}

// Ignoring stats for now.
// stats().start();

@@ -597,26 +614,36 @@
/**
* Constructor for the case that the core allocator owns the local state.
* SFINAE disabled if the allocator does not own the local state.
*
* spare is the amount of space directly after the allocator that is
* reserved as meta-data, but is not required by this CoreAllocator.
*/
template<
typename Config_ = Config,
typename = std::enable_if_t<Config_::Options.CoreAllocOwnsLocalState>>
CoreAllocator(LocalCache* cache) : attached_cache(cache)
CoreAllocator(Range<capptr::bounds::Alloc>& spare, LocalCache* cache)
: attached_cache(cache)
{
init();
init(spare);
}

/**
* Constructor for the case that the core allocator does not owns the local
* state. SFINAE disabled if the allocator does own the local state.
*
* spare is the amount of space directly after the allocator that is
* reserved as meta-data, but is not required by this CoreAllocator.
*/
template<
typename Config_ = Config,
typename = std::enable_if_t<!Config_::Options.CoreAllocOwnsLocalState>>
CoreAllocator(LocalCache* cache, LocalState* backend = nullptr)
CoreAllocator(
Range<capptr::bounds::Alloc>& spare,
LocalCache* cache,
LocalState* backend = nullptr)
: backend_state(backend), attached_cache(cache)
{
init();
init(spare);
}

/**
16 changes: 13 additions & 3 deletions src/snmalloc/mem/pool.h
@@ -141,16 +141,26 @@ namespace snmalloc
}
}

size_t request_size = bits::next_pow2(sizeof(T));
size_t round_sizeof = Aal::capptr_size_round(sizeof(T));
size_t spare = request_size - round_sizeof;

auto raw =
Config::Backend::template alloc_meta_data<T>(nullptr, sizeof(T));
Config::Backend::template alloc_meta_data<T>(nullptr, request_size);

if (raw == nullptr)
{
Config::Pal::error("Failed to initialise thread local allocator.");
}

auto p = capptr::Alloc<T>::unsafe_from(new (raw.unsafe_ptr())
T(std::forward<Args>(args)...));
capptr::Alloc<void> spare_start = pointer_offset(raw, round_sizeof);
Range<capptr::bounds::Alloc> r{spare_start, spare};

auto p = capptr::Alloc<T>::unsafe_from(
new (raw.unsafe_ptr()) T(r, std::forward<Args>(args)...));

// Remove excess from the permissions.
p = Aal::capptr_bound<T, capptr::bounds::Alloc>(p, round_sizeof);

FlagLock f(pool.lock);
p->list_next = pool.list;
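
To make the new arithmetic in `Pool::acquire` concrete: `request_size` is the power-of-two rounding of `sizeof(T)`, `round_sizeof` comes from `Aal::capptr_size_round` (which simply returns its argument where there is no CHERI-style representable-bounds rounding), and the difference is the spare space handed to the constructor. The object size below is a made-up figure, and `size_round` is a stand-in, used only for illustration.

```cpp
#include <cstddef>
#include <cstdio>

static std::size_t next_pow2(std::size_t x)
{
  std::size_t p = 1;
  while (p < x)
    p <<= 1;
  return p;
}

// Stand-in for Aal::capptr_size_round: the identity on targets with no
// representable-bounds rounding (i.e. non-CHERI platforms).
static std::size_t size_round(std::size_t x)
{
  return x;
}

int main()
{
  std::size_t object_size = 5000; // hypothetical sizeof(T)
  std::size_t request_size = next_pow2(object_size); // 8192
  std::size_t round_sizeof = size_round(object_size); // 5000 here
  std::size_t spare = request_size - round_sizeof; // 3192 bytes for the Range
  std::printf(
    "request=%zu rounded=%zu spare=%zu\n", request_size, round_sizeof, spare);
}
```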
15 changes: 15 additions & 0 deletions src/snmalloc/mem/pooled.h
@@ -5,9 +5,24 @@

namespace snmalloc
{
template<SNMALLOC_CONCEPT(capptr::IsBound) bounds>
struct Range
{
CapPtr<void, bounds> base;
size_t length;
};

template<class T>
class PoolState;

/**
* Required to be implemented by all types that are pooled.
*
The constructor of any inherited type must take a Range& as its first
argument. This represents the space left over when the pool allocation is
rounded up to the nearest power of 2. It is valid to ignore this argument,
but it can be used to optimise meta-data usage at startup.
*/
template<class T>
class Pooled
{
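
The constructor contract described above, in self-contained form; `SpareView` is an illustrative stand-in for `Range<capptr::bounds::Alloc>`, and the two entry types mirror the test types in pool.cc below — one ignores the spare space, the other keeps it for later use.

```cpp
#include <cstddef>

// Stand-in for Range<capptr::bounds::Alloc>: leftover space after the object.
struct SpareView
{
  void* base;
  std::size_t length;
};

// Ignores the spare space entirely (valid, as the comment above says).
struct SimpleEntry
{
  int field;
  SimpleEntry(SpareView&) : field(1) {}
};

// Takes the spare range first, then its own arguments, and keeps the range
// around to consume later (as CoreAllocator does via dealloc_meta_data).
struct CachingEntry
{
  SpareView spare;
  int field;
  CachingEntry(SpareView& s, int f) : spare(s), field(f) {}
};

int main()
{
  char buffer[64];
  SpareView spare{buffer, sizeof(buffer)};
  SimpleEntry a(spare);
  CachingEntry b(spare, 42);
  return a.field + b.field == 43 ? 0 : 1;
}
```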
10 changes: 5 additions & 5 deletions src/test/func/pool/pool.cc
@@ -11,7 +11,7 @@ struct PoolAEntry : Pooled<PoolAEntry>
{
int field;

PoolAEntry() : field(1){};
PoolAEntry(Range<capptr::bounds::Alloc>&) : field(1){};
};

using PoolA = Pool<PoolAEntry, Alloc::Config>;
@@ -20,8 +20,8 @@ struct PoolBEntry : Pooled<PoolBEntry>
{
int field;

PoolBEntry() : field(0){};
PoolBEntry(int f) : field(f){};
PoolBEntry(Range<capptr::bounds::Alloc>&) : field(0){};
PoolBEntry(Range<capptr::bounds::Alloc>&, int f) : field(f){};
};

using PoolB = Pool<PoolBEntry, Alloc::Config>;
@@ -30,7 +30,7 @@ struct PoolLargeEntry : Pooled<PoolLargeEntry>
{
std::array<int, 2'000'000> payload;

PoolLargeEntry()
PoolLargeEntry(Range<capptr::bounds::Alloc>&)
{
printf(".");
fflush(stdout);
@@ -48,7 +48,7 @@ struct PoolSortEntry : Pooled<PoolSortEntry<order>>
{
int field;

PoolSortEntry(int f) : field(f){};
PoolSortEntry(Range<capptr::bounds::Alloc>&, int f) : field(f){};
};

template<bool order>
94 changes: 94 additions & 0 deletions src/test/perf/startup/startup.cc
@@ -0,0 +1,94 @@
#include "test/opt.h"
#include "test/setup.h"
#include "test/usage.h"
#include "test/xoroshiro.h"

#include <iostream>
#include <snmalloc/snmalloc.h>
#include <thread>
#include <vector>

using namespace snmalloc;

std::vector<uint64_t> counters{};

template<typename F>
class ParallelTest
{
private:
std::atomic<bool> flag = false;
std::atomic<size_t> ready = 0;
uint64_t start;
uint64_t end;
std::atomic<size_t> complete = 0;
size_t cores;
F f;

void run(size_t id)
{
auto prev = ready.fetch_add(1);
if (prev + 1 == cores)
{
start = Aal::tick();
flag = true;
}
while (!flag)
Aal::pause();

f(id);

prev = complete.fetch_add(1);
if (prev + 1 == cores)
{
end = Aal::tick();
}
}

public:
ParallelTest(F&& f, size_t cores) : cores(cores), f(std::forward<F>(f))
{
std::thread* t = new std::thread[cores];

for (size_t i = 0; i < cores; i++)
{
t[i] = std::thread(&ParallelTest::run, this, i);
}
// Wait for all the threads.
for (size_t i = 0; i < cores; i++)
{
t[i].join();
}

delete[] t;
}

uint64_t time()
{
return end - start;
}
};

int main()
{
counters.resize(std::thread::hardware_concurrency());

ParallelTest test(
[](size_t id) {
auto start = Aal::tick();
auto& alloc = snmalloc::ThreadAlloc::get();
alloc.dealloc(alloc.alloc(1));
auto end = Aal::tick();
counters[id] = end - start;
},
counters.size());

std::cout << "Taken: " << test.time() << std::endl;
std::sort(counters.begin(), counters.end());
uint64_t start = 0;
for (auto counter : counters)
{
std::cout << "Thread time " << counter << " (" << counter - start << ")"
<< std::endl;
start = counter;
}
}
