Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: BatchIt #637

Closed
wants to merge 17 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ endif()
set(SNMALLOC_MIN_ALLOC_SIZE "" CACHE STRING "Minimum allocation bytes (power of 2)")
set(SNMALLOC_MIN_ALLOC_STEP_SIZE "" CACHE STRING "Minimum allocation step (power of 2)")

set(SNMALLOC_DEALLOC_BATCH_RING_ASSOC "" CACHE STRING "Associativity of deallocation batch cache; 0 to disable")
set(SNMALLOC_DEALLOC_BATCH_RING_SET_BITS "" CACHE STRING "Logarithm of number of deallocation batch cache associativity sets")

if(MSVC AND SNMALLOC_STATIC_LIBRARY AND (SNMALLOC_STATIC_LIBRARY_PREFIX STREQUAL ""))
message(FATAL_ERROR "Empty static library prefix not supported on MSVC")
endif()
Expand Down Expand Up @@ -251,6 +254,8 @@ if (SNMALLOC_NO_REALLOCARR)
endif()
add_as_define_value(SNMALLOC_MIN_ALLOC_SIZE)
add_as_define_value(SNMALLOC_MIN_ALLOC_STEP_SIZE)
add_as_define_value(SNMALLOC_DEALLOC_BATCH_RING_ASSOC)
add_as_define_value(SNMALLOC_DEALLOC_BATCH_RING_SET_BITS)

target_compile_definitions(snmalloc INTERFACE $<$<BOOL:CONST_QUALIFIED_MALLOC_USABLE_SIZE>:MALLOC_USABLE_SIZE_QUALIFIER=const>)

Expand Down
8 changes: 8 additions & 0 deletions src/snmalloc/backend/backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,14 @@ namespace snmalloc
local_state.get_object_range()->dealloc_range(arena, size);
}

/**
 * Build a fresh allocation pointer for `a` that spans `objsize` bytes.
 *
 * The pointer is first amplified through the Authmap, then bounded to
 * `objsize` bytes with AllocFull bounds, and finally passed through
 * capptr_to_user_address_control before being handed back.
 */
SNMALLOC_FAST_PATH static capptr::Alloc<void>
capptr_rederive_alloc(capptr::Alloc<void> a, size_t objsize)
{
  auto amplified = Authmap::amplify(a);
  auto bounded =
    Aal::capptr_bound<void, capptr::bounds::AllocFull>(amplified, objsize);
  return capptr_to_user_address_control(bounded);
}

template<bool potentially_out_of_range = false>
SNMALLOC_FAST_PATH static const PagemapEntry& get_metaentry(address_t p)
{
Expand Down
41 changes: 40 additions & 1 deletion src/snmalloc/ds/allocconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ namespace snmalloc
#if defined(SNMALLOC_MIN_ALLOC_SIZE)
SNMALLOC_MIN_ALLOC_SIZE;
#else
2 * sizeof(void*);
sizeof(void*) * (mitigations(freelist_backward_edge) ? 4 : 2);
#endif

// Minimum slab size.
Expand Down Expand Up @@ -120,6 +120,45 @@ namespace snmalloc
static constexpr size_t REMOTE_SLOTS = 1 << REMOTE_SLOT_BITS;
static constexpr size_t REMOTE_MASK = REMOTE_SLOTS - 1;

// Associativity of the deallocation batch cache; a value of 0 disables it.
// The build may pin this via SNMALLOC_DEALLOC_BATCH_RING_ASSOC; otherwise it
// defaults to 2 when MIN_ALLOC_SIZE can hold DEALLOC_BATCH_MIN_ALLOC_WORDS
// pointers, and to 0 when it cannot.
#if defined(SNMALLOC_DEALLOC_BATCH_RING_ASSOC)
static constexpr size_t DEALLOC_BATCH_RING_ASSOC =
SNMALLOC_DEALLOC_BATCH_RING_ASSOC;
#else
// DEALLOC_BATCH_MIN_ALLOC_WORDS is the number of pointer-sized words the
// smallest allocation must provide before batching is enabled by default.
# if defined(__has_cpp_attribute)
# if ( \
__has_cpp_attribute(msvc::no_unique_address) && \
(__cplusplus >= 201803L || _MSVC_LANG >= 201803L)) || \
__has_cpp_attribute(no_unique_address)
// For C++20 or later, we do have [[no_unique_address]] and so can also do
// batching if we aren't turning on the backward-pointer mitigations
static constexpr size_t DEALLOC_BATCH_MIN_ALLOC_WORDS =
mitigations(freelist_backward_edge) ? 4 : 2;
# else
// For C++17, we don't have [[no_unique_address]] and so we always end up
// needing all four pointers' worth of space (because BatchedRemoteMessage has
// two freelist::Object::T<> links within, each of which will have two fields
// and will be padded to two pointers).
static constexpr size_t DEALLOC_BATCH_MIN_ALLOC_WORDS = 4;
# endif
# else
// If we don't even have the feature test macro, we're C++17 or earlier.
static constexpr size_t DEALLOC_BATCH_MIN_ALLOC_WORDS = 4;
# endif

// Default: two-way associative when the minimum allocation is big enough,
// otherwise batching is off.
static constexpr size_t DEALLOC_BATCH_RING_ASSOC =
(MIN_ALLOC_SIZE >= (DEALLOC_BATCH_MIN_ALLOC_WORDS * sizeof(void*))) ? 2 : 0;
#endif

// Base-2 logarithm of the number of associativity sets in the deallocation
// batch cache. Overridable via SNMALLOC_DEALLOC_BATCH_RING_SET_BITS; the
// default is 3.
#if defined(SNMALLOC_DEALLOC_BATCH_RING_SET_BITS)
static constexpr size_t DEALLOC_BATCH_RING_SET_BITS =
SNMALLOC_DEALLOC_BATCH_RING_SET_BITS;
#else
static constexpr size_t DEALLOC_BATCH_RING_SET_BITS = 3;
#endif

// Total ring count: the associativity multiplied by
// bits::one_at_bit(DEALLOC_BATCH_RING_SET_BITS).
// NOTE(review): one_at_bit(n) is presumably 1 << n, i.e. the number of sets —
// confirm against bits.h. The product is 0 when the associativity is 0.
static constexpr size_t DEALLOC_BATCH_RINGS =
DEALLOC_BATCH_RING_ASSOC * bits::one_at_bit(DEALLOC_BATCH_RING_SET_BITS);

// INTERMEDIATE_BITS must be strictly smaller than MIN_ALLOC_STEP_BITS. The
// diagnostic names the constant that is actually compared, so a failing
// build points at the right knob.
static_assert(
  INTERMEDIATE_BITS < MIN_ALLOC_STEP_BITS,
  "INTERMEDIATE_BITS must be less than MIN_ALLOC_STEP_BITS");
Expand Down
86 changes: 71 additions & 15 deletions src/snmalloc/mem/corealloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -503,13 +503,11 @@ namespace snmalloc
SNMALLOC_FAST_PATH_LAMBDA {
return capptr_domesticate<Config>(local_state, p);
};
auto cb = [this,
&need_post](freelist::HeadPtr msg) SNMALLOC_FAST_PATH_LAMBDA {
auto cb = [this, domesticate, &need_post](
capptr::Alloc<RemoteMessage> msg) SNMALLOC_FAST_PATH_LAMBDA {
auto& entry =
Config::Backend::template get_metaentry(snmalloc::address_cast(msg));

handle_dealloc_remote(entry, msg.as_void(), need_post);

handle_dealloc_remote(entry, msg, need_post, domesticate);
return true;
};

Expand Down Expand Up @@ -548,31 +546,52 @@ namespace snmalloc
*
* need_post will be set to true, if capacity is exceeded.
*/
template<typename Domesticator_queue>
void handle_dealloc_remote(
const PagemapEntry& entry,
CapPtr<void, capptr::bounds::Alloc> p,
bool& need_post)
capptr::Alloc<RemoteMessage> msg,
bool& need_post,
Domesticator_queue domesticate)
{
// TODO this needs to not double count stats
// TODO this needs to not double revoke if using MTE
// TODO thread capabilities?

if (SNMALLOC_LIKELY(entry.get_remote() == public_state()))
{
dealloc_local_object(p, entry);
return;
auto unreturned =
dealloc_local_objects_fast(entry, msg, entropy, domesticate);
if (SNMALLOC_UNLIKELY(unreturned.needs_work()))
{
dealloc_local_object_slow(msg.as_void(), entry);
if (unreturned.batch_size() > 0)
{
auto meta = entry.get_slab_metadata();
do
{
unreturned = meta->return_objects(unreturned.batch_size());
if (unreturned.needs_work())
dealloc_local_object_slower(entry, meta);
} while (unreturned.batch_size() > 0);
}
}
}
else
{
auto nelem = RemoteMessage::template ring_size<Config>(
msg,
freelist::Object::key_root,
entry.get_slab_metadata()->as_key_tweak(),
domesticate);
if (
!need_post &&
!attached_cache->remote_dealloc_cache.reserve_space(entry))
!attached_cache->remote_dealloc_cache.reserve_space(entry, nelem))
{
need_post = true;
}
attached_cache->remote_dealloc_cache
.template dealloc<sizeof(CoreAllocator)>(
entry.get_remote()->trunc_id(), p.as_void());
.template forward<sizeof(CoreAllocator)>(
entry.get_remote()->trunc_id(), msg);
}
}

Expand Down Expand Up @@ -736,6 +755,43 @@ namespace snmalloc
return SNMALLOC_LIKELY(!meta->return_object());
}

/**
 * Batched fast path for handing a ring of freed objects back to the slab
 * that `entry` describes.
 *
 * The ring carried by `msg` is opened with
 * RemoteMessage::open_free_ring, the resulting segment (ending at `msg`
 * itself) is appended to the slab's free queue, and the slab is then debited
 * via return_objects().
 *
 * @param entry       pagemap entry for the object(s) being freed.
 * @param msg         the remote message; checked to be the start of an
 *                    object when sanity_checks mitigations are enabled.
 * @param entropy     entropy source forwarded to append_segment.
 * @param domesticate domestication callback forwarded to open_free_ring.
 * @return whatever return_objects() reports for the ring's length; the
 *         caller is responsible for any slow-path work it signals.
 */
template<typename Domesticator>
SNMALLOC_FAST_PATH static auto dealloc_local_objects_fast(
  const PagemapEntry& entry,
  capptr::Alloc<RemoteMessage> msg,
  LocalEntropy& entropy,
  Domesticator domesticate)
{
  auto slab_meta = entry.get_slab_metadata();
  SNMALLOC_ASSERT(!slab_meta->is_unused());

  snmalloc_check_client(
    mitigations(sanity_checks),
    is_start_of_object(entry.get_sizeclass(), address_cast(msg)),
    "Not deallocating start of an object");

  const size_t object_size = sizeclass_full_to_size(entry.get_sizeclass());
  const auto tweak = slab_meta->as_key_tweak();

  // NOTE(review): open_free_ring presumably yields the first element of the
  // ring and the number of elements in it -- confirm against RemoteMessage.
  auto [ring_first, ring_len] = RemoteMessage::template open_free_ring<Config>(
    msg, object_size, freelist::Object::key_root, tweak, domesticate);

  // Splice the whole ring onto the slab's free queue; `msg` is the segment's
  // final element.
  auto ring_last = msg.template as_reinterpret<freelist::Object::T<>>();
  slab_meta->free_queue.append_segment(
    ring_first,
    ring_last,
    ring_len,
    freelist::Object::key_root,
    tweak,
    entropy);

  return slab_meta->return_objects(ring_len);
}

template<ZeroMem zero_mem>
SNMALLOC_SLOW_PATH capptr::Alloc<void>
small_alloc(smallsizeclass_t sizeclass, freelist::Iter<>& fast_free_list)
Expand Down Expand Up @@ -871,11 +927,11 @@ namespace snmalloc

if (destroy_queue)
{
auto cb = [this](capptr::Alloc<void> p) {
auto cb = [this, domesticate](capptr::Alloc<RemoteMessage> m) {
bool need_post = true; // Always going to post, so ignore.
const PagemapEntry& entry =
Config::Backend::get_metaentry(snmalloc::address_cast(p));
handle_dealloc_remote(entry, p.as_void(), need_post);
Config::Backend::get_metaentry(snmalloc::address_cast(m));
handle_dealloc_remote(entry, m, need_post, domesticate);
};

message_queue().destroy_and_iterate(domesticate, cb);
Expand Down
32 changes: 32 additions & 0 deletions src/snmalloc/mem/freelist.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@

namespace snmalloc
{
class BatchedRemoteMessage;

static constexpr address_t NO_KEY_TWEAK = 0;

/**
Expand Down Expand Up @@ -139,6 +141,8 @@ namespace snmalloc

friend class Object;

friend class ::snmalloc::BatchedRemoteMessage;

class Empty
{
public:
Expand Down Expand Up @@ -916,6 +920,34 @@ namespace snmalloc
return {first, last};
}

/**
 * Re-attach a previously extracted segment, running from `first` to `last`,
 * to this builder using the same key.
 *
 * The builder cannot count the segment itself, so the caller supplies the
 * element count in `size`; it is only consumed when TRACK_LENGTH is set.
 * When RANDOM is set, the list that receives the segment is picked with one
 * bit from `entropy`; otherwise list 0 is used.
 */
void append_segment(
  Object::BHeadPtr<BView, BQueue> first,
  Object::BHeadPtr<BView, BQueue> last,
  uint16_t size,
  const FreeListKey& key,
  address_t key_tweak,
  LocalEntropy& entropy)
{
  uint32_t list_ix = 0;
  if constexpr (RANDOM)
  {
    list_ix = entropy.next_bit();
  }

  if constexpr (TRACK_LENGTH)
  {
    length[list_ix] += size;
  }
  else
  {
    UNUSED(size);
  }

  // Link the segment's first element after the current end of the chosen
  // list, then make the segment's last element the new end.
  Object::store_next(cast_end(list_ix), first, key, key_tweak);
  set_end(list_ix, &(last->next_object));
}

template<typename Domesticator>
SNMALLOC_FAST_PATH void validate(
const FreeListKey& key, address_t key_tweak, Domesticator domesticate)
Expand Down
14 changes: 10 additions & 4 deletions src/snmalloc/mem/localalloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ namespace snmalloc
address_cast(entry.get_slab_metadata()));
#endif
local_cache.remote_dealloc_cache.template dealloc<sizeof(CoreAlloc)>(
entry.get_remote()->trunc_id(), p);
entry.get_slab_metadata(), p, &local_cache.entropy);
post_remote_cache();
return;
}
Expand Down Expand Up @@ -658,6 +658,12 @@ namespace snmalloc
return;
}

dealloc_remote(entry, p_tame);
}

SNMALLOC_SLOW_PATH void
dealloc_remote(const PagemapEntry& entry, capptr::Alloc<void> p_tame)
{
RemoteAllocator* remote = entry.get_remote();
if (SNMALLOC_LIKELY(remote != nullptr))
{
Expand All @@ -673,12 +679,12 @@ namespace snmalloc
if (local_cache.remote_dealloc_cache.reserve_space(entry))
{
local_cache.remote_dealloc_cache.template dealloc<sizeof(CoreAlloc)>(
remote->trunc_id(), p_tame);
entry.get_slab_metadata(), p_tame, &local_cache.entropy);
# ifdef SNMALLOC_TRACING
message<1024>(
"Remote dealloc fast {} ({}, {})",
p_raw,
alloc_size(p_raw),
address_cast(p_tame),
alloc_size(p_tame.unsafe_ptr()),
address_cast(entry.get_slab_metadata()));
# endif
return;
Expand Down
57 changes: 56 additions & 1 deletion src/snmalloc/mem/metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,55 @@ namespace snmalloc
return (--needed()) == 0;
}

/**
 * Outcome of a batched return of objects to a slab.
 *
 * Instances can only be created by FrontendSlabMetadata (the constructors
 * are private and it is the sole friend). A default-constructed result means
 * no further work is needed; a result built from a count means work is
 * needed and records how many objects are still to be returned.
 */
class ReturnObjectsResult
{
  friend FrontendSlabMetadata;

  bool _needs;
  uint16_t _batch;

  // The batch counter must be wider than MAX_CAPACITY_BITS.
  static_assert(sizeof(_batch) * 8 > MAX_CAPACITY_BITS);

  ReturnObjectsResult() : _needs(false), _batch(0) {}
  ReturnObjectsResult(uint16_t n) : _needs(true), _batch(n) {}

public:
  /// True if the result was built from a count, i.e. there is work to do.
  [[nodiscard]] bool needs_work() const
  {
    return _needs;
  }

  /// Number of objects not yet returned; may be zero even when
  /// needs_work() is true.
  [[nodiscard]] uint16_t batch_size() const
  {
    return _batch;
  }
};

/**
 * Batched counterpart of return_object(): returns up to `n` objects in one
 * step.
 *
 * If `n` is smaller than needed(), all `n` objects are accounted for and the
 * result reports that no work is needed. Otherwise needed() is driven to
 * zero and the result reports `.needs_work()`, with `.batch_size()` holding
 * the objects that were left over (possibly zero).
 *
 * Unlike return_object(), the caller's slow path must loop, retrying with
 * the leftover count until nothing remains unreturned.
 */
ReturnObjectsResult return_objects(uint16_t n)
{
  if (n < needed())
  {
    // Below the threshold: consume the whole batch and stay on the fast path.
    needed() -= n;
    return ReturnObjectsResult();
  }

  // Threshold reached: consume what is needed and hand the rest back.
  const uint16_t leftover = static_cast<uint16_t>(n - needed());
  needed() = 0;
  return ReturnObjectsResult(leftover);
}

bool is_unused()
{
return needed() == 0;
Expand Down Expand Up @@ -605,7 +654,13 @@ namespace snmalloc

[[nodiscard]] SNMALLOC_FAST_PATH address_t as_key_tweak() const noexcept
{
return address_cast(this) / alignof(decltype(*this));
return as_key_tweak(address_cast(this));
}

/**
 * Compute a key tweak from the address `self`: the address divided by the
 * alignment of FrontendSlabMetadata.
 */
[[nodiscard]] SNMALLOC_FAST_PATH static address_t
as_key_tweak(address_t self)
{
  constexpr auto metadata_alignment = alignof(FrontendSlabMetadata);
  return self / metadata_alignment;
}

typename ClientMeta::DataRef get_meta_for_object(size_t index)
Expand Down
Loading
Loading