From 8798151bc06dbe3305531dc194e13099970db5b5 Mon Sep 17 00:00:00 2001 From: Eddy Ashton Date: Mon, 13 Jan 2025 11:18:23 +0000 Subject: [PATCH] Upgrade snmalloc from 0.6.2 to 0.7.0 (#6746) --- 3rdparty/exported/snmalloc/CMakeLists.txt | 67 ++- 3rdparty/exported/snmalloc/README.md | 3 +- .../exported/snmalloc/src/snmalloc/aal/aal.h | 43 +- .../snmalloc/src/snmalloc/aal/aal_arm.h | 5 +- .../snmalloc/src/snmalloc/aal/aal_cheri.h | 10 +- .../snmalloc/src/snmalloc/aal/aal_concept.h | 120 +++-- .../snmalloc/src/snmalloc/aal/aal_x86.h | 6 +- .../snmalloc/src/snmalloc/backend/backend.h | 52 ++- .../src/snmalloc/backend/fixedglobalconfig.h | 13 +- .../src/snmalloc/backend/globalconfig.h | 72 ++- .../snmalloc/backend/meta_protected_range.h | 13 +- .../backend_helpers/backend_helpers.h | 1 + .../src/snmalloc/backend_helpers/buddy.h | 127 +++-- .../snmalloc/backend_helpers/commonconfig.h | 34 ++ .../backend_helpers/defaultpagemapentry.h | 9 +- .../backend_helpers/largebuddyrange.h | 6 +- .../src/snmalloc/backend_helpers/lockrange.h | 14 +- .../src/snmalloc/backend_helpers/noprange.h | 36 ++ .../snmalloc/backend_helpers/range_helpers.h | 1 - .../backend_helpers/smallbuddyrange.h | 2 +- .../exported/snmalloc/src/snmalloc/ds/aba.h | 7 +- .../snmalloc/src/snmalloc/ds/allocconfig.h | 101 +++- .../snmalloc/src/snmalloc/ds/combininglock.h | 296 ++++++++++++ .../exported/snmalloc/src/snmalloc/ds/ds.h | 1 + .../snmalloc/src/snmalloc/ds/flaglock.h | 10 +- .../snmalloc/src/snmalloc/ds/mpmcstack.h | 6 - .../snmalloc/src/snmalloc/ds/pagemap.h | 18 + .../snmalloc/src/snmalloc/ds/singleton.h | 19 +- .../snmalloc/src/snmalloc/ds_core/bits.h | 55 ++- .../snmalloc/src/snmalloc/ds_core/defines.h | 42 +- .../snmalloc/src/snmalloc/ds_core/helpers.h | 7 +- .../src/snmalloc/ds_core/mitigations.h | 8 +- .../src/snmalloc/ds_core/redblacktree.h | 111 ++--- .../snmalloc/src/snmalloc/ds_core/seqset.h | 3 + .../snmalloc/src/snmalloc/global/global.h | 1 + .../snmalloc/src/snmalloc/global/libc.h | 191 ++++++++ .../snmalloc/src/snmalloc/global/memcpy.h | 6 +- .../src/snmalloc/global/scopedalloc.h | 1 - .../src/snmalloc/global/threadalloc.h | 2 - .../src/snmalloc/mem/backend_concept.h | 212 ++++----- .../snmalloc/src/snmalloc/mem/corealloc.h | 434 ++++++++++++------ .../snmalloc/src/snmalloc/mem/entropy.h | 31 +- .../src/snmalloc/mem/external_alloc.h | 3 + .../snmalloc/src/snmalloc/mem/freelist.h | 328 +++++++++---- .../src/snmalloc/mem/freelist_queue.h | 193 ++++++++ .../snmalloc/src/snmalloc/mem/localalloc.h | 110 ++++- .../snmalloc/src/snmalloc/mem/localcache.h | 17 +- .../snmalloc/src/snmalloc/mem/metadata.h | 146 +++++- .../exported/snmalloc/src/snmalloc/mem/pool.h | 156 +++---- .../snmalloc/src/snmalloc/mem/pooled.h | 26 +- .../src/snmalloc/mem/remoteallocator.h | 424 ++++++++++++----- .../snmalloc/src/snmalloc/mem/remotecache.h | 236 +++++++++- .../src/snmalloc/mem/sizeclasstable.h | 76 ++- .../src/snmalloc/override/jemalloc_compat.cc | 7 +- .../src/snmalloc/override/malloc-extensions.h | 1 + .../snmalloc/src/snmalloc/override/malloc.cc | 176 ++----- .../snmalloc/src/snmalloc/override/memcpy.cc | 2 +- .../snmalloc/src/snmalloc/override/new.cc | 66 ++- .../snmalloc/src/snmalloc/override/override.h | 2 +- .../snmalloc/src/snmalloc/override/rust.cc | 20 +- .../snmalloc/src/snmalloc/pal/pal_apple.h | 103 ++++- .../snmalloc/src/snmalloc/pal/pal_concept.h | 149 +++--- .../snmalloc/src/snmalloc/pal/pal_consts.h | 12 +- .../snmalloc/src/snmalloc/pal/pal_ds.h | 1 - .../snmalloc/src/snmalloc/pal/pal_freebsd.h | 82 +++- 
.../snmalloc/src/snmalloc/pal/pal_haiku.h | 9 - .../snmalloc/src/snmalloc/pal/pal_linux.h | 73 ++- .../snmalloc/src/snmalloc/pal/pal_netbsd.h | 1 + .../snmalloc/src/snmalloc/pal/pal_noalloc.h | 2 +- .../src/snmalloc/pal/pal_open_enclave.h | 1 + .../snmalloc/src/snmalloc/pal/pal_posix.h | 14 +- .../src/snmalloc/pal/pal_timer_default.h | 2 - .../snmalloc/src/snmalloc/pal/pal_windows.h | 30 ++ .../exported/snmalloc/src/snmalloc/snmalloc.h | 18 +- .../snmalloc/src/test/func/cheri/cheri.cc | 3 +- .../src/test/func/client_meta/client_meta.cc | 69 +++ .../test/func/domestication/domestication.cc | 13 +- .../snmalloc/src/test/func/malloc/malloc.cc | 2 +- .../src/test/func/memcpy/func-memcpy.cc | 9 +- .../snmalloc/src/test/func/memory/memory.cc | 14 +- .../src/test/func/miracle_ptr/miracle_ptr.cc | 204 ++++++++ .../snmalloc/src/test/func/pagemap/pagemap.cc | 3 + .../snmalloc/src/test/func/pool/pool.cc | 24 +- .../src/test/func/redblack/redblack.cc | 9 +- .../snmalloc/src/test/func/sandbox/sandbox.cc | 5 +- .../src/test/func/sizeclass/sizeclass.cc | 38 +- .../src/test/func/statistics/stats.cc | 14 +- .../thread_alloc_external.cc | 3 +- .../src/test/func/two_alloc_types/alloc1.cc | 1 + .../src/test/func/two_alloc_types/main.cc | 1 + .../src/test/perf/contention/contention.cc | 4 +- .../perf/external_pointer/externalpointer.cc | 2 +- .../snmalloc/src/test/perf/memcpy/memcpy.cc | 3 +- .../snmalloc/src/test/perf/msgpass/msgpass.cc | 307 +++++++++++++ .../test/perf/singlethread/singlethread.cc | 4 +- .../snmalloc/src/test/perf/startup/startup.cc | 96 ++++ 3rdparty/exported/snmalloc/src/test/setup.h | 2 + CMakeLists.txt | 5 +- cgmanifest.json | 2 +- src/host/snmalloc.cpp | 2 + 100 files changed, 4156 insertions(+), 1365 deletions(-) create mode 100644 3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/noprange.h create mode 100644 3rdparty/exported/snmalloc/src/snmalloc/ds/combininglock.h create mode 100644 3rdparty/exported/snmalloc/src/snmalloc/global/libc.h create mode 100644 3rdparty/exported/snmalloc/src/snmalloc/mem/freelist_queue.h create mode 100644 3rdparty/exported/snmalloc/src/test/func/client_meta/client_meta.cc create mode 100644 3rdparty/exported/snmalloc/src/test/func/miracle_ptr/miracle_ptr.cc create mode 100644 3rdparty/exported/snmalloc/src/test/perf/msgpass/msgpass.cc create mode 100644 3rdparty/exported/snmalloc/src/test/perf/startup/startup.cc diff --git a/3rdparty/exported/snmalloc/CMakeLists.txt b/3rdparty/exported/snmalloc/CMakeLists.txt index 1d9cbb6d1bcf..2948e56db461 100644 --- a/3rdparty/exported/snmalloc/CMakeLists.txt +++ b/3rdparty/exported/snmalloc/CMakeLists.txt @@ -26,6 +26,9 @@ option(SNMALLOC_NO_REALLOCARR "Build without reallocarr exported" ON) option(SNMALLOC_LINK_ICF "Link with Identical Code Folding" ON) option(SNMALLOC_IPO "Link with IPO/LTO support" OFF) option(SNMALLOC_BENCHMARK_INDIVIDUAL_MITIGATIONS "Build tests and ld_preload for individual mitigations" OFF) +option(SNMALLOC_ENABLE_DYNAMIC_LOADING "Build such that snmalloc can be dynamically loaded. This is not required for LD_PRELOAD, and will harm performance if enabled." 
OFF) +option(SNMALLOC_ENABLE_WAIT_ON_ADDRESS "Use wait on address backoff strategy if it is available" ON) +option(SNMALLOC_ENABLE_FUZZING "Enable fuzzing instrumentation tests" OFF) # Options that apply only if we're not building the header-only library cmake_dependent_option(SNMALLOC_RUST_SUPPORT "Build static library for rust" OFF "NOT SNMALLOC_HEADER_ONLY_LIBRARY" OFF) cmake_dependent_option(SNMALLOC_STATIC_LIBRARY "Build static libraries" ON "NOT SNMALLOC_HEADER_ONLY_LIBRARY" OFF) @@ -61,6 +64,18 @@ if (SNMALLOC_SANITIZER) message(STATUS "Using sanitizer=${SNMALLOC_SANITIZER}") endif() +set(SNMALLOC_MIN_ALLOC_SIZE "" CACHE STRING "Minimum allocation bytes (power of 2)") +set(SNMALLOC_MIN_ALLOC_STEP_SIZE "" CACHE STRING "Minimum allocation step (power of 2)") + +set(SNMALLOC_PAGESIZE "" CACHE STRING "Page size in bytes") + +set(SNMALLOC_DEALLOC_BATCH_RING_ASSOC "" CACHE STRING "Associativity of deallocation batch cache; 0 to disable") +set(SNMALLOC_DEALLOC_BATCH_RING_SET_BITS "" CACHE STRING "Logarithm of number of deallocation batch cache associativity sets") + +if(MSVC AND SNMALLOC_STATIC_LIBRARY AND (SNMALLOC_STATIC_LIBRARY_PREFIX STREQUAL "")) + message(FATAL_ERROR "Empty static library prefix not supported on MSVC") +endif() + # If CheckLinkerFlag doesn't exist then provide a dummy implementation that # always fails. The fallback can be removed when we move to CMake 3.18 as the # baseline. @@ -121,6 +136,9 @@ int main() { # this is why we check its existence here CHECK_INCLUDE_FILE_CXX(linux/random.h SNMALLOC_HAS_LINUX_RANDOM_H) +# check if futex.h is available +CHECK_INCLUDE_FILE_CXX(linux/futex.h SNMALLOC_HAS_LINUX_FUTEX_H) + # Provide as function so other projects can reuse # FIXME: This modifies some variables that may or may not be the ones that # provide flags and so is broken by design. It should be removed once Verona @@ -150,7 +168,7 @@ function(clangformat_targets) # tool. It does not work with older versions as AfterCaseLabel is not supported # in earlier versions. find_program(CLANG_FORMAT NAMES - clang-format90 clang-format-9) + clang-format150 clang-format-15) # If we've found a clang-format tool, generate a target for it, otherwise emit # a warning. @@ -176,12 +194,27 @@ endfunction() add_library(snmalloc INTERFACE) if(SNMALLOC_USE_CXX17) - target_compile_definitions(snmalloc INTERFACE -DSNMALLOC_USE_CXX17) target_compile_features(snmalloc INTERFACE cxx_std_17) else() target_compile_features(snmalloc INTERFACE cxx_std_20) endif() +if(SNMALLOC_ENABLE_WAIT_ON_ADDRESS) + target_compile_definitions(snmalloc INTERFACE SNMALLOC_USE_WAIT_ON_ADDRESS=1) +else() + target_compile_definitions(snmalloc INTERFACE SNMALLOC_USE_WAIT_ON_ADDRESS=0) +endif() + +# https://learn.microsoft.com/en-us/cpp/build/reference/zc-cplusplus +if(MSVC) + target_compile_options(snmalloc INTERFACE "/Zc:__cplusplus") +endif() + +if (CMAKE_SYSTEM_NAME STREQUAL NetBSD) + target_include_directories(snmalloc INTERFACE /usr/pkg/include) + target_link_directories(snmalloc INTERFACE /usr/pkg/lib) +endif() + # Add header paths. 
target_include_directories(snmalloc INTERFACE @@ -221,18 +254,30 @@ endif() function(add_as_define FLAG) target_compile_definitions(snmalloc INTERFACE $<$:${FLAG}>) endfunction() +function(add_as_define_value KEY) + if (NOT ${${KEY}} STREQUAL "") + target_compile_definitions(snmalloc INTERFACE ${KEY}=${${KEY}}) + endif () +endfunction() add_as_define(SNMALLOC_QEMU_WORKAROUND) add_as_define(SNMALLOC_TRACING) add_as_define(SNMALLOC_CI_BUILD) add_as_define(SNMALLOC_PLATFORM_HAS_GETENTROPY) add_as_define(SNMALLOC_HAS_LINUX_RANDOM_H) +add_as_define(SNMALLOC_HAS_LINUX_FUTEX_H) if (SNMALLOC_NO_REALLOCARRAY) add_as_define(SNMALLOC_NO_REALLOCARRAY) endif() if (SNMALLOC_NO_REALLOCARR) add_as_define(SNMALLOC_NO_REALLOCARR) endif() +add_as_define_value(SNMALLOC_MIN_ALLOC_SIZE) +add_as_define_value(SNMALLOC_MIN_ALLOC_STEP_SIZE) +add_as_define_value(SNMALLOC_DEALLOC_BATCH_RING_ASSOC) +add_as_define_value(SNMALLOC_DEALLOC_BATCH_RING_SET_BITS) + +add_as_define_value(SNMALLOC_PAGESIZE) target_compile_definitions(snmalloc INTERFACE $<$:MALLOC_USABLE_SIZE_QUALIFIER=const>) @@ -316,6 +361,9 @@ if(NOT SNMALLOC_HEADER_ONLY_LIBRARY) if(SNMALLOC_SANITIZER) target_compile_options(${TESTNAME} PRIVATE -g -fsanitize=${SNMALLOC_SANITIZER} -fno-omit-frame-pointer) target_link_libraries(${TESTNAME} -fsanitize=${SNMALLOC_SANITIZER}) + if (${SNMALLOC_SANITIZER} MATCHES "thread") + target_compile_definitions(${TESTNAME} PRIVATE SNMALLOC_THREAD_SANITIZER_ENABLED) + endif() endif() add_warning_flags(${TESTNAME}) @@ -386,8 +434,14 @@ if(NOT SNMALLOC_HEADER_ONLY_LIBRARY) target_compile_definitions(${name} PRIVATE "SNMALLOC_EXPORT=__attribute__((visibility(\"default\")))") target_compile_options(${name} PRIVATE -fomit-frame-pointer -ffunction-sections) + + check_cxx_compiler_flag("-Werror -Wextra -Wall -mprfchw" SUPPORT_PREFETCH_WRITE) + if (SUPPORT_PREFETCH_WRITE) + target_compile_options(${name} PRIVATE -mprfchw) + endif() # Static TLS model is unsupported on Haiku. - if (NOT CMAKE_SYSTEM_NAME STREQUAL "Haiku") + if ((NOT CMAKE_SYSTEM_NAME STREQUAL "Haiku") AND (NOT SNMALLOC_ENABLE_DYNAMIC_LOADING)) + message(STATUS "snmalloc: Using static TLS model") target_compile_options(${name} PRIVATE -ftls-model=initial-exec) target_compile_options(${name} PRIVATE $<$:-g>) endif() @@ -426,9 +480,11 @@ if(NOT SNMALLOC_HEADER_ONLY_LIBRARY) endfunction() - set(SHIM_FILES src/snmalloc/override/new.cc) + set(SHIM_FILES src/snmalloc/override/malloc.cc src/snmalloc/override/new.cc) set(SHIM_FILES_MEMCPY src/snmalloc/override/memcpy.cc) + add_shim(snmalloc-new-override STATIC src/snmalloc/override/new.cc) + if (SNMALLOC_STATIC_LIBRARY) add_shim(snmallocshim-static STATIC ${SHIM_FILES}) target_compile_definitions(snmallocshim-static PRIVATE @@ -549,3 +605,6 @@ install(EXPORT snmallocConfig DESTINATION "share/snmalloc" ) +if (SNMALLOC_ENABLE_FUZZING) + add_subdirectory(fuzzing) +endif() diff --git a/3rdparty/exported/snmalloc/README.md b/3rdparty/exported/snmalloc/README.md index 8dcd9d119955..ff4d97f05b88 100644 --- a/3rdparty/exported/snmalloc/README.md +++ b/3rdparty/exported/snmalloc/README.md @@ -34,7 +34,8 @@ The mechanism for returning memory to remote threads has remained, but most of t We recommend you read [docs/security](./docs/security/README.md) to find out about the current design, and if you want to dive into the code [docs/AddressSpace.md](./docs/AddressSpace.md) provides a good overview of the allocation and deallocation paths. 
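The CMakeLists.txt hunks earlier in this patch surface several new tuning knobs (SNMALLOC_MIN_ALLOC_SIZE, SNMALLOC_MIN_ALLOC_STEP_SIZE, SNMALLOC_PAGESIZE and the SNMALLOC_DEALLOC_BATCH_RING_* values) as plain compile definitions via the new add_as_define_value helper, and always define SNMALLOC_USE_WAIT_ON_ADDRESS to 0 or 1. A hedged sketch of how a header-only consumer might set the same knobs directly, without going through the CMake cache; the chosen values and the include path are illustrative assumptions, not taken from this patch:

    // Illustrative values only; every macro here is optional and the defaults
    // in ds/allocconfig.h apply when a given macro is left undefined.
    #define SNMALLOC_MIN_ALLOC_SIZE 32      // minimum object size (power of 2)
    #define SNMALLOC_MIN_ALLOC_STEP_SIZE 16 // sizeclass granularity (power of 2)
    #define SNMALLOC_USE_WAIT_ON_ADDRESS 1  // the CMake target always defines this to 0 or 1
    #include "snmalloc/snmalloc.h"          // assumed include path; adjust to your build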
-[![snmalloc CI](https://github.com/microsoft/snmalloc/actions/workflows/main.yml/badge.svg?branch=master)](https://github.com/microsoft/snmalloc/actions/workflows/main.yml) +[![snmalloc CI](https://github.com/microsoft/snmalloc/actions/workflows/main.yml/badge.svg)](https://github.com/microsoft/snmalloc/actions/workflows/main.yml) +[![snmalloc CI for Morello](https://github.com/microsoft/snmalloc/actions/workflows/morello.yml/badge.svg)](https://github.com/microsoft/snmalloc/actions/workflows/morello.yml) # Hardening diff --git a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal.h b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal.h index 49b92da682cb..dae6231361ab 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal.h @@ -10,13 +10,24 @@ #include "aal_concept.h" #include "aal_consts.h" -#include +#if __has_include() +# include +# ifdef CLOCK_MONOTONIC +# define SNMALLOC_TICK_USE_CLOCK_GETTIME +# endif +#endif #include #include -#if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || \ +#ifndef SNMALLOC_TICK_USE_CLOCK_GETTIME +# include +#endif + +#if ( \ + defined(__i386__) || defined(_M_IX86) || defined(_X86_) || \ defined(__amd64__) || defined(__x86_64__) || defined(_M_X64) || \ - defined(_M_AMD64) + defined(_M_AMD64)) && \ + !defined(_M_ARM64EC) # if defined(SNMALLOC_SGX) # define PLATFORM_IS_X86_SGX # define SNMALLOC_NO_AAL_BUILTINS @@ -25,7 +36,8 @@ # endif #endif -#if defined(__arm__) || defined(__aarch64__) +#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM64) || \ + defined(_M_ARM64EC) # define PLATFORM_IS_ARM #endif @@ -53,7 +65,7 @@ namespace snmalloc { /* * Provide a default specification of address_t as uintptr_t for Arch-es - * that support IntegerPointers. Those Arch-es without IntegerPoihnters + * that support IntegerPointers. Those Arch-es without IntegerPointers * must explicitly give their address_t. * * This somewhat obtuse way of spelling the defaulting is necessary so @@ -147,7 +159,7 @@ namespace snmalloc static inline void prefetch(void* ptr) noexcept { #if __has_builtin(__builtin_prefetch) && !defined(SNMALLOC_NO_AAL_BUILTINS) - __builtin_prefetch(ptr); + __builtin_prefetch(ptr, 1, 3); #else Arch::prefetch(ptr); #endif @@ -166,11 +178,27 @@ namespace snmalloc if constexpr ( (Arch::aal_features & NoCpuCycleCounters) == NoCpuCycleCounters) { +#ifdef SNMALLOC_TICK_USE_CLOCK_GETTIME + // the buf is populated by clock_gettime + SNMALLOC_UNINITIALISED timespec buf; + // we can skip the error checking here: + // * EFAULT: for out-of-bound pointers (buf is always valid stack + // memory) + // * EINVAL: for invalid clock_id (we only use CLOCK_MONOTONIC enforced + // by POSIX.1) + // Notice that clock_gettime is a usually a vDSO call, so the overhead + // is minimal. 
+ ::clock_gettime(CLOCK_MONOTONIC, &buf); + return static_cast(buf.tv_sec) * 1000'000'000 + + static_cast(buf.tv_nsec); +# undef SNMALLOC_TICK_USE_CLOCK_GETTIME +#else auto tick = std::chrono::high_resolution_clock::now(); return static_cast( std::chrono::duration_cast( tick.time_since_epoch()) .count()); +#endif } else { @@ -204,9 +232,6 @@ namespace snmalloc static SNMALLOC_FAST_PATH CapPtr capptr_bound(CapPtr a, size_t size) noexcept { - static_assert( - BIn::spatial > capptr::dimension::Spatial::Alloc, - "Refusing to re-bound Spatial::Alloc CapPtr"); static_assert( capptr::is_spatial_refinement(), "capptr_bound must preserve non-spatial CapPtr dimensions"); diff --git a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_arm.h b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_arm.h index b6bae779e4de..11013c44c758 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_arm.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_arm.h @@ -1,6 +1,6 @@ #pragma once -#if defined(__aarch64__) +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) # define SNMALLOC_VA_BITS_64 # ifdef _MSC_VER # include @@ -13,6 +13,7 @@ #endif #include + namespace snmalloc { /** @@ -54,7 +55,7 @@ namespace snmalloc #elif __has_builtin(__builtin_prefetch) && !defined(SNMALLOC_NO_AAL_BUILTINS) __builtin_prefetch(ptr); #elif defined(SNMALLOC_VA_BITS_64) - __asm__ volatile("prfm pldl1keep, [%0]" : "=r"(ptr)); + __asm__ volatile("prfm pstl1keep, [%0]" : "=r"(ptr)); #else __asm__ volatile("pld\t[%0]" : "=r"(ptr)); #endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_cheri.h b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_cheri.h index 4a4acd379a04..84f11c038d7e 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_cheri.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_cheri.h @@ -69,9 +69,6 @@ namespace snmalloc static SNMALLOC_FAST_PATH CapPtr capptr_bound(CapPtr a, size_t size) noexcept { - static_assert( - BIn::spatial > capptr::dimension::Spatial::Alloc, - "Refusing to re-bound Spatial::Alloc CapPtr"); static_assert( capptr::is_spatial_refinement(), "capptr_bound must preserve non-spatial CapPtr dimensions"); @@ -87,8 +84,11 @@ namespace snmalloc void* pb = __builtin_cheri_bounds_set_exact(a.unsafe_ptr(), size); - SNMALLOC_ASSERT( - __builtin_cheri_tag_get(pb) && "capptr_bound exactness failed."); + SNMALLOC_ASSERT_MSG( + __builtin_cheri_tag_get(pb), + "capptr_bound exactness failed. {} of size {}", + a.unsafe_ptr(), + size); return CapPtr::unsafe_from(static_cast(pb)); } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_concept.h b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_concept.h index 3ce64a79ccc8..eac6ebc816e7 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_concept.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_concept.h @@ -14,87 +14,79 @@ namespace snmalloc * machine word size, and an upper bound on the address space size */ template - concept IsAAL_static_members = requires() - { - typename std::integral_constant; - typename std::integral_constant; - typename std::integral_constant; - typename std::integral_constant; - }; + concept IsAAL_static_members = + requires() { + typename std::integral_constant; + typename std::integral_constant; + typename std::integral_constant; + typename std::integral_constant; + }; /** * AALs provide a prefetch operation. 
*/ template - concept IsAAL_prefetch = requires(void* ptr) - { - { - AAL::prefetch(ptr) - } - noexcept->ConceptSame; - }; + concept IsAAL_prefetch = requires(void* ptr) { + { + AAL::prefetch(ptr) + } noexcept -> ConceptSame; + }; /** * AALs provide a notion of high-precision timing. */ template - concept IsAAL_tick = requires() - { - { - AAL::tick() - } - noexcept->ConceptSame; - }; + concept IsAAL_tick = requires() { + { + AAL::tick() + } noexcept -> ConceptSame; + }; template concept IsAAL_capptr_methods = - requires(capptr::Chunk auth, capptr::AllocFull ret, size_t sz) - { - /** - * Produce a pointer with reduced authority from a more privilged pointer. - * The resulting pointer will have base at auth's address and length of - * exactly sz. auth+sz must not exceed auth's limit. - */ - { - AAL::template capptr_bound(auth, sz) - } - noexcept->ConceptSame>; + requires(capptr::Chunk auth, capptr::AllocFull ret, size_t sz) { + /** + * Produce a pointer with reduced authority from a more privilged pointer. + * The resulting pointer will have base at auth's address and length of + * exactly sz. auth+sz must not exceed auth's limit. + */ + { + AAL::template capptr_bound(auth, sz) + } noexcept -> ConceptSame>; - /** - * "Amplify" by copying the address of one pointer into one of higher - * privilege. The resulting pointer differs from auth only in address. - */ - { - AAL::capptr_rebound(auth, ret) - } - noexcept->ConceptSame>; + /** + * "Amplify" by copying the address of one pointer into one of higher + * privilege. The resulting pointer differs from auth only in address. + */ + { + AAL::capptr_rebound(auth, ret) + } noexcept -> ConceptSame>; - /** - * Round up an allocation size to a size this architecture can represent. - * While there may also, in general, be alignment requirements for - * representability, in snmalloc so far we have not had reason to consider - * these explicitly: when we use our... - * - * - sizeclass machinery (for user-facing data), we assume that all - * sizeclasses describe architecturally representable aligned-and-sized - * regions - * - * - Range machinery (for internal meta-data), we always choose NAPOT - * regions big enough for the requested size (returning space above the - * allocation within such regions for use as smaller NAPOT regions). - * - * That is, capptr_size_round is not needed on the user-facing fast paths, - * merely internally for bootstrap and metadata management. - */ - { - AAL::capptr_size_round(sz) - } - noexcept->ConceptSame; - }; + /** + * Round up an allocation size to a size this architecture can represent. + * While there may also, in general, be alignment requirements for + * representability, in snmalloc so far we have not had reason to consider + * these explicitly: when we use our... + * + * - sizeclass machinery (for user-facing data), we assume that all + * sizeclasses describe architecturally representable aligned-and-sized + * regions + * + * - Range machinery (for internal meta-data), we always choose NAPOT + * regions big enough for the requested size (returning space above the + * allocation within such regions for use as smaller NAPOT regions). + * + * That is, capptr_size_round is not needed on the user-facing fast paths, + * merely internally for bootstrap and metadata management. 
+ */ + { + AAL::capptr_size_round(sz) + } noexcept -> ConceptSame; + }; template - concept IsAAL = IsAAL_static_members&& IsAAL_prefetch&& - IsAAL_tick&& IsAAL_capptr_methods; + concept IsAAL = IsAAL_static_members && IsAAL_prefetch && + IsAAL_tick && IsAAL_capptr_methods; } // namespace snmalloc #endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_x86.h b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_x86.h index cc20e777a008..150de26451ff 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_x86.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_x86.h @@ -78,7 +78,11 @@ namespace snmalloc */ static inline void prefetch(void* ptr) { - _mm_prefetch(reinterpret_cast(ptr), _MM_HINT_T0); +#if defined(_MSC_VER) + _m_prefetchw(ptr); +#else + _mm_prefetch(reinterpret_cast(ptr), _MM_HINT_ET0); +#endif } /** diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend/backend.h b/3rdparty/exported/snmalloc/src/snmalloc/backend/backend.h index d220a080a558..ee170c38f7b4 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend/backend.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend/backend.h @@ -23,9 +23,6 @@ namespace snmalloc using Pal = PAL; using SlabMetadata = typename PagemapEntry::SlabMetadata; - static constexpr size_t SizeofMetadata = - bits::next_pow2_const(sizeof(SlabMetadata)); - public: /** * Provide a block of meta-data with size and align. @@ -70,6 +67,17 @@ namespace snmalloc Aal::capptr_bound(p, size)); } + /** + * Returns unused meta-data to the system. This must have come from a call + * to alloc_meta_data, but can be a sub-range of the original allocation. + */ + static void dealloc_meta_data( + LocalState& local_state, capptr::Alloc p, size_t size) + { + auto arena = Authmap::amplify(p); + local_state.get_meta_range().dealloc_range(arena, size); + } + /** * Returns a chunk of memory with alignment and size of `size`, and a * block containing metadata about the slab. @@ -79,13 +87,26 @@ namespace snmalloc * (remote, sizeclass, slab_metadata) * where slab_metadata, is the second element of the pair return. */ - static std::pair, SlabMetadata*> - alloc_chunk(LocalState& local_state, size_t size, uintptr_t ras) + static std::pair, SlabMetadata*> alloc_chunk( + LocalState& local_state, + size_t size, + uintptr_t ras, + sizeclass_t sizeclass) { SNMALLOC_ASSERT(bits::is_pow2(size)); SNMALLOC_ASSERT(size >= MIN_CHUNK_SIZE); - auto meta_cap = local_state.get_meta_range().alloc_range(SizeofMetadata); + // Calculate the extra bytes required to store the client meta-data. 
+ size_t extra_bytes = SlabMetadata::get_extra_bytes(sizeclass); + + auto meta_size = bits::next_pow2(sizeof(SlabMetadata) + extra_bytes); + +#ifdef SNMALLOC_TRACING + message<1024>( + "Allocating metadata of size: {} ({})", meta_size, extra_bytes); +#endif + + auto meta_cap = local_state.get_meta_range().alloc_range(meta_size); auto meta = meta_cap.template as_reinterpret().unsafe_ptr(); @@ -102,7 +123,7 @@ namespace snmalloc #endif if (p == nullptr) { - local_state.get_meta_range().dealloc_range(meta_cap, SizeofMetadata); + local_state.get_meta_range().dealloc_range(meta_cap, meta_size); errno = ENOMEM; #ifdef SNMALLOC_TRACING message<1024>("Out of memory"); @@ -129,7 +150,8 @@ namespace snmalloc LocalState& local_state, SlabMetadata& slab_metadata, capptr::Alloc alloc, - size_t size) + size_t size, + sizeclass_t sizeclass) { /* * The backend takes possession of these chunks now, by disassociating @@ -156,12 +178,24 @@ namespace snmalloc */ capptr::Arena arena = Authmap::amplify(alloc); + // Calculate the extra bytes required to store the client meta-data. + size_t extra_bytes = SlabMetadata::get_extra_bytes(sizeclass); + + auto meta_size = bits::next_pow2(sizeof(SlabMetadata) + extra_bytes); local_state.get_meta_range().dealloc_range( - capptr::Arena::unsafe_from(&slab_metadata), SizeofMetadata); + capptr::Arena::unsafe_from(&slab_metadata), meta_size); local_state.get_object_range()->dealloc_range(arena, size); } + SNMALLOC_FAST_PATH static capptr::Alloc + capptr_rederive_alloc(capptr::Alloc a, size_t objsize) + { + return capptr_to_user_address_control( + Aal::capptr_bound( + Authmap::amplify(a), objsize)); + } + template SNMALLOC_FAST_PATH static const PagemapEntry& get_metaentry(address_t p) { diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend/fixedglobalconfig.h b/3rdparty/exported/snmalloc/src/snmalloc/backend/fixedglobalconfig.h index c6784e703779..83e1117470f2 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend/fixedglobalconfig.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend/fixedglobalconfig.h @@ -8,11 +8,14 @@ namespace snmalloc /** * A single fixed address range allocator configuration */ - template + template< + SNMALLOC_CONCEPT(IsPAL) PAL, + typename ClientMetaDataProvider = NoClientMetaDataProvider> class FixedRangeConfig final : public CommonConfig { public: - using PagemapEntry = DefaultPagemapEntry; + using PagemapEntry = DefaultPagemapEntry; + using ClientMeta = ClientMetaDataProvider; private: using ConcretePagemap = @@ -63,13 +66,11 @@ namespace snmalloc * C++, and not just its initializer fragment, to initialize a non-prefix * subset of the flags (in any order, at that). */ - static constexpr Flags Options = []() constexpr - { + static constexpr Flags Options = []() constexpr { Flags opts = {}; opts.HasDomesticate = true; return opts; - } - (); + }(); // This needs to be a forward reference as the // thread local state will need to know about this. diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend/globalconfig.h b/3rdparty/exported/snmalloc/src/snmalloc/backend/globalconfig.h index 525c77275c89..5d171a9b8710 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend/globalconfig.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend/globalconfig.h @@ -1,13 +1,9 @@ #pragma once -// If you define SNMALLOC_PROVIDE_OWN_CONFIG then you must provide your own -// definition of `snmalloc::Alloc` before including any files that include -// `snmalloc.h` or consume the global allocation APIs. 
-#ifndef SNMALLOC_PROVIDE_OWN_CONFIG -# include "../backend_helpers/backend_helpers.h" -# include "backend.h" -# include "meta_protected_range.h" -# include "standard_range.h" +#include "../backend_helpers/backend_helpers.h" +#include "backend.h" +#include "meta_protected_range.h" +#include "standard_range.h" namespace snmalloc { @@ -28,13 +24,16 @@ namespace snmalloc * The Configuration sets up a Pagemap for the backend to use, and the state * required to build new allocators (GlobalPoolState). */ - class StandardConfig final : public CommonConfig + template + class StandardConfigClientMeta final : public CommonConfig { - using GlobalPoolState = PoolState>; + using GlobalPoolState = PoolState< + CoreAllocator>>; public: using Pal = DefaultPal; - using PagemapEntry = DefaultPagemapEntry; + using PagemapEntry = DefaultPagemapEntry; + using ClientMeta = ClientMetaDataProvider; private: using ConcretePagemap = @@ -97,33 +96,38 @@ namespace snmalloc // of allocators. SNMALLOC_SLOW_PATH static void ensure_init_slow() { - FlagLock lock{initialisation_lock}; -# ifdef SNMALLOC_TRACING - message<1024>("Run init_impl"); -# endif - if (initialised) return; - LocalEntropy entropy; - entropy.init(); - // Initialise key for remote deallocation lists - RemoteAllocator::key_global = FreeListKey(entropy.get_free_list_key()); + with(initialisation_lock, [&]() { +#ifdef SNMALLOC_TRACING + message<1024>("Run init_impl"); +#endif + + if (initialised) + return; + + LocalEntropy entropy; + entropy.init(); + // Initialise key for remote deallocation lists + entropy.make_free_list_key(RemoteAllocator::key_global); + entropy.make_free_list_key(freelist::Object::key_root); - // Need to randomise pagemap location. If requested and not a - // StrictProvenance architecture, randomize its table's location within a - // significantly larger address space allocation. - static constexpr bool pagemap_randomize = - mitigations(random_pagemap) && !aal_supports; + // Need to randomise pagemap location. If requested and not a + // StrictProvenance architecture, randomize its table's location within + // a significantly larger address space allocation. + static constexpr bool pagemap_randomize = + mitigations(random_pagemap) && !aal_supports; - Pagemap::concretePagemap.template init(); + Pagemap::concretePagemap.template init(); - if constexpr (aal_supports) - { - Authmap::init(); - } + if constexpr (aal_supports) + { + Authmap::init(); + } - initialised.store(true, std::memory_order_release); + initialised.store(true, std::memory_order_release); + }); } public: @@ -162,10 +166,4 @@ namespace snmalloc snmalloc::register_clean_up(); } }; - - /** - * Create allocator type for this configuration. - */ - using Alloc = snmalloc::LocalAllocator; } // namespace snmalloc -#endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend/meta_protected_range.h b/3rdparty/exported/snmalloc/src/snmalloc/backend/meta_protected_range.h index 5c5795cc0589..b94968c9c657 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend/meta_protected_range.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend/meta_protected_range.h @@ -75,11 +75,14 @@ namespace snmalloc CommitRange, // In case of huge pages, we don't want to give each thread its own huge // page, so commit in the global range. 
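The globalconfig.h hunk above drops the fixed `using Alloc` alias and makes the standard configuration generic over a client meta-data provider (the NoClientMetaDataProvider and ArrayClientMetaDataProvider helpers appear in commonconfig.h further down in this patch). A hedged sketch of how an embedder might now spell its allocator types; the aliases below are illustrative and are not part of this patch:

    // Assumed wiring, for illustration only.
    using DefaultConfig =
      snmalloc::StandardConfigClientMeta<snmalloc::NoClientMetaDataProvider>;
    using Alloc = snmalloc::LocalAllocator<DefaultConfig>;

    // A configuration that reserves one std::atomic<size_t> of client
    // meta-data per allocated object (roughly what the new client_meta and
    // miracle_ptr tests exercise).
    using CountedConfig = snmalloc::StandardConfigClientMeta<
      snmalloc::ArrayClientMetaDataProvider<std::atomic<size_t>>>;
    using CountedAlloc = snmalloc::LocalAllocator<CountedConfig>;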
- LargeBuddyRange< - max_page_chunk_size_bits, - max_page_chunk_size_bits, - Pagemap, - page_size_bits>, + std::conditional_t< + (max_page_chunk_size_bits > MIN_CHUNK_BITS), + LargeBuddyRange< + max_page_chunk_size_bits, + max_page_chunk_size_bits, + Pagemap, + page_size_bits>, + NopRange>, LogRange<4>, GlobalRange, StatsRange>; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/backend_helpers.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/backend_helpers.h index 2104e681d53e..24e02b0530b1 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/backend_helpers.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/backend_helpers.h @@ -9,6 +9,7 @@ #include "indirectrange.h" #include "largebuddyrange.h" #include "logrange.h" +#include "noprange.h" #include "pagemap.h" #include "pagemapregisterrange.h" #include "palrange.h" diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/buddy.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/buddy.h index ff9416614dda..d7406468e543 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/buddy.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/buddy.h @@ -15,9 +15,17 @@ namespace snmalloc template class Buddy { - std::array, MAX_SIZE_BITS - MIN_SIZE_BITS> trees; + static_assert(MAX_SIZE_BITS > MIN_SIZE_BITS); + + struct Entry + { + typename Rep::Contents cache[3]; + RBTree tree{}; + }; + + std::array entries{}; // All RBtrees at or above this index should be empty. - size_t empty_at_or_above = 0; + size_t empty_at_or_above{0}; size_t to_index(size_t size) { @@ -42,15 +50,57 @@ namespace snmalloc void invariant() { #ifndef NDEBUG - for (size_t i = empty_at_or_above; i < trees.size(); i++) + for (size_t i = empty_at_or_above; i < entries.size(); i++) { - SNMALLOC_ASSERT(trees[i].is_empty()); + SNMALLOC_ASSERT(entries[i].tree.is_empty()); + // TODO check cache is empty } #endif } + bool remove_buddy(typename Rep::Contents addr, size_t size) + { + auto idx = to_index(size); + + // Empty at this range. + if (idx >= empty_at_or_above) + return false; + + auto buddy = Rep::buddy(addr, size); + + // Check local cache first + for (auto& e : entries[idx].cache) + { + if (Rep::equal(buddy, e)) + { + if (!Rep::can_consolidate(addr, size)) + return false; + + e = entries[idx].tree.remove_min(); + return true; + } + } + + auto path = entries[idx].tree.get_root_path(); + bool contains_buddy = entries[idx].tree.find(path, buddy); + + if (!contains_buddy) + return false; + + // Only check if we can consolidate after we know the buddy is in + // the buddy allocator. This is required to prevent possible segfaults + // from looking at the buddies meta-data, which we only know exists + // once we have found it in the red-black tree. + if (!Rep::can_consolidate(addr, size)) + return false; + + entries[idx].tree.remove_path(path); + return true; + } + public: constexpr Buddy() = default; + /** * Add a block to the buddy allocator. * @@ -63,48 +113,39 @@ namespace snmalloc */ typename Rep::Contents add_block(typename Rep::Contents addr, size_t size) { - auto idx = to_index(size); - empty_at_or_above = bits::max(empty_at_or_above, idx + 1); - validate_block(addr, size); - auto buddy = Rep::buddy(addr, size); + if (remove_buddy(addr, size)) + { + // Add to next level cache + size *= 2; + addr = Rep::align_down(addr, size); + if (size == bits::one_at_bit(MAX_SIZE_BITS)) + { + // Invariant should be checked on all non-tail return paths. 
+ // Holds trivially here with current design. + invariant(); + // Too big for this buddy allocator. + return addr; + } + return add_block(addr, size); + } - auto path = trees[idx].get_root_path(); - bool contains_buddy = trees[idx].find(path, buddy); + auto idx = to_index(size); + empty_at_or_above = bits::max(empty_at_or_above, idx + 1); - if (contains_buddy) + for (auto& e : entries[idx].cache) { - // Only check if we can consolidate after we know the buddy is in - // the buddy allocator. This is required to prevent possible segfaults - // from looking at the buddies meta-data, which we only know exists - // once we have found it in the red-black tree. - if (Rep::can_consolidate(addr, size)) + if (Rep::equal(Rep::null, e)) { - trees[idx].remove_path(path); - - // Add to next level cache - size *= 2; - addr = Rep::align_down(addr, size); - if (size == bits::one_at_bit(MAX_SIZE_BITS)) - { - // Invariant should be checked on all non-tail return paths. - // Holds trivially here with current design. - invariant(); - // Too big for this buddy allocator. - return addr; - } - return add_block(addr, size); + e = addr; + return Rep::null; } - - // Re-traverse as the path was to the buddy, - // but the representation says we cannot combine. - // We must find the correct place for this element. - // Something clever could be done here, but it's not worth it. - // path = trees[idx].get_root_path(); - trees[idx].find(path, addr); } - trees[idx].insert_path(path, addr); + + auto path = entries[idx].tree.get_root_path(); + entries[idx].tree.find(path, addr); + entries[idx].tree.insert_path(path, addr); invariant(); return Rep::null; } @@ -121,7 +162,15 @@ namespace snmalloc if (idx >= empty_at_or_above) return Rep::null; - auto addr = trees[idx].remove_min(); + auto addr = entries[idx].tree.remove_min(); + for (auto& e : entries[idx].cache) + { + if (Rep::equal(Rep::null, addr) || Rep::compare(e, addr)) + { + addr = std::exchange(e, addr); + } + } + if (addr != Rep::null) { validate_block(addr, size); diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/commonconfig.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/commonconfig.h index a69b6a3897d4..8ea020874c66 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/commonconfig.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/commonconfig.h @@ -95,6 +95,39 @@ namespace snmalloc bool HasDomesticate = false; }; + struct NoClientMetaDataProvider + { + using StorageType = Empty; + using DataRef = Empty&; + + static size_t required_count(size_t) + { + return 1; + } + + static DataRef get(StorageType* base, size_t) + { + return *base; + } + }; + + template + struct ArrayClientMetaDataProvider + { + using StorageType = T; + using DataRef = T&; + + static size_t required_count(size_t max_count) + { + return max_count; + } + + static DataRef get(StorageType* base, size_t index) + { + return base[index]; + } + }; + /** * Class containing definitions that are likely to be used by all except for * the most unusual back-end implementations. 
This can be subclassed as a @@ -126,4 +159,5 @@ namespace snmalloc } } } // namespace snmalloc + #include "../mem/remotecache.h" diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/defaultpagemapentry.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/defaultpagemapentry.h index 2083db30eb08..5e1f703d26be 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/defaultpagemapentry.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/defaultpagemapentry.h @@ -64,9 +64,14 @@ namespace snmalloc SNMALLOC_FAST_PATH DefaultPagemapEntryT() = default; }; - class DefaultSlabMetadata : public FrontendSlabMetadata + template + class DefaultSlabMetadata : public FrontendSlabMetadata< + DefaultSlabMetadata, + ClientMetaDataProvider> {}; - using DefaultPagemapEntry = DefaultPagemapEntryT; + template + using DefaultPagemapEntry = + DefaultPagemapEntryT>; } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/largebuddyrange.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/largebuddyrange.h index d1446d725fc2..803eb4844dcd 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/largebuddyrange.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/largebuddyrange.h @@ -6,8 +6,6 @@ #include "empty_range.h" #include "range_helpers.h" -#include - namespace snmalloc { /** @@ -354,7 +352,7 @@ namespace snmalloc SNMALLOC_ASSERT(size >= MIN_CHUNK_SIZE); SNMALLOC_ASSERT(bits::is_pow2(size)); - if (size >= (bits::one_at_bit(MAX_SIZE_BITS) - 1)) + if (size >= bits::mask_bits(MAX_SIZE_BITS)) { if (ParentRange::Aligned) return parent.alloc_range(size); @@ -378,7 +376,7 @@ namespace snmalloc if constexpr (MAX_SIZE_BITS != (bits::BITS - 1)) { - if (size >= (bits::one_at_bit(MAX_SIZE_BITS) - 1)) + if (size >= bits::mask_bits(MAX_SIZE_BITS)) { parent_dealloc_range(base, size); return; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/lockrange.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/lockrange.h index ce91711cce4c..2dc796ac696f 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/lockrange.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/lockrange.h @@ -22,7 +22,7 @@ namespace snmalloc * This is infrequently used code, a spin lock simplifies the code * considerably, and should never be on the fast path. 
*/ - FlagWord spin_lock{}; + CombiningLock spin_lock{}; public: static constexpr bool Aligned = ParentRange::Aligned; @@ -35,14 +35,18 @@ namespace snmalloc CapPtr alloc_range(size_t size) { - FlagLock lock(spin_lock); - return parent.alloc_range(size); + CapPtr result; + with(spin_lock, [&]() { + { + result = parent.alloc_range(size); + } + }); + return result; } void dealloc_range(CapPtr base, size_t size) { - FlagLock lock(spin_lock); - parent.dealloc_range(base, size); + with(spin_lock, [&]() { parent.dealloc_range(base, size); }); } }; }; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/noprange.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/noprange.h new file mode 100644 index 000000000000..45dcfdcf690d --- /dev/null +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/noprange.h @@ -0,0 +1,36 @@ +#pragma once +#include "range_helpers.h" + +namespace snmalloc +{ + struct NopRange + { + template + class Type : public ContainsParent + { + using ContainsParent::parent; + + public: + static constexpr bool Aligned = ParentRange::Aligned; + + static constexpr bool ConcurrencySafe = ParentRange::ConcurrencySafe; + + using ChunkBounds = typename ParentRange::ChunkBounds; + static_assert( + ChunkBounds::address_space_control == + capptr::dimension::AddressSpaceControl::Full); + + constexpr Type() = default; + + CapPtr alloc_range(size_t size) + { + return parent.alloc_range(size); + } + + void dealloc_range(CapPtr base, size_t size) + { + parent.dealloc_range(base, size); + } + }; + }; +} // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/range_helpers.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/range_helpers.h index 076b9fd74072..f1a82baf2ded 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/range_helpers.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/range_helpers.h @@ -160,5 +160,4 @@ namespace snmalloc } } }; - } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/smallbuddyrange.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/smallbuddyrange.h index 83796e1ecbe4..6f8400e83f1b 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/smallbuddyrange.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/smallbuddyrange.h @@ -31,6 +31,7 @@ namespace snmalloc static constexpr Contents root = nullptr; static constexpr address_t MASK = 1; + static void set(Handle ptr, Contents r) { SNMALLOC_ASSERT((address_cast(r) & MASK) == 0); @@ -244,7 +245,6 @@ namespace snmalloc void dealloc_range(CapPtr base, size_t size) { - SNMALLOC_ASSERT(bits::is_pow2(size)); add_range(base, size); } }; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds/aba.h b/3rdparty/exported/snmalloc/src/snmalloc/ds/aba.h index f14cc9ef685a..af75de9e0f73 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds/aba.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds/aba.h @@ -71,9 +71,10 @@ namespace snmalloc error("Only one inflight ABA operation at a time is allowed."); operation_in_flight = true; # endif - return Cmp{{independent.ptr.load(std::memory_order_relaxed), - independent.aba.load(std::memory_order_relaxed)}, - this}; + return Cmp{ + {independent.ptr.load(std::memory_order_relaxed), + independent.aba.load(std::memory_order_relaxed)}, + this}; } struct Cmp diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds/allocconfig.h b/3rdparty/exported/snmalloc/src/snmalloc/ds/allocconfig.h index 858940f05e50..78ea9f41a861 
100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds/allocconfig.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds/allocconfig.h @@ -20,10 +20,31 @@ namespace snmalloc // Used to isolate values on cache lines to prevent false sharing. static constexpr size_t CACHELINE_SIZE = 64; - // Minimum allocation size is space for two pointers. - static_assert(bits::next_pow2_const(sizeof(void*)) == sizeof(void*)); - static constexpr size_t MIN_ALLOC_SIZE = 2 * sizeof(void*); - static constexpr size_t MIN_ALLOC_BITS = bits::ctz_const(MIN_ALLOC_SIZE); + /// The "machine epsilon" for the small sizeclass machinery. + static constexpr size_t MIN_ALLOC_STEP_SIZE = +#if defined(SNMALLOC_MIN_ALLOC_STEP_SIZE) + SNMALLOC_MIN_ALLOC_STEP_SIZE; +#else + 2 * sizeof(void*); +#endif + + /// Derived from MIN_ALLOC_STEP_SIZE + static constexpr size_t MIN_ALLOC_STEP_BITS = + bits::ctz_const(MIN_ALLOC_STEP_SIZE); + static_assert(bits::is_pow2(MIN_ALLOC_STEP_SIZE)); + + /** + * Minimum allocation size is space for two pointers. If the small sizeclass + * machinery permits smaller values (that is, if MIN_ALLOC_STEP_SIZE is + * smaller than MIN_ALLOC_SIZE), which may be useful if MIN_ALLOC_SIZE must + * be large or not a power of two, those smaller size classes will be unused. + */ + static constexpr size_t MIN_ALLOC_SIZE = +#if defined(SNMALLOC_MIN_ALLOC_SIZE) + SNMALLOC_MIN_ALLOC_SIZE; +#else + 2 * sizeof(void*); +#endif // Minimum slab size. #if defined(SNMALLOC_QEMU_WORKAROUND) && defined(SNMALLOC_VA_BITS_64) @@ -72,24 +93,92 @@ namespace snmalloc MAX_SMALL_SIZECLASS_SIZE >= MIN_CHUNK_SIZE, "Large sizes need to be representable by as a multiple of MIN_CHUNK_SIZE"); + /** + * The number of bits needed to count the number of objects within a slab. + * + * Most likely, this is achieved by the smallest sizeclass, which will have + * many more than MIN_OBJECT_COUNT objects in its slab. But, just in case, + * it's defined here and checked when we compute the sizeclass table, since + * computing this number is potentially nontrivial. + */ +#if defined(SNMALLOC_QEMU_WORKAROUND) && defined(SNMALLOC_VA_BITS_64) + static constexpr size_t MAX_CAPACITY_BITS = 13; +#else + static constexpr size_t MAX_CAPACITY_BITS = 11; +#endif + + /** + * The maximum distance between the start of two objects in the same slab. + */ + static constexpr size_t MAX_SLAB_SPAN_SIZE = + (MIN_OBJECT_COUNT - 1) * MAX_SMALL_SIZECLASS_SIZE; + static constexpr size_t MAX_SLAB_SPAN_BITS = + bits::next_pow2_bits_const(MAX_SLAB_SPAN_SIZE); + // Number of slots for remote deallocation. static constexpr size_t REMOTE_SLOT_BITS = 8; static constexpr size_t REMOTE_SLOTS = 1 << REMOTE_SLOT_BITS; static constexpr size_t REMOTE_MASK = REMOTE_SLOTS - 1; +#if defined(SNMALLOC_DEALLOC_BATCH_RING_ASSOC) + static constexpr size_t DEALLOC_BATCH_RING_ASSOC = + SNMALLOC_DEALLOC_BATCH_RING_ASSOC; +#else +# if defined(__has_cpp_attribute) +# if ( \ + __has_cpp_attribute(msvc::no_unique_address) && \ + (__cplusplus >= 201803L || _MSVC_LANG >= 201803L)) || \ + __has_cpp_attribute(no_unique_address) + // For C++20 or later, we do have [[no_unique_address]] and so can also do + // batching if we aren't turning on the backward-pointer mitigations + static constexpr size_t DEALLOC_BATCH_MIN_ALLOC_WORDS = + mitigations(freelist_backward_edge) ? 
4 : 2; +# else + // For C++17, we don't have [[no_unique_address]] and so we always end up + // needing all four pointers' worth of space (because BatchedRemoteMessage has + // two freelist::Object::T<> links within, each of which will have two fields + // and will be padded to two pointers). + static constexpr size_t DEALLOC_BATCH_MIN_ALLOC_WORDS = 4; +# endif +# else + // If we don't even have the feature test macro, we're C++17 or earlier. + static constexpr size_t DEALLOC_BATCH_MIN_ALLOC_WORDS = 4; +# endif + + static constexpr size_t DEALLOC_BATCH_RING_ASSOC = + (MIN_ALLOC_SIZE >= (DEALLOC_BATCH_MIN_ALLOC_WORDS * sizeof(void*))) ? 2 : 0; +#endif + +#if defined(SNMALLOC_DEALLOC_BATCH_RING_SET_BITS) + static constexpr size_t DEALLOC_BATCH_RING_SET_BITS = + SNMALLOC_DEALLOC_BATCH_RING_SET_BITS; +#else + static constexpr size_t DEALLOC_BATCH_RING_SET_BITS = 3; +#endif + + static constexpr size_t DEALLOC_BATCH_RINGS = + DEALLOC_BATCH_RING_ASSOC * bits::one_at_bit(DEALLOC_BATCH_RING_SET_BITS); + static_assert( - INTERMEDIATE_BITS < MIN_ALLOC_BITS, + INTERMEDIATE_BITS < MIN_ALLOC_STEP_BITS, "INTERMEDIATE_BITS must be less than MIN_ALLOC_BITS"); static_assert( MIN_ALLOC_SIZE >= (sizeof(void*) * 2), "MIN_ALLOC_SIZE must be sufficient for two pointers"); + static_assert( + 1 << (INTERMEDIATE_BITS + MIN_ALLOC_STEP_BITS) >= + bits::next_pow2_const(MIN_ALLOC_SIZE), + "Entire sizeclass exponent is below MIN_ALLOC_SIZE; adjust STEP_SIZE"); + static_assert( + MIN_ALLOC_SIZE >= MIN_ALLOC_STEP_SIZE, + "Minimum alloc sizes below minimum step size; raise MIN_ALLOC_SIZE"); // Return remote small allocs when the local cache reaches this size. static constexpr int64_t REMOTE_CACHE = #ifdef USE_REMOTE_CACHE USE_REMOTE_CACHE #else - 1 << MIN_CHUNK_BITS + MIN_CHUNK_SIZE #endif ; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds/combininglock.h b/3rdparty/exported/snmalloc/src/snmalloc/ds/combininglock.h new file mode 100644 index 000000000000..89a4bc258e0b --- /dev/null +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds/combininglock.h @@ -0,0 +1,296 @@ +#pragma once + +#include "../aal/aal.h" +#include "../pal/pal.h" + +#include + +namespace snmalloc +{ + class CombiningLockNode; + + struct CombiningLock + { + // Fast path lock incase there is no contention. + std::atomic flag{false}; + + // MCS queue of work items + std::atomic last{nullptr}; + + void release() + { + flag.store(false, std::memory_order_release); + } + }; + + /** + * @brief Combinations of MCS queue lock with Flat Combining + * + * Each element in the queue has a pointer to a work item. + * This means when under contention the thread holding the lock + * can perform the work. + * + * As the work items are arbitrary lambdas there are no simplifications + * for combining related work items. I.e. original Flat Combining paper + * might sort a collection of inserts, and perform them in a single traversal. + * + * Note that, we should perhaps add a Futex/WakeOnAddress mode to improve + * performance in the contended case, rather than spinning. 
+ */ + class CombiningLockNode + { + template + static constexpr bool use_wait_on_address = + pal_supports && + SNMALLOC_USE_WAIT_ON_ADDRESS; + + template + struct WaitWordTypeSelect; + + template + struct WaitWordTypeSelect + { + using type = typename Pal::WaitingWord; + }; + + template + struct WaitWordTypeSelect + { + using type = int; + }; + + using WaitingWordType = + typename WaitWordTypeSelect, DefaultPal>:: + type; + + template + friend class CombiningLockNodeTempl; + + enum class LockStatus : WaitingWordType + { + // The work for this node has not been completed. + WAITING, + + // The work for this thread has been completed, and it is not the + // last element in the queue. + DONE, + + // The work for this thread has not been completed, and it is the + // head of the queue. + HEAD, + + // The waiter is currently sleeping. + SLEEPING + }; + + // Status of the queue, set by the thread at the head of the queue, + // When it makes the thread for this node either the head of the queue + // or completes its work. + std::atomic status{LockStatus::WAITING}; + + // Used to store the queue + std::atomic next{nullptr}; + + // Stores the C++ lambda associated with this node in the queue. + void (*f_raw)(CombiningLockNode*); + + constexpr CombiningLockNode(void (*f)(CombiningLockNode*)) : f_raw(f) {} + + void set_status(LockStatus s) + { + status.store(s, std::memory_order_release); + } + + template + static void wake(CombiningLockNode* node, LockStatus message) + { + if constexpr (!use_wait_on_address) + { + node->set_status(message); + } + else + { + if ( + node->status.exchange(message, std::memory_order_acq_rel) == + LockStatus::SLEEPING) + { + Pal::notify_one_on_address(node->status); + } + } + } + + template + void wait() + { + if constexpr (!use_wait_on_address) + { + while (status.load(std::memory_order_acquire) == LockStatus::WAITING) + Aal::pause(); + } + else + { + int remaining = 100; + while (remaining > 0) + { + if (status.load(std::memory_order_acquire) != LockStatus::WAITING) + return; + Aal::pause(); + remaining--; + } + LockStatus expected = LockStatus::WAITING; + if (status.compare_exchange_strong( + expected, LockStatus::SLEEPING, std::memory_order_acq_rel)) + { + Pal::wait_on_address(status, LockStatus::SLEEPING); + } + } + } + + SNMALLOC_SLOW_PATH void attach_slow(CombiningLock& lock) + { + // There is contention for the lock, we need to add our work to the + // queue of pending work + auto prev = lock.last.exchange(this, std::memory_order_acq_rel); + + if (prev != nullptr) + { + // If we aren't the head, link into predecessor + prev->next.store(this, std::memory_order_release); + + // Wait to for predecessor to complete + wait(); + + // Determine if another thread completed our work. + if (status.load(std::memory_order_acquire) == LockStatus::DONE) + return; + } + else + { + // We are the head of the queue. Spin until we acquire the fast path + // lock. As we are in the queue future requests shouldn't try to + // acquire the fast path lock, but stale views of the queue being empty + // could still be concurrent with this thread. + while (lock.flag.exchange(true, std::memory_order_acquire)) + { + while (lock.flag.load(std::memory_order_relaxed)) + { + Aal::pause(); + } + } + + // We could set + // status = LockStatus::HEAD + // However, the subsequent state assumes it is HEAD, and + // nothing would read it. + } + + // We are the head of the queue, and responsible for + // waking/performing our and subsequent work. 
+ auto curr = this; + while (true) + { + // Start pulling in the next element of the queue + auto n = curr->next.load(std::memory_order_acquire); + Aal::prefetch(n); + + // Perform work for head of the queue + curr->f_raw(curr); + + // Determine if there are more elements. + n = curr->next.load(std::memory_order_acquire); + if (n == nullptr) + break; + // Signal this work was completed and move on to + // next item. + wake(curr, LockStatus::DONE); + curr = n; + } + + // This could be the end of the queue, attempt to close the + // queue. + auto curr_c = curr; + if (lock.last.compare_exchange_strong( + curr_c, + nullptr, + std::memory_order_release, + std::memory_order_relaxed)) + { + // Queue was successfully closed. + // Notify last element the work was completed. + wake(curr, LockStatus::DONE); + lock.release(); + return; + } + + // Failed to close the queue wait for next thread to be + // added. + while (curr->next.load(std::memory_order_relaxed) == nullptr) + Aal::pause(); + + auto n = curr->next.load(std::memory_order_acquire); + + // As we had to wait, give the job to the next thread + // to carry on performing the work. + wake(n, LockStatus::HEAD); + + // Notify the thread that we completed its work. + // Note that this needs to be before setting curr->status, + // as after the status is set the thread may deallocate the + // queue node. + wake(curr, LockStatus::DONE); + return; + } + }; + + template + class CombiningLockNodeTempl : CombiningLockNode + { + template + friend void with(CombiningLock&, FF&&); + + // This holds the closure for the lambda + F f; + + CombiningLockNodeTempl(CombiningLock& lock, F&& f_) + : CombiningLockNode([](CombiningLockNode* self) { + CombiningLockNodeTempl* self_templ = + reinterpret_cast(self); + self_templ->f(); + }), + f(std::forward(f_)) + { + attach_slow(lock); + } + }; + + /** + * Lock primitive. This takes a reference to a Lock, and a thunk to + * call when the lock is available. The thunk should be independent of + * the current thread as the thunk may be executed by a different thread. + */ + template + inline void with(CombiningLock& lock, F&& f) + { + // Test if no one is waiting + if (SNMALLOC_LIKELY(lock.last.load(std::memory_order_relaxed) == nullptr)) + { + // No one was waiting so low contention. Attempt to acquire the flag + // lock. + if (SNMALLOC_LIKELY( + lock.flag.exchange(true, std::memory_order_acquire) == false)) + { + // We grabbed the lock. + // Execute the thunk. + f(); + + // Release the lock + lock.release(); + return; + } + } + + // There is contention for the lock, we need to take the slow path + // with the queue. 
+ CombiningLockNodeTempl node(lock, std::forward(f)); + } +} // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds/ds.h b/3rdparty/exported/snmalloc/src/snmalloc/ds/ds.h index 4cfa22b9b9d3..a26eb20dec9f 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds/ds.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds/ds.h @@ -6,6 +6,7 @@ #include "../pal/pal.h" #include "aba.h" #include "allocconfig.h" +#include "combininglock.h" #include "entropy.h" #include "flaglock.h" #include "mpmcstack.h" diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds/flaglock.h b/3rdparty/exported/snmalloc/src/snmalloc/ds/flaglock.h index 4a539e636078..5463504858f3 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds/flaglock.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds/flaglock.h @@ -4,7 +4,6 @@ #include "../pal/pal.h" #include -#include namespace snmalloc { @@ -93,7 +92,9 @@ namespace snmalloc {} void set_owner() {} + void clear_owner() {} + void assert_not_owned_by_current_thread() {} }; @@ -133,4 +134,11 @@ namespace snmalloc lock.flag.store(false, std::memory_order_release); } }; + + template + inline void with(FlagWord& lock, F&& f) + { + FlagLock l(lock); + f(); + } } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds/mpmcstack.h b/3rdparty/exported/snmalloc/src/snmalloc/ds/mpmcstack.h index cd005e9bf00a..e6a3b1d9f604 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds/mpmcstack.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds/mpmcstack.h @@ -4,12 +4,6 @@ #include "aba.h" #include "allocconfig.h" -#if defined(__has_feature) -# if __has_feature(thread_sanitizer) -# define SNMALLOC_THREAD_SANITIZER_ENABLED -# endif -#endif - namespace snmalloc { template diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds/pagemap.h b/3rdparty/exported/snmalloc/src/snmalloc/ds/pagemap.h index 267fe9a0b30c..d8636f67f011 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds/pagemap.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds/pagemap.h @@ -1,5 +1,7 @@ #pragma once +#include "../ds_core/ds_core.h" + namespace snmalloc { /** @@ -66,6 +68,10 @@ namespace snmalloc auto page_end = pointer_align_up(last); size_t using_size = pointer_diff(page_start, page_end); PAL::template notify_using(page_start, using_size); + if constexpr (pal_supports) + { + PAL::notify_do_dump(page_start, using_size); + } } constexpr FlatPagemap() = default; @@ -179,11 +185,23 @@ namespace snmalloc // Allocate a power of two extra to allow the placement of the // pagemap be difficult to guess if randomize_position set. size_t additional_size = +#ifdef SNMALLOC_THREAD_SANITIZER_ENABLED + // When running with TSAN we failed to allocate the very large range + // randomly + randomize_position ? bits::next_pow2(REQUIRED_SIZE) : 0; +#else randomize_position ? bits::next_pow2(REQUIRED_SIZE) * 4 : 0; +#endif size_t request_size = REQUIRED_SIZE + additional_size; auto new_body_untyped = PAL::reserve(request_size); + if constexpr (pal_supports) + { + // Pagemap should not be in core dump except where it is non-zero. 
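The new with(FlagWord&, ...) overload above gives flag locks the same thunk-based interface, and Singleton::get is reworked to use it for the classic double-checked initialisation. A standalone sketch of that pattern with illustrative stand-in types:

    #include <atomic>

    // Stand-in for FlagWord/with() above: a minimal test-and-set spin lock.
    struct SpinFlag
    {
      std::atomic<bool> flag{false};
    };

    template<typename F>
    void with(SpinFlag& lock, F&& f)
    {
      while (lock.flag.exchange(true, std::memory_order_acquire))
      {
        // spin until the holder releases
      }
      f();
      lock.flag.store(false, std::memory_order_release);
    }

    // Double-checked initialisation in the shape of Singleton::get above.
    struct Config
    {
      int value = 0;
    };

    SpinFlag config_lock{};
    std::atomic<bool> config_initialised{false};
    Config config{};

    Config& get_config()
    {
      if (!config_initialised.load(std::memory_order_acquire))
      {
        with(config_lock, []() {
          // Re-check under the lock: another thread may have won the race.
          if (!config_initialised.load(std::memory_order_relaxed))
          {
            config.value = 42; // one-time initialisation
            config_initialised.store(true, std::memory_order_release);
          }
        });
      }
      return config;
    }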
+ PAL::notify_do_not_dump(new_body_untyped, request_size); + } + if (new_body_untyped == nullptr) { PAL::error("Failed to initialise snmalloc."); diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds/singleton.h b/3rdparty/exported/snmalloc/src/snmalloc/ds/singleton.h index c85635d39f24..174128e77e67 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds/singleton.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds/singleton.h @@ -3,9 +3,7 @@ #include "../ds_core/ds_core.h" #include "flaglock.h" -#include #include -#include #include namespace snmalloc @@ -35,14 +33,15 @@ namespace snmalloc if (SNMALLOC_UNLIKELY(!initialised.load(std::memory_order_acquire))) { - FlagLock lock(flag); - if (!initialised) - { - init(&obj); - initialised.store(true, std::memory_order_release); - if (first != nullptr) - *first = true; - } + with(flag, [&]() { + if (!initialised) + { + init(&obj); + initialised.store(true, std::memory_order_release); + if (first != nullptr) + *first = true; + } + }); } return obj; } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/bits.h b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/bits.h index b82ee846e318..b192c8275239 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/bits.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/bits.h @@ -45,11 +45,12 @@ namespace snmalloc static constexpr size_t BITS = sizeof(size_t) * CHAR_BIT; /** - * Returns a value of type T that has a single bit set, + * Returns a value of type T that has a single bit set at the given index, + * with 0 being the least significant bit. * - * S is a template parameter because callers use either `int` or `size_t` - * and either is valid to represent a number in the range 0-63 (or 0-127 if - * we want to use `__uint128_t` as `T`). + * S, the type of the bit index, is a template parameter because callers + * use either `int` or `size_t` and either is valid to represent a number in + * the range 0-63 (or 0-127 if we want to use `__uint128_t` as `T`). */ template constexpr T one_at_bit(S shift) @@ -59,6 +60,19 @@ namespace snmalloc return (static_cast(1)) << shift; } + /** + * Returns a value of type T that has its n LSBs all set. + * + * S is a template parameter because callers use either `int` or `size_t` + * and either is valid to represent a number in the range 0-63 (or 0-127 if + * we want to use `__uint128_t` as `T`). + */ + template + constexpr T mask_bits(S n) + { + return one_at_bit(n) - 1; + } + inline SNMALLOC_FAST_PATH size_t clz(size_t x) { SNMALLOC_ASSERT(x != 0); // Calling with 0 is UB on some implementations @@ -158,7 +172,11 @@ namespace snmalloc SNMALLOC_ASSERT(x != 0); // Calling with 0 is UB on some implementations #if defined(_MSC_VER) && !defined(__clang__) -# ifdef _WIN64 +# if defined(_M_ARM64) || defined(_M_ARM64EC) + unsigned long n = 0; + _BitScanForward64(&n, static_cast(x)); + return static_cast(n); +# elif defined(_WIN64) return _tzcnt_u64(static_cast(x)); # else return _tzcnt_u32(static_cast(x)); @@ -203,7 +221,12 @@ namespace snmalloc overflow = __builtin_mul_overflow(x, y, &prod); return prod; #elif defined(_MSC_VER) -# ifdef _WIN64 +# if defined(_M_ARM64) || defined(_M_ARM64EC) + size_t high_prod = __umulh(x, y); + size_t prod = x * y; + overflow = high_prod != 0; + return prod; +# elif defined(_WIN64) size_t high_prod; size_t prod = _umul128(x, y, &high_prod); overflow = high_prod != 0; @@ -313,27 +336,11 @@ namespace snmalloc * * Does not work for value=0. 
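mask_bits(n), added above, is simply one_at_bit(n) - 1, a value with the n least significant bits set; later hunks in this patch replace several open-coded `one_at_bit(X) - 1` expressions with it. A standalone restatement:

    #include <cstddef>

    // Illustrative restatement of one_at_bit/mask_bits from bits.h above.
    template<typename T = size_t, typename S>
    constexpr T one_at_bit(S shift)
    {
      return static_cast<T>(1) << shift;
    }

    template<typename T = size_t, typename S>
    constexpr T mask_bits(S n)
    {
      return one_at_bit<T>(n) - 1;
    }

    static_assert(mask_bits(0) == 0);
    static_assert(mask_bits(4) == 0b1111);
    static_assert(mask_bits(12) == 0xfff);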
***********************************************/ - template - static size_t to_exp_mant(size_t value) - { - constexpr size_t LEADING_BIT = one_at_bit(MANTISSA_BITS + LOW_BITS) >> 1; - constexpr size_t MANTISSA_MASK = one_at_bit(MANTISSA_BITS) - 1; - - value = value - 1; - - size_t e = - bits::BITS - MANTISSA_BITS - LOW_BITS - clz(value | LEADING_BIT); - size_t b = (e == 0) ? 0 : 1; - size_t m = (value >> (LOW_BITS + e - b)) & MANTISSA_MASK; - - return (e << MANTISSA_BITS) + m; - } - template constexpr size_t to_exp_mant_const(size_t value) { constexpr size_t LEADING_BIT = one_at_bit(MANTISSA_BITS + LOW_BITS) >> 1; - constexpr size_t MANTISSA_MASK = one_at_bit(MANTISSA_BITS) - 1; + constexpr size_t MANTISSA_MASK = mask_bits(MANTISSA_BITS); value = value - 1; @@ -351,7 +358,7 @@ namespace snmalloc if (MANTISSA_BITS > 0) { m_e = m_e + 1; - constexpr size_t MANTISSA_MASK = one_at_bit(MANTISSA_BITS) - 1; + constexpr size_t MANTISSA_MASK = mask_bits(MANTISSA_BITS); size_t m = m_e & MANTISSA_MASK; size_t e = m_e >> MANTISSA_BITS; size_t b = e == 0 ? 0 : 1; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/defines.h b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/defines.h index 2de53be036e8..d50939ad00e4 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/defines.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/defines.h @@ -17,7 +17,7 @@ * `inline` and complains if you specify `SNMALLOC_FAST_PATH` and `inline`. */ # define SNMALLOC_FAST_PATH_INLINE ALWAYSINLINE -# if _MSC_VER >= 1927 && !defined(SNMALLOC_USE_CXX17) +# if _MSC_VER >= 1927 && _MSVC_LANG > 201703L # define SNMALLOC_FAST_PATH_LAMBDA [[msvc::forceinline]] # else # define SNMALLOC_FAST_PATH_LAMBDA @@ -27,11 +27,6 @@ # define SNMALLOC_REQUIRE_CONSTINIT # define SNMALLOC_UNUSED_FUNCTION # define SNMALLOC_USED_FUNCTION -# ifdef SNMALLOC_USE_CXX17 -# define SNMALLOC_NO_UNIQUE_ADDRESS -# else -# define SNMALLOC_NO_UNIQUE_ADDRESS [[msvc::no_unique_address]] -# endif #else # define SNMALLOC_FAST_FAIL() __builtin_trap() # define SNMALLOC_LIKELY(x) __builtin_expect(!!(x), 1) @@ -55,11 +50,6 @@ # define SNMALLOC_COLD __attribute__((cold)) # define SNMALLOC_UNUSED_FUNCTION __attribute((unused)) # define SNMALLOC_USED_FUNCTION __attribute((used)) -# ifdef SNMALLOC_USE_CXX17 -# define SNMALLOC_NO_UNIQUE_ADDRESS -# else -# define SNMALLOC_NO_UNIQUE_ADDRESS [[no_unique_address]] -# endif # ifdef __clang__ # define SNMALLOC_REQUIRE_CONSTINIT \ [[clang::require_constant_initialization]] @@ -68,6 +58,27 @@ # endif #endif +/* + * Try to find the right "no_unique_address" attribute for our use, assuming one + * exists. + * + * Different compiler versions and ABIs make this a right pain; see, for + * example, https://github.com/llvm/llvm-project/issues/49358 and + * https://devblogs.microsoft.com/cppblog/msvc-cpp20-and-the-std-cpp20-switch/ . 
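to_exp_mant_const above packs a size into a compact exponent/mantissa code; the separate non-constexpr to_exp_mant is removed and the open-coded mantissa mask becomes mask_bits. A self-contained sketch of the encoding with a worked value (the example assumes a 64-bit size_t):

    #include <climits>
    #include <cstddef>

    constexpr size_t BITS = sizeof(size_t) * CHAR_BIT;

    // Simple constexpr count-leading-zeros, standing in for bits::clz.
    constexpr size_t clz(size_t x)
    {
      size_t n = 0;
      for (size_t bit = static_cast<size_t>(1) << (BITS - 1);
           bit != 0 && (x & bit) == 0;
           bit >>= 1)
        n++;
      return n;
    }

    // Mirrors to_exp_mant_const above.
    template<size_t MANTISSA_BITS, size_t LOW_BITS>
    constexpr size_t to_exp_mant(size_t value)
    {
      constexpr size_t LEADING_BIT =
        (static_cast<size_t>(1) << (MANTISSA_BITS + LOW_BITS)) >> 1;
      constexpr size_t MANTISSA_MASK =
        (static_cast<size_t>(1) << MANTISSA_BITS) - 1;

      value = value - 1;
      size_t e = BITS - MANTISSA_BITS - LOW_BITS - clz(value | LEADING_BIT);
      size_t b = (e == 0) ? 0 : 1;
      size_t m = (value >> (LOW_BITS + e - b)) & MANTISSA_MASK;
      return (e << MANTISSA_BITS) + m;
    }

    // Worked example: 24 - 1 = 23 = 0b10111, so the exponent is 3 and the two
    // mantissa bits after the leading one are 0b01, giving (3 << 2) + 1 = 13.
    static_assert(BITS != 64 || to_exp_mant<2, 0>(24) == 13);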
+ */ +#if defined(__has_cpp_attribute) +# if __has_cpp_attribute(msvc::no_unique_address) && \ + (__cplusplus >= 201803L || _MSVC_LANG >= 201803L) +# define SNMALLOC_NO_UNIQUE_ADDRESS [[msvc::no_unique_address]] +# elif __has_cpp_attribute(no_unique_address) +# define SNMALLOC_NO_UNIQUE_ADDRESS [[no_unique_address]] +# else +# define SNMALLOC_NO_UNIQUE_ADDRESS +# endif +#else +# define SNMALLOC_NO_UNIQUE_ADDRESS +#endif + #if defined(__cpp_constinit) && __cpp_constinit >= 201907 # define SNMALLOC_CONSTINIT_FN constinit # define SNMALLOC_CONSTINIT_STATIC constinit const @@ -183,6 +194,15 @@ namespace snmalloc # endif #endif +// Used to suppress pattern filling for potentially unintialized variables with +// automatic storage duration. +// https://clang.llvm.org/docs/AttributeReference.html#uninitialized +#ifdef __clang__ +# define SNMALLOC_UNINITIALISED [[clang::uninitialized]] +#else +# define SNMALLOC_UNINITIALISED +#endif + namespace snmalloc { /** diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/helpers.h b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/helpers.h index 61fcee9545d6..aedb72f4b348 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/helpers.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/helpers.h @@ -4,8 +4,8 @@ #include #include -#include #include +#include #include namespace snmalloc @@ -96,6 +96,7 @@ namespace snmalloc */ template struct function_ref; + template struct function_ref { @@ -324,7 +325,7 @@ namespace snmalloc } std::array buf{{0}}; const char digits[] = "0123456789"; - for (long i = long(buf.size() - 1); i >= 0; i--) + for (long i = static_cast(buf.size() - 1); i >= 0; i--) { buf[static_cast(i)] = digits[s % 10]; s /= 10; @@ -356,7 +357,7 @@ namespace snmalloc const char hexdigits[] = "0123456789abcdef"; // Length of string including null terminator static_assert(sizeof(hexdigits) == 0x11); - for (long i = long(buf.size() - 1); i >= 0; i--) + for (long i = static_cast(buf.size() - 1); i >= 0; i--) { buf[static_cast(i)] = hexdigits[s & 0xf]; s >>= 4; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/mitigations.h b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/mitigations.h index 88547dcc7de4..2370f2951d0f 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/mitigations.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/mitigations.h @@ -247,10 +247,10 @@ namespace snmalloc */ full_checks + cheri_checks + clear_meta - freelist_forward_edge - pal_enforce_access : - /** - * clear_meta is important on CHERI to avoid leaking capabilities. - */ - sanity_checks + cheri_checks + clear_meta; + /** + * clear_meta is important on CHERI to avoid leaking capabilities. + */ + sanity_checks + cheri_checks + clear_meta; #else CHECK_CLIENT ? full_checks : no_checks; #endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/redblacktree.h b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/redblacktree.h index df1fb9410661..77ca6e50d4ef 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/redblacktree.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/redblacktree.h @@ -3,7 +3,6 @@ #include #include #include -#include namespace snmalloc { @@ -17,11 +16,10 @@ namespace snmalloc * ID. 
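The attribute probe above replaces the old SNMALLOC_USE_CXX17-driven definitions of SNMALLOC_NO_UNIQUE_ADDRESS. What the attribute buys, where it is honoured, is that empty policy members stop contributing to object size; a self-contained sketch reusing the same detection pattern:

    #include <cstddef>

    // Same detection pattern as the block above, reproduced so this sketch is
    // self-contained.
    #if defined(__has_cpp_attribute)
    #  if __has_cpp_attribute(msvc::no_unique_address) && \
        (__cplusplus >= 201803L || _MSVC_LANG >= 201803L)
    #    define NO_UNIQUE_ADDRESS [[msvc::no_unique_address]]
    #  elif __has_cpp_attribute(no_unique_address)
    #    define NO_UNIQUE_ADDRESS [[no_unique_address]]
    #  else
    #    define NO_UNIQUE_ADDRESS
    #  endif
    #else
    #  define NO_UNIQUE_ADDRESS
    #endif

    struct Empty
    {};

    struct Counter
    {
      NO_UNIQUE_ADDRESS Empty policy;
      size_t count = 0;
    };

    // Where the attribute is honoured, sizeof(Counter) is typically just
    // sizeof(size_t); with the empty fallback the Empty member still occupies
    // at least one byte plus padding.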
*/ template - concept RBRepTypes = requires() - { - typename Rep::Handle; - typename Rep::Contents; - }; + concept RBRepTypes = requires() { + typename Rep::Handle; + typename Rep::Contents; + }; /** * The representation must define operations on the holder and contents @@ -41,50 +39,38 @@ namespace snmalloc */ template concept RBRepMethods = - requires(typename Rep::Handle hp, typename Rep::Contents k, bool b) - { - { - Rep::get(hp) - } - ->ConceptSame; - { - Rep::set(hp, k) - } - ->ConceptSame; - { - Rep::is_red(k) - } - ->ConceptSame; - { - Rep::set_red(k, b) - } - ->ConceptSame; - { - Rep::ref(b, k) - } - ->ConceptSame; - { - Rep::null - } - ->ConceptSameModRef; - { - typename Rep::Handle + requires(typename Rep::Handle hp, typename Rep::Contents k, bool b) { + { + Rep::get(hp) + } -> ConceptSame; { - const_cast< + Rep::set(hp, k) + } -> ConceptSame; + { + Rep::is_red(k) + } -> ConceptSame; + { + Rep::set_red(k, b) + } -> ConceptSame; + { + Rep::ref(b, k) + } -> ConceptSame; + { + Rep::null + } -> ConceptSameModRef; + { + typename Rep::Handle{const_cast< std::remove_const_t>*>( - &Rep::root) - } - } - ->ConceptSame; - }; + &Rep::root)} + } -> ConceptSame; + }; template concept RBRep = // RBRepTypes // - && RBRepMethods // - && ConceptSame< - decltype(Rep::null), - std::add_const_t>; + && RBRepMethods // + && + ConceptSame>; #endif /** @@ -151,6 +137,7 @@ namespace snmalloc { return ptr != t.ptr; } + ///@} bool is_null() @@ -275,7 +262,7 @@ namespace snmalloc std::array path; size_t length = 0; - RBPath(typename Rep::Handle root) : path{} + RBPath(typename Rep::Handle root) { path[0].set(root, false); length = 1; @@ -452,9 +439,27 @@ namespace snmalloc depth); if (!(get_dir(true, curr).is_null() && get_dir(false, curr).is_null())) { - auto s_indent = std::string(indent); - print(get_dir(true, curr), (s_indent + "|").c_str(), depth + 1); - print(get_dir(false, curr), (s_indent + " ").c_str(), depth + 1); + // As the tree should be balanced, the depth should not exceed 128 if + // there are 2^64 elements in the tree. This is a debug feature, and + // it would be impossible to debug something of this size, so this is + // considerably larger than required. + // If there is a bug that leads to an unbalanced tree, this might be + // insufficient to accurately display the tree, but it will still be + // memory safe as the search code is bounded by the string size. 
+ static constexpr size_t max_depth = 128; + char s_indent[max_depth]; + size_t end = 0; + for (; end < max_depth - 1; end++) + { + if (indent[end] == 0) + break; + s_indent[end] = indent[end]; + } + s_indent[end] = '|'; + s_indent[end + 1] = 0; + print(get_dir(true, curr), s_indent, depth + 1); + s_indent[end] = ' '; + print(get_dir(false, curr), s_indent, depth + 1); } } } @@ -490,8 +495,7 @@ namespace snmalloc */ path.move(true); while (path.move(false)) - { - } + {} K curr = path.curr(); @@ -510,8 +514,8 @@ namespace snmalloc // If we had a left child, replace ourselves with the extracted value // from above Rep::set_red(curr, Rep::is_red(splice)); - get_dir(true, curr) = K(get_dir(true, splice)); - get_dir(false, curr) = K(get_dir(false, splice)); + get_dir(true, curr) = K{get_dir(true, splice)}; + get_dir(false, curr) = K{get_dir(false, splice)}; splice = curr; path.fixup(); } @@ -742,8 +746,7 @@ namespace snmalloc auto path = get_root_path(); while (path.move(true)) - { - } + {} K result = path.curr(); diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/seqset.h b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/seqset.h index 600ec07df0a5..e493fbba0b35 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/seqset.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/seqset.h @@ -34,6 +34,9 @@ namespace snmalloc constexpr Node(Node* next, Node* prev) : next(next), prev(prev) {} public: + /// Default constructor, creates an invalid node. + constexpr Node() : Node(nullptr, nullptr) {} + void invariant() { SNMALLOC_ASSERT(next != nullptr); diff --git a/3rdparty/exported/snmalloc/src/snmalloc/global/global.h b/3rdparty/exported/snmalloc/src/snmalloc/global/global.h index a2f1159a14ee..514d69b7c61b 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/global/global.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/global/global.h @@ -1,4 +1,5 @@ #include "bounds_checks.h" +#include "libc.h" #include "memcpy.h" #include "scopedalloc.h" #include "threadalloc.h" diff --git a/3rdparty/exported/snmalloc/src/snmalloc/global/libc.h b/3rdparty/exported/snmalloc/src/snmalloc/global/libc.h new file mode 100644 index 000000000000..2a6db38a3892 --- /dev/null +++ b/3rdparty/exported/snmalloc/src/snmalloc/global/libc.h @@ -0,0 +1,191 @@ +#pragma once + +#include "threadalloc.h" + +#include +#include + +namespace snmalloc::libc +{ + SNMALLOC_SLOW_PATH inline void* set_error(int err = ENOMEM) + { + errno = err; + return nullptr; + } + + SNMALLOC_SLOW_PATH inline int set_error_and_return(int err = ENOMEM) + { + errno = err; + return err; + } + + inline void* __malloc_end_pointer(void* ptr) + { + return ThreadAlloc::get().external_pointer(ptr); + } + + SNMALLOC_FAST_PATH_INLINE void* malloc(size_t size) + { + return ThreadAlloc::get().alloc(size); + } + + SNMALLOC_FAST_PATH_INLINE void free(void* ptr) + { + ThreadAlloc::get().dealloc(ptr); + } + + SNMALLOC_FAST_PATH_INLINE void free_sized(void* ptr, size_t size) + { + ThreadAlloc::get().dealloc(ptr, size); + } + + SNMALLOC_FAST_PATH_INLINE void* calloc(size_t nmemb, size_t size) + { + bool overflow = false; + size_t sz = bits::umul(size, nmemb, overflow); + if (SNMALLOC_UNLIKELY(overflow)) + { + return set_error(); + } + return ThreadAlloc::get().alloc(sz); + } + + SNMALLOC_FAST_PATH_INLINE void* realloc(void* ptr, size_t size) + { + auto& a = ThreadAlloc::get(); + size_t sz = a.alloc_size(ptr); + // Keep the current allocation if the given size is in the same sizeclass. 
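calloc above multiplies nmemb by size with an explicit overflow check (bits::umul) before allocating, returning nullptr with errno set rather than silently wrapping. A standalone sketch of the same guard, with the GCC/Clang builtin standing in for bits::umul:

    #include <cerrno>
    #include <cstddef>
    #include <cstdlib>

    // Overflow-checked multiply guarding the allocation size, in the style of
    // calloc above (__builtin_mul_overflow is a GCC/Clang builtin used here in
    // place of bits::umul).
    void* calloc_sketch(size_t nmemb, size_t size)
    {
      size_t bytes;
      if (__builtin_mul_overflow(nmemb, size, &bytes))
      {
        errno = ENOMEM;
        return nullptr;
      }
      // calloc must return zeroed memory; plain std::calloc is the closest
      // standard equivalent to snmalloc's zeroing allocation here.
      return std::calloc(1, bytes);
    }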
+ if (sz == round_size(size)) + { +#ifdef SNMALLOC_PASS_THROUGH + // snmallocs alignment guarantees can be broken by realloc in pass-through + // this is not exercised, by existing clients, but is tested. + if (pointer_align_up(ptr, natural_alignment(size)) == ptr) + return ptr; +#else + return ptr; +#endif + } + + void* p = a.alloc(size); + if (SNMALLOC_LIKELY(p != nullptr)) + { + sz = bits::min(size, sz); + // Guard memcpy as GCC is assuming not nullptr for ptr after the memcpy + // otherwise. + if (SNMALLOC_UNLIKELY(sz != 0)) + { + SNMALLOC_ASSUME(ptr != nullptr); + ::memcpy(p, ptr, sz); + } + a.dealloc(ptr); + } + else if (SNMALLOC_LIKELY(size == 0)) + { + a.dealloc(ptr); + } + else + { + return set_error(); + } + return p; + } + + inline size_t malloc_usable_size(const void* ptr) + { + return ThreadAlloc::get().alloc_size(ptr); + } + + inline void* reallocarray(void* ptr, size_t nmemb, size_t size) + { + bool overflow = false; + size_t sz = bits::umul(size, nmemb, overflow); + if (SNMALLOC_UNLIKELY(overflow)) + { + return set_error(); + } + return realloc(ptr, sz); + } + + inline int reallocarr(void* ptr_, size_t nmemb, size_t size) + { + int err = errno; + auto& a = ThreadAlloc::get(); + bool overflow = false; + size_t sz = bits::umul(size, nmemb, overflow); + if (SNMALLOC_UNLIKELY(sz == 0)) + { + errno = err; + return 0; + } + if (SNMALLOC_UNLIKELY(overflow)) + { + return set_error_and_return(EOVERFLOW); + } + + void** ptr = reinterpret_cast(ptr_); + void* p = a.alloc(sz); + if (SNMALLOC_UNLIKELY(p == nullptr)) + { + return set_error_and_return(ENOMEM); + } + + sz = bits::min(sz, a.alloc_size(*ptr)); + + SNMALLOC_ASSUME(*ptr != nullptr || sz == 0); + // Guard memcpy as GCC is assuming not nullptr for ptr after the memcpy + // otherwise. + if (SNMALLOC_UNLIKELY(sz != 0)) + ::memcpy(p, *ptr, sz); + errno = err; + a.dealloc(*ptr); + *ptr = p; + return 0; + } + + inline void* memalign(size_t alignment, size_t size) + { + if (SNMALLOC_UNLIKELY(alignment == 0 || !bits::is_pow2(alignment))) + { + return set_error(EINVAL); + } + + return malloc(aligned_size(alignment, size)); + } + + inline void* aligned_alloc(size_t alignment, size_t size) + { + return memalign(alignment, size); + } + + inline int posix_memalign(void** memptr, size_t alignment, size_t size) + { + if (SNMALLOC_UNLIKELY( + (alignment < sizeof(uintptr_t) || !bits::is_pow2(alignment)))) + { + return EINVAL; + } + + void* p = memalign(alignment, size); + if (SNMALLOC_UNLIKELY(p == nullptr)) + { + if (size != 0) + return ENOMEM; + } + *memptr = p; + return 0; + } + + inline typename snmalloc::Alloc::Config::ClientMeta::DataRef + get_client_meta_data(void* p) + { + return ThreadAlloc::get().get_client_meta_data(p); + } + + inline std::add_const_t + get_client_meta_data_const(void* p) + { + return ThreadAlloc::get().get_client_meta_data_const(p); + } + +} // namespace snmalloc::libc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/global/memcpy.h b/3rdparty/exported/snmalloc/src/snmalloc/global/memcpy.h index f4996f6097eb..51a87270aca4 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/global/memcpy.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/global/memcpy.h @@ -1,5 +1,4 @@ #pragma once -#include "../backend/globalconfig.h" #include "bounds_checks.h" namespace snmalloc @@ -23,6 +22,7 @@ namespace snmalloc { char data[Size]; }; + auto* d = static_cast(dst); auto* s = static_cast(src); *d = *s; @@ -192,7 +192,8 @@ namespace snmalloc * It's not entirely clear what we would do if this were not the case. 
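posix_memalign above validates the alignment up front (a power of two, at least sizeof(uintptr_t)) and returns EINVAL without touching errno or *memptr; ENOMEM is only reported when a non-zero request cannot be satisfied. A standalone sketch of that contract, deferring to the C library for the actual allocation:

    #include <cerrno>
    #include <cstdint>
    #include <cstdlib>

    // Alignment validation in the style of posix_memalign above.
    bool valid_alignment(size_t alignment)
    {
      return alignment >= sizeof(uintptr_t) &&
        (alignment & (alignment - 1)) == 0;
    }

    int posix_memalign_sketch(void** memptr, size_t alignment, size_t size)
    {
      if (!valid_alignment(alignment))
        return EINVAL; // *memptr left untouched, errno not set

      // snmalloc serves over-aligned requests by padding the size so the size
      // class's natural alignment covers the request (aligned_size above);
      // here we simply round up for std::aligned_alloc, which requires the
      // size to be a multiple of the alignment.
      size_t rounded = (size + alignment - 1) / alignment * alignment;
      void* p = std::aligned_alloc(alignment, rounded);
      if (p == nullptr && size != 0)
        return ENOMEM;

      *memptr = p;
      return 0;
    }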
* Best not think too hard about it now. */ - static_assert(alignof(void*) == sizeof(void*)); + static_assert( + alignof(void*) == sizeof(void*)); // NOLINT(misc-redundant-expression) static constexpr size_t LargestRegisterSize = 16; @@ -254,6 +255,7 @@ namespace snmalloc { void* p[2]; }; + if (sizeof(Ptr2) <= len) { auto dp = static_cast(dst); diff --git a/3rdparty/exported/snmalloc/src/snmalloc/global/scopedalloc.h b/3rdparty/exported/snmalloc/src/snmalloc/global/scopedalloc.h index cb9f0fc8b1c0..345635a70aeb 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/global/scopedalloc.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/global/scopedalloc.h @@ -1,5 +1,4 @@ #pragma once -#include "../backend/globalconfig.h" /** * This header requires that Alloc has been defined. diff --git a/3rdparty/exported/snmalloc/src/snmalloc/global/threadalloc.h b/3rdparty/exported/snmalloc/src/snmalloc/global/threadalloc.h index d900fb27249b..7ba8ddd79ad4 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/global/threadalloc.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/global/threadalloc.h @@ -1,7 +1,5 @@ #pragma once -#include "../backend/globalconfig.h" - #if defined(SNMALLOC_EXTERNAL_THREAD_ALLOC) # define SNMALLOC_THREAD_TEARDOWN_DEFINED #endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/backend_concept.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/backend_concept.h index f0ed3964df9d..c7e76a15f067 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/backend_concept.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/backend_concept.h @@ -2,8 +2,10 @@ #ifdef __cpp_concepts # include "../ds/ds.h" +# include "sizeclasstable.h" # include + namespace snmalloc { /** @@ -14,18 +16,15 @@ namespace snmalloc */ template concept IsReadablePagemap = - requires(address_t addr, size_t sz, const typename Pagemap::Entry& t) - { - { - Pagemap::template get_metaentry(addr) - } - ->ConceptSame; - - { - Pagemap::template get_metaentry(addr) - } - ->ConceptSame; - }; + requires(address_t addr, size_t sz, const typename Pagemap::Entry& t) { + { + Pagemap::template get_metaentry(addr) + } -> ConceptSame; + + { + Pagemap::template get_metaentry(addr) + } -> ConceptSame; + }; /** * The core of the static pagemap accessor interface: {get,set}_metadata. @@ -36,24 +35,20 @@ namespace snmalloc * set_metadata updates the entry in the pagemap. */ template - concept IsWritablePagemap = IsReadablePagemap&& requires( - address_t addr, size_t sz, const typename Pagemap::Entry& t) - { - { - Pagemap::template get_metaentry_mut(addr) - } - ->ConceptSame; - - { - Pagemap::template get_metaentry_mut(addr) - } - ->ConceptSame; - - { - Pagemap::set_metaentry(addr, sz, t) - } - ->ConceptSame; - }; + concept IsWritablePagemap = IsReadablePagemap && + requires(address_t addr, size_t sz, const typename Pagemap::Entry& t) { + { + Pagemap::template get_metaentry_mut(addr) + } -> ConceptSame; + + { + Pagemap::template get_metaentry_mut(addr) + } -> ConceptSame; + + { + Pagemap::set_metaentry(addr, sz, t) + } -> ConceptSame; + }; /** * The pagemap can also be told to commit backing storage for a range of @@ -63,13 +58,11 @@ namespace snmalloc * which combines this and the core concept, above. 
*/ template - concept IsPagemapWithRegister = requires(capptr::Arena p, size_t sz) - { - { - Pagemap::register_range(p, sz) - } - ->ConceptSame; - }; + concept IsPagemapWithRegister = requires(capptr::Arena p, size_t sz) { + { + Pagemap::register_range(p, sz) + } -> ConceptSame; + }; /** * The full pagemap accessor interface, with all of {get,set}_metadata and @@ -81,7 +74,7 @@ namespace snmalloc */ template concept IsWritablePagemapWithRegister = - IsWritablePagemap&& IsPagemapWithRegister; + IsWritablePagemap && IsPagemapWithRegister; /** * The configuration also defines domestication (that is, the difference @@ -91,62 +84,56 @@ namespace snmalloc */ template concept IsConfigDomestication = - requires(typename Config::LocalState* ls, capptr::AllocWild ptr) - { - { - Config::capptr_domesticate(ls, ptr) - } - ->ConceptSame>; - - { - Config::capptr_domesticate(ls, ptr.template as_static()) - } - ->ConceptSame>; - }; + requires(typename Config::LocalState* ls, capptr::AllocWild ptr) { + { + Config::capptr_domesticate(ls, ptr) + } -> ConceptSame>; + + { + Config::capptr_domesticate(ls, ptr.template as_static()) + } -> ConceptSame>; + }; class CommonConfig; struct Flags; template concept IsBackend = - requires(LocalState& local_state, size_t size, uintptr_t ras) - { - { - Backend::alloc_chunk(local_state, size, ras) - } - ->ConceptSame< - std::pair, typename Backend::SlabMetadata*>>; - } - &&requires(LocalState* local_state, size_t size) - { - { - Backend::template alloc_meta_data(local_state, size) - } - ->ConceptSame>; - } - &&requires( - LocalState& local_state, - typename Backend::SlabMetadata& slab_metadata, - capptr::Alloc alloc, - size_t size) - { - { - Backend::dealloc_chunk(local_state, slab_metadata, alloc, size) - } - ->ConceptSame; - } - &&requires(address_t p) - { - { - Backend::template get_metaentry(p) - } - ->ConceptSame; - - { - Backend::template get_metaentry(p) - } - ->ConceptSame; - }; + requires( + LocalState& local_state, + size_t size, + uintptr_t ras, + sizeclass_t sizeclass) { + { + Backend::alloc_chunk(local_state, size, ras, sizeclass) + } -> ConceptSame< + std::pair, typename Backend::SlabMetadata*>>; + } && + requires(LocalState* local_state, size_t size) { + { + Backend::template alloc_meta_data(local_state, size) + } -> ConceptSame>; + } && + requires( + LocalState& local_state, + typename Backend::SlabMetadata& slab_metadata, + capptr::Alloc alloc, + size_t size, + sizeclass_t sizeclass) { + { + Backend::dealloc_chunk( + local_state, slab_metadata, alloc, size, sizeclass) + } -> ConceptSame; + } && + requires(address_t p) { + { + Backend::template get_metaentry(p) + } -> ConceptSame; + + { + Backend::template get_metaentry(p) + } -> ConceptSame; + }; /** * Config objects of type T must obey a number of constraints. 
They @@ -161,38 +148,39 @@ namespace snmalloc * */ template - concept IsConfig = std::is_base_of::value&& - IsPAL&& IsBackend< - typename Config::LocalState, - typename Config::PagemapEntry, - typename Config::Backend>&& requires() - { - typename Config::LocalState; - typename Config::Backend; - typename Config::PagemapEntry; - - { - Config::Options - } - ->ConceptSameModRef; - } - &&( + concept IsConfig = std::is_base_of::value && + IsPAL && + IsBackend && requires() { - Config::Options.CoreAllocIsPoolAllocated == true; - typename Config::GlobalPoolState; + typename Config::LocalState; + typename Config::Backend; + typename Config::PagemapEntry; + { - Config::pool() - } - ->ConceptSame; - } || - requires() { Config::Options.CoreAllocIsPoolAllocated == false; }); + Config::Options + } -> ConceptSameModRef; + } && + ( + requires() { + Config::Options.CoreAllocIsPoolAllocated == true; + typename Config::GlobalPoolState; + { + Config::pool() + } -> ConceptSame; + } || + requires() { + Config::Options.CoreAllocIsPoolAllocated == false; + }); /** * The lazy version of the above; please see ds_core/concept.h and use * sparingly. */ template - concept IsConfigLazy = !is_type_complete_v || IsConfig; + concept IsConfigLazy = ! + is_type_complete_v || IsConfig; } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/corealloc.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/corealloc.h index c7fc79b72452..5b0381805839 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/corealloc.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/corealloc.h @@ -48,6 +48,7 @@ namespace snmalloc */ using BackendSlabMetadata = typename Config::Backend::SlabMetadata; using PagemapEntry = typename Config::PagemapEntry; + /// }@ /** @@ -104,7 +105,7 @@ namespace snmalloc * This is the thread local structure associated to this * allocator. */ - LocalCache* attached_cache; + LocalCache* attached_cache; /** * Ticker to query the clock regularly at a lower cost. @@ -186,7 +187,7 @@ namespace snmalloc { auto slab_end = pointer_offset(bumpptr, slab_size + 1 - rsize); - auto& key = entropy.get_free_list_key(); + auto key_tweak = meta->as_key_tweak(); auto& b = meta->free_queue; @@ -197,6 +198,7 @@ namespace snmalloc { capptr::AllocFull next; }; + // The following code implements Sattolo's algorithm for generating // random cyclic permutations. This implementation is in the opposite // direction, so that the original space does not need initialising. 
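alloc_new_list above cites Sattolo's algorithm: a single pass builds a uniformly random cyclic permutation, so following next pointers from any object visits every object in the slab exactly once, and (as the comment notes) the walk can be run in the direction that avoids initialising the space first. A standalone sketch of the classic algorithm over an index array:

    #include <cstddef>
    #include <numeric>
    #include <random>
    #include <vector>

    // Classic Sattolo shuffle: next[] ends up describing a single cycle that
    // covers all n elements.
    std::vector<size_t> sattolo_cycle(size_t n, std::mt19937_64& rng)
    {
      std::vector<size_t> next(n);
      std::iota(next.begin(), next.end(), size_t{0});

      if (n < 2)
        return next;

      for (size_t i = n - 1; i > 0; i--)
      {
        // j is drawn from [0, i), strictly below i (unlike Fisher-Yates),
        // which is what forces the result to be one cycle rather than an
        // arbitrary permutation.
        std::uniform_int_distribution<size_t> dist(0, i - 1);
        std::swap(next[i], next[dist(rng)]);
      }
      return next;
    }

In the slab code above the permutation is built directly over the objects' embedded next pointers rather than a separate index array.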
@@ -237,13 +239,15 @@ namespace snmalloc auto curr_ptr = start_ptr; do { + auto next_ptr = curr_ptr->next; b.add( // Here begins our treatment of the heap as containing Wild pointers freelist::Object::make( capptr_to_user_address_control(curr_ptr.as_void())), - key, + freelist::Object::key_root, + key_tweak, entropy); - curr_ptr = curr_ptr->next; + curr_ptr = next_ptr; } while (curr_ptr != start_ptr); } else @@ -257,7 +261,8 @@ namespace snmalloc capptr_to_user_address_control( Aal::capptr_bound( p.as_void(), rsize))), - key, + freelist::Object::key_root, + key_tweak, entropy); p = pointer_offset(p, rsize); } while (p < slab_end); @@ -269,17 +274,18 @@ namespace snmalloc capptr::Alloc clear_slab(BackendSlabMetadata* meta, smallsizeclass_t sizeclass) { - auto& key = entropy.get_free_list_key(); + auto key_tweak = meta->as_key_tweak(); freelist::Iter<> fl; - auto more = meta->free_queue.close(fl, key); + auto more = + meta->free_queue.close(fl, freelist::Object::key_root, key_tweak); UNUSED(more); auto local_state = backend_state_ptr(); auto domesticate = [local_state](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { return capptr_domesticate(local_state, p); }; - capptr::Alloc p = - finish_alloc_no_zero(fl.take(key, domesticate), sizeclass); + capptr::Alloc p = finish_alloc_no_zero( + fl.take(freelist::Object::key_root, domesticate), sizeclass); // If clear_meta is requested, we should also walk the free list to clear // it. @@ -293,7 +299,7 @@ namespace snmalloc size_t count = 1; // Already taken one above. while (!fl.empty()) { - fl.take(key, domesticate); + fl.take(freelist::Object::key_root, domesticate); count++; } // Check the list contains all the elements @@ -303,13 +309,14 @@ namespace snmalloc if (more > 0) { - auto no_more = meta->free_queue.close(fl, key); + auto no_more = + meta->free_queue.close(fl, freelist::Object::key_root, key_tweak); SNMALLOC_ASSERT(no_more == 0); UNUSED(no_more); while (!fl.empty()) { - fl.take(key, domesticate); + fl.take(freelist::Object::key_root, domesticate); count++; } } @@ -321,7 +328,7 @@ namespace snmalloc #ifdef SNMALLOC_TRACING message<1024>( - "Slab {} is unused, Object sizeclass {}", + "Slab {} is unused, Object sizeclass {}", start_of_slab.unsafe_ptr(), sizeclass); #endif @@ -348,7 +355,8 @@ namespace snmalloc { if (check_slabs) { - meta->free_queue.validate(entropy.get_free_list_key(), domesticate); + meta->free_queue.validate( + freelist::Object::key_root, meta->as_key_tweak(), domesticate); } return; } @@ -368,47 +376,25 @@ namespace snmalloc get_backend_local_state(), *meta, start, - sizeclass_to_slab_size(sizeclass)); + sizeclass_to_slab_size(sizeclass), + sizeclass_t::from_small_class(sizeclass)); }); } /** - * Slow path for deallocating an object locally. - * This is either waking up a slab that was not actively being used - * by this thread, or handling the final deallocation onto a slab, - * so it can be reused by other threads. + * Very slow path for object deallocation. + * + * The object has already been returned to the slab, so all that is left to + * do is update its metadata and, if that pushes us into having too many + * unused slabs in this size class, return some. + * + * Also while here, check the time. */ - SNMALLOC_SLOW_PATH void - dealloc_local_object_slow(capptr::Alloc p, const PagemapEntry& entry) + SNMALLOC_SLOW_PATH void dealloc_local_object_meta( + const PagemapEntry& entry, BackendSlabMetadata* meta) { - // TODO: Handle message queue on this path? 
- - auto* meta = entry.get_slab_metadata(); - - if (meta->is_large()) - { - // Handle large deallocation here. - size_t entry_sizeclass = entry.get_sizeclass().as_large(); - size_t size = bits::one_at_bit(entry_sizeclass); - -#ifdef SNMALLOC_TRACING - message<1024>("Large deallocation: {}", size); -#else - UNUSED(size); -#endif - - // Remove from set of fully used slabs. - meta->node.remove(); - - Config::Backend::dealloc_chunk( - get_backend_local_state(), *meta, p, size); - - return; - } - smallsizeclass_t sizeclass = entry.get_sizeclass().as_small(); - UNUSED(entropy); if (meta->is_sleeping()) { // Slab has been woken up add this to the list of slabs with free space. @@ -444,25 +430,76 @@ namespace snmalloc ticker.check_tick(); } + /** + * Slow path for deallocating an object locally. + * This is either waking up a slab that was not actively being used + * by this thread, or handling the final deallocation onto a slab, + * so it can be reused by other threads. + * + * Live large objects look like slabs that need attention when they become + * free; that attention is also given here. + */ + SNMALLOC_SLOW_PATH void dealloc_local_object_slow( + capptr::Alloc p, + const PagemapEntry& entry, + BackendSlabMetadata* meta) + { + // TODO: Handle message queue on this path? + + if (meta->is_large()) + { + // Handle large deallocation here. + + // XXX: because large objects have unique metadata associated with them, + // the ring size here is one. We should probably assert that. + + size_t entry_sizeclass = entry.get_sizeclass().as_large(); + size_t size = bits::one_at_bit(entry_sizeclass); + +#ifdef SNMALLOC_TRACING + message<1024>("Large deallocation: {}", size); +#else + UNUSED(size); +#endif + + // Remove from set of fully used slabs. + meta->node.remove(); + + Config::Backend::dealloc_chunk( + get_backend_local_state(), *meta, p, size, entry.get_sizeclass()); + + return; + } + + // Not a large object; update slab metadata + dealloc_local_object_meta(entry, meta); + } + /** * Check if this allocator has messages to deallocate blocks from another * thread */ SNMALLOC_FAST_PATH bool has_messages() { - auto domesticate = [local_state = backend_state_ptr()]( - freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { - if constexpr (Config::Options.QueueHeadsAreTame) - { - return freelist::HeadPtr::unsafe_from(p.unsafe_ptr()); - } - else - { + auto local_state = backend_state_ptr(); + auto domesticate_head = + [local_state](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { + if constexpr (Config::Options.QueueHeadsAreTame) + { + UNUSED(local_state); + return freelist::HeadPtr::unsafe_from(p.unsafe_ptr()); + } + else + { + return capptr_domesticate(local_state, p); + } + }; + auto domesticate_queue = + [local_state](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { return capptr_domesticate(local_state, p); - } - }; + }; - return !(message_queue().can_dequeue(domesticate)); + return message_queue().can_dequeue(domesticate_head, domesticate_queue); } /** @@ -478,20 +515,18 @@ namespace snmalloc SNMALLOC_FAST_PATH_LAMBDA { return capptr_domesticate(local_state, p); }; - auto cb = [this, - &need_post](freelist::HeadPtr msg) SNMALLOC_FAST_PATH_LAMBDA { -#ifdef SNMALLOC_TRACING - message<1024>("Handling remote"); -#endif - + auto cb = [this, domesticate, &need_post]( + capptr::Alloc msg) SNMALLOC_FAST_PATH_LAMBDA { auto& entry = - Config::Backend::template get_metaentry(snmalloc::address_cast(msg)); - - handle_dealloc_remote(entry, msg.as_void(), need_post); - + 
Config::Backend::get_metaentry(snmalloc::address_cast(msg)); + handle_dealloc_remote(entry, msg, need_post, domesticate); return true; }; +#ifdef SNMALLOC_TRACING + message<1024>("Handling remote queue before proceeding..."); +#endif + if constexpr (Config::Options.QueueHeadsAreTame) { /* @@ -523,10 +558,12 @@ namespace snmalloc * * need_post will be set to true, if capacity is exceeded. */ + template void handle_dealloc_remote( const PagemapEntry& entry, - CapPtr p, - bool& need_post) + capptr::Alloc msg, + bool& need_post, + Domesticator_queue domesticate) { // TODO this needs to not double count stats // TODO this needs to not double revoke if using MTE @@ -534,29 +571,53 @@ namespace snmalloc if (SNMALLOC_LIKELY(entry.get_remote() == public_state())) { - if (SNMALLOC_LIKELY( - dealloc_local_object_fast(entry, p.as_void(), entropy))) - return; + auto meta = entry.get_slab_metadata(); + + auto unreturned = + dealloc_local_objects_fast(msg, entry, meta, entropy, domesticate); + + /* + * dealloc_local_objects_fast has updated the free list but not updated + * the slab metadata; it falls to us to do so. It is UNLIKELY that we + * will need to take further steps, but we might. + */ + if (SNMALLOC_UNLIKELY(unreturned.template step())) + { + dealloc_local_object_slow(msg.as_void(), entry, meta); + + while (SNMALLOC_UNLIKELY(unreturned.template step())) + { + dealloc_local_object_meta(entry, meta); + } + } - dealloc_local_object_slow(p, entry); + return; } - else + + auto nelem = RemoteMessage::template ring_size( + msg, + freelist::Object::key_root, + entry.get_slab_metadata()->as_key_tweak(), + domesticate); + if ( + !need_post && + !attached_cache->remote_dealloc_cache.reserve_space(entry, nelem)) { - if ( - !need_post && - !attached_cache->remote_dealloc_cache.reserve_space(entry)) - need_post = true; - attached_cache->remote_dealloc_cache - .template dealloc( - entry.get_remote()->trunc_id(), p.as_void()); + need_post = true; } + attached_cache->remote_dealloc_cache + .template forward( + entry.get_remote()->trunc_id(), msg); } /** * Initialiser, shared code between the constructors for different * configurations. + * + * spare is the amount of space directly after the allocator that is + * reserved as meta-data, but is not required by this CoreAllocator. */ - void init() + void init(Range& spare) { #ifdef SNMALLOC_TRACING message<1024>("Making an allocator."); @@ -566,6 +627,20 @@ namespace snmalloc // This must occur before any freelists are constructed. entropy.init(); + if (spare.length != 0) + { + /* + * Seed this frontend's private metadata allocation cache with any + * excess space from the metadata allocation holding the frontend + * Allocator object itself. This alleviates thundering herd + * contention on the backend during startup: each slab opened now + * makes one trip to the backend, for the slab itself, rather than + * two, for the slab and its metadata. + */ + Config::Backend::dealloc_meta_data( + get_backend_local_state(), spare.base, spare.length); + } + // Ignoring stats for now. 
// stats().start(); @@ -574,49 +649,41 @@ namespace snmalloc init_message_queue(); message_queue().invariant(); } - - if constexpr (DEBUG) - { - for (smallsizeclass_t i = 0; i < NUM_SMALL_SIZECLASSES; i++) - { - size_t size = sizeclass_to_size(i); - smallsizeclass_t sc1 = size_to_sizeclass(size); - smallsizeclass_t sc2 = size_to_sizeclass_const(size); - size_t size1 = sizeclass_to_size(sc1); - size_t size2 = sizeclass_to_size(sc2); - - SNMALLOC_CHECK(sc1 == i); - SNMALLOC_CHECK(sc1 == sc2); - SNMALLOC_CHECK(size1 == size); - SNMALLOC_CHECK(size1 == size2); - } - } } public: /** * Constructor for the case that the core allocator owns the local state. * SFINAE disabled if the allocator does not own the local state. + * + * spare is the amount of space directly after the allocator that is + * reserved as meta-data, but is not required by this CoreAllocator. */ template< typename Config_ = Config, typename = std::enable_if_t> - CoreAllocator(LocalCache* cache) : attached_cache(cache) + CoreAllocator(Range& spare) { - init(); + init(spare); } /** * Constructor for the case that the core allocator does not owns the local * state. SFINAE disabled if the allocator does own the local state. + * + * spare is the amount of space directly after the allocator that is + * reserved as meta-data, but is not required by this CoreAllocator. */ template< typename Config_ = Config, typename = std::enable_if_t> - CoreAllocator(LocalCache* cache, LocalState* backend = nullptr) + CoreAllocator( + Range& spare, + LocalCache* cache, + LocalState* backend = nullptr) : backend_state(backend), attached_cache(cache) { - init(); + init(spare); } /** @@ -642,7 +709,7 @@ namespace snmalloc // stats().remote_post(); // TODO queue not in line! bool sent_something = attached_cache->remote_dealloc_cache - .post( + .template post( backend_state_ptr(), public_state()->trunc_id()); return sent_something; @@ -661,26 +728,33 @@ namespace snmalloc return handle_message_queue_inner(action, args...); } + SNMALLOC_FAST_PATH void dealloc_local_object( + CapPtr p, + const typename Config::PagemapEntry& entry) + { + auto meta = entry.get_slab_metadata(); + + if (SNMALLOC_LIKELY(dealloc_local_object_fast(p, entry, meta, entropy))) + return; + + dealloc_local_object_slow(p, entry, meta); + } + SNMALLOC_FAST_PATH void dealloc_local_object(CapPtr p) { // PagemapEntry-s seen here are expected to have meaningful Remote // pointers - auto& entry = - Config::Backend::template get_metaentry(snmalloc::address_cast(p)); - if (SNMALLOC_LIKELY(dealloc_local_object_fast(entry, p, entropy))) - return; - - dealloc_local_object_slow(p, entry); + dealloc_local_object( + p, Config::Backend::get_metaentry(snmalloc::address_cast(p))); } SNMALLOC_FAST_PATH static bool dealloc_local_object_fast( - const PagemapEntry& entry, CapPtr p, + const PagemapEntry& entry, + BackendSlabMetadata* meta, LocalEntropy& entropy) { - auto meta = entry.get_slab_metadata(); - SNMALLOC_ASSERT(!meta->is_unused()); snmalloc_check_client( @@ -690,14 +764,49 @@ namespace snmalloc auto cp = p.as_static>(); - auto& key = entropy.get_free_list_key(); - // Update the head and the next pointer in the free list. 
- meta->free_queue.add(cp, key, entropy); + meta->free_queue.add( + cp, freelist::Object::key_root, meta->as_key_tweak(), entropy); return SNMALLOC_LIKELY(!meta->return_object()); } + template + SNMALLOC_FAST_PATH static auto dealloc_local_objects_fast( + capptr::Alloc msg, + const PagemapEntry& entry, + BackendSlabMetadata* meta, + LocalEntropy& entropy, + Domesticator domesticate) + { + SNMALLOC_ASSERT(!meta->is_unused()); + + snmalloc_check_client( + mitigations(sanity_checks), + is_start_of_object(entry.get_sizeclass(), address_cast(msg)), + "Not deallocating start of an object"); + + size_t objsize = sizeclass_full_to_size(entry.get_sizeclass()); + + auto [curr, length] = RemoteMessage::template open_free_ring( + msg, + objsize, + freelist::Object::key_root, + meta->as_key_tweak(), + domesticate); + + // Update the head and the next pointer in the free list. + meta->free_queue.append_segment( + curr, + msg.template as_reinterpret>(), + length, + freelist::Object::key_root, + meta->as_key_tweak(), + entropy); + + return meta->return_objects(length); + } + template SNMALLOC_SLOW_PATH capptr::Alloc small_alloc(smallsizeclass_t sizeclass, freelist::Iter<>& fast_free_list) @@ -781,7 +890,8 @@ namespace snmalloc get_backend_local_state(), slab_size, PagemapEntry::encode( - public_state(), sizeclass_t::from_small_class(sizeclass))); + public_state(), sizeclass_t::from_small_class(sizeclass)), + sizeclass_t::from_small_class(sizeclass)); if (slab == nullptr) { @@ -790,7 +900,7 @@ namespace snmalloc // Set meta slab to empty. meta->initialise( - sizeclass, address_cast(slab), entropy.get_free_list_key()); + sizeclass, address_cast(slab), freelist::Object::key_root); // Build a free list for the slab alloc_new_list(slab, meta, rsize, slab_size, entropy); @@ -832,19 +942,14 @@ namespace snmalloc if (destroy_queue) { - auto p_wild = message_queue().destroy(); - auto p_tame = domesticate(p_wild); - - while (p_tame != nullptr) - { + auto cb = [this, domesticate](capptr::Alloc m) { bool need_post = true; // Always going to post, so ignore. - auto n_tame = - p_tame->atomic_read_next(RemoteAllocator::key_global, domesticate); const PagemapEntry& entry = - Config::Backend::get_metaentry(snmalloc::address_cast(p_tame)); - handle_dealloc_remote(entry, p_tame.as_void(), need_post); - p_tame = n_tame; - } + Config::Backend::get_metaentry(snmalloc::address_cast(m)); + handle_dealloc_remote(entry, m, need_post, domesticate); + }; + + message_queue().destroy_and_iterate(domesticate, cb); } else { @@ -854,7 +959,7 @@ namespace snmalloc handle_message_queue([]() {}); } - auto posted = attached_cache->flush( + auto posted = attached_cache->template flush( backend_state_ptr(), [&](capptr::Alloc p) { dealloc_local_object(p); }); @@ -865,20 +970,21 @@ namespace snmalloc dealloc_local_slabs(sizeclass); } - laden.iterate([this, domesticate]( - BackendSlabMetadata* meta) SNMALLOC_FAST_PATH_LAMBDA { - if (!meta->is_large()) - { - meta->free_queue.validate(entropy.get_free_list_key(), domesticate); - } - }); + laden.iterate( + [domesticate](BackendSlabMetadata* meta) SNMALLOC_FAST_PATH_LAMBDA { + if (!meta->is_large()) + { + meta->free_queue.validate( + freelist::Object::key_root, meta->as_key_tweak(), domesticate); + } + }); return posted; } // This allows the caching layer to be attached to an underlying // allocator instance. 
- void attach(LocalCache* c) + void attach(LocalCache* c) { #ifdef SNMALLOC_TRACING message<1024>("Attach cache to {}", this); @@ -901,10 +1007,9 @@ namespace snmalloc */ bool debug_is_empty_impl(bool* result) { - auto& key = entropy.get_free_list_key(); - - auto error = [&result, &key](auto slab_metadata) { - auto slab_interior = slab_metadata->get_slab_interior(key); + auto error = [&result](auto slab_metadata) { + auto slab_interior = + slab_metadata->get_slab_interior(freelist::Object::key_root); const PagemapEntry& entry = Config::Backend::get_metaentry(slab_interior); SNMALLOC_ASSERT(slab_metadata == entry.get_slab_metadata()); @@ -917,9 +1022,11 @@ namespace snmalloc else report_fatal_error( "debug_is_empty: found non-empty allocator: size={} on " - "slab_start {}", + "slab_start {} meta {} entry {}", sizeclass_full_to_size(size_class), - slab_start); + slab_start, + address_cast(slab_metadata), + address_cast(&entry)); }; auto test = [&error](auto& queue) { @@ -971,7 +1078,7 @@ namespace snmalloc { // We need a cache to perform some operations, so set one up // temporarily - LocalCache temp(public_state()); + LocalCache temp(public_state()); attach(&temp); #ifdef SNMALLOC_TRACING message<1024>("debug_is_empty - attach a cache"); @@ -988,9 +1095,42 @@ namespace snmalloc } }; + template + class ConstructCoreAlloc + { + using CA = CoreAllocator; + + public: + static capptr::Alloc make() + { + size_t size = sizeof(CA); + size_t round_sizeof = Aal::capptr_size_round(size); + size_t request_size = bits::next_pow2(round_sizeof); + size_t spare = request_size - round_sizeof; + + auto raw = + Config::Backend::template alloc_meta_data(nullptr, request_size); + + if (raw == nullptr) + { + Config::Pal::error("Failed to initialise thread local allocator."); + } + + capptr::Alloc spare_start = pointer_offset(raw, round_sizeof); + Range r{spare_start, spare}; + + auto p = capptr::Alloc::unsafe_from(new (raw.unsafe_ptr()) CA(r)); + + // Remove excess from the bounds. + p = Aal::capptr_bound(p, round_sizeof); + return p; + } + }; + /** * Use this alias to access the pool of allocators throughout snmalloc. */ template - using AllocPool = Pool, Config, Config::pool>; + using AllocPool = + Pool, ConstructCoreAlloc, Config::pool>; } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/entropy.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/entropy.h index 2e63b68bfa00..c6f2c85ffe6d 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/entropy.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/entropy.h @@ -28,7 +28,6 @@ namespace snmalloc uint64_t local_counter{0}; uint64_t fresh_bits{0}; uint64_t count{0}; - FreeListKey key{0, 0, 0}; public: constexpr LocalEntropy() = default; @@ -38,18 +37,6 @@ namespace snmalloc { local_key = get_entropy64(); local_counter = get_entropy64(); - if constexpr (bits::BITS == 64) - { - key.key1 = get_next(); - key.key2 = get_next(); - key.key_next = get_next(); - } - else - { - key.key1 = get_next() & 0xffff'ffff; - key.key2 = get_next() & 0xffff'ffff; - key.key_next = get_next() & 0xffff'ffff; - } bit_source = get_next(); } @@ -70,9 +57,20 @@ namespace snmalloc /** * A key for the free lists for this thread. 
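ConstructCoreAlloc::make above rounds the metadata request for the allocator object up to a power of two and hands the slack back as a Range of spare space, which init() then donates to the frontend's metadata cache. A sketch of just the sizing arithmetic (ignoring the CHERI capptr_size_round step):

    #include <cstddef>

    constexpr size_t next_pow2(size_t x)
    {
      size_t p = 1;
      while (p < x)
        p <<= 1;
      return p;
    }

    struct SpareRange
    {
      size_t offset; // start of the spare space within the allocation
      size_t length;
    };

    constexpr SpareRange spare_after(size_t object_size)
    {
      size_t request = next_pow2(object_size);
      return {object_size, request - object_size};
    }

    // A (hypothetical) 1344-byte allocator object needs a 2048-byte request,
    // leaving 704 bytes of spare metadata space; a 1024-byte object leaves
    // none.
    static_assert(spare_after(1344).length == 704);
    static_assert(spare_after(1024).length == 0);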
*/ - const FreeListKey& get_free_list_key() + void make_free_list_key(FreeListKey& key) { - return key; + if constexpr (bits::BITS == 64) + { + key.key1 = static_cast(get_next()); + key.key2 = static_cast(get_next()); + key.key_next = static_cast(get_next()); + } + else + { + key.key1 = static_cast(get_next() & 0xffff'ffff); + key.key2 = static_cast(get_next() & 0xffff'ffff); + key.key_next = static_cast(get_next() & 0xffff'ffff); + } } /** @@ -116,8 +114,7 @@ namespace snmalloc fresh_bits = get_next(); count = 64; } - uint16_t result = - static_cast(fresh_bits & (bits::one_at_bit(n) - 1)); + uint16_t result = static_cast(fresh_bits & bits::mask_bits(n)); fresh_bits >>= n; count -= n; return result; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/external_alloc.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/external_alloc.h index 250719766e43..56d9f9ac632a 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/external_alloc.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/external_alloc.h @@ -21,6 +21,7 @@ namespace snmalloc::external_alloc } # elif defined(__APPLE__) # include + namespace snmalloc::external_alloc { inline size_t malloc_usable_size(void* ptr) @@ -30,6 +31,7 @@ namespace snmalloc::external_alloc } # elif defined(__linux__) || defined(__HAIKU__) # include + namespace snmalloc::external_alloc { using ::malloc_usable_size; @@ -41,6 +43,7 @@ namespace snmalloc::external_alloc } # elif defined(__FreeBSD__) # include + namespace snmalloc::external_alloc { using ::malloc_usable_size; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/freelist.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/freelist.h index 49348d1d8a93..f49004d938eb 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/freelist.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/freelist.h @@ -40,22 +40,40 @@ namespace snmalloc { + class BatchedRemoteMessage; + + static constexpr address_t NO_KEY_TWEAK = 0; + /** * This function is used to sign back pointers in the free list. */ - inline static address_t - signed_prev(address_t curr, address_t next, const FreeListKey& key) + inline static address_t signed_prev( + address_t curr, address_t next, const FreeListKey& key, address_t tweak) { auto c = curr; auto n = next; - return (c + key.key1) * (n + key.key2); + return (c + key.key1) * (n + (key.key2 ^ tweak)); } namespace freelist { + template< + bool RANDOM, + bool TRACK_LENGTH = RANDOM, + SNMALLOC_CONCEPT(capptr::IsBound) BView = capptr::bounds::Alloc, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue = capptr::bounds::AllocWild> + class Builder; + class Object { public: + /** + * Shared key for slab free lists (but tweaked by metadata address). 
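signed_prev above now folds a per-slab tweak (derived from the slab metadata address) into the shared key, so a back-pointer signature captured on one slab does not validate on another. A standalone restatement of the signature:

    #include <cstdint>

    using address_t = uintptr_t;

    struct FreeListKey
    {
      address_t key1;
      address_t key2;
      address_t key_next;
    };

    // Same shape as signed_prev above: the signature binds the current node,
    // the next node, the global key and the per-slab tweak together.
    constexpr address_t signed_prev(
      address_t curr, address_t next, const FreeListKey& key, address_t tweak)
    {
      return (curr + key.key1) * (next + (key.key2 ^ tweak));
    }

    constexpr FreeListKey key{0xdeadbeef, 0xbeefdead, 0xdeadbeef};

    // The same edge signed under two different tweaks yields different values.
    static_assert(
      signed_prev(0x1000, 0x2000, key, 0x10) !=
      signed_prev(0x1000, 0x2000, key, 0x20));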
+ * + * XXX Maybe this belongs somewhere else + */ + inline static FreeListKey key_root{0xdeadbeef, 0xbeefdead, 0xdeadbeef}; + template< SNMALLOC_CONCEPT(capptr::IsBound) BQueue = capptr::bounds::AllocWild> class T; @@ -115,6 +133,7 @@ namespace snmalloc class T { template< + bool, bool, SNMALLOC_CONCEPT(capptr::IsBound), SNMALLOC_CONCEPT(capptr::IsBound)> @@ -122,6 +141,8 @@ namespace snmalloc friend class Object; + friend class ::snmalloc::BatchedRemoteMessage; + class Empty { public: @@ -171,23 +192,28 @@ namespace snmalloc SNMALLOC_CONCEPT(capptr::IsBound) BView = typename BQueue:: template with_wildness, typename Domesticator> - BHeadPtr - atomic_read_next(const FreeListKey& key, Domesticator domesticate) + BHeadPtr atomic_read_next( + const FreeListKey& key, address_t key_tweak, Domesticator domesticate) { auto n_wild = Object::decode_next( address_cast(&this->next_object), this->atomic_next_object.load(std::memory_order_acquire), - key); + key, + key_tweak); auto n_tame = domesticate(n_wild); if constexpr (mitigations(freelist_backward_edge)) { if (n_tame != nullptr) { - n_tame->prev.check_prev( - signed_prev(address_cast(this), address_cast(n_tame), key)); + n_tame->prev.check_prev(signed_prev( + address_cast(this), address_cast(n_tame), key, key_tweak)); } } - Aal::prefetch(&(n_tame->next_object)); + else + { + UNUSED(key_tweak); + } + Aal::prefetch(n_tame.unsafe_ptr()); return n_tame; } @@ -198,11 +224,14 @@ namespace snmalloc SNMALLOC_CONCEPT(capptr::IsBound) BView = typename BQueue:: template with_wildness, typename Domesticator> - BHeadPtr - read_next(const FreeListKey& key, Domesticator domesticate) + BHeadPtr read_next( + const FreeListKey& key, address_t key_tweak, Domesticator domesticate) { return domesticate(Object::decode_next( - address_cast(&this->next_object), this->next_object, key)); + address_cast(&this->next_object), + this->next_object, + key, + key_tweak)); } /** @@ -235,7 +264,8 @@ namespace snmalloc SNMALLOC_CONCEPT(capptr::IsBound) BView> static BHeadPtr make(CapPtr p) { - return p.template as_static>(); + return CapPtr, BView>::unsafe_from( + new (p.unsafe_ptr()) Object::T()); } /** @@ -253,8 +283,11 @@ namespace snmalloc * Involutive encryption with raw pointers */ template - inline static Object::T* - code_next(address_t curr, Object::T* next, const FreeListKey& key) + inline static Object::T* code_next( + address_t curr, + Object::T* next, + const FreeListKey& key, + address_t key_tweak) { // Note we can consider other encoding schemes here. // * XORing curr and next. This doesn't require any key material @@ -267,11 +300,13 @@ namespace snmalloc mitigations(freelist_forward_edge) && !aal_supports) { return unsafe_from_uintptr>( - unsafe_to_uintptr>(next) ^ key.key_next); + unsafe_to_uintptr>(next) ^ key.key_next ^ + key_tweak); } else { UNUSED(key); + UNUSED(key_tweak); return next; } } @@ -289,16 +324,19 @@ namespace snmalloc * though the result is likely not safe to dereference, being an * obfuscated bundle of bits (on non-CHERI architectures, anyway). That's * additional motivation to consider the result BQueue-bounded, as that - * is likely (but not necessarily) Wild. + * is likely (but not necessarily) Wild. 
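code_next above (used by encode_next and decode_next) is an involution: when the freelist_forward_edge mitigation applies, the stored next pointer is XORed with key_next ^ key_tweak, and otherwise it is stored as-is; decoding applies exactly the same XOR, so encode and decode share code_next. A standalone restatement:

    #include <cstdint>

    using address_t = uintptr_t;

    // Involutive obfuscation in the style of code_next above: applying the
    // same XOR twice recovers the original value.
    constexpr address_t code_next(
      address_t next, address_t key_next, address_t key_tweak)
    {
      return next ^ key_next ^ key_tweak;
    }

    static_assert(
      code_next(code_next(0x12345678, 0xdeadbeef, 0x42), 0xdeadbeef, 0x42) ==
      0x12345678);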
*/ template< SNMALLOC_CONCEPT(capptr::IsBound) BView, SNMALLOC_CONCEPT(capptr::IsBound) BQueue> inline static BQueuePtr encode_next( - address_t curr, BHeadPtr next, const FreeListKey& key) + address_t curr, + BHeadPtr next, + const FreeListKey& key, + address_t key_tweak) { return BQueuePtr::unsafe_from( - code_next(curr, next.unsafe_ptr(), key)); + code_next(curr, next.unsafe_ptr(), key, key_tweak)); } /** @@ -320,10 +358,13 @@ namespace snmalloc SNMALLOC_CONCEPT(capptr::IsBound) BView, SNMALLOC_CONCEPT(capptr::IsBound) BQueue> inline static BHeadPtr decode_next( - address_t curr, BHeadPtr next, const FreeListKey& key) + address_t curr, + BHeadPtr next, + const FreeListKey& key, + address_t key_tweak) { return BHeadPtr::unsafe_from( - code_next(curr, next.unsafe_ptr(), key)); + code_next(curr, next.unsafe_ptr(), key, key_tweak)); } template< @@ -343,6 +384,33 @@ namespace snmalloc "Free Object Queue bounds must match View bounds (but may be Wild)"); } + template< + SNMALLOC_CONCEPT(capptr::IsBound) BView, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue> + static void store_nextish( + BQueuePtr* curr, + BHeadPtr next, + const FreeListKey& key, + address_t key_tweak, + BHeadPtr next_value) + { + assert_view_queue_bounds(); + + if constexpr (mitigations(freelist_backward_edge)) + { + next->prev.set_prev(signed_prev( + address_cast(curr), address_cast(next), key, key_tweak)); + } + else + { + UNUSED(next); + UNUSED(key); + UNUSED(key_tweak); + } + + *curr = encode_next(address_cast(curr), next_value, key, key_tweak); + } + /** * Assign next_object and update its prev_encoded if * SNMALLOC_CHECK_CLIENT. Static so that it can be used on reference to a @@ -358,27 +426,19 @@ namespace snmalloc static BQueuePtr* store_next( BQueuePtr* curr, BHeadPtr next, - const FreeListKey& key) + const FreeListKey& key, + address_t key_tweak) { - assert_view_queue_bounds(); - - if constexpr (mitigations(freelist_backward_edge)) - { - next->prev.set_prev( - signed_prev(address_cast(curr), address_cast(next), key)); - } - else - UNUSED(key); - - *curr = encode_next(address_cast(curr), next, key); + store_nextish(curr, next, key, key_tweak, next); return &(next->next_object); } template - static void store_null(BQueuePtr* curr, const FreeListKey& key) + static void store_null( + BQueuePtr* curr, const FreeListKey& key, address_t key_tweak) { - *curr = - encode_next(address_cast(curr), BQueuePtr(nullptr), key); + *curr = encode_next( + address_cast(curr), BQueuePtr(nullptr), key, key_tweak); } /** @@ -392,36 +452,45 @@ namespace snmalloc static void atomic_store_next( BHeadPtr curr, BHeadPtr next, - const FreeListKey& key) + const FreeListKey& key, + address_t key_tweak) { static_assert(BView::wildness == capptr::dimension::Wildness::Tame); if constexpr (mitigations(freelist_backward_edge)) { - next->prev.set_prev( - signed_prev(address_cast(curr), address_cast(next), key)); + next->prev.set_prev(signed_prev( + address_cast(curr), address_cast(next), key, key_tweak)); } else + { UNUSED(key); + UNUSED(key_tweak); + } // Signature needs to be visible before item is linked in // so requires release semantics. 
curr->atomic_next_object.store( - encode_next(address_cast(&curr->next_object), next, key), + encode_next(address_cast(&curr->next_object), next, key, key_tweak), std::memory_order_release); } template< SNMALLOC_CONCEPT(capptr::IsBound) BView, SNMALLOC_CONCEPT(capptr::IsBound) BQueue> - static void - atomic_store_null(BHeadPtr curr, const FreeListKey& key) + static void atomic_store_null( + BHeadPtr curr, + const FreeListKey& key, + address_t key_tweak) { static_assert(BView::wildness == capptr::dimension::Wildness::Tame); curr->atomic_next_object.store( encode_next( - address_cast(&curr->next_object), BQueuePtr(nullptr), key), + address_cast(&curr->next_object), + BQueuePtr(nullptr), + key, + key_tweak), std::memory_order_relaxed); } }; @@ -459,6 +528,7 @@ namespace snmalloc protected: constexpr Prev(address_t prev) : prev(prev) {} + constexpr Prev() = default; address_t replace(address_t next) @@ -498,11 +568,50 @@ namespace snmalloc { Object::BHeadPtr curr{nullptr}; + struct KeyTweak + { + address_t key_tweak = 0; + + SNMALLOC_FAST_PATH address_t get() + { + return key_tweak; + } + + void set(address_t kt) + { + key_tweak = kt; + } + + constexpr KeyTweak() = default; + }; + + struct NoKeyTweak + { + SNMALLOC_FAST_PATH address_t get() + { + return 0; + } + + void set(address_t) {} + }; + + SNMALLOC_NO_UNIQUE_ADDRESS + std::conditional_t< + mitigations(freelist_forward_edge) || + mitigations(freelist_backward_edge), + KeyTweak, + NoKeyTweak> + key_tweak; + public: - constexpr Iter(Object::BHeadPtr head, address_t prev_value) + constexpr Iter( + Object::BHeadPtr head, + address_t prev_value, + address_t kt) : IterBase(prev_value), curr(head) { UNUSED(prev_value); + key_tweak.set(kt); } constexpr Iter() = default; @@ -531,15 +640,15 @@ namespace snmalloc take(const FreeListKey& key, Domesticator domesticate) { auto c = curr; - auto next = curr->read_next(key, domesticate); + auto next = curr->read_next(key, key_tweak.get(), domesticate); Aal::prefetch(next.unsafe_ptr()); curr = next; if constexpr (mitigations(freelist_backward_edge)) { - auto p = - replace(signed_prev(address_cast(c), address_cast(next), key)); + auto p = replace(signed_prev( + address_cast(c), address_cast(next), key, key_tweak.get())); c->check_prev(p); } else @@ -569,10 +678,13 @@ namespace snmalloc */ template< bool RANDOM, - SNMALLOC_CONCEPT(capptr::IsBound) BView = capptr::bounds::Alloc, - SNMALLOC_CONCEPT(capptr::IsBound) BQueue = capptr::bounds::AllocWild> + bool TRACK_LENGTH, + SNMALLOC_CONCEPT(capptr::IsBound) BView, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue> class Builder { + static_assert(!RANDOM || TRACK_LENGTH); + static constexpr size_t LENGTH = RANDOM ? 2 : 1; /* @@ -610,7 +722,8 @@ namespace snmalloc static_cast*>(head[ix])); } - SNMALLOC_NO_UNIQUE_ADDRESS std::array length{}; + SNMALLOC_NO_UNIQUE_ADDRESS + std::array length{}; public: constexpr Builder() = default; @@ -636,6 +749,7 @@ namespace snmalloc void add( Object::BHeadPtr n, const FreeListKey& key, + address_t key_tweak, LocalEntropy& entropy) { uint32_t index; @@ -644,8 +758,8 @@ namespace snmalloc else index = 0; - set_end(index, Object::store_next(cast_end(index), n, key)); - if constexpr (RANDOM) + set_end(index, Object::store_next(cast_end(index), n, key, key_tweak)); + if constexpr (TRACK_LENGTH) { length[index]++; } @@ -660,20 +774,26 @@ namespace snmalloc * lists, which will be randomised at the other end. 
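/*
 * Illustrative sketch (hypothetical types): the KeyTweak/NoKeyTweak pattern
 * above, i.e. paying for a field only when a compile-time mitigation needs
 * it, by choosing between a real member and an empty stand-in with
 * std::conditional_t and [[no_unique_address]].
 */
#include <cstdint>
#include <type_traits>

struct Tweak
{
  uintptr_t value = 0;
  uintptr_t get() const { return value; }
  void set(uintptr_t v) { value = v; }
};

struct NoTweak
{
  uintptr_t get() const { return 0; }
  void set(uintptr_t) {}
};

template<bool NeedTweak>
struct TweakHolder
{
  [[no_unique_address]] std::conditional_t<NeedTweak, Tweak, NoTweak> tweak;
};

// The disabled variant carries no extra payload.
static_assert(sizeof(TweakHolder<false>) <= sizeof(TweakHolder<true>));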
*/ template - std::enable_if_t - add(Object::BHeadPtr n, const FreeListKey& key) + std::enable_if_t add( + Object::BHeadPtr n, + const FreeListKey& key, + address_t key_tweak) { static_assert(RANDOM_ == RANDOM, "Don't set template parameter"); - set_end(0, Object::store_next(cast_end(0), n, key)); + set_end(0, Object::store_next(cast_end(0), n, key, key_tweak)); + if constexpr (TRACK_LENGTH) + { + length[0]++; + } } /** * Makes a terminator to a free list. */ - SNMALLOC_FAST_PATH void - terminate_list(uint32_t index, const FreeListKey& key) + SNMALLOC_FAST_PATH void terminate_list( + uint32_t index, const FreeListKey& key, address_t key_tweak) { - Object::store_null(cast_end(index), key); + Object::store_null(cast_end(index), key, key_tweak); } /** @@ -685,17 +805,21 @@ namespace snmalloc * and is thus subject to encoding if the next_object pointers * encoded. */ - [[nodiscard]] Object::BHeadPtr - read_head(uint32_t index, const FreeListKey& key) const + [[nodiscard]] Object::BHeadPtr read_head( + uint32_t index, const FreeListKey& key, address_t key_tweak) const { return Object::decode_next( - address_cast(&head[index]), cast_head(index), key); + address_cast(&head[index]), cast_head(index), key, key_tweak); } - address_t get_fake_signed_prev(uint32_t index, const FreeListKey& key) + address_t get_fake_signed_prev( + uint32_t index, const FreeListKey& key, address_t key_tweak) { return signed_prev( - address_cast(&head[index]), address_cast(read_head(index, key)), key); + address_cast(&head[index]), + address_cast(read_head(index, key, key_tweak)), + key, + key_tweak); } /** @@ -707,8 +831,8 @@ namespace snmalloc * The return value is how many entries are still contained in the * builder. */ - SNMALLOC_FAST_PATH uint16_t - close(Iter& fl, const FreeListKey& key) + SNMALLOC_FAST_PATH uint16_t close( + Iter& fl, const FreeListKey& key, address_t key_tweak) { uint32_t i; if constexpr (RANDOM) @@ -724,9 +848,12 @@ namespace snmalloc i = 0; } - terminate_list(i, key); + terminate_list(i, key, key_tweak); - fl = {read_head(i, key), get_fake_signed_prev(i, key)}; + fl = { + read_head(i, key, key_tweak), + get_fake_signed_prev(i, key, key_tweak), + key_tweak}; end[i] = &head[i]; @@ -744,12 +871,13 @@ namespace snmalloc /** * Set the builder to a not building state. */ - constexpr void init(address_t slab, const FreeListKey& key) + constexpr void + init(address_t slab, const FreeListKey& key, address_t key_tweak) { for (size_t i = 0; i < LENGTH; i++) { end[i] = &head[i]; - if constexpr (RANDOM) + if constexpr (TRACK_LENGTH) { length[i] = 0; } @@ -762,35 +890,71 @@ namespace snmalloc head[i] = Object::code_next( address_cast(&head[i]), useless_ptr_from_addr>(slab), - key); + key, + key_tweak); } } + template + std::enable_if_t extract_segment_length() + { + static_assert(RANDOM_ == RANDOM, "Don't set SFINAE parameter!"); + return length[0]; + } + template std::enable_if_t< !RANDOM_, std::pair< Object::BHeadPtr, Object::BHeadPtr>> - extract_segment(const FreeListKey& key) + extract_segment(const FreeListKey& key, address_t key_tweak) { static_assert(RANDOM_ == RANDOM, "Don't set SFINAE parameter!"); SNMALLOC_ASSERT(!empty()); - auto first = read_head(0, key); + auto first = read_head(0, key, key_tweak); // end[0] is pointing to the first field in the object, // this is doing a CONTAINING_RECORD like cast to get back // to the actual object. This isn't true if the builder is // empty, but you are not allowed to call this in the empty case. 
auto last = Object::BHeadPtr::unsafe_from( Object::from_next_ptr(cast_end(0))); - init(address_cast(head[0]), key); + init(address_cast(head[0]), key, key_tweak); return {first, last}; } + /** + * Put back an extracted segment from a builder using the same key. + * + * The caller must tell us how many elements are involved. + */ + void append_segment( + Object::BHeadPtr first, + Object::BHeadPtr last, + uint16_t size, + const FreeListKey& key, + address_t key_tweak, + LocalEntropy& entropy) + { + uint32_t index; + if constexpr (RANDOM) + index = entropy.next_bit(); + else + index = 0; + + if constexpr (TRACK_LENGTH) + length[index] += size; + else + UNUSED(size); + + Object::store_next(cast_end(index), first, key, key_tweak); + set_end(index, &(last->next_object)); + } + template - SNMALLOC_FAST_PATH void - validate(const FreeListKey& key, Domesticator domesticate) + SNMALLOC_FAST_PATH void validate( + const FreeListKey& key, address_t key_tweak, Domesticator domesticate) { if constexpr (mitigations(freelist_teardown_validate)) { @@ -798,29 +962,31 @@ namespace snmalloc { if (&head[i] == end[i]) { - SNMALLOC_CHECK(!RANDOM || (length[i] == 0)); + SNMALLOC_CHECK(!TRACK_LENGTH || (length[i] == 0)); continue; } size_t count = 1; - auto curr = read_head(i, key); - auto prev = get_fake_signed_prev(i, key); + auto curr = read_head(i, key, key_tweak); + auto prev = get_fake_signed_prev(i, key, key_tweak); while (true) { curr->check_prev(prev); if (address_cast(&(curr->next_object)) == address_cast(end[i])) break; count++; - auto next = curr->read_next(key, domesticate); - prev = signed_prev(address_cast(curr), address_cast(next), key); + auto next = curr->read_next(key, key_tweak, domesticate); + prev = signed_prev( + address_cast(curr), address_cast(next), key, key_tweak); curr = next; } - SNMALLOC_CHECK(!RANDOM || (count == length[i])); + SNMALLOC_CHECK(!TRACK_LENGTH || (count == length[i])); } } else { UNUSED(key); + UNUSED(key_tweak); UNUSED(domesticate); } } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/freelist_queue.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/freelist_queue.h new file mode 100644 index 000000000000..fb38f7c88de1 --- /dev/null +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/freelist_queue.h @@ -0,0 +1,193 @@ +#pragma once + +#include "../ds/ds.h" +#include "freelist.h" + +#include + +namespace snmalloc +{ + /** + * A FreeListMPSCQ is a chain of freed objects exposed as a MPSC append-only + * atomic queue that uses one xchg per append. + * + * The internal pointers are considered QueuePtr-s to support deployment + * scenarios in which the MPSCQ itself is exposed to the client. This is + * excessively paranoid in the common case that these metadata are as "hard" + * for the client to reach as the Pagemap, which we trust to store not just + * Tame CapPtr<>s but raw C++ pointers. + * + * Where necessary, methods expose two domesticator callbacks at the + * interface and are careful to use one for the front and back values and the + * other for pointers read from the queue itself. That's not ideal, but it + * lets the client condition its behavior appropriately and prevents us from + * accidentally following either of these pointers in generic code. + * Specifically, + * + * * `domesticate_head` is used for the MPSCQ pointers used to reach into + * the chain of objects + * + * * `domesticate_queue` is used to traverse links in that chain (and in + * fact, we traverse only the first). 
+ * + * In the case that the MPSCQ is not easily accessible to the client, + * `domesticate_head` can just be a type coersion, and `domesticate_queue` + * should perform actual validation. If the MPSCQ is exposed to the + * allocator client, both Domesticators should perform validation. + */ + template + struct alignas(REMOTE_MIN_ALIGN) FreeListMPSCQ + { + // Store the message queue on a separate cacheline. It is mutable data that + // is read by other threads. + alignas(CACHELINE_SIZE) freelist::AtomicQueuePtr back{nullptr}; + // Store the two ends on different cache lines as access by different + // threads. + alignas(CACHELINE_SIZE) freelist::AtomicQueuePtr front{nullptr}; + // Fake first entry + freelist::Object::T stub{}; + + constexpr FreeListMPSCQ() = default; + + void invariant() + { + SNMALLOC_ASSERT( + (address_cast(front.load()) == address_cast(&stub)) || + (back != nullptr)); + } + + void init() + { + freelist::HeadPtr stub_ptr = freelist::HeadPtr::unsafe_from(&stub); + freelist::Object::atomic_store_null(stub_ptr, Key, Key_tweak); + front.store(freelist::QueuePtr::unsafe_from(&stub)); + back.store(nullptr, std::memory_order_relaxed); + invariant(); + } + + freelist::QueuePtr destroy() + { + freelist::QueuePtr fnt = front.load(); + back.store(nullptr, std::memory_order_relaxed); + if (address_cast(front.load()) == address_cast(&stub)) + return nullptr; + return fnt; + } + + template + void destroy_and_iterate(Domesticator_queue domesticate, Cb cb) + { + auto p = domesticate(destroy()); + + while (p != nullptr) + { + auto n = p->atomic_read_next(Key, Key_tweak, domesticate); + cb(p); + p = n; + } + } + + template + inline bool can_dequeue( + Domesticator_head domesticate_head, Domesticator_queue domesticate_queue) + { + return domesticate_head(front.load()) + ->atomic_read_next(Key, Key_tweak, domesticate_queue) != nullptr; + } + + /** + * Pushes a list of messages to the queue. Each message from first to + * last should be linked together through their next pointers. + * + * The Domesticator here is used only on pointers read from the head. See + * the commentary on the class. + */ + template + void enqueue( + freelist::HeadPtr first, + freelist::HeadPtr last, + Domesticator_head domesticate_head) + { + invariant(); + freelist::Object::atomic_store_null(last, Key, Key_tweak); + + // Exchange needs to be acq_rel. + // * It needs to be a release, so nullptr in next is visible. + // * Needs to be acquire, so linking into the list does not race with + // the other threads nullptr init of the next field. + freelist::QueuePtr prev = + back.exchange(capptr_rewild(last), std::memory_order_acq_rel); + + if (SNMALLOC_LIKELY(prev != nullptr)) + { + freelist::Object::atomic_store_next( + domesticate_head(prev), first, Key, Key_tweak); + return; + } + + front.store(capptr_rewild(first)); + } + + /** + * Destructively iterate the queue. Each queue element is removed and fed + * to the callback in turn. The callback may return false to stop iteration + * early (but must have processed the element it was given!). + * + * Takes a domestication callback for each of "pointers read from head" and + * "pointers read from queue". See the commentary on the class. + */ + template< + typename Domesticator_head, + typename Domesticator_queue, + typename Cb> + void dequeue( + Domesticator_head domesticate_head, + Domesticator_queue domesticate_queue, + Cb cb) + { + invariant(); + SNMALLOC_ASSERT(front.load() != nullptr); + + // Use back to bound, so we don't handle new entries. 
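/*
 * Illustrative sketch (simplified, hypothetical node type; no capability
 * pointers or keys): the enqueue above in isolation. A multi-producer append
 * needs only one atomic exchange on the back pointer; the previous back (if
 * any) is then linked to the new chain, otherwise the chain becomes the new
 * front.
 */
#include <atomic>

struct Node
{
  std::atomic<Node*> next{nullptr};
};

struct MpscQueue
{
  std::atomic<Node*> back{nullptr};
  std::atomic<Node*> front{nullptr};

  void enqueue(Node* first, Node* last)
  {
    last->next.store(nullptr, std::memory_order_relaxed);
    // Release so the nullptr next is visible; acquire so we see the previous
    // tail's initialisation before writing through it.
    Node* prev = back.exchange(last, std::memory_order_acq_rel);
    if (prev != nullptr)
    {
      prev->next.store(first, std::memory_order_release);
      return;
    }
    front.store(first, std::memory_order_release);
  }
};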
+ auto b = back.load(std::memory_order_relaxed); + freelist::HeadPtr curr = domesticate_head(front.load()); + + while (address_cast(curr) != address_cast(b)) + { + freelist::HeadPtr next = + curr->atomic_read_next(Key, Key_tweak, domesticate_queue); + // We have observed a non-linearisable effect of the queue. + // Just go back to allocating normally. + if (SNMALLOC_UNLIKELY(next == nullptr)) + break; + // We want this element next, so start it loading. + Aal::prefetch(next.unsafe_ptr()); + if (SNMALLOC_UNLIKELY(!cb(curr))) + { + /* + * We've domesticate_queue-d next so that we can read through it, but + * we're storing it back into client-accessible memory in + * !QueueHeadsAreTame builds, so go ahead and consider it Wild again. + * On QueueHeadsAreTame builds, the subsequent domesticate_head call + * above will also be a type-level sleight of hand, but we can still + * justify it by the domesticate_queue that happened in this + * dequeue(). + */ + front = capptr_rewild(next); + invariant(); + return; + } + + curr = next; + } + + /* + * Here, we've hit the end of the queue: next is nullptr and curr has not + * been handed to the callback. The same considerations about Wildness + * above hold here. + */ + front = capptr_rewild(curr); + invariant(); + } + }; +} // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/localalloc.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/localalloc.h index c85d30b2b8f5..abf4e2e096aa 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/localalloc.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/localalloc.h @@ -22,6 +22,7 @@ #include #include + namespace snmalloc { enum Boundary @@ -78,7 +79,7 @@ namespace snmalloc // allocation on the fast path. This part of the code is inspired by // mimalloc. // Also contains remote deallocation cache. - LocalCache local_cache{&Config::unused_remote}; + LocalCache local_cache{&Config::unused_remote}; // Underlying allocator for most non-fast path operations. CoreAlloc* core_alloc{nullptr}; @@ -184,13 +185,21 @@ namespace snmalloc } return check_init([&](CoreAlloc* core_alloc) { + if (size > bits::one_at_bit(bits::BITS - 1)) + { + // Cannot allocate something that is more that half the size of the + // address space + errno = ENOMEM; + return capptr::Alloc{nullptr}; + } // Grab slab of correct size // Set remote as large allocator remote. auto [chunk, meta] = Config::Backend::alloc_chunk( core_alloc->get_backend_local_state(), large_size_to_chunk_size(size), PagemapEntry::encode( - core_alloc->public_state(), size_to_sizeclass_full(size))); + core_alloc->public_state(), size_to_sizeclass_full(size)), + size_to_sizeclass_full(size)); // set up meta data so sizeclass is correct, and hence alloc size, and // external pointer. #ifdef SNMALLOC_TRACING @@ -201,7 +210,7 @@ namespace snmalloc if (meta != nullptr) { meta->initialise_large( - address_cast(chunk), local_cache.entropy.get_free_list_key()); + address_cast(chunk), freelist::Object::key_root); core_alloc->laden.insert(meta); } @@ -245,8 +254,7 @@ namespace snmalloc sizeclass); }; - return local_cache.template alloc( - domesticate, size, slowpath); + return local_cache.template alloc(domesticate, size, slowpath); } /** @@ -266,20 +274,20 @@ namespace snmalloc * In the second case we need to recheck if this is a remote deallocation, * as we might acquire the originating allocator. 
*/ - SNMALLOC_SLOW_PATH void dealloc_remote_slow(capptr::Alloc p) + SNMALLOC_SLOW_PATH void + dealloc_remote_slow(const PagemapEntry& entry, capptr::Alloc p) { if (core_alloc != nullptr) { #ifdef SNMALLOC_TRACING message<1024>( - "Remote dealloc post {} ({})", + "Remote dealloc post {} ({}, {})", p.unsafe_ptr(), - alloc_size(p.unsafe_ptr())); + alloc_size(p.unsafe_ptr()), + address_cast(entry.get_slab_metadata())); #endif - const PagemapEntry& entry = - Config::Backend::template get_metaentry(address_cast(p)); local_cache.remote_dealloc_cache.template dealloc( - entry.get_remote()->trunc_id(), p); + entry.get_slab_metadata(), p, &local_cache.entropy); post_remote_cache(); return; } @@ -386,7 +394,7 @@ namespace snmalloc // Initialise the global allocator structures ensure_init(); // Grab an allocator for this thread. - init(AllocPool::acquire(&(this->local_cache))); + init(AllocPool::acquire()); } // Return all state in the fast allocator and release the underlying @@ -647,14 +655,16 @@ namespace snmalloc if (SNMALLOC_LIKELY(local_cache.remote_allocator == entry.get_remote())) { dealloc_cheri_checks(p_tame.unsafe_ptr()); - - if (SNMALLOC_LIKELY(CoreAlloc::dealloc_local_object_fast( - entry, p_tame, local_cache.entropy))) - return; - core_alloc->dealloc_local_object_slow(p_tame, entry); + core_alloc->dealloc_local_object(p_tame, entry); return; } + dealloc_remote(entry, p_tame); + } + + SNMALLOC_SLOW_PATH void + dealloc_remote(const PagemapEntry& entry, capptr::Alloc p_tame) + { RemoteAllocator* remote = entry.get_remote(); if (SNMALLOC_LIKELY(remote != nullptr)) { @@ -670,15 +680,18 @@ namespace snmalloc if (local_cache.remote_dealloc_cache.reserve_space(entry)) { local_cache.remote_dealloc_cache.template dealloc( - remote->trunc_id(), p_tame); + entry.get_slab_metadata(), p_tame, &local_cache.entropy); # ifdef SNMALLOC_TRACING message<1024>( - "Remote dealloc fast {} ({})", p_raw, alloc_size(p_raw)); + "Remote dealloc fast {} ({}, {})", + address_cast(p_tame), + alloc_size(p_tame.unsafe_ptr()), + address_cast(entry.get_slab_metadata())); # endif return; } - dealloc_remote_slow(p_tame); + dealloc_remote_slow(entry, p_tame); return; } @@ -712,7 +725,7 @@ namespace snmalloc auto pm_size = sizeclass_full_to_size(pm_sc); snmalloc_check_client( mitigations(sanity_checks), - sc == pm_sc, + (sc == pm_sc) || (p == nullptr), "Dealloc rounded size mismatch: {} != {}", rsize, pm_size); @@ -765,7 +778,7 @@ namespace snmalloc // entry for the first chunk of memory, that states it represents a // large object, so we can pull the check for null off the fast path. const PagemapEntry& entry = - Config::Backend::template get_metaentry(address_cast(p_raw)); + Config::Backend::get_metaentry(address_cast(p_raw)); return sizeclass_full_to_size(entry.get_sizeclass()); #endif @@ -809,6 +822,57 @@ namespace snmalloc } } + /** + * @brief Get the client meta data for the snmalloc allocation covering this + * pointer. 
+ */ + typename Config::ClientMeta::DataRef get_client_meta_data(void* p) + { + const PagemapEntry& entry = + Config::Backend::get_metaentry(address_cast(p)); + + size_t index = slab_index(entry.get_sizeclass(), address_cast(p)); + + auto* meta_slab = entry.get_slab_metadata(); + + if (SNMALLOC_UNLIKELY(entry.is_backend_owned())) + { + error("Cannot access meta-data for write for freed memory!"); + } + + if (SNMALLOC_UNLIKELY(meta_slab == nullptr)) + { + error( + "Cannot access meta-data for non-snmalloc object in writable form!"); + } + + return meta_slab->get_meta_for_object(index); + } + + /** + * @brief Get the client meta data for the snmalloc allocation covering this + * pointer. + */ + std::add_const_t + get_client_meta_data_const(void* p) + { + const PagemapEntry& entry = + Config::Backend::template get_metaentry(address_cast(p)); + + size_t index = slab_index(entry.get_sizeclass(), address_cast(p)); + + auto* meta_slab = entry.get_slab_metadata(); + + if (SNMALLOC_UNLIKELY( + (meta_slab == nullptr) || (entry.is_backend_owned()))) + { + static typename Config::ClientMeta::StorageType null_meta_store{}; + return Config::ClientMeta::get(&null_meta_store, 0); + } + + return meta_slab->get_meta_for_object(index); + } + /** * Returns the number of remaining bytes in an object. * @@ -862,7 +926,7 @@ namespace snmalloc * core allocator for use by this local allocator then it needs to access * this field. */ - LocalCache& get_local_cache() + LocalCache& get_local_cache() { return local_cache; } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/localcache.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/localcache.h index cfbbaa576f2f..5a63e281d910 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/localcache.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/localcache.h @@ -37,6 +37,7 @@ namespace snmalloc // This is defined on its own, so that it can be embedded in the // thread local fast allocator, but also referenced from the // thread local core allocator. + template struct LocalCache { // Free list per small size class. These are used for @@ -54,7 +55,7 @@ namespace snmalloc /** * Remote deallocations for other threads */ - RemoteDeallocCache remote_dealloc_cache; + RemoteDeallocCache remote_dealloc_cache; constexpr LocalCache(RemoteAllocator* remote_allocator) : remote_allocator(remote_allocator) @@ -63,10 +64,10 @@ namespace snmalloc /** * Return all the free lists to the allocator. Used during thread teardown. 
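/*
 * Illustrative sketch (hypothetical, heavily simplified; the real lists
 * carry encoded pointers and keys): the shape of the per-size-class fast
 * free lists above. Allocation pops the head of the matching list if one is
 * available and otherwise defers to a caller-supplied slow path.
 */
#include <array>
#include <cstddef>

struct FreeNode
{
  FreeNode* next;
};

constexpr size_t NUM_CLASSES = 64; // assumed

struct FastLists
{
  std::array<FreeNode*, NUM_CLASSES> heads{};

  template<typename Slow>
  void* alloc(size_t sizeclass, Slow slowpath)
  {
    FreeNode*& head = heads[sizeclass];
    if (head != nullptr)
    {
      FreeNode* p = head;
      head = p->next; // pop the first free object
      return p;
    }
    return slowpath(sizeclass); // refill / large / error path
  }
};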
*/ - template + template bool flush(typename Config::LocalState* local_state, DeallocFun dealloc) { - auto& key = entropy.get_free_list_key(); + auto& key = freelist::Object::key_root; auto domesticate = [local_state](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { return capptr_domesticate(local_state, p); @@ -85,19 +86,15 @@ namespace snmalloc } } - return remote_dealloc_cache.post( + return remote_dealloc_cache.template post( local_state, remote_allocator->trunc_id()); } - template< - ZeroMem zero_mem, - typename Config, - typename Slowpath, - typename Domesticator> + template SNMALLOC_FAST_PATH capptr::Alloc alloc(Domesticator domesticate, size_t size, Slowpath slowpath) { - auto& key = entropy.get_free_list_key(); + auto& key = freelist::Object::key_root; smallsizeclass_t sizeclass = size_to_sizeclass(size); auto& fl = small_fast_free_lists[sizeclass]; if (SNMALLOC_LIKELY(!fl.empty())) diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/metadata.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/metadata.h index 8b1314e2e17d..968902da8ed9 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/metadata.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/metadata.h @@ -189,6 +189,7 @@ namespace snmalloc { return meta &= ~META_BOUNDARY_BIT; } + ///@} /** @@ -368,21 +369,26 @@ namespace snmalloc class FrontendSlabMetadata_Trait { private: - template + template friend class FrontendSlabMetadata; // Can only be constructed by FrontendSlabMetadata - FrontendSlabMetadata_Trait() = default; + constexpr FrontendSlabMetadata_Trait() = default; }; /** * The FrontendSlabMetadata represent the metadata associated with a single * slab. */ - template + template class FrontendSlabMetadata : public FrontendSlabMetadata_Trait { public: + /** + * Type that encapsulates logic for accessing client meta-data. + */ + using ClientMeta = ClientMeta_; + /** * Used to link slab metadata together in various other data-structures. * This is used with `SeqSet` and so may actually hold a subclass of this @@ -424,6 +430,13 @@ namespace snmalloc */ bool large_ = false; + /** + * Stores client meta-data for this slab. This must be last element in the + * slab. The meta data will actually allocate multiple elements after this + * type, so that client_meta_[1] will work for the required meta-data size. + */ + SNMALLOC_NO_UNIQUE_ADDRESS typename ClientMeta::StorageType client_meta_{}; + uint16_t& needed() { return needed_; @@ -443,7 +456,7 @@ namespace snmalloc static_assert( std::is_base_of::value, "Template should be a subclass of FrontendSlabMetadata"); - free_queue.init(slab, key); + free_queue.init(slab, key, this->as_key_tweak()); // Set up meta data as if the entire slab has been turned into a free // list. This means we don't have to check for special cases where we have // returned all the elements, but this is a slab that is still being bump @@ -452,6 +465,9 @@ namespace snmalloc set_sleeping(sizeclass, 0); large_ = false; + + new (&client_meta_) + typename ClientMeta::StorageType[get_client_storage_count(sizeclass)]; } /** @@ -462,13 +478,15 @@ namespace snmalloc void initialise_large(address_t slab, const FreeListKey& key) { // We will push to this just to make the fast path clean. - free_queue.init(slab, key); + free_queue.init(slab, key, this->as_key_tweak()); // Flag to detect that it is a large alloc on the slow path large_ = true; // Jump to slow path on first deallocation. 
needed() = 1; + + new (&client_meta_) typename ClientMeta::StorageType(); } /** @@ -483,6 +501,59 @@ namespace snmalloc return (--needed()) == 0; } + class ReturnObjectsIterator + { + uint16_t _batch; + FrontendSlabMetadata* _meta; + + static_assert(sizeof(_batch) * 8 > MAX_CAPACITY_BITS); + + public: + ReturnObjectsIterator(uint16_t n, FrontendSlabMetadata* m) + : _batch(n), _meta(m) + {} + + template + SNMALLOC_FAST_PATH bool step() + { + // The first update must always return some positive number of objects. + SNMALLOC_ASSERT(!first || (_batch != 0)); + + /* + * Stop iteration when there are no more objects to return. Perform + * this test only on non-first steps to avoid a branch on the hot path. + */ + if (!first && _batch == 0) + return false; + + if (SNMALLOC_LIKELY(_batch < _meta->needed())) + { + // Will not hit threshold for state transition + _meta->needed() -= _batch; + return false; + } + + // Hit threshold for state transition, may yet hit another + _batch -= _meta->needed(); + _meta->needed() = 0; + return true; + } + }; + + /** + * A batch version of return_object. + * + * Returns an iterator that should have `.step<>()` called on it repeatedly + * until it returns `false`. The first step should invoke `.step()` + * while the rest should invoke `.step()`. After each + * true-returning `.step()`, the caller should run the slow-path code to + * update the rest of the metadata for this slab. + */ + ReturnObjectsIterator return_objects(uint16_t n) + { + return ReturnObjectsIterator(n, this); + } + bool is_unused() { return needed() == 0; @@ -556,10 +627,12 @@ namespace snmalloc LocalEntropy& entropy, smallsizeclass_t sizeclass) { - auto& key = entropy.get_free_list_key(); + auto& key = freelist::Object::key_root; std::remove_reference_t tmp_fl; - auto remaining = meta->free_queue.close(tmp_fl, key); + + auto remaining = + meta->free_queue.close(tmp_fl, key, meta->as_key_tweak()); auto p = tmp_fl.take(key, domesticate); fast_free_list = tmp_fl; @@ -581,7 +654,45 @@ namespace snmalloc // start of the slab. [[nodiscard]] address_t get_slab_interior(const FreeListKey& key) const { - return address_cast(free_queue.read_head(0, key)); + return address_cast(free_queue.read_head(0, key, this->as_key_tweak())); + } + + [[nodiscard]] SNMALLOC_FAST_PATH address_t as_key_tweak() const noexcept + { + return as_key_tweak(address_cast(this)); + } + + [[nodiscard]] SNMALLOC_FAST_PATH static address_t + as_key_tweak(address_t self) + { + return self / alignof(FrontendSlabMetadata); + } + + typename ClientMeta::DataRef get_meta_for_object(size_t index) + { + return ClientMeta::get(&client_meta_, index); + } + + static size_t get_client_storage_count(smallsizeclass_t sizeclass) + { + auto count = sizeclass_to_slab_object_count(sizeclass); + auto result = ClientMeta::required_count(count); + if (result == 0) + return 1; + return result; + } + + static size_t get_extra_bytes(sizeclass_t sizeclass) + { + if (sizeclass.is_small()) + // We remove one from the extra-bytes as there is one in the metadata to + // start with. + return (get_client_storage_count(sizeclass.as_small()) - 1) * + sizeof(typename ClientMeta::StorageType); + + // For large classes there is only a single entry, so this is covered by + // the existing entry in the metaslab, and further bytes are not required. + return 0; } }; @@ -589,19 +700,19 @@ namespace snmalloc * Entry stored in the pagemap. See docs/AddressSpace.md for the full * FrontendMetaEntry lifecycle. 
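/*
 * Illustrative sketch (hypothetical, simplified from return_objects()
 * above): draining a batch of n freed objects against a slab's "needed"
 * counter. Each time the counter reaches zero the caller must run the slab
 * state-transition slow path, which is assumed to leave "needed" at a new
 * positive threshold.
 */
#include <cassert>
#include <cstdint>

struct SlabCounter
{
  uint16_t needed;
};

template<typename OnThreshold>
void return_batch(SlabCounter& slab, uint32_t n, OnThreshold on_threshold)
{
  while (n >= slab.needed)
  {
    n -= slab.needed;
    slab.needed = 0;
    on_threshold(slab); // state transition; assumed to reset slab.needed
    if (n == 0)
      return;
    assert(slab.needed > 0);
  }
  slab.needed -= static_cast<uint16_t>(n);
}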
*/ - template + template class FrontendMetaEntry : public MetaEntryBase { /** * Ensure that the template parameter is valid. */ static_assert( - std::is_convertible_v, + std::is_convertible_v, "The front end requires that the back end provides slab metadata that is " "compatible with the front-end's structure"); public: - using SlabMetadata = BackendSlabMetadata; + using SlabMetadata = SlabMetadataType; constexpr FrontendMetaEntry() = default; @@ -612,9 +723,8 @@ namespace snmalloc * `get_remote_and_sizeclass`. */ SNMALLOC_FAST_PATH - FrontendMetaEntry(BackendSlabMetadata* meta, uintptr_t remote_and_sizeclass) - : MetaEntryBase( - unsafe_to_uintptr(meta), remote_and_sizeclass) + FrontendMetaEntry(SlabMetadata* meta, uintptr_t remote_and_sizeclass) + : MetaEntryBase(unsafe_to_uintptr(meta), remote_and_sizeclass) { SNMALLOC_ASSERT_MSG( (REMOTE_BACKEND_MARKER & remote_and_sizeclass) == 0, @@ -645,12 +755,10 @@ namespace snmalloc * guarded by an assert that this chunk is being used as a slab (i.e., has * an associated owning allocator). */ - [[nodiscard]] SNMALLOC_FAST_PATH BackendSlabMetadata* - get_slab_metadata() const + [[nodiscard]] SNMALLOC_FAST_PATH SlabMetadata* get_slab_metadata() const { - SNMALLOC_ASSERT(get_remote() != nullptr); - return unsafe_from_uintptr( - meta & ~META_BOUNDARY_BIT); + SNMALLOC_ASSERT(!is_backend_owned()); + return unsafe_from_uintptr(meta & ~META_BOUNDARY_BIT); } }; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/pool.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/pool.h index 36737207db2d..0497d1ad9f8f 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/pool.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/pool.h @@ -22,7 +22,7 @@ namespace snmalloc { template< typename TT, - SNMALLOC_CONCEPT(IsConfig) Config, + SNMALLOC_CONCEPT(Constructable) Construct, PoolState& get_state()> friend class Pool; @@ -45,50 +45,10 @@ namespace snmalloc * SingletonPoolState::pool is the default provider for the PoolState within * the Pool class. */ - template + template class SingletonPoolState { - /** - * SFINAE helper. Matched only if `T` implements `ensure_init`. Calls it - * if it exists. - */ - template - SNMALLOC_FAST_PATH static auto call_ensure_init(SharedStateHandle_*, int) - -> decltype(SharedStateHandle_::ensure_init()) - { - static_assert( - std::is_same::value, - "SFINAE parameter, should only be used with Config"); - SharedStateHandle_::ensure_init(); - } - - /** - * SFINAE helper. Matched only if `T` does not implement `ensure_init`. - * Does nothing if called. - */ - template - SNMALLOC_FAST_PATH static auto call_ensure_init(SharedStateHandle_*, long) - { - static_assert( - std::is_same::value, - "SFINAE parameter, should only be used with Config"); - } - - /** - * Call `Config::ensure_init()` if it is implemented, do nothing - * otherwise. - */ - SNMALLOC_FAST_PATH static void ensure_init() - { - call_ensure_init(nullptr, 0); - } - - static void make_pool(PoolState*) noexcept - { - ensure_init(); - // Default initializer already called on PoolState, no need to use - // placement new. - } + static void make_pool(PoolState*) noexcept {} public: /** @@ -101,6 +61,22 @@ namespace snmalloc } }; + /** + * @brief Default construct helper for the pool. Just uses `new`. This can't + * be used by the allocator pool as it has not created memory yet. 
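/*
 * Illustrative sketch: a lambda-based critical-section helper of the shape
 * used by the pool code below. snmalloc's real `with` is defined elsewhere
 * and may differ; this version just wraps any BasicLockable lock so the body
 * always runs under the lock and the lock is always released.
 */
#include <mutex>

template<typename Lock, typename F>
void with(Lock& lock, F&& body)
{
  std::lock_guard<Lock> guard(lock);
  body();
}

// Usage: with(pool.lock, [&]() { /* manipulate pool.front / pool.back */ });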
+ * + * @tparam T + */ + template + class DefaultConstruct + { + public: + static capptr::Alloc make() + { + return capptr::Alloc::unsafe_from(new T()); + } + }; + /** * Wrapper class to access a pool of a particular type of object. * @@ -116,17 +92,17 @@ namespace snmalloc */ template< typename T, - SNMALLOC_CONCEPT(IsConfig) Config, - PoolState& get_state() = SingletonPoolState::pool> + SNMALLOC_CONCEPT(Constructable) ConstructT = DefaultConstruct, + PoolState& get_state() = SingletonPoolState::pool> class Pool { public: - template - static T* acquire(Args&&... args) + static T* acquire() { PoolState& pool = get_state(); - { - FlagLock f(pool.lock); + + T* result{nullptr}; + with(pool.lock, [&]() { if (pool.front != nullptr) { auto p = pool.front; @@ -137,26 +113,21 @@ namespace snmalloc } pool.front = next; p->set_in_use(); - return p.unsafe_ptr(); + result = p.unsafe_ptr(); } - } + }); - auto raw = - Config::Backend::template alloc_meta_data(nullptr, sizeof(T)); + if (result != nullptr) + return result; - if (raw == nullptr) - { - Config::Pal::error("Failed to initialise thread local allocator."); - } + auto p = ConstructT::make(); - auto p = capptr::Alloc::unsafe_from(new (raw.unsafe_ptr()) - T(std::forward(args)...)); + with(pool.lock, [&]() { + p->list_next = pool.list; + pool.list = p; - FlagLock f(pool.lock); - p->list_next = pool.list; - pool.list = p; - - p->set_in_use(); + p->set_in_use(); + }); return p.unsafe_ptr(); } @@ -180,11 +151,13 @@ namespace snmalloc // Returns a linked list of all objects in the stack, emptying the stack. if (p == nullptr) { - FlagLock f(pool.lock); - auto result = pool.front; - pool.front = nullptr; - pool.back = nullptr; - return result.unsafe_ptr(); + T* result; + with(pool.lock, [&]() { + result = pool.front.unsafe_ptr(); + pool.front = nullptr; + pool.back = nullptr; + }); + return result; } return p->next.unsafe_ptr(); @@ -199,18 +172,18 @@ namespace snmalloc { PoolState& pool = get_state(); last->next = nullptr; - FlagLock f(pool.lock); - - if (pool.front == nullptr) - { - pool.front = capptr::Alloc::unsafe_from(first); - } - else - { - pool.back->next = capptr::Alloc::unsafe_from(first); - } + with(pool.lock, [&]() { + if (pool.front == nullptr) + { + pool.front = capptr::Alloc::unsafe_from(first); + } + else + { + pool.back->next = capptr::Alloc::unsafe_from(first); + } - pool.back = capptr::Alloc::unsafe_from(last); + pool.back = capptr::Alloc::unsafe_from(last); + }); } /** @@ -222,18 +195,19 @@ namespace snmalloc { PoolState& pool = get_state(); last->next = nullptr; - FlagLock f(pool.lock); - if (pool.front == nullptr) - { - pool.back = capptr::Alloc::unsafe_from(last); - } - else - { - last->next = pool.front; - pool.back->next = capptr::Alloc::unsafe_from(first); - } - pool.front = capptr::Alloc::unsafe_from(first); + with(pool.lock, [&]() { + if (pool.front == nullptr) + { + pool.back = capptr::Alloc::unsafe_from(last); + } + else + { + last->next = pool.front; + pool.back->next = capptr::Alloc::unsafe_from(first); + } + pool.front = capptr::Alloc::unsafe_from(first); + }); } static T* iterate(T* p = nullptr) diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/pooled.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/pooled.h index a812bc924cb1..4e7c76884166 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/pooled.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/pooled.h @@ -5,16 +5,40 @@ namespace snmalloc { + template + struct Range + { + CapPtr base; + size_t length; + }; + template class PoolState; +#ifdef 
__cpp_concepts + template + concept Constructable = requires() { + { + C::make() + } -> ConceptSame>; + }; +#endif // __cpp_concepts + + /** + * Required to be implemented by all types that are pooled. + * + * The constructor of any inherited type must take a Range& as its first + * argument. This represents the leftover from pool allocation rounding up to + * the nearest power of 2. It is valid to ignore this argument, but can be + * used to optimise meta-data usage at startup. + */ template class Pooled { public: template< typename TT, - SNMALLOC_CONCEPT(IsConfig) Config, + SNMALLOC_CONCEPT(Constructable) Construct, PoolState& get_state()> friend class Pool; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/remoteallocator.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/remoteallocator.h index 2d15e6d1119a..a207d12f235c 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/remoteallocator.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/remoteallocator.h @@ -1,43 +1,296 @@ #pragma once -#include "../ds/ds.h" -#include "freelist.h" -#include "metadata.h" -#include "sizeclasstable.h" +#include "freelist_queue.h" -#include -#include +#include namespace snmalloc { + class RemoteMessageAssertions; + /** + * Entries on a remote message queue. Logically, this is a pair of freelist + * linkages, together with some metadata: + * + * - a cyclic list ("ring") of free objects (atypically for rings, there is + * no sentinel node here: the message itself is a free object), * - * A RemoteAllocator is the message queue of freed objects. It exposes a MPSC - * append-only atomic queue that uses one xchg per append. + * - the length of that ring * - * The internal pointers are considered QueuePtr-s to support deployment - * scenarios in which the RemoteAllocator itself is exposed to the client. - * This is excessively paranoid in the common case that the RemoteAllocator-s - * are as "hard" for the client to reach as the Pagemap, which we trust to - * store not just Tame CapPtr<>s but raw C++ pointers. + * - the linkage for the message queue itself + * + * In practice, there is a fair bit more going on here: the ring of free + * objects is not entirely encoded as a freelist. While traversing the + * successor pointers in objects on the ring will eventually lead back to + * this RemoteMessage object, the successor pointer from this object is + * encoded as a relative displacement. This is guaranteed to be physically + * smaller than a full pointer (because slabs are smaller than the whole + * address space). This gives us enough room to pack in the length of the + * ring, without needing to grow the structure. + */ + class BatchedRemoteMessage + { + friend class BatchedRemoteMessageAssertions; + + freelist::Object::T<> free_ring; + freelist::Object::T<> message_link; + + static_assert( + sizeof(free_ring.next_object) >= sizeof(void*), + "BatchedRemoteMessage bitpacking needs sizeof(void*) in next_object"); + + public: + static auto emplace_in_alloc(capptr::Alloc alloc) + { + return CapPtr::unsafe_from( + new (alloc.unsafe_ptr()) BatchedRemoteMessage()); + } + + static auto mk_from_freelist_builder( + freelist::Builder& flb, + const FreeListKey& key, + address_t key_tweak) + { + size_t size = flb.extract_segment_length(); + + SNMALLOC_ASSERT(size < bits::one_at_bit(MAX_CAPACITY_BITS)); + + auto [first, last] = flb.extract_segment(key, key_tweak); + + /* + * Preserve the last node's backpointer and change its type. 
Because we + * use placement new to build our RemoteMessage atop the memory of a + * freelist::Object::T<> (to avoid UB) and the constructor may nullify + * the `prev` field, put it right back. Ideally the compiler is smart + * enough to see that this is a no-op. + */ + auto last_prev = last->prev; + auto self = + CapPtr::unsafe_from( + new (last.unsafe_ptr()) BatchedRemoteMessage()); + self->free_ring.prev = last_prev; + + // XXX On CHERI, we could do a fair bit better if we had a primitive for + // extracting and discarding the offset. That probably beats the dance + // done below, but it should work as it stands. + + auto n = freelist::HeadPtr::unsafe_from( + unsafe_from_uintptr>( + (static_cast(pointer_diff_signed(self, first)) + << MAX_CAPACITY_BITS) + + size)); + + // Close the ring, storing our bit-packed value in the next field. + freelist::Object::store_nextish( + &self->free_ring.next_object, first, key, key_tweak, n); + + return self; + } + + static freelist::HeadPtr + to_message_link(capptr::Alloc m) + { + return pointer_offset(m, offsetof(BatchedRemoteMessage, message_link)) + .as_reinterpret>(); + } + + static capptr::Alloc + from_message_link(freelist::HeadPtr chainPtr) + { + return pointer_offset_signed( + chainPtr, + -static_cast( + offsetof(BatchedRemoteMessage, message_link))) + .as_reinterpret(); + } + + template + SNMALLOC_FAST_PATH static std::pair + open_free_ring( + capptr::Alloc m, + size_t objsize, + const FreeListKey& key, + address_t key_tweak, + Domesticator_queue domesticate) + { + uintptr_t encoded = + m->free_ring.read_next(key, key_tweak, domesticate).unsafe_uintptr(); + + uint16_t decoded_size = + static_cast(encoded) & bits::mask_bits(MAX_CAPACITY_BITS); + static_assert(sizeof(decoded_size) * 8 > MAX_CAPACITY_BITS); + + /* + * Derive an out-of-bounds pointer to the next allocation, then use the + * authmap to reconstruct an in-bounds version, which we then immediately + * bound and rewild and then domesticate (how strange). + * + * XXX See above re: doing better on CHERI. + */ + auto next = domesticate( + capptr_rewild( + Config::Backend::capptr_rederive_alloc( + pointer_offset_signed( + m, static_cast(encoded) >> MAX_CAPACITY_BITS), + objsize)) + .template as_static>()); + + if constexpr (mitigations(freelist_backward_edge)) + { + next->check_prev( + signed_prev(address_cast(m), address_cast(next), key, key_tweak)); + } + else + { + UNUSED(key); + UNUSED(key_tweak); + } + + return {next.template as_static>(), decoded_size}; + } + + template + static uint16_t ring_size( + capptr::Alloc m, + const FreeListKey& key, + address_t key_tweak, + Domesticator_queue domesticate) + { + uintptr_t encoded = + m->free_ring.read_next(key, key_tweak, domesticate).unsafe_uintptr(); + + uint16_t decoded_size = + static_cast(encoded) & bits::mask_bits(MAX_CAPACITY_BITS); + static_assert(sizeof(decoded_size) * 8 > MAX_CAPACITY_BITS); + + if constexpr (mitigations(freelist_backward_edge)) + { + /* + * Like above, but we don't strictly need to rebound the pointer, + * since it's only used internally. Still, doesn't hurt to bound + * to the free list linkage. 
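/*
 * Illustrative sketch (hypothetical constants and names): packing a ring
 * length and a signed displacement to the ring's first element into one
 * pointer-sized word, relying on the displacement fitting in the bits above
 * CAPACITY_BITS because slabs are much smaller than the address space.
 */
#include <cassert>
#include <cstdint>

constexpr unsigned CAPACITY_BITS = 11; // assumed stand-in for MAX_CAPACITY_BITS
constexpr uintptr_t CAPACITY_MASK = (uintptr_t(1) << CAPACITY_BITS) - 1;

inline uintptr_t pack(intptr_t displacement, uint16_t size)
{
  return (static_cast<uintptr_t>(displacement) << CAPACITY_BITS) +
    (size & CAPACITY_MASK);
}

inline void unpack(uintptr_t encoded, intptr_t& displacement, uint16_t& size)
{
  size = static_cast<uint16_t>(encoded & CAPACITY_MASK);
  // Arithmetic shift recovers the sign-extended displacement.
  displacement = static_cast<intptr_t>(encoded) >> CAPACITY_BITS;
}

inline void pack_example()
{
  intptr_t d;
  uint16_t s;
  unpack(pack(-4096, 37), d, s);
  assert(d == -4096 && s == 37);
}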
+ */ + auto next = domesticate( + capptr_rewild( + Config::Backend::capptr_rederive_alloc( + pointer_offset_signed( + m, static_cast(encoded) >> MAX_CAPACITY_BITS), + sizeof(freelist::Object::T<>))) + .template as_static>()); + + next->check_prev( + signed_prev(address_cast(m), address_cast(next), key, key_tweak)); + } + else + { + UNUSED(key); + UNUSED(key_tweak); + UNUSED(domesticate); + } + + return decoded_size; + } + }; + + class BatchedRemoteMessageAssertions + { + static_assert( + (DEALLOC_BATCH_RINGS == 0) || + (sizeof(BatchedRemoteMessage) <= MIN_ALLOC_SIZE)); + static_assert(offsetof(BatchedRemoteMessage, free_ring) == 0); + + static_assert( + (DEALLOC_BATCH_RINGS == 0) || + (MAX_SLAB_SPAN_BITS + MAX_CAPACITY_BITS < 8 * sizeof(void*)), + "Ring bit-stuffing trick can't reach far enough to enclose a slab"); + }; + + /** The type of a remote message when we are not batching messages onto + * rings. * - * While we could try to condition the types used here on a flag in the - * backend's `struct Flags Options` value, we instead expose two domesticator - * callbacks at the interface and are careful to use one for the front and - * back values and the other for pointers read from the queue itself. That's - * not ideal, but it lets the client condition its behavior appropriately and - * prevents us from accidentally following either of these pointers in generic - * code. + * Relative to BatchRemoteMessage, this type is smaller, as it contains only + * a single linkage, to the next message. (And possibly a backref, if + * mitigations(freelist_backward_edge) is enabled.) + */ + class SingletonRemoteMessage + { + friend class SingletonRemoteMessageAssertions; + + freelist::Object::T<> message_link; + + public: + static auto emplace_in_alloc(capptr::Alloc alloc) + { + return CapPtr::unsafe_from( + new (alloc.unsafe_ptr()) SingletonRemoteMessage()); + } + + static freelist::HeadPtr + to_message_link(capptr::Alloc m) + { + return pointer_offset(m, offsetof(SingletonRemoteMessage, message_link)) + .as_reinterpret>(); + } + + static capptr::Alloc + from_message_link(freelist::HeadPtr chainPtr) + { + return pointer_offset_signed( + chainPtr, + -static_cast( + offsetof(SingletonRemoteMessage, message_link))) + .as_reinterpret(); + } + + template + SNMALLOC_FAST_PATH static std::pair + open_free_ring( + capptr::Alloc m, + size_t, + const FreeListKey&, + address_t, + Domesticator_queue) + { + return { + m.as_reinterpret>(), static_cast(1)}; + } + + template + static uint16_t ring_size( + capptr::Alloc, + const FreeListKey&, + address_t, + Domesticator_queue) + { + return 1; + } + }; + + class SingletonRemoteMessageAssertions + { + static_assert(sizeof(SingletonRemoteMessage) <= MIN_ALLOC_SIZE); + static_assert( + sizeof(SingletonRemoteMessage) == sizeof(freelist::Object::T<>)); + static_assert(offsetof(SingletonRemoteMessage, message_link) == 0); + }; + + using RemoteMessage = std::conditional_t< + (DEALLOC_BATCH_RINGS > 0), + BatchedRemoteMessage, + SingletonRemoteMessage>; + + static_assert(sizeof(RemoteMessage) <= MIN_ALLOC_SIZE); + + /** + * A RemoteAllocator is the message queue of freed objects. It builds on the + * FreeListMPSCQ but encapsulates knowledge that the objects are actually + * RemoteMessage-s and not just any freelist::object::T<>s. * - * `domesticate_head` is used for the pointer used to reach the of the queue, - * while `domesticate_queue` is used to traverse the first link in the queue - * itself. 
In the case that the RemoteAllocator is not easily accessible to - * the client, `domesticate_head` can just be a type coersion, and - * `domesticate_queue` should perform actual validation. If the - * RemoteAllocator is exposed to the client, both Domesticators should perform - * validation. + * RemoteAllocator-s may be exposed to client tampering. As a result, + * pointer domestication may be necessary. See the documentation for + * FreeListMPSCQ for details. */ - struct alignas(REMOTE_MIN_ALIGN) RemoteAllocator + struct RemoteAllocator { /** * Global key for all remote lists. @@ -49,49 +302,37 @@ namespace snmalloc */ inline static FreeListKey key_global{0xdeadbeef, 0xbeefdead, 0xdeadbeef}; - using alloc_id_t = address_t; + FreeListMPSCQ list; - // Store the message queue on a separate cacheline. It is mutable data that - // is read by other threads. - alignas(CACHELINE_SIZE) freelist::AtomicQueuePtr back{nullptr}; - // Store the two ends on different cache lines as access by different - // threads. - alignas(CACHELINE_SIZE) freelist::AtomicQueuePtr front{nullptr}; - // Fake first entry - freelist::Object::T stub{}; + using alloc_id_t = address_t; constexpr RemoteAllocator() = default; void invariant() { - SNMALLOC_ASSERT( - (address_cast(front.load()) == address_cast(&stub)) || - (back != nullptr)); + list.invariant(); } void init() { - freelist::HeadPtr stub_ptr = freelist::HeadPtr::unsafe_from(&stub); - freelist::Object::atomic_store_null(stub_ptr, key_global); - front.store(freelist::QueuePtr::unsafe_from(&stub)); - back.store(nullptr, std::memory_order_relaxed); - invariant(); + list.init(); } - freelist::QueuePtr destroy() + template + void destroy_and_iterate(Domesticator_queue domesticate, Cb cb) { - freelist::QueuePtr fnt = front.load(); - back.store(nullptr, std::memory_order_relaxed); - if (address_cast(front.load()) == address_cast(&stub)) - return nullptr; - return fnt; + auto cbwrap = [cb](freelist::HeadPtr p) SNMALLOC_FAST_PATH_LAMBDA { + cb(RemoteMessage::from_message_link(p)); + }; + + return list.destroy_and_iterate(domesticate, cbwrap); } - template - inline bool can_dequeue(Domesticator_head domesticate_head) + template + inline bool can_dequeue( + Domesticator_head domesticate_head, Domesticator_queue domesticate_queue) { - return domesticate_head(front.load()) - ->atomic_read_next(key_global, domesticate_head) == nullptr; + return list.can_dequeue(domesticate_head, domesticate_queue); } /** @@ -103,28 +344,14 @@ namespace snmalloc */ template void enqueue( - freelist::HeadPtr first, - freelist::HeadPtr last, + capptr::Alloc first, + capptr::Alloc last, Domesticator_head domesticate_head) { - invariant(); - freelist::Object::atomic_store_null(last, key_global); - - // Exchange needs to be acq_rel. - // * It needs to be a release, so nullptr in next is visible. - // * Needs to be acquire, so linking into the list does not race with - // the other threads nullptr init of the next field. 
- freelist::QueuePtr prev = - back.exchange(capptr_rewild(last), std::memory_order_acq_rel); - - if (SNMALLOC_LIKELY(prev != nullptr)) - { - freelist::Object::atomic_store_next( - domesticate_head(prev), first, key_global); - return; - } - - front.store(capptr_rewild(first)); + list.enqueue( + RemoteMessage::to_message_link(first), + RemoteMessage::to_message_link(last), + domesticate_head); } /** @@ -144,49 +371,10 @@ namespace snmalloc Domesticator_queue domesticate_queue, Cb cb) { - invariant(); - SNMALLOC_ASSERT(front.load() != nullptr); - - // Use back to bound, so we don't handle new entries. - auto b = back.load(std::memory_order_relaxed); - freelist::HeadPtr curr = domesticate_head(front.load()); - - while (address_cast(curr) != address_cast(b)) - { - freelist::HeadPtr next = - curr->atomic_read_next(key_global, domesticate_queue); - // We have observed a non-linearisable effect of the queue. - // Just go back to allocating normally. - if (SNMALLOC_UNLIKELY(next == nullptr)) - break; - // We want this element next, so start it loading. - Aal::prefetch(next.unsafe_ptr()); - if (SNMALLOC_UNLIKELY(!cb(curr))) - { - /* - * We've domesticate_queue-d next so that we can read through it, but - * we're storing it back into client-accessible memory in - * !QueueHeadsAreTame builds, so go ahead and consider it Wild again. - * On QueueHeadsAreTame builds, the subsequent domesticate_head call - * above will also be a type-level sleight of hand, but we can still - * justify it by the domesticate_queue that happened in this - * dequeue(). - */ - front = capptr_rewild(next); - invariant(); - return; - } - - curr = next; - } - - /* - * Here, we've hit the end of the queue: next is nullptr and curr has not - * been handed to the callback. The same considerations about Wildness - * above hold here. - */ - front = capptr_rewild(curr); - invariant(); + auto cbwrap = [cb](freelist::HeadPtr p) SNMALLOC_FAST_PATH_LAMBDA { + return cb(RemoteMessage::from_message_link(p)); + }; + list.dequeue(domesticate_head, domesticate_queue, cbwrap); } alloc_id_t trunc_id() diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/remotecache.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/remotecache.h index 96f5e09732a6..585fb9146276 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/remotecache.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/remotecache.h @@ -12,13 +12,189 @@ namespace snmalloc { + + /** + * Same-destination message batching. + * + * In addition to batching message sends (see below), we can also batch + * collections of messages destined for the same slab. This class handles + * collecting sufficiently temporally local messages destined to the same + * slab, collecting them with freelist::Builder(s), and then converting + * them to RemoteMessage rings when appropriate. + * + * In order that this class not need to know about the mechanics of actually + * pushing RemoteMessage-s around, the methods involved in "closing" rings + * -- that is, in converting freelist::Builder(s) to RemoteMessages -- take + * a callable, of template type Forward, which is given the destination + * slab('s metadata address) and the to-be-sent RemoteMessage. 
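/*
 * Illustrative sketch (hypothetical table geometry): mapping a slab's
 * metadata address to a small set of "open ring" slots, multiply-shift
 * hashing to pick the set and, on a miss, taking a free way or evicting the
 * fullest one. The hash constant follows https://github.com/skeeto/hash-prospector
 * as in the code below; everything else here is made up for illustration.
 */
#include <array>
#include <cstddef>
#include <cstdint>

constexpr size_t SET_BITS = 2; // 4 sets (assumed)
constexpr size_t ASSOC = 2;    // 2 ways per set (assumed)
constexpr size_t SLOTS = (size_t(1) << SET_BITS) * ASSOC;

struct Slot
{
  uintptr_t slab = 0; // 0 means empty
  size_t count = 0;
};

inline size_t set_index(uintptr_t slab_key)
{
  return ((slab_key * 0x7EFB352D) >> 16) & ((size_t(1) << SET_BITS) - 1);
}

// Returns the slot to use for this slab; on eviction the caller flushes the
// victim ring before reusing the slot.
inline size_t pick_slot(std::array<Slot, SLOTS>& table, uintptr_t slab_key)
{
  size_t base = set_index(slab_key) * ASSOC;

  // First pass: is a ring for this slab already open?
  for (size_t way = 0; way < ASSOC; way++)
    if (table[base + way].slab == slab_key)
      return base + way;

  // Miss: prefer a free way, otherwise the fullest (victim) way.
  size_t victim = base;
  for (size_t way = 0; way < ASSOC; way++)
  {
    size_t ix = base + way;
    if (table[ix].slab == 0)
      return ix;
    if (table[ix].count > table[victim].count)
      victim = ix;
  }
  return victim;
}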
+ */ + template + class RemoteDeallocCacheBatching + { + static_assert(RINGS > 0); + + std::array, RINGS> open_builder; + std::array open_meta = {0}; + + SNMALLOC_FAST_PATH size_t + ring_set(typename Config::PagemapEntry::SlabMetadata* meta) + { + // See https://github.com/skeeto/hash-prospector for choice of constant + return ((meta->as_key_tweak() * 0x7EFB352D) >> 16) & + bits::mask_bits(DEALLOC_BATCH_RING_SET_BITS); + } + + template + SNMALLOC_FAST_PATH void close_one_pending(Forward forward, size_t ix) + { + auto rmsg = BatchedRemoteMessage::mk_from_freelist_builder( + open_builder[ix], + freelist::Object::key_root, + Config::PagemapEntry::SlabMetadata::as_key_tweak(open_meta[ix])); + + auto& entry = Config::Backend::get_metaentry(address_cast(rmsg)); + + forward(entry.get_remote()->trunc_id(), rmsg); + + open_meta[ix] = 0; + } + + SNMALLOC_FAST_PATH void init_one_pending( + size_t ix, typename Config::PagemapEntry::SlabMetadata* meta) + { + open_builder[ix].init( + 0, + freelist::Object::key_root, + Config::PagemapEntry::SlabMetadata::as_key_tweak(open_meta[ix])); + open_meta[ix] = address_cast(meta); + } + + public: + template + SNMALLOC_FAST_PATH void dealloc( + typename Config::PagemapEntry::SlabMetadata* meta, + freelist::HeadPtr r, + LocalEntropy* entropy, + Forward forward) + { + size_t ix_set = ring_set(meta); + + for (size_t ix_way = 0; ix_way < DEALLOC_BATCH_RING_ASSOC; ix_way++) + { + size_t ix = ix_set + ix_way; + if (address_cast(meta) == open_meta[ix]) + { + open_builder[ix].add( + r, freelist::Object::key_root, meta->as_key_tweak()); + + if constexpr (mitigations(random_preserve)) + { + auto rand_limit = entropy->next_fresh_bits(MAX_CAPACITY_BITS); + if (open_builder[ix].extract_segment_length() >= rand_limit) + { + close_one_pending(forward, ix); + open_meta[ix] = 0; + } + } + else + { + UNUSED(entropy); + } + return; + } + } + + // No hit in cache, so find an available or victim line. 
+ + size_t victim_ix = ix_set; + size_t victim_size = 0; + for (size_t ix_way = 0; ix_way < DEALLOC_BATCH_RING_ASSOC; ix_way++) + { + size_t ix = ix_set + ix_way; + if (open_meta[ix] == 0) + { + victim_ix = ix; + break; + } + + size_t szix = open_builder[ix].extract_segment_length(); + if (szix > victim_size) + { + victim_size = szix; + victim_ix = ix; + } + } + + if (open_meta[victim_ix] != 0) + { + close_one_pending(forward, victim_ix); + } + init_one_pending(victim_ix, meta); + + open_builder[victim_ix].add( + r, freelist::Object::key_root, meta->as_key_tweak()); + } + + template + SNMALLOC_FAST_PATH void close_all(Forward forward) + { + for (size_t ix = 0; ix < RINGS; ix++) + { + if (open_meta[ix] != 0) + { + close_one_pending(forward, ix); + open_meta[ix] = 0; + } + } + } + + void init() + { + open_meta = {0}; + } + }; + + template + struct RemoteDeallocCacheNoBatching + { + void init() {} + + template + void close_all(Forward) + {} + + template + SNMALLOC_FAST_PATH void dealloc( + typename Config::PagemapEntry::SlabMetadata*, + freelist::HeadPtr r, + LocalEntropy* entropy, + Forward forward) + { + UNUSED(entropy); + + auto& entry = Config::Backend::get_metaentry(address_cast(r)); + forward( + entry.get_remote()->trunc_id(), + SingletonRemoteMessage::emplace_in_alloc(r.as_void())); + } + }; + + template + using RemoteDeallocCacheBatchingImpl = std::conditional_t< + (DEALLOC_BATCH_RINGS > 0), + RemoteDeallocCacheBatching, + RemoteDeallocCacheNoBatching>; + /** * Stores the remote deallocation to batch them before sending */ + template struct RemoteDeallocCache { std::array, REMOTE_SLOTS> list; + RemoteDeallocCacheBatchingImpl batching; + /** * The total amount of memory we are waiting for before we will dispatch * to other allocators. Zero can mean we have not initialised the allocator @@ -54,10 +230,12 @@ namespace snmalloc * This does not require initialisation to be safely called. 
*/ template - SNMALLOC_FAST_PATH bool reserve_space(const Entry& entry) + SNMALLOC_FAST_PATH bool reserve_space(const Entry& entry, uint16_t n = 1) { + static_assert(sizeof(n) * 8 > MAX_CAPACITY_BITS); + auto size = - static_cast(sizeclass_full_to_size(entry.get_sizeclass())); + n * static_cast(sizeclass_full_to_size(entry.get_sizeclass())); bool result = capacity > size; if (result) @@ -66,17 +244,37 @@ namespace snmalloc } template - SNMALLOC_FAST_PATH void - dealloc(RemoteAllocator::alloc_id_t target_id, capptr::Alloc p) + SNMALLOC_FAST_PATH void forward( + RemoteAllocator::alloc_id_t target_id, capptr::Alloc msg) + { + list[get_slot(target_id, 0)].add( + RemoteMessage::to_message_link(msg), + RemoteAllocator::key_global, + NO_KEY_TWEAK); + } + + template + SNMALLOC_FAST_PATH void dealloc( + typename Config::PagemapEntry::SlabMetadata* meta, + capptr::Alloc p, + LocalEntropy* entropy) { SNMALLOC_ASSERT(initialised); - auto r = p.template as_reinterpret>(); - list[get_slot(target_id, 0)].add( - r, RemoteAllocator::key_global); + auto r = freelist::Object::make(p); + + batching.dealloc( + meta, + r, + entropy, + [this]( + RemoteAllocator::alloc_id_t target_id, + capptr::Alloc msg) { + forward(target_id, msg); + }); } - template + template bool post( typename Config::LocalState* local_state, RemoteAllocator::alloc_id_t id) { @@ -91,6 +289,12 @@ namespace snmalloc return capptr_domesticate(local_state, p); }; + batching.close_all([this]( + RemoteAllocator::alloc_id_t target_id, + capptr::Alloc msg) { + forward(target_id, msg); + }); + while (true) { auto my_slot = get_slot(id, post_round); @@ -102,9 +306,11 @@ namespace snmalloc if (!list[i].empty()) { - auto [first, last] = list[i].extract_segment(key); + auto [first_, last_] = list[i].extract_segment(key, NO_KEY_TWEAK); + auto first = RemoteMessage::from_message_link(first_); + auto last = RemoteMessage::from_message_link(last_); const auto& entry = - Config::Backend::get_metaentry(address_cast(first)); + Config::Backend::get_metaentry(address_cast(first_)); auto remote = entry.get_remote(); // If the allocator is not correctly aligned, then the bit that is // set implies this is used by the backend, and we should not be @@ -135,7 +341,7 @@ namespace snmalloc // so take copy of the head, mark the last element, // and clear the original list. freelist::Iter<> resend; - list[my_slot].close(resend, key); + list[my_slot].close(resend, key, NO_KEY_TWEAK); post_round++; @@ -147,11 +353,11 @@ namespace snmalloc const auto& entry = Config::Backend::get_metaentry(address_cast(r)); auto i = entry.get_remote()->trunc_id(); size_t slot = get_slot(i, post_round); - list[slot].add(r, key); + list[slot].add(r, key, NO_KEY_TWEAK); } } - // Reset capacity as we have empty everything + // Reset capacity as we have emptied everything capacity = REMOTE_CACHE; return sent_something; @@ -175,9 +381,11 @@ namespace snmalloc { // We do not need to initialise with a particular slab, so pass // a null address. 
- l.init(0, RemoteAllocator::key_global); + l.init(0, RemoteAllocator::key_global, NO_KEY_TWEAK); } capacity = REMOTE_CACHE; + + batching.init(); } }; } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/sizeclasstable.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/sizeclasstable.h index 2037443223d2..4dd2eec0edd7 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/sizeclasstable.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/sizeclasstable.h @@ -24,7 +24,7 @@ namespace snmalloc // For example, 24 byte allocations can be // problematic for some data due to alignment issues. auto sc = static_cast( - bits::to_exp_mant_const(size)); + bits::to_exp_mant_const(size)); SNMALLOC_ASSERT(sc == static_cast(sc)); @@ -165,10 +165,12 @@ namespace snmalloc uint16_t waking; }; + static_assert(sizeof(sizeclass_data_slow::capacity) * 8 > MAX_CAPACITY_BITS); + struct SizeClassTable { - ModArray fast_; - ModArray slow_; + ModArray fast_{}; + ModArray slow_{}; size_t DIV_MULT_SHIFT{0}; @@ -203,7 +205,7 @@ namespace snmalloc return slow_[index.raw()]; } - constexpr SizeClassTable() : fast_(), slow_(), DIV_MULT_SHIFT() + constexpr SizeClassTable() { size_t max_capacity = 0; @@ -214,12 +216,13 @@ namespace snmalloc auto& meta = fast_small(sizeclass); size_t rsize = - bits::from_exp_mant(sizeclass); + bits::from_exp_mant( + sizeclass); meta.size = rsize; size_t slab_bits = bits::max( bits::next_pow2_bits_const(MIN_OBJECT_COUNT * rsize), MIN_CHUNK_BITS); - meta.slab_mask = bits::one_at_bit(slab_bits) - 1; + meta.slab_mask = bits::mask_bits(slab_bits); auto& meta_slow = slow(sizeclass_t::from_small_class(sizeclass)); meta_slow.capacity = @@ -244,8 +247,7 @@ namespace snmalloc { // Calculate reciprocal division constant. auto& meta = fast_small(sizeclass); - meta.div_mult = - ((bits::one_at_bit(DIV_MULT_SHIFT) - 1) / meta.size) + 1; + meta.div_mult = (bits::mask_bits(DIV_MULT_SHIFT) / meta.size) + 1; size_t zero = 0; meta.mod_zero_mult = (~zero / meta.size) + 1; @@ -269,6 +271,9 @@ namespace snmalloc constexpr SizeClassTable sizeclass_metadata = SizeClassTable(); + static_assert( + bits::BITS - sizeclass_metadata.DIV_MULT_SHIFT <= MAX_CAPACITY_BITS); + constexpr size_t DIV_MULT_SHIFT = sizeclass_metadata.DIV_MULT_SHIFT; constexpr size_t sizeclass_to_size(smallsizeclass_t sizeclass) @@ -332,14 +337,11 @@ namespace snmalloc .capacity; } - constexpr address_t start_of_object(sizeclass_t sc, address_t addr) + SNMALLOC_FAST_PATH constexpr size_t slab_index(sizeclass_t sc, address_t addr) { auto meta = sizeclass_metadata.fast(sc); - address_t slab_start = addr & ~meta.slab_mask; size_t offset = addr & meta.slab_mask; - size_t size = meta.size; - - if constexpr (sizeof(addr) >= 8) + if constexpr (sizeof(offset) >= 8) { // Only works for 64 bit multiplication, as the following will overflow in // 32bit. @@ -350,17 +352,27 @@ namespace snmalloc // the slab_mask by making the `div_mult` zero. The link uses 128 bit // multiplication, we have shrunk the range of the calculation to remove // this dependency. 
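// Worked example with illustrative numbers (the real constants come from the
// SizeClassTable constructor above): take size = 24 and a shift of 16, so
//   div_mult = (0xFFFF / 24) + 1 = 2731.
// Then offset = 47 gives (47 * 2731) >> 16 = 128357 >> 16 = 1 == 47 / 24,
// and offset = 48 gives (48 * 2731) >> 16 = 131088 >> 16 = 2 == 48 / 24.
// DIV_MULT_SHIFT is chosen large enough that this stays exact for every
// offset that can appear under slab_mask.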
- size_t offset_start = ((offset * meta.div_mult) >> DIV_MULT_SHIFT) * size; - return slab_start + offset_start; + size_t index = ((offset * meta.div_mult) >> DIV_MULT_SHIFT); + return index; } else { + size_t size = meta.size; if (size == 0) return 0; - return slab_start + (offset / size) * size; + return offset / size; } } + SNMALLOC_FAST_PATH constexpr address_t + start_of_object(sizeclass_t sc, address_t addr) + { + auto meta = sizeclass_metadata.fast(sc); + address_t slab_start = addr & ~meta.slab_mask; + size_t index = slab_index(sc, addr); + return slab_start + (index * meta.size); + } + constexpr size_t index_in_object(sizeclass_t sc, address_t addr) { return addr - start_of_object(sc, addr); @@ -405,7 +417,7 @@ namespace snmalloc { // We subtract and shift to reduce the size of the table, i.e. we don't have // to store a value for every size. - return (s - 1) >> MIN_ALLOC_BITS; + return (s - 1) >> MIN_ALLOC_STEP_BITS; } constexpr size_t sizeclass_lookup_size = @@ -421,13 +433,29 @@ namespace snmalloc constexpr SizeClassLookup() { + constexpr sizeclass_compress_t minimum_class = + static_cast( + size_to_sizeclass_const(MIN_ALLOC_SIZE)); + + /* Some unused sizeclasses is OK, but keep it within reason! */ + static_assert(minimum_class < sizeclass_lookup_size); + size_t curr = 1; - for (sizeclass_compress_t sizeclass = 0; - sizeclass < NUM_SMALL_SIZECLASSES; - sizeclass++) + + sizeclass_compress_t sizeclass = 0; + for (; sizeclass < minimum_class; sizeclass++) + { + for (; curr <= sizeclass_metadata.fast_small(sizeclass).size; + curr += MIN_ALLOC_STEP_SIZE) + { + table[sizeclass_lookup_index(curr)] = minimum_class; + } + } + + for (; sizeclass < NUM_SMALL_SIZECLASSES; sizeclass++) { for (; curr <= sizeclass_metadata.fast_small(sizeclass).size; - curr += 1 << MIN_ALLOC_BITS) + curr += MIN_ALLOC_STEP_SIZE) { auto i = sizeclass_lookup_index(curr); if (i == sizeclass_lookup_size) @@ -478,6 +506,12 @@ namespace snmalloc { if (size > sizeclass_to_size(NUM_SMALL_SIZECLASSES - 1)) { + if (size > bits::one_at_bit(bits::BITS - 1)) + { + // This size is too large, no rounding should occur as will result in a + // failed allocation later. + return size; + } return bits::next_pow2(size); } // If realloc(ptr, 0) returns nullptr, some consumers treat this as a diff --git a/3rdparty/exported/snmalloc/src/snmalloc/override/jemalloc_compat.cc b/3rdparty/exported/snmalloc/src/snmalloc/override/jemalloc_compat.cc index a65554630691..79206fed7d03 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/override/jemalloc_compat.cc +++ b/3rdparty/exported/snmalloc/src/snmalloc/override/jemalloc_compat.cc @@ -4,6 +4,7 @@ #include using namespace snmalloc; + namespace { /** @@ -88,7 +89,9 @@ extern "C" // statistics on fork if built with statistics. SNMALLOC_EXPORT SNMALLOC_USED_FUNCTION inline void _malloc_prefork(void) {} + SNMALLOC_EXPORT SNMALLOC_USED_FUNCTION inline void _malloc_postfork(void) {} + SNMALLOC_EXPORT SNMALLOC_USED_FUNCTION inline void _malloc_first_thread(void) {} @@ -116,7 +119,7 @@ extern "C" * now, this is always implemented to return an error. */ SNMALLOC_EXPORT int - SNMALLOC_NAME_MANGLE(mallctl)(const char*, void*, size_t*, void*, size_t) + SNMALLOC_NAME_MANGLE(mallctl)(const char*, void*, size_t*, void*, size_t) { return ENOENT; } @@ -265,7 +268,7 @@ extern "C" * controlling the thread cache and arena are ignored. 
*/ SNMALLOC_EXPORT void* - SNMALLOC_NAME_MANGLE(rallocx)(void* ptr, size_t size, int flags) + SNMALLOC_NAME_MANGLE(rallocx)(void* ptr, size_t size, int flags) { auto f = JEMallocFlags(flags); size = f.aligned_size(size); diff --git a/3rdparty/exported/snmalloc/src/snmalloc/override/malloc-extensions.h b/3rdparty/exported/snmalloc/src/snmalloc/override/malloc-extensions.h index f7429cd11f3d..1c0f5c8d99f6 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/override/malloc-extensions.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/override/malloc-extensions.h @@ -1,4 +1,5 @@ #pragma once + /** * Malloc extensions * diff --git a/3rdparty/exported/snmalloc/src/snmalloc/override/malloc.cc b/3rdparty/exported/snmalloc/src/snmalloc/override/malloc.cc index 512ba3dceb38..cbef9428c953 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/override/malloc.cc +++ b/3rdparty/exported/snmalloc/src/snmalloc/override/malloc.cc @@ -1,8 +1,5 @@ #include "override.h" -#include -#include - using namespace snmalloc; #ifndef MALLOC_USABLE_SIZE_QUALIFIER @@ -13,54 +10,44 @@ extern "C" { SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(__malloc_end_pointer)(void* ptr) { - return ThreadAlloc::get().external_pointer(ptr); + return snmalloc::libc::__malloc_end_pointer(ptr); } SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(malloc)(size_t size) { - return ThreadAlloc::get().alloc(size); + return snmalloc::libc::malloc(size); } SNMALLOC_EXPORT void SNMALLOC_NAME_MANGLE(free)(void* ptr) { - ThreadAlloc::get().dealloc(ptr); + snmalloc::libc::free(ptr); } SNMALLOC_EXPORT void SNMALLOC_NAME_MANGLE(cfree)(void* ptr) { - ThreadAlloc::get().dealloc(ptr); - } - - /** - * Clang was helpfully inlining the constant return value, and - * thus converting from a tail call to an ordinary call. - */ - SNMALLOC_EXPORT inline void* snmalloc_not_allocated = nullptr; - - static SNMALLOC_SLOW_PATH void* SNMALLOC_NAME_MANGLE(snmalloc_set_error)() - { - errno = ENOMEM; - return snmalloc_not_allocated; + snmalloc::libc::free(ptr); } SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(calloc)(size_t nmemb, size_t size) { - bool overflow = false; - size_t sz = bits::umul(size, nmemb, overflow); - if (SNMALLOC_UNLIKELY(overflow)) - { - return SNMALLOC_NAME_MANGLE(snmalloc_set_error)(); - } - return ThreadAlloc::get().alloc(sz); + return snmalloc::libc::calloc(nmemb, size); } SNMALLOC_EXPORT size_t SNMALLOC_NAME_MANGLE(malloc_usable_size)( MALLOC_USABLE_SIZE_QUALIFIER void* ptr) { - return ThreadAlloc::get().alloc_size(ptr); + return snmalloc::libc::malloc_usable_size(ptr); } +#ifdef _WIN32 + SNMALLOC_EXPORT + size_t SNMALLOC_NAME_MANGLE(_msize)(MALLOC_USABLE_SIZE_QUALIFIER void* ptr) + { + return snmalloc::libc::malloc_usable_size(ptr); + } +#endif + SNMALLOC_EXPORT size_t SNMALLOC_NAME_MANGLE(malloc_good_size)(size_t size) { @@ -69,162 +56,53 @@ extern "C" SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(realloc)(void* ptr, size_t size) { - auto& a = ThreadAlloc::get(); - size_t sz = a.alloc_size(ptr); - // Keep the current allocation if the given size is in the same sizeclass. - if (sz == round_size(size)) - { -#ifdef SNMALLOC_PASS_THROUGH - // snmallocs alignment guarantees can be broken by realloc in pass-through - // this is not exercised, by existing clients, but is tested. 
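// Illustrative sketch of the fast path above, using a typical default
// sizeclass table (the concrete sizes are assumptions, not taken from this
// patch):
//   void* p = malloc(100);  // rounds up to a 112-byte sizeclass
//   p = realloc(p, 110);    // round_size(110) is also 112, so p is returned
//                           // unchanged and nothing is copied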
- if (pointer_align_up(ptr, natural_alignment(size)) == ptr) - return ptr; -#else - return ptr; -#endif - } - - if (size == (size_t)-1) - { - errno = ENOMEM; - return nullptr; - } - - void* p = a.alloc(size); - if (SNMALLOC_LIKELY(p != nullptr)) - { - sz = bits::min(size, sz); - // Guard memcpy as GCC is assuming not nullptr for ptr after the memcpy - // otherwise. - if (sz != 0) - memcpy(p, ptr, sz); - a.dealloc(ptr); - } - else if (SNMALLOC_LIKELY(size == 0)) - { - a.dealloc(ptr); - } - else - { - errno = ENOMEM; - } - return p; + return snmalloc::libc::realloc(ptr, size); } #if !defined(SNMALLOC_NO_REALLOCARRAY) SNMALLOC_EXPORT void* - SNMALLOC_NAME_MANGLE(reallocarray)(void* ptr, size_t nmemb, size_t size) + SNMALLOC_NAME_MANGLE(reallocarray)(void* ptr, size_t nmemb, size_t size) { - bool overflow = false; - size_t sz = bits::umul(size, nmemb, overflow); - if (overflow) - { - errno = ENOMEM; - return nullptr; - } - return SNMALLOC_NAME_MANGLE(realloc)(ptr, sz); + return snmalloc::libc::reallocarray(ptr, nmemb, size); } #endif #if !defined(SNMALLOC_NO_REALLOCARR) SNMALLOC_EXPORT int - SNMALLOC_NAME_MANGLE(reallocarr)(void* ptr_, size_t nmemb, size_t size) - { - int err = errno; - auto& a = ThreadAlloc::get(); - bool overflow = false; - size_t sz = bits::umul(size, nmemb, overflow); - if (sz == 0) - { - errno = err; - return 0; - } - if (overflow) - { - errno = err; - return EOVERFLOW; - } - - void** ptr = reinterpret_cast(ptr_); - void* p = a.alloc(sz); - if (p == nullptr) - { - errno = ENOMEM; - return ENOMEM; - } - - sz = bits::min(sz, a.alloc_size(*ptr)); - - SNMALLOC_ASSUME(*ptr != nullptr || sz == 0); - // Guard memcpy as GCC is assuming not nullptr for ptr after the memcpy - // otherwise. - if (sz != 0) - memcpy(p, *ptr, sz); - errno = err; - a.dealloc(*ptr); - *ptr = p; - return 0; + SNMALLOC_NAME_MANGLE(reallocarr)(void* ptr, size_t nmemb, size_t size) + { + return snmalloc::libc::reallocarr(ptr, nmemb, size); } #endif SNMALLOC_EXPORT void* - SNMALLOC_NAME_MANGLE(memalign)(size_t alignment, size_t size) + SNMALLOC_NAME_MANGLE(memalign)(size_t alignment, size_t size) { - if ((alignment == 0) || (alignment == size_t(-1))) - { - errno = EINVAL; - return nullptr; - } - - if ((size + alignment) < size) - { - errno = ENOMEM; - return nullptr; - } - - return SNMALLOC_NAME_MANGLE(malloc)(aligned_size(alignment, size)); + return snmalloc::libc::memalign(alignment, size); } SNMALLOC_EXPORT void* - SNMALLOC_NAME_MANGLE(aligned_alloc)(size_t alignment, size_t size) + SNMALLOC_NAME_MANGLE(aligned_alloc)(size_t alignment, size_t size) { - SNMALLOC_ASSERT((size % alignment) == 0); - return SNMALLOC_NAME_MANGLE(memalign)(alignment, size); + return snmalloc::libc::aligned_alloc(alignment, size); } SNMALLOC_EXPORT int SNMALLOC_NAME_MANGLE(posix_memalign)( void** memptr, size_t alignment, size_t size) { - if ((alignment < sizeof(uintptr_t) || ((alignment & (alignment - 1)) != 0))) - { - return EINVAL; - } - - void* p = SNMALLOC_NAME_MANGLE(memalign)(alignment, size); - if (SNMALLOC_UNLIKELY(p == nullptr)) - { - if (size != 0) - return ENOMEM; - } - *memptr = p; - return 0; + return snmalloc::libc::posix_memalign(memptr, alignment, size); } #if !defined(__FreeBSD__) && !defined(__OpenBSD__) SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(valloc)(size_t size) { - return SNMALLOC_NAME_MANGLE(memalign)(OS_PAGE_SIZE, size); + return snmalloc::libc::memalign(OS_PAGE_SIZE, size); } #endif SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(pvalloc)(size_t size) { - if (size == size_t(-1)) - { - errno = ENOMEM; - 
return nullptr; - } - return SNMALLOC_NAME_MANGLE(memalign)( + return snmalloc::libc::memalign( OS_PAGE_SIZE, (size + OS_PAGE_SIZE - 1) & ~(OS_PAGE_SIZE - 1)); } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/override/memcpy.cc b/3rdparty/exported/snmalloc/src/snmalloc/override/memcpy.cc index c2283ec1e327..c6053ae02abc 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/override/memcpy.cc +++ b/3rdparty/exported/snmalloc/src/snmalloc/override/memcpy.cc @@ -6,7 +6,7 @@ extern "C" * Snmalloc checked memcpy. */ SNMALLOC_EXPORT void* - SNMALLOC_NAME_MANGLE(memcpy)(void* dst, const void* src, size_t len) + SNMALLOC_NAME_MANGLE(memcpy)(void* dst, const void* src, size_t len) { return snmalloc::memcpy(dst, src, len); } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/override/new.cc b/3rdparty/exported/snmalloc/src/snmalloc/override/new.cc index 29372a7b231d..19aa9f58c963 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/override/new.cc +++ b/3rdparty/exported/snmalloc/src/snmalloc/override/new.cc @@ -1,4 +1,4 @@ -#include "malloc.cc" +#include "snmalloc/snmalloc.h" #ifdef _WIN32 # ifdef __clang__ @@ -16,106 +16,98 @@ # endif #endif -using namespace snmalloc; - void* operator new(size_t size) { - return ThreadAlloc::get().alloc(size); + return snmalloc::libc::malloc(size); } void* operator new[](size_t size) { - return ThreadAlloc::get().alloc(size); + return snmalloc::libc::malloc(size); } void* operator new(size_t size, std::nothrow_t&) { - return ThreadAlloc::get().alloc(size); + return snmalloc::libc::malloc(size); } void* operator new[](size_t size, std::nothrow_t&) { - return ThreadAlloc::get().alloc(size); + return snmalloc::libc::malloc(size); } -void operator delete(void* p)EXCEPTSPEC +void operator delete(void* p) EXCEPTSPEC { - ThreadAlloc::get().dealloc(p); + snmalloc::libc::free(p); } -void operator delete(void* p, size_t size)EXCEPTSPEC +void operator delete(void* p, size_t size) EXCEPTSPEC { - if (p == nullptr) - return; - ThreadAlloc::get().dealloc(p, size); + snmalloc::libc::free_sized(p, size); } void operator delete(void* p, std::nothrow_t&) { - ThreadAlloc::get().dealloc(p); + snmalloc::libc::free(p); } void operator delete[](void* p) EXCEPTSPEC { - ThreadAlloc::get().dealloc(p); + snmalloc::libc::free(p); } void operator delete[](void* p, size_t size) EXCEPTSPEC { - if (p == nullptr) - return; - ThreadAlloc::get().dealloc(p, size); + snmalloc::libc::free_sized(p, size); } void operator delete[](void* p, std::nothrow_t&) { - ThreadAlloc::get().dealloc(p); + snmalloc::libc::free(p); } void* operator new(size_t size, std::align_val_t val) { - size = aligned_size(size_t(val), size); - return ThreadAlloc::get().alloc(size); + size = snmalloc::aligned_size(size_t(val), size); + return snmalloc::libc::malloc(size); } void* operator new[](size_t size, std::align_val_t val) { - size = aligned_size(size_t(val), size); - return ThreadAlloc::get().alloc(size); + size = snmalloc::aligned_size(size_t(val), size); + return snmalloc::libc::malloc(size); } void* operator new(size_t size, std::align_val_t val, std::nothrow_t&) { - size = aligned_size(size_t(val), size); - return ThreadAlloc::get().alloc(size); + size = snmalloc::aligned_size(size_t(val), size); + return snmalloc::libc::malloc(size); } void* operator new[](size_t size, std::align_val_t val, std::nothrow_t&) { - size = aligned_size(size_t(val), size); - return ThreadAlloc::get().alloc(size); + size = snmalloc::aligned_size(size_t(val), size); + return snmalloc::libc::malloc(size); } -void operator delete(void* p, 
std::align_val_t)EXCEPTSPEC +void operator delete(void* p, std::align_val_t) EXCEPTSPEC { - ThreadAlloc::get().dealloc(p); + snmalloc::libc::free(p); } void operator delete[](void* p, std::align_val_t) EXCEPTSPEC { - ThreadAlloc::get().dealloc(p); + snmalloc::libc::free(p); } -void operator delete(void* p, size_t size, std::align_val_t val)EXCEPTSPEC +void operator delete(void* p, size_t size, std::align_val_t val) EXCEPTSPEC { - size = aligned_size(size_t(val), size); - ThreadAlloc::get().dealloc(p, size); + size = snmalloc::aligned_size(size_t(val), size); + snmalloc::libc::free_sized(p, size); } void operator delete[](void* p, size_t size, std::align_val_t val) EXCEPTSPEC { - if (p == nullptr) - return; - size = aligned_size(size_t(val), size); - ThreadAlloc::get().dealloc(p, size); + size = snmalloc::aligned_size(size_t(val), size); + snmalloc::libc::free_sized(p, size); } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/override/override.h b/3rdparty/exported/snmalloc/src/snmalloc/override/override.h index 0ca70bc11a38..5dda309c0b74 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/override/override.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/override/override.h @@ -1,6 +1,6 @@ #pragma once -#include "../global/global.h" +#include "snmalloc/snmalloc.h" #ifndef SNMALLOC_EXPORT # define SNMALLOC_EXPORT diff --git a/3rdparty/exported/snmalloc/src/snmalloc/override/rust.cc b/3rdparty/exported/snmalloc/src/snmalloc/override/rust.cc index 64da984ca71e..4a5dcdaf94f9 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/override/rust.cc +++ b/3rdparty/exported/snmalloc/src/snmalloc/override/rust.cc @@ -1,5 +1,5 @@ #define SNMALLOC_NAME_MANGLE(a) sn_##a -#include "malloc.cc" +#include "snmalloc/snmalloc.h" #include @@ -10,19 +10,19 @@ using namespace snmalloc; extern "C" SNMALLOC_EXPORT void* - SNMALLOC_NAME_MANGLE(rust_alloc)(size_t alignment, size_t size) +SNMALLOC_NAME_MANGLE(rust_alloc)(size_t alignment, size_t size) { return ThreadAlloc::get().alloc(aligned_size(alignment, size)); } extern "C" SNMALLOC_EXPORT void* - SNMALLOC_NAME_MANGLE(rust_alloc_zeroed)(size_t alignment, size_t size) +SNMALLOC_NAME_MANGLE(rust_alloc_zeroed)(size_t alignment, size_t size) { return ThreadAlloc::get().alloc(aligned_size(alignment, size)); } extern "C" SNMALLOC_EXPORT void - SNMALLOC_NAME_MANGLE(rust_dealloc)(void* ptr, size_t alignment, size_t size) +SNMALLOC_NAME_MANGLE(rust_dealloc)(void* ptr, size_t alignment, size_t size) { ThreadAlloc::get().dealloc(ptr, aligned_size(alignment, size)); } @@ -48,6 +48,12 @@ extern "C" SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(rust_realloc)( extern "C" SNMALLOC_EXPORT void SNMALLOC_NAME_MANGLE(rust_statistics)( size_t* current_memory_usage, size_t* peak_memory_usage) { - *current_memory_usage = StandardConfig::Backend::get_current_usage(); - *peak_memory_usage = StandardConfig::Backend::get_peak_usage(); -} \ No newline at end of file + *current_memory_usage = Alloc::Config::Backend::get_current_usage(); + *peak_memory_usage = Alloc::Config::Backend::get_peak_usage(); +} + +extern "C" SNMALLOC_EXPORT size_t +SNMALLOC_NAME_MANGLE(rust_usable_size)(const void* ptr) +{ + return ThreadAlloc::get().alloc_size(ptr); +} diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_apple.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_apple.h index f023e195af76..f6a7f1a2d600 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_apple.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_apple.h @@ -15,8 +15,39 @@ # include # include +# if __has_include() 
&& __has_include() +# include +# include +# if defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && \ + defined(MAC_OS_X_VERSION_14_4) +# if __MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_14_4 +# define SNMALLOC_APPLE_HAS_OS_SYNC_WAIT_ON_ADDRESS +# endif +# endif +# endif + namespace snmalloc { +# ifdef SNMALLOC_APPLE_HAS_OS_SYNC_WAIT_ON_ADDRESS + // For macos 14.4+, we use os_sync_wait_on_address and friends. It is + // available as a part of stable API, and the usage is more straightforward. + extern "C" int os_sync_wait_on_address( + void* addr, uint64_t value, size_t size, uint32_t flags); + + extern "C" int + os_sync_wake_by_address_any(void* addr, size_t size, uint32_t flags); + + extern "C" int + os_sync_wake_by_address_all(void* addr, size_t size, uint32_t flags); +# else + // For platforms before macos 14.4, we use __ulock_wait and friends. It is + // available since macos 10.12. + extern "C" int + __ulock_wait(uint32_t lock_type, void* addr, uint64_t value, uint32_t); + + extern "C" int __ulock_wake(uint32_t lock_type, void* addr, uint64_t); +# endif + /** * PAL implementation for Apple systems (macOS, iOS, watchOS, tvOS...). */ @@ -28,7 +59,7 @@ namespace snmalloc * The features exported by this PAL. */ static constexpr uint64_t pal_features = - AlignedAllocation | LazyCommit | Entropy | Time; + AlignedAllocation | LazyCommit | Entropy | Time | WaitOnAddress; /* * `page_size` @@ -281,6 +312,76 @@ namespace snmalloc return result; } + + using WaitingWord = uint32_t; +# ifndef SNMALLOC_APPLE_HAS_OS_SYNC_WAIT_ON_ADDRESS + static constexpr uint32_t UL_COMPARE_AND_WAIT = 0x0000'0001; + static constexpr uint32_t ULF_NO_ERRNO = 0x0100'0000; + static constexpr uint32_t ULF_WAKE_ALL = 0x0000'0100; +# endif + + template + static void wait_on_address(std::atomic& addr, T expected) + { + [[maybe_unused]] int errno_backup = errno; + while (addr.load(std::memory_order_relaxed) == expected) + { +# ifdef SNMALLOC_APPLE_HAS_OS_SYNC_WAIT_ON_ADDRESS + if ( + os_sync_wait_on_address( + &addr, static_cast(expected), sizeof(T), 0) != -1) + { + errno = errno_backup; + return; + } +# else + if ( + __ulock_wait( + UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, + &addr, + static_cast(expected), + 0) != -1) + { + return; + } +# endif + } + } + + template + static void notify_one_on_address(std::atomic& addr) + { +# ifdef SNMALLOC_APPLE_HAS_OS_SYNC_WAIT_ON_ADDRESS + os_sync_wake_by_address_any(&addr, sizeof(T), 0); +# else + // __ulock_wake can get interrupted, so retry until either waking up a + // waiter or failing because there are no waiters (ENOENT). + for (;;) + { + int ret = __ulock_wake(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, &addr, 0); + if (ret >= 0 || ret == -ENOENT) + return; + } +# endif + } + + template + static void notify_all_on_address(std::atomic& addr) + { +# ifdef SNMALLOC_APPLE_HAS_OS_SYNC_WAIT_ON_ADDRESS + os_sync_wake_by_address_all(&addr, sizeof(T), 0); +# else + // __ulock_wake can get interrupted, so retry until either waking up a + // waiter or failing because there are no waiters (ENOENT). 
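// Illustrative analogy (not from the upstream sources): the protocol these
// PAL hooks implement matches C++20 std::atomic<T>::wait / notify_one, e.g.
//   std::atomic<uint32_t> flag{0};
//   // waiter:   Pal::wait_on_address(flag, 0u);  blocks while flag == 0
//   // notifier: flag.store(1); Pal::notify_one_on_address(flag);
// Spurious wake-ups are allowed, which is why wait_on_address re-checks the
// value in a loop before sleeping again.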
+ for (;;) + { + int ret = __ulock_wake( + UL_COMPARE_AND_WAIT | ULF_NO_ERRNO | ULF_WAKE_ALL, &addr, 0); + if (ret >= 0 || ret == -ENOENT) + return; + } +# endif + } }; } // namespace snmalloc #endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_concept.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_concept.h index 44dec410a000..7efbd08b7d4c 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_concept.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_concept.h @@ -19,62 +19,54 @@ namespace snmalloc * PALs must advertize the bit vector of their supported features. */ template - concept IsPAL_static_features = requires() - { - typename std::integral_constant; - }; + concept IsPAL_static_features = + requires() { + typename std::integral_constant; + }; /** * PALs must advertise the size of the address space and their page size */ template - concept IsPAL_static_sizes = requires() - { - typename std::integral_constant; - typename std::integral_constant; - }; + concept IsPAL_static_sizes = + requires() { + typename std::integral_constant; + typename std::integral_constant; + }; /** * PALs expose an error reporting function which takes a const C string. */ template - concept IsPAL_error = requires(const char* const str) - { - { - PAL::error(str) - } - ->ConceptSame; - }; + concept IsPAL_error = requires(const char* const str) { + { + PAL::error(str) + } -> ConceptSame; + }; /** * PALs expose a basic library of memory operations. */ template - concept IsPAL_memops = requires(void* vp, std::size_t sz) - { - { - PAL::notify_not_using(vp, sz) - } - noexcept->ConceptSame; - - { - PAL::template notify_using(vp, sz) - } - noexcept->ConceptSame; - { - PAL::template notify_using(vp, sz) - } - noexcept->ConceptSame; - - { - PAL::template zero(vp, sz) - } - noexcept->ConceptSame; - { - PAL::template zero(vp, sz) - } - noexcept->ConceptSame; - }; + concept IsPAL_memops = requires(void* vp, std::size_t sz) { + { + PAL::notify_not_using(vp, sz) + } noexcept -> ConceptSame; + + { + PAL::template notify_using(vp, sz) + } noexcept -> ConceptSame; + { + PAL::template notify_using(vp, sz) + } noexcept -> ConceptSame; + + { + PAL::template zero(vp, sz) + } noexcept -> ConceptSame; + { + PAL::template zero(vp, sz) + } noexcept -> ConceptSame; + }; /** * The Pal must provide a thread id for debugging. It should not return @@ -82,66 +74,55 @@ namespace snmalloc * places. */ template - concept IsPAL_tid = requires() - { - { - PAL::get_tid() - } - noexcept->ConceptSame; - }; + concept IsPAL_tid = + requires() { + { + PAL::get_tid() + } noexcept -> ConceptSame; + }; /** * Absent any feature flags, the PAL must support a crude primitive allocator */ template - concept IsPAL_reserve = requires(PAL p, std::size_t sz) - { - { - PAL::reserve(sz) - } - noexcept->ConceptSame; - }; + concept IsPAL_reserve = requires(PAL p, std::size_t sz) { + { + PAL::reserve(sz) + } noexcept -> ConceptSame; + }; /** * Some PALs expose a richer allocator which understands aligned allocations */ template - concept IsPAL_reserve_aligned = requires(std::size_t sz) - { - { - PAL::template reserve_aligned(sz) - } - noexcept->ConceptSame; - { - PAL::template reserve_aligned(sz) - } - noexcept->ConceptSame; - }; + concept IsPAL_reserve_aligned = requires(std::size_t sz) { + { + PAL::template reserve_aligned(sz) + } noexcept -> ConceptSame; + { + PAL::template reserve_aligned(sz) + } noexcept -> ConceptSame; + }; /** * Some PALs can provide memory pressure callbacks. 
*/ template - concept IsPAL_mem_low_notify = requires(PalNotificationObject* pno) - { - { - PAL::expensive_low_memory_check() - } - ->ConceptSame; - { - PAL::register_for_low_memory_callback(pno) - } - ->ConceptSame; - }; + concept IsPAL_mem_low_notify = requires(PalNotificationObject* pno) { + { + PAL::expensive_low_memory_check() + } -> ConceptSame; + { + PAL::register_for_low_memory_callback(pno) + } -> ConceptSame; + }; template - concept IsPAL_get_entropy64 = requires() - { - { - PAL::get_entropy64() - } - ->ConceptSame; - }; + concept IsPAL_get_entropy64 = requires() { + { + PAL::get_entropy64() + } -> ConceptSame; + }; /** * PALs ascribe to the conjunction of several concepts. These are broken diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_consts.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_consts.h index 83aa52ef2f95..c4c4c25a244a 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_consts.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_consts.h @@ -3,7 +3,6 @@ #include "../ds_core/ds_core.h" #include -#include namespace snmalloc { @@ -55,6 +54,17 @@ namespace snmalloc * This Pal provides a millisecond time source */ Time = (1 << 5), + + /** + * This Pal provides selective core dumps, so + * modify which parts get dumped. + */ + CoreDump = (1 << 6), + + /** + * This Pal provides a way for parking threads at a specific address. + */ + WaitOnAddress = (1 << 7), }; /** diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_ds.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_ds.h index 3da37cf46159..008d1f2c2210 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_ds.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_ds.h @@ -3,7 +3,6 @@ #include "../ds_core/ds_core.h" #include -#include namespace snmalloc { diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_freebsd.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_freebsd.h index 86a6576e49d1..d967dc1b566e 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_freebsd.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_freebsd.h @@ -13,6 +13,8 @@ # endif # endif +# include + /** * Direct system-call wrappers so that we can skip libthr interception, which * won't work if malloc is broken. @@ -20,6 +22,7 @@ */ extern "C" ssize_t __sys_writev(int fd, const struct iovec* iov, int iovcnt); extern "C" int __sys_fsync(int fd); + /// @} namespace snmalloc @@ -43,7 +46,8 @@ namespace snmalloc * field is declared explicitly to remind anyone modifying this class to * add new features that they should add any required feature flags. */ - static constexpr uint64_t pal_features = PALBSD_Aligned::pal_features; + static constexpr uint64_t pal_features = + PALBSD_Aligned::pal_features | CoreDump | WaitOnAddress; /** * FreeBSD uses atypically small address spaces on its 64 bit RISC machines. @@ -54,6 +58,7 @@ namespace snmalloc static constexpr size_t address_bits = (Aal::bits == 32) ? Aal::address_bits : (Aal::aal_name == RISCV ? 38 : Aal::address_bits); + // TODO, if we ever backport to MIPS, this should yield 39 there. /** @@ -68,10 +73,8 @@ namespace snmalloc /** * Notify platform that we will not be using these pages. * - * We use the `MADV_FREE` and `NADV_NOCORE` flags to `madvise`. The first - * allows the system to discard the page and replace it with a CoW mapping - * of the zero page. The second prevents this mapping from appearing in - * core files. + * We use the `MADV_FREE` flag to `madvise`. 
This allows the system to + * discard the page and replace it with a CoW mapping of the zero page. */ static void notify_not_using(void* p, size_t size) noexcept { @@ -80,7 +83,6 @@ namespace snmalloc if constexpr (DEBUG) memset(p, 0x5a, size); - madvise(p, size, MADV_NOCORE); madvise(p, size, MADV_FREE); if constexpr (mitigations(pal_enforce_access)) @@ -90,28 +92,19 @@ namespace snmalloc } /** - * Notify platform that we will be using these pages for reading. - * - * This is used only for pages full of zeroes and so we exclude them from - * core dumps. + * Notify platform that these pages should be included in a core dump. */ - static void notify_using_readonly(void* p, size_t size) noexcept + static void notify_do_dump(void* p, size_t size) noexcept { - PALBSD_Aligned::notify_using_readonly(p, size); - madvise(p, size, MADV_NOCORE); + madvise(p, size, MADV_CORE); } /** - * Notify platform that we will be using these pages. - * - * We may have previously marked this memory as not being included in core - * files, so mark it for inclusion again. + * Notify platform that these pages should not be included in a core dump. */ - template - static void notify_using(void* p, size_t size) noexcept + static void notify_do_not_dump(void* p, size_t size) noexcept { - PALBSD_Aligned::notify_using(p, size); - madvise(p, size, MADV_CORE); + madvise(p, size, MADV_NOCORE); } # if defined(__CHERI_PURE_CAPABILITY__) @@ -140,6 +133,53 @@ namespace snmalloc p.unsafe_ptr(), ~static_cast(CHERI_PERM_SW_VMEM))); } # endif + + using WaitingWord = unsigned int; + + template + static void wait_on_address(std::atomic& addr, T expected) + { + static_assert( + sizeof(T) == sizeof(WaitingWord) && alignof(T) == alignof(WaitingWord), + "T must be the same size and alignment as WaitingWord"); + int backup = errno; + while (addr.load(std::memory_order_relaxed) == expected) + { + int ret = _umtx_op( + &addr, + UMTX_OP_WAIT_UINT_PRIVATE, + static_cast(expected), + nullptr, + nullptr); + + if (ret == 0) + break; + } + errno = backup; + } + + template + static void notify_one_on_address(std::atomic& addr) + { + static_assert( + sizeof(T) == sizeof(WaitingWord) && alignof(T) == alignof(WaitingWord), + "T must be the same size and alignment as WaitingWord"); + _umtx_op(&addr, UMTX_OP_WAKE_PRIVATE, 1, nullptr, nullptr); + } + + template + static void notify_all_on_address(std::atomic& addr) + { + static_assert( + sizeof(T) == sizeof(WaitingWord) && alignof(T) == alignof(WaitingWord), + "T must be the same size and alignment as WaitingWord"); + _umtx_op( + &addr, + UMTX_OP_WAKE_PRIVATE, + static_cast(INT_MAX), + nullptr, + nullptr); + } }; } // namespace snmalloc #endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_haiku.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_haiku.h index bafe23c48781..bbc9e077c0a0 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_haiku.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_haiku.h @@ -37,15 +37,6 @@ namespace snmalloc SNMALLOC_ASSERT(is_aligned_block(p, size)); posix_madvise(p, size, POSIX_MADV_DONTNEED); } - - /** - * Hopefully a temporary workaround until the kernel random feature - * is exposed properly in the userspace ? 
- */ - static uint64_t get_entropy64() - { - return PALPOSIX::dev_urandom(); - } }; } // namespace snmalloc #endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_linux.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_linux.h index 6f131b0cc6fd..e1774fbd3dc4 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_linux.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_linux.h @@ -14,6 +14,10 @@ # include # endif +# if defined(SNMALLOC_HAS_LINUX_FUTEX_H) +# include +# endif + extern "C" int puts(const char* str); namespace snmalloc @@ -27,7 +31,12 @@ namespace snmalloc * * We always make sure that linux has entropy support. */ - static constexpr uint64_t pal_features = PALPOSIX::pal_features | Entropy; + static constexpr uint64_t pal_features = PALPOSIX::pal_features | Entropy | + CoreDump +# ifdef SNMALLOC_HAS_LINUX_FUTEX_H + | WaitOnAddress +# endif + ; static constexpr size_t page_size = Aal::aal_name == PowerPC ? 0x10000 : PALPOSIX::page_size; @@ -58,7 +67,6 @@ namespace snmalloc void* p = PALPOSIX::reserve(size); if (p) { - madvise(p, size, MADV_DONTDUMP); # ifdef SNMALLOC_PAGEID # ifndef PR_SET_VMA # define PR_SET_VMA 0x53564d41 @@ -125,7 +133,6 @@ namespace snmalloc if constexpr (DEBUG) memset(p, 0x5a, size); - madvise(p, size, MADV_DONTDUMP); madvise(p, size, madvise_free_flags); if constexpr (mitigations(pal_enforce_access)) @@ -135,25 +142,19 @@ namespace snmalloc } /** - * Notify platform that we will be using these pages for reading. - * - * This is used only for pages full of zeroes and so we exclude them from - * core dumps. + * Notify platform that these pages should be included in a core dump. */ - static void notify_using_readonly(void* p, size_t size) noexcept + static void notify_do_dump(void* p, size_t size) noexcept { - PALPOSIX::notify_using_readonly(p, size); - madvise(p, size, MADV_DONTDUMP); + madvise(p, size, MADV_DODUMP); } /** - * Notify platform that we will be using these pages. + * Notify platform that these pages should not be included in a core dump. */ - template - static void notify_using(void* p, size_t size) noexcept + static void notify_do_not_dump(void* p, size_t size) noexcept { - PALPOSIX::notify_using(p, size); - madvise(p, size, MADV_DODUMP); + madvise(p, size, MADV_DONTDUMP); } static uint64_t get_entropy64() @@ -171,6 +172,7 @@ namespace snmalloc uint64_t result; char buffer[sizeof(uint64_t)]; }; + ssize_t ret; // give a try to SYS_getrandom @@ -239,6 +241,47 @@ namespace snmalloc // its APIs are not exception-free. 
return dev_urandom(); } + +# ifdef SNMALLOC_HAS_LINUX_FUTEX_H + using WaitingWord = int; + + template + static void wait_on_address(std::atomic& addr, T expected) + { + int backup = errno; + static_assert( + sizeof(T) == sizeof(WaitingWord) && alignof(T) == alignof(WaitingWord), + "T must be the same size and alignment as WaitingWord"); + while (addr.load(std::memory_order_relaxed) == expected) + { + long ret = syscall( + SYS_futex, &addr, FUTEX_WAIT_PRIVATE, expected, nullptr, nullptr, 0); + + if (ret == 0) + break; + } + errno = backup; + } + + template + static void notify_one_on_address(std::atomic& addr) + { + static_assert( + sizeof(T) == sizeof(WaitingWord) && alignof(T) == alignof(WaitingWord), + "T must be the same size and alignment as WaitingWord"); + syscall(SYS_futex, &addr, FUTEX_WAKE_PRIVATE, 1, nullptr, nullptr, 0); + } + + template + static void notify_all_on_address(std::atomic& addr) + { + static_assert( + sizeof(T) == sizeof(WaitingWord) && alignof(T) == alignof(WaitingWord), + "T must be the same size and alignment as WaitingWord"); + syscall( + SYS_futex, &addr, FUTEX_WAKE_PRIVATE, INT_MAX, nullptr, nullptr, 0); + } +# endif }; } // namespace snmalloc #endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_netbsd.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_netbsd.h index 6e91d98bf266..250826423e52 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_netbsd.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_netbsd.h @@ -14,6 +14,7 @@ */ extern "C" ssize_t _sys_writev(int fd, const struct iovec* iov, int iovcnt); extern "C" int _sys_fsync(int fd); + /// @} namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_noalloc.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_noalloc.h index 94bc61e020b5..49b0d4b6a482 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_noalloc.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_noalloc.h @@ -17,7 +17,7 @@ namespace snmalloc * The minimal subset of a PAL that we need for delegation */ template - concept PALNoAllocBase = IsPAL_static_sizes&& IsPAL_error; + concept PALNoAllocBase = IsPAL_static_sizes && IsPAL_error; #endif /** diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_open_enclave.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_open_enclave.h index be0f141beca4..4966eccc7d56 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_open_enclave.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_open_enclave.h @@ -20,6 +20,7 @@ namespace snmalloc UNUSED(str); oe_abort(); } + static constexpr size_t address_bits = Aal::address_bits; static constexpr size_t page_size = Aal::smallest_page_size; }; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_posix.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_posix.h index 6c9ae05e85af..1214ff327030 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_posix.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_posix.h @@ -6,10 +6,11 @@ #if defined(SNMALLOC_BACKTRACE_HEADER) # include SNMALLOC_BACKTRACE_HEADER #endif +#include #include #include +#include #include -#include #include #include #include @@ -130,8 +131,16 @@ namespace snmalloc | Entropy #endif ; - +#ifdef SNMALLOC_PAGESIZE + static_assert( + bits::is_pow2(SNMALLOC_PAGESIZE), "Page size must be a power of 2"); + static constexpr size_t page_size = SNMALLOC_PAGESIZE; +#elif defined(PAGESIZE) + static constexpr size_t page_size = + bits::max(Aal::smallest_page_size, static_cast(PAGESIZE)); +#else static 
constexpr size_t page_size = Aal::smallest_page_size; +#endif /** * Address bits are potentially mediated by some POSIX OSes, but generally @@ -399,6 +408,7 @@ namespace snmalloc uint64_t result; char buffer[sizeof(uint64_t)]; }; + ssize_t ret; int flags = O_RDONLY; #if defined(O_CLOEXEC) diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_timer_default.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_timer_default.h index c7761effe57e..d70abd5262be 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_timer_default.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_timer_default.h @@ -4,8 +4,6 @@ #include "pal_consts.h" #include "pal_ds.h" -#include - namespace snmalloc { template diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_windows.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_windows.h index 2ab0bfc1ff26..d025b1bebb36 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_windows.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_windows.h @@ -20,6 +20,7 @@ # if (NTDDI_VERSION >= NTDDI_WIN10_RS5) && \ (WINVER >= _WIN32_WINNT_WIN10) && !defined(USE_SYSTEMATIC_TESTING) # define PLATFORM_HAS_VIRTUALALLOC2 +# define PLATFORM_HAS_WAITONADDRESS # endif # endif @@ -60,6 +61,9 @@ namespace snmalloc Time # if defined(PLATFORM_HAS_VIRTUALALLOC2) && !defined(USE_SYSTEMATIC_TESTING) | AlignedAllocation +# endif +# if defined(PLATFORM_HAS_WAITONADDRESS) + | WaitOnAddress # endif ; @@ -231,6 +235,32 @@ namespace snmalloc std::chrono::steady_clock::now().time_since_epoch()) .count()); } + +# ifdef PLATFORM_HAS_WAITONADDRESS + using WaitingWord = char; + + template + static void wait_on_address(std::atomic& addr, T expected) + { + while (addr.load(std::memory_order_relaxed) == expected) + { + if (::WaitOnAddress(&addr, &expected, sizeof(T), INFINITE)) + break; + } + } + + template + static void notify_one_on_address(std::atomic& addr) + { + ::WakeByAddressSingle(&addr); + } + + template + static void notify_all_on_address(std::atomic& addr) + { + ::WakeByAddressAll(&addr); + } +# endif }; } #endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/snmalloc.h b/3rdparty/exported/snmalloc/src/snmalloc/snmalloc.h index 47bd6e78a30d..b05b1a330f34 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/snmalloc.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/snmalloc.h @@ -3,8 +3,22 @@ // Core implementation of snmalloc independent of the configuration mode #include "snmalloc_core.h" -// If the user has defined SNMALLOC_PROVIDE_OWN_CONFIG, this include does -// nothing. Otherwise, it provide a default configuration of snmalloc::Alloc. +// Provides the global configuration for the snmalloc implementation. #include "backend/globalconfig.h" + +// If you define SNMALLOC_PROVIDE_OWN_CONFIG then you must provide your own +// definition of `snmalloc::Alloc` before including any files that include +// `snmalloc.h` or consume the global allocation APIs. +#ifndef SNMALLOC_PROVIDE_OWN_CONFIG +namespace snmalloc +{ + /** + * Create allocator type for this configuration. + */ + using Alloc = snmalloc::LocalAllocator< + snmalloc::StandardConfigClientMeta>; +} // namespace snmalloc +#endif + // User facing API surface, needs to know what `Alloc` is. 
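// For embedders that do define SNMALLOC_PROVIDE_OWN_CONFIG, a minimal
// client-side sketch looks like the following; the include path and the
// exact template arguments are assumptions modelled on the tests later in
// this patch, not part of this header:
//
//   #include <snmalloc/backend/globalconfig.h>
//   namespace snmalloc
//   {
//     using Alloc = LocalAllocator<
//       StandardConfigClientMeta<NoClientMetaDataProvider>>;
//   }
//   #define SNMALLOC_PROVIDE_OWN_CONFIG
//   #include <snmalloc/snmalloc.h>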
#include "snmalloc_front.h" diff --git a/3rdparty/exported/snmalloc/src/test/func/cheri/cheri.cc b/3rdparty/exported/snmalloc/src/test/func/cheri/cheri.cc index 4efaec6b7a42..cde8be071af3 100644 --- a/3rdparty/exported/snmalloc/src/test/func/cheri/cheri.cc +++ b/3rdparty/exported/snmalloc/src/test/func/cheri/cheri.cc @@ -133,8 +133,7 @@ int main() static_assert( std::is_same_v>); - LocalCache lc{&StandardConfig::unused_remote}; - auto* ca = AllocPool::acquire(&lc); + auto* ca = AllocPool::acquire(); SNMALLOC_CHECK(cap_len_is(ca, sizeof(*ca))); SNMALLOC_CHECK(cap_vmem_perm_is(ca, false)); diff --git a/3rdparty/exported/snmalloc/src/test/func/client_meta/client_meta.cc b/3rdparty/exported/snmalloc/src/test/func/client_meta/client_meta.cc new file mode 100644 index 000000000000..0359666bb0d6 --- /dev/null +++ b/3rdparty/exported/snmalloc/src/test/func/client_meta/client_meta.cc @@ -0,0 +1,69 @@ +/** + * This test performs a very simple use of the client_meta data feature in + * snmalloc. + */ + +#include "test/setup.h" + +#include +#include +#include +#include + +namespace snmalloc +{ + // Create an allocator that stores an std::atomic> per allocation. + using Alloc = snmalloc::LocalAllocator>>>; +} + +#define SNMALLOC_PROVIDE_OWN_CONFIG +#include + +int main() +{ +#ifdef SNMALLOC_PASS_THROUGH + // This test does not make sense in pass-through + return 0; +#else + // Allocate a bunch of objects, and store the index into the meta-data. + std::vector ptrs; + for (size_t i = 0; i < 10000; i++) + { + auto p = snmalloc::libc::malloc(1024); + auto& meta = snmalloc::libc::get_client_meta_data(p); + meta = i; + ptrs.push_back(p); + memset(p, (uint8_t)i, 1024); + } + + // Check meta-data contains expected value, and that the memory contains + // the expected pattern. + for (size_t i = 0; i < 10000; i++) + { + auto p = ptrs[i]; + auto& meta = snmalloc::libc::get_client_meta_data(p); + if (meta != i) + { + std::cout << "Failed at index " << i << std::endl; + abort(); + } + for (size_t j = 0; j < 1024; j++) + { + if (reinterpret_cast(p)[j] != (uint8_t)i) + { + std::cout << "Failed at index " << i << " byte " << j << std::endl; + abort(); + } + } + snmalloc::libc::free(p); + } + + // Access in a read-only way meta-data associated with the stack. + // This would fail if it was accessed for write. + auto& meta = snmalloc::libc::get_client_meta_data_const(&ptrs); + std::cout << "meta for stack" << meta << std::endl; + + return 0; +#endif +} diff --git a/3rdparty/exported/snmalloc/src/test/func/domestication/domestication.cc b/3rdparty/exported/snmalloc/src/test/func/domestication/domestication.cc index 03cc9ba3bdf6..390bba741757 100644 --- a/3rdparty/exported/snmalloc/src/test/func/domestication/domestication.cc +++ b/3rdparty/exported/snmalloc/src/test/func/domestication/domestication.cc @@ -17,13 +17,15 @@ int main() // Specify type of allocator # define SNMALLOC_PROVIDE_OWN_CONFIG + namespace snmalloc { class CustomConfig : public CommonConfig { public: using Pal = DefaultPal; - using PagemapEntry = DefaultPagemapEntry; + using PagemapEntry = DefaultPagemapEntry; + using ClientMeta = NoClientMetaDataProvider; private: using ConcretePagemap = @@ -62,14 +64,12 @@ namespace snmalloc * C++, and not just its initializer fragment, to initialize a non-prefix * subset of the flags (in any order, at that). 
*/ - static constexpr Flags Options = []() constexpr - { + static constexpr Flags Options = []() constexpr { Flags opts = {}; opts.QueueHeadsAreTame = false; opts.HasDomesticate = true; return opts; - } - (); + }(); static GlobalPoolState& pool() { @@ -138,7 +138,8 @@ int main() LocalEntropy entropy; entropy.init(); - RemoteAllocator::key_global = FreeListKey(entropy.get_free_list_key()); + entropy.make_free_list_key(RemoteAllocator::key_global); + entropy.make_free_list_key(freelist::Object::key_root); auto alloc1 = new Alloc(); diff --git a/3rdparty/exported/snmalloc/src/test/func/malloc/malloc.cc b/3rdparty/exported/snmalloc/src/test/func/malloc/malloc.cc index 1d4c31da9e07..6549e583435a 100644 --- a/3rdparty/exported/snmalloc/src/test/func/malloc/malloc.cc +++ b/3rdparty/exported/snmalloc/src/test/func/malloc/malloc.cc @@ -375,6 +375,6 @@ int main(int argc, char** argv) our_malloc_usable_size(nullptr) == 0, "malloc_usable_size(nullptr) should be zero"); - snmalloc::debug_check_empty(); + snmalloc::debug_check_empty(); return 0; } diff --git a/3rdparty/exported/snmalloc/src/test/func/memcpy/func-memcpy.cc b/3rdparty/exported/snmalloc/src/test/func/memcpy/func-memcpy.cc index ff1856fac007..f435b457266e 100644 --- a/3rdparty/exported/snmalloc/src/test/func/memcpy/func-memcpy.cc +++ b/3rdparty/exported/snmalloc/src/test/func/memcpy/func-memcpy.cc @@ -57,6 +57,9 @@ extern "C" void abort() { longjmp(jmp, 1); } +# if __has_builtin(__builtin_trap) + __builtin_trap(); +# endif exit(-1); } @@ -152,7 +155,11 @@ int main() // Some sizes to check for out-of-bounds access. As we are only able to // catch overflows past the end of the sizeclass-padded allocation, make // sure we don't try to test on smaller allocations. - std::initializer_list sizes = {MIN_ALLOC_SIZE, 1024, 2 * 1024 * 1024}; + + static constexpr size_t min_class_size = + sizeclass_to_size(size_to_sizeclass(MIN_ALLOC_SIZE)); + + std::initializer_list sizes = {min_class_size, 1024, 2 * 1024 * 1024}; static_assert( MIN_ALLOC_SIZE < 1024, "Can't detect overflow except at sizeclass boundaries"); diff --git a/3rdparty/exported/snmalloc/src/test/func/memory/memory.cc b/3rdparty/exported/snmalloc/src/test/func/memory/memory.cc index 2a2ada2eef07..7d176f43de01 100644 --- a/3rdparty/exported/snmalloc/src/test/func/memory/memory.cc +++ b/3rdparty/exported/snmalloc/src/test/func/memory/memory.cc @@ -184,7 +184,7 @@ void test_calloc() alloc.dealloc(p, size); } - snmalloc::debug_check_empty(); + snmalloc::debug_check_empty(); } void test_double_alloc() @@ -229,7 +229,7 @@ void test_double_alloc() } } } - snmalloc::debug_check_empty(); + snmalloc::debug_check_empty(); } void test_external_pointer() @@ -237,7 +237,9 @@ void test_external_pointer() // Malloc does not have an external pointer querying mechanism. 
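// Sketch of the property exercised below (the Boundary enumerator names are
// assumed from the allocator headers; they are not shown in this patch): for
// a pointer q anywhere inside an object that starts at p,
//   alloc.external_pointer<Start>(q)      == p
//   alloc.external_pointer<OnePastEnd>(q) == one byte past the object's end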
auto& alloc = ThreadAlloc::get(); - for (uint8_t sc = 0; sc < NUM_SMALL_SIZECLASSES; sc++) + for (snmalloc::smallsizeclass_t sc = size_to_sizeclass(MIN_ALLOC_SIZE); + sc < NUM_SMALL_SIZECLASSES; + sc++) { size_t size = sizeclass_to_size(sc); void* p1 = alloc.alloc(size); @@ -273,7 +275,7 @@ void test_external_pointer() alloc.dealloc(p1, size); } - snmalloc::debug_check_empty(); + snmalloc::debug_check_empty(); }; void check_offset(void* base, void* interior) @@ -470,7 +472,9 @@ void test_static_sized_allocs() void test_remaining_bytes() { auto& alloc = ThreadAlloc::get(); - for (size_t sc = 0; sc < NUM_SMALL_SIZECLASSES; sc++) + for (snmalloc::smallsizeclass_t sc = size_to_sizeclass(MIN_ALLOC_SIZE); + sc < NUM_SMALL_SIZECLASSES; + sc++) { auto size = sizeclass_to_size(sc); char* p = (char*)alloc.alloc(size); diff --git a/3rdparty/exported/snmalloc/src/test/func/miracle_ptr/miracle_ptr.cc b/3rdparty/exported/snmalloc/src/test/func/miracle_ptr/miracle_ptr.cc new file mode 100644 index 000000000000..c4e4783bbd15 --- /dev/null +++ b/3rdparty/exported/snmalloc/src/test/func/miracle_ptr/miracle_ptr.cc @@ -0,0 +1,204 @@ +/** + * This file demonstrates how the snmalloc library could be implemented to + * provide a miracle pointer like feature. This is not a hardened + * implementation and is purely for illustrative purposes. + * + * Do not use as is. + */ + +#ifdef SNMALLOC_THREAD_SANITIZER_ENABLED +int main() +{ + return 0; +} +#else + +# include "test/setup.h" + +# include +# include +# include +# include + +namespace snmalloc +{ + // Instantiate the allocator with a client meta data provider that uses an + // atomic size_t to store the reference count. + using Alloc = snmalloc::LocalAllocator>>>; +} + +# define SNMALLOC_PROVIDE_OWN_CONFIG +# include + +SNMALLOC_SLOW_PATH void error(std::string msg) +{ + std::cout << msg << std::endl; + abort(); +} + +SNMALLOC_FAST_PATH_INLINE void check(bool b, std::string msg) +{ + if (SNMALLOC_UNLIKELY(!b)) + error(msg); +} + +namespace snmalloc::miracle +{ + // snmalloc meta-data representation + // * 2n + 1: Represents an object that has not been deallocated with n + // additional references to it + // * 2n : Represents a deallocated object that + // has n additional references to it + + inline void* malloc(size_t size) + { + auto p = snmalloc::libc::malloc(size); + if (SNMALLOC_UNLIKELY(p == nullptr)) + return nullptr; + + snmalloc::libc::get_client_meta_data(p) = 1; + return p; + } + + inline void free(void* ptr) + { + if (ptr == nullptr) + return; + + // TODO could build a check into this that it is the start of the object? + auto previous = + snmalloc::libc::get_client_meta_data(ptr).fetch_add((size_t)-1); + + if (SNMALLOC_LIKELY(previous == 1)) + { + std::cout << "Freeing " << ptr << std::endl; + snmalloc::libc::free(ptr); + return; + } + + check((previous & 1) == 1, "Double free detected"); + + // We have additional references to this object. + // We should not free it. + // TOOD this assumes this is not an internal pointer. + memset(ptr, 0, snmalloc::libc::malloc_usable_size(ptr)); + } + + inline void acquire(void* p) + { + auto previous = + snmalloc::libc::get_client_meta_data(p).fetch_add((size_t)2); + + // Can we take new pointers to a deallocated object? 
+ check((previous & 1) == 1, "Acquiring a deallocated object"); + } + + inline void release(void* p) + { + auto previous = + snmalloc::libc::get_client_meta_data(p).fetch_add((size_t)-2); + + if (previous > 2) + return; + + check(previous == 2, "Releasing an object with insufficient references"); + + std::cout << "Freeing from release " << p << std::endl; + snmalloc::libc::free(p); + } + + /** + * This class can be used to replace a raw pointer. It will automatically use + * the underlying backup reference counting design from the miracle pointer + * docs. + */ + template + class raw_ptr + { + T* p; + + public: + raw_ptr() : p(nullptr) {} + + raw_ptr(T* p) : p(p) + { + snmalloc::miracle::acquire(p); + } + + T& operator*() + { + return *p; + } + + ~raw_ptr() + { + if (p == nullptr) + return; + snmalloc::miracle::release(p); + } + + raw_ptr(const raw_ptr& rp) : p(rp.p) + { + snmalloc::miracle::acquire(p); + } + + raw_ptr& operator=(const raw_ptr& other) + { + p = other.p; + snmalloc::miracle::acquire(other.p); + return *this; + } + + raw_ptr(raw_ptr&& other) : p(other.p) + { + other.p = nullptr; + } + + raw_ptr& operator=(raw_ptr&& other) + { + p = other.p; + other.p = nullptr; + return *this; + } + }; +} // namespace snmalloc::miracle + +/** + * Overload new and delete to use the "miracle pointer" implementation. + */ +void* operator new(size_t size) +{ + return snmalloc::miracle::malloc(size); +} + +void operator delete(void* p) +{ + snmalloc::miracle::free(p); +} + +void operator delete(void* p, size_t) +{ + snmalloc::miracle::free(p); +} + +int main() +{ +# ifndef SNMALLOC_PASS_THROUGH + snmalloc::miracle::raw_ptr p; + { + auto up1 = std::make_unique(41); + auto up = std::make_unique(42); + auto up2 = std::make_unique(40); + auto up3 = std::make_unique(39); + p = up.get(); + check(*p == 42, "Failed to set p"); + } + // Still safe to access here. The unique_ptr has been destroyed, but the + // raw_ptr has kept the memory live. + // Current implementation zeros the memory when the unique_ptr is destroyed. 
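// Illustrative trace of the reference count for `up`'s allocation under the
// 2n+1 / 2n encoding above: operator new stores 1, `p = up.get()` acquires
// and makes it 3, destroying the unique_ptr drops it to 2 (even, so the
// memory is only zeroed, not freed), and p's destructor finally releases
// 2 -> 0, at which point snmalloc::libc::free is actually called.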
+ check(*p == 0, "Failed to keep memory live"); +# endif + return 0; +} +#endif \ No newline at end of file diff --git a/3rdparty/exported/snmalloc/src/test/func/pagemap/pagemap.cc b/3rdparty/exported/snmalloc/src/test/func/pagemap/pagemap.cc index dca7bf382990..a0b53689f61e 100644 --- a/3rdparty/exported/snmalloc/src/test/func/pagemap/pagemap.cc +++ b/3rdparty/exported/snmalloc/src/test/func/pagemap/pagemap.cc @@ -12,10 +12,13 @@ using namespace snmalloc; static constexpr size_t GRANULARITY_BITS = 20; + struct T { size_t v = 99; + T(size_t v) : v(v) {} + T() {} }; diff --git a/3rdparty/exported/snmalloc/src/test/func/pool/pool.cc b/3rdparty/exported/snmalloc/src/test/func/pool/pool.cc index 7eeff87438cb..2ed960a454a4 100644 --- a/3rdparty/exported/snmalloc/src/test/func/pool/pool.cc +++ b/3rdparty/exported/snmalloc/src/test/func/pool/pool.cc @@ -14,17 +14,16 @@ struct PoolAEntry : Pooled PoolAEntry() : field(1){}; }; -using PoolA = Pool; +using PoolA = Pool; struct PoolBEntry : Pooled { int field; PoolBEntry() : field(0){}; - PoolBEntry(int f) : field(f){}; }; -using PoolB = Pool; +using PoolB = Pool; struct PoolLargeEntry : Pooled { @@ -41,18 +40,18 @@ struct PoolLargeEntry : Pooled }; }; -using PoolLarge = Pool; +using PoolLarge = Pool; template struct PoolSortEntry : Pooled> { int field; - PoolSortEntry(int f) : field(f){}; + PoolSortEntry() : field(1){}; }; template -using PoolSort = Pool, Alloc::Config>; +using PoolSort = Pool>; void test_alloc() { @@ -73,13 +72,8 @@ void test_constructor() SNMALLOC_CHECK(ptr2 != nullptr); SNMALLOC_CHECK(ptr2->field == 0); - auto ptr3 = PoolB::acquire(1); - SNMALLOC_CHECK(ptr3 != nullptr); - SNMALLOC_CHECK(ptr3->field == 1); - PoolA::release(ptr1); PoolB::release(ptr2); - PoolB::release(ptr3); } void test_alloc_many() @@ -181,8 +175,8 @@ void test_sort() // This test checks that `sort` puts the elements in the right order, // so it is the same as if they had been allocated in that order. 
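// Note the API shape exercised here: with this patch, pool entries are
// default-constructed, so a typical use is
//   using MyPool = snmalloc::Pool<MyEntry>;  // MyEntry derives from Pooled<MyEntry>
//   auto* e = MyPool::acquire();             // no constructor arguments any more
//   MyPool::release(e);
// (MyEntry / MyPool are illustrative names, not part of this patch.)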
- auto a1 = PoolSort::acquire(1); - auto a2 = PoolSort::acquire(1); + auto a1 = PoolSort::acquire(); + auto a2 = PoolSort::acquire(); auto position1 = position(a1); auto position2 = position(a2); @@ -201,8 +195,8 @@ void test_sort() PoolSort::sort(); - auto b1 = PoolSort::acquire(1); - auto b2 = PoolSort::acquire(1); + auto b1 = PoolSort::acquire(); + auto b2 = PoolSort::acquire(); SNMALLOC_CHECK(position1 == position(b1)); SNMALLOC_CHECK(position2 == position(b2)); diff --git a/3rdparty/exported/snmalloc/src/test/func/redblack/redblack.cc b/3rdparty/exported/snmalloc/src/test/func/redblack/redblack.cc index f13c72ebb99e..164a5978f034 100644 --- a/3rdparty/exported/snmalloc/src/test/func/redblack/redblack.cc +++ b/3rdparty/exported/snmalloc/src/test/func/redblack/redblack.cc @@ -4,7 +4,6 @@ #include "test/xoroshiro.h" #include -#include #include #include @@ -23,28 +22,36 @@ struct NodeRef static constexpr size_t offset = 10000; size_t* ptr; + constexpr NodeRef(size_t* p) : ptr(p) {} + constexpr NodeRef() : ptr(nullptr) {} + constexpr NodeRef(const NodeRef& other) : ptr(other.ptr) {} + constexpr NodeRef(NodeRef&& other) : ptr(other.ptr) {} bool operator!=(const NodeRef& other) const { return ptr != other.ptr; } + NodeRef& operator=(const NodeRef& other) { ptr = other.ptr; return *this; } + void set(uint16_t val) { *ptr = ((size_t(val) + offset) << 1) + (*ptr & 1); } + explicit operator uint16_t() { return uint16_t((*ptr >> 1) - offset); } + explicit operator size_t*() { return ptr; diff --git a/3rdparty/exported/snmalloc/src/test/func/sandbox/sandbox.cc b/3rdparty/exported/snmalloc/src/test/func/sandbox/sandbox.cc index 519488070084..69ce99cfae5a 100644 --- a/3rdparty/exported/snmalloc/src/test/func/sandbox/sandbox.cc +++ b/3rdparty/exported/snmalloc/src/test/func/sandbox/sandbox.cc @@ -25,6 +25,7 @@ namespace { SNMALLOC_CHECK(0 && "Should never be called!"); } + /** * Sandbox class. Allocates a memory region and an allocator that can * allocate into this from the outside. @@ -172,7 +173,7 @@ namespace * sandbox but allocates memory inside. */ struct RemoteAllocator queue; - } * shared_state; + }* shared_state; /** * The memory provider for this sandbox. 
@@ -195,7 +196,7 @@ namespace Sandbox(size_t sb_size) : start(alloc_sandbox_heap(sb_size)), top(pointer_offset(start, sb_size)), - shared_state(new (start) SharedState()), + shared_state(new(start) SharedState()), state( pointer_offset(CapPtr(start), sizeof(SharedState)), sb_size - sizeof(SharedState)), diff --git a/3rdparty/exported/snmalloc/src/test/func/sizeclass/sizeclass.cc b/3rdparty/exported/snmalloc/src/test/func/sizeclass/sizeclass.cc index d42794e44845..836c62111f9b 100644 --- a/3rdparty/exported/snmalloc/src/test/func/sizeclass/sizeclass.cc +++ b/3rdparty/exported/snmalloc/src/test/func/sizeclass/sizeclass.cc @@ -8,6 +8,9 @@ snmalloc::smallsizeclass_t size_to_sizeclass(size_t size) return snmalloc::size_to_sizeclass(size); } +static constexpr snmalloc::smallsizeclass_t minimum_sizeclass = + snmalloc::size_to_sizeclass_const(snmalloc::MIN_ALLOC_SIZE); + void test_align_size() { bool failed = false; @@ -72,6 +75,10 @@ int main(int, char**) bool failed = false; size_t size_low = 0; + std::cout << "Configured with minimum allocation size " + << snmalloc::MIN_ALLOC_SIZE << " and step size " + << snmalloc::MIN_ALLOC_STEP_SIZE << std::endl; + std::cout << "0 has sizeclass: " << (size_t)snmalloc::size_to_sizeclass(0) << std::endl; @@ -86,12 +93,14 @@ int main(int, char**) slab_size != snmalloc::sizeclass_to_slab_size(sz)) { slab_size = snmalloc::sizeclass_to_slab_size(sz); - std::cout << std::endl; + std::cout << std::endl << "slab size: " << slab_size << std::endl; } size_t size = snmalloc::sizeclass_to_size(sz); std::cout << (size_t)sz << " |-> " - << "[" << size_low + 1 << ", " << size << "]" << std::endl; + << "[" << size_low + 1 << ", " << size << "]" + << (sz == minimum_sizeclass ? " is minimum class" : "") + << std::endl; if (size < size_low) { @@ -102,7 +111,30 @@ int main(int, char**) for (size_t i = size_low + 1; i <= size; i++) { - if (size_to_sizeclass(i) != sz) + /* All sizes should, via bit-math, come back to their class value */ + if (snmalloc::size_to_sizeclass_const(i) != sz) + { + std::cout << "Size " << i << " has _const sizeclass " + << (size_t)snmalloc::size_to_sizeclass_const(i) + << " but expected sizeclass " << (size_t)sz << std::endl; + failed = true; + } + + if (size < snmalloc::MIN_ALLOC_SIZE) + { + /* + * It is expected that these sizes have the "wrong" class from tabular + * lookup: they will have been clipped up to the minimum class. 
+ */ + if (size_to_sizeclass(i) != minimum_sizeclass) + { + std::cout << "Size " << i << " below minimum size; sizeclass " + << (size_t)size_to_sizeclass(i) << " not expected minimum " + << (size_t)minimum_sizeclass << std::endl; + failed = true; + } + } + else if (size_to_sizeclass(i) != sz) { std::cout << "Size " << i << " has sizeclass " << (size_t)size_to_sizeclass(i) << " but expected sizeclass " diff --git a/3rdparty/exported/snmalloc/src/test/func/statistics/stats.cc b/3rdparty/exported/snmalloc/src/test/func/statistics/stats.cc index c8db1cad762a..214a0bcf3cce 100644 --- a/3rdparty/exported/snmalloc/src/test/func/statistics/stats.cc +++ b/3rdparty/exported/snmalloc/src/test/func/statistics/stats.cc @@ -17,7 +17,7 @@ void debug_check_empty_1() auto r = a.alloc(size); - snmalloc::debug_check_empty(&result); + snmalloc::debug_check_empty(&result); if (result != false) { std::cout << "debug_check_empty failed to detect leaked memory:" << size @@ -27,7 +27,7 @@ void debug_check_empty_1() a.dealloc(r); - snmalloc::debug_check_empty(&result); + snmalloc::debug_check_empty(&result); if (result != true) { std::cout << "debug_check_empty failed to say empty:" << size << std::endl; @@ -36,7 +36,7 @@ void debug_check_empty_1() r = a.alloc(size); - snmalloc::debug_check_empty(&result); + snmalloc::debug_check_empty(&result); if (result != false) { std::cout << "debug_check_empty failed to detect leaked memory:" << size @@ -46,7 +46,7 @@ void debug_check_empty_1() a.dealloc(r); - snmalloc::debug_check_empty(&result); + snmalloc::debug_check_empty(&result); if (result != true) { std::cout << "debug_check_empty failed to say empty:" << size << std::endl; @@ -72,7 +72,7 @@ void debug_check_empty_2() } auto r = a.alloc(size); allocs.push_back(r); - snmalloc::debug_check_empty(&result); + snmalloc::debug_check_empty(&result); if (result != false) { std::cout << "False empty after " << i << " allocations of " << size @@ -88,7 +88,7 @@ void debug_check_empty_2() { std::cout << "." 
<< std::flush; } - snmalloc::debug_check_empty(&result); + snmalloc::debug_check_empty(&result); if (result != false) { std::cout << "False empty after " << i << " deallocations of " << size @@ -98,7 +98,7 @@ void debug_check_empty_2() a.dealloc(allocs[i]); } std::cout << std::endl; - snmalloc::debug_check_empty(); + snmalloc::debug_check_empty(); } int main() diff --git a/3rdparty/exported/snmalloc/src/test/func/thread_alloc_external/thread_alloc_external.cc b/3rdparty/exported/snmalloc/src/test/func/thread_alloc_external/thread_alloc_external.cc index 2b10ed8cbcd2..686c08dc487a 100644 --- a/3rdparty/exported/snmalloc/src/test/func/thread_alloc_external/thread_alloc_external.cc +++ b/3rdparty/exported/snmalloc/src/test/func/thread_alloc_external/thread_alloc_external.cc @@ -12,7 +12,8 @@ namespace snmalloc { - using Alloc = snmalloc::LocalAllocator; + using Alloc = snmalloc::LocalAllocator< + snmalloc::StandardConfigClientMeta>; } using namespace snmalloc; diff --git a/3rdparty/exported/snmalloc/src/test/func/two_alloc_types/alloc1.cc b/3rdparty/exported/snmalloc/src/test/func/two_alloc_types/alloc1.cc index 74996b5178b1..b4e0ae32af1b 100644 --- a/3rdparty/exported/snmalloc/src/test/func/two_alloc_types/alloc1.cc +++ b/3rdparty/exported/snmalloc/src/test/func/two_alloc_types/alloc1.cc @@ -10,6 +10,7 @@ // Specify type of allocator #define SNMALLOC_PROVIDE_OWN_CONFIG + namespace snmalloc { using CustomGlobals = FixedRangeConfig>; diff --git a/3rdparty/exported/snmalloc/src/test/func/two_alloc_types/main.cc b/3rdparty/exported/snmalloc/src/test/func/two_alloc_types/main.cc index b7f6ded9e7a4..74d0338dcc62 100644 --- a/3rdparty/exported/snmalloc/src/test/func/two_alloc_types/main.cc +++ b/3rdparty/exported/snmalloc/src/test/func/two_alloc_types/main.cc @@ -30,6 +30,7 @@ extern "C" void* enclave_malloc(size_t); extern "C" void enclave_free(void*); using namespace snmalloc; + int main() { setup(); diff --git a/3rdparty/exported/snmalloc/src/test/perf/contention/contention.cc b/3rdparty/exported/snmalloc/src/test/perf/contention/contention.cc index e266f0491020..c2cfd8f85c74 100644 --- a/3rdparty/exported/snmalloc/src/test/perf/contention/contention.cc +++ b/3rdparty/exported/snmalloc/src/test/perf/contention/contention.cc @@ -137,7 +137,7 @@ void test_tasks(size_t num_tasks, size_t count, size_t size) ParallelTest test(num_tasks); std::cout << "Task test, " << num_tasks << " threads, " << count - << " swaps per thread " << test.time() << "ticks" << std::endl; + << " swaps per thread " << test.time() << " ticks" << std::endl; for (size_t n = 0; n < swapsize; n++) { @@ -154,7 +154,7 @@ void test_tasks(size_t num_tasks, size_t count, size_t size) } #ifndef NDEBUG - snmalloc::debug_check_empty(); + snmalloc::debug_check_empty(); #endif }; diff --git a/3rdparty/exported/snmalloc/src/test/perf/external_pointer/externalpointer.cc b/3rdparty/exported/snmalloc/src/test/perf/external_pointer/externalpointer.cc index be3306cba00b..96d46582076a 100644 --- a/3rdparty/exported/snmalloc/src/test/perf/external_pointer/externalpointer.cc +++ b/3rdparty/exported/snmalloc/src/test/perf/external_pointer/externalpointer.cc @@ -47,7 +47,7 @@ namespace test alloc.dealloc(objects[i]); } - snmalloc::debug_check_empty(); + snmalloc::debug_check_empty(); } void test_external_pointer(xoroshiro::p128r64& r) diff --git a/3rdparty/exported/snmalloc/src/test/perf/memcpy/memcpy.cc b/3rdparty/exported/snmalloc/src/test/perf/memcpy/memcpy.cc index e3bee7d2c784..763dcd72e95d 100644 --- 
a/3rdparty/exported/snmalloc/src/test/perf/memcpy/memcpy.cc +++ b/3rdparty/exported/snmalloc/src/test/perf/memcpy/memcpy.cc @@ -1,5 +1,4 @@ -#include "snmalloc/global/memcpy.h" - +#include #include #include #include diff --git a/3rdparty/exported/snmalloc/src/test/perf/msgpass/msgpass.cc b/3rdparty/exported/snmalloc/src/test/perf/msgpass/msgpass.cc new file mode 100644 index 000000000000..7e639a02b829 --- /dev/null +++ b/3rdparty/exported/snmalloc/src/test/perf/msgpass/msgpass.cc @@ -0,0 +1,307 @@ +/** + * A simulation of a message-passing application workload for snmalloc. + * + * - N_PRODUCER producer threads allocate and queue spans of messages randomly, + * - to N_CONSUMER consumer threads, which dequeue messages and free() them. + * + * Optionally, N_PROXY threads act as both producers and consumers, forwarding + * received messages back to another queue rather than freeing them. + */ + +#include "test/opt.h" +#include "test/setup.h" +#include "test/usage.h" +#include "test/xoroshiro.h" + +constexpr static bool be_chatty = false; + +#include +#include +#include +#include +#include +#include +#include + +using namespace snmalloc; + +void chatty(const char* p, ...) +{ + if constexpr (be_chatty) + { + va_list va; + va_start(va, p); + vfprintf(stderr, p, va); + va_end(va); + } +} + +/* + * Interpret SNMALLOC_PASS_THROUGH ourselves to make this a bit more fair of a + * comparison, since relying of snmalloc itself to do the passing through + * results in it imposing its own idea of alignment onto the underlying + * allocator, which might result in it taking less optimized paths. + */ +#ifdef SNMALLOC_PASS_THROUGH +struct MyAlloc +{ + MyAlloc() {} + + void* alloc(size_t sz) + { + return malloc(sz); + } + + void dealloc(void* p) + { + free(p); + } +}; +#else +struct MyAlloc +{ + snmalloc::Alloc& a; + + MyAlloc() : a(ThreadAlloc::get()) {} + + void* alloc(size_t sz) + { + return a.alloc(sz); + } + + void dealloc(void* p) + { + a.dealloc(p); + } +}; +#endif + +/* + * FreeListMPSCQ make for convenient MPSC queues, so we use those for sending + * "messages". Each consumer or proxy has its own (source) queue. 
+ */ +static FreeListKey msgqueue_key{0xab2acada, 0xb2a01234, 0x56789abc}; +static constexpr address_t msgqueue_key_tweak = 0xfedc'ba98; + +struct params +{ + size_t N_PRODUCER; + size_t N_CONSUMER; + size_t N_PROXY; + size_t N_QUEUE; + size_t N_PRODUCER_BATCH; + size_t N_MAX_OUTSTANDING; + size_t N_MAX_BATCH_SIZE; + FreeListMPSCQ* msgqueue; // [N_QUEUE] +}; + +std::atomic producers_live; +std::atomic queue_gate; +std::atomic messages_outstanding; + +freelist::HeadPtr domesticate_nop(freelist::QueuePtr p) +{ + return freelist::HeadPtr::unsafe_from(p.unsafe_ptr()); +}; + +void consumer(const struct params* param, size_t qix) +{ + MyAlloc a{}; + auto& myq = param->msgqueue[qix]; + + chatty("Cl %zu q is %p\n", qix, &myq); + + do + { + size_t reap = 0; + + if (myq.can_dequeue(domesticate_nop, domesticate_nop)) + { + myq.dequeue( + domesticate_nop, + domesticate_nop, + [qix, &a, &reap](freelist::HeadPtr o) { + UNUSED(qix); + auto p = o.as_void().unsafe_ptr(); + chatty("Cl %zu free %p\n", qix, p); + a.dealloc(p); + reap++; + return true; + }); + } + + messages_outstanding -= reap; + + if (reap == 0) + { + std::this_thread::yield(); + } + else + { + chatty("Cl %zu reap %zu\n", qix, reap); + } + + } while (myq.can_dequeue(domesticate_nop, domesticate_nop) || + producers_live || (queue_gate > param->N_CONSUMER)); + + chatty("Cl %zu fini\n", qix); + a.dealloc(myq.destroy().unsafe_ptr()); +} + +void proxy(const struct params* param, size_t qix) +{ + auto& myq = param->msgqueue[qix]; + auto& qs = param->msgqueue; + + chatty("Px %zu q is %p\n", qix, &myq); + + xoroshiro::p128r32 r(1234 + qix, qix); + do + { + if (myq.can_dequeue(domesticate_nop, domesticate_nop)) + { + myq.dequeue( + domesticate_nop, domesticate_nop, [qs, qix, &r](freelist::HeadPtr o) { + auto rcptqix = r.next() % qix; + + chatty( + "Px %zu send %p to %zu\n", qix, o.as_void().unsafe_ptr(), rcptqix); + + qs[rcptqix].enqueue(o, o, domesticate_nop); + return true; + }); + } + + std::this_thread::yield(); + } while (myq.can_dequeue(domesticate_nop, domesticate_nop) || + producers_live || (queue_gate > qix + 1)); + + chatty("Px %zu fini\n", qix); + + MyAlloc().dealloc(myq.destroy().unsafe_ptr()); + queue_gate--; +} + +void producer(const struct params* param, size_t pix) +{ + MyAlloc a{}; + static constexpr size_t msgsizes[] = {48, 64, 96, 128}; + static constexpr size_t nmsgsizes = sizeof(msgsizes) / sizeof(msgsizes[0]); + + xoroshiro::p128r32 r(5489 + pix, pix); + + freelist::Builder batch; + batch.init(0, msgqueue_key, msgqueue_key_tweak); + + for (size_t batchix = param->N_PRODUCER_BATCH; batchix > 0; batchix--) + { + while (messages_outstanding >= param->N_MAX_OUTSTANDING) + { + std::this_thread::yield(); + } + + size_t nmsg = (r.next() & 15) + 1; + size_t msgsize = msgsizes[r.next() % nmsgsizes]; + + /* Allocate batch and form list */ + for (size_t msgix = 0; msgix < nmsg; msgix++) + { + auto msg = a.alloc(msgsize); + chatty("Pd %zu make %p\n", pix, msg); + + auto msgc = capptr::Alloc::unsafe_from(msg) + .template as_reinterpret>(); + batch.add(msgc, msgqueue_key, msgqueue_key_tweak); + } + + /* Post to random queue */ + auto [bfirst, blast] = + batch.extract_segment(msgqueue_key, msgqueue_key_tweak); + auto rcptqix = r.next() % param->N_QUEUE; + param->msgqueue[rcptqix].enqueue(bfirst, blast, domesticate_nop); + messages_outstanding += nmsg; + + chatty("Pd %zu send %zu to %zu\n", pix, nmsg, rcptqix); + + /* Occasionally yield the CPU */ + if ((batchix & 0xF) == 1) + std::this_thread::yield(); + } + + chatty("Pd %zu fini\n", pix); +} 
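/*
 * Editorial sketch, not part of the upstream patch: the smallest send/receive
 * round trip over a FreeListMPSCQ, distilled from producer() and consumer()
 * above. It reuses MyAlloc, msgqueue_key, msgqueue_key_tweak and
 * domesticate_nop from this file; template arguments (stripped throughout
 * this diff) are assumed to match the msgqueue and batch declarations above,
 * so this is illustrative only.
 *
 *   MyAlloc a{};
 *   FreeListMPSCQ q;            // same instantiation as param.msgqueue above
 *   q.init();
 *
 *   // Send: wrap one allocation as a freelist segment and enqueue it.
 *   freelist::Builder b;        // as in producer()
 *   b.init(0, msgqueue_key, msgqueue_key_tweak);
 *   auto msg = capptr::Alloc::unsafe_from(a.alloc(64))
 *                .template as_reinterpret();   // target type as in producer()
 *   b.add(msg, msgqueue_key, msgqueue_key_tweak);
 *   auto [first, last] = b.extract_segment(msgqueue_key, msgqueue_key_tweak);
 *   q.enqueue(first, last, domesticate_nop);
 *
 *   // Receive: drain the queue, freeing each message; returning true from
 *   // the callback keeps the drain going.
 *   if (q.can_dequeue(domesticate_nop, domesticate_nop))
 *     q.dequeue(domesticate_nop, domesticate_nop, [&](freelist::HeadPtr o) {
 *       a.dealloc(o.as_void().unsafe_ptr());
 *       return true;
 *     });
 *
 *   a.dealloc(q.destroy().unsafe_ptr());
 */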
+ +int main(int argc, char** argv) +{ + struct params param; + + opt::Opt opt(argc, argv); + param.N_PRODUCER = opt.is("--producers", 3); + param.N_CONSUMER = opt.is("--consumers", 3); + param.N_PROXY = opt.is("--proxies", 2); + param.N_PRODUCER_BATCH = opt.is("--batches", 1024 * 1024); + param.N_MAX_OUTSTANDING = opt.is("--max-out", 4 * 1024); + param.N_MAX_BATCH_SIZE = opt.is("--max-batch", 16); + + std::cout << "msgpass --producers=" << param.N_PRODUCER + << " --consumers=" << param.N_CONSUMER + << " --proxies=" << param.N_PROXY + << " --batches=" << param.N_PRODUCER_BATCH + << " --max-out=" << param.N_MAX_OUTSTANDING + << " --max-batch=" << param.N_MAX_BATCH_SIZE << std::endl; + + param.N_QUEUE = param.N_CONSUMER + param.N_PROXY; + param.msgqueue = + new FreeListMPSCQ[param.N_QUEUE]; + + auto* producer_threads = new std::thread[param.N_PRODUCER]; + auto* queue_threads = new std::thread[param.N_QUEUE]; + + for (size_t i = 0; i < param.N_QUEUE; i++) + { + param.msgqueue[i].init(); + } + + producers_live = true; + queue_gate = param.N_QUEUE; + messages_outstanding = 0; + + /* Spawn consumers */ + for (size_t i = 0; i < param.N_CONSUMER; i++) + { + queue_threads[i] = std::thread(consumer, ¶m, i); + } + + /* Spawn proxies */ + for (size_t i = param.N_CONSUMER; i < param.N_QUEUE; i++) + { + queue_threads[i] = std::thread(proxy, ¶m, i); + } + + /* Spawn producers */ + for (size_t i = 0; i < param.N_PRODUCER; i++) + { + producer_threads[i] = std::thread(producer, ¶m, i); + } + + /* Wait for producers to finish */ + for (size_t i = 0; i < param.N_PRODUCER; i++) + { + producer_threads[i].join(); + } + producers_live = false; + + /* Wait for proxies and consumers to finish */ + for (size_t i = 0; i < param.N_QUEUE; i++) + { + queue_threads[param.N_QUEUE - 1 - i].join(); + } + + delete[] producer_threads; + delete[] queue_threads; + + /* Ensure that we have not lost any allocations */ + debug_check_empty(); + + return 0; +} diff --git a/3rdparty/exported/snmalloc/src/test/perf/singlethread/singlethread.cc b/3rdparty/exported/snmalloc/src/test/perf/singlethread/singlethread.cc index b93dcd428a2a..431d40d2494e 100644 --- a/3rdparty/exported/snmalloc/src/test/perf/singlethread/singlethread.cc +++ b/3rdparty/exported/snmalloc/src/test/perf/singlethread/singlethread.cc @@ -34,9 +34,9 @@ void test_alloc_dealloc(size_t count, size_t size, bool write) { auto it = set.begin(); void* p = *it; - alloc.dealloc(p, size); set.erase(it); SNMALLOC_CHECK(set.find(p) == set.end()); + alloc.dealloc(p, size); } // alloc 1x objects @@ -60,7 +60,7 @@ void test_alloc_dealloc(size_t count, size_t size, bool write) } } - snmalloc::debug_check_empty(); + snmalloc::debug_check_empty(); } int main(int, char**) diff --git a/3rdparty/exported/snmalloc/src/test/perf/startup/startup.cc b/3rdparty/exported/snmalloc/src/test/perf/startup/startup.cc new file mode 100644 index 000000000000..46e18f90d432 --- /dev/null +++ b/3rdparty/exported/snmalloc/src/test/perf/startup/startup.cc @@ -0,0 +1,96 @@ +#include "test/opt.h" +#include "test/setup.h" +#include "test/usage.h" +#include "test/xoroshiro.h" + +#include +#include +#include +#include +#include + +using namespace snmalloc; + +std::vector counters{}; + +template +class ParallelTest +{ +private: + std::atomic flag = false; + std::atomic ready = 0; + uint64_t start; + uint64_t end; + std::atomic complete = 0; + size_t cores; + F f; + + void run(size_t id) + { + auto prev = ready.fetch_add(1); + if (prev + 1 == cores) + { + start = Aal::tick(); + flag = true; + } + while (!flag) + 
Aal::pause(); + + f(id); + + prev = complete.fetch_add(1); + if (prev + 1 == cores) + { + end = Aal::tick(); + } + } + +public: + ParallelTest(F&& f, size_t cores) : cores(cores), f(std::forward(f)) + { + std::thread* t = new std::thread[cores]; + + for (size_t i = 0; i < cores; i++) + { + t[i] = std::thread(&ParallelTest::run, this, i); + } + // Wait for all the threads. + for (size_t i = 0; i < cores; i++) + { + t[i].join(); + } + + delete[] t; + } + + uint64_t time() + { + return end - start; + } +}; + +int main() +{ + auto nthreads = std::thread::hardware_concurrency(); + counters.resize(nthreads); + + ParallelTest test( + [](size_t id) { + auto start = Aal::tick(); + auto& alloc = snmalloc::ThreadAlloc::get(); + alloc.dealloc(alloc.alloc(1)); + auto end = Aal::tick(); + counters[id] = end - start; + }, + nthreads); + + std::cout << "Taken: " << test.time() << std::endl; + std::sort(counters.begin(), counters.end()); + uint64_t start = 0; + for (auto counter : counters) + { + std::cout << "Thread time " << counter << " (" << counter - start << ")" + << std::endl; + start = counter; + } +} \ No newline at end of file diff --git a/3rdparty/exported/snmalloc/src/test/setup.h b/3rdparty/exported/snmalloc/src/test/setup.h index 61f9a991c178..deb903de6819 100644 --- a/3rdparty/exported/snmalloc/src/test/setup.h +++ b/3rdparty/exported/snmalloc/src/test/setup.h @@ -97,12 +97,14 @@ void setup() } # else # include + void error_handle(int signal) { snmalloc::UNUSED(signal); snmalloc::error("Seg Fault"); _exit(1); } + void setup() { signal(SIGSEGV, error_handle); diff --git a/CMakeLists.txt b/CMakeLists.txt index 4cbe3559e194..b2142ca5fea7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -229,6 +229,7 @@ else() set(SNMALLOC_HEADER_ONLY_LIBRARY ON) add_subdirectory(3rdparty/exported/snmalloc EXCLUDE_FROM_ALL) set(SNMALLOC_COMPILE_OPTIONS "-mcx16") + set(SNMALLOC_INCLUDE_DIRS 3rdparty/exported/snmalloc/src) list(APPEND CCHOST_SOURCES src/host/snmalloc.cpp) endif() @@ -244,7 +245,9 @@ add_san(cchost) target_compile_options( cchost PRIVATE ${COMPILE_LIBCXX} ${SNMALLOC_COMPILE_OPTIONS} ) -target_include_directories(cchost PRIVATE ${CCF_GENERATED_DIR}) +target_include_directories( + cchost PRIVATE ${CCF_GENERATED_DIR} ${SNMALLOC_INCLUDE_DIRS} +) if(COMPILE_TARGET STREQUAL "snp") target_compile_definitions(cchost PUBLIC PLATFORM_SNP) diff --git a/cgmanifest.json b/cgmanifest.json index efca9ac007dd..6c6ef7ff602b 100644 --- a/cgmanifest.json +++ b/cgmanifest.json @@ -60,7 +60,7 @@ "type": "git", "git": { "repositoryUrl": "https://github.com/microsoft/snmalloc", - "commitHash": "dc1268886a5d49d38a54e5d1402b5924a71fee0b" + "commitHash": "564c88b07c53728ec90a88d7d34d0f74d3b0bfff" } } }, diff --git a/src/host/snmalloc.cpp b/src/host/snmalloc.cpp index 6e5742950d8c..92ec79a6735e 100644 --- a/src/host/snmalloc.cpp +++ b/src/host/snmalloc.cpp @@ -2,9 +2,11 @@ // Licensed under the Apache 2.0 License. #define NO_BOOTSTRAP_ALLOCATOR +#define SNMALLOC_USE_WAIT_ON_ADDRESS 0 #ifndef NDEBUG # define NDEBUG #endif +#include "snmalloc/src/snmalloc/override/malloc.cc" #include "snmalloc/src/snmalloc/override/new.cc" \ No newline at end of file
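
Editorial note on the final hunk: reconstructed from the context lines of that hunk, the CCF host shim src/host/snmalloc.cpp reads as follows after this patch. It now compiles snmalloc's malloc/free overrides alongside the existing operator new/delete overrides, and defines SNMALLOC_USE_WAIT_ON_ADDRESS as 0, which appears to opt out of the wait-on-address based locking introduced in snmalloc 0.7.0. The first line of the file lies outside the hunk and is shown here as a placeholder.

// (Copyright line unchanged; it sits outside the hunk above.)
// Licensed under the Apache 2.0 License.

#define NO_BOOTSTRAP_ALLOCATOR
#define SNMALLOC_USE_WAIT_ON_ADDRESS 0
#ifndef NDEBUG
#  define NDEBUG
#endif
#include "snmalloc/src/snmalloc/override/malloc.cc"
#include "snmalloc/src/snmalloc/override/new.cc"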