From d4c6cf0c99a9b714de151f343b61778566fb2825 Mon Sep 17 00:00:00 2001
From: Jake Hughes
Date: Wed, 17 Apr 2024 11:01:10 +0100
Subject: [PATCH 1/2] Use BDWGC's buffered finalisation

This commit switches to BDWGC's new buffered finalisation mechanism, which
is built on the disclaim API. Gc objects which need finalising are allocated
with a 'disclaim' (i.e. finaliser) function pointer in their first word.
During collection, these finalisers are added to buffers, which are then
drained on a separate finalisation thread inside BDWGC.

The table below shows the performance difference between this approach
(called disclaim) and the previous tip-of-tree commit (which used the much
slower, channel-based off-thread finalisation).

```
Benchmark    Config    Type   mean (ms)
Bounce       baseline  micro  874
Bounce       disclaim  micro  114
BubbleSort   baseline  micro  1273
BubbleSort   disclaim  micro  144
DeltaBlue    baseline  macro  552
DeltaBlue    disclaim  macro  91
Dispatch     baseline  micro  726
Dispatch     disclaim  micro  120
Fannkuch     baseline  micro  364
Fannkuch     disclaim  micro  66
Fibonacci    baseline  micro  1559
Fibonacci    disclaim  micro  200
FieldLoop    baseline  micro  835
FieldLoop    disclaim  micro  179
GraphSearch  baseline  macro  175
GraphSearch  disclaim  macro  43
IntegerLoop  baseline  micro  1685
IntegerLoop  disclaim  micro  181
JsonSmall    baseline  macro  542
JsonSmall    disclaim  macro  125
List         baseline  micro  1447
List         disclaim  micro  149
Loop         baseline  micro  2099
Loop         disclaim  micro  231
Mandelbrot   baseline  micro  1172
Mandelbrot   disclaim  micro  160
NBody        baseline  macro  722
NBody        disclaim  macro  69
PageRank     baseline  macro  662
PageRank     disclaim  macro  150
Permute      baseline  micro  1281
Permute      disclaim  micro  156
Queens       baseline  micro  1017
Queens       disclaim  micro  131
QuickSort    baseline  micro  251
QuickSort    disclaim  micro  41
Recurse      baseline  micro  1313
Recurse      disclaim  micro  183
Richards     baseline  macro  Failed
Richards     disclaim  macro  1722
Sieve        baseline  micro  1068
Sieve        disclaim  micro  215
Storage      baseline  micro  79
Storage      disclaim  micro  43
Sum          baseline  micro  864
Sum          disclaim  micro  105
Towers       baseline  micro  1309
Towers       disclaim  micro  153
TreeSort     baseline  micro  350
TreeSort     disclaim  micro  66
WhileLoop    baseline  micro  1962
WhileLoop    disclaim  micro  262
```
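A minimal sketch of this flow, illustrative only and not BDWGC's
implementation: each dead object's header word names a type-erased
finaliser, (finaliser, object) pairs are buffered during collection, and
the buffer is drained on a separate thread. The buffer is a plain `Vec`
here, and addresses are smuggled as `usize` purely to satisfy `Send`.

```rust
use std::thread;

// Type-erased finaliser, mirroring the one-word header described above.
type Finalizer = unsafe fn(*mut ());

// Stand-in for monomorphised drop glue: reclaims a leaked String.
unsafe fn finalize_string(obj: *mut ()) {
    drop(unsafe { Box::from_raw(obj as *mut String) });
}

fn main() {
    // During a collection, the collector reads each dead object's header
    // word and appends (finaliser, object) pairs to a buffer...
    let mut buffer: Vec<(Finalizer, usize)> = Vec::new();
    let dead = Box::into_raw(Box::new(String::from("unreachable")));
    buffer.push((finalize_string as Finalizer, dead as usize));

    // ...and the whole buffer is then drained on a dedicated finalisation
    // thread, so finalisers never run on mutator threads.
    thread::spawn(move || {
        for (finalizer, obj) in buffer {
            unsafe { finalizer(obj as *mut ()) };
        }
    })
    .join()
    .unwrap();
}
```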
---
 library/bdwgc/build.rs                     |   2 +
 library/bdwgc/src/lib.rs                   |   2 +
 library/std/src/gc.rs                      | 146 ++++++++++++---
 library/std/src/lib.rs                     |   1 +
 tests/ui/runtime/gc/run_finalizers.rs      |  42 ------
 tests/ui/runtime/gc/unchecked_finalizer.rs |  16 ++-
 tests/ui/runtime/gc/zero_vecs.rs           |  14 +-
 7 files changed, 115 insertions(+), 108 deletions(-)
 delete mode 100644 tests/ui/runtime/gc/run_finalizers.rs

diff --git a/library/bdwgc/build.rs b/library/bdwgc/build.rs
index b6ec2ae85820c..f86e3a748f5da 100644
--- a/library/bdwgc/build.rs
+++ b/library/bdwgc/build.rs
@@ -33,6 +33,8 @@ fn main() {
         .pic(true)
         .define("BUILD_SHARED_LIBS", "OFF")
         .cflag("-DGC_ALWAYS_MULTITHREADED")
+        .cflag("-DDISCLAIM_MARK_CHILDREN")
+        .cflag("-DBUFFERED_FINALIZATION")
         .cflag("-DGC_JAVA_FINALIZATION");

    if env::var("ENABLE_GC_ASSERTIONS").map_or(false, |v| v == "true") {
diff --git a/library/bdwgc/src/lib.rs b/library/bdwgc/src/lib.rs
index 2ef9f534d1e99..52f42351e1904 100644
--- a/library/bdwgc/src/lib.rs
+++ b/library/bdwgc/src/lib.rs
@@ -36,6 +36,8 @@ pub struct ProfileStats {
 extern "C" {
     pub fn GC_malloc(nbytes: usize) -> *mut u8;

+    pub fn GC_buffered_finalize_malloc(nbytes: usize) -> *mut u8;
+
     pub fn GC_posix_memalign(mem_ptr: *mut *mut u8, align: usize, nbytes: usize) -> i32;

     pub fn GC_realloc(old: *mut u8, new_size: usize) -> *mut u8;
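The gc.rs changes that follow wrap this binding in a `GcFinalizedAllocator`
implementing the unstable `Allocator` trait. A standalone sketch of the same
shape, with `System` standing in for `GC_buffered_finalize_malloc` and the
illustrative name `MallocLike`:

```rust
#![feature(allocator_api, alloc_layout_extra)]

use std::alloc::{AllocError, Allocator, Layout, System};
use std::ptr::NonNull;

struct MallocLike;

unsafe impl Allocator for MallocLike {
    fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
        match layout.size() {
            // Zero-sized requests must not reach the underlying malloc;
            // hand back a well-aligned dangling pointer instead.
            0 => Ok(NonNull::slice_from_raw_parts(layout.dangling(), 0)),
            size => {
                // Stand-in for a malloc-style FFI call.
                let ptr = System.allocate(layout)?.cast::<u8>();
                Ok(NonNull::slice_from_raw_parts(ptr, size))
            }
        }
    }

    unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
        // A GC-backed allocator leaves this as a no-op and lets the
        // collector reclaim memory; `System` needs an explicit free.
        unsafe { System.deallocate(ptr, layout) }
    }
}

fn main() {
    let b = Box::new_in(42u32, MallocLike);
    assert_eq!(*b, 42);
}
```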
diff --git a/library/std/src/gc.rs b/library/std/src/gc.rs
index 2e8b81da63bdb..b238a4d5a4af7 100644
--- a/library/std/src/gc.rs
+++ b/library/std/src/gc.rs
@@ -43,10 +43,10 @@ use core::{
     cmp::{self, Ordering},
     fmt,
     hash::{Hash, Hasher},
-    marker::{FinalizerSafe, PhantomData, Unsize},
-    mem::MaybeUninit,
+    marker::{FinalizerSafe, Unsize},
+    mem::{align_of_val_raw, transmute, MaybeUninit},
     ops::{CoerceUnsized, Deref, DispatchFromDyn, Receiver},
-    ptr::{self, drop_in_place, null_mut, NonNull},
+    ptr::{self, drop_in_place, NonNull},
 };

 pub use core::gc::*;
@@ -72,6 +72,25 @@ pub const MIN_ALIGN: usize = 8;
 #[derive(Debug)]
 pub struct GcAllocator;

+#[derive(Debug)]
+pub struct GcFinalizedAllocator;
+
+unsafe impl Allocator for GcFinalizedAllocator {
+    #[inline]
+    fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
+        match layout.size() {
+            0 => Ok(NonNull::slice_from_raw_parts(layout.dangling(), 0)),
+            size => unsafe {
+                let ptr = bdwgc::GC_buffered_finalize_malloc(layout.size()) as *mut u8;
+                let ptr = NonNull::new(ptr).ok_or(AllocError)?;
+                Ok(NonNull::slice_from_raw_parts(ptr, size))
+            },
+        }
+    }
+
+    unsafe fn deallocate(&self, _: NonNull<u8>, _: Layout) {}
+}
+
 unsafe impl GlobalAlloc for GcAllocator {
     #[inline]
     unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
@@ -176,11 +195,24 @@ pub fn thread_registered() -> bool {
     unsafe { bdwgc::GC_thread_is_registered() != 0 }
 }

+// The function pointer to be executed on the finalization thread.
+//
+// This is not polymorphic because type information about the object inside the box is not known
+// during collection. However, it is enough to use the () type because we ensure during
+// allocation that it points to the correct drop_in_place fn for the underlying value.
+type Finalizer = unsafe fn(*mut GcBox<()>);
+
 ////////////////////////////////////////////////////////////////////////////////
 // GC API
 ////////////////////////////////////////////////////////////////////////////////

-struct GcBox<T: ?Sized>(T);
+struct GcBox<T: ?Sized> {
+    /// The finalizer fn pointer for `GcBox<T>`. `None` if needs_finalize == false.
+    #[allow(dead_code)]
+    finalizer: Option<Finalizer>,
+    /// The object being garbage collected.
+    value: T,
+}

 /// A multi-threaded garbage collected pointer.
 ///
@@ -190,7 +222,6 @@ struct GcBox<T: ?Sized>(T);
 #[cfg_attr(not(test), rustc_diagnostic_item = "gc")]
 pub struct Gc<T: ?Sized> {
     ptr: NonNull<GcBox<T>>,
-    _phantom: PhantomData<T>,
 }

 unsafe impl<T: ?Sized + Send + Sync> Send for Gc<T> {}
@@ -241,8 +272,19 @@ impl<T: ?Sized> Drop for Gc<T> {
 }

 impl<T: ?Sized> Gc<T> {
+    #[inline(always)]
+    fn inner(&self) -> &GcBox<T> {
+        // This unsafety is ok because while this Gc is alive we're guaranteed
+        // that the inner pointer is valid.
+        unsafe { self.ptr.as_ref() }
+    }
+
     unsafe fn from_inner(ptr: NonNull<GcBox<T>>) -> Self {
-        Self { ptr, _phantom: PhantomData }
+        Self { ptr }
+    }
+
+    unsafe fn from_ptr(ptr: *mut GcBox<T>) -> Self {
+        unsafe { Self::from_inner(NonNull::new_unchecked(ptr)) }
     }

     /// Get a `Gc` from a raw pointer.
@@ -255,19 +297,30 @@ impl<T: ?Sized> Gc<T> {
     /// size and alignment of the originally allocated block.
     #[unstable(feature = "gc", issue = "none")]
     pub fn from_raw(raw: *const T) -> Gc<T> {
-        Gc { ptr: unsafe { NonNull::new_unchecked(raw as *mut GcBox<T>) }, _phantom: PhantomData }
+        let layout = Layout::new::<GcBox<()>>();
+        // Align the unsized value to the end of the GcBox.
+        // Because GcBox is repr(C), it will always be the last field in memory.
+        // SAFETY: since the only unsized types for T possible are slices, trait objects,
+        // and extern types, the input safety requirement is currently enough to
+        // satisfy the requirements of align_of_val_raw.
+        let raw_align = unsafe { align_of_val_raw(raw) };
+        let offset = layout.size() + layout.padding_needed_for(raw_align);
+        // Reverse the offset to find the original GcBox.
+        let box_ptr = unsafe { raw.byte_sub(offset) as *mut GcBox<T> };
+        unsafe { Self::from_ptr(box_ptr) }
     }

     /// Get a raw pointer to the underlying value `T`.
     #[unstable(feature = "gc", issue = "none")]
     pub fn into_raw(this: Self) -> *const T {
-        this.ptr.as_ptr() as *const T
+        Self::as_ptr(&this)
     }

     /// Get a raw pointer to the underlying value `T`.
     #[unstable(feature = "gc", issue = "none")]
     pub fn as_ptr(this: &Self) -> *const T {
-        this.ptr.as_ptr() as *const T
+        let ptr: *mut GcBox<T> = NonNull::as_ptr(this.ptr);
+        unsafe { ptr::addr_of_mut!((*ptr).value) }
     }

     #[unstable(feature = "gc", issue = "none")]
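The offset arithmetic in `from_raw` above can be made concrete with a small
worked example. The `value_offset` helper is illustrative, modelling only the
header word, and the 8-byte header size assumes a 64-bit target:

```rust
#![feature(alloc_layout_extra)]

use std::alloc::Layout;

// Stand-in for `Layout::new::<GcBox<()>>()`: the header is just the
// finaliser word, modelled here as an optional fn pointer.
fn value_offset(value_align: usize) -> usize {
    let header = Layout::new::<Option<unsafe fn(*mut ())>>();
    header.size() + header.padding_needed_for(value_align)
}

fn main() {
    // An 8-byte-aligned value starts immediately after the 8-byte header...
    assert_eq!(value_offset(8), 8);
    // ...while a 16-byte-aligned value needs 8 bytes of padding first.
    assert_eq!(value_offset(16), 16);
}
```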
@@ -291,9 +344,7 @@ impl<T: Send + Sync> Gc<T> {
     #[unstable(feature = "gc", issue = "none")]
     #[cfg_attr(not(test), rustc_diagnostic_item = "gc_ctor")]
     pub fn new(value: T) -> Self {
-        let mut gc = unsafe { Self::new_internal(value) };
-        gc.register_finalizer();
-        gc
+        unsafe { Self::new_internal(value) }
     }
 }

@@ -355,57 +406,35 @@ impl<T> Gc<T> {
     #[cfg(not(no_global_oom_handling))]
     #[unstable(feature = "gc", issue = "none")]
     pub unsafe fn new_unsynchronised(value: T) -> Self {
-        let mut gc = unsafe { Self::new_internal(value) };
-        gc.register_finalizer();
-        gc
+        unsafe { Self::new_internal(value) }
     }

     #[inline(always)]
     #[cfg(not(no_global_oom_handling))]
     unsafe fn new_internal(value: T) -> Self {
-        unsafe { Self::from_inner(Box::leak(Box::new_in(GcBox(value), GcAllocator)).into()) }
-    }
-
-    fn register_finalizer(&mut self) {
         #[cfg(not(bootstrap))]
-        if !core::mem::needs_finalizer::<T>() {
-            return;
-        }
-
-        #[cfg(profile_gc)]
-        FINALIZERS_REGISTERED.fetch_add(1, atomic::Ordering::Relaxed);
-
-        unsafe extern "C" fn finalizer<T>(obj: *mut u8, _meta: *mut u8) {
-            unsafe {
-                drop_in_place(obj as *mut T);
-                #[cfg(profile_gc)]
-                FINALIZERS_COMPLETED.fetch_add(1, atomic::Ordering::Relaxed);
-            }
-        }
-
-        unsafe {
-            bdwgc::GC_register_finalizer_no_order(
-                self.ptr.as_ptr() as *mut u8,
-                Some(finalizer::<T>),
-                null_mut(),
-                null_mut(),
-                null_mut(),
-            )
-        }
-    }
-
-    #[unstable(feature = "gc", issue = "none")]
-    pub fn unregister_finalizer(&mut self) {
-        let ptr = self.ptr.as_ptr() as *mut GcBox<T> as *mut u8;
-        unsafe {
-            bdwgc::GC_register_finalizer(
-                ptr,
-                None,
-                ::core::ptr::null_mut(),
-                ::core::ptr::null_mut(),
-                ::core::ptr::null_mut(),
+        if !crate::mem::needs_finalizer::<T>() {
+            return Self::from_inner(
+                Box::leak(Box::new_in(GcBox { finalizer: None, value }, GcAllocator)).into(),
             );
         }
+
+        // By explicitly using type parameters here, we force rustc to compile monomorphised drop
+        // glue for `GcBox<T>`. This ensures that the fn pointer points to the correct drop method
+        // (or chain of drop methods) for the type `T`. After this, it's safe to cast it to the
+        // generic function pointer `Finalizer` and then pass that around inside the collector where
+        // the type of the object is unknown.
+        //
+        // Note that we reify a `drop_in_place` for `GcBox<T>` here and not just `T` -- even though
+        // `GcBox` has no drop implementation! This is because `T` is stored at some offset inside
+        // `GcBox`, and doing it this way means that we don't have to manually add these offsets
+        // later when we call the finaliser.
+        let drop_fn = drop_in_place::<GcBox<T>> as unsafe fn(_);
+        let tagged = transmute::<_, usize>(drop_fn) | 0x1;
+        let finalizer = Some(transmute::<usize, Finalizer>(tagged));
+        Self::from_inner(
+            Box::leak(Box::new_in(GcBox { finalizer, value }, GcFinalizedAllocator)).into(),
+        )
     }
 }

@@ -504,11 +533,9 @@ impl<T> Gc<MaybeUninit<T>> {
     #[unstable(feature = "gc", issue = "none")]
     pub unsafe fn assume_init(self) -> Gc<T> {
         let ptr = self.ptr.as_ptr() as *mut GcBox<MaybeUninit<T>>;
-        let mut gc = unsafe { Gc::from_inner((&mut *ptr).assume_init()) };
+        unsafe { Gc::from_inner((&mut *ptr).assume_init()) }
         // Now that T is initialized, we must make sure that it's dropped when
         // `GcBox` is freed.
-        gc.register_finalizer();
-        gc
     }
 }

@@ -740,8 +767,9 @@ impl<T: ?Sized> fmt::Pointer for Gc<T> {
 impl<T: ?Sized> Deref for Gc<T> {
     type Target = T;

+    #[inline(always)]
     fn deref(&self) -> &Self::Target {
-        unsafe { &*(self.ptr.as_ptr() as *const T) }
+        &self.inner().value
     }
 }
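The drop-glue reification in `new_internal` above can be replayed outside the
collector. In this self-contained sketch, `Wrapper`, `reify`, and
`run_finalizer` are illustrative stand-ins for `GcBox`, the allocation-time
reification, and the collector's finalisation call; the low-bit tag mirrors
the `| 0x1` in the patch:

```rust
use std::mem::transmute;
use std::ptr::drop_in_place;

// `Wrapper<T>` stands in for `GcBox<T>`: a header word followed by the value.
#[repr(C)]
struct Wrapper<T> {
    header: usize,
    value: T,
}

type ErasedFinalizer = unsafe fn(*mut Wrapper<()>);

// Monomorphise drop glue for `Wrapper<T>` and erase it to a tagged word.
fn reify<T>() -> usize {
    let drop_fn = drop_in_place::<Wrapper<T>> as unsafe fn(*mut Wrapper<T>);
    // Fn pointers are at least word-aligned, so the low bit is free for a tag.
    unsafe { transmute::<unsafe fn(*mut Wrapper<T>), usize>(drop_fn) | 0x1 }
}

// What a collector-side caller would do: strip the tag and call the glue
// through the type-erased signature.
unsafe fn run_finalizer(tagged: usize, obj: *mut Wrapper<()>) {
    let finalizer = unsafe { transmute::<usize, ErasedFinalizer>(tagged & !0x1) };
    unsafe { finalizer(obj) }
}

fn main() {
    let tagged = reify::<String>();
    let obj = Box::into_raw(Box::new(Wrapper { header: tagged, value: String::from("bye") }));
    // SAFETY: `obj` really is a `Wrapper<String>`, matching the reified glue.
    unsafe { run_finalizer((*obj).header, obj as *mut Wrapper<()>) };
    // The String's heap buffer is now freed; the Wrapper allocation itself is
    // leaked here, standing in for memory the collector would reclaim.
}
```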
diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs
index 552a8c9a4bb54..f04b75f384618 100644
--- a/library/std/src/lib.rs
+++ b/library/std/src/lib.rs
@@ -321,6 +321,7 @@
 #![feature(hashmap_internals)]
 #![feature(ip)]
 #![feature(ip_in_core)]
+#![feature(layout_for_ptr)]
 #![feature(maybe_uninit_slice)]
 #![feature(maybe_uninit_uninit_array)]
 #![feature(maybe_uninit_write_slice)]
diff --git a/tests/ui/runtime/gc/run_finalizers.rs b/tests/ui/runtime/gc/run_finalizers.rs
deleted file mode 100644
index a3a08a566e05a..0000000000000
--- a/tests/ui/runtime/gc/run_finalizers.rs
+++ /dev/null
@@ -1,42 +0,0 @@
-// run-pass
-// ignore-tidy-linelength
-#![feature(gc)]
-
-use std::gc::{Gc, GcAllocator};
-use std::sync::atomic::{self, AtomicUsize};
-use std::thread;
-use std::time;
-
-struct Finalizable(usize);
-
-impl Drop for Finalizable {
-    fn drop(&mut self) {
-        FINALIZER_COUNT.fetch_add(1, atomic::Ordering::Relaxed);
-    }
-}
-
-static FINALIZER_COUNT: AtomicUsize = AtomicUsize::new(0);
-static ALLOCATED_COUNT: usize = 100;
-
-fn foo() {
-    for i in 0..ALLOCATED_COUNT {
-        {
-            let mut _gc = Some(Gc::new(Finalizable(i)));
-
-            // Zero the root to the GC object.
-            _gc = None;
-        }
-    }
-}
-
-fn main() {
-    foo();
-    GcAllocator::force_gc();
-
-    // Wait enough time for the finaliser thread to finish running.
-
-    thread::sleep(time::Duration::from_millis(100));
-    // On some platforms, the last object might not be finalised because it's
-    // kept alive by a lingering reference.
-    assert!(FINALIZER_COUNT.load(atomic::Ordering::Relaxed) >= ALLOCATED_COUNT -1);
-}
diff --git a/tests/ui/runtime/gc/unchecked_finalizer.rs b/tests/ui/runtime/gc/unchecked_finalizer.rs
index 3e0ba41526a7c..e1609d6dc54b2 100644
--- a/tests/ui/runtime/gc/unchecked_finalizer.rs
+++ b/tests/ui/runtime/gc/unchecked_finalizer.rs
@@ -19,7 +19,8 @@ impl Drop for UnsafeContainer {
 impl !FinalizerSafe for UnsafeContainer {}

 static FINALIZER_COUNT: AtomicUsize = AtomicUsize::new(0);
-static ALLOCATED_COUNT: usize = 100;
+static ALLOCATED_COUNT: usize = 10;
+static SLEEP_MAX: u64 = 8192; // in millis.

 fn foo() {
     for i in 0..ALLOCATED_COUNT {
@@ -36,10 +37,17 @@ fn main() {
     foo();
     GcAllocator::force_gc();

-    // Wait enough time for the finaliser thread to finish running.
-    thread::sleep(time::Duration::from_millis(100));
+    let mut count = FINALIZER_COUNT.load(atomic::Ordering::Relaxed);
+    let mut sleep_duration = 2;
+    while count < ALLOCATED_COUNT - 1 && sleep_duration <= SLEEP_MAX {
+        // Wait an acceptable amount of time for the finalizer thread to do its work.
+        thread::sleep(time::Duration::from_millis(sleep_duration));
+        sleep_duration = sleep_duration * 2;
+        count = FINALIZER_COUNT.load(atomic::Ordering::Relaxed);
+    }

     // On some platforms, the last object might not be finalised because it's
     // kept alive by a lingering reference.
-    assert!(FINALIZER_COUNT.load(atomic::Ordering::Relaxed) >= ALLOCATED_COUNT -1);
+    assert!(count >= ALLOCATED_COUNT - 1);
+    assert!(count <= ALLOCATED_COUNT);
 }
diff --git a/tests/ui/runtime/gc/zero_vecs.rs b/tests/ui/runtime/gc/zero_vecs.rs
index 21bb92618f0c4..01bf5d86edaa8 100644
--- a/tests/ui/runtime/gc/zero_vecs.rs
+++ b/tests/ui/runtime/gc/zero_vecs.rs
@@ -11,6 +11,7 @@ use std::sync::atomic::{self, AtomicUsize};
 use std::thread;
 use std::time;

+static SLEEP_MAX: u64 = 8192; // in millis.
 struct Finalizable(usize);

 impl Drop for Finalizable {
@@ -41,10 +42,17 @@ fn main() {
     test_pop(&mut v1);
     test_pop(&mut v1);

+    let expected = (capacity * 2) - 1;
     GcAllocator::force_gc();

-    // Wait enough time for the finaliser thread to finish running.
-    thread::sleep(time::Duration::from_millis(100));
+    let mut count = FINALIZER_COUNT.load(atomic::Ordering::Relaxed);
+    let mut sleep_duration = 2;
+    while count < expected && sleep_duration <= SLEEP_MAX {
+        // Wait an acceptable amount of time for the finalizer thread to do its work.
+        thread::sleep(time::Duration::from_millis(sleep_duration));
+        sleep_duration = sleep_duration * 2;
+        count = FINALIZER_COUNT.load(atomic::Ordering::Relaxed);
+    }

     // This tests that finalisation happened indirectly by trying to overwrite references to live GC
     // objects in order for Boehm to consider them dead. This is inherently flaky because we might
@@ -55,5 +63,5 @@ fn main() {
     // what matters is that *most* were, as this is enough to have confidence that popping an item
     // from a vector does not allow it to be indirectly kept alive from within the vector's backing
     // store.
-    assert!(FINALIZER_COUNT.load(atomic::Ordering::Relaxed) >= (capacity * 2) -1);
+    assert!(FINALIZER_COUNT.load(atomic::Ordering::Relaxed) >= expected);
 }

From b40e488f709aa9eb9ad26f719627e297c88d016b Mon Sep 17 00:00:00 2001
From: Jake Hughes
Date: Wed, 17 Apr 2024 12:32:23 +0100
Subject: [PATCH 2/2] Update BDWGC submodule

---
 src/bdwgc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/bdwgc b/src/bdwgc
index a601f2a49e199..9bfdaa2aeaf7a 160000
--- a/src/bdwgc
+++ b/src/bdwgc
@@ -1 +1 @@
-Subproject commit a601f2a49e1991007123a24520f6e6b00a79c7f3
+Subproject commit 9bfdaa2aeaf7a2becb18f72e5c9bc147a039baae