Skip to content

Commit

Permalink
Use BDWGC's buffered finalisation
Browse files Browse the repository at this point in the history
This commit switches to BDWGC's new buffered finalisation mechanism,
which uses the disclaim API. Gc<T> objects which need finalising are
allocated with a 'disclaim' (i.e. finaliser) function pointer in their
first word.

During a collection, objects needing finalisation are added to buffers,
which are then processed on a separate thread from within BDWGC. The
table below shows the performance difference between this approach
(called 'disclaim') and the previous tip-of-tree commit (which used the
much slower, channel-based off-thread finalisation).

```
 Benchmark      Config     Type      mean (ms)

  Bounce        baseline   micro     874
  Bounce        disclaim   micro     114
  BubbleSort    baseline   micro     1273
  BubbleSort    disclaim   micro     144
  DeltaBlue     baseline   macro     552
  DeltaBlue     disclaim   macro     91
  Dispatch      baseline   micro     726
  Dispatch      disclaim   micro     120
  Fannkuch      baseline   micro     364
  Fannkuch      disclaim   micro     66
  Fibonacci     baseline   micro     1559
  Fibonacci     disclaim   micro     200
  FieldLoop     baseline   micro     835
  FieldLoop     disclaim   micro     179
  GraphSearch   baseline   macro     175
  GraphSearch   disclaim   macro     43
  IntegerLoop   baseline   micro     1685
  IntegerLoop   disclaim   micro     181
  JsonSmall     baseline   macro     542
  JsonSmall     disclaim   macro     125
  List          baseline   micro     1447
  List          disclaim   micro     149
  Loop          baseline   micro     2099
  Loop          disclaim   micro     231
  Mandelbrot    baseline   micro     1172
  Mandelbrot    disclaim   micro     160
  NBody         baseline   macro     722
  NBody         disclaim   macro     69
  PageRank      baseline   macro     662
  PageRank      disclaim   macro     150
  Permute       baseline   micro     1281
  Permute       disclaim   micro     156
  Queens        baseline   micro     1017
  Queens        disclaim   micro     131
  QuickSort     baseline   micro     251
  QuickSort     disclaim   micro     41
  Recurse       baseline   micro     1313
  Recurse       disclaim   micro     183
  Richards      baseline   macro     Failed
  Richards      disclaim   macro     1722
  Sieve         baseline   micro     1068
  Sieve         disclaim   micro     215
  Storage       baseline   micro     79
  Storage       disclaim   micro     43
  Sum           baseline   micro     864
  Sum           disclaim   micro     105
  Towers        baseline   micro     1309
  Towers        disclaim   micro     153
  TreeSort      baseline   micro     350
  TreeSort      disclaim   micro     66
  WhileLoop     baseline   micro     1962
  WhileLoop     disclaim   micro     262
```
  • Loading branch information
jacob-hughes committed Apr 18, 2024
1 parent 21f4c79 commit d4c6cf0
Show file tree
Hide file tree
Showing 7 changed files with 115 additions and 108 deletions.
2 changes: 2 additions & 0 deletions library/bdwgc/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ fn main() {
.pic(true)
.define("BUILD_SHARED_LIBS", "OFF")
.cflag("-DGC_ALWAYS_MULTITHREADED")
.cflag("-DDISCLAIM_MARK_CHILDREN")
.cflag("-DBUFFERED_FINALIZATION")
.cflag("-DGC_JAVA_FINALIZATION");

if env::var("ENABLE_GC_ASSERTIONS").map_or(false, |v| v == "true") {
Expand Down
2 changes: 2 additions & 0 deletions library/bdwgc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ pub struct ProfileStats {
extern "C" {
pub fn GC_malloc(nbytes: usize) -> *mut u8;

pub fn GC_buffered_finalize_malloc(nbytes: usize) -> *mut u8;

pub fn GC_posix_memalign(mem_ptr: *mut *mut u8, align: usize, nbytes: usize) -> i32;

pub fn GC_realloc(old: *mut u8, new_size: usize) -> *mut u8;
Expand Down
146 changes: 87 additions & 59 deletions library/std/src/gc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ use core::{
cmp::{self, Ordering},
fmt,
hash::{Hash, Hasher},
marker::{FinalizerSafe, PhantomData, Unsize},
mem::MaybeUninit,
marker::{FinalizerSafe, Unsize},
mem::{align_of_val_raw, transmute, MaybeUninit},
ops::{CoerceUnsized, Deref, DispatchFromDyn, Receiver},
ptr::{self, drop_in_place, null_mut, NonNull},
ptr::{self, drop_in_place, NonNull},
};

pub use core::gc::*;
Expand All @@ -72,6 +72,25 @@ pub const MIN_ALIGN: usize = 8;
#[derive(Debug)]
pub struct GcAllocator;

/// Allocator that requests memory from BDWGC's buffered-finalisation heap.
///
/// Objects allocated here are traced by the collector and, once dead, have
/// their finaliser run off-thread by BDWGC itself, so memory is never freed
/// explicitly and `deallocate` is a no-op.
///
/// NOTE(review): `layout.align()` is never consulted — presumably BDWGC's
/// minimum allocation alignment is assumed to suffice; confirm against
/// `MIN_ALIGN`.
#[derive(Debug)]
pub struct GcFinalizedAllocator;

unsafe impl Allocator for GcFinalizedAllocator {
    #[inline]
    fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
        let size = layout.size();
        if size == 0 {
            // Zero-sized requests never touch the collector.
            return Ok(NonNull::slice_from_raw_parts(layout.dangling(), 0));
        }
        // SAFETY: plain FFI call into BDWGC; a null return signals that the
        // collector could not satisfy the request.
        let raw = unsafe { bdwgc::GC_buffered_finalize_malloc(size) as *mut u8 };
        match NonNull::new(raw) {
            Some(start) => Ok(NonNull::slice_from_raw_parts(start, size)),
            None => Err(AllocError),
        }
    }

    // The collector reclaims this memory itself; freeing is intentionally a no-op.
    unsafe fn deallocate(&self, _: NonNull<u8>, _: Layout) {}
}

unsafe impl GlobalAlloc for GcAllocator {
#[inline]
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
Expand Down Expand Up @@ -176,11 +195,24 @@ pub fn thread_registered() -> bool {
unsafe { bdwgc::GC_thread_is_registered() != 0 }
}

// The function pointer executed on BDWGC's finalization thread.
//
// This is not polymorphic because no type information about the object inside the box is
// available during collection. Using the `()` type is nevertheless sound because, at
// allocation time, we guarantee the pointer is the monomorphised `drop_in_place` fn for the
// underlying value's real type.
type Finalizer = unsafe fn(*mut GcBox<()>);

////////////////////////////////////////////////////////////////////////////////
// GC API
////////////////////////////////////////////////////////////////////////////////

struct GcBox<T: ?Sized>(T);
/// The heap cell backing a `Gc<T>` allocation.
///
/// `#[repr(C)]` is required for correctness: BDWGC's disclaim API reads the
/// finaliser from the *first word* of the allocation, and `Gc::from_raw`
/// recovers the box from a value pointer by assuming `value` is the trailing
/// field. The default Rust representation guarantees neither field order nor
/// first-word placement.
#[repr(C)]
struct GcBox<T: ?Sized> {
    /// The finalizer fn pointer for `GcBox<T>`. `None` if needs_finalize<T> == false.
    #[allow(dead_code)]
    finalizer: Option<Finalizer>,
    /// The object being garbage collected.
    value: T,
}

/// A multi-threaded garbage collected pointer.
///
Expand All @@ -190,7 +222,6 @@ struct GcBox<T: ?Sized>(T);
#[cfg_attr(not(test), rustc_diagnostic_item = "gc")]
pub struct Gc<T: ?Sized> {
ptr: NonNull<GcBox<T>>,
_phantom: PhantomData<T>,
}

unsafe impl<T: ?Sized + Send> Send for Gc<T> {}
Expand Down Expand Up @@ -241,8 +272,19 @@ impl<T: ?Sized> Drop for Gc<T> {
}

impl<T: ?Sized> Gc<T> {
// Borrow the underlying `GcBox<T>` cell.
#[inline(always)]
fn inner(&self) -> &GcBox<T> {
// SAFETY: while this `Gc` is alive, the collector keeps the allocation
// alive too, so the inner pointer is guaranteed to reference a valid
// `GcBox<T>`.
unsafe { self.ptr.as_ref() }
}

unsafe fn from_inner(ptr: NonNull<GcBox<T>>) -> Self {
Self { ptr, _phantom: PhantomData }
Self { ptr }
}

// Construct a `Gc` from a raw `GcBox` pointer.
//
// # Safety
//
// `ptr` must be non-null and point to a live `GcBox<T>` on the GC heap.
unsafe fn from_ptr(ptr: *mut GcBox<T>) -> Self {
unsafe { Self::from_inner(NonNull::new_unchecked(ptr)) }
}

/// Get a `Gc<T>` from a raw pointer.
Expand All @@ -255,19 +297,30 @@ impl<T: ?Sized> Gc<T> {
/// size and alignment of the originally allocated block.
#[unstable(feature = "gc", issue = "none")]
pub fn from_raw(raw: *const T) -> Gc<T> {
Gc { ptr: unsafe { NonNull::new_unchecked(raw as *mut GcBox<T>) }, _phantom: PhantomData }
let layout = Layout::new::<GcBox<()>>();
// Align the unsized value to the end of the GcBox.
// Because GcBox is repr(C), it will always be the last field in memory.
// SAFETY: since the only unsized types for T possible are slices, trait objects,
// and extern types, the input safety requirement is currently enough to
// satisfy the requirements of align_of_val_raw.
let raw_align = unsafe { align_of_val_raw(raw) };
let offset = layout.size() + layout.padding_needed_for(raw_align);
// Reverse the offset to find the original GcBox.
let box_ptr = unsafe { raw.byte_sub(offset) as *mut GcBox<T> };
unsafe { Self::from_ptr(box_ptr) }
}

/// Get a raw pointer to the underlying value `T`.
#[unstable(feature = "gc", issue = "none")]
pub fn into_raw(this: Self) -> *const T {
this.ptr.as_ptr() as *const T
Self::as_ptr(&this)
}

/// Get a raw pointer to the underlying value `T`.
#[unstable(feature = "gc", issue = "none")]
pub fn as_ptr(this: &Self) -> *const T {
this.ptr.as_ptr() as *const T
let ptr: *mut GcBox<T> = NonNull::as_ptr(this.ptr);
unsafe { ptr::addr_of_mut!((*ptr).value) }
}

#[unstable(feature = "gc", issue = "none")]
Expand All @@ -291,9 +344,7 @@ impl<T> Gc<T> {
#[unstable(feature = "gc", issue = "none")]
#[cfg_attr(not(test), rustc_diagnostic_item = "gc_ctor")]
pub fn new(value: T) -> Self {
let mut gc = unsafe { Self::new_internal(value) };
gc.register_finalizer();
gc
unsafe { Self::new_internal(value) }
}
}

Expand Down Expand Up @@ -355,57 +406,35 @@ impl<T> Gc<T> {
#[cfg(not(no_global_oom_handling))]
#[unstable(feature = "gc", issue = "none")]
pub unsafe fn new_unsynchronised(value: T) -> Self {
let mut gc = unsafe { Self::new_internal(value) };
gc.register_finalizer();
gc
unsafe { Self::new_internal(value) }
}

#[inline(always)]
#[cfg(not(no_global_oom_handling))]
unsafe fn new_internal(value: T) -> Self {
unsafe { Self::from_inner(Box::leak(Box::new_in(GcBox(value), GcAllocator)).into()) }
}

fn register_finalizer(&mut self) {
#[cfg(not(bootstrap))]
if !core::mem::needs_finalizer::<T>() {
return;
}

#[cfg(profile_gc)]
FINALIZERS_REGISTERED.fetch_add(1, atomic::Ordering::Relaxed);

unsafe extern "C" fn finalizer<T>(obj: *mut u8, _meta: *mut u8) {
unsafe {
drop_in_place(obj as *mut T);
#[cfg(profile_gc)]
FINALIZERS_COMPLETED.fetch_add(1, atomic::Ordering::Relaxed);
}
}

unsafe {
bdwgc::GC_register_finalizer_no_order(
self.ptr.as_ptr() as *mut u8,
Some(finalizer::<T>),
null_mut(),
null_mut(),
null_mut(),
)
}
}

#[unstable(feature = "gc", issue = "none")]
pub fn unregister_finalizer(&mut self) {
let ptr = self.ptr.as_ptr() as *mut GcBox<T> as *mut u8;
unsafe {
bdwgc::GC_register_finalizer(
ptr,
None,
::core::ptr::null_mut(),
::core::ptr::null_mut(),
::core::ptr::null_mut(),
if !crate::mem::needs_finalizer::<T>() {
return Self::from_inner(
Box::leak(Box::new_in(GcBox { finalizer: None, value }, GcAllocator)).into(),
);
}

// By explicitly using type parameters here, we force rustc to compile monomorphised drop
// glue for `GcBox<T>`. This ensures that the fn pointer points to the correct drop method
// (or chain of drop methods) for the type `T`. After this, it's safe to cast it to the
// generic function pointer `Finalizer` and then pass that around inside the collector where
// the type of the object is unknown.
//
// Note that we reify a `drop_in_place` for `GcBox<T>` here and not just `T` -- even though
// `GcBox` has no drop implementation! This is because `T` is stored at some offset inside
// `GcBox`, and doing it this way means that we don't have to manually add these offsets
// later when we call the finaliser.
let drop_fn = drop_in_place::<GcBox<T>> as unsafe fn(_);
let tagged = transmute::<_, usize>(drop_fn) | 0x1;
let finalizer = Some(transmute::<usize, Finalizer>(tagged));
Self::from_inner(
Box::leak(Box::new_in(GcBox { finalizer, value }, GcFinalizedAllocator)).into(),
)
}
}

Expand Down Expand Up @@ -504,11 +533,9 @@ impl<T: Send + Sync> Gc<MaybeUninit<T>> {
#[unstable(feature = "gc", issue = "none")]
pub unsafe fn assume_init(self) -> Gc<T> {
let ptr = self.ptr.as_ptr() as *mut GcBox<MaybeUninit<T>>;
let mut gc = unsafe { Gc::from_inner((&mut *ptr).assume_init()) };
unsafe { Gc::from_inner((&mut *ptr).assume_init()) }
// Now that T is initialized, we must make sure that it's dropped when
// `GcBox<T>` is freed.
gc.register_finalizer();
gc
}
}

Expand Down Expand Up @@ -740,8 +767,9 @@ impl<T: ?Sized> fmt::Pointer for Gc<T> {
impl<T: ?Sized> Deref for Gc<T> {
type Target = T;

#[inline(always)]
fn deref(&self) -> &Self::Target {
unsafe { &*(self.ptr.as_ptr() as *const T) }
&self.inner().value
}
}

Expand Down
1 change: 1 addition & 0 deletions library/std/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,7 @@
#![feature(hashmap_internals)]
#![feature(ip)]
#![feature(ip_in_core)]
#![feature(layout_for_ptr)]
#![feature(maybe_uninit_slice)]
#![feature(maybe_uninit_uninit_array)]
#![feature(maybe_uninit_write_slice)]
Expand Down
42 changes: 0 additions & 42 deletions tests/ui/runtime/gc/run_finalizers.rs

This file was deleted.

16 changes: 12 additions & 4 deletions tests/ui/runtime/gc/unchecked_finalizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ impl Drop for UnsafeContainer {
impl !FinalizerSafe for UnsafeContainer {}

static FINALIZER_COUNT: AtomicUsize = AtomicUsize::new(0);
static ALLOCATED_COUNT: usize = 100;
static ALLOCATED_COUNT: usize = 10;
static SLEEP_MAX: u64 = 8192; // in millis.

fn foo() {
for i in 0..ALLOCATED_COUNT {
Expand All @@ -36,10 +37,17 @@ fn main() {
foo();
GcAllocator::force_gc();

// Wait enough time for the finaliser thread to finish running.
thread::sleep(time::Duration::from_millis(100));
let mut count = FINALIZER_COUNT.load(atomic::Ordering::Relaxed);
let mut sleep_duration = 2;
while count < ALLOCATED_COUNT - 1 && sleep_duration <= SLEEP_MAX {
// Wait an acceptable amount of time for the finalizer thread to do its work.
thread::sleep(time::Duration::from_millis(sleep_duration));
sleep_duration = sleep_duration * 2;
count = FINALIZER_COUNT.load(atomic::Ordering::Relaxed);
}

// On some platforms, the last object might not be finalised because it's
// kept alive by a lingering reference.
assert!(FINALIZER_COUNT.load(atomic::Ordering::Relaxed) >= ALLOCATED_COUNT -1);
assert!(count >= ALLOCATED_COUNT - 1);
assert!(count <= ALLOCATED_COUNT);
}
14 changes: 11 additions & 3 deletions tests/ui/runtime/gc/zero_vecs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use std::sync::atomic::{self, AtomicUsize};
use std::thread;
use std::time;

static SLEEP_MAX: u64 = 8192; // in millis.
struct Finalizable(usize);

impl Drop for Finalizable {
Expand Down Expand Up @@ -41,10 +42,17 @@ fn main() {
test_pop(&mut v1);
test_pop(&mut v1);

let expected = (capacity * 2) - 1;
GcAllocator::force_gc();

// Wait enough time for the finaliser thread to finish running.
thread::sleep(time::Duration::from_millis(100));
let mut count = FINALIZER_COUNT.load(atomic::Ordering::Relaxed);
let mut sleep_duration = 2;
while count < expected && sleep_duration <= SLEEP_MAX {
// Wait an acceptable amount of time for the finalizer thread to do its work.
thread::sleep(time::Duration::from_millis(sleep_duration));
sleep_duration = sleep_duration * 2;
count = FINALIZER_COUNT.load(atomic::Ordering::Relaxed);
}

// This tests that finalisation happened indirectly by trying to overwrite references to live GC
// objects in order for Boehm to consider them dead. This is inherently flaky because we might
Expand All @@ -55,5 +63,5 @@ fn main() {
// what matters is that *most* were, as this is enough to have confidence that popping an item
// from a vector does not allow it to be indirectly kept alive from within the vector's backing
// store.
assert!(FINALIZER_COUNT.load(atomic::Ordering::Relaxed) >= (capacity * 2) -1);
assert!(FINALIZER_COUNT.load(atomic::Ordering::Relaxed) >= expected);
}

0 comments on commit d4c6cf0

Please sign in to comment.