Skip to content

Commit

Permalink
Fix thread-locals to be visible by Boehm
Browse files Browse the repository at this point in the history
Previously, thread-locals were invisible to Boehm. In other words, a pointer to a
heap object stored solely inside a thread-local would cause the GC to free the
object too early.

In order to fix this, we must disable the compiler-based thread-local
implementation. This is a shame because it is faster than the POSIX thread
library implementation, but it places thread-locals in a table in a segment of
the binary which Boehm cannot scan. It might be possible to support this in the
future, but for now we fall back to the POSIX thread implementation, for which a
fix exists.

We introduce a global thread-local rootset, which holds pointers to thread-local
values which Boehm can scan and use.
  • Loading branch information
jacob-hughes committed Dec 20, 2023
1 parent 52fc606 commit 3dd48d6
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 7 deletions.
27 changes: 27 additions & 0 deletions library/std/src/gc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ pub use alloc::gc::thread_registered;
pub use alloc::gc::GcAllocator;
pub use core::gc::*;

use crate::sync::Mutex;

#[cfg(profile_gc)]
use core::sync::atomic::{self, AtomicU64};

Expand All @@ -63,6 +65,31 @@ static FINALIZERS_REGISTERED: AtomicU64 = AtomicU64::new(0);
#[cfg(profile_gc)]
static FINALIZERS_COMPLETED: AtomicU64 = AtomicU64::new(0);

/// Global registry of thread-local roots.
///
/// Thread-local slots are not scanned by the Boehm collector, so every pointer
/// stored in one is mirrored here. The backing `Vec` is allocated with
/// `GcAllocator`, placing it on the GC heap where the collector will scan it
/// and thereby treat the recorded pointers as roots.
pub static TLS_ROOTSET: TLSRootset = TLSRootset::new();

#[derive(Debug)]
pub struct TLSRootset(Mutex<Vec<*mut u8, GcAllocator>>);

impl TLSRootset {
    /// Creates an empty rootset. `const` so it can initialise the static above.
    const fn new() -> Self {
        Self(Mutex::new(Vec::new_in(GcAllocator)))
    }

    /// Records `root` so the collector keeps anything it points at alive.
    pub fn push(&self, root: *mut u8) {
        self.0.lock().unwrap().push(root);
    }

    /// Un-registers a previously pushed root.
    ///
    /// Panics if `root` was never pushed — callers are expected to uphold the
    /// push/remove pairing, so a miss here is a bug.
    pub fn remove(&self, root: *mut u8) {
        let mut roots = self.0.lock().unwrap();
        let idx = roots
            .iter()
            .position(|&candidate| candidate == root)
            .unwrap();
        // Order of the rootset is irrelevant, so the O(1) removal is fine.
        roots.swap_remove(idx);
    }
}

// SAFETY: the raw pointers are only ever handed back to the collector for
// scanning; all mutation of the container is serialised through the Mutex.
unsafe impl Send for TLSRootset {}
unsafe impl Sync for TLSRootset {}

struct GcBox<T: ?Sized>(T);

/// A multi-threaded garbage collected pointer.
Expand Down
5 changes: 0 additions & 5 deletions library/std/src/sys/common/thread_local/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,6 @@ cfg_if::cfg_if! {
mod static_local;
#[doc(hidden)]
pub use static_local::{Key, thread_local_inner};
} else if #[cfg(target_thread_local)] {
#[doc(hidden)]
mod fast_local;
#[doc(hidden)]
pub use fast_local::{Key, thread_local_inner};
} else {
#[doc(hidden)]
mod os_local;
Expand Down
10 changes: 8 additions & 2 deletions library/std/src/sys/common/thread_local/os_local.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use super::lazy::LazyKeyInner;
use crate::cell::Cell;
use crate::gc::GcAllocator;
use crate::sys_common::thread_local_key::StaticKey as OsStaticKey;
use crate::{fmt, marker, panic, ptr};

Expand Down Expand Up @@ -142,7 +143,11 @@ impl<T: 'static> Key<T> {
let ptr = if ptr.is_null() {
// If the lookup returned null, we haven't initialized our own
// local copy, so do that now.
let ptr = Box::into_raw(Box::new(Value { inner: LazyKeyInner::new(), key: self }));
let ptr = Box::into_raw(Box::new_in(
Value { inner: LazyKeyInner::new(), key: self },
GcAllocator,
));
crate::gc::TLS_ROOTSET.push(ptr as *mut u8);
// SAFETY: At this point we are sure there is no value inside
// ptr so setting it will not affect anyone else.
unsafe {
Expand Down Expand Up @@ -174,7 +179,8 @@ unsafe extern "C" fn destroy_value<T: 'static>(ptr: *mut u8) {
// Wrap the call in a catch to ensure unwinding is caught in the event
// a panic takes place in a destructor.
if let Err(_) = panic::catch_unwind(|| unsafe {
let ptr = Box::from_raw(ptr as *mut Value<T>);
crate::gc::TLS_ROOTSET.remove(ptr);
let ptr = Box::from_raw_in(ptr as *mut Value<T>, GcAllocator);
let key = ptr.key;
key.os.set(ptr::invalid_mut(1));
drop(ptr);
Expand Down
1 change: 1 addition & 0 deletions library/std/src/sys/unix/thread_local_dtor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
// compiling from a newer linux to an older linux, so we also have a
// fallback implementation to use as well.
#[cfg(any(target_os = "linux", target_os = "fuchsia", target_os = "redox", target_os = "hurd"))]
#[allow(dead_code)]
pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
use crate::mem;
use crate::sys_common::thread_local_dtor::register_dtor_fallback;
Expand Down
1 change: 1 addition & 0 deletions library/std/src/sys_common/thread_local_key.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ impl StaticKey {
/// been allocated.
#[inline]
pub unsafe fn set(&self, val: *mut u8) {
    // Register the value as a GC root before storing it: thread-local slots
    // are not scanned by the Boehm collector, so anything reachable only from
    // TLS must be kept alive via the global rootset. (`val` is already
    // `*mut u8`; the previous `as *mut u8` cast was redundant.)
    //
    // NOTE(review): nothing removes this entry when the slot is later
    // overwritten or the key destroyed, so stale entries may keep dead
    // objects rooted — confirm whether a matching `remove` is needed.
    crate::gc::TLS_ROOTSET.push(val);
    imp::set(self.key(), val)
}

Expand Down
71 changes: 71 additions & 0 deletions tests/ui/runtime/gc/thread_local.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// run-pass
// ignore-tidy-linelength
#![feature(allocator_api)]
#![feature(gc)]
#![feature(negative_impls)]
#![feature(thread_local)]

use std::gc::{Gc, GcAllocator};
use std::{thread, time};
use std::sync::atomic::{self, AtomicUsize};
use std::time::{SystemTime, UNIX_EPOCH};

// Payload type allocated behind `Gc` handles in this test; its `Drop` impl
// acts as the finalizer the GC runs when an object is collected.
struct Finalizable(u32);

// Number of `Finalizable` values finalized so far. The test asserts this stays
// at zero while the worker thread is alive, i.e. nothing was collected early.
static FINALIZER_COUNT: AtomicUsize = AtomicUsize::new(0);

impl Drop for Finalizable {
    fn drop(&mut self) {
        // Relaxed suffices: the test only inspects the final count, not any
        // ordering relative to other memory operations.
        FINALIZER_COUNT.fetch_add(1, atomic::Ordering::Relaxed);
    }
}

thread_local!{
    // `Gc` handles stored directly in the thread-local slot.
    static LOCAL1: Gc<Finalizable> = Gc::new(Finalizable(1));
    static LOCAL2: Gc<Finalizable> = Gc::new(Finalizable(2));
    static LOCAL3: Gc<Finalizable> = Gc::new(Finalizable(3));

    // `Gc` handles behind an extra `GcAllocator`-allocated indirection —
    // exercises roots that live one hop away from the TLS slot itself.
    static LOCAL4: Box<Gc<Finalizable>, GcAllocator> = Box::new_in(Gc::new(Finalizable(4)), GcAllocator);
    static LOCAL5: Box<Gc<Finalizable>, GcAllocator> = Box::new_in(Gc::new(Finalizable(5)), GcAllocator);
    static LOCAL6: Box<Gc<Finalizable>, GcAllocator> = Box::new_in(Gc::new(Finalizable(6)), GcAllocator);
}

/// Initialises all six thread-locals on the calling thread, then keeps the
/// thread alive long enough for a GC forced from `main` to overlap with it.
fn do_stuff_with_tls() {
    let nanos = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().subsec_nanos();

    // Each thread-local must be read at least once so that it is actually
    // initialised. Folding the values into a quantity derived from the current
    // system time ensures the accesses cannot be optimised away (e.g. by
    // constant folding).
    let dynamic_value = nanos
        + LOCAL1.with(|l| l.0)
        + LOCAL2.with(|l| l.0)
        + LOCAL3.with(|l| l.0)
        + LOCAL4.with(|l| l.0)
        + LOCAL5.with(|l| l.0)
        + LOCAL6.with(|l| l.0);

    // Keep the thread alive long enough that the GC has a chance to scan its
    // thread-locals for roots.
    thread::sleep(time::Duration::from_millis(20));

    assert!(dynamic_value > 0);

    // If a GC invoked from the main thread had reclaimed any of this thread's
    // locals too early, their finalizers would have bumped this counter.
    assert_eq!(FINALIZER_COUNT.load(atomic::Ordering::Relaxed), 0);
}

fn main() {
    // Run the TLS workload on a separate thread so a collection forced from
    // here must find that thread's roots.
    let worker = std::thread::spawn(do_stuff_with_tls);

    // Give the worker a moment to initialise its thread-locals first.
    thread::sleep(time::Duration::from_millis(10));

    GcAllocator::force_gc();

    worker.join().unwrap();
}

0 comments on commit 3dd48d6

Please sign in to comment.