From 75c32ea3e23e32fd63ad64a36e371e113bb72915 Mon Sep 17 00:00:00 2001
From: Jake Hughes
Date: Wed, 20 Dec 2023 09:26:50 +0000
Subject: [PATCH] Fix thread-locals to be visible to Boehm

Previously, thread-locals were invisible to Boehm. In other words, a
pointer to a heap object stored solely inside a thread-local would cause
the GC to free the object too early.

In order to fix this, we must disable the compiler-based thread-local
implementation. This is a shame, because it is faster than the POSIX
thread library implementation, but it places thread-locals in a table in
a segment of the binary which Boehm can't scan. It might be possible to
fix this in the future, but for now we fall back to using the POSIX
thread implementation, because a fix exists for that.

We introduce a global thread-local rootset, which holds pointers to
thread-local values that Boehm can scan and use as roots.
---
 library/std/src/gc.rs                         | 27 +++++++
 .../std/src/sys/common/thread_local/mod.rs    |  5 --
 .../src/sys/common/thread_local/os_local.rs   | 10 ++-
 library/std/src/sys/unix/thread_local_dtor.rs |  1 +
 .../std/src/sys_common/thread_local_key.rs    |  1 +
 tests/ui/runtime/gc/thread_local.rs           | 70 +++++++++++++++++++
 6 files changed, 107 insertions(+), 7 deletions(-)
 create mode 100644 tests/ui/runtime/gc/thread_local.rs

diff --git a/library/std/src/gc.rs b/library/std/src/gc.rs
index 077d5742dcd2e..b8e7dc9b8f1c8 100644
--- a/library/std/src/gc.rs
+++ b/library/std/src/gc.rs
@@ -52,6 +52,8 @@ pub use alloc::gc::thread_registered;
 pub use alloc::gc::GcAllocator;
 pub use core::gc::*;
 
+use crate::sync::Mutex;
+
 #[cfg(profile_gc)]
 use core::sync::atomic::{self, AtomicU64};
 
@@ -63,6 +65,31 @@ static FINALIZERS_REGISTERED: AtomicU64 = AtomicU64::new(0);
 #[cfg(profile_gc)]
 static FINALIZERS_COMPLETED: AtomicU64 = AtomicU64::new(0);
 
+pub static TLS_ROOTSET: TLSRootset = TLSRootset::new();
+
+#[derive(Debug)]
+pub struct TLSRootset(Mutex<Vec<*mut u8, GcAllocator>>);
+
+impl TLSRootset {
+    const fn new() -> Self {
+        Self(Mutex::new(Vec::new_in(GcAllocator)))
+    }
+
+    pub fn push(&self, root: *mut u8) {
+        let mut v = self.0.lock().unwrap();
+        v.push(root);
+    }
+
+    pub fn remove(&self, root: *mut u8) {
+        let mut v = self.0.lock().unwrap();
+        let idx = v.iter().position(|r| *r == root).unwrap();
+        v.swap_remove(idx);
+    }
+}
+
+unsafe impl Send for TLSRootset {}
+unsafe impl Sync for TLSRootset {}
+
 struct GcBox<T>(T);
 
 /// A multi-threaded garbage collected pointer.
diff --git a/library/std/src/sys/common/thread_local/mod.rs b/library/std/src/sys/common/thread_local/mod.rs
index 8b2c839f837d4..dbd2c3ad10efe 100644
--- a/library/std/src/sys/common/thread_local/mod.rs
+++ b/library/std/src/sys/common/thread_local/mod.rs
@@ -11,11 +11,6 @@ cfg_if::cfg_if! {
         mod static_local;
         #[doc(hidden)]
         pub use static_local::{Key, thread_local_inner};
-    } else if #[cfg(target_thread_local)] {
-        #[doc(hidden)]
-        mod fast_local;
-        #[doc(hidden)]
-        pub use fast_local::{Key, thread_local_inner};
     } else {
         #[doc(hidden)]
         mod os_local;
diff --git a/library/std/src/sys/common/thread_local/os_local.rs b/library/std/src/sys/common/thread_local/os_local.rs
index 7cf291921228b..9138be44bdd98 100644
--- a/library/std/src/sys/common/thread_local/os_local.rs
+++ b/library/std/src/sys/common/thread_local/os_local.rs
@@ -1,5 +1,6 @@
 use super::lazy::LazyKeyInner;
 use crate::cell::Cell;
+use crate::gc::GcAllocator;
 use crate::sys_common::thread_local_key::StaticKey as OsStaticKey;
 use crate::{fmt, marker, panic, ptr};
 
@@ -142,7 +143,11 @@ impl<T: 'static> Key<T> {
         let ptr = if ptr.is_null() {
             // If the lookup returned null, we haven't initialized our own
             // local copy, so do that now.
-            let ptr = Box::into_raw(Box::new(Value { inner: LazyKeyInner::new(), key: self }));
+            let ptr = Box::into_raw(Box::new_in(
+                Value { inner: LazyKeyInner::new(), key: self },
+                GcAllocator,
+            ));
+            crate::gc::TLS_ROOTSET.push(ptr as *mut u8);
             // SAFETY: At this point we are sure there is no value inside
             // ptr so setting it will not affect anyone else.
             unsafe {
@@ -174,7 +179,8 @@ unsafe extern "C" fn destroy_value<T: 'static>(ptr: *mut u8) {
     // Wrap the call in a catch to ensure unwinding is caught in the event
     // a panic takes place in a destructor.
     if let Err(_) = panic::catch_unwind(|| unsafe {
-        let ptr = Box::from_raw(ptr as *mut Value<T>);
+        crate::gc::TLS_ROOTSET.remove(ptr);
+        let ptr = Box::from_raw_in(ptr as *mut Value<T>, GcAllocator);
         let key = ptr.key;
         key.os.set(ptr::invalid_mut(1));
         drop(ptr);
diff --git a/library/std/src/sys/unix/thread_local_dtor.rs b/library/std/src/sys/unix/thread_local_dtor.rs
index fba2a676f280f..cfc34e162c367 100644
--- a/library/std/src/sys/unix/thread_local_dtor.rs
+++ b/library/std/src/sys/unix/thread_local_dtor.rs
@@ -12,6 +12,7 @@
 // compiling from a newer linux to an older linux, so we also have a
 // fallback implementation to use as well.
 #[cfg(any(target_os = "linux", target_os = "fuchsia", target_os = "redox", target_os = "hurd"))]
+#[allow(dead_code)]
 pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
     use crate::mem;
     use crate::sys_common::thread_local_dtor::register_dtor_fallback;
diff --git a/library/std/src/sys_common/thread_local_key.rs b/library/std/src/sys_common/thread_local_key.rs
index 204834984a227..f8030b45f1bbc 100644
--- a/library/std/src/sys_common/thread_local_key.rs
+++ b/library/std/src/sys_common/thread_local_key.rs
@@ -123,6 +123,7 @@ impl StaticKey {
     /// been allocated.
     #[inline]
     pub unsafe fn set(&self, val: *mut u8) {
+        crate::gc::TLS_ROOTSET.push(val as *mut u8);
         imp::set(self.key(), val)
     }
 
diff --git a/tests/ui/runtime/gc/thread_local.rs b/tests/ui/runtime/gc/thread_local.rs
new file mode 100644
index 0000000000000..8923b58e43af7
--- /dev/null
+++ b/tests/ui/runtime/gc/thread_local.rs
@@ -0,0 +1,70 @@
+// run-pass
+// ignore-tidy-linelength
+#![feature(allocator_api)]
+#![feature(gc)]
+#![feature(negative_impls)]
+#![feature(thread_local)]
+
+use std::gc::{Gc, GcAllocator};
+use std::{thread, time};
+use std::sync::atomic::{self, AtomicUsize};
+use std::time::{SystemTime, UNIX_EPOCH};
+
+struct Finalizable(u32);
+
+static FINALIZER_COUNT: AtomicUsize = AtomicUsize::new(0);
+
+impl Drop for Finalizable {
+    fn drop(&mut self) {
+        FINALIZER_COUNT.fetch_add(1, atomic::Ordering::Relaxed);
+    }
+}
+
+thread_local!{
+    static LOCAL1: Gc<Finalizable> = Gc::new(Finalizable(1));
+    static LOCAL2: Gc<Finalizable> = Gc::new(Finalizable(2));
+    static LOCAL3: Gc<Finalizable> = Gc::new(Finalizable(3));
+
+    static LOCAL4: Box<Gc<Finalizable>, GcAllocator> = Box::new_in(Gc::new(Finalizable(4)), GcAllocator);
+    static LOCAL5: Box<Gc<Finalizable>, GcAllocator> = Box::new_in(Gc::new(Finalizable(5)), GcAllocator);
+    static LOCAL6: Box<Gc<Finalizable>, GcAllocator> = Box::new_in(Gc::new(Finalizable(6)), GcAllocator);
+}
+
+fn do_stuff_with_tls() {
+    let nanos = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().subsec_nanos();
+
+    // We need to use the thread-local at least once to ensure that it is initialised. By adding it
+    // to the current system time, we ensure that this use can't be optimised away (e.g. by constant
+    // folding).
+    let mut dynamic_value = nanos;
+
+    dynamic_value += LOCAL1.with(|l| l.0);
+    dynamic_value += LOCAL2.with(|l| l.0);
+    dynamic_value += LOCAL3.with(|l| l.0);
+    dynamic_value += LOCAL4.with(|l| l.0);
+    dynamic_value += LOCAL5.with(|l| l.0);
+    dynamic_value += LOCAL6.with(|l| l.0);
+
+    // Keep the thread alive long enough so that the GC has the chance to scan its thread-locals for
+    // roots.
+    thread::sleep(time::Duration::from_millis(20));
+
+
+    assert!(dynamic_value > 0);
+
+    // This ensures that a GC invoked from the main thread does not cause this thread's thread
+    // locals to be reclaimed too early.
+    assert_eq!(FINALIZER_COUNT.load(atomic::Ordering::Relaxed), 0);
+
+}
+
+fn main() {
+    let t2 = std::thread::spawn(do_stuff_with_tls);
+
+    // Wait a little bit of time for the t2 to initialise thread-locals.
+    thread::sleep(time::Duration::from_millis(10));
+
+    GcAllocator::force_gc();
+
+    let _ = t2.join().unwrap();
+}