From 16aefa2118fffb086a946f1c5c9bcc7eab854d7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Fri, 27 Sep 2024 19:38:55 +0300 Subject: [PATCH] use_file: switch to `futex` on Linux and to `nanosleep` on other targets --- CHANGELOG.md | 9 +- src/linux_android_with_fallback.rs | 8 +- src/use_file.rs | 292 ++++++++++++++++------------- 3 files changed, 173 insertions(+), 136 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fc39b075..b781ac15 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,13 +7,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Breaking Changes -- Update MSRV to 1.38 [#425] +- Update MSRV to 1.60 [#472] - Remove support of the `wasm32-wasi` target (use `wasm32-wasip1` or `wasm32-wasip2` instead) [#499] +### Changed +- Switch to `futex` on Linux and to `nanosleep`-based wait loop on other targets + in the `use_file` backend [#490] + ### Added - `wasm32-wasip1` and `wasm32-wasip2` support [#499] -[#425]: https://github.com/rust-random/getrandom/pull/425 +[#472]: https://github.com/rust-random/getrandom/pull/472 +[#490]: https://github.com/rust-random/getrandom/pull/490 [#499]: https://github.com/rust-random/getrandom/pull/499 ## [0.2.15] - 2024-05-06 diff --git a/src/linux_android_with_fallback.rs b/src/linux_android_with_fallback.rs index 98fa15e8..b633faad 100644 --- a/src/linux_android_with_fallback.rs +++ b/src/linux_android_with_fallback.rs @@ -8,7 +8,13 @@ pub fn getrandom_inner(dest: &mut [MaybeUninit]) -> Result<(), Error> { if HAS_GETRANDOM.unsync_init(is_getrandom_available) { linux_android::getrandom_inner(dest) } else { - use_file::getrandom_inner(dest) + // prevent inlining of the fallback implementation + #[inline(never)] + fn inner(dest: &mut [MaybeUninit]) -> Result<(), Error> { + use_file::getrandom_inner(dest) + } + + inner(dest) } } diff --git a/src/use_file.rs b/src/use_file.rs index 4fdbe3d7..247272dc 100644 --- a/src/use_file.rs +++ b/src/use_file.rs @@ -4,7 +4,6 @@ use crate::{ Error, }; use core::{ - cell::UnsafeCell, ffi::c_void, mem::MaybeUninit, sync::atomic::{AtomicI32, Ordering}, @@ -18,159 +17,186 @@ use core::{ /// - On Haiku and QNX Neutrino they are identical. const FILE_PATH: &[u8] = b"/dev/urandom\0"; -// Do not inline this when it is the fallback implementation, but don't mark it -// `#[cold]` because it is hot when it is actually used. -#[cfg_attr(any(target_os = "android", target_os = "linux"), inline(never))] +// File descriptor is a "nonnegative integer", so we can safely use negative sentinel values. +const FD_UNINIT: libc::c_int = -1; +const FD_ONGOING_INIT: libc::c_int = -2; + +// In theory `libc::c_int` could be something other than `i32`, but for the +// targets we currently support that use `use_file`, it is always `i32`. +// If/when we add support for a target where that isn't the case, we may +// need to use a different atomic type or make other accomodations. The +// compiler will let us know if/when that is the case, because the +// `FD.store(fd)` would fail to compile. +// +// The opening of the file, by libc/libstd/etc. may write some unknown +// state into in-process memory. (Such state may include some sanitizer +// bookkeeping, or we might be operating in a unikernal-like environment +// where all the "kernel" file descriptor bookkeeping is done in our +// process.) `get_fd_locked` stores into FD using `Ordering::Release` to +// ensure any such state is synchronized. `get_fd` loads from `FD` with +// `Ordering::Acquire` to synchronize with it. +static FD: AtomicI32 = AtomicI32::new(FD_UNINIT); + pub fn getrandom_inner(dest: &mut [MaybeUninit]) -> Result<(), Error> { - let fd = get_rng_fd()?; + let mut fd = FD.load(Ordering::Acquire); + if fd == FD_UNINIT || fd == FD_ONGOING_INIT { + fd = open_or_wait()?; + } sys_fill_exact(dest, |buf| unsafe { libc::read(fd, buf.as_mut_ptr().cast::(), buf.len()) }) } -// Returns the file descriptor for the device file used to retrieve random -// bytes. The file will be opened exactly once. All subsequent calls will -// return the same file descriptor. This file descriptor is never closed. -fn get_rng_fd() -> Result { - // std::os::fd::{BorrowedFd, OwnedFd} guarantee that -1 is not a valid file descriptor. - const FD_UNINIT: libc::c_int = -1; - - // In theory `libc::c_int` could be something other than `i32`, but for the - // targets we currently support that use `use_file`, it is always `i32`. - // If/when we add support for a target where that isn't the case, we may - // need to use a different atomic type or make other accomodations. The - // compiler will let us know if/when that is the case, because the - // `FD.store(fd)` would fail to compile. - // - // The opening of the file, by libc/libstd/etc. may write some unknown - // state into in-process memory. (Such state may include some sanitizer - // bookkeeping, or we might be operating in a unikernal-like environment - // where all the "kernel" file descriptor bookkeeping is done in our - // process.) `get_fd_locked` stores into FD using `Ordering::Release` to - // ensure any such state is synchronized. `get_fd` loads from `FD` with - // `Ordering::Acquire` to synchronize with it. - static FD: AtomicI32 = AtomicI32::new(FD_UNINIT); - - fn get_fd() -> Option { +#[cold] +fn open_or_wait() -> Result { + loop { match FD.load(Ordering::Acquire) { - FD_UNINIT => None, - val => Some(val), + FD_UNINIT => { + let res = FD.compare_exchange_weak( + FD_UNINIT, + FD_ONGOING_INIT, + Ordering::AcqRel, + Ordering::Relaxed, + ); + if res.is_ok() { + break; + } + } + FD_ONGOING_INIT => sync::wait(), + fd => return Ok(fd), } } - #[cold] - fn get_fd_locked() -> Result { - // This mutex is used to prevent multiple threads from opening file - // descriptors concurrently, which could run into the limit on the - // number of open file descriptors. Our goal is to have no more than one - // file descriptor open, ever. - // - // SAFETY: We use the mutex only in this method, and we always unlock it - // before returning, making sure we don't violate the pthread_mutex_t API. - static MUTEX: Mutex = Mutex::new(); - unsafe { MUTEX.lock() }; - let _guard = DropGuard(|| unsafe { MUTEX.unlock() }); - - if let Some(fd) = get_fd() { - return Ok(fd); - } - - // On Linux, /dev/urandom might return insecure values. - #[cfg(any(target_os = "android", target_os = "linux"))] - wait_until_rng_ready()?; + let res = open_fd(); + let val = match res { + Ok(fd) => fd, + Err(_) => FD_UNINIT, + }; + FD.store(val, Ordering::Release); - let fd = open_readonly(FILE_PATH)?; - debug_assert!(fd != FD_UNINIT); - FD.store(fd, Ordering::Release); + // On non-Linux targets `wait` is just 1 ms sleep, + // so we don't need any explicit wake up in addition + // to updating value of `FD`. + #[cfg(any(target_os = "android", target_os = "linux"))] + sync::wake(); - Ok(fd) - } - - // Use double-checked locking to avoid acquiring the lock if possible. - if let Some(fd) = get_fd() { - Ok(fd) - } else { - get_fd_locked() - } + res } -// Polls /dev/random to make sure it is ok to read from /dev/urandom. -// -// Polling avoids draining the estimated entropy from /dev/random; -// short-lived processes reading even a single byte from /dev/random could -// be problematic if they are being executed faster than entropy is being -// collected. -// -// OTOH, reading a byte instead of polling is more compatible with -// sandboxes that disallow `poll()` but which allow reading /dev/random, -// e.g. sandboxes that assume that `poll()` is for network I/O. This way, -// fewer applications will have to insert pre-sandbox-initialization logic. -// Often (blocking) file I/O is not allowed in such early phases of an -// application for performance and/or security reasons. -// -// It is hard to write a sandbox policy to support `libc::poll()` because -// it may invoke the `poll`, `ppoll`, `ppoll_time64` (since Linux 5.1, with -// newer versions of glibc), and/or (rarely, and probably only on ancient -// systems) `select`. depending on the libc implementation (e.g. glibc vs -// musl), libc version, potentially the kernel version at runtime, and/or -// the target architecture. -// -// BoringSSL and libstd don't try to protect against insecure output from -// `/dev/urandom'; they don't open `/dev/random` at all. -// -// OpenSSL uses `libc::select()` unless the `dev/random` file descriptor -// is too large; if it is too large then it does what we do here. -// -// libsodium uses `libc::poll` similarly to this. -#[cfg(any(target_os = "android", target_os = "linux"))] -fn wait_until_rng_ready() -> Result<(), Error> { - let fd = open_readonly(b"/dev/random\0")?; - let mut pfd = libc::pollfd { - fd, - events: libc::POLLIN, - revents: 0, - }; - let _guard = DropGuard(|| unsafe { - libc::close(fd); - }); +fn open_fd() -> Result { + #[cfg(any(target_os = "android", target_os = "linux"))] + sync::wait_until_rng_ready()?; + let fd = open_readonly(FILE_PATH)?; + debug_assert!(fd >= 0); + Ok(fd) +} - loop { - // A negative timeout means an infinite timeout. - let res = unsafe { libc::poll(&mut pfd, 1, -1) }; - if res >= 0 { - debug_assert_eq!(res, 1); // We only used one fd, and cannot timeout. - return Ok(()); - } - let err = crate::util_libc::last_os_error(); - match err.raw_os_error() { - Some(libc::EINTR) | Some(libc::EAGAIN) => continue, - _ => return Err(err), +#[cfg(not(any(target_os = "android", target_os = "linux")))] +mod sync { + /// Sleep 1 ms before checking `FD` again. + /// + /// On non-Linux targets the critical section only opens file, + /// which should not block, so in the unlikely contended case, + /// we can sleep-wait for the opening operation to finish. + pub(super) fn wait() { + let rqtp = libc::timespec { + tv_sec: 0, + tv_nsec: 1_000_000, + }; + let mut rmtp = libc::timespec { + tv_sec: 0, + tv_nsec: 0, + }; + // We do not care if sleep gets interrupted, so the return value is ignored + unsafe { + libc::nanosleep(&rqtp, &mut rmtp); } } } -struct Mutex(UnsafeCell); - -impl Mutex { - const fn new() -> Self { - Self(UnsafeCell::new(libc::PTHREAD_MUTEX_INITIALIZER)) - } - unsafe fn lock(&self) { - let r = libc::pthread_mutex_lock(self.0.get()); - debug_assert_eq!(r, 0); - } - unsafe fn unlock(&self) { - let r = libc::pthread_mutex_unlock(self.0.get()); - debug_assert_eq!(r, 0); +#[cfg(any(target_os = "android", target_os = "linux"))] +mod sync { + use super::{Error, FD, FD_ONGOING_INIT}; + use crate::util_libc::{last_os_error, open_readonly}; + + /// Wait for atomic `FD` to change value from `FD_ONGOING_INIT` to something else. + /// + /// Futex syscall with `FUTEX_WAIT` op puts the current thread to sleep + /// until futex syscall with `FUTEX_WAKE` op gets executed for `FD`. + /// + /// For more information read: https://www.man7.org/linux/man-pages/man2/futex.2.html + pub(super) fn wait() { + let op = libc::FUTEX_WAIT | libc::FUTEX_PRIVATE_FLAG; + let timeout_ptr = core::ptr::null::(); + let ret = unsafe { libc::syscall(libc::SYS_futex, &FD, op, FD_ONGOING_INIT, timeout_ptr) }; + // FUTEX_WAIT should return either 0 or EAGAIN error + debug_assert!({ + match ret { + 0 => true, + -1 => last_os_error().raw_os_error() == Some(libc::EAGAIN), + _ => false, + } + }); } -} - -unsafe impl Sync for Mutex {} -struct DropGuard(F); + /// Wake up all threads which wait for value of atomic `FD` to change. + pub(super) fn wake() { + let op = libc::FUTEX_WAKE | libc::FUTEX_PRIVATE_FLAG; + let ret = unsafe { libc::syscall(libc::SYS_futex, &FD, op, libc::INT_MAX) }; + debug_assert!(ret >= 0); + } -impl Drop for DropGuard { - fn drop(&mut self) { - self.0() + // Polls /dev/random to make sure it is ok to read from /dev/urandom. + // + // Polling avoids draining the estimated entropy from /dev/random; + // short-lived processes reading even a single byte from /dev/random could + // be problematic if they are being executed faster than entropy is being + // collected. + // + // OTOH, reading a byte instead of polling is more compatible with + // sandboxes that disallow `poll()` but which allow reading /dev/random, + // e.g. sandboxes that assume that `poll()` is for network I/O. This way, + // fewer applications will have to insert pre-sandbox-initialization logic. + // Often (blocking) file I/O is not allowed in such early phases of an + // application for performance and/or security reasons. + // + // It is hard to write a sandbox policy to support `libc::poll()` because + // it may invoke the `poll`, `ppoll`, `ppoll_time64` (since Linux 5.1, with + // newer versions of glibc), and/or (rarely, and probably only on ancient + // systems) `select`. depending on the libc implementation (e.g. glibc vs + // musl), libc version, potentially the kernel version at runtime, and/or + // the target architecture. + // + // BoringSSL and libstd don't try to protect against insecure output from + // `/dev/urandom'; they don't open `/dev/random` at all. + // + // OpenSSL uses `libc::select()` unless the `dev/random` file descriptor + // is too large; if it is too large then it does what we do here. + // + // libsodium uses `libc::poll` similarly to this. + pub(super) fn wait_until_rng_ready() -> Result<(), Error> { + let fd = open_readonly(b"/dev/random\0")?; + let mut pfd = libc::pollfd { + fd, + events: libc::POLLIN, + revents: 0, + }; + + let res = loop { + // A negative timeout means an infinite timeout. + let res = unsafe { libc::poll(&mut pfd, 1, -1) }; + if res >= 0 { + // We only used one fd, and cannot timeout. + debug_assert_eq!(res, 1); + break Ok(()); + } + let err = last_os_error(); + match err.raw_os_error() { + Some(libc::EINTR) | Some(libc::EAGAIN) => continue, + _ => break Err(err), + } + }; + unsafe { libc::close(fd) }; + res } }