diff --git a/src/lib.rs b/src/lib.rs index f5e13c36..1792310e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -298,7 +298,6 @@ cfg_if! { ), ))] { mod util_libc; - mod use_file; mod linux_android; #[path = "linux_android_with_fallback.rs"] mod imp; } else if #[cfg(any(target_os = "android", target_os = "linux"))] { diff --git a/src/linux_android_with_fallback.rs b/src/linux_android_with_fallback.rs index 98fa15e8..a56051ce 100644 --- a/src/linux_android_with_fallback.rs +++ b/src/linux_android_with_fallback.rs @@ -1,6 +1,14 @@ //! Implementation for Linux / Android with `/dev/urandom` fallback -use crate::{lazy::LazyBool, linux_android, use_file, util_libc::last_os_error, Error}; -use core::mem::MaybeUninit; +use crate::{ + lazy::LazyBool, + linux_android, + util_libc::{last_os_error, open_readonly, sys_fill_exact}, + Error, +}; +use core::{ + mem::MaybeUninit, + sync::atomic::{AtomicI32, Ordering}, +}; pub fn getrandom_inner(dest: &mut [MaybeUninit]) -> Result<(), Error> { // getrandom(2) was introduced in Linux 3.17 @@ -8,7 +16,7 @@ pub fn getrandom_inner(dest: &mut [MaybeUninit]) -> Result<(), Error> { if HAS_GETRANDOM.unsync_init(is_getrandom_available) { linux_android::getrandom_inner(dest) } else { - use_file::getrandom_inner(dest) + use_file(dest) } } @@ -27,3 +35,135 @@ fn is_getrandom_available() -> bool { true } } + +// File descriptor is a "nonnegative integer" as per `open` man. 
+const FD_UNINIT: libc::c_int = -1;
+const FD_ONGOING_INIT: libc::c_int = -2;
+
+// See comment for `FD` in use_file.rs
+static FD: AtomicI32 = AtomicI32::new(FD_UNINIT);
+
+pub fn use_file(dest: &mut [MaybeUninit<u8>]) -> Result<(), Error> {
+    let fd = match FD.load(Ordering::Acquire) {
+        FD_UNINIT | FD_ONGOING_INIT => open_or_wait()?,
+        fd => fd,
+    };
+    sys_fill_exact(dest, |buf| unsafe {
+        libc::read(fd, buf.as_mut_ptr().cast(), buf.len())
+    })
+}
+
+#[cold]
+pub(super) fn open_or_wait() -> Result<libc::c_int, Error> {
+    loop {
+        match FD.load(Ordering::Acquire) {
+            FD_UNINIT => {
+                let res = FD.compare_exchange_weak(
+                    FD_UNINIT,
+                    FD_ONGOING_INIT,
+                    Ordering::AcqRel,
+                    Ordering::Relaxed,
+                );
+                if res.is_ok() {
+                    break;
+                }
+            }
+            FD_ONGOING_INIT => futex_wait(),
+            fd => return Ok(fd),
+        }
+    }
+
+    let res = open_fd();
+    let val = match res {
+        Ok(fd) => fd,
+        Err(_) => FD_UNINIT,
+    };
+    FD.store(val, Ordering::Release);
+    futex_wake();
+    res
+}
+
+fn futex_wait() {
+    let op = libc::FUTEX_WAIT | libc::FUTEX_PRIVATE_FLAG;
+    let timeout_ptr = core::ptr::null::<libc::timespec>();
+    let ret = unsafe { libc::syscall(libc::SYS_futex, &FD, op, FD_ONGOING_INIT, timeout_ptr) };
+    // FUTEX_WAIT returns 0, or fails with EAGAIN (FD changed) or EINTR (signal)
+    debug_assert!({
+        match ret {
+            0 => true,
+            -1 => {
+                let errno = last_os_error().raw_os_error();
+                errno == Some(libc::EAGAIN) || errno == Some(libc::EINTR)
+            }
+            _ => false,
+        }
+    });
+}
+
+fn futex_wake() {
+    let op = libc::FUTEX_WAKE | libc::FUTEX_PRIVATE_FLAG;
+    let ret = unsafe { libc::syscall(libc::SYS_futex, &FD, op, libc::INT_MAX) };
+    debug_assert!(ret >= 0);
+}
+
+fn open_fd() -> Result<libc::c_int, Error> {
+    wait_until_rng_ready()?;
+    // "/dev/urandom is preferred and sufficient in all use cases"
+    let fd = open_readonly(b"/dev/urandom\0")?;
+    debug_assert!(fd >= 0);
+    Ok(fd)
+}
+
+// Polls /dev/random to make sure it is ok to read from /dev/urandom.
+//
+// Polling avoids draining the estimated entropy from /dev/random;
+// short-lived processes reading even a single byte from /dev/random could
+// be problematic if they are being executed faster than entropy is being
+// collected.
+//
+// OTOH, reading a byte instead of polling is more compatible with
+// sandboxes that disallow `poll()` but which allow reading /dev/random,
+// e.g. sandboxes that assume that `poll()` is for network I/O. This way,
+// fewer applications will have to insert pre-sandbox-initialization logic.
+// Often (blocking) file I/O is not allowed in such early phases of an
+// application for performance and/or security reasons.
+//
+// It is hard to write a sandbox policy to support `libc::poll()` because
+// it may invoke the `poll`, `ppoll`, `ppoll_time64` (since Linux 5.1, with
+// newer versions of glibc), and/or (rarely, and probably only on ancient
+// systems) `select`, depending on the libc implementation (e.g. glibc vs
+// musl), libc version, potentially the kernel version at runtime, and/or
+// the target architecture.
+//
+// BoringSSL and libstd don't try to protect against insecure output from
+// `/dev/urandom`; they don't open `/dev/random` at all.
+//
+// OpenSSL uses `libc::select()` unless the `/dev/random` file descriptor
+// is too large; if it is too large then it does what we do here.
+//
+// libsodium uses `libc::poll` similarly to this.
+fn wait_until_rng_ready() -> Result<(), Error> {
+    let fd = open_readonly(b"/dev/random\0")?;
+    let mut pfd = libc::pollfd {
+        fd,
+        events: libc::POLLIN,
+        revents: 0,
+    };
+
+    let res = loop {
+        // A negative timeout means an infinite timeout.
+        let res = unsafe { libc::poll(&mut pfd, 1, -1) };
+        if res >= 0 {
+            // We only used one fd, and cannot timeout.
+            debug_assert_eq!(res, 1);
+            break Ok(());
+        }
+        let err = last_os_error();
+        match err.raw_os_error() {
+            Some(libc::EINTR) | Some(libc::EAGAIN) => continue,
+            _ => break Err(err),
+        }
+    };
+    unsafe { libc::close(fd) };
+    res
+}
diff --git a/src/use_file.rs b/src/use_file.rs
index 4fdbe3d7..ae725fd8 100644
--- a/src/use_file.rs
+++ b/src/use_file.rs
@@ -12,141 +12,55 @@ use core::{
 
 /// For all platforms, we use `/dev/urandom` rather than `/dev/random`.
 /// For more information see the linked man pages in lib.rs.
-/// - On Linux, "/dev/urandom is preferred and sufficient in all use cases".
 /// - On Redox, only /dev/urandom is provided.
 /// - On AIX, /dev/urandom will "provide cryptographically secure output".
 /// - On Haiku and QNX Neutrino they are identical.
 const FILE_PATH: &[u8] = b"/dev/urandom\0";
 
-// Do not inline this when it is the fallback implementation, but don't mark it
-// `#[cold]` because it is hot when it is actually used.
-#[cfg_attr(any(target_os = "android", target_os = "linux"), inline(never))]
+// std::os::fd::{BorrowedFd, OwnedFd} guarantee that -1 is not a valid file descriptor.
+const FD_UNINIT: libc::c_int = -1;
+
+// In theory `libc::c_int` could be something other than `i32`, but for the
+// targets we currently support that use `use_file`, it is always `i32`.
+// If/when we add support for a target where that isn't the case, we may
+// need to use a different atomic type or make other accommodations. The
+// compiler will let us know if/when that is the case, because the
+// `FD.store(fd)` would fail to compile.
+//
+// The opening of the file, by libc/libstd/etc. may write some unknown
+// state into in-process memory. (Such state may include some sanitizer
+// bookkeeping, or we might be operating in a unikernel-like environment
+// where all the "kernel" file descriptor bookkeeping is done in our
+// process.) `open_or_wait` stores into `FD` using `Ordering::Release` to
+// ensure any such state is synchronized.
+// `getrandom_inner` loads from `FD` with `Ordering::Acquire` to synchronize with it.
+static FD: AtomicI32 = AtomicI32::new(FD_UNINIT);
+
+static FD_MUTEX: Mutex = Mutex::new();
+
 pub fn getrandom_inner(dest: &mut [MaybeUninit<u8>]) -> Result<(), Error> {
-    let fd = get_rng_fd()?;
+    let mut fd = FD.load(Ordering::Acquire);
+    if fd == FD_UNINIT {
+        fd = open_or_wait()?;
+    }
     sys_fill_exact(dest, |buf| unsafe {
         libc::read(fd, buf.as_mut_ptr().cast::<c_void>(), buf.len())
     })
 }
 
-// Returns the file descriptor for the device file used to retrieve random
-// bytes. The file will be opened exactly once. All subsequent calls will
-// return the same file descriptor. This file descriptor is never closed.
-fn get_rng_fd() -> Result<libc::c_int, Error> {
-    // std::os::fd::{BorrowedFd, OwnedFd} guarantee that -1 is not a valid file descriptor.
-    const FD_UNINIT: libc::c_int = -1;
-
-    // In theory `libc::c_int` could be something other than `i32`, but for the
-    // targets we currently support that use `use_file`, it is always `i32`.
-    // If/when we add support for a target where that isn't the case, we may
-    // need to use a different atomic type or make other accomodations. The
-    // compiler will let us know if/when that is the case, because the
-    // `FD.store(fd)` would fail to compile.
-    //
-    // The opening of the file, by libc/libstd/etc. may write some unknown
-    // state into in-process memory. (Such state may include some sanitizer
-    // bookkeeping, or we might be operating in a unikernal-like environment
-    // where all the "kernel" file descriptor bookkeeping is done in our
-    // process.) `get_fd_locked` stores into FD using `Ordering::Release` to
-    // ensure any such state is synchronized. `get_fd` loads from `FD` with
-    // `Ordering::Acquire` to synchronize with it.
-    static FD: AtomicI32 = AtomicI32::new(FD_UNINIT);
-
-    fn get_fd() -> Option<libc::c_int> {
-        match FD.load(Ordering::Acquire) {
-            FD_UNINIT => None,
-            val => Some(val),
+#[cold]
+fn open_or_wait() -> Result<libc::c_int, Error> {
+    let _guard = FD_MUTEX.lock();
+    let fd = match FD.load(Ordering::Acquire) {
+        FD_UNINIT => {
+            let fd = open_readonly(FILE_PATH)?;
+            FD.store(fd, Ordering::Release);
+            fd
         }
-    }
-
-    #[cold]
-    fn get_fd_locked() -> Result<libc::c_int, Error> {
-        // This mutex is used to prevent multiple threads from opening file
-        // descriptors concurrently, which could run into the limit on the
-        // number of open file descriptors. Our goal is to have no more than one
-        // file descriptor open, ever.
-        //
-        // SAFETY: We use the mutex only in this method, and we always unlock it
-        // before returning, making sure we don't violate the pthread_mutex_t API.
-        static MUTEX: Mutex = Mutex::new();
-        unsafe { MUTEX.lock() };
-        let _guard = DropGuard(|| unsafe { MUTEX.unlock() });
-
-        if let Some(fd) = get_fd() {
-            return Ok(fd);
-        }
-
-        // On Linux, /dev/urandom might return insecure values.
-        #[cfg(any(target_os = "android", target_os = "linux"))]
-        wait_until_rng_ready()?;
-
-        let fd = open_readonly(FILE_PATH)?;
-        debug_assert!(fd != FD_UNINIT);
-        FD.store(fd, Ordering::Release);
-
-        Ok(fd)
-    }
-
-    // Use double-checked locking to avoid acquiring the lock if possible.
-    if let Some(fd) = get_fd() {
-        Ok(fd)
-    } else {
-        get_fd_locked()
-    }
-}
-
-// Polls /dev/random to make sure it is ok to read from /dev/urandom.
-//
-// Polling avoids draining the estimated entropy from /dev/random;
-// short-lived processes reading even a single byte from /dev/random could
-// be problematic if they are being executed faster than entropy is being
-// collected.
-//
-// OTOH, reading a byte instead of polling is more compatible with
-// sandboxes that disallow `poll()` but which allow reading /dev/random,
-// e.g. sandboxes that assume that `poll()` is for network I/O. This way,
-// fewer applications will have to insert pre-sandbox-initialization logic.
-// Often (blocking) file I/O is not allowed in such early phases of an
-// application for performance and/or security reasons.
-//
-// It is hard to write a sandbox policy to support `libc::poll()` because
-// it may invoke the `poll`, `ppoll`, `ppoll_time64` (since Linux 5.1, with
-// newer versions of glibc), and/or (rarely, and probably only on ancient
-// systems) `select`. depending on the libc implementation (e.g. glibc vs
-// musl), libc version, potentially the kernel version at runtime, and/or
-// the target architecture.
-//
-// BoringSSL and libstd don't try to protect against insecure output from
-// `/dev/urandom'; they don't open `/dev/random` at all.
-//
-// OpenSSL uses `libc::select()` unless the `dev/random` file descriptor
-// is too large; if it is too large then it does what we do here.
-//
-// libsodium uses `libc::poll` similarly to this.
-#[cfg(any(target_os = "android", target_os = "linux"))]
-fn wait_until_rng_ready() -> Result<(), Error> {
-    let fd = open_readonly(b"/dev/random\0")?;
-    let mut pfd = libc::pollfd {
-        fd,
-        events: libc::POLLIN,
-        revents: 0,
+        fd => fd,
     };
-    let _guard = DropGuard(|| unsafe {
-        libc::close(fd);
-    });
-
-    loop {
-        // A negative timeout means an infinite timeout.
-        let res = unsafe { libc::poll(&mut pfd, 1, -1) };
-        if res >= 0 {
-            debug_assert_eq!(res, 1); // We only used one fd, and cannot timeout.
-            return Ok(());
-        }
-        let err = crate::util_libc::last_os_error();
-        match err.raw_os_error() {
-            Some(libc::EINTR) | Some(libc::EAGAIN) => continue,
-            _ => return Err(err),
-        }
-    }
+    debug_assert!(fd >= 0);
+    Ok(fd)
 }
 
 struct Mutex(UnsafeCell<libc::pthread_mutex_t>);
@@ -155,22 +69,21 @@ impl Mutex {
     const fn new() -> Self {
         Self(UnsafeCell::new(libc::PTHREAD_MUTEX_INITIALIZER))
     }
-    unsafe fn lock(&self) {
-        let r = libc::pthread_mutex_lock(self.0.get());
-        debug_assert_eq!(r, 0);
-    }
-    unsafe fn unlock(&self) {
-        let r = libc::pthread_mutex_unlock(self.0.get());
+
+    fn lock(&self) -> MutexGuard<'_> {
+        let r = unsafe { libc::pthread_mutex_lock(self.0.get()) };
         debug_assert_eq!(r, 0);
+        MutexGuard(self)
     }
 }
 
 unsafe impl Sync for Mutex {}
 
-struct DropGuard<F: FnMut()>(F);
+struct MutexGuard<'a>(&'a Mutex);
 
-impl<F: FnMut()> Drop for DropGuard<F> {
+impl Drop for MutexGuard<'_> {
     fn drop(&mut self) {
-        self.0()
+        let r = unsafe { libc::pthread_mutex_unlock(self.0 .0.get()) };
+        debug_assert_eq!(r, 0);
     }
 }