diff options
Diffstat (limited to 'library/std/src/sys')
| -rw-r--r-- | library/std/src/sys/cmath.rs | 88 | ||||
| -rw-r--r-- | library/std/src/sys/cmath/builtins.rs | 35 | ||||
| -rw-r--r-- | library/std/src/sys/cmath/mod.rs | 11 | ||||
| -rw-r--r-- | library/std/src/sys/cmath/windows.rs | 94 | ||||
| -rw-r--r-- | library/std/src/sys/pal/common/thread_local/fast_local.rs | 3 | ||||
| -rw-r--r-- | library/std/src/sys/pal/uefi/mod.rs | 1 | ||||
| -rw-r--r-- | library/std/src/sys/pal/uefi/thread.rs | 60 | ||||
| -rw-r--r-- | library/std/src/sys/pal/unix/locks/mod.rs | 4 | ||||
| -rw-r--r-- | library/std/src/sys/pal/unix/locks/pthread_rwlock.rs | 195 | ||||
| -rw-r--r-- | library/std/src/sys/pal/unix/locks/queue_rwlock.rs | 557 | ||||
| -rw-r--r-- | library/std/src/sys/pal/unix/rand.rs | 10 | ||||
| -rw-r--r-- | library/std/src/sys/pal/unix/thread.rs | 30 | ||||
| -rw-r--r-- | library/std/src/sys/pal/windows/c/README.md | 9 | ||||
| -rw-r--r-- | library/std/src/sys/pal/windows/c/bindings.txt (renamed from library/std/src/sys/pal/windows/c/windows_sys.lst) | 3 | ||||
| -rw-r--r-- | library/std/src/sys/pal/windows/c/windows_sys.rs | 7 |
15 files changed, 750 insertions, 357 deletions
diff --git a/library/std/src/sys/cmath.rs b/library/std/src/sys/cmath.rs new file mode 100644 index 00000000000..99df503b82d --- /dev/null +++ b/library/std/src/sys/cmath.rs @@ -0,0 +1,88 @@ +#![cfg(not(test))] + +// These symbols are all defined by `libm`, +// or by `compiler-builtins` on unsupported platforms. +extern "C" { + pub fn acos(n: f64) -> f64; + pub fn asin(n: f64) -> f64; + pub fn atan(n: f64) -> f64; + pub fn atan2(a: f64, b: f64) -> f64; + pub fn cbrt(n: f64) -> f64; + pub fn cbrtf(n: f32) -> f32; + pub fn cosh(n: f64) -> f64; + pub fn expm1(n: f64) -> f64; + pub fn expm1f(n: f32) -> f32; + pub fn fdim(a: f64, b: f64) -> f64; + pub fn fdimf(a: f32, b: f32) -> f32; + #[cfg_attr(target_env = "msvc", link_name = "_hypot")] + pub fn hypot(x: f64, y: f64) -> f64; + #[cfg_attr(target_env = "msvc", link_name = "_hypotf")] + pub fn hypotf(x: f32, y: f32) -> f32; + pub fn log1p(n: f64) -> f64; + pub fn log1pf(n: f32) -> f32; + pub fn sinh(n: f64) -> f64; + pub fn tan(n: f64) -> f64; + pub fn tanh(n: f64) -> f64; + pub fn tgamma(n: f64) -> f64; + pub fn tgammaf(n: f32) -> f32; + pub fn lgamma_r(n: f64, s: &mut i32) -> f64; + pub fn lgammaf_r(n: f32, s: &mut i32) -> f32; + + cfg_if::cfg_if! { + if #[cfg(not(all(target_os = "windows", target_env = "msvc", target_arch = "x86")))] { + pub fn acosf(n: f32) -> f32; + pub fn asinf(n: f32) -> f32; + pub fn atan2f(a: f32, b: f32) -> f32; + pub fn atanf(n: f32) -> f32; + pub fn coshf(n: f32) -> f32; + pub fn sinhf(n: f32) -> f32; + pub fn tanf(n: f32) -> f32; + pub fn tanhf(n: f32) -> f32; + }} +} + +// On 32-bit x86 MSVC these functions aren't defined, so we just define shims +// which promote everything to f64, perform the calculation, and then demote +// back to f32. While not precisely correct should be "correct enough" for now. +cfg_if::cfg_if! { +if #[cfg(all(target_os = "windows", target_env = "msvc", target_arch = "x86"))] { + #[inline] + pub unsafe fn acosf(n: f32) -> f32 { + f64::acos(n as f64) as f32 + } + + #[inline] + pub unsafe fn asinf(n: f32) -> f32 { + f64::asin(n as f64) as f32 + } + + #[inline] + pub unsafe fn atan2f(n: f32, b: f32) -> f32 { + f64::atan2(n as f64, b as f64) as f32 + } + + #[inline] + pub unsafe fn atanf(n: f32) -> f32 { + f64::atan(n as f64) as f32 + } + + #[inline] + pub unsafe fn coshf(n: f32) -> f32 { + f64::cosh(n as f64) as f32 + } + + #[inline] + pub unsafe fn sinhf(n: f32) -> f32 { + f64::sinh(n as f64) as f32 + } + + #[inline] + pub unsafe fn tanf(n: f32) -> f32 { + f64::tan(n as f64) as f32 + } + + #[inline] + pub unsafe fn tanhf(n: f32) -> f32 { + f64::tanh(n as f64) as f32 + } +}} diff --git a/library/std/src/sys/cmath/builtins.rs b/library/std/src/sys/cmath/builtins.rs deleted file mode 100644 index c680132efa4..00000000000 --- a/library/std/src/sys/cmath/builtins.rs +++ /dev/null @@ -1,35 +0,0 @@ -// These symbols are all defined by `libm`, -// or by `compiler-builtins` on unsupported platforms. - -extern "C" { - pub fn acos(n: f64) -> f64; - pub fn acosf(n: f32) -> f32; - pub fn asin(n: f64) -> f64; - pub fn asinf(n: f32) -> f32; - pub fn atan(n: f64) -> f64; - pub fn atan2(a: f64, b: f64) -> f64; - pub fn atan2f(a: f32, b: f32) -> f32; - pub fn atanf(n: f32) -> f32; - pub fn cbrt(n: f64) -> f64; - pub fn cbrtf(n: f32) -> f32; - pub fn cosh(n: f64) -> f64; - pub fn coshf(n: f32) -> f32; - pub fn expm1(n: f64) -> f64; - pub fn expm1f(n: f32) -> f32; - pub fn fdim(a: f64, b: f64) -> f64; - pub fn fdimf(a: f32, b: f32) -> f32; - pub fn hypot(x: f64, y: f64) -> f64; - pub fn hypotf(x: f32, y: f32) -> f32; - pub fn log1p(n: f64) -> f64; - pub fn log1pf(n: f32) -> f32; - pub fn sinh(n: f64) -> f64; - pub fn sinhf(n: f32) -> f32; - pub fn tan(n: f64) -> f64; - pub fn tanf(n: f32) -> f32; - pub fn tanh(n: f64) -> f64; - pub fn tanhf(n: f32) -> f32; - pub fn tgamma(n: f64) -> f64; - pub fn tgammaf(n: f32) -> f32; - pub fn lgamma_r(n: f64, s: &mut i32) -> f64; - pub fn lgammaf_r(n: f32, s: &mut i32) -> f32; -} diff --git a/library/std/src/sys/cmath/mod.rs b/library/std/src/sys/cmath/mod.rs deleted file mode 100644 index 79d5021dd8d..00000000000 --- a/library/std/src/sys/cmath/mod.rs +++ /dev/null @@ -1,11 +0,0 @@ -#![cfg(not(test))] - -cfg_if::cfg_if! { - if #[cfg(target_os = "windows")] { - mod windows; - pub use windows::*; - } else { - mod builtins; - pub use builtins::*; - } -} diff --git a/library/std/src/sys/cmath/windows.rs b/library/std/src/sys/cmath/windows.rs deleted file mode 100644 index 712097f06ff..00000000000 --- a/library/std/src/sys/cmath/windows.rs +++ /dev/null @@ -1,94 +0,0 @@ -use core::ffi::{c_double, c_float, c_int}; - -extern "C" { - pub fn acos(n: c_double) -> c_double; - pub fn asin(n: c_double) -> c_double; - pub fn atan(n: c_double) -> c_double; - pub fn atan2(a: c_double, b: c_double) -> c_double; - pub fn cbrt(n: c_double) -> c_double; - pub fn cbrtf(n: c_float) -> c_float; - pub fn cosh(n: c_double) -> c_double; - pub fn expm1(n: c_double) -> c_double; - pub fn expm1f(n: c_float) -> c_float; - pub fn fdim(a: c_double, b: c_double) -> c_double; - pub fn fdimf(a: c_float, b: c_float) -> c_float; - #[cfg_attr(target_env = "msvc", link_name = "_hypot")] - pub fn hypot(x: c_double, y: c_double) -> c_double; - #[cfg_attr(target_env = "msvc", link_name = "_hypotf")] - pub fn hypotf(x: c_float, y: c_float) -> c_float; - pub fn log1p(n: c_double) -> c_double; - pub fn log1pf(n: c_float) -> c_float; - pub fn sinh(n: c_double) -> c_double; - pub fn tan(n: c_double) -> c_double; - pub fn tanh(n: c_double) -> c_double; - pub fn tgamma(n: c_double) -> c_double; - pub fn tgammaf(n: c_float) -> c_float; - pub fn lgamma_r(n: c_double, s: &mut c_int) -> c_double; - pub fn lgammaf_r(n: c_float, s: &mut c_int) -> c_float; -} - -pub use self::shims::*; - -#[cfg(not(all(target_env = "msvc", target_arch = "x86")))] -mod shims { - use core::ffi::c_float; - - extern "C" { - pub fn acosf(n: c_float) -> c_float; - pub fn asinf(n: c_float) -> c_float; - pub fn atan2f(a: c_float, b: c_float) -> c_float; - pub fn atanf(n: c_float) -> c_float; - pub fn coshf(n: c_float) -> c_float; - pub fn sinhf(n: c_float) -> c_float; - pub fn tanf(n: c_float) -> c_float; - pub fn tanhf(n: c_float) -> c_float; - } -} - -// On 32-bit x86 MSVC these functions aren't defined, so we just define shims -// which promote everything to f64, perform the calculation, and then demote -// back to f32. While not precisely correct should be "correct enough" for now. -#[cfg(all(target_env = "msvc", target_arch = "x86"))] -mod shims { - use core::ffi::c_float; - - #[inline] - pub unsafe fn acosf(n: c_float) -> c_float { - f64::acos(n as f64) as c_float - } - - #[inline] - pub unsafe fn asinf(n: c_float) -> c_float { - f64::asin(n as f64) as c_float - } - - #[inline] - pub unsafe fn atan2f(n: c_float, b: c_float) -> c_float { - f64::atan2(n as f64, b as f64) as c_float - } - - #[inline] - pub unsafe fn atanf(n: c_float) -> c_float { - f64::atan(n as f64) as c_float - } - - #[inline] - pub unsafe fn coshf(n: c_float) -> c_float { - f64::cosh(n as f64) as c_float - } - - #[inline] - pub unsafe fn sinhf(n: c_float) -> c_float { - f64::sinh(n as f64) as c_float - } - - #[inline] - pub unsafe fn tanf(n: c_float) -> c_float { - f64::tan(n as f64) as c_float - } - - #[inline] - pub unsafe fn tanhf(n: c_float) -> c_float { - f64::tanh(n as f64) as c_float - } -} diff --git a/library/std/src/sys/pal/common/thread_local/fast_local.rs b/library/std/src/sys/pal/common/thread_local/fast_local.rs index 0fdca27852c..04c0dd6f750 100644 --- a/library/std/src/sys/pal/common/thread_local/fast_local.rs +++ b/library/std/src/sys/pal/common/thread_local/fast_local.rs @@ -94,7 +94,8 @@ pub macro thread_local_inner { if let $crate::option::Option::Some(init) = init { if let $crate::option::Option::Some(value) = init.take() { return value; - } else if $crate::cfg!(debug_assertions) { + } + if $crate::cfg!(debug_assertions) { $crate::unreachable!("missing default value"); } } diff --git a/library/std/src/sys/pal/uefi/mod.rs b/library/std/src/sys/pal/uefi/mod.rs index efa4ed67c50..5a96b8f1c3a 100644 --- a/library/std/src/sys/pal/uefi/mod.rs +++ b/library/std/src/sys/pal/uefi/mod.rs @@ -31,7 +31,6 @@ pub mod pipe; #[path = "../unsupported/process.rs"] pub mod process; pub mod stdio; -#[path = "../unsupported/thread.rs"] pub mod thread; #[path = "../unsupported/thread_local_key.rs"] pub mod thread_local_key; diff --git a/library/std/src/sys/pal/uefi/thread.rs b/library/std/src/sys/pal/uefi/thread.rs new file mode 100644 index 00000000000..ddfc3af2f50 --- /dev/null +++ b/library/std/src/sys/pal/uefi/thread.rs @@ -0,0 +1,60 @@ +use super::unsupported; +use crate::ffi::CStr; +use crate::io; +use crate::num::NonZeroUsize; +use crate::ptr::NonNull; +use crate::time::Duration; + +pub struct Thread(!); + +pub const DEFAULT_MIN_STACK_SIZE: usize = 4096; + +impl Thread { + // unsafe: see thread::Builder::spawn_unchecked for safety requirements + pub unsafe fn new(_stack: usize, _p: Box<dyn FnOnce()>) -> io::Result<Thread> { + unsupported() + } + + pub fn yield_now() { + // do nothing + } + + pub fn set_name(_name: &CStr) { + // nope + } + + pub fn sleep(dur: Duration) { + let boot_services: NonNull<r_efi::efi::BootServices> = + crate::os::uefi::env::boot_services().expect("can't sleep").cast(); + let mut dur_ms = dur.as_micros(); + // ceil up to the nearest microsecond + if dur.subsec_nanos() % 1000 > 0 { + dur_ms += 1; + } + + while dur_ms > 0 { + let ms = crate::cmp::min(dur_ms, usize::MAX as u128); + let _ = unsafe { ((*boot_services.as_ptr()).stall)(ms as usize) }; + dur_ms -= ms; + } + } + + pub fn join(self) { + self.0 + } +} + +pub fn available_parallelism() -> io::Result<NonZeroUsize> { + // UEFI is single threaded + Ok(NonZeroUsize::new(1).unwrap()) +} + +pub mod guard { + pub type Guard = !; + pub unsafe fn current() -> Option<Guard> { + None + } + pub unsafe fn init() -> Option<Guard> { + None + } +} diff --git a/library/std/src/sys/pal/unix/locks/mod.rs b/library/std/src/sys/pal/unix/locks/mod.rs index b2e0e49ad73..a49247310b5 100644 --- a/library/std/src/sys/pal/unix/locks/mod.rs +++ b/library/std/src/sys/pal/unix/locks/mod.rs @@ -22,10 +22,10 @@ cfg_if::cfg_if! { pub(crate) use futex_condvar::Condvar; } else { mod pthread_mutex; - mod pthread_rwlock; mod pthread_condvar; + mod queue_rwlock; pub(crate) use pthread_mutex::Mutex; - pub(crate) use pthread_rwlock::RwLock; + pub(crate) use queue_rwlock::RwLock; pub(crate) use pthread_condvar::Condvar; } } diff --git a/library/std/src/sys/pal/unix/locks/pthread_rwlock.rs b/library/std/src/sys/pal/unix/locks/pthread_rwlock.rs deleted file mode 100644 index 04662be9d82..00000000000 --- a/library/std/src/sys/pal/unix/locks/pthread_rwlock.rs +++ /dev/null @@ -1,195 +0,0 @@ -use crate::cell::UnsafeCell; -use crate::mem::forget; -use crate::sync::atomic::{AtomicUsize, Ordering}; -use crate::sys_common::lazy_box::{LazyBox, LazyInit}; - -struct AllocatedRwLock { - inner: UnsafeCell<libc::pthread_rwlock_t>, - write_locked: UnsafeCell<bool>, // guarded by the `inner` RwLock - num_readers: AtomicUsize, -} - -unsafe impl Send for AllocatedRwLock {} -unsafe impl Sync for AllocatedRwLock {} - -pub struct RwLock { - inner: LazyBox<AllocatedRwLock>, -} - -impl LazyInit for AllocatedRwLock { - fn init() -> Box<Self> { - Box::new(AllocatedRwLock { - inner: UnsafeCell::new(libc::PTHREAD_RWLOCK_INITIALIZER), - write_locked: UnsafeCell::new(false), - num_readers: AtomicUsize::new(0), - }) - } - - fn destroy(mut rwlock: Box<Self>) { - // We're not allowed to pthread_rwlock_destroy a locked rwlock, - // so check first if it's unlocked. - if *rwlock.write_locked.get_mut() || *rwlock.num_readers.get_mut() != 0 { - // The rwlock is locked. This happens if a RwLock{Read,Write}Guard is leaked. - // In this case, we just leak the RwLock too. - forget(rwlock); - } - } - - fn cancel_init(_: Box<Self>) { - // In this case, we can just drop it without any checks, - // since it cannot have been locked yet. - } -} - -impl AllocatedRwLock { - #[inline] - unsafe fn raw_unlock(&self) { - let r = libc::pthread_rwlock_unlock(self.inner.get()); - debug_assert_eq!(r, 0); - } -} - -impl Drop for AllocatedRwLock { - fn drop(&mut self) { - let r = unsafe { libc::pthread_rwlock_destroy(self.inner.get()) }; - // On DragonFly pthread_rwlock_destroy() returns EINVAL if called on a - // rwlock that was just initialized with - // libc::PTHREAD_RWLOCK_INITIALIZER. Once it is used (locked/unlocked) - // or pthread_rwlock_init() is called, this behaviour no longer occurs. - if cfg!(target_os = "dragonfly") { - debug_assert!(r == 0 || r == libc::EINVAL); - } else { - debug_assert_eq!(r, 0); - } - } -} - -impl RwLock { - #[inline] - pub const fn new() -> RwLock { - RwLock { inner: LazyBox::new() } - } - - #[inline] - pub fn read(&self) { - let lock = &*self.inner; - let r = unsafe { libc::pthread_rwlock_rdlock(lock.inner.get()) }; - - // According to POSIX, when a thread tries to acquire this read lock - // while it already holds the write lock - // (or vice versa, or tries to acquire the write lock twice), - // "the call shall either deadlock or return [EDEADLK]" - // (https://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_rwlock_wrlock.html, - // https://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_rwlock_rdlock.html). - // So, in principle, all we have to do here is check `r == 0` to be sure we properly - // got the lock. - // - // However, (at least) glibc before version 2.25 does not conform to this spec, - // and can return `r == 0` even when this thread already holds the write lock. - // We thus check for this situation ourselves and panic when detecting that a thread - // got the write lock more than once, or got a read and a write lock. - if r == libc::EAGAIN { - panic!("rwlock maximum reader count exceeded"); - } else if r == libc::EDEADLK || (r == 0 && unsafe { *lock.write_locked.get() }) { - // Above, we make sure to only access `write_locked` when `r == 0` to avoid - // data races. - if r == 0 { - // `pthread_rwlock_rdlock` succeeded when it should not have. - unsafe { - lock.raw_unlock(); - } - } - panic!("rwlock read lock would result in deadlock"); - } else { - // POSIX does not make guarantees about all the errors that may be returned. - // See issue #94705 for more details. - assert_eq!(r, 0, "unexpected error during rwlock read lock: {:?}", r); - lock.num_readers.fetch_add(1, Ordering::Relaxed); - } - } - - #[inline] - pub fn try_read(&self) -> bool { - let lock = &*self.inner; - let r = unsafe { libc::pthread_rwlock_tryrdlock(lock.inner.get()) }; - if r == 0 { - if unsafe { *lock.write_locked.get() } { - // `pthread_rwlock_tryrdlock` succeeded when it should not have. - unsafe { - lock.raw_unlock(); - } - false - } else { - lock.num_readers.fetch_add(1, Ordering::Relaxed); - true - } - } else { - false - } - } - - #[inline] - pub fn write(&self) { - let lock = &*self.inner; - let r = unsafe { libc::pthread_rwlock_wrlock(lock.inner.get()) }; - // See comments above for why we check for EDEADLK and write_locked. For the same reason, - // we also need to check that there are no readers (tracked in `num_readers`). - if r == libc::EDEADLK - || (r == 0 && unsafe { *lock.write_locked.get() }) - || lock.num_readers.load(Ordering::Relaxed) != 0 - { - // Above, we make sure to only access `write_locked` when `r == 0` to avoid - // data races. - if r == 0 { - // `pthread_rwlock_wrlock` succeeded when it should not have. - unsafe { - lock.raw_unlock(); - } - } - panic!("rwlock write lock would result in deadlock"); - } else { - // According to POSIX, for a properly initialized rwlock this can only - // return EDEADLK or 0. We rely on that. - debug_assert_eq!(r, 0); - } - - unsafe { - *lock.write_locked.get() = true; - } - } - - #[inline] - pub unsafe fn try_write(&self) -> bool { - let lock = &*self.inner; - let r = libc::pthread_rwlock_trywrlock(lock.inner.get()); - if r == 0 { - if *lock.write_locked.get() || lock.num_readers.load(Ordering::Relaxed) != 0 { - // `pthread_rwlock_trywrlock` succeeded when it should not have. - lock.raw_unlock(); - false - } else { - *lock.write_locked.get() = true; - true - } - } else { - false - } - } - - #[inline] - pub unsafe fn read_unlock(&self) { - let lock = &*self.inner; - debug_assert!(!*lock.write_locked.get()); - lock.num_readers.fetch_sub(1, Ordering::Relaxed); - lock.raw_unlock(); - } - - #[inline] - pub unsafe fn write_unlock(&self) { - let lock = &*self.inner; - debug_assert_eq!(lock.num_readers.load(Ordering::Relaxed), 0); - debug_assert!(*lock.write_locked.get()); - *lock.write_locked.get() = false; - lock.raw_unlock(); - } -} diff --git a/library/std/src/sys/pal/unix/locks/queue_rwlock.rs b/library/std/src/sys/pal/unix/locks/queue_rwlock.rs new file mode 100644 index 00000000000..0f02a98dfdd --- /dev/null +++ b/library/std/src/sys/pal/unix/locks/queue_rwlock.rs @@ -0,0 +1,557 @@ +//! Efficient read-write locking without `pthread_rwlock_t`. +//! +//! The readers-writer lock provided by the `pthread` library has a number of +//! problems which make it a suboptimal choice for `std`: +//! +//! * It is non-movable, so it needs to be allocated (lazily, to make the +//! constructor `const`). +//! * `pthread` is an external library, meaning the fast path of acquiring an +//! uncontended lock cannot be inlined. +//! * Some platforms (at least glibc before version 2.25) have buggy implementations +//! that can easily lead to undefined behaviour in safe Rust code when not properly +//! guarded against. +//! * On some platforms (e.g. macOS), the lock is very slow. +//! +//! Therefore, we implement our own `RwLock`! Naively, one might reach for a +//! spinlock, but those [can be quite problematic] when the lock is contended. +//! Instead, this readers-writer lock copies its implementation strategy from +//! the Windows [SRWLOCK] and the [usync] library. Spinning is still used for the +//! fast path, but it is bounded: after spinning fails, threads will locklessly +//! add an information structure containing a [`Thread`] handle into a queue of +//! waiters associated with the lock. The lock owner, upon releasing the lock, +//! will scan through the queue and wake up threads as appropriate, which will +//! then again try to acquire the lock. The resulting [`RwLock`] is: +//! +//! * adaptive, since it spins before doing any heavywheight parking operations +//! * allocation-free, modulo the per-thread [`Thread`] handle, which is +//! allocated regardless when using threads created by `std` +//! * writer-preferring, even if some readers may still slip through +//! * unfair, which reduces context-switching and thus drastically improves +//! performance +//! +//! and also quite fast in most cases. +//! +//! [can be quite problematic]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html +//! [SRWLOCK]: https://learn.microsoft.com/en-us/windows/win32/sync/slim-reader-writer--srw--locks +//! [usync]: https://crates.io/crates/usync +//! +//! # Implementation +//! +//! ## State +//! +//! A single [`AtomicPtr`] is used as state variable. The lowest three bits are used +//! to indicate the meaning of the remaining bits: +//! +//! | [`LOCKED`] | [`QUEUED`] | [`QUEUE_LOCKED`] | Remaining | | +//! |:-----------|:-----------|:-----------------|:-------------|:----------------------------------------------------------------------------------------------------------------------------| +//! | 0 | 0 | 0 | 0 | The lock is unlocked, no threads are waiting | +//! | 1 | 0 | 0 | 0 | The lock is write-locked, no threads waiting | +//! | 1 | 0 | 0 | n > 0 | The lock is read-locked with n readers | +//! | 0 | 1 | * | `*mut Node` | The lock is unlocked, but some threads are waiting. Only writers may lock the lock | +//! | 1 | 1 | * | `*mut Node` | The lock is locked, but some threads are waiting. If the lock is read-locked, the last queue node contains the reader count | +//! +//! ## Waiter queue +//! +//! When threads are waiting on the lock (`QUEUE` is set), the lock state +//! points to a queue of waiters, which is implemented as a linked list of +//! nodes stored on the stack to avoid memory allocation. To enable lockless +//! enqueuing of new nodes to the queue, the linked list is single-linked upon +//! creation. Since when the lock is read-locked, the lock count is stored in +//! the last link of the queue, threads have to traverse the queue to find the +//! last element upon releasing the lock. To avoid having to traverse the whole +//! list again and again, a pointer to the found tail is cached in the (current) +//! first element of the queue. +//! +//! Also, while the lock is unfair for performance reasons, it is still best to +//! wake the tail node first, which requires backlinks to previous nodes to be +//! created. This is done at the same time as finding the tail, and thus a set +//! tail field indicates the remaining portion of the queue is initialized. +//! +//! TLDR: Here's a diagram of what the queue looks like: +//! +//! ```text +//! state +//! │ +//! ▼ +//! ╭───────╮ next ╭───────╮ next ╭───────╮ next ╭───────╮ +//! │ ├─────►│ ├─────►│ ├─────►│ count │ +//! │ │ │ │ │ │ │ │ +//! │ │ │ │◄─────┤ │◄─────┤ │ +//! ╰───────╯ ╰───────╯ prev ╰───────╯ prev ╰───────╯ +//! │ ▲ +//! └───────────────────────────┘ +//! tail +//! ``` +//! +//! Invariants: +//! 1. At least one node must contain a non-null, current `tail` field. +//! 2. The first non-null `tail` field must be valid and current. +//! 3. All nodes preceding this node must have a correct, non-null `next` field. +//! 4. All nodes following this node must have a correct, non-null `prev` field. +//! +//! Access to the queue is controlled by the `QUEUE_LOCKED` bit, which threads +//! try to set both after enqueuing themselves to eagerly add backlinks to the +//! queue, which drastically improves performance, and after unlocking the lock +//! to wake the next waiter(s). This is done atomically at the same time as the +//! enqueuing/unlocking operation. The thread releasing the `QUEUE_LOCK` bit +//! will check the state of the lock and wake up waiters as appropriate. This +//! guarantees forward-progress even if the unlocking thread could not acquire +//! the queue lock. +//! +//! ## Memory orderings +//! +//! To properly synchronize changes to the data protected by the lock, the lock +//! is acquired and released with [`Acquire`] and [`Release`] ordering, respectively. +//! To propagate the initialization of nodes, changes to the queue lock are also +//! performed using these orderings. + +#![forbid(unsafe_op_in_unsafe_fn)] + +use crate::cell::OnceCell; +use crate::hint::spin_loop; +use crate::mem; +use crate::ptr::{self, invalid_mut, null_mut, NonNull}; +use crate::sync::atomic::{ + AtomicBool, AtomicPtr, + Ordering::{AcqRel, Acquire, Relaxed, Release}, +}; +use crate::sys_common::thread_info; +use crate::thread::Thread; + +// Locking uses exponential backoff. `SPIN_COUNT` indicates how many times the +// locking operation will be retried. +// `spin_loop` will be called `2.pow(SPIN_COUNT) - 1` times. +const SPIN_COUNT: usize = 7; + +type State = *mut (); +type AtomicState = AtomicPtr<()>; + +const UNLOCKED: State = invalid_mut(0); +const LOCKED: usize = 1; +const QUEUED: usize = 2; +const QUEUE_LOCKED: usize = 4; +const SINGLE: usize = 8; +const MASK: usize = !(QUEUE_LOCKED | QUEUED | LOCKED); + +/// Marks the state as write-locked, if possible. +#[inline] +fn write_lock(state: State) -> Option<State> { + let state = state.wrapping_byte_add(LOCKED); + if state.addr() & LOCKED == LOCKED { Some(state) } else { None } +} + +/// Marks the state as read-locked, if possible. +#[inline] +fn read_lock(state: State) -> Option<State> { + if state.addr() & QUEUED == 0 && state.addr() != LOCKED { + Some(invalid_mut(state.addr().checked_add(SINGLE)? | LOCKED)) + } else { + None + } +} + +/// Masks the state, assuming it points to a queue node. +/// +/// # Safety +/// The state must contain a valid pointer to a queue node. +#[inline] +unsafe fn to_node(state: State) -> NonNull<Node> { + unsafe { NonNull::new_unchecked(state.mask(MASK)).cast() } +} + +/// An atomic node pointer with relaxed operations. +struct AtomicLink(AtomicPtr<Node>); + +impl AtomicLink { + fn new(v: Option<NonNull<Node>>) -> AtomicLink { + AtomicLink(AtomicPtr::new(v.map_or(null_mut(), NonNull::as_ptr))) + } + + fn get(&self) -> Option<NonNull<Node>> { + NonNull::new(self.0.load(Relaxed)) + } + + fn set(&self, v: Option<NonNull<Node>>) { + self.0.store(v.map_or(null_mut(), NonNull::as_ptr), Relaxed); + } +} + +#[repr(align(8))] +struct Node { + next: AtomicLink, + prev: AtomicLink, + tail: AtomicLink, + write: bool, + thread: OnceCell<Thread>, + completed: AtomicBool, +} + +impl Node { + /// Create a new queue node. + fn new(write: bool) -> Node { + Node { + next: AtomicLink::new(None), + prev: AtomicLink::new(None), + tail: AtomicLink::new(None), + write, + thread: OnceCell::new(), + completed: AtomicBool::new(false), + } + } + + /// Prepare this node for waiting. + fn prepare(&mut self) { + // Fall back to creating an unnamed `Thread` handle to allow locking in + // TLS destructors. + self.thread + .get_or_init(|| thread_info::current_thread().unwrap_or_else(|| Thread::new(None))); + self.completed = AtomicBool::new(false); + } + + /// Wait until this node is marked as completed. + /// + /// # Safety + /// May only be called from the thread that created the node. + unsafe fn wait(&self) { + while !self.completed.load(Acquire) { + unsafe { + self.thread.get().unwrap().park(); + } + } + } + + /// Atomically mark this node as completed. The node may not outlive this call. + unsafe fn complete(this: NonNull<Node>) { + // Since the node may be destroyed immediately after the completed flag + // is set, clone the thread handle before that. + let thread = unsafe { this.as_ref().thread.get().unwrap().clone() }; + unsafe { + this.as_ref().completed.store(true, Release); + } + thread.unpark(); + } +} + +struct PanicGuard; + +impl Drop for PanicGuard { + fn drop(&mut self) { + rtabort!("tried to drop node in intrusive list."); + } +} + +/// Add backlinks to the queue, returning the tail. +/// +/// May be called from multiple threads at the same time, while the queue is not +/// modified (this happens when unlocking multiple readers). +/// +/// # Safety +/// * `head` must point to a node in a valid queue. +/// * `head` must be or be in front of the head of the queue at the time of the +/// last removal. +/// * The part of the queue starting with `head` must not be modified during this +/// call. +unsafe fn add_backlinks_and_find_tail(head: NonNull<Node>) -> NonNull<Node> { + let mut current = head; + let tail = loop { + let c = unsafe { current.as_ref() }; + match c.tail.get() { + Some(tail) => break tail, + // SAFETY: + // All `next` fields before the first node with a `set` tail are + // non-null and valid (invariant 3). + None => unsafe { + let next = c.next.get().unwrap_unchecked(); + next.as_ref().prev.set(Some(current)); + current = next; + }, + } + }; + + unsafe { + head.as_ref().tail.set(Some(tail)); + tail + } +} + +pub struct RwLock { + state: AtomicState, +} + +impl RwLock { + #[inline] + pub const fn new() -> RwLock { + RwLock { state: AtomicPtr::new(UNLOCKED) } + } + + #[inline] + pub fn try_read(&self) -> bool { + self.state.fetch_update(Acquire, Relaxed, read_lock).is_ok() + } + + #[inline] + pub fn read(&self) { + if !self.try_read() { + self.lock_contended(false) + } + } + + #[inline] + pub fn try_write(&self) -> bool { + // Atomically set the `LOCKED` bit. This is lowered to a single atomic + // instruction on most modern processors (e.g. "lock bts" on x86 and + // "ldseta" on modern AArch64), and therefore is more efficient than + // `fetch_update(lock(true))`, which can spuriously fail if a new node + // is appended to the queue. + self.state.fetch_or(LOCKED, Acquire).addr() & LOCKED == 0 + } + + #[inline] + pub fn write(&self) { + if !self.try_write() { + self.lock_contended(true) + } + } + + #[cold] + fn lock_contended(&self, write: bool) { + let update = if write { write_lock } else { read_lock }; + let mut node = Node::new(write); + let mut state = self.state.load(Relaxed); + let mut count = 0; + loop { + if let Some(next) = update(state) { + // The lock is available, try locking it. + match self.state.compare_exchange_weak(state, next, Acquire, Relaxed) { + Ok(_) => return, + Err(new) => state = new, + } + } else if state.addr() & QUEUED == 0 && count < SPIN_COUNT { + // If the lock is not available and no threads are queued, spin + // for a while, using exponential backoff to decrease cache + // contention. + for _ in 0..(1 << count) { + spin_loop(); + } + state = self.state.load(Relaxed); + count += 1; + } else { + // Fall back to parking. First, prepare the node. + node.prepare(); + + // If there are threads queued, set the `next` field to a + // pointer to the next node in the queue. Otherwise set it to + // the lock count if the state is read-locked or to zero if it + // is write-locked. + node.next.0 = AtomicPtr::new(state.mask(MASK).cast()); + node.prev = AtomicLink::new(None); + let mut next = ptr::from_ref(&node) + .map_addr(|addr| addr | QUEUED | (state.addr() & LOCKED)) + as State; + + if state.addr() & QUEUED == 0 { + // If this is the first node in the queue, set the tail field to + // the node itself to ensure there is a current `tail` field in + // the queue (invariants 1 and 2). This needs to use `set` to + // avoid invalidating the new pointer. + node.tail.set(Some(NonNull::from(&node))); + } else { + // Otherwise, the tail of the queue is not known. + node.tail.set(None); + // Try locking the queue to eagerly add backlinks. + next = next.map_addr(|addr| addr | QUEUE_LOCKED); + } + + // Register the node, using release ordering to propagate our + // changes to the waking thread. + if let Err(new) = self.state.compare_exchange_weak(state, next, AcqRel, Relaxed) { + // The state has changed, just try again. + state = new; + continue; + } + + // The node is registered, so the structure must not be + // mutably accessed or destroyed while other threads may + // be accessing it. Guard against unwinds using a panic + // guard that aborts when dropped. + let guard = PanicGuard; + + // If the current thread locked the queue, unlock it again, + // linking it in the process. + if state.addr() & (QUEUE_LOCKED | QUEUED) == QUEUED { + unsafe { + self.unlock_queue(next); + } + } + + // Wait until the node is removed from the queue. + // SAFETY: the node was created by the current thread. + unsafe { + node.wait(); + } + + // The node was removed from the queue, disarm the guard. + mem::forget(guard); + + // Reload the state and try again. + state = self.state.load(Relaxed); + count = 0; + } + } + } + + #[inline] + pub unsafe fn read_unlock(&self) { + match self.state.fetch_update(Release, Acquire, |state| { + if state.addr() & QUEUED == 0 { + let count = state.addr() - (SINGLE | LOCKED); + Some(if count > 0 { invalid_mut(count | LOCKED) } else { UNLOCKED }) + } else { + None + } + }) { + Ok(_) => {} + // There are waiters queued and the lock count was moved to the + // tail of the queue. + Err(state) => unsafe { self.read_unlock_contended(state) }, + } + } + + #[cold] + unsafe fn read_unlock_contended(&self, state: State) { + // The state was observed with acquire ordering above, so the current + // thread will observe all node initializations. + + // SAFETY: + // Because new read-locks cannot be acquired while threads are queued, + // all queue-lock owners will observe the set `LOCKED` bit. Because they + // do not modify the queue while there is a lock owner, the queue will + // not be removed from here. + let tail = unsafe { add_backlinks_and_find_tail(to_node(state)).as_ref() }; + // The lock count is stored in the `next` field of `tail`. + // Decrement it, making sure to observe all changes made to the queue + // by the other lock owners by using acquire-release ordering. + let was_last = tail.next.0.fetch_byte_sub(SINGLE, AcqRel).addr() - SINGLE == 0; + if was_last { + // SAFETY: + // Other threads cannot read-lock while threads are queued. Also, + // the `LOCKED` bit is still set, so there are no writers. Therefore, + // the current thread exclusively owns the lock. + unsafe { self.unlock_contended(state) } + } + } + + #[inline] + pub unsafe fn write_unlock(&self) { + if let Err(state) = + self.state.compare_exchange(invalid_mut(LOCKED), UNLOCKED, Release, Relaxed) + { + // SAFETY: + // Since other threads cannot acquire the lock, the state can only + // have changed because there are threads queued on the lock. + unsafe { self.unlock_contended(state) } + } + } + + /// # Safety + /// * The lock must be exclusively owned by this thread. + /// * There must be threads queued on the lock. + #[cold] + unsafe fn unlock_contended(&self, mut state: State) { + loop { + // Atomically release the lock and try to acquire the queue lock. + let next = state.map_addr(|a| (a & !LOCKED) | QUEUE_LOCKED); + match self.state.compare_exchange_weak(state, next, AcqRel, Relaxed) { + // The queue lock was acquired. Release it, waking up the next + // waiter in the process. + Ok(_) if state.addr() & QUEUE_LOCKED == 0 => unsafe { + return self.unlock_queue(next); + }, + // Another thread already holds the queue lock, leave waking up + // waiters to it. + Ok(_) => return, + Err(new) => state = new, + } + } + } + + /// Unlocks the queue. If the lock is unlocked, wakes up the next eligible + /// thread(s). + /// + /// # Safety + /// The queue lock must be held by the current thread. + unsafe fn unlock_queue(&self, mut state: State) { + debug_assert_eq!(state.addr() & (QUEUED | QUEUE_LOCKED), QUEUED | QUEUE_LOCKED); + + loop { + let tail = unsafe { add_backlinks_and_find_tail(to_node(state)) }; + + if state.addr() & LOCKED == LOCKED { + // Another thread has locked the lock. Leave waking up waiters + // to them by releasing the queue lock. + match self.state.compare_exchange_weak( + state, + state.mask(!QUEUE_LOCKED), + Release, + Acquire, + ) { + Ok(_) => return, + Err(new) => { + state = new; + continue; + } + } + } + + let is_writer = unsafe { tail.as_ref().write }; + if is_writer && let Some(prev) = unsafe { tail.as_ref().prev.get() } { + // `tail` is a writer and there is a node before `tail`. + // Split off `tail`. + + // There are no set `tail` links before the node pointed to by + // `state`, so the first non-null tail field will be current + // (invariant 2). Invariant 4 is fullfilled since `find_tail` + // was called on this node, which ensures all backlinks are set. + unsafe { + to_node(state).as_ref().tail.set(Some(prev)); + } + + // Release the queue lock. Doing this by subtraction is more + // efficient on modern processors since it is a single instruction + // instead of an update loop, which will fail if new threads are + // added to the list. + self.state.fetch_byte_sub(QUEUE_LOCKED, Release); + + // The tail was split off and the lock released. Mark the node as + // completed. + unsafe { + return Node::complete(tail); + } + } else { + // The next waiter is a reader or the queue only consists of one + // waiter. Just wake all threads. + + // The lock cannot be locked (checked above), so mark it as + // unlocked to reset the queue. + if let Err(new) = + self.state.compare_exchange_weak(state, UNLOCKED, Release, Acquire) + { + state = new; + continue; + } + + let mut current = tail; + loop { + let prev = unsafe { current.as_ref().prev.get() }; + unsafe { + Node::complete(current); + } + match prev { + Some(prev) => current = prev, + None => return, + } + } + } + } + } +} diff --git a/library/std/src/sys/pal/unix/rand.rs b/library/std/src/sys/pal/unix/rand.rs index 1dba1ccf64e..5c32957bc51 100644 --- a/library/std/src/sys/pal/unix/rand.rs +++ b/library/std/src/sys/pal/unix/rand.rs @@ -62,7 +62,7 @@ mod imp { unsafe { getrandom(buf.as_mut_ptr().cast(), buf.len(), libc::GRND_NONBLOCK) } } - #[cfg(any(target_os = "espidf", target_os = "horizon", target_os = "freebsd"))] + #[cfg(any(target_os = "espidf", target_os = "horizon", target_os = "freebsd", netbsd10))] fn getrandom(buf: &mut [u8]) -> libc::ssize_t { unsafe { libc::getrandom(buf.as_mut_ptr().cast(), buf.len(), 0) } } @@ -72,7 +72,8 @@ mod imp { target_os = "android", target_os = "espidf", target_os = "horizon", - target_os = "freebsd" + target_os = "freebsd", + netbsd10 )))] fn getrandom_fill_bytes(_buf: &mut [u8]) -> bool { false @@ -83,7 +84,8 @@ mod imp { target_os = "android", target_os = "espidf", target_os = "horizon", - target_os = "freebsd" + target_os = "freebsd", + netbsd10 ))] fn getrandom_fill_bytes(v: &mut [u8]) -> bool { use crate::sync::atomic::{AtomicBool, Ordering}; @@ -230,7 +232,7 @@ mod imp { } // FIXME: once the 10.x release becomes the minimum, this can be dropped for simplification. -#[cfg(target_os = "netbsd")] +#[cfg(all(target_os = "netbsd", not(netbsd10)))] mod imp { use crate::ptr; diff --git a/library/std/src/sys/pal/unix/thread.rs b/library/std/src/sys/pal/unix/thread.rs index 7e4a01a5ecd..ba8d31c23a5 100644 --- a/library/std/src/sys/pal/unix/thread.rs +++ b/library/std/src/sys/pal/unix/thread.rs @@ -847,11 +847,31 @@ pub mod guard { let stackptr = get_stack_start_aligned()?; let guardaddr = stackptr.addr(); // Technically the number of guard pages is tunable and controlled - // by the security.bsd.stack_guard_page sysctl, but there are - // few reasons to change it from the default. The default value has - // been 1 ever since FreeBSD 11.1 and 10.4. - const GUARD_PAGES: usize = 1; - let guard = guardaddr..guardaddr + GUARD_PAGES * page_size; + // by the security.bsd.stack_guard_page sysctl. + // By default it is 1, checking once is enough since it is + // a boot time config value. + static LOCK: crate::sync::OnceLock<usize> = crate::sync::OnceLock::new(); + let guard = guardaddr + ..guardaddr + + *LOCK.get_or_init(|| { + use crate::sys::weak::dlsym; + dlsym!(fn sysctlbyname(*const libc::c_char, *mut libc::c_void, *mut libc::size_t, *const libc::c_void, libc::size_t) -> libc::c_int); + let mut guard: usize = 0; + let mut size = crate::mem::size_of_val(&guard); + let oid = crate::ffi::CStr::from_bytes_with_nul( + b"security.bsd.stack_guard_page\0", + ) + .unwrap(); + match sysctlbyname.get() { + Some(fcn) => { + if fcn(oid.as_ptr(), &mut guard as *mut _ as *mut _, &mut size as *mut _ as *mut _, crate::ptr::null_mut(), 0) == 0 { + return guard; + } + return 1; + }, + _ => { return 1; } + } + }) * page_size; Some(guard) } else if cfg!(target_os = "openbsd") { // OpenBSD stack already includes a guard page, and stack is diff --git a/library/std/src/sys/pal/windows/c/README.md b/library/std/src/sys/pal/windows/c/README.md new file mode 100644 index 00000000000..d458e55efbc --- /dev/null +++ b/library/std/src/sys/pal/windows/c/README.md @@ -0,0 +1,9 @@ +The `windows_sys.rs` file is autogenerated from `bindings.txt` and must not +be edited manually. + +To add bindings, edit `bindings.txt` then regenerate using the following command: + + ./x run generate-windows-sys && ./x fmt library/std + +If you need to override generated functions or types then add them to +`library/std/src/sys/pal/windows/c.rs`. diff --git a/library/std/src/sys/pal/windows/c/windows_sys.lst b/library/std/src/sys/pal/windows/c/bindings.txt index f91e1054a04..726f1c3df82 100644 --- a/library/std/src/sys/pal/windows/c/windows_sys.lst +++ b/library/std/src/sys/pal/windows/c/bindings.txt @@ -1,7 +1,6 @@ --out windows_sys.rs --config flatten std --filter -// tidy-alphabetical-start !Windows.Win32.Foundation.INVALID_HANDLE_VALUE Windows.Wdk.Storage.FileSystem.FILE_COMPLETE_IF_OPLOCKED Windows.Wdk.Storage.FileSystem.FILE_CONTAINS_EXTENDED_CREATE_INFORMATION @@ -2592,5 +2591,3 @@ Windows.Win32.System.Threading.WakeAllConditionVariable Windows.Win32.System.Threading.WakeConditionVariable Windows.Win32.System.WindowsProgramming.PROGRESS_CONTINUE Windows.Win32.UI.Shell.GetUserProfileDirectoryW -// tidy-alphabetical-end - diff --git a/library/std/src/sys/pal/windows/c/windows_sys.rs b/library/std/src/sys/pal/windows/c/windows_sys.rs index b38b70c8983..c386b66a722 100644 --- a/library/std/src/sys/pal/windows/c/windows_sys.rs +++ b/library/std/src/sys/pal/windows/c/windows_sys.rs @@ -1,9 +1,3 @@ -// This file is autogenerated. -// -// To add bindings, edit windows_sys.lst then use `./x run generate-windows-sys` to -// regenerate the bindings. -// -// ignore-tidy-filelength // Bindings generated by `windows-bindgen` 0.52.0 #![allow(non_snake_case, non_upper_case_globals, non_camel_case_types, dead_code, clippy::all)] @@ -4351,3 +4345,4 @@ impl ::core::clone::Clone for XSAVE_FORMAT { *self } } +// ignore-tidy-filelength |
