about summary refs log tree commit diff
path: root/library/std/src
diff options
context:
space:
mode:
author Jakub Beránek <berykubik@gmail.com> 2025-07-14 11:04:55 +0200
committer GitHub <noreply@github.com> 2025-07-14 11:04:55 +0200
commit 70301da7c7ea0678b72cd25ed869b023547e15cb (patch)
tree 687facca58027662daa931aaf3caf0966d1c82a6 /library/std/src
parent a7ad680269cc250d87f584ce84c40f7f90dc7159 (diff)
parent f041962694875c0f0a33f8a5e27fd00597042169 (diff)
download rust-70301da7c7ea0678b72cd25ed869b023547e15cb.tar.gz
rust-70301da7c7ea0678b72cd25ed869b023547e15cb.zip
Rollup merge of #143881 - orlp:once-state-repr, r=tgross35
Use zero for initialized Once state

By re-labeling which integer represents which internal state of `Once`, we can ensure that the initialized state is the all-zero state. This is beneficial because some CPU architectures (such as Arm) have specialized instructions that branch on non-zero, and checking for the initialized state is by far the most important operation.

As an example, take this:

```rust
use std::sync::atomic::{AtomicU32, Ordering};

/// Value stored in the state word once initialization has been performed.
const INIT: u32 = 3;

/// Slow path: publishes `INIT` into `state` with release ordering.
///
/// Marked cold and never inlined so it stays a separate call target,
/// leaving only the load-and-compare in `ensure_init`.
#[cold]
#[inline(never)]
pub fn slow(state: &AtomicU32) {
    state.store(INIT, Ordering::Release);
}

/// Fast path: returns immediately when `state` already holds `INIT`,
/// otherwise hands off to `slow`.
pub fn ensure_init(state: &AtomicU32) {
    let observed = state.load(Ordering::Acquire);
    if observed == INIT {
        return;
    }
    slow(state);
}
```

If `INIT` is 3 (as is currently the case for `Once`), we see the following assembly on `aarch64-apple-darwin`:

```asm
example::ensure_init::h332061368366e313:
        ldapr   w8, [x0]
        cmp     w8, #3
        b.ne    LBB1_2
        ret
LBB1_2:
        b       example::slow::ha042bd6a4f33724e
```

By changing the `INIT` state to zero we get the following:

```asm
example::ensure_init::h332061368366e313:
        ldapr   w8, [x0]
        cbnz    w8, LBB1_2
        ret
LBB1_2:
        b       example::slow::ha042bd6a4f33724e
```

So this PR saves 1 instruction every time a `LazyLock` gets accessed on platforms such as these.
Diffstat (limited to 'library/std/src')
-rw-r--r-- library/std/src/sys/sync/once/futex.rs 10
-rw-r--r-- library/std/src/sys/sync/once/queue.rs 11
2 files changed, 12 insertions, 9 deletions
diff --git a/library/std/src/sys/sync/once/futex.rs b/library/std/src/sys/sync/once/futex.rs
index 539f0fe89ea..407fdcebcf5 100644
--- a/library/std/src/sys/sync/once/futex.rs
+++ b/library/std/src/sys/sync/once/futex.rs
@@ -8,16 +8,18 @@ use crate::sys::futex::{Futex, Primitive, futex_wait, futex_wake_all};
 // This means we only need one atomic value with 4 states:
 
 /// No initialization has run yet, and no thread is currently using the Once.
-const INCOMPLETE: Primitive = 0;
+const INCOMPLETE: Primitive = 3;
 /// Some thread has previously attempted to initialize the Once, but it panicked,
 /// so the Once is now poisoned. There are no other threads currently accessing
 /// this Once.
-const POISONED: Primitive = 1;
+const POISONED: Primitive = 2;
 /// Some thread is currently attempting to run initialization. It may succeed,
 /// so all future threads need to wait for it to finish.
-const RUNNING: Primitive = 2;
+const RUNNING: Primitive = 1;
 /// Initialization has completed and all future calls should finish immediately.
-const COMPLETE: Primitive = 3;
+/// By choosing this state as the all-zero state the `is_completed` check can be
+/// a bit faster on some platforms.
+const COMPLETE: Primitive = 0;
 
 // An additional bit indicates whether there are waiting threads:
 
diff --git a/library/std/src/sys/sync/once/queue.rs b/library/std/src/sys/sync/once/queue.rs
index 6a2ab0dcf1b..49e15d65f25 100644
--- a/library/std/src/sys/sync/once/queue.rs
+++ b/library/std/src/sys/sync/once/queue.rs
@@ -74,11 +74,12 @@ pub struct OnceState {
 }
 
 // Four states that a Once can be in, encoded into the lower bits of
-// `state_and_queue` in the Once structure.
-const INCOMPLETE: usize = 0x0;
-const POISONED: usize = 0x1;
-const RUNNING: usize = 0x2;
-const COMPLETE: usize = 0x3;
+// `state_and_queue` in the Once structure. By choosing COMPLETE as the all-zero
+// state the `is_completed` check can be a bit faster on some platforms.
+const INCOMPLETE: usize = 0x3;
+const POISONED: usize = 0x2;
+const RUNNING: usize = 0x1;
+const COMPLETE: usize = 0x0;
 
 // Mask to learn about the state. All other bits are the queue of waiters if
 // this is in the RUNNING state.