Rollup merge of #121596 - ChrisDenton:tls, r=joboet

Use volatile access instead of `#[used]` for `on_tls_callback` The first commit adds a volatile load of `p_thread_callback` when registering a dtor so that the compiler knows if the callback is used or not. I don't believe the added volatile instruction is otherwise significant in the context. In my testing using the volatile load allowed the compiler to correctly reason about whether `on_tls_callback` is used or not, allowing it to be omitted entirely in some cases. Admittedly it usually is used due to `Thread` but that can be avoided (e.g. in DLLs or with custom entry points that avoid the offending APIs). Ideally this would be something the compiler could help a bit more with so we didn't have to use tricks like `#[used]` or volatile. But alas. I also used this as an opportunity to clean up the `unused` lints which I don't think serve a purpose any more. The second commit removes the volatile load of `_tls_used` with `#cfg[target_thread_local]` because `#[thread_local]` already implies it. And if it ever didn't then `#[thread_local]` would be broken when there aren't any dtors.
author: Guillaume Gomez <guillaume1.gomez@gmail.com> 2024-02-29 17:08:37 +0100
committer: GitHub <noreply@github.com> 2024-02-29 17:08:37 +0100
commit: eea8ceed54033b7967fe9e7d4c26d2a0942ef722 (patch)
tree: 084254649e2fda2d54d52c5dbc1ab7a3fc90d708 /library/std/src/sys
parent: 2e0a26a32a51faba018d70310869e73b2743c959 (diff)
parent: ad4c4f4654bf78bb51638e58a9c418afcdd6ee15 (diff)
download: rust-eea8ceed54033b7967fe9e7d4c26d2a0942ef722.tar.gz
rust-eea8ceed54033b7967fe9e7d4c26d2a0942ef722.zip
1 files changed, 13 insertions, 22 deletions
diff --git a/library/std/src/sys/pal/windows/thread_local_key.rs b/library/std/src/sys/pal/windows/thread_local_key.rs
index 5eee4a9667b..1d9cb316a45 100644
--- a/library/std/src/sys/pal/windows/thread_local_key.rs
+++ b/library/std/src/sys/pal/windows/thread_local_key.rs
@@ -1,7 +1,7 @@
 use crate::cell::UnsafeCell;
 use crate::ptr;
 use crate::sync::atomic::{
-    AtomicBool, AtomicPtr, AtomicU32,
+    AtomicPtr, AtomicU32,
     Ordering::{AcqRel, Acquire, Relaxed, Release},
 };
 use crate::sys::c;
@@ -9,10 +9,6 @@ use crate::sys::c;
 #[cfg(test)]
 mod tests;
 
-/// An optimization hint. The compiler is often smart enough to know if an atomic
-/// is never set and can remove dead code based on that fact.
-static HAS_DTORS: AtomicBool = AtomicBool::new(false);
-
 // Using a per-thread list avoids the problems in synchronizing global state.
 #[thread_local]
 #[cfg(target_thread_local)]
@@ -24,12 +20,11 @@ static DESTRUCTORS: crate::cell::RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut
 #[inline(never)]
 #[cfg(target_thread_local)]
 pub unsafe fn register_keyless_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
+    dtors_used();
     match DESTRUCTORS.try_borrow_mut() {
         Ok(mut dtors) => dtors.push((t, dtor)),
         Err(_) => rtabort!("global allocator may not use TLS"),
     }
-
-    HAS_DTORS.store(true, Relaxed);
 }
 
 #[inline(never)] // See comment above
@@ -130,6 +125,7 @@ impl StaticKey {
     #[cold]
     unsafe fn init(&'static self) -> Key {
         if self.dtor.is_some() {
+            dtors_used();
             let mut pending = c::FALSE;
             let r = c::InitOnceBeginInitialize(self.once.get(), 0, &mut pending, ptr::null_mut());
             assert_eq!(r, c::TRUE);
@@ -215,7 +211,6 @@ unsafe fn register_dtor(key: &'static StaticKey) {
             Err(new) => head = new,
         }
     }
-    HAS_DTORS.store(true, Release);
 }
 
 // -------------------------------------------------------------------------
@@ -281,17 +276,16 @@ unsafe fn register_dtor(key: &'static StaticKey) {
 // the address of the symbol to ensure it sticks around.
 
 #[link_section = ".CRT$XLB"]
-#[allow(dead_code, unused_variables)]
-#[used] // we don't want LLVM eliminating this symbol for any reason, and
-// when the symbol makes it to the linker the linker will take over
 pub static p_thread_callback: unsafe extern "system" fn(c::LPVOID, c::DWORD, c::LPVOID) =
     on_tls_callback;
 
-#[allow(dead_code, unused_variables)]
-unsafe extern "system" fn on_tls_callback(h: c::LPVOID, dwReason: c::DWORD, pv: c::LPVOID) {
-    if !HAS_DTORS.load(Acquire) {
-        return;
-    }
+fn dtors_used() {
+    // we don't want LLVM eliminating p_thread_callback when destructors are used.
+    // when the symbol makes it to the linker the linker will take over
+    unsafe { crate::intrinsics::volatile_load(&p_thread_callback) };
+}
+
+unsafe extern "system" fn on_tls_callback(_h: c::LPVOID, dwReason: c::DWORD, _pv: c::LPVOID) {
     if dwReason == c::DLL_THREAD_DETACH || dwReason == c::DLL_PROCESS_DETACH {
         #[cfg(not(target_thread_local))]
         run_dtors();
@@ -301,19 +295,16 @@ unsafe extern "system" fn on_tls_callback(h: c::LPVOID, dwReason: c::DWORD, pv:
 
     // See comments above for what this is doing. Note that we don't need this
     // trickery on GNU windows, just on MSVC.
-    reference_tls_used();
-    #[cfg(target_env = "msvc")]
-    unsafe fn reference_tls_used() {
+    #[cfg(all(target_env = "msvc", not(target_thread_local)))]
+    {
         extern "C" {
             static _tls_used: u8;
         }
         crate::intrinsics::volatile_load(&_tls_used);
     }
-    #[cfg(not(target_env = "msvc"))]
-    unsafe fn reference_tls_used() {}
 }
 
-#[allow(dead_code)] // actually called below
+#[cfg(not(target_thread_local))]
 unsafe fn run_dtors() {
     for _ in 0..5 {
         let mut any_run = false;
author	Guillaume Gomez <guillaume1.gomez@gmail.com>	2024-02-29 17:08:37 +0100
committer	GitHub <noreply@github.com>	2024-02-29 17:08:37 +0100
commit	eea8ceed54033b7967fe9e7d4c26d2a0942ef722 (patch)
tree	084254649e2fda2d54d52c5dbc1ab7a3fc90d708 /library/std/src/sys
parent	2e0a26a32a51faba018d70310869e73b2743c959 (diff)
parent	ad4c4f4654bf78bb51638e58a9c418afcdd6ee15 (diff)
download	rust-eea8ceed54033b7967fe9e7d4c26d2a0942ef722.tar.gz rust-eea8ceed54033b7967fe9e7d4c26d2a0942ef722.zip