about summary refs log tree commit diff
diff options
context:
space:
mode:
author joboet <jonasboettiger@icloud.com> 2025-08-30 00:21:38 +0200
committer joboet <jonasboettiger@icloud.com> 2025-09-03 17:58:45 +0200
commit 82f5cdf33e15bf08548ec8b9f13bb4cad8ac5ae4 (patch)
tree bf8530880b733f8e053e126b27b9574ecb703b07
parent fe55364329579d361b1ab565728bc033a7dba07e (diff)
download rust-82f5cdf33e15bf08548ec8b9f13bb4cad8ac5ae4.tar.gz
rust-82f5cdf33e15bf08548ec8b9f13bb4cad8ac5ae4.zip
std: improve the `dlsym!` macro and add a test for it
* Ensure nul-termination of the symbol name at compile-time
* Use an acquire load instead of a relaxed load and acquire fence
* Properly use `unsafe` and add safety comments
* Add tests
-rw-r--r-- library/std/src/lib.rs 1
-rw-r--r-- library/std/src/sys/pal/unix/weak.rs 120
-rw-r--r-- library/std/src/sys/pal/unix/weak/tests.rs 32
3 files changed, 102 insertions, 51 deletions
diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs
index 30a1b108817..69f640a6855 100644
--- a/library/std/src/lib.rs
+++ b/library/std/src/lib.rs
@@ -350,6 +350,7 @@
 #![feature(float_gamma)]
 #![feature(float_minimum_maximum)]
 #![feature(fmt_internals)]
+#![feature(fn_ptr_trait)]
 #![feature(generic_atomic)]
 #![feature(hasher_prefixfree_extras)]
 #![feature(hashmap_internals)]
diff --git a/library/std/src/sys/pal/unix/weak.rs b/library/std/src/sys/pal/unix/weak.rs
index c8cf75b876c..ad649fdbab6 100644
--- a/library/std/src/sys/pal/unix/weak.rs
+++ b/library/std/src/sys/pal/unix/weak.rs
@@ -22,11 +22,14 @@
 #![allow(dead_code, unused_macros)]
 #![forbid(unsafe_op_in_unsafe_fn)]
 
-use crate::ffi::CStr;
-use crate::marker::PhantomData;
-use crate::sync::atomic::{self, Atomic, AtomicPtr, Ordering};
+use crate::ffi::{CStr, c_char, c_void};
+use crate::marker::{FnPtr, PhantomData};
+use crate::sync::atomic::{Atomic, AtomicPtr, Ordering};
 use crate::{mem, ptr};
 
+#[cfg(test)]
+mod tests;
+
 // We can use true weak linkage on ELF targets.
 #[cfg(all(unix, not(target_vendor = "apple")))]
 pub(crate) macro weak {
@@ -64,7 +67,7 @@ impl<F: Copy> ExternWeak<F> {
 
 pub(crate) macro dlsym {
     (fn $name:ident($($param:ident : $t:ty),* $(,)?) -> $ret:ty;) => (
-         dlsym!(
+        dlsym!(
             #[link_name = stringify!($name)]
             fn $name($($param : $t),*) -> $ret;
         );
@@ -73,21 +76,39 @@ pub(crate) macro dlsym {
         #[link_name = $sym:expr]
         fn $name:ident($($param:ident : $t:ty),* $(,)?) -> $ret:ty;
     ) => (
-        static DLSYM: DlsymWeak<unsafe extern "C" fn($($t),*) -> $ret> =
-            DlsymWeak::new(concat!($sym, '\0'));
+        static DLSYM: DlsymWeak<unsafe extern "C" fn($($t),*) -> $ret> = {
+            let Ok(name) = CStr::from_bytes_with_nul(concat!($sym, '\0').as_bytes()) else {
+                panic!("symbol name may not contain NUL")
+            };
+
+            // SAFETY: Whoever calls the function pointer returned by `get()`
+            // is responsible for ensuring that the signature is correct. Just
+            // like with extern blocks, this is syntactically enforced by making
+            // the function pointer be unsafe.
+            unsafe { DlsymWeak::new(name) }
+        };
+
         let $name = &DLSYM;
     )
 }
+
 pub(crate) struct DlsymWeak<F> {
-    name: &'static str,
+    /// A pointer to the nul-terminated name of the symbol.
+    // Use a pointer instead of `&'static CStr` to save space.
+    name: *const c_char,
     func: Atomic<*mut libc::c_void>,
     _marker: PhantomData<F>,
 }
 
-impl<F> DlsymWeak<F> {
-    pub(crate) const fn new(name: &'static str) -> Self {
+impl<F: FnPtr> DlsymWeak<F> {
+    /// # Safety
+    ///
+    /// If the signature of `F` does not match the signature of the symbol (if
+    /// it exists), calling the function pointer returned by `get()` is
+    /// undefined behaviour.
+    pub(crate) const unsafe fn new(name: &'static CStr) -> Self {
         DlsymWeak {
-            name,
+            name: name.as_ptr(),
             func: AtomicPtr::new(ptr::without_provenance_mut(1)),
             _marker: PhantomData,
         }
@@ -95,62 +116,59 @@ impl<F> DlsymWeak<F> {
 
     #[inline]
     pub(crate) fn get(&self) -> Option<F> {
-        unsafe {
-            // Relaxed is fine here because we fence before reading through the
-            // pointer (see the comment below).
-            match self.func.load(Ordering::Relaxed) {
-                func if func.addr() == 1 => self.initialize(),
-                func if func.is_null() => None,
-                func => {
-                    let func = mem::transmute_copy::<*mut libc::c_void, F>(&func);
-                    // The caller is presumably going to read through this value
-                    // (by calling the function we've dlsymed). This means we'd
-                    // need to have loaded it with at least C11's consume
-                    // ordering in order to be guaranteed that the data we read
-                    // from the pointer isn't from before the pointer was
-                    // stored. Rust has no equivalent to memory_order_consume,
-                    // so we use an acquire fence (sorry, ARM).
-                    //
-                    // Now, in practice this likely isn't needed even on CPUs
-                    // where relaxed and consume mean different things. The
-                    // symbols we're loading are probably present (or not) at
-                    // init, and even if they aren't the runtime dynamic loader
-                    // is extremely likely have sufficient barriers internally
-                    // (possibly implicitly, for example the ones provided by
-                    // invoking `mprotect`).
-                    //
-                    // That said, none of that's *guaranteed*, and so we fence.
-                    atomic::fence(Ordering::Acquire);
-                    Some(func)
-                }
-            }
+        // The caller is presumably going to read through this value
+        // (by calling the function we've dlsymed). This means we'd
+        // need to have loaded it with at least C11's consume
+        // ordering in order to be guaranteed that the data we read
+        // from the pointer isn't from before the pointer was
+        // stored. Rust has no equivalent to memory_order_consume,
+        // so we use an acquire load (sorry, ARM).
+        //
+        // Now, in practice this likely isn't needed even on CPUs
+        // where relaxed and consume mean different things. The
+        // symbols we're loading are probably present (or not) at
+        // init, and even if they aren't the runtime dynamic loader
+        // is extremely likely to have sufficient barriers internally
+        // (possibly implicitly, for example the ones provided by
+        // invoking `mprotect`).
+        //
+        // That said, none of that's *guaranteed*, so we use acquire.
+        match self.func.load(Ordering::Acquire) {
+            func if func.addr() == 1 => self.initialize(),
+            func if func.is_null() => None,
+            // SAFETY:
+            // `func` is not null and `F` implements `FnPtr`, thus this
+            // transmutation is well-defined. It is the responsibility of the
+            // creator of this `DlsymWeak` to ensure that calling the resulting
+            // function pointer does not result in undefined behaviour (though
+            // the `dlsym!` macro delegates this responsibility to the caller
+            // of the function by using `unsafe` function pointers).
+            // FIXME: use `transmute` once it stops complaining about generics.
+            func => Some(unsafe { mem::transmute_copy::<*mut c_void, F>(&func) }),
         }
     }
 
     // Cold because it should only happen during first-time initialization.
     #[cold]
-    unsafe fn initialize(&self) -> Option<F> {
-        assert_eq!(size_of::<F>(), size_of::<*mut libc::c_void>());
-
-        let val = unsafe { fetch(self.name) };
-        // This synchronizes with the acquire fence in `get`.
+    fn initialize(&self) -> Option<F> {
+        // SAFETY: `self.name` was created from a `&'static CStr` and is
+        // therefore a valid C string pointer.
+        let val = unsafe { libc::dlsym(libc::RTLD_DEFAULT, self.name) };
+        // This synchronizes with the acquire load in `get`.
         self.func.store(val, Ordering::Release);
 
         if val.is_null() {
             None
         } else {
+            // SAFETY: see the comment in `get`.
+            // FIXME: use `transmute` once it stops complaining about generics.
             Some(unsafe { mem::transmute_copy::<*mut libc::c_void, F>(&val) })
         }
     }
 }
 
-unsafe fn fetch(name: &str) -> *mut libc::c_void {
-    let name = match CStr::from_bytes_with_nul(name.as_bytes()) {
-        Ok(cstr) => cstr,
-        Err(..) => return ptr::null_mut(),
-    };
-    unsafe { libc::dlsym(libc::RTLD_DEFAULT, name.as_ptr()) }
-}
+unsafe impl<F> Send for DlsymWeak<F> {}
+unsafe impl<F> Sync for DlsymWeak<F> {}
 
 #[cfg(not(any(target_os = "linux", target_os = "android")))]
 pub(crate) macro syscall {
diff --git a/library/std/src/sys/pal/unix/weak/tests.rs b/library/std/src/sys/pal/unix/weak/tests.rs
new file mode 100644
index 00000000000..d807ba64e35
--- /dev/null
+++ b/library/std/src/sys/pal/unix/weak/tests.rs
@@ -0,0 +1,32 @@
+use super::*;
+
+#[test]
+fn dlsym_existing() {
+    const TEST_STRING: &'static CStr = c"Ferris!";
+
+    // Try to find a symbol that definitely exists.
+    dlsym! {
+        fn strlen(cs: *const c_char) -> usize;
+    }
+
+    dlsym! {
+        #[link_name = "strlen"]
+        fn custom_name(cs: *const c_char) -> usize;
+    }
+
+    let strlen = strlen.get().unwrap();
+    assert_eq!(unsafe { strlen(TEST_STRING.as_ptr()) }, TEST_STRING.count_bytes());
+
+    let custom_name = custom_name.get().unwrap();
+    assert_eq!(unsafe { custom_name(TEST_STRING.as_ptr()) }, TEST_STRING.count_bytes());
+}
+
+#[test]
+fn dlsym_missing() {
+    // Try to find a symbol that definitely does not exist.
+    dlsym! {
+        fn test_symbol_that_does_not_exist() -> i32;
+    }
+
+    assert!(test_symbol_that_does_not_exist.get().is_none());
+}