about summary refs log tree commit diff
path: root/library/std/src/sys
diff options
context:
space:
mode:
authorMateusz Guzik <mjguzik@gmail.com>2023-01-10 02:08:07 +0000
committerMateusz Guzik <mjguzik@gmail.com>2023-01-11 17:10:08 +0000
commitb49aa8d53e2e1424f5d9997734cfd71b2ae647b4 (patch)
tree1366592b15714196c761e575f19c810d0bd2eb1f /library/std/src/sys
parent753e57672296e13c534f87b6e2672a73fff4965c (diff)
downloadrust-b49aa8d53e2e1424f5d9997734cfd71b2ae647b4.tar.gz
rust-b49aa8d53e2e1424f5d9997734cfd71b2ae647b4.zip
Stop probing for statx unless necessary
As is the current toy program:
fn main() -> std::io::Result<()> {
    use std::fs;

    let metadata = fs::metadata("foo.txt")?;

    assert!(!metadata.is_dir());
    Ok(())
}

... observed under strace will issue:
[snip]
statx(0, NULL, AT_STATX_SYNC_AS_STAT, STATX_ALL, NULL) = -1 EFAULT (Bad address)
statx(AT_FDCWD, "foo.txt", AT_STATX_SYNC_AS_STAT, STATX_ALL, {stx_mask=STATX_ALL|STATX_MNT_ID, stx_attributes=0, stx_mode=S_IFREG|0644, stx_size=0, ...}) = 0

While statx is not necessarily always present, checking for it can be
delayed to the first error condition. Said condition may very well never
happen, in which case the check got avoided altogether.

Note this is still suboptimal as there still will be programs issuing
it, but bulk of the problem is removed.

Tested by forbidding the syscall for the binary and observing it
correctly falls back to newfstatat.

While here tidy up the commentary, in particular by denoting some
problems with the current approach.
Diffstat (limited to 'library/std/src/sys')
-rw-r--r--library/std/src/sys/unix/fs.rs68
1 files changed, 41 insertions, 27 deletions
diff --git a/library/std/src/sys/unix/fs.rs b/library/std/src/sys/unix/fs.rs
index aea0c26ee8b..0e7d23a5c7c 100644
--- a/library/std/src/sys/unix/fs.rs
+++ b/library/std/src/sys/unix/fs.rs
@@ -149,12 +149,13 @@ cfg_has_statx! {{
     ) -> Option<io::Result<FileAttr>> {
         use crate::sync::atomic::{AtomicU8, Ordering};
 
-        // Linux kernel prior to 4.11 or glibc prior to glibc 2.28 don't support `statx`
-        // We store the availability in global to avoid unnecessary syscalls.
-        // 0: Unknown
-        // 1: Not available
-        // 2: Available
-        static STATX_STATE: AtomicU8 = AtomicU8::new(0);
+        // Linux kernel prior to 4.11 or glibc prior to glibc 2.28 don't support `statx`.
+        // We check for it on first failure and remember availability to avoid having to
+        // do it again.
+        #[repr(u8)]
+        enum STATX_STATE{ Unknown = 0, Present, Unavailable }
+        static STATX_SAVED_STATE: AtomicU8 = AtomicU8::new(STATX_STATE::Unknown as u8);
+
         syscall! {
             fn statx(
                 fd: c_int,
@@ -165,31 +166,44 @@ cfg_has_statx! {{
             ) -> c_int
         }
 
-        match STATX_STATE.load(Ordering::Relaxed) {
-            0 => {
-                // It is a trick to call `statx` with null pointers to check if the syscall
-                // is available. According to the manual, it is expected to fail with EFAULT.
-                // We do this mainly for performance, since it is nearly hundreds times
-                // faster than a normal successful call.
-                let err = cvt(statx(0, ptr::null(), 0, libc::STATX_ALL, ptr::null_mut()))
-                    .err()
-                    .and_then(|e| e.raw_os_error());
-                // We don't check `err == Some(libc::ENOSYS)` because the syscall may be limited
-                // and returns `EPERM`. Listing all possible errors seems not a good idea.
-                // See: https://github.com/rust-lang/rust/issues/65662
-                if err != Some(libc::EFAULT) {
-                    STATX_STATE.store(1, Ordering::Relaxed);
-                    return None;
-                }
-                STATX_STATE.store(2, Ordering::Relaxed);
-            }
-            1 => return None,
-            _ => {}
+        if STATX_SAVED_STATE.load(Ordering::Relaxed) == STATX_STATE::Unavailable as u8 {
+            return None;
         }
 
         let mut buf: libc::statx = mem::zeroed();
         if let Err(err) = cvt(statx(fd, path, flags, mask, &mut buf)) {
-            return Some(Err(err));
+            if STATX_SAVED_STATE.load(Ordering::Relaxed) == STATX_STATE::Present as u8 {
+                return Some(Err(err));
+            }
+
+            // Availability not checked yet.
+            //
+            // First try the cheap way.
+            if err.raw_os_error() == Some(libc::ENOSYS) {
+                STATX_SAVED_STATE.store(STATX_STATE::Unavailable as u8, Ordering::Relaxed);
+                return None;
+            }
+
+            // Error other than `ENOSYS` is not a good enough indicator -- it is
+            // known that `EPERM` can be returned as a result of using seccomp to
+            // block the syscall.
+            // Availability is checked by performing a call which expects `EFAULT`
+            // if the syscall is usable.
+            // See: https://github.com/rust-lang/rust/issues/65662
+            // FIXME this can probably just do the call if `EPERM` was received, but
+            // previous iteration of the code checked it for all errors and for now
+            // this is retained.
+            // FIXME what about transient conditions like `ENOMEM`?
+            let err2 = cvt(statx(0, ptr::null(), 0, libc::STATX_ALL, ptr::null_mut()))
+                .err()
+                .and_then(|e| e.raw_os_error());
+            if err2 == Some(libc::EFAULT) {
+                STATX_SAVED_STATE.store(STATX_STATE::Present as u8, Ordering::Relaxed);
+                return Some(Err(err));
+            } else {
+                STATX_SAVED_STATE.store(STATX_STATE::Unavailable as u8, Ordering::Relaxed);
+                return None;
+            }
         }
 
         // We cannot fill `stat64` exhaustively because of private padding fields.