about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMichael Goulet <michael@errs.io>2023-11-19 19:14:34 -0800
committerGitHub <noreply@github.com>2023-11-19 19:14:34 -0800
commit6d33e900d840445b5fccfad3c0c46ec347292d0b (patch)
tree1e9ce6125021785bfb92e9c859e3a97757cfe8b4
parenta7f805d2777724f3cea465c19c658b68c63a3465 (diff)
parentf34e7f4768c21c5c8b178d3714eeed8be40502af (diff)
downloadrust-6d33e900d840445b5fccfad3c0c46ec347292d0b.tar.gz
rust-6d33e900d840445b5fccfad3c0c46ec347292d0b.zip
Rollup merge of #117957 - the8472:pidfd-wait, r=Mark-Simulacrum
if available use a Child's pidfd for kill/wait

This should get us closer to stabilization of pidfds since they now do something useful. And they're `CLOEXEC` now.

```
$ strace -ffe clone,sendmsg,recvmsg,execve,kill,pidfd_open,pidfd_send_signal,waitpid,waitid ./x test std --no-doc -- pidfd

[...]
running 1 tests
strace: Process 816007 attached
[pid 816007] pidfd_open(816006, 0)      = 3
[pid 816007] clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f0c6b787990) = 816008
strace: Process 816008 attached
[pid 816007] recvmsg(3,  <unfinished ...>
[pid 816008] pidfd_open(816008, 0)      = 3
[pid 816008] sendmsg(4, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="", iov_len=0}], msg_iovlen=1, msg_control=[{cmsg_len=20, cmsg_level=SOL_SOCKET, cmsg_type=SCM_RIGHTS, cmsg_data=[3]}], msg_controllen=24, msg_flags=0}, 0) = 0
[pid 816007] <... recvmsg resumed>{msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="", iov_len=0}], msg_iovlen=1, msg_control=[{cmsg_len=20, cmsg_level=SOL_SOCKET, cmsg_type=SCM_RIGHTS, cmsg_data=[4]}], msg_controllen=24, msg_flags=MSG_CMSG_CLOEXEC}, MSG_CMSG_CLOEXEC) = 0
[pid 816008] execve("/usr/bin/false", ["false"], 0x7ffcf2100048 /* 105 vars */) = 0
[pid 816007] waitid(P_PIDFD, 4,  <unfinished ...>
[pid 816008] +++ exited with 1 +++
[pid 816007] <... waitid resumed>{si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=816008, si_uid=1001, si_status=1, si_utime=0, si_stime=0}, WEXITED, NULL) = 0
[pid 816007] --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=816008, si_uid=1001, si_status=1, si_utime=0, si_stime=0} ---
[pid 816007] clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLDstrace: Process 816009 attached
, child_tidptr=0x7f0c6b787990) = 816009
[pid 816007] recvmsg(3,  <unfinished ...>
[pid 816009] pidfd_open(816009, 0)      = 3
[pid 816009] sendmsg(5, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="", iov_len=0}], msg_iovlen=1, msg_control=[{cmsg_len=20, cmsg_level=SOL_SOCKET, cmsg_type=SCM_RIGHTS, cmsg_data=[3]}], msg_controllen=24, msg_flags=0}, 0) = 0
[pid 816007] <... recvmsg resumed>{msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="", iov_len=0}], msg_iovlen=1, msg_control=[{cmsg_len=20, cmsg_level=SOL_SOCKET, cmsg_type=SCM_RIGHTS, cmsg_data=[5]}], msg_controllen=24, msg_flags=MSG_CMSG_CLOEXEC}, MSG_CMSG_CLOEXEC) = 0
[pid 816009] execve("/usr/bin/sleep", ["sleep", "1000"], 0x7ffcf2100048 /* 105 vars */) = 0
[pid 816007] waitid(P_PIDFD, 5, {}, WNOHANG|WEXITED, NULL) = 0
[pid 816007] pidfd_send_signal(5, SIGKILL, NULL, 0) = 0
[pid 816007] waitid(P_PIDFD, 5,  <unfinished ...>
[pid 816009] +++ killed by SIGKILL +++
[pid 816007] <... waitid resumed>{si_signo=SIGCHLD, si_code=CLD_KILLED, si_pid=816009, si_uid=1001, si_status=SIGKILL, si_utime=0, si_stime=0}, WEXITED, NULL) = 0
[pid 816007] --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_KILLED, si_pid=816009, si_uid=1001, si_status=SIGKILL, si_utime=0, si_stime=0} ---
[pid 816007] +++ exited with 0 +++
```
-rw-r--r--library/std/src/os/linux/process.rs6
-rw-r--r--library/std/src/sys/unix/process/process_unix.rs76
-rw-r--r--library/std/src/sys/unix/process/process_unix/tests.rs19
3 files changed, 90 insertions, 11 deletions
diff --git a/library/std/src/os/linux/process.rs b/library/std/src/os/linux/process.rs
index 2b3ff76d7a4..51af432d056 100644
--- a/library/std/src/os/linux/process.rs
+++ b/library/std/src/os/linux/process.rs
@@ -152,6 +152,12 @@ pub trait CommandExt: Sealed {
     /// in a guaranteed race-free manner (e.g. if the `clone3` system call
     /// is supported). Otherwise, [`pidfd`] will return an error.
     ///
+    /// If a pidfd has been successfully created and not been taken from the `Child`
+    /// then calls to `kill()`, `wait()` and `try_wait()` will use the pidfd
+    /// instead of the pid. This can prevent pid recycling races, e.g.
+    /// those  caused by rogue libraries in the same process prematurely reaping
+    /// zombie children via `waitpid(-1, ...)` calls.
+    ///
     /// [`Command`]: process::Command
     /// [`Child`]: process::Child
     /// [`pidfd`]: fn@ChildExt::pidfd
diff --git a/library/std/src/sys/unix/process/process_unix.rs b/library/std/src/sys/unix/process/process_unix.rs
index 72aca4e6659..ee86a5f88dd 100644
--- a/library/std/src/sys/unix/process/process_unix.rs
+++ b/library/std/src/sys/unix/process/process_unix.rs
@@ -9,6 +9,8 @@ use core::ffi::NonZero_c_int;
 
 #[cfg(target_os = "linux")]
 use crate::os::linux::process::PidFd;
+#[cfg(target_os = "linux")]
+use crate::os::unix::io::AsRawFd;
 
 #[cfg(any(
     target_os = "macos",
@@ -696,11 +698,12 @@ impl Command {
 
             msg.msg_iov = &mut iov as *mut _ as *mut _;
             msg.msg_iovlen = 1;
-            msg.msg_controllen = mem::size_of_val(&cmsg.buf) as _;
-            msg.msg_control = &mut cmsg.buf as *mut _ as *mut _;
 
             // only attach cmsg if we successfully acquired the pidfd
             if pidfd >= 0 {
+                msg.msg_controllen = mem::size_of_val(&cmsg.buf) as _;
+                msg.msg_control = &mut cmsg.buf as *mut _ as *mut _;
+
                 let hdr = CMSG_FIRSTHDR(&mut msg as *mut _ as *mut _);
                 (*hdr).cmsg_level = SOL_SOCKET;
                 (*hdr).cmsg_type = SCM_RIGHTS;
@@ -717,7 +720,7 @@ impl Command {
             // so we get a consistent SEQPACKET order
             match cvt_r(|| libc::sendmsg(sock.as_raw(), &msg, 0)) {
                 Ok(0) => {}
-                _ => rtabort!("failed to communicate with parent process"),
+                other => rtabort!("failed to communicate with parent process. {:?}", other),
             }
         }
     }
@@ -748,7 +751,7 @@ impl Command {
             msg.msg_controllen = mem::size_of::<Cmsg>() as _;
             msg.msg_control = &mut cmsg as *mut _ as *mut _;
 
-            match cvt_r(|| libc::recvmsg(sock.as_raw(), &mut msg, 0)) {
+            match cvt_r(|| libc::recvmsg(sock.as_raw(), &mut msg, libc::MSG_CMSG_CLOEXEC)) {
                 Err(_) => return -1,
                 Ok(_) => {}
             }
@@ -787,7 +790,7 @@ pub struct Process {
     // On Linux, stores the pidfd created for this child.
     // This is None if the user did not request pidfd creation,
     // or if the pidfd could not be created for some reason
-    // (e.g. the `clone3` syscall was not available).
+    // (e.g. the `pidfd_open` syscall was not available).
     #[cfg(target_os = "linux")]
     pidfd: Option<PidFd>,
 }
@@ -816,10 +819,23 @@ impl Process {
         // and used for another process, and we probably shouldn't be killing
         // random processes, so return Ok because the process has exited already.
         if self.status.is_some() {
-            Ok(())
-        } else {
-            cvt(unsafe { libc::kill(self.pid, libc::SIGKILL) }).map(drop)
+            return Ok(());
+        }
+        #[cfg(target_os = "linux")]
+        if let Some(pid_fd) = self.pidfd.as_ref() {
+            // pidfd_send_signal predates pidfd_open. so if we were able to get an fd then sending signals will work too
+            return cvt(unsafe {
+                libc::syscall(
+                    libc::SYS_pidfd_send_signal,
+                    pid_fd.as_raw_fd(),
+                    libc::SIGKILL,
+                    crate::ptr::null::<()>(),
+                    0,
+                )
+            })
+            .map(drop);
         }
+        cvt(unsafe { libc::kill(self.pid, libc::SIGKILL) }).map(drop)
     }
 
     pub fn wait(&mut self) -> io::Result<ExitStatus> {
@@ -827,6 +843,17 @@ impl Process {
         if let Some(status) = self.status {
             return Ok(status);
         }
+        #[cfg(target_os = "linux")]
+        if let Some(pid_fd) = self.pidfd.as_ref() {
+            let mut siginfo: libc::siginfo_t = unsafe { crate::mem::zeroed() };
+
+            cvt_r(|| unsafe {
+                libc::waitid(libc::P_PIDFD, pid_fd.as_raw_fd() as u32, &mut siginfo, libc::WEXITED)
+            })?;
+            let status = ExitStatus::from_waitid_siginfo(siginfo);
+            self.status = Some(status);
+            return Ok(status);
+        }
         let mut status = 0 as c_int;
         cvt_r(|| unsafe { libc::waitpid(self.pid, &mut status, 0) })?;
         self.status = Some(ExitStatus::new(status));
@@ -837,6 +864,25 @@ impl Process {
         if let Some(status) = self.status {
             return Ok(Some(status));
         }
+        #[cfg(target_os = "linux")]
+        if let Some(pid_fd) = self.pidfd.as_ref() {
+            let mut siginfo: libc::siginfo_t = unsafe { crate::mem::zeroed() };
+
+            cvt(unsafe {
+                libc::waitid(
+                    libc::P_PIDFD,
+                    pid_fd.as_raw_fd() as u32,
+                    &mut siginfo,
+                    libc::WEXITED | libc::WNOHANG,
+                )
+            })?;
+            if unsafe { siginfo.si_pid() } == 0 {
+                return Ok(None);
+            }
+            let status = ExitStatus::from_waitid_siginfo(siginfo);
+            self.status = Some(status);
+            return Ok(Some(status));
+        }
         let mut status = 0 as c_int;
         let pid = cvt(unsafe { libc::waitpid(self.pid, &mut status, libc::WNOHANG) })?;
         if pid == 0 {
@@ -866,6 +912,20 @@ impl ExitStatus {
         ExitStatus(status)
     }
 
+    #[cfg(target_os = "linux")]
+    pub fn from_waitid_siginfo(siginfo: libc::siginfo_t) -> ExitStatus {
+        let status = unsafe { siginfo.si_status() };
+
+        match siginfo.si_code {
+            libc::CLD_EXITED => ExitStatus((status & 0xff) << 8),
+            libc::CLD_KILLED => ExitStatus(status),
+            libc::CLD_DUMPED => ExitStatus(status | 0x80),
+            libc::CLD_CONTINUED => ExitStatus(0xffff),
+            libc::CLD_STOPPED | libc::CLD_TRAPPED => ExitStatus(((status & 0xff) << 8) | 0x7f),
+            _ => unreachable!("waitid() should only return the above codes"),
+        }
+    }
+
     fn exited(&self) -> bool {
         libc::WIFEXITED(self.0)
     }
diff --git a/library/std/src/sys/unix/process/process_unix/tests.rs b/library/std/src/sys/unix/process/process_unix/tests.rs
index 6aa79e7f9e7..6e952ed7c42 100644
--- a/library/std/src/sys/unix/process/process_unix/tests.rs
+++ b/library/std/src/sys/unix/process/process_unix/tests.rs
@@ -64,7 +64,8 @@ fn test_command_fork_no_unwind() {
 #[test]
 #[cfg(target_os = "linux")]
 fn test_command_pidfd() {
-    use crate::os::fd::RawFd;
+    use crate::assert_matches::assert_matches;
+    use crate::os::fd::{AsRawFd, RawFd};
     use crate::os::linux::process::{ChildExt, CommandExt};
     use crate::process::Command;
 
@@ -78,10 +79,22 @@ fn test_command_pidfd() {
     };
 
     // always exercise creation attempts
-    let child = Command::new("echo").create_pidfd(true).spawn().unwrap();
+    let mut child = Command::new("false").create_pidfd(true).spawn().unwrap();
 
     // but only check if we know that the kernel supports pidfds
     if pidfd_open_available {
-        assert!(child.pidfd().is_ok())
+        assert!(child.pidfd().is_ok());
     }
+    if let Ok(pidfd) = child.pidfd() {
+        let flags = super::cvt(unsafe { libc::fcntl(pidfd.as_raw_fd(), libc::F_GETFD) }).unwrap();
+        assert!(flags & libc::FD_CLOEXEC != 0);
+    }
+    let status = child.wait().expect("error waiting on pidfd");
+    assert_eq!(status.code(), Some(1));
+
+    let mut child = Command::new("sleep").arg("1000").create_pidfd(true).spawn().unwrap();
+    assert_matches!(child.try_wait(), Ok(None));
+    child.kill().expect("failed to kill child");
+    let status = child.wait().expect("error waiting on pidfd");
+    assert_eq!(status.signal(), Some(libc::SIGKILL));
 }