about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2021-08-01 16:45:47 +0000
committerbors <bors@rust-lang.org>2021-08-01 16:45:47 +0000
commit4e21ef2a4eca12180e24a345d66066fc1e4e36da (patch)
treef4a6738e9cc0b047efed979e2f5a9d70b9b851b2
parent2e9c8705e94826da6aebe46512b4e3bbfc9e008f (diff)
parent4a832d32f232a68acdabfd29e526d2a4b6366a1c (diff)
downloadrust-4e21ef2a4eca12180e24a345d66066fc1e4e36da.tar.gz
rust-4e21ef2a4eca12180e24a345d66066fc1e4e36da.zip
Auto merge of #81825 - voidc:pidfd, r=joshtriplett
Add Linux-specific pidfd process extensions (take 2)

Continuation of #77168.
I addressed the following concerns from the original PR:

- make `CommandExt` and `ChildExt` sealed traits
- wrap file descriptors in `PidFd` struct representing ownership over the fd
- add `take_pidfd` to take the fd out of `Child`
- close fd when dropped

Tracking Issue: #82971
-rw-r--r--library/std/src/os/linux/mod.rs1
-rw-r--r--library/std/src/os/linux/process.rs154
-rw-r--r--library/std/src/process.rs2
-rw-r--r--library/std/src/sys/unix/process/process_common.rs41
-rw-r--r--library/std/src/sys/unix/process/process_unix.rs136
-rw-r--r--src/test/ui/command/command-create-pidfd.rs45
-rw-r--r--src/test/ui/command/command-pre-exec.rs25
-rw-r--r--src/test/ui/process/process-panic-after-fork.rs17
8 files changed, 410 insertions, 11 deletions
diff --git a/library/std/src/os/linux/mod.rs b/library/std/src/os/linux/mod.rs
index 94438defc22..8e7776f6646 100644
--- a/library/std/src/os/linux/mod.rs
+++ b/library/std/src/os/linux/mod.rs
@@ -4,4 +4,5 @@
 #![doc(cfg(target_os = "linux"))]
 
 pub mod fs;
+pub mod process;
 pub mod raw;
diff --git a/library/std/src/os/linux/process.rs b/library/std/src/os/linux/process.rs
new file mode 100644
index 00000000000..91547b8f916
--- /dev/null
+++ b/library/std/src/os/linux/process.rs
@@ -0,0 +1,154 @@
+//! Linux-specific extensions to primitives in the `std::process` module.
+
+#![unstable(feature = "linux_pidfd", issue = "82971")]
+
+use crate::io::Result;
+use crate::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd};
+use crate::process;
+#[cfg(not(doc))]
+use crate::sys::fd::FileDesc;
+use crate::sys_common::{AsInner, AsInnerMut, FromInner, IntoInner};
+
+#[cfg(doc)]
+struct FileDesc;
+
+/// This type represents a file descriptor that refers to a process.
+///
+/// A `PidFd` can be obtained by setting the corresponding option on [`Command`]
+/// with [`create_pidfd`]. Subsequently, the created pidfd can be retrieved
+/// from the [`Child`] by calling [`pidfd`] or [`take_pidfd`].
+///
+/// Example:
+/// ```no_run
+/// #![feature(linux_pidfd)]
+/// use std::os::linux::process::{CommandExt, ChildExt};
+/// use std::process::Command;
+///
+/// let mut child = Command::new("echo")
+///     .create_pidfd(true)
+///     .spawn()
+///     .expect("Failed to spawn child");
+///
+/// let pidfd = child
+///     .take_pidfd()
+///     .expect("Failed to retrieve pidfd");
+///
+/// // The file descriptor will be closed when `pidfd` is dropped.
+/// ```
+/// Refer to the man page of [`pidfd_open(2)`] for further details.
+///
+/// [`Command`]: process::Command
+/// [`create_pidfd`]: CommandExt::create_pidfd
+/// [`Child`]: process::Child
+/// [`pidfd`]: fn@ChildExt::pidfd
+/// [`take_pidfd`]: ChildExt::take_pidfd
+/// [`pidfd_open(2)`]: https://man7.org/linux/man-pages/man2/pidfd_open.2.html
+#[derive(Debug)]
+pub struct PidFd {
+    inner: FileDesc,
+}
+
+impl AsInner<FileDesc> for PidFd {
+    fn as_inner(&self) -> &FileDesc {
+        &self.inner
+    }
+}
+
+impl FromInner<FileDesc> for PidFd {
+    fn from_inner(inner: FileDesc) -> PidFd {
+        PidFd { inner }
+    }
+}
+
+impl IntoInner<FileDesc> for PidFd {
+    fn into_inner(self) -> FileDesc {
+        self.inner
+    }
+}
+
+impl AsRawFd for PidFd {
+    fn as_raw_fd(&self) -> RawFd {
+        self.as_inner().raw()
+    }
+}
+
+impl FromRawFd for PidFd {
+    unsafe fn from_raw_fd(fd: RawFd) -> Self {
+        Self::from_inner(FileDesc::new(fd))
+    }
+}
+
+impl IntoRawFd for PidFd {
+    fn into_raw_fd(self) -> RawFd {
+        self.into_inner().into_raw()
+    }
+}
+
+mod private_child_ext {
+    pub trait Sealed {}
+    impl Sealed for crate::process::Child {}
+}
+
+/// Os-specific extensions for [`Child`]
+///
+/// [`Child`]: process::Child
+pub trait ChildExt: private_child_ext::Sealed {
+    /// Obtains a reference to the [`PidFd`] created for this [`Child`], if available.
+    ///
+    /// A pidfd will only be available if its creation was requested with
+    /// [`create_pidfd`] when the corresponding [`Command`] was created.
+    ///
+    /// Even if requested, a pidfd may not be available due to an older
+    /// version of Linux being in use, or if some other error occurred.
+    ///
+    /// [`Command`]: process::Command
+    /// [`create_pidfd`]: CommandExt::create_pidfd
+    /// [`Child`]: process::Child
+    fn pidfd(&self) -> Result<&PidFd>;
+
+    /// Takes ownership of the [`PidFd`] created for this [`Child`], if available.
+    ///
+    /// A pidfd will only be available if its creation was requested with
+    /// [`create_pidfd`] when the corresponding [`Command`] was created.
+    ///
+    /// Even if requested, a pidfd may not be available due to an older
+    /// version of Linux being in use, or if some other error occurred.
+    ///
+    /// [`Command`]: process::Command
+    /// [`create_pidfd`]: CommandExt::create_pidfd
+    /// [`Child`]: process::Child
+    fn take_pidfd(&mut self) -> Result<PidFd>;
+}
+
+mod private_command_ext {
+    pub trait Sealed {}
+    impl Sealed for crate::process::Command {}
+}
+
+/// Os-specific extensions for [`Command`]
+///
+/// [`Command`]: process::Command
+pub trait CommandExt: private_command_ext::Sealed {
+    /// Sets whether a [`PidFd`](struct@PidFd) should be created for the [`Child`]
+    /// spawned by this [`Command`].
+    /// By default, no pidfd will be created.
+    ///
+    /// The pidfd can be retrieved from the child with [`pidfd`] or [`take_pidfd`].
+    ///
+    /// A pidfd will only be created if it is possible to do so
+    /// in a guaranteed race-free manner (e.g. if the `clone3` system call
+    /// is supported). Otherwise, [`pidfd`] will return an error.
+    ///
+    /// [`Command`]: process::Command
+    /// [`Child`]: process::Child
+    /// [`pidfd`]: fn@ChildExt::pidfd
+    /// [`take_pidfd`]: ChildExt::take_pidfd
+    fn create_pidfd(&mut self, val: bool) -> &mut process::Command;
+}
+
+impl CommandExt for process::Command {
+    fn create_pidfd(&mut self, val: bool) -> &mut process::Command {
+        self.as_inner_mut().create_pidfd(val);
+        self
+    }
+}
diff --git a/library/std/src/process.rs b/library/std/src/process.rs
index 11a0432ce27..99c3369425b 100644
--- a/library/std/src/process.rs
+++ b/library/std/src/process.rs
@@ -166,7 +166,7 @@ use crate::sys_common::{AsInner, AsInnerMut, FromInner, IntoInner};
 /// [`wait`]: Child::wait
 #[stable(feature = "process", since = "1.0.0")]
 pub struct Child {
-    handle: imp::Process,
+    pub(crate) handle: imp::Process,
 
     /// The handle for writing to the child's standard input (stdin), if it has
     /// been captured. To avoid partially moving
diff --git a/library/std/src/sys/unix/process/process_common.rs b/library/std/src/sys/unix/process/process_common.rs
index f2f161e4eaa..a1972380a9f 100644
--- a/library/std/src/sys/unix/process/process_common.rs
+++ b/library/std/src/sys/unix/process/process_common.rs
@@ -79,6 +79,8 @@ pub struct Command {
     stdin: Option<Stdio>,
     stdout: Option<Stdio>,
     stderr: Option<Stdio>,
+    #[cfg(target_os = "linux")]
+    create_pidfd: bool,
 }
 
 // Create a new type for argv, so that we can make it `Send` and `Sync`
@@ -124,6 +126,7 @@ pub enum Stdio {
 }
 
 impl Command {
+    #[cfg(not(target_os = "linux"))]
     pub fn new(program: &OsStr) -> Command {
         let mut saw_nul = false;
         let program = os2c(program, &mut saw_nul);
@@ -144,6 +147,28 @@ impl Command {
         }
     }
 
+    #[cfg(target_os = "linux")]
+    pub fn new(program: &OsStr) -> Command {
+        let mut saw_nul = false;
+        let program = os2c(program, &mut saw_nul);
+        Command {
+            argv: Argv(vec![program.as_ptr(), ptr::null()]),
+            args: vec![program.clone()],
+            program,
+            env: Default::default(),
+            cwd: None,
+            uid: None,
+            gid: None,
+            saw_nul,
+            closures: Vec::new(),
+            groups: None,
+            stdin: None,
+            stdout: None,
+            stderr: None,
+            create_pidfd: false,
+        }
+    }
+
     pub fn set_arg_0(&mut self, arg: &OsStr) {
         // Set a new arg0
         let arg = os2c(arg, &mut self.saw_nul);
@@ -177,6 +202,22 @@ impl Command {
         self.groups = Some(Box::from(groups));
     }
 
+    #[cfg(target_os = "linux")]
+    pub fn create_pidfd(&mut self, val: bool) {
+        self.create_pidfd = val;
+    }
+
+    #[cfg(not(target_os = "linux"))]
+    #[allow(dead_code)]
+    pub fn get_create_pidfd(&self) -> bool {
+        false
+    }
+
+    #[cfg(target_os = "linux")]
+    pub fn get_create_pidfd(&self) -> bool {
+        self.create_pidfd
+    }
+
     pub fn saw_nul(&self) -> bool {
         self.saw_nul
     }
diff --git a/library/std/src/sys/unix/process/process_unix.rs b/library/std/src/sys/unix/process/process_unix.rs
index c888dd0d87d..4b210d6af13 100644
--- a/library/std/src/sys/unix/process/process_unix.rs
+++ b/library/std/src/sys/unix/process/process_unix.rs
@@ -9,6 +9,12 @@ use crate::sys;
 use crate::sys::cvt;
 use crate::sys::process::process_common::*;
 
+#[cfg(target_os = "linux")]
+use crate::os::linux::process::PidFd;
+
+#[cfg(target_os = "linux")]
+use crate::sys::weak::syscall;
+
 #[cfg(any(
     target_os = "macos",
     target_os = "freebsd",
@@ -61,7 +67,8 @@ impl Command {
         // a lock any more because the parent won't do anything and the child is
         // in its own process. Thus the parent drops the lock guard while the child
         // forgets it to avoid unlocking it on a new thread, which would be invalid.
-        let (env_lock, pid) = unsafe { (sys::os::env_read_lock(), cvt(libc::fork())?) };
+        let env_lock = sys::os::env_read_lock();
+        let (pid, pidfd) = unsafe { self.do_fork()? };
 
         if pid == 0 {
             crate::panic::always_abort();
@@ -90,7 +97,7 @@ impl Command {
         drop(env_lock);
         drop(output);
 
-        let mut p = Process { pid, status: None };
+        let mut p = Process::new(pid, pidfd);
         let mut bytes = [0; 8];
 
         // loop to handle EINTR
@@ -122,6 +129,92 @@ impl Command {
         }
     }
 
+    // Attempts to fork the process. If successful, returns Ok((0, -1))
+    // in the child, and Ok((child_pid, -1)) in the parent.
+    #[cfg(not(target_os = "linux"))]
+    unsafe fn do_fork(&mut self) -> Result<(pid_t, pid_t), io::Error> {
+        cvt(libc::fork()).map(|res| (res, -1))
+    }
+
+    // Attempts to fork the process. If successful, returns Ok((0, -1))
+    // in the child, and Ok((child_pid, child_pidfd)) in the parent.
+    #[cfg(target_os = "linux")]
+    unsafe fn do_fork(&mut self) -> Result<(pid_t, pid_t), io::Error> {
+        use crate::sync::atomic::{AtomicBool, Ordering};
+
+        static HAS_CLONE3: AtomicBool = AtomicBool::new(true);
+        const CLONE_PIDFD: u64 = 0x00001000;
+
+        #[repr(C)]
+        struct clone_args {
+            flags: u64,
+            pidfd: u64,
+            child_tid: u64,
+            parent_tid: u64,
+            exit_signal: u64,
+            stack: u64,
+            stack_size: u64,
+            tls: u64,
+            set_tid: u64,
+            set_tid_size: u64,
+            cgroup: u64,
+        }
+
+        syscall! {
+            fn clone3(cl_args: *mut clone_args, len: libc::size_t) -> libc::c_long
+        }
+
+        // If we fail to create a pidfd for any reason, this will
+        // stay as -1, which indicates an error.
+        let mut pidfd: pid_t = -1;
+
+        // Attempt to use the `clone3` syscall, which supports more arguments
+        // (in particular, the ability to create a pidfd). If this fails,
+        // we will fall through this block to a call to `fork()`
+        if HAS_CLONE3.load(Ordering::Relaxed) {
+            let mut flags = 0;
+            if self.get_create_pidfd() {
+                flags |= CLONE_PIDFD;
+            }
+
+            let mut args = clone_args {
+                flags,
+                pidfd: &mut pidfd as *mut pid_t as u64,
+                child_tid: 0,
+                parent_tid: 0,
+                exit_signal: libc::SIGCHLD as u64,
+                stack: 0,
+                stack_size: 0,
+                tls: 0,
+                set_tid: 0,
+                set_tid_size: 0,
+                cgroup: 0,
+            };
+
+            let args_ptr = &mut args as *mut clone_args;
+            let args_size = crate::mem::size_of::<clone_args>();
+
+            let res = cvt(clone3(args_ptr, args_size));
+            match res {
+                Ok(n) => return Ok((n as pid_t, pidfd)),
+                Err(e) => match e.raw_os_error() {
+                    // Multiple threads can race to execute this store,
+                    // but that's fine - that just means that multiple threads
+                    // will have tried and failed to execute the same syscall,
+                    // with no other side effects.
+                    Some(libc::ENOSYS) => HAS_CLONE3.store(false, Ordering::Relaxed),
+                    // Fallback to fork if `EPERM` is returned. (e.g. blocked by seccomp)
+                    Some(libc::EPERM) => {}
+                    _ => return Err(e),
+                },
+            }
+        }
+
+        // If we get here, the 'clone3' syscall does not exist
+        // or we do not have permission to call it
+        cvt(libc::fork()).map(|res| (res, pidfd))
+    }
+
     pub fn exec(&mut self, default: Stdio) -> io::Error {
         let envp = self.capture_env();
 
@@ -308,6 +401,7 @@ impl Command {
             || (self.env_saw_path() && !self.program_is_path())
             || !self.get_closures().is_empty()
             || self.get_groups().is_some()
+            || self.get_create_pidfd()
         {
             return Ok(None);
         }
@@ -352,7 +446,7 @@ impl Command {
             None => None,
         };
 
-        let mut p = Process { pid: 0, status: None };
+        let mut p = Process::new(0, -1);
 
         struct PosixSpawnFileActions<'a>(&'a mut MaybeUninit<libc::posix_spawn_file_actions_t>);
 
@@ -441,9 +535,27 @@ impl Command {
 pub struct Process {
     pid: pid_t,
     status: Option<ExitStatus>,
+    // On Linux, stores the pidfd created for this child.
+    // This is None if the user did not request pidfd creation,
+    // or if the pidfd could not be created for some reason
+    // (e.g. the `clone3` syscall was not available).
+    #[cfg(target_os = "linux")]
+    pidfd: Option<PidFd>,
 }
 
 impl Process {
+    #[cfg(target_os = "linux")]
+    fn new(pid: pid_t, pidfd: pid_t) -> Self {
+        use crate::sys_common::FromInner;
+        let pidfd = (pidfd >= 0).then(|| PidFd::from_inner(sys::fd::FileDesc::new(pidfd)));
+        Process { pid, status: None, pidfd }
+    }
+
+    #[cfg(not(target_os = "linux"))]
+    fn new(pid: pid_t, _pidfd: pid_t) -> Self {
+        Process { pid, status: None }
+    }
+
     pub fn id(&self) -> u32 {
         self.pid as u32
     }
@@ -580,6 +692,24 @@ impl ExitStatusError {
     }
 }
 
+#[cfg(target_os = "linux")]
+#[unstable(feature = "linux_pidfd", issue = "82971")]
+impl crate::os::linux::process::ChildExt for crate::process::Child {
+    fn pidfd(&self) -> io::Result<&PidFd> {
+        self.handle
+            .pidfd
+            .as_ref()
+            .ok_or_else(|| Error::new(ErrorKind::Other, "No pidfd was created."))
+    }
+
+    fn take_pidfd(&mut self) -> io::Result<PidFd> {
+        self.handle
+            .pidfd
+            .take()
+            .ok_or_else(|| Error::new(ErrorKind::Other, "No pidfd was created."))
+    }
+}
+
 #[cfg(test)]
 #[path = "process_unix/tests.rs"]
 mod tests;
diff --git a/src/test/ui/command/command-create-pidfd.rs b/src/test/ui/command/command-create-pidfd.rs
new file mode 100644
index 00000000000..93321ac536a
--- /dev/null
+++ b/src/test/ui/command/command-create-pidfd.rs
@@ -0,0 +1,45 @@
+// run-pass
+// only-linux - pidfds are a linux-specific concept
+
+#![feature(linux_pidfd)]
+#![feature(rustc_private)]
+
+extern crate libc;
+
+use std::io::Error;
+use std::os::linux::process::{ChildExt, CommandExt};
+use std::process::Command;
+
+fn has_clone3() -> bool {
+    let res = unsafe { libc::syscall(libc::SYS_clone3, 0, 0) };
+    let err = (res == -1)
+        .then(|| Error::last_os_error())
+        .expect("probe syscall should not succeed");
+    err.raw_os_error() != Some(libc::ENOSYS)
+}
+
+fn main() {
+    // pidfds require the clone3 syscall
+    if !has_clone3() {
+        return;
+    }
+
+    // We don't assert the precise value, since the standard library
+    // might have opened other file descriptors before our code runs.
+    let _ = Command::new("echo")
+        .create_pidfd(true)
+        .spawn()
+        .unwrap()
+        .pidfd().expect("failed to obtain pidfd");
+
+    let _ = Command::new("echo")
+        .create_pidfd(false)
+        .spawn()
+        .unwrap()
+        .pidfd().expect_err("pidfd should not have been created when create_pid(false) is set");
+
+    let _ = Command::new("echo")
+        .spawn()
+        .unwrap()
+        .pidfd().expect_err("pidfd should not have been created");
+}
diff --git a/src/test/ui/command/command-pre-exec.rs b/src/test/ui/command/command-pre-exec.rs
index 61914e22930..10a8b19159e 100644
--- a/src/test/ui/command/command-pre-exec.rs
+++ b/src/test/ui/command/command-pre-exec.rs
@@ -8,8 +8,6 @@
 // ignore-sgx no processes
 #![feature(process_exec, rustc_private)]
 
-extern crate libc;
-
 use std::env;
 use std::io::Error;
 use std::os::unix::process::CommandExt;
@@ -17,6 +15,23 @@ use std::process::Command;
 use std::sync::atomic::{AtomicUsize, Ordering};
 use std::sync::Arc;
 
+#[cfg(not(target_os = "linux"))]
+fn getpid() -> u32 {
+    use std::process;
+    process::id()
+}
+
+/// We need to directly use the getpid syscall instead of using `process::id()`
+/// because the libc wrapper might return incorrect values after a process was
+/// forked.
+#[cfg(target_os = "linux")]
+fn getpid() -> u32 {
+    extern crate libc;
+    unsafe {
+        libc::syscall(libc::SYS_getpid) as _
+    }
+}
+
 fn main() {
     if let Some(arg) = env::args().nth(1) {
         match &arg[..] {
@@ -68,14 +83,12 @@ fn main() {
     };
     assert_eq!(output.raw_os_error(), Some(102));
 
-    let pid = unsafe { libc::getpid() };
-    assert!(pid >= 0);
+    let pid = getpid();
     let output = unsafe {
         Command::new(&me)
             .arg("empty")
             .pre_exec(move || {
-                let child = libc::getpid();
-                assert!(child >= 0);
+                let child = getpid();
                 assert!(pid != child);
                 Ok(())
             })
diff --git a/src/test/ui/process/process-panic-after-fork.rs b/src/test/ui/process/process-panic-after-fork.rs
index 1ccf6bb051c..ad749371bea 100644
--- a/src/test/ui/process/process-panic-after-fork.rs
+++ b/src/test/ui/process/process-panic-after-fork.rs
@@ -23,6 +23,21 @@ use std::sync::atomic::{AtomicU32, Ordering};
 
 use libc::c_int;
 
+#[cfg(not(target_os = "linux"))]
+fn getpid() -> u32 {
+    process::id()
+}
+
+/// We need to directly use the getpid syscall instead of using `process::id()`
+/// because the libc wrapper might return incorrect values after a process was
+/// forked.
+#[cfg(target_os = "linux")]
+fn getpid() -> u32 {
+    unsafe {
+        libc::syscall(libc::SYS_getpid) as _
+    }
+}
+
 /// This stunt allocator allows us to spot heap allocations in the child.
 struct PidChecking<A> {
     parent: A,
@@ -44,7 +59,7 @@ impl<A> PidChecking<A> {
     fn check(&self) {
         let require_pid = self.require_pid.load(Ordering::Acquire);
         if require_pid != 0 {
-            let actual_pid = process::id();
+            let actual_pid = getpid();
             if require_pid != actual_pid {
                 unsafe {
                     libc::raise(libc::SIGUSR1);