about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMatthias Krüger <476013+matthiaskrgr@users.noreply.github.com>2025-05-23 20:30:08 +0200
committerGitHub <noreply@github.com>2025-05-23 20:30:08 +0200
commitc66c8e6b9c227d2e44701a50294e23a7a2143fa4 (patch)
tree888cee6f55c5f0215853422bff3000c318b54720
parente88e85463468ce5d5ce468414eb69e3b15fa8d42 (diff)
parent89a90d664082280584a27cc8030aba85d122448f (diff)
downloadrust-c66c8e6b9c227d2e44701a50294e23a7a2143fa4.tar.gz
rust-c66c8e6b9c227d2e44701a50294e23a7a2143fa4.zip
Rollup merge of #138896 - joboet:process_noalias, r=Noratrieb
std: fix aliasing bug in UNIX process implementation

`CStringArray` contained both `CString`s and their pointers. Unfortunately, since `CString` uses `Box`, moving the `CString`s into the `Vec` can (under stacked borrows) invalidate the pointer to the string, meaning the resulting `Vec<*const c_char>` was, from an opsem perspective, unusable. This PR removes removes the `Vec<CString>` from `CStringArray`, instead recreating the `CString`/`CStr` from the pointers when necessary. Also,`CStringArray` is now used for the process args as well, the old implementation was suffering from the same kind of bug.
-rw-r--r--library/std/src/sys/process/unix/common.rs116
-rw-r--r--library/std/src/sys/process/unix/common/cstring_array.rs115
2 files changed, 142 insertions, 89 deletions
diff --git a/library/std/src/sys/process/unix/common.rs b/library/std/src/sys/process/unix/common.rs
index e205a839005..b6777b76668 100644
--- a/library/std/src/sys/process/unix/common.rs
+++ b/library/std/src/sys/process/unix/common.rs
@@ -1,8 +1,10 @@
 #[cfg(all(test, not(target_os = "emscripten")))]
 mod tests;
 
-use libc::{EXIT_FAILURE, EXIT_SUCCESS, c_char, c_int, gid_t, pid_t, uid_t};
+use libc::{EXIT_FAILURE, EXIT_SUCCESS, c_int, gid_t, pid_t, uid_t};
 
+pub use self::cstring_array::CStringArray;
+use self::cstring_array::CStringIter;
 use crate::collections::BTreeMap;
 use crate::ffi::{CStr, CString, OsStr, OsString};
 use crate::os::unix::prelude::*;
@@ -14,7 +16,9 @@ use crate::sys::fs::OpenOptions;
 use crate::sys::pipe::{self, AnonPipe};
 use crate::sys::process::env::{CommandEnv, CommandEnvs};
 use crate::sys_common::{FromInner, IntoInner};
-use crate::{fmt, io, ptr};
+use crate::{fmt, io};
+
+mod cstring_array;
 
 cfg_if::cfg_if! {
     if #[cfg(target_os = "fuchsia")] {
@@ -77,13 +81,7 @@ cfg_if::cfg_if! {
 
 pub struct Command {
     program: CString,
-    args: Vec<CString>,
-    /// Exactly what will be passed to `execvp`.
-    ///
-    /// First element is a pointer to `program`, followed by pointers to
-    /// `args`, followed by a `null`. Be careful when modifying `program` or
-    /// `args` to properly update this as well.
-    argv: Argv,
+    args: CStringArray,
     env: CommandEnv,
 
     program_kind: ProgramKind,
@@ -102,14 +100,6 @@ pub struct Command {
     pgroup: Option<pid_t>,
 }
 
-// Create a new type for argv, so that we can make it `Send` and `Sync`
-struct Argv(Vec<*const c_char>);
-
-// It is safe to make `Argv` `Send` and `Sync`, because it contains
-// pointers to memory owned by `Command.args`
-unsafe impl Send for Argv {}
-unsafe impl Sync for Argv {}
-
 // passed back to std::process with the pipes connected to the child, if any
 // were requested
 pub struct StdioPipes {
@@ -171,42 +161,17 @@ impl ProgramKind {
 }
 
 impl Command {
-    #[cfg(not(target_os = "linux"))]
     pub fn new(program: &OsStr) -> Command {
         let mut saw_nul = false;
         let program_kind = ProgramKind::new(program.as_ref());
         let program = os2c(program, &mut saw_nul);
+        let mut args = CStringArray::with_capacity(1);
+        args.push(program.clone());
         Command {
-            argv: Argv(vec![program.as_ptr(), ptr::null()]),
-            args: vec![program.clone()],
             program,
-            program_kind,
+            args,
             env: Default::default(),
-            cwd: None,
-            chroot: None,
-            uid: None,
-            gid: None,
-            saw_nul,
-            closures: Vec::new(),
-            groups: None,
-            stdin: None,
-            stdout: None,
-            stderr: None,
-            pgroup: None,
-        }
-    }
-
-    #[cfg(target_os = "linux")]
-    pub fn new(program: &OsStr) -> Command {
-        let mut saw_nul = false;
-        let program_kind = ProgramKind::new(program.as_ref());
-        let program = os2c(program, &mut saw_nul);
-        Command {
-            argv: Argv(vec![program.as_ptr(), ptr::null()]),
-            args: vec![program.clone()],
-            program,
             program_kind,
-            env: Default::default(),
             cwd: None,
             chroot: None,
             uid: None,
@@ -217,6 +182,7 @@ impl Command {
             stdin: None,
             stdout: None,
             stderr: None,
+            #[cfg(target_os = "linux")]
             create_pidfd: false,
             pgroup: None,
         }
@@ -225,20 +191,11 @@ impl Command {
     pub fn set_arg_0(&mut self, arg: &OsStr) {
         // Set a new arg0
         let arg = os2c(arg, &mut self.saw_nul);
-        debug_assert!(self.argv.0.len() > 1);
-        self.argv.0[0] = arg.as_ptr();
-        self.args[0] = arg;
+        self.args.write(0, arg);
     }
 
     pub fn arg(&mut self, arg: &OsStr) {
-        // Overwrite the trailing null pointer in `argv` and then add a new null
-        // pointer.
         let arg = os2c(arg, &mut self.saw_nul);
-        self.argv.0[self.args.len()] = arg.as_ptr();
-        self.argv.0.push(ptr::null());
-
-        // Also make sure we keep track of the owned value to schedule a
-        // destructor for this memory.
         self.args.push(arg);
     }
 
@@ -295,6 +252,8 @@ impl Command {
 
     pub fn get_args(&self) -> CommandArgs<'_> {
         let mut iter = self.args.iter();
+        // argv[0] contains the program name, but we are only interested in the
+        // arguments so skip it.
         iter.next();
         CommandArgs { iter }
     }
@@ -307,12 +266,12 @@ impl Command {
         self.cwd.as_ref().map(|cs| Path::new(OsStr::from_bytes(cs.as_bytes())))
     }
 
-    pub fn get_argv(&self) -> &Vec<*const c_char> {
-        &self.argv.0
+    pub fn get_argv(&self) -> &CStringArray {
+        &self.args
     }
 
     pub fn get_program_cstr(&self) -> &CStr {
-        &*self.program
+        &self.program
     }
 
     #[allow(dead_code)]
@@ -405,32 +364,6 @@ fn os2c(s: &OsStr, saw_nul: &mut bool) -> CString {
     })
 }
 
-// Helper type to manage ownership of the strings within a C-style array.
-pub struct CStringArray {
-    items: Vec<CString>,
-    ptrs: Vec<*const c_char>,
-}
-
-impl CStringArray {
-    pub fn with_capacity(capacity: usize) -> Self {
-        let mut result = CStringArray {
-            items: Vec::with_capacity(capacity),
-            ptrs: Vec::with_capacity(capacity + 1),
-        };
-        result.ptrs.push(ptr::null());
-        result
-    }
-    pub fn push(&mut self, item: CString) {
-        let l = self.ptrs.len();
-        self.ptrs[l - 1] = item.as_ptr();
-        self.ptrs.push(ptr::null());
-        self.items.push(item);
-    }
-    pub fn as_ptr(&self) -> *const *const c_char {
-        self.ptrs.as_ptr()
-    }
-}
-
 fn construct_envp(env: BTreeMap<OsString, OsString>, saw_nul: &mut bool) -> CStringArray {
     let mut result = CStringArray::with_capacity(env.len());
     for (mut k, v) in env {
@@ -619,14 +552,16 @@ impl fmt::Debug for Command {
                     write!(f, "{}={value:?} ", key.to_string_lossy())?;
                 }
             }
-            if self.program != self.args[0] {
+
+            if *self.program != self.args[0] {
                 write!(f, "[{:?}] ", self.program)?;
             }
-            write!(f, "{:?}", self.args[0])?;
+            write!(f, "{:?}", &self.args[0])?;
 
-            for arg in &self.args[1..] {
+            for arg in self.get_args() {
                 write!(f, " {:?}", arg)?;
             }
+
             Ok(())
         }
     }
@@ -658,14 +593,16 @@ impl From<u8> for ExitCode {
 }
 
 pub struct CommandArgs<'a> {
-    iter: crate::slice::Iter<'a, CString>,
+    iter: CStringIter<'a>,
 }
 
 impl<'a> Iterator for CommandArgs<'a> {
     type Item = &'a OsStr;
+
     fn next(&mut self) -> Option<&'a OsStr> {
-        self.iter.next().map(|cs| OsStr::from_bytes(cs.as_bytes()))
+        self.iter.next().map(|cs| OsStr::from_bytes(cs.to_bytes()))
     }
+
     fn size_hint(&self) -> (usize, Option<usize>) {
         self.iter.size_hint()
     }
@@ -675,6 +612,7 @@ impl<'a> ExactSizeIterator for CommandArgs<'a> {
     fn len(&self) -> usize {
         self.iter.len()
     }
+
     fn is_empty(&self) -> bool {
         self.iter.is_empty()
     }
diff --git a/library/std/src/sys/process/unix/common/cstring_array.rs b/library/std/src/sys/process/unix/common/cstring_array.rs
new file mode 100644
index 00000000000..1c840a85df9
--- /dev/null
+++ b/library/std/src/sys/process/unix/common/cstring_array.rs
@@ -0,0 +1,115 @@
+use crate::ffi::{CStr, CString, c_char};
+use crate::ops::Index;
+use crate::{fmt, mem, ptr};
+
+/// Helper type to manage ownership of the strings within a C-style array.
+///
+/// This type manages an array of C-string pointers terminated by a null
+/// pointer. The pointer to the array (as returned by `as_ptr`) can be used as
+/// a value of `argv` or `environ`.
+pub struct CStringArray {
+    ptrs: Vec<*const c_char>,
+}
+
+impl CStringArray {
+    /// Creates a new `CStringArray` with enough capacity to hold `capacity`
+    /// strings.
+    pub fn with_capacity(capacity: usize) -> Self {
+        let mut result = CStringArray { ptrs: Vec::with_capacity(capacity + 1) };
+        result.ptrs.push(ptr::null());
+        result
+    }
+
+    /// Replace the string at position `index`.
+    pub fn write(&mut self, index: usize, item: CString) {
+        let argc = self.ptrs.len() - 1;
+        let ptr = &mut self.ptrs[..argc][index];
+        let old = mem::replace(ptr, item.into_raw());
+        // SAFETY:
+        // `CStringArray` owns all of its strings, and they were all transformed
+        // into pointers using `CString::into_raw`. Also, this is not the null
+        // pointer since the indexing above would have failed.
+        drop(unsafe { CString::from_raw(old.cast_mut()) });
+    }
+
+    /// Push an additional string to the array.
+    pub fn push(&mut self, item: CString) {
+        let argc = self.ptrs.len() - 1;
+        // Replace the null pointer at the end of the array...
+        self.ptrs[argc] = item.into_raw();
+        // ... and recreate it to restore the data structure invariant.
+        self.ptrs.push(ptr::null());
+    }
+
+    /// Returns a pointer to the C-string array managed by this type.
+    pub fn as_ptr(&self) -> *const *const c_char {
+        self.ptrs.as_ptr()
+    }
+
+    /// Returns an iterator over all `CStr`s contained in this array.
+    pub fn iter(&self) -> CStringIter<'_> {
+        CStringIter { iter: self.ptrs[..self.ptrs.len() - 1].iter() }
+    }
+}
+
+impl Index<usize> for CStringArray {
+    type Output = CStr;
+    fn index(&self, index: usize) -> &CStr {
+        let ptr = self.ptrs[..self.ptrs.len() - 1][index];
+        // SAFETY:
+        // `CStringArray` owns all of its strings. Also, this is not the null
+        // pointer since the indexing above would have failed.
+        unsafe { CStr::from_ptr(ptr) }
+    }
+}
+
+impl fmt::Debug for CStringArray {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_list().entries(self.iter()).finish()
+    }
+}
+
+// SAFETY: `CStringArray` is basically just a `Vec<CString>`
+unsafe impl Send for CStringArray {}
+// SAFETY: `CStringArray` is basically just a `Vec<CString>`
+unsafe impl Sync for CStringArray {}
+
+impl Drop for CStringArray {
+    fn drop(&mut self) {
+        // SAFETY:
+        // `CStringArray` owns all of its strings, and they were all transformed
+        // into pointers using `CString::into_raw`.
+        self.ptrs[..self.ptrs.len() - 1]
+            .iter()
+            .for_each(|&p| drop(unsafe { CString::from_raw(p.cast_mut()) }))
+    }
+}
+
+/// An iterator over all `CStr`s contained in a `CStringArray`.
+#[derive(Clone)]
+pub struct CStringIter<'a> {
+    iter: crate::slice::Iter<'a, *const c_char>,
+}
+
+impl<'a> Iterator for CStringIter<'a> {
+    type Item = &'a CStr;
+    fn next(&mut self) -> Option<&'a CStr> {
+        // SAFETY:
+        // `CStringArray` owns all of its strings. Also, this is not the null
+        // pointer since the last element is excluded when creating `iter`.
+        self.iter.next().map(|&p| unsafe { CStr::from_ptr(p) })
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.iter.size_hint()
+    }
+}
+
+impl<'a> ExactSizeIterator for CStringIter<'a> {
+    fn len(&self) -> usize {
+        self.iter.len()
+    }
+    fn is_empty(&self) -> bool {
+        self.iter.is_empty()
+    }
+}