about summary refs log tree commit diff
path: root/library/std/src
diff options
context:
space:
mode:
Diffstat (limited to 'library/std/src')
-rw-r--r--library/std/src/sys/unix/kernel_copy.rs62
-rw-r--r--library/std/src/sys/unix/kernel_copy/tests.rs42
2 files changed, 88 insertions, 16 deletions
diff --git a/library/std/src/sys/unix/kernel_copy.rs b/library/std/src/sys/unix/kernel_copy.rs
index 73b9bef7e2a..16c8e0c0ebf 100644
--- a/library/std/src/sys/unix/kernel_copy.rs
+++ b/library/std/src/sys/unix/kernel_copy.rs
@@ -17,11 +17,9 @@
 //! Once it has obtained all necessary pieces and brought any wrapper types into a state where they
 //! can be safely bypassed it will attempt to use the `copy_file_range(2)`,
 //! `sendfile(2)` or `splice(2)` syscalls to move data directly between file descriptors.
-//! Since those syscalls have requirements that cannot be fully checked in advance and
-//! gathering additional information about file descriptors would require additional syscalls
-//! anyway it simply attempts to use them one after another (guided by inaccurate hints) to
-//! figure out which one works and falls back to the generic read-write copy loop if none of them
-//! does.
+//! Since those syscalls have requirements that cannot be fully checked in advance it attempts
+//! to use them one after another (guided by hints) to figure out which one works and
+//! falls back to the generic read-write copy loop if none of them does.
 //! Once a working syscall is found for a pair of file descriptors it will be called in a loop
 //! until the copy operation is completed.
 //!
@@ -84,14 +82,10 @@ pub(crate) fn copy_spec<R: Read + ?Sized, W: Write + ?Sized>(
 /// The methods on this type only provide hints, due to `AsRawFd` and `FromRawFd` the inferred
 /// type may be wrong.
 enum FdMeta {
-    /// We obtained the FD from a type that can contain any type of `FileType` and queried the metadata
-    /// because it is cheaper than probing all possible syscalls (reader side)
     Metadata(Metadata),
     Socket,
     Pipe,
-    /// We don't have any metadata, e.g. because the original type was `File` which can represent
-    /// any `FileType` and we did not query the metadata either since it did not seem beneficial
-    /// (writer side)
+    /// We don't have any metadata because the stat syscall failed
     NoneObtained,
 }
 
@@ -131,6 +125,39 @@ impl FdMeta {
     }
 }
 
+/// Returns true either if changes made to the source after a sendfile/splice call won't become
+/// visible in the sink or the source has explicitly opted into such behavior (e.g. by splicing
+/// a file into a pipe, the pipe being the source in this case).
+///
+/// This will prevent File -> Pipe and File -> Socket splicing/sendfile optimizations to uphold
+/// the Read/Write API semantics of io::copy.
+///
+/// Note: This is not 100% airtight, the caller can use the RawFd conversion methods to turn a
+/// regular file into a TcpSocket which will be treated as a socket here without checking.
+fn safe_kernel_copy(source: &FdMeta, sink: &FdMeta) -> bool {
+    match (source, sink) {
+        // Data arriving from a socket is safe because the sender can't modify the socket buffer.
+        // Data arriving from a pipe is safe(-ish) because either the sender *copied*
+        // the bytes into the pipe OR explicitly performed an operation that enables zero-copy,
+        // thus promising not to modify the data later.
+        (FdMeta::Socket, _) => true,
+        (FdMeta::Pipe, _) => true,
+        (FdMeta::Metadata(meta), _)
+            if meta.file_type().is_fifo() || meta.file_type().is_socket() =>
+        {
+            true
+        }
+        // Data going into non-pipes/non-sockets is safe because the "later changes may become visible" issue
+        // only happens for pages sitting in send buffers or pipes.
+        (_, FdMeta::Metadata(meta))
+            if !meta.file_type().is_fifo() && !meta.file_type().is_socket() =>
+        {
+            true
+        }
+        _ => false,
+    }
+}
+
 struct CopyParams(FdMeta, Option<RawFd>);
 
 struct Copier<'a, 'b, R: Read + ?Sized, W: Write + ?Sized> {
@@ -186,7 +213,8 @@ impl<R: CopyRead, W: CopyWrite> SpecCopy for Copier<'_, '_, R, W> {
             // So we just try and fallback if needed.
             // If current file offsets + write sizes overflow it may also fail, we do not try to fix that and instead
             // fall back to the generic copy loop.
-            if input_meta.potential_sendfile_source() {
+            if input_meta.potential_sendfile_source() && safe_kernel_copy(&input_meta, &output_meta)
+            {
                 let result = sendfile_splice(SpliceMode::Sendfile, readfd, writefd, max_write);
                 result.update_take(reader);
 
@@ -197,7 +225,9 @@ impl<R: CopyRead, W: CopyWrite> SpecCopy for Copier<'_, '_, R, W> {
                 }
             }
 
-            if input_meta.maybe_fifo() || output_meta.maybe_fifo() {
+            if (input_meta.maybe_fifo() || output_meta.maybe_fifo())
+                && safe_kernel_copy(&input_meta, &output_meta)
+            {
                 let result = sendfile_splice(SpliceMode::Splice, readfd, writefd, max_write);
                 result.update_take(reader);
 
@@ -298,13 +328,13 @@ impl CopyRead for &File {
 
 impl CopyWrite for File {
     fn properties(&self) -> CopyParams {
-        CopyParams(FdMeta::NoneObtained, Some(self.as_raw_fd()))
+        CopyParams(fd_to_meta(self), Some(self.as_raw_fd()))
     }
 }
 
 impl CopyWrite for &File {
     fn properties(&self) -> CopyParams {
-        CopyParams(FdMeta::NoneObtained, Some(self.as_raw_fd()))
+        CopyParams(fd_to_meta(*self), Some(self.as_raw_fd()))
     }
 }
 
@@ -401,13 +431,13 @@ impl CopyRead for StdinLock<'_> {
 
 impl CopyWrite for StdoutLock<'_> {
     fn properties(&self) -> CopyParams {
-        CopyParams(FdMeta::NoneObtained, Some(self.as_raw_fd()))
+        CopyParams(fd_to_meta(self), Some(self.as_raw_fd()))
     }
 }
 
 impl CopyWrite for StderrLock<'_> {
     fn properties(&self) -> CopyParams {
-        CopyParams(FdMeta::NoneObtained, Some(self.as_raw_fd()))
+        CopyParams(fd_to_meta(self), Some(self.as_raw_fd()))
     }
 }
 
diff --git a/library/std/src/sys/unix/kernel_copy/tests.rs b/library/std/src/sys/unix/kernel_copy/tests.rs
index 3fe849e23e2..a524270e3fb 100644
--- a/library/std/src/sys/unix/kernel_copy/tests.rs
+++ b/library/std/src/sys/unix/kernel_copy/tests.rs
@@ -83,6 +83,48 @@ fn copies_append_mode_sink() -> Result<()> {
     Ok(())
 }
 
+#[test]
+fn dont_splice_pipes_from_files() -> Result<()> {
+    // splicing to a pipe and then modifying the source could lead to changes
+    // becoming visible in an unexpected order.
+
+    use crate::io::SeekFrom;
+    use crate::os::unix::fs::FileExt;
+    use crate::process::{ChildStdin, ChildStdout};
+    use crate::sys_common::FromInner;
+
+    let (read_end, write_end) = crate::sys::pipe::anon_pipe()?;
+
+    let mut read_end = ChildStdout::from_inner(read_end);
+    let mut write_end = ChildStdin::from_inner(write_end);
+
+    let tmp_path = tmpdir();
+    let file = tmp_path.join("to_be_modified");
+    let mut file =
+        crate::fs::OpenOptions::new().create_new(true).read(true).write(true).open(file)?;
+
+    const SZ: usize = libc::PIPE_BUF as usize;
+
+    // put data in page cache
+    let mut buf: [u8; SZ] = [0x01; SZ];
+    file.write_all(&buf).unwrap();
+
+    // copy page into pipe
+    file.seek(SeekFrom::Start(0)).unwrap();
+    assert!(io::copy(&mut file, &mut write_end).unwrap() == SZ as u64);
+
+    // modify file
+    buf[0] = 0x02;
+    file.write_at(&buf, 0).unwrap();
+
+    // read from pipe
+    read_end.read_exact(buf.as_mut_slice()).unwrap();
+
+    assert_eq!(buf[0], 0x01, "data in pipe should reflect the original, not later modifications");
+
+    Ok(())
+}
+
 #[bench]
 fn bench_file_to_file_copy(b: &mut test::Bencher) {
     const BYTES: usize = 128 * 1024;