about summary refs log tree commit diff
diff options
context:
space:
mode:
authorThe8472 <git@infinite-source.de>2020-11-01 19:59:24 +0100
committerThe8472 <git@infinite-source.de>2021-01-31 14:58:03 +0100
commit410550665601a8abe8935f7b55d5732fe4c4224f (patch)
tree20f3e1a0e45fdd069ccfda668b2541306aac02a1
parent0e63af5da3400ace48a0345117980473fd21ad73 (diff)
downloadrust-410550665601a8abe8935f7b55d5732fe4c4224f.tar.gz
rust-410550665601a8abe8935f7b55d5732fe4c4224f.zip
specialize io::copy to use the memory of the writer if it is a BufWriter
-rw-r--r--library/std/src/io/buffered/bufwriter.rs14
-rw-r--r--library/std/src/io/copy.rs80
-rw-r--r--library/std/src/io/util/tests.rs50
-rw-r--r--library/std/src/lib.rs2
4 files changed, 138 insertions, 8 deletions
diff --git a/library/std/src/io/buffered/bufwriter.rs b/library/std/src/io/buffered/bufwriter.rs
index fa7bd0d72d1..65bc2fcf00a 100644
--- a/library/std/src/io/buffered/bufwriter.rs
+++ b/library/std/src/io/buffered/bufwriter.rs
@@ -117,7 +117,7 @@ impl<W: Write> BufWriter<W> {
     /// "successfully written" (by returning nonzero success values from
     /// `write`), any 0-length writes from `inner` must be reported as i/o
     /// errors from this method.
-    pub(super) fn flush_buf(&mut self) -> io::Result<()> {
+    pub(in crate::io) fn flush_buf(&mut self) -> io::Result<()> {
         /// Helper struct to ensure the buffer is updated after all the writes
         /// are complete. It tracks the number of written bytes and drains them
         /// all from the front of the buffer when dropped.
@@ -243,6 +243,18 @@ impl<W: Write> BufWriter<W> {
         &self.buf
     }
 
+    /// Returns a mutable reference to the internal buffer.
+    ///
+    /// This can be used to write data directly into the buffer without triggering writers
+    /// to the underlying writer.
+    ///
+    /// That the buffer is a `Vec` is an implementation detail.
+    /// Callers should not modify the capacity as there currently is no public API to do so
+    /// and thus any capacity changes would be unexpected by the user.
+    pub(in crate::io) fn buffer_mut(&mut self) -> &mut Vec<u8> {
+        &mut self.buf
+    }
+
     /// Returns the number of bytes the internal buffer can hold without flushing.
     ///
     /// # Examples
diff --git a/library/std/src/io/copy.rs b/library/std/src/io/copy.rs
index b88bca2f2b4..3780f2044cb 100644
--- a/library/std/src/io/copy.rs
+++ b/library/std/src/io/copy.rs
@@ -1,4 +1,4 @@
-use crate::io::{self, ErrorKind, Read, Write};
+use super::{BufWriter, ErrorKind, Read, Result, Write, DEFAULT_BUF_SIZE};
 use crate::mem::MaybeUninit;
 
 /// Copies the entire contents of a reader into a writer.
@@ -40,7 +40,7 @@ use crate::mem::MaybeUninit;
 /// }
 /// ```
 #[stable(feature = "rust1", since = "1.0.0")]
-pub fn copy<R: ?Sized, W: ?Sized>(reader: &mut R, writer: &mut W) -> io::Result<u64>
+pub fn copy<R: ?Sized, W: ?Sized>(reader: &mut R, writer: &mut W) -> Result<u64>
 where
     R: Read,
     W: Write,
@@ -54,14 +54,82 @@ where
     }
 }
 
-/// The general read-write-loop implementation of
-/// `io::copy` that is used when specializations are not available or not applicable.
-pub(crate) fn generic_copy<R: ?Sized, W: ?Sized>(reader: &mut R, writer: &mut W) -> io::Result<u64>
+/// The userspace read-write-loop implementation of `io::copy` that is used when
+/// OS-specific specializations for copy offloading are not available or not applicable.
+pub(crate) fn generic_copy<R: ?Sized, W: ?Sized>(reader: &mut R, writer: &mut W) -> Result<u64>
 where
     R: Read,
     W: Write,
 {
-    let mut buf = MaybeUninit::<[u8; super::DEFAULT_BUF_SIZE]>::uninit();
+    BufferedCopySpec::copy_to(reader, writer)
+}
+
+/// Specialization of the read-write loop that either uses a stack buffer
+/// or reuses the internal buffer of a BufWriter
+trait BufferedCopySpec: Write {
+    fn copy_to<R: Read + ?Sized>(reader: &mut R, writer: &mut Self) -> Result<u64>;
+}
+
+impl<W: Write + ?Sized> BufferedCopySpec for W {
+    default fn copy_to<R: Read + ?Sized>(reader: &mut R, writer: &mut Self) -> Result<u64> {
+        stack_buffer_copy(reader, writer)
+    }
+}
+
+impl<I: Write> BufferedCopySpec for BufWriter<I> {
+    fn copy_to<R: Read + ?Sized>(reader: &mut R, writer: &mut Self) -> Result<u64> {
+        if writer.capacity() < DEFAULT_BUF_SIZE {
+            return stack_buffer_copy(reader, writer);
+        }
+
+        // FIXME: #42788
+        //
+        //   - This creates a (mut) reference to a slice of
+        //     _uninitialized_ integers, which is **undefined behavior**
+        //
+        //   - Only the standard library gets to soundly "ignore" this,
+        //     based on its privileged knowledge of unstable rustc
+        //     internals;
+        unsafe {
+            let spare_cap = writer.buffer_mut().spare_capacity_mut();
+            reader.initializer().initialize(MaybeUninit::slice_assume_init_mut(spare_cap));
+        }
+
+        let mut len = 0;
+
+        loop {
+            let buf = writer.buffer_mut();
+            let spare_cap = buf.spare_capacity_mut();
+
+            if spare_cap.len() >= DEFAULT_BUF_SIZE {
+                match reader.read(unsafe { MaybeUninit::slice_assume_init_mut(spare_cap) }) {
+                    Ok(0) => return Ok(len), // EOF reached
+                    Ok(bytes_read) => {
+                        assert!(bytes_read <= spare_cap.len());
+                        // Safety: The initializer contract guarantees that either it or `read`
+                        // will have initialized these bytes. And we just checked that the number
+                        // of bytes is within the buffer capacity.
+                        unsafe { buf.set_len(buf.len() + bytes_read) };
+                        len += bytes_read as u64;
+                        // Read again if the buffer still has enough capacity, as BufWriter itself would do
+                        // This will occur if the reader returns short reads
+                        continue;
+                    }
+                    Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
+                    Err(e) => return Err(e),
+                }
+            }
+
+            writer.flush_buf()?;
+        }
+    }
+}
+
+fn stack_buffer_copy<R: Read + ?Sized, W: Write + ?Sized>(
+    reader: &mut R,
+    writer: &mut W,
+) -> Result<u64> {
+    let mut buf = MaybeUninit::<[u8; DEFAULT_BUF_SIZE]>::uninit();
     // FIXME: #42788
     //
     //   - This creates a (mut) reference to a slice of
diff --git a/library/std/src/io/util/tests.rs b/library/std/src/io/util/tests.rs
index df34e27d136..7632eaf872a 100644
--- a/library/std/src/io/util/tests.rs
+++ b/library/std/src/io/util/tests.rs
@@ -1,5 +1,8 @@
+use crate::cmp::{max, min};
 use crate::io::prelude::*;
-use crate::io::{copy, empty, repeat, sink, Empty, Repeat, SeekFrom, Sink};
+use crate::io::{
+    copy, empty, repeat, sink, BufWriter, Empty, Repeat, Result, SeekFrom, Sink, DEFAULT_BUF_SIZE,
+};
 
 #[test]
 fn copy_copies() {
@@ -11,6 +14,51 @@ fn copy_copies() {
     assert_eq!(copy(&mut r as &mut dyn Read, &mut w as &mut dyn Write).unwrap(), 1 << 17);
 }
 
+struct ShortReader {
+    cap: usize,
+    read_size: usize,
+    observed_buffer: usize,
+}
+
+impl Read for ShortReader {
+    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
+        let bytes = min(self.cap, self.read_size);
+        self.cap -= bytes;
+        self.observed_buffer = max(self.observed_buffer, buf.len());
+        Ok(bytes)
+    }
+}
+
+struct WriteObserver {
+    observed_buffer: usize,
+}
+
+impl Write for WriteObserver {
+    fn write(&mut self, buf: &[u8]) -> Result<usize> {
+        self.observed_buffer = max(self.observed_buffer, buf.len());
+        Ok(buf.len())
+    }
+
+    fn flush(&mut self) -> Result<()> {
+        Ok(())
+    }
+}
+
+#[test]
+fn copy_specializes_bufwriter() {
+    let cap = 117 * 1024;
+    let buf_sz = 16 * 1024;
+    let mut r = ShortReader { cap, observed_buffer: 0, read_size: 1337 };
+    let mut w = BufWriter::with_capacity(buf_sz, WriteObserver { observed_buffer: 0 });
+    assert_eq!(
+        copy(&mut r, &mut w).unwrap(),
+        cap as u64,
+        "expected the whole capacity to be copied"
+    );
+    assert_eq!(r.observed_buffer, buf_sz, "expected a large buffer to be provided to the reader");
+    assert!(w.get_mut().observed_buffer > DEFAULT_BUF_SIZE, "expected coalesced writes");
+}
+
 #[test]
 fn sink_sinks() {
     let mut s = sink();
diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs
index c1b79ff716c..f16fb3d04db 100644
--- a/library/std/src/lib.rs
+++ b/library/std/src/lib.rs
@@ -283,6 +283,7 @@
 #![feature(maybe_uninit_extra)]
 #![feature(maybe_uninit_ref)]
 #![feature(maybe_uninit_slice)]
+#![feature(maybe_uninit_uninit_array)]
 #![feature(min_specialization)]
 #![feature(needs_panic_runtime)]
 #![feature(negative_impls)]
@@ -326,6 +327,7 @@
 #![feature(unsafe_cell_raw_get)]
 #![feature(unwind_attributes)]
 #![feature(vec_into_raw_parts)]
+#![feature(vec_spare_capacity)]
 #![feature(wake_trait)]
 // NB: the above list is sorted to minimize merge conflicts.
 #![default_lib_allocator]