about summary refs log tree commit diff
path: root/library/std/src/io/mod.rs
diff options
context:
space:
mode:
authorMatthias Krüger <matthias.krueger@famsik.de>2022-08-28 09:35:11 +0200
committerGitHub <noreply@github.com>2022-08-28 09:35:11 +0200
commitb9306c231a9ffaf83d420ec09f165f58444d142e (patch)
tree94de45df1e1e09e43ddeb4bcecc929af3e21d5d4 /library/std/src/io/mod.rs
parent91f128baf7704a477ab7c499143a160fb069b3ad (diff)
parentac70aea98509c33ec75208f7b42c8d905c74ebaf (diff)
downloadrust-b9306c231a9ffaf83d420ec09f165f58444d142e.tar.gz
rust-b9306c231a9ffaf83d420ec09f165f58444d142e.zip
Rollup merge of #97015 - nrc:read-buf-cursor, r=Mark-Simulacrum
std::io: migrate ReadBuf to BorrowBuf/BorrowCursor

This PR replaces `ReadBuf` (used by the `Read::read_buf` family of methods) with `BorrowBuf` and `BorrowCursor`.

The general idea is to split `ReadBuf` because its API is large and confusing. `BorrowBuf` represents a borrowed buffer which is mostly read-only and (other than for construction) deals only with filled vs unfilled segments. a `BorrowCursor` is a mostly write-only view of the unfilled part of a `BorrowBuf` which distinguishes between initialized and uninitialized segments. For `Read::read_buf`, the caller would create a `BorrowBuf`, then pass a `BorrowCursor` to `read_buf`.

In addition to the major API split, I've made the following smaller changes:

* Removed some methods entirely from the API (mostly the functionality can be replicated with two calls rather than a single one)
* Unified naming, e.g., by replacing initialized with init and assume_init with set_init
* Added an easy way to get the number of bytes written to a cursor (`written` method)

As well as simplifying the API (IMO), this approach has the following advantages:

* Since we pass the cursor by value, we remove the 'unsoundness footgun' where a malicious `read_buf` could swap out the `ReadBuf`.
* Since `read_buf` cannot write into the filled part of the buffer, we prevent the filled part shrinking or changing which could cause underflow for the caller or unexpected behaviour.

## Outline

```rust
pub struct BorrowBuf<'a>

impl Debug for BorrowBuf<'_>

impl<'a> From<&'a mut [u8]> for BorrowBuf<'a>
impl<'a> From<&'a mut [MaybeUninit<u8>]> for BorrowBuf<'a>

impl<'a> BorrowBuf<'a> {
    pub fn capacity(&self) -> usize
    pub fn len(&self) -> usize
    pub fn init_len(&self) -> usize
    pub fn filled(&self) -> &[u8]
    pub fn unfilled<'this>(&'this mut self) -> BorrowCursor<'this, 'a>
    pub fn clear(&mut self) -> &mut Self
    pub unsafe fn set_init(&mut self, n: usize) -> &mut Self
}

pub struct BorrowCursor<'buf, 'data>

impl<'buf, 'data> BorrowCursor<'buf, 'data> {
    pub fn clone<'this>(&'this mut self) -> BorrowCursor<'this, 'data>
    pub fn capacity(&self) -> usize
    pub fn written(&self) -> usize
    pub fn init_ref(&self) -> &[u8]
    pub fn init_mut(&mut self) -> &mut [u8]
    pub fn uninit_mut(&mut self) -> &mut [MaybeUninit<u8>]
    pub unsafe fn as_mut(&mut self) -> &mut [MaybeUninit<u8>]
    pub unsafe fn advance(&mut self, n: usize) -> &mut Self
    pub fn ensure_init(&mut self) -> &mut Self
    pub unsafe fn set_init(&mut self, n: usize) -> &mut Self
    pub fn append(&mut self, buf: &[u8])
}
```

## TODO

* ~~Migrate non-unix libs and tests~~
* ~~Naming~~
  * ~~`BorrowBuf` or `BorrowedBuf` or `SliceBuf`? (We might want an owned equivalent for the async IO traits)~~
  * ~~Should we rename the `readbuf` module? We might keep the name indicate it includes both the buf and cursor variations and someday the owned version too. Or we could change it. It is not publicly exposed, so it is not that important~~.
  * ~~`read_buf` method: we read into the cursor now, so the `_buf` suffix is a bit weird.~~
* ~~Documentation~~
* Tests are incomplete (I adjusted existing tests, but did not add new ones).

cc https://github.com/rust-lang/rust/issues/78485, https://github.com/rust-lang/rust/issues/94741
supersedes: https://github.com/rust-lang/rust/pull/95770, https://github.com/rust-lang/rust/pull/93359
fixes #93305
Diffstat (limited to 'library/std/src/io/mod.rs')
-rw-r--r--library/std/src/io/mod.rs82
1 files changed, 42 insertions, 40 deletions
diff --git a/library/std/src/io/mod.rs b/library/std/src/io/mod.rs
index 96addbd1a05..8b8ec32bf5b 100644
--- a/library/std/src/io/mod.rs
+++ b/library/std/src/io/mod.rs
@@ -278,7 +278,7 @@ pub use self::{
 };
 
 #[unstable(feature = "read_buf", issue = "78485")]
-pub use self::readbuf::ReadBuf;
+pub use self::readbuf::{BorrowedBuf, BorrowedCursor};
 pub(crate) use error::const_io_error;
 
 mod buffered;
@@ -362,29 +362,30 @@ pub(crate) fn default_read_to_end<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>
             buf.reserve(32); // buf is full, need more space
         }
 
-        let mut read_buf = ReadBuf::uninit(buf.spare_capacity_mut());
+        let mut read_buf: BorrowedBuf<'_> = buf.spare_capacity_mut().into();
 
         // SAFETY: These bytes were initialized but not filled in the previous loop
         unsafe {
-            read_buf.assume_init(initialized);
+            read_buf.set_init(initialized);
         }
 
-        match r.read_buf(&mut read_buf) {
+        let mut cursor = read_buf.unfilled();
+        match r.read_buf(cursor.reborrow()) {
             Ok(()) => {}
             Err(e) if e.kind() == ErrorKind::Interrupted => continue,
             Err(e) => return Err(e),
         }
 
-        if read_buf.filled_len() == 0 {
+        if cursor.written() == 0 {
             return Ok(buf.len() - start_len);
         }
 
         // store how much was initialized but not filled
-        initialized = read_buf.initialized_len() - read_buf.filled_len();
-        let new_len = read_buf.filled_len() + buf.len();
+        initialized = cursor.init_ref().len();
 
-        // SAFETY: ReadBuf's invariants mean this much memory is init
+        // SAFETY: BorrowedBuf's invariants mean this much memory is initialized.
         unsafe {
+            let new_len = read_buf.filled().len() + buf.len();
             buf.set_len(new_len);
         }
 
@@ -461,12 +462,15 @@ pub(crate) fn default_read_exact<R: Read + ?Sized>(this: &mut R, mut buf: &mut [
     }
 }
 
-pub(crate) fn default_read_buf<F>(read: F, buf: &mut ReadBuf<'_>) -> Result<()>
+pub(crate) fn default_read_buf<F>(read: F, mut cursor: BorrowedCursor<'_>) -> Result<()>
 where
     F: FnOnce(&mut [u8]) -> Result<usize>,
 {
-    let n = read(buf.initialize_unfilled())?;
-    buf.add_filled(n);
+    let n = read(cursor.ensure_init().init_mut())?;
+    unsafe {
+        // SAFETY: we initialised using `ensure_init` so there is no uninit data to advance to.
+        cursor.advance(n);
+    }
     Ok(())
 }
 
@@ -803,30 +807,30 @@ pub trait Read {
 
     /// Pull some bytes from this source into the specified buffer.
     ///
-    /// This is equivalent to the [`read`](Read::read) method, except that it is passed a [`ReadBuf`] rather than `[u8]` to allow use
+    /// This is equivalent to the [`read`](Read::read) method, except that it is passed a [`BorrowedCursor`] rather than `[u8]` to allow use
     /// with uninitialized buffers. The new data will be appended to any existing contents of `buf`.
     ///
     /// The default implementation delegates to `read`.
     #[unstable(feature = "read_buf", issue = "78485")]
-    fn read_buf(&mut self, buf: &mut ReadBuf<'_>) -> Result<()> {
+    fn read_buf(&mut self, buf: BorrowedCursor<'_>) -> Result<()> {
         default_read_buf(|b| self.read(b), buf)
     }
 
-    /// Read the exact number of bytes required to fill `buf`.
+    /// Read the exact number of bytes required to fill `cursor`.
     ///
-    /// This is equivalent to the [`read_exact`](Read::read_exact) method, except that it is passed a [`ReadBuf`] rather than `[u8]` to
+    /// This is equivalent to the [`read_exact`](Read::read_exact) method, except that it is passed a [`BorrowedCursor`] rather than `[u8]` to
     /// allow use with uninitialized buffers.
     #[unstable(feature = "read_buf", issue = "78485")]
-    fn read_buf_exact(&mut self, buf: &mut ReadBuf<'_>) -> Result<()> {
-        while buf.remaining() > 0 {
-            let prev_filled = buf.filled().len();
-            match self.read_buf(buf) {
+    fn read_buf_exact(&mut self, mut cursor: BorrowedCursor<'_>) -> Result<()> {
+        while cursor.capacity() > 0 {
+            let prev_written = cursor.written();
+            match self.read_buf(cursor.reborrow()) {
                 Ok(()) => {}
                 Err(e) if e.kind() == ErrorKind::Interrupted => continue,
                 Err(e) => return Err(e),
             }
 
-            if buf.filled().len() == prev_filled {
+            if cursor.written() == prev_written {
                 return Err(Error::new(ErrorKind::UnexpectedEof, "failed to fill buffer"));
             }
         }
@@ -2582,50 +2586,48 @@ impl<T: Read> Read for Take<T> {
         Ok(n)
     }
 
-    fn read_buf(&mut self, buf: &mut ReadBuf<'_>) -> Result<()> {
+    fn read_buf(&mut self, mut buf: BorrowedCursor<'_>) -> Result<()> {
         // Don't call into inner reader at all at EOF because it may still block
         if self.limit == 0 {
             return Ok(());
         }
 
-        let prev_filled = buf.filled_len();
-
-        if self.limit <= buf.remaining() as u64 {
+        if self.limit <= buf.capacity() as u64 {
             // if we just use an as cast to convert, limit may wrap around on a 32 bit target
             let limit = cmp::min(self.limit, usize::MAX as u64) as usize;
 
-            let extra_init = cmp::min(limit as usize, buf.initialized_len() - buf.filled_len());
+            let extra_init = cmp::min(limit as usize, buf.init_ref().len());
 
             // SAFETY: no uninit data is written to ibuf
-            let ibuf = unsafe { &mut buf.unfilled_mut()[..limit] };
+            let ibuf = unsafe { &mut buf.as_mut()[..limit] };
 
-            let mut sliced_buf = ReadBuf::uninit(ibuf);
+            let mut sliced_buf: BorrowedBuf<'_> = ibuf.into();
 
             // SAFETY: extra_init bytes of ibuf are known to be initialized
             unsafe {
-                sliced_buf.assume_init(extra_init);
+                sliced_buf.set_init(extra_init);
             }
 
-            self.inner.read_buf(&mut sliced_buf)?;
+            let mut cursor = sliced_buf.unfilled();
+            self.inner.read_buf(cursor.reborrow())?;
 
-            let new_init = sliced_buf.initialized_len();
-            let filled = sliced_buf.filled_len();
+            let new_init = cursor.init_ref().len();
+            let filled = sliced_buf.len();
 
-            // sliced_buf / ibuf must drop here
+            // cursor / sliced_buf / ibuf must drop here
 
-            // SAFETY: new_init bytes of buf's unfilled buffer have been initialized
             unsafe {
-                buf.assume_init(new_init);
+                // SAFETY: filled bytes have been filled and therefore initialized
+                buf.advance(filled);
+                // SAFETY: new_init bytes of buf's unfilled buffer have been initialized
+                buf.set_init(new_init);
             }
 
-            buf.add_filled(filled);
-
             self.limit -= filled as u64;
         } else {
-            self.inner.read_buf(buf)?;
-
-            //inner may unfill
-            self.limit -= buf.filled_len().saturating_sub(prev_filled) as u64;
+            let written = buf.written();
+            self.inner.read_buf(buf.reborrow())?;
+            self.limit -= (buf.written() - written) as u64;
         }
 
         Ok(())