diff options
| author | bors <bors@rust-lang.org> | 2023-11-23 22:28:14 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2023-11-23 22:28:14 +0000 |
| commit | e68f935117fef05e7663605a5a6671a6bb4ce719 (patch) | |
| tree | 4975abe955374ee74c729da510d5c9ce50e0abcf | |
| parent | a1a37735cbc3db359d0b24ba9085c9fcbe1bc274 (diff) | |
| parent | 7c1ab71f71087d88aade79925ae68a447397422f (diff) | |
| download | rust-e68f935117fef05e7663605a5a6671a6bb4ce719.tar.gz rust-e68f935117fef05e7663605a5a6671a6bb4ce719.zip | |
Auto merge of #98943 - WilliamVenner:feat/bufread_skip_until, r=dtolnay
Add `BufRead::skip_until`
Alternative version of `BufRead::read_until` that simply discards data, rather than copying it into a buffer.
Useful for situations like skipping irrelevant data in a binary file format that is NUL-terminated.
<details>
<summary>Benchmark</summary>
```
running 2 tests
test bench_read_until ... bench: 123 ns/iter (+/- 6)
test bench_skip_until ... bench: 66 ns/iter (+/- 3)
```
```rs
#![feature(test)]
extern crate test;
use test::Bencher;
use std::io::{ErrorKind, BufRead};
fn skip_until<R: BufRead + ?Sized>(r: &mut R, delim: u8) -> Result<usize, std::io::Error> {
let mut read = 0;
loop {
let (done, used) = {
let available = match r.fill_buf() {
Ok(n) => n,
Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
Err(e) => return Err(e),
};
match memchr::memchr(delim, available) {
Some(i) => (true, i + 1),
None => (false, available.len()),
}
};
r.consume(used);
read += used;
if done || used == 0 {
return Ok(read);
}
}
}
const STR: &[u8] = b"Ferris\0Hello, world!\0";
#[bench]
fn bench_skip_until(b: &mut Bencher) {
b.iter(|| {
let mut io = std::io::Cursor::new(test::black_box(STR));
skip_until(&mut io, b'\0').unwrap();
let mut hello = Vec::with_capacity(b"Hello, world!\0".len());
let num_bytes = io.read_until(b'\0', &mut hello).unwrap();
assert_eq!(num_bytes, b"Hello, world!\0".len());
assert_eq!(hello, b"Hello, world!\0");
});
}
#[bench]
fn bench_read_until(b: &mut Bencher) {
b.iter(|| {
let mut io = std::io::Cursor::new(test::black_box(STR));
io.read_until(b'\0', &mut Vec::new()).unwrap();
let mut hello = Vec::with_capacity(b"Hello, world!\0".len());
let num_bytes = io.read_until(b'\0', &mut hello).unwrap();
assert_eq!(num_bytes, b"Hello, world!\0".len());
assert_eq!(hello, b"Hello, world!\0");
});
}
```
</details>
| -rw-r--r-- | library/std/src/io/mod.rs | 84 | ||||
| -rw-r--r-- | library/std/src/io/tests.rs | 30 |
2 files changed, 114 insertions, 0 deletions
diff --git a/library/std/src/io/mod.rs b/library/std/src/io/mod.rs index ff5b4e76d44..260200115c8 100644 --- a/library/std/src/io/mod.rs +++ b/library/std/src/io/mod.rs @@ -2044,6 +2044,28 @@ fn read_until<R: BufRead + ?Sized>(r: &mut R, delim: u8, buf: &mut Vec<u8>) -> R } } +fn skip_until<R: BufRead + ?Sized>(r: &mut R, delim: u8) -> Result<usize> { + let mut read = 0; + loop { + let (done, used) = { + let available = match r.fill_buf() { + Ok(n) => n, + Err(ref e) if e.kind() == ErrorKind::Interrupted => continue, + Err(e) => return Err(e), + }; + match memchr::memchr(delim, available) { + Some(i) => (true, i + 1), + None => (false, available.len()), + } + }; + r.consume(used); + read += used; + if done || used == 0 { + return Ok(read); + } + } +} + /// A `BufRead` is a type of `Read`er which has an internal buffer, allowing it /// to perform extra ways of reading. /// @@ -2247,6 +2269,68 @@ pub trait BufRead: Read { read_until(self, byte, buf) } + /// Skip all bytes until the delimiter `byte` or EOF is reached. + /// + /// This function will read (and discard) bytes from the underlying stream until the + /// delimiter or EOF is found. + /// + /// If successful, this function will return the total number of bytes read, + /// including the delimiter byte. + /// + /// This is useful for efficiently skipping data such as NUL-terminated strings + /// in binary file formats without buffering. + /// + /// This function is blocking and should be used carefully: it is possible for + /// an attacker to continuously send bytes without ever sending the delimiter + /// or EOF. + /// + /// # Errors + /// + /// This function will ignore all instances of [`ErrorKind::Interrupted`] and + /// will otherwise return any errors returned by [`fill_buf`]. + /// + /// If an I/O error is encountered then all bytes read so far will be + /// present in `buf` and its length will have been adjusted appropriately. + /// + /// [`fill_buf`]: BufRead::fill_buf + /// + /// # Examples + /// + /// [`std::io::Cursor`][`Cursor`] is a type that implements `BufRead`. In + /// this example, we use [`Cursor`] to read some NUL-terminated information + /// about Ferris from a binary string, skipping the fun fact: + /// + /// ``` + /// #![feature(bufread_skip_until)] + /// + /// use std::io::{self, BufRead}; + /// + /// let mut cursor = io::Cursor::new(b"Ferris\0Likes long walks on the beach\0Crustacean\0"); + /// + /// // read name + /// let mut name = Vec::new(); + /// let num_bytes = cursor.read_until(b'\0', &mut name) + /// .expect("reading from cursor won't fail"); + /// assert_eq!(num_bytes, 7); + /// assert_eq!(name, b"Ferris\0"); + /// + /// // skip fun fact + /// let num_bytes = cursor.skip_until(b'\0') + /// .expect("reading from cursor won't fail"); + /// assert_eq!(num_bytes, 30); + /// + /// // read animal type + /// let mut animal = Vec::new(); + /// let num_bytes = cursor.read_until(b'\0', &mut animal) + /// .expect("reading from cursor won't fail"); + /// assert_eq!(num_bytes, 11); + /// assert_eq!(animal, b"Crustacean\0"); + /// ``` + #[unstable(feature = "bufread_skip_until", issue = "111735")] + fn skip_until(&mut self, byte: u8) -> Result<usize> { + skip_until(self, byte) + } + /// Read all bytes until a newline (the `0xA` byte) is reached, and append /// them to the provided `String` buffer. /// diff --git a/library/std/src/io/tests.rs b/library/std/src/io/tests.rs index 6d30f5e6c6c..bda5b721adc 100644 --- a/library/std/src/io/tests.rs +++ b/library/std/src/io/tests.rs @@ -26,6 +26,36 @@ fn read_until() { } #[test] +fn skip_until() { + let bytes: &[u8] = b"read\0ignore\0read\0ignore\0read\0ignore\0"; + let mut reader = BufReader::new(bytes); + + // read from the bytes, alternating between + // consuming `read\0`s and skipping `ignore\0`s + loop { + // consume `read\0` + let mut out = Vec::new(); + let read = reader.read_until(0, &mut out).unwrap(); + if read == 0 { + // eof + break; + } else { + assert_eq!(out, b"read\0"); + assert_eq!(read, b"read\0".len()); + } + + // skip past `ignore\0` + let skipped = reader.skip_until(0).unwrap(); + assert_eq!(skipped, b"ignore\0".len()); + } + + // ensure we are at the end of the byte slice and that we can skip no further + // also ensure skip_until matches the behavior of read_until at EOF + let skipped = reader.skip_until(0).unwrap(); + assert_eq!(skipped, 0); +} + +#[test] fn split() { let buf = Cursor::new(&b"12"[..]); let mut s = buf.split(b'3'); |
