diff options
| author | Mara <m-ou.se@m-ou.se> | 2021-03-05 10:57:17 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-03-05 10:57:17 +0100 |
| commit | 60138110d7b00c8c788526cad525942e732db6ca (patch) | |
| tree | bf0ee3d1f4fe6994b62a6298b5cbed5f7c04caad | |
| parent | ec2619ca6219a74f3c97ac7c32aeecd79368788f (diff) | |
| parent | 7674ae1a4e10ab6bafbda57f9fc2175fdf012f5e (diff) | |
| download | rust-60138110d7b00c8c788526cad525942e732db6ca.tar.gz rust-60138110d7b00c8c788526cad525942e732db6ca.zip | |
Rollup merge of #81136 - Xavientois:io_reader_size_hint, r=cramertj
Improved IO Bytes Size Hint After trying to implement better `size_hint()` return values for `File` in [this PR](https://github.com/rust-lang/rust/pull/81044) and changing to implementing it for `BufReader` in [this PR](https://github.com/rust-lang/rust/pull/81052), I have arrived at this implementation that provides tighter bounds for the `Bytes` iterator of various readers including `BufReader`, `Empty`, and `Chain`. Unfortunately, for `BufReader`, the size_hint only improves after calling `fill_buffer` due to it using the contents of the buffer for the hint. Nevertheless, the the tighter bounds should result in better pre-allocation of space to handle the contents of the `Bytes` iterator. Closes #81052
| -rw-r--r-- | library/std/src/io/buffered/bufreader.rs | 10 | ||||
| -rw-r--r-- | library/std/src/io/mod.rs | 37 | ||||
| -rw-r--r-- | library/std/src/io/tests.rs | 49 | ||||
| -rw-r--r-- | library/std/src/io/util.rs | 10 |
4 files changed, 103 insertions, 3 deletions
diff --git a/library/std/src/io/buffered/bufreader.rs b/library/std/src/io/buffered/bufreader.rs index 987371f50ec..839c64ee5c4 100644 --- a/library/std/src/io/buffered/bufreader.rs +++ b/library/std/src/io/buffered/bufreader.rs @@ -1,6 +1,8 @@ use crate::cmp; use crate::fmt; -use crate::io::{self, BufRead, Initializer, IoSliceMut, Read, Seek, SeekFrom, DEFAULT_BUF_SIZE}; +use crate::io::{ + self, BufRead, Initializer, IoSliceMut, Read, Seek, SeekFrom, SizeHint, DEFAULT_BUF_SIZE, +}; /// The `BufReader<R>` struct adds buffering to any reader. /// @@ -435,3 +437,9 @@ impl<R: Seek> Seek for BufReader<R> { }) } } + +impl<T> SizeHint for BufReader<T> { + fn lower_bound(&self) -> usize { + self.buffer().len() + } +} diff --git a/library/std/src/io/mod.rs b/library/std/src/io/mod.rs index 22914987405..17002e3b860 100644 --- a/library/std/src/io/mod.rs +++ b/library/std/src/io/mod.rs @@ -2238,6 +2238,19 @@ impl<T: BufRead, U: BufRead> BufRead for Chain<T, U> { } } +impl<T, U> SizeHint for Chain<T, U> { + fn lower_bound(&self) -> usize { + SizeHint::lower_bound(&self.first) + SizeHint::lower_bound(&self.second) + } + + fn upper_bound(&self) -> Option<usize> { + match (SizeHint::upper_bound(&self.first), SizeHint::upper_bound(&self.second)) { + (Some(first), Some(second)) => Some(first + second), + _ => None, + } + } +} + /// Reader adaptor which limits the bytes read from an underlying reader. /// /// This struct is generally created by calling [`take`] on a reader. @@ -2464,6 +2477,30 @@ impl<R: Read> Iterator for Bytes<R> { }; } } + + fn size_hint(&self) -> (usize, Option<usize>) { + SizeHint::size_hint(&self.inner) + } +} + +trait SizeHint { + fn lower_bound(&self) -> usize; + + fn upper_bound(&self) -> Option<usize>; + + fn size_hint(&self) -> (usize, Option<usize>) { + (self.lower_bound(), self.upper_bound()) + } +} + +impl<T> SizeHint for T { + default fn lower_bound(&self) -> usize { + 0 + } + + default fn upper_bound(&self) -> Option<usize> { + None + } } /// An iterator over the contents of an instance of `BufRead` split on a diff --git a/library/std/src/io/tests.rs b/library/std/src/io/tests.rs index f176c2f088c..a85dd0d9827 100644 --- a/library/std/src/io/tests.rs +++ b/library/std/src/io/tests.rs @@ -1,7 +1,7 @@ use super::{repeat, Cursor, SeekFrom}; use crate::cmp::{self, min}; use crate::io::{self, IoSlice, IoSliceMut}; -use crate::io::{BufRead, Read, Seek, Write}; +use crate::io::{BufRead, BufReader, Read, Seek, Write}; use crate::ops::Deref; #[test] @@ -199,6 +199,53 @@ fn chain_bufread() { } #[test] +fn bufreader_size_hint() { + let testdata = b"ABCDEFGHIJKL"; + let mut buf_reader = BufReader::new(&testdata[..]); + assert_eq!(buf_reader.buffer().len(), 0); + + let buffer_length = testdata.len(); + buf_reader.fill_buf().unwrap(); + + // Check that size hint matches buffer contents + let mut buffered_bytes = buf_reader.bytes(); + let (lower_bound, _upper_bound) = buffered_bytes.size_hint(); + assert_eq!(lower_bound, buffer_length); + + // Check that size hint matches buffer contents after advancing + buffered_bytes.next().unwrap().unwrap(); + let (lower_bound, _upper_bound) = buffered_bytes.size_hint(); + assert_eq!(lower_bound, buffer_length - 1); +} + +#[test] +fn empty_size_hint() { + let size_hint = io::empty().bytes().size_hint(); + assert_eq!(size_hint, (0, Some(0))); +} + +#[test] +fn chain_empty_size_hint() { + let chain = io::empty().chain(io::empty()); + let size_hint = chain.bytes().size_hint(); + assert_eq!(size_hint, (0, Some(0))); +} + +#[test] +fn chain_size_hint() { + let testdata = b"ABCDEFGHIJKL"; + let mut buf_reader_1 = BufReader::new(&testdata[..6]); + let mut buf_reader_2 = BufReader::new(&testdata[6..]); + + buf_reader_1.fill_buf().unwrap(); + buf_reader_2.fill_buf().unwrap(); + + let chain = buf_reader_1.chain(buf_reader_2); + let size_hint = chain.bytes().size_hint(); + assert_eq!(size_hint, (testdata.len(), None)); +} + +#[test] fn chain_zero_length_read_is_not_eof() { let a = b"A"; let b = b"B"; diff --git a/library/std/src/io/util.rs b/library/std/src/io/util.rs index e43ce4cdb4b..f472361f916 100644 --- a/library/std/src/io/util.rs +++ b/library/std/src/io/util.rs @@ -4,7 +4,9 @@ mod tests; use crate::fmt; -use crate::io::{self, BufRead, Initializer, IoSlice, IoSliceMut, Read, Seek, SeekFrom, Write}; +use crate::io::{ + self, BufRead, Initializer, IoSlice, IoSliceMut, Read, Seek, SeekFrom, SizeHint, Write, +}; /// A reader which is always at EOF. /// @@ -80,6 +82,12 @@ impl fmt::Debug for Empty { } } +impl SizeHint for Empty { + fn upper_bound(&self) -> Option<usize> { + Some(0) + } +} + /// A reader which yields one byte over and over and over and over and over and... /// /// This struct is generally created by calling [`repeat()`]. Please |
