about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMara <m-ou.se@m-ou.se>2021-03-05 10:57:17 +0100
committerGitHub <noreply@github.com>2021-03-05 10:57:17 +0100
commit60138110d7b00c8c788526cad525942e732db6ca (patch)
treebf0ee3d1f4fe6994b62a6298b5cbed5f7c04caad
parentec2619ca6219a74f3c97ac7c32aeecd79368788f (diff)
parent7674ae1a4e10ab6bafbda57f9fc2175fdf012f5e (diff)
downloadrust-60138110d7b00c8c788526cad525942e732db6ca.tar.gz
rust-60138110d7b00c8c788526cad525942e732db6ca.zip
Rollup merge of #81136 - Xavientois:io_reader_size_hint, r=cramertj
Improved IO Bytes Size Hint

After trying to implement better `size_hint()` return values for `File` in [this PR](https://github.com/rust-lang/rust/pull/81044) and changing to implementing it for `BufReader` in [this PR](https://github.com/rust-lang/rust/pull/81052), I have arrived at this implementation that provides tighter bounds for the `Bytes` iterator of various readers including `BufReader`, `Empty`, and `Chain`.

Unfortunately, for `BufReader`, the size_hint only improves after calling `fill_buffer` due to it using the contents of the buffer for the hint. Nevertheless, the the tighter bounds  should result in better pre-allocation of space to handle the contents of the `Bytes` iterator.

Closes #81052
-rw-r--r--library/std/src/io/buffered/bufreader.rs10
-rw-r--r--library/std/src/io/mod.rs37
-rw-r--r--library/std/src/io/tests.rs49
-rw-r--r--library/std/src/io/util.rs10
4 files changed, 103 insertions, 3 deletions
diff --git a/library/std/src/io/buffered/bufreader.rs b/library/std/src/io/buffered/bufreader.rs
index 987371f50ec..839c64ee5c4 100644
--- a/library/std/src/io/buffered/bufreader.rs
+++ b/library/std/src/io/buffered/bufreader.rs
@@ -1,6 +1,8 @@
 use crate::cmp;
 use crate::fmt;
-use crate::io::{self, BufRead, Initializer, IoSliceMut, Read, Seek, SeekFrom, DEFAULT_BUF_SIZE};
+use crate::io::{
+    self, BufRead, Initializer, IoSliceMut, Read, Seek, SeekFrom, SizeHint, DEFAULT_BUF_SIZE,
+};
 
 /// The `BufReader<R>` struct adds buffering to any reader.
 ///
@@ -435,3 +437,9 @@ impl<R: Seek> Seek for BufReader<R> {
         })
     }
 }
+
+impl<T> SizeHint for BufReader<T> {
+    fn lower_bound(&self) -> usize {
+        self.buffer().len()
+    }
+}
diff --git a/library/std/src/io/mod.rs b/library/std/src/io/mod.rs
index 22914987405..17002e3b860 100644
--- a/library/std/src/io/mod.rs
+++ b/library/std/src/io/mod.rs
@@ -2238,6 +2238,19 @@ impl<T: BufRead, U: BufRead> BufRead for Chain<T, U> {
     }
 }
 
+impl<T, U> SizeHint for Chain<T, U> {
+    fn lower_bound(&self) -> usize {
+        SizeHint::lower_bound(&self.first) + SizeHint::lower_bound(&self.second)
+    }
+
+    fn upper_bound(&self) -> Option<usize> {
+        match (SizeHint::upper_bound(&self.first), SizeHint::upper_bound(&self.second)) {
+            (Some(first), Some(second)) => Some(first + second),
+            _ => None,
+        }
+    }
+}
+
 /// Reader adaptor which limits the bytes read from an underlying reader.
 ///
 /// This struct is generally created by calling [`take`] on a reader.
@@ -2464,6 +2477,30 @@ impl<R: Read> Iterator for Bytes<R> {
             };
         }
     }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        SizeHint::size_hint(&self.inner)
+    }
+}
+
+trait SizeHint {
+    fn lower_bound(&self) -> usize;
+
+    fn upper_bound(&self) -> Option<usize>;
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        (self.lower_bound(), self.upper_bound())
+    }
+}
+
+impl<T> SizeHint for T {
+    default fn lower_bound(&self) -> usize {
+        0
+    }
+
+    default fn upper_bound(&self) -> Option<usize> {
+        None
+    }
 }
 
 /// An iterator over the contents of an instance of `BufRead` split on a
diff --git a/library/std/src/io/tests.rs b/library/std/src/io/tests.rs
index f176c2f088c..a85dd0d9827 100644
--- a/library/std/src/io/tests.rs
+++ b/library/std/src/io/tests.rs
@@ -1,7 +1,7 @@
 use super::{repeat, Cursor, SeekFrom};
 use crate::cmp::{self, min};
 use crate::io::{self, IoSlice, IoSliceMut};
-use crate::io::{BufRead, Read, Seek, Write};
+use crate::io::{BufRead, BufReader, Read, Seek, Write};
 use crate::ops::Deref;
 
 #[test]
@@ -199,6 +199,53 @@ fn chain_bufread() {
 }
 
 #[test]
+fn bufreader_size_hint() {
+    let testdata = b"ABCDEFGHIJKL";
+    let mut buf_reader = BufReader::new(&testdata[..]);
+    assert_eq!(buf_reader.buffer().len(), 0);
+
+    let buffer_length = testdata.len();
+    buf_reader.fill_buf().unwrap();
+
+    // Check that size hint matches buffer contents
+    let mut buffered_bytes = buf_reader.bytes();
+    let (lower_bound, _upper_bound) = buffered_bytes.size_hint();
+    assert_eq!(lower_bound, buffer_length);
+
+    // Check that size hint matches buffer contents after advancing
+    buffered_bytes.next().unwrap().unwrap();
+    let (lower_bound, _upper_bound) = buffered_bytes.size_hint();
+    assert_eq!(lower_bound, buffer_length - 1);
+}
+
+#[test]
+fn empty_size_hint() {
+    let size_hint = io::empty().bytes().size_hint();
+    assert_eq!(size_hint, (0, Some(0)));
+}
+
+#[test]
+fn chain_empty_size_hint() {
+    let chain = io::empty().chain(io::empty());
+    let size_hint = chain.bytes().size_hint();
+    assert_eq!(size_hint, (0, Some(0)));
+}
+
+#[test]
+fn chain_size_hint() {
+    let testdata = b"ABCDEFGHIJKL";
+    let mut buf_reader_1 = BufReader::new(&testdata[..6]);
+    let mut buf_reader_2 = BufReader::new(&testdata[6..]);
+
+    buf_reader_1.fill_buf().unwrap();
+    buf_reader_2.fill_buf().unwrap();
+
+    let chain = buf_reader_1.chain(buf_reader_2);
+    let size_hint = chain.bytes().size_hint();
+    assert_eq!(size_hint, (testdata.len(), None));
+}
+
+#[test]
 fn chain_zero_length_read_is_not_eof() {
     let a = b"A";
     let b = b"B";
diff --git a/library/std/src/io/util.rs b/library/std/src/io/util.rs
index e43ce4cdb4b..f472361f916 100644
--- a/library/std/src/io/util.rs
+++ b/library/std/src/io/util.rs
@@ -4,7 +4,9 @@
 mod tests;
 
 use crate::fmt;
-use crate::io::{self, BufRead, Initializer, IoSlice, IoSliceMut, Read, Seek, SeekFrom, Write};
+use crate::io::{
+    self, BufRead, Initializer, IoSlice, IoSliceMut, Read, Seek, SeekFrom, SizeHint, Write,
+};
 
 /// A reader which is always at EOF.
 ///
@@ -80,6 +82,12 @@ impl fmt::Debug for Empty {
     }
 }
 
+impl SizeHint for Empty {
+    fn upper_bound(&self) -> Option<usize> {
+        Some(0)
+    }
+}
+
 /// A reader which yields one byte over and over and over and over and over and...
 ///
 /// This struct is generally created by calling [`repeat()`]. Please