about summary refs log tree commit diff
path: root/src/libstd
diff options
context:
space:
mode:
authorSteven Fackler <sfackler@gmail.com>2015-03-28 18:47:29 -0700
committerSteven Fackler <sfackler@gmail.com>2015-03-28 22:32:08 -0700
commitccb4e8423e50fef0f13a642715c3e617ee9f78fe (patch)
tree41b8423da7c42def2bffba2fc14402d8f4df3afe /src/libstd
parent3e7385aae9d58c8e12137d7c07aad48551048c13 (diff)
downloadrust-ccb4e8423e50fef0f13a642715c3e617ee9f78fe.tar.gz
rust-ccb4e8423e50fef0f13a642715c3e617ee9f78fe.zip
Fix massive performance issue in read_to_end
with_end_to_cap is enormously expensive now that it's initializing
memory since it involves 64k allocation + memset on every call. This is
most noticable when calling read_to_end on very small readers, where the
new version if **4 orders of magnitude** faster.

BufReader also depended on with_end_to_cap so I've rewritten it in its
original form.

As a bonus, converted the buffered IO struct Debug impls to use the
debug builders.

Fixes #23815
Diffstat (limited to 'src/libstd')
-rw-r--r--src/libstd/io/buffered.rs71
-rw-r--r--src/libstd/io/mod.rs64
-rw-r--r--src/libstd/lib.rs1
3 files changed, 77 insertions, 59 deletions
diff --git a/src/libstd/io/buffered.rs b/src/libstd/io/buffered.rs
index 998f37e6a68..98581fc43f8 100644
--- a/src/libstd/io/buffered.rs
+++ b/src/libstd/io/buffered.rs
@@ -18,8 +18,9 @@ use io::prelude::*;
 use cmp;
 use error::{self, FromError};
 use fmt;
-use io::{self, Cursor, DEFAULT_BUF_SIZE, Error, ErrorKind};
+use io::{self, DEFAULT_BUF_SIZE, Error, ErrorKind};
 use ptr;
+use iter;
 
 /// Wraps a `Read` and buffers input from it
 ///
@@ -30,7 +31,9 @@ use ptr;
 #[stable(feature = "rust1", since = "1.0.0")]
 pub struct BufReader<R> {
     inner: R,
-    buf: Cursor<Vec<u8>>,
+    buf: Vec<u8>,
+    pos: usize,
+    cap: usize,
 }
 
 impl<R: Read> BufReader<R> {
@@ -43,9 +46,13 @@ impl<R: Read> BufReader<R> {
     /// Creates a new `BufReader` with the specified buffer capacity
     #[stable(feature = "rust1", since = "1.0.0")]
     pub fn with_capacity(cap: usize, inner: R) -> BufReader<R> {
+        let mut buf = Vec::with_capacity(cap);
+        buf.extend(iter::repeat(0).take(cap));
         BufReader {
             inner: inner,
-            buf: Cursor::new(Vec::with_capacity(cap)),
+            buf: buf,
+            pos: 0,
+            cap: 0,
         }
     }
 
@@ -74,12 +81,15 @@ impl<R: Read> Read for BufReader<R> {
         // If we don't have any buffered data and we're doing a massive read
         // (larger than our internal buffer), bypass our internal buffer
         // entirely.
-        if self.buf.get_ref().len() == self.buf.position() as usize &&
-            buf.len() >= self.buf.get_ref().capacity() {
+        if self.pos == self.cap && buf.len() >= self.buf.len() {
             return self.inner.read(buf);
         }
-        try!(self.fill_buf());
-        self.buf.read(buf)
+        let nread = {
+            let mut rem = try!(self.fill_buf());
+            try!(rem.read(buf))
+        };
+        self.consume(nread);
+        Ok(nread)
     }
 }
 
@@ -88,26 +98,25 @@ impl<R: Read> BufRead for BufReader<R> {
     fn fill_buf(&mut self) -> io::Result<&[u8]> {
         // If we've reached the end of our internal buffer then we need to fetch
         // some more data from the underlying reader.
-        if self.buf.position() as usize == self.buf.get_ref().len() {
-            self.buf.set_position(0);
-            let v = self.buf.get_mut();
-            v.truncate(0);
-            let inner = &mut self.inner;
-            try!(super::with_end_to_cap(v, |b| inner.read(b)));
+        if self.pos == self.cap {
+            self.cap = try!(self.inner.read(&mut self.buf));
+            self.pos = 0;
         }
-        self.buf.fill_buf()
+        Ok(&self.buf[self.pos..self.cap])
     }
 
     fn consume(&mut self, amt: usize) {
-        self.buf.consume(amt)
+        self.pos = cmp::min(self.pos + amt, self.cap);
     }
 }
 
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<R> fmt::Debug for BufReader<R> where R: fmt::Debug {
     fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
-        write!(fmt, "BufReader {{ reader: {:?}, buffer: {}/{} }}",
-               self.inner, self.buf.position(), self.buf.get_ref().len())
+        fmt.debug_struct("BufReader")
+            .field("reader", &self.inner)
+            .field("buffer", &format_args!("{}/{}", self.cap - self.pos, self.buf.len()))
+            .finish()
     }
 }
 
@@ -222,8 +231,10 @@ impl<W: Write> Write for BufWriter<W> {
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<W: Write> fmt::Debug for BufWriter<W> where W: fmt::Debug {
     fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
-        write!(fmt, "BufWriter {{ writer: {:?}, buffer: {}/{} }}",
-               self.inner.as_ref().unwrap(), self.buf.len(), self.buf.capacity())
+        fmt.debug_struct("BufWriter")
+            .field("writer", &self.inner.as_ref().unwrap())
+            .field("buffer", &format_args!("{}/{}", self.buf.len(), self.buf.capacity()))
+            .finish()
     }
 }
 
@@ -337,9 +348,11 @@ impl<W: Write> Write for LineWriter<W> {
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<W: Write> fmt::Debug for LineWriter<W> where W: fmt::Debug {
     fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
-        write!(fmt, "LineWriter {{ writer: {:?}, buffer: {}/{} }}",
-               self.inner.inner, self.inner.buf.len(),
-               self.inner.buf.capacity())
+        fmt.debug_struct("LineWriter")
+            .field("writer", &self.inner.inner)
+            .field("buffer",
+                   &format_args!("{}/{}", self.inner.buf.len(), self.inner.buf.capacity()))
+            .finish()
     }
 }
 
@@ -415,10 +428,10 @@ impl<S: Read + Write> BufStream<S> {
     /// Any leftover data in the read buffer is lost.
     #[stable(feature = "rust1", since = "1.0.0")]
     pub fn into_inner(self) -> Result<S, IntoInnerError<BufStream<S>>> {
-        let BufReader { inner: InternalBufWriter(w), buf } = self.inner;
+        let BufReader { inner: InternalBufWriter(w), buf, pos, cap } = self.inner;
         w.into_inner().map_err(|IntoInnerError(w, e)| {
             IntoInnerError(BufStream {
-                inner: BufReader { inner: InternalBufWriter(w), buf: buf },
+                inner: BufReader { inner: InternalBufWriter(w), buf: buf, pos: pos, cap: cap },
             }, e)
         })
     }
@@ -452,10 +465,12 @@ impl<S: Write> fmt::Debug for BufStream<S> where S: fmt::Debug {
     fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
         let reader = &self.inner;
         let writer = &self.inner.inner.0;
-        write!(fmt, "BufStream {{ stream: {:?}, write_buffer: {}/{}, read_buffer: {}/{} }}",
-               writer.inner,
-               writer.buf.len(), writer.buf.capacity(),
-               reader.buf.position(), reader.buf.get_ref().len())
+        fmt.debug_struct("BufStream")
+            .field("stream", &writer.inner)
+            .field("write_buffer", &format_args!("{}/{}", writer.buf.len(), writer.buf.capacity()))
+            .field("read_buffer",
+                   &format_args!("{}/{}", reader.cap - reader.pos, reader.buf.len()))
+            .finish()
     }
 }
 
diff --git a/src/libstd/io/mod.rs b/src/libstd/io/mod.rs
index 5d62f1341e3..4a93d96becc 100644
--- a/src/libstd/io/mod.rs
+++ b/src/libstd/io/mod.rs
@@ -48,30 +48,6 @@ mod stdio;
 
 const DEFAULT_BUF_SIZE: usize = 64 * 1024;
 
-// Acquires a slice of the vector `v` from its length to its capacity
-// (after initializing the data), reads into it, and then updates the length.
-//
-// This function is leveraged to efficiently read some bytes into a destination
-// vector without extra copying and taking advantage of the space that's already
-// in `v`.
-fn with_end_to_cap<F>(v: &mut Vec<u8>, f: F) -> Result<usize>
-    where F: FnOnce(&mut [u8]) -> Result<usize>
-{
-    let len = v.len();
-    let new_area = v.capacity() - len;
-    v.extend(iter::repeat(0).take(new_area));
-    match f(&mut v[len..]) {
-        Ok(n) => {
-            v.truncate(len + n);
-            Ok(n)
-        }
-        Err(e) => {
-            v.truncate(len);
-            Err(e)
-        }
-    }
-}
-
 // A few methods below (read_to_string, read_line) will append data into a
 // `String` buffer, but we need to be pretty careful when doing this. The
 // implementation will just call `.as_mut_vec()` and then delegate to a
@@ -116,19 +92,45 @@ fn append_to_string<F>(buf: &mut String, f: F) -> Result<usize>
     }
 }
 
+// This uses an adaptive system to extend the vector when it fills. We want to
+// avoid paying to allocate and zero a huge chunk of memory if the reader only
+// has 4 bytes while still making large reads if the reader does have a ton
+// of data to return. Simply tacking on an extra DEFAULT_BUF_SIZE space every
+// time is 4,500 times (!) slower than this if the reader has a very small
+// amount of data to return.
 fn read_to_end<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>) -> Result<usize> {
-    let mut read = 0;
+    let start_len = buf.len();
+    let mut len = start_len;
+    let mut cap_bump = 16;
+    let ret;
     loop {
-        if buf.capacity() == buf.len() {
-            buf.reserve(DEFAULT_BUF_SIZE);
+        if len == buf.len() {
+            if buf.capacity() == buf.len() {
+                if cap_bump < DEFAULT_BUF_SIZE {
+                    cap_bump *= 2;
+                }
+                buf.reserve(cap_bump);
+            }
+            let new_area = buf.capacity() - buf.len();
+            buf.extend(iter::repeat(0).take(new_area));
         }
-        match with_end_to_cap(buf, |b| r.read(b)) {
-            Ok(0) => return Ok(read),
-            Ok(n) => read += n,
+
+        match r.read(&mut buf[len..]) {
+            Ok(0) => {
+                ret = Ok(len - start_len);
+                break;
+            }
+            Ok(n) => len += n,
             Err(ref e) if e.kind() == ErrorKind::Interrupted => {}
-            Err(e) => return Err(e),
+            Err(e) => {
+                ret = Err(e);
+                break;
+            }
         }
     }
+
+    buf.truncate(len);
+    ret
 }
 
 /// A trait for objects which are byte-oriented sources.
diff --git a/src/libstd/lib.rs b/src/libstd/lib.rs
index 5f5f2fed567..b7cb8f9ed50 100644
--- a/src/libstd/lib.rs
+++ b/src/libstd/lib.rs
@@ -128,6 +128,7 @@
 #![feature(into_cow)]
 #![feature(slice_patterns)]
 #![feature(std_misc)]
+#![feature(debug_builders)]
 #![cfg_attr(test, feature(test, rustc_private, std_misc))]
 
 // Don't link to std. We are std.