about summary refs log tree commit diff
path: root/src/libstd
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2013-11-17 01:21:36 -0800
committer bors <bors@rust-lang.org> 2013-11-17 01:21:36 -0800
commit 66df86ae98364bd6bc29463dc52d5c9c7d2fdd7a (patch)
tree 323febd79e7a86a12ccd1dd75358b012cbecd76b /src/libstd
parent 0a577f384e22b51bc7299ba9521a6054b3a1ddfc (diff)
parent 01343d3d2987dfea6a3982aba3735a95c9a0c51c (diff)
download rust-66df86ae98364bd6bc29463dc52d5c9c7d2fdd7a.tar.gz
rust-66df86ae98364bd6bc29463dc52d5c9c7d2fdd7a.zip
auto merge of #10466 : alexcrichton/rust/issue-10334, r=cmr
These commits create a `Buffer` trait in the `io` module which represents an I/O reader which is internally buffered. This abstraction is used to reasonably implement `read_line` and `read_until` along with at least an ok implementation of `read_char` (although I certainly haven't benchmarked `read_char`).
Diffstat (limited to 'src/libstd')
-rw-r--r-- src/libstd/io/buffered.rs 107
-rw-r--r-- src/libstd/io/mem.rs 48
-rw-r--r-- src/libstd/io/mod.rs 102
-rw-r--r-- src/libstd/prelude.rs 2
4 files changed, 157 insertions, 102 deletions
diff --git a/src/libstd/io/buffered.rs b/src/libstd/io/buffered.rs
index 68fda813a1f..2d8f1a72166 100644
--- a/src/libstd/io/buffered.rs
+++ b/src/libstd/io/buffered.rs
@@ -55,8 +55,7 @@ use prelude::*;
 
 use num;
 use vec;
-use str;
-use super::{Reader, Writer, Stream, Decorator};
+use super::{Stream, Decorator};
 
 // libuv recommends 64k buffers to maximize throughput
 // https://groups.google.com/forum/#!topic/libuv/oQO1HJAIDdA
@@ -93,45 +92,10 @@ impl<R: Reader> BufferedReader<R> {
     pub fn new(inner: R) -> BufferedReader<R> {
         BufferedReader::with_capacity(DEFAULT_CAPACITY, inner)
     }
+}
 
-    /// Reads the next line of input, interpreted as a sequence of utf-8
-    /// encoded unicode codepoints. If a newline is encountered, then the
-    /// newline is contained in the returned string.
-    pub fn read_line(&mut self) -> Option<~str> {
-        self.read_until('\n' as u8).map(str::from_utf8_owned)
-    }
-
-    /// Reads a sequence of bytes leading up to a specified delimeter. Once the
-    /// specified byte is encountered, reading ceases and the bytes up to and
-    /// including the delimiter are returned.
-    pub fn read_until(&mut self, byte: u8) -> Option<~[u8]> {
-        let mut res = ~[];
-        let mut used;
-        loop {
-            {
-                let available = self.fill_buffer();
-                match available.iter().position(|&b| b == byte) {
-                    Some(i) => {
-                        res.push_all(available.slice_to(i + 1));
-                        used = i + 1;
-                        break
-                    }
-                    None => {
-                        res.push_all(available);
-                        used = available.len();
-                    }
-                }
-            }
-            if used == 0 {
-                break
-            }
-            self.pos += used;
-        }
-        self.pos += used;
-        return if res.len() == 0 {None} else {Some(res)};
-    }
-
-    fn fill_buffer<'a>(&'a mut self) -> &'a [u8] {
+impl<R: Reader> Buffer for BufferedReader<R> {
+    fn fill<'a>(&'a mut self) -> &'a [u8] {
         if self.pos == self.cap {
             match self.inner.read(self.buf) {
                 Some(cap) => {
@@ -143,12 +107,17 @@ impl<R: Reader> BufferedReader<R> {
         }
         return self.buf.slice(self.pos, self.cap);
     }
+
+    fn consume(&mut self, amt: uint) {
+        self.pos += amt;
+        assert!(self.pos <= self.cap);
+    }
 }
 
 impl<R: Reader> Reader for BufferedReader<R> {
     fn read(&mut self, buf: &mut [u8]) -> Option<uint> {
         let nread = {
-            let available = self.fill_buffer();
+            let available = self.fill();
             if available.len() == 0 {
                 return None;
             }
@@ -166,17 +135,9 @@ impl<R: Reader> Reader for BufferedReader<R> {
 }
 
 impl<R: Reader> Decorator<R> for BufferedReader<R> {
-    fn inner(self) -> R {
-        self.inner
-    }
-
-    fn inner_ref<'a>(&'a self) -> &'a R {
-        &self.inner
-    }
-
-    fn inner_mut_ref<'a>(&'a mut self) -> &'a mut R {
-        &mut self.inner
-    }
+    fn inner(self) -> R { self.inner }
+    fn inner_ref<'a>(&'a self) -> &'a R { &self.inner }
+    fn inner_mut_ref<'a>(&'a mut self) -> &'a mut R { &mut self.inner }
 }
 
 /// Wraps a Writer and buffers output to it
@@ -279,13 +240,8 @@ impl<W: Writer> Decorator<W> for LineBufferedWriter<W> {
 struct InternalBufferedWriter<W>(BufferedWriter<W>);
 
 impl<W: Reader> Reader for InternalBufferedWriter<W> {
-    fn read(&mut self, buf: &mut [u8]) -> Option<uint> {
-        self.inner.read(buf)
-    }
-
-    fn eof(&mut self) -> bool {
-        self.inner.eof()
-    }
+    fn read(&mut self, buf: &mut [u8]) -> Option<uint> { self.inner.read(buf) }
+    fn eof(&mut self) -> bool { self.inner.eof() }
 }
 
 /// Wraps a Stream and buffers input and output to and from it
@@ -311,35 +267,24 @@ impl<S: Stream> BufferedStream<S> {
     }
 }
 
-impl<S: Stream> Reader for BufferedStream<S> {
-    fn read(&mut self, buf: &mut [u8]) -> Option<uint> {
-        self.inner.read(buf)
-    }
+impl<S: Stream> Buffer for BufferedStream<S> {
+    fn fill<'a>(&'a mut self) -> &'a [u8] { self.inner.fill() }
+    fn consume(&mut self, amt: uint) { self.inner.consume(amt) }
+}
 
-    fn eof(&mut self) -> bool {
-        self.inner.eof()
-    }
+impl<S: Stream> Reader for BufferedStream<S> {
+    fn read(&mut self, buf: &mut [u8]) -> Option<uint> { self.inner.read(buf) }
+    fn eof(&mut self) -> bool { self.inner.eof() }
 }
 
 impl<S: Stream> Writer for BufferedStream<S> {
-    fn write(&mut self, buf: &[u8]) {
-        self.inner.inner.write(buf)
-    }
-
-    fn flush(&mut self) {
-        self.inner.inner.flush()
-    }
+    fn write(&mut self, buf: &[u8]) { self.inner.inner.write(buf) }
+    fn flush(&mut self) { self.inner.inner.flush() }
 }
 
 impl<S: Stream> Decorator<S> for BufferedStream<S> {
-    fn inner(self) -> S {
-        self.inner.inner.inner()
-    }
-
-    fn inner_ref<'a>(&'a self) -> &'a S {
-        self.inner.inner.inner_ref()
-    }
-
+    fn inner(self) -> S { self.inner.inner.inner() }
+    fn inner_ref<'a>(&'a self) -> &'a S { self.inner.inner.inner_ref() }
     fn inner_mut_ref<'a>(&'a mut self) -> &'a mut S {
         self.inner.inner.inner_mut_ref()
     }
diff --git a/src/libstd/io/mem.rs b/src/libstd/io/mem.rs
index 73f38771ada..4b9c5ca5d4a 100644
--- a/src/libstd/io/mem.rs
+++ b/src/libstd/io/mem.rs
@@ -123,29 +123,18 @@ impl Reader for MemReader {
 
 impl Seek for MemReader {
     fn tell(&self) -> u64 { self.pos as u64 }
-
     fn seek(&mut self, _pos: i64, _style: SeekStyle) { fail!() }
 }
 
-impl Decorator<~[u8]> for MemReader {
-
-    fn inner(self) -> ~[u8] {
-        match self {
-            MemReader { buf: buf, _ } => buf
-        }
-    }
-
-    fn inner_ref<'a>(&'a self) -> &'a ~[u8] {
-        match *self {
-            MemReader { buf: ref buf, _ } => buf
-        }
-    }
+impl Buffer for MemReader {
+    fn fill<'a>(&'a mut self) -> &'a [u8] { self.buf.slice_from(self.pos) }
+    fn consume(&mut self, amt: uint) { self.pos += amt; }
+}
 
-    fn inner_mut_ref<'a>(&'a mut self) -> &'a mut ~[u8] {
-        match *self {
-            MemReader { buf: ref mut buf, _ } => buf
-        }
-    }
+impl Decorator<~[u8]> for MemReader {
+    fn inner(self) -> ~[u8] { self.buf }
+    fn inner_ref<'a>(&'a self) -> &'a ~[u8] { &self.buf }
+    fn inner_mut_ref<'a>(&'a mut self) -> &'a mut ~[u8] { &mut self.buf }
 }
 
 
@@ -244,6 +233,11 @@ impl<'self> Seek for BufReader<'self> {
     fn seek(&mut self, _pos: i64, _style: SeekStyle) { fail!() }
 }
 
+impl<'self> Buffer for BufReader<'self> {
+    fn fill<'a>(&'a mut self) -> &'a [u8] { self.buf.slice_from(self.pos) }
+    fn consume(&mut self, amt: uint) { self.pos += amt; }
+}
+
 ///Calls a function with a MemWriter and returns
 ///the writer's stored vector.
 pub fn with_mem_writer(writeFn:&fn(&mut MemWriter)) -> ~[u8] {
@@ -394,4 +388,20 @@ mod test {
         let buf = with_mem_writer(|wr| wr.write([1,2,3,4,5,6,7]));
         assert_eq!(buf, ~[1,2,3,4,5,6,7]);
     }
+
+    #[test]
+    fn test_read_char() {
+        let mut r = BufReader::new(bytes!("Việt"));
+        assert_eq!(r.read_char(), Some('V'));
+        assert_eq!(r.read_char(), Some('i'));
+        assert_eq!(r.read_char(), Some('ệ'));
+        assert_eq!(r.read_char(), Some('t'));
+        assert_eq!(r.read_char(), None);
+    }
+
+    #[test]
+    fn test_read_bad_char() {
+        let mut r = BufReader::new(bytes!(0x80));
+        assert_eq!(r.read_char(), None);
+    }
 }
diff --git a/src/libstd/io/mod.rs b/src/libstd/io/mod.rs
index 2aed3bdb849..c56189dbb2b 100644
--- a/src/libstd/io/mod.rs
+++ b/src/libstd/io/mod.rs
@@ -247,11 +247,12 @@ use iter::Iterator;
 use option::{Option, Some, None};
 use path::Path;
 use result::{Ok, Err, Result};
+use str;
 use str::{StrSlice, OwnedStr};
 use to_str::ToStr;
 use uint;
 use unstable::finally::Finally;
-use vec::{OwnedVector, MutableVector};
+use vec::{OwnedVector, MutableVector, ImmutableVector, OwnedCopyableVector};
 use vec;
 
 // Reexports
@@ -960,6 +961,105 @@ pub trait Stream: Reader + Writer { }
 
 impl<T: Reader + Writer> Stream for T {}
 
+/// A Buffer is a type of reader which has some form of internal buffering to
+/// allow certain kinds of reading operations to be more optimized than others.
+/// This type extends the `Reader` trait with a few methods that are not
+/// possible to reasonably implement with purely a read interface.
+pub trait Buffer: Reader {
+    /// Fills the internal buffer of this object, returning the buffer contents.
+    /// Note that none of the contents will be "read" in the sense that later
+    /// calling `read` may return the same contents.
+    ///
+    /// The `consume` function must be called with the number of bytes that are
+    /// consumed from this buffer returned to ensure that the bytes are never
+    /// returned twice.
+    ///
+    /// # Failure
+    ///
+    /// This function will raise on the `io_error` condition if a read error is
+    /// encountered.
+    fn fill<'a>(&'a mut self) -> &'a [u8];
+
+    /// Tells this buffer that `amt` bytes have been consumed from the buffer,
+    /// so they should no longer be returned in calls to `fill` or `read`.
+    fn consume(&mut self, amt: uint);
+
+    /// Reads the next line of input, interpreted as a sequence of utf-8
+    /// encoded unicode codepoints. If a newline is encountered, then the
+    /// newline is contained in the returned string.
+    ///
+    /// # Failure
+    ///
+    /// This function will raise on the `io_error` condition if a read error is
+    /// encountered. The task will also fail if the sequence of bytes leading up
+    /// to the newline character is not valid utf-8.
+    fn read_line(&mut self) -> Option<~str> {
+        self.read_until('\n' as u8).map(str::from_utf8_owned)
+    }
+
+    /// Reads a sequence of bytes leading up to a specified delimiter. Once the
+    /// specified byte is encountered, reading ceases and the bytes up to and
+    /// including the delimiter are returned.
+    ///
+    /// # Failure
+    ///
+    /// This function will raise on the `io_error` condition if a read error is
+    /// encountered.
+    fn read_until(&mut self, byte: u8) -> Option<~[u8]> {
+        let mut res = ~[];
+        let mut used;
+        loop {
+            {
+                let available = self.fill();
+                match available.iter().position(|&b| b == byte) {
+                    Some(i) => {
+                        res.push_all(available.slice_to(i + 1));
+                        used = i + 1;
+                        break
+                    }
+                    None => {
+                        res.push_all(available);
+                        used = available.len();
+                    }
+                }
+            }
+            if used == 0 {
+                break
+            }
+            self.consume(used);
+        }
+        self.consume(used);
+        return if res.len() == 0 {None} else {Some(res)};
+    }
+
+    /// Reads the next utf8-encoded character from the underlying stream.
+    ///
+    /// This will return `None` if the following sequence of bytes in the
+    /// stream is not a valid utf8-sequence, or if an I/O error is encountered.
+    ///
+    /// # Failure
+    ///
+    /// This function will raise on the `io_error` condition if a read error is
+    /// encountered.
+    fn read_char(&mut self) -> Option<char> {
+        let width = {
+            let available = self.fill();
+            if available.len() == 0 { return None } // read error
+            str::utf8_char_width(available[0])
+        };
+        if width == 0 { return None } // not utf8
+        let mut buf = [0, ..4];
+        match self.read(buf.mut_slice_to(width)) {
+            Some(n) if n == width => {}
+            Some(*) | None => return None // read error
+        }
+        match str::from_utf8_slice_opt(buf.slice_to(width)) {
+            Some(s) => Some(s.char_at(0)),
+            None => None
+        }
+    }
+}
+
 pub enum SeekStyle {
     /// Seek from the beginning of the stream
     SeekSet,
diff --git a/src/libstd/prelude.rs b/src/libstd/prelude.rs
index d3797cf8099..60fe21bb8d2 100644
--- a/src/libstd/prelude.rs
+++ b/src/libstd/prelude.rs
@@ -67,7 +67,7 @@ pub use num::{Orderable, Signed, Unsigned, Round};
 pub use num::{Primitive, Int, Float, ToStrRadix, ToPrimitive, FromPrimitive};
 pub use path::{GenericPath, Path, PosixPath, WindowsPath};
 pub use ptr::RawPtr;
-pub use io::{Writer, Reader, Seek};
+pub use io::{Buffer, Writer, Reader, Seek};
 pub use send_str::{SendStr, SendStrOwned, SendStrStatic, IntoSendStr};
 pub use str::{Str, StrVector, StrSlice, OwnedStr};
 pub use to_bytes::IterBytes;