diff options
| author | bors <bors@rust-lang.org> | 2013-11-17 01:21:36 -0800 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2013-11-17 01:21:36 -0800 |
| commit | 66df86ae98364bd6bc29463dc52d5c9c7d2fdd7a (patch) | |
| tree | 323febd79e7a86a12ccd1dd75358b012cbecd76b /src/libstd | |
| parent | 0a577f384e22b51bc7299ba9521a6054b3a1ddfc (diff) | |
| parent | 01343d3d2987dfea6a3982aba3735a95c9a0c51c (diff) | |
| download | rust-66df86ae98364bd6bc29463dc52d5c9c7d2fdd7a.tar.gz rust-66df86ae98364bd6bc29463dc52d5c9c7d2fdd7a.zip | |
auto merge of #10466 : alexcrichton/rust/issue-10334, r=cmr
These commits create a `Buffer` trait in the `io` module that represents an I/O reader with internal buffering. This abstraction makes it possible to implement `read_line` and `read_until` reasonably, along with an at-least-acceptable implementation of `read_char` (although I certainly haven't benchmarked `read_char`).
Diffstat (limited to 'src/libstd')
| -rw-r--r-- | src/libstd/io/buffered.rs | 107 | ||||
| -rw-r--r-- | src/libstd/io/mem.rs | 48 | ||||
| -rw-r--r-- | src/libstd/io/mod.rs | 102 | ||||
| -rw-r--r-- | src/libstd/prelude.rs | 2 |
4 files changed, 157 insertions, 102 deletions
diff --git a/src/libstd/io/buffered.rs b/src/libstd/io/buffered.rs index 68fda813a1f..2d8f1a72166 100644 --- a/src/libstd/io/buffered.rs +++ b/src/libstd/io/buffered.rs @@ -55,8 +55,7 @@ use prelude::*; use num; use vec; -use str; -use super::{Reader, Writer, Stream, Decorator}; +use super::{Stream, Decorator}; // libuv recommends 64k buffers to maximize throughput // https://groups.google.com/forum/#!topic/libuv/oQO1HJAIDdA @@ -93,45 +92,10 @@ impl<R: Reader> BufferedReader<R> { pub fn new(inner: R) -> BufferedReader<R> { BufferedReader::with_capacity(DEFAULT_CAPACITY, inner) } +} - /// Reads the next line of input, interpreted as a sequence of utf-8 - /// encoded unicode codepoints. If a newline is encountered, then the - /// newline is contained in the returned string. - pub fn read_line(&mut self) -> Option<~str> { - self.read_until('\n' as u8).map(str::from_utf8_owned) - } - - /// Reads a sequence of bytes leading up to a specified delimeter. Once the - /// specified byte is encountered, reading ceases and the bytes up to and - /// including the delimiter are returned. 
- pub fn read_until(&mut self, byte: u8) -> Option<~[u8]> { - let mut res = ~[]; - let mut used; - loop { - { - let available = self.fill_buffer(); - match available.iter().position(|&b| b == byte) { - Some(i) => { - res.push_all(available.slice_to(i + 1)); - used = i + 1; - break - } - None => { - res.push_all(available); - used = available.len(); - } - } - } - if used == 0 { - break - } - self.pos += used; - } - self.pos += used; - return if res.len() == 0 {None} else {Some(res)}; - } - - fn fill_buffer<'a>(&'a mut self) -> &'a [u8] { +impl<R: Reader> Buffer for BufferedReader<R> { + fn fill<'a>(&'a mut self) -> &'a [u8] { if self.pos == self.cap { match self.inner.read(self.buf) { Some(cap) => { @@ -143,12 +107,17 @@ impl<R: Reader> BufferedReader<R> { } return self.buf.slice(self.pos, self.cap); } + + fn consume(&mut self, amt: uint) { + self.pos += amt; + assert!(self.pos <= self.cap); + } } impl<R: Reader> Reader for BufferedReader<R> { fn read(&mut self, buf: &mut [u8]) -> Option<uint> { let nread = { - let available = self.fill_buffer(); + let available = self.fill(); if available.len() == 0 { return None; } @@ -166,17 +135,9 @@ impl<R: Reader> Reader for BufferedReader<R> { } impl<R: Reader> Decorator<R> for BufferedReader<R> { - fn inner(self) -> R { - self.inner - } - - fn inner_ref<'a>(&'a self) -> &'a R { - &self.inner - } - - fn inner_mut_ref<'a>(&'a mut self) -> &'a mut R { - &mut self.inner - } + fn inner(self) -> R { self.inner } + fn inner_ref<'a>(&'a self) -> &'a R { &self.inner } + fn inner_mut_ref<'a>(&'a mut self) -> &'a mut R { &mut self.inner } } /// Wraps a Writer and buffers output to it @@ -279,13 +240,8 @@ impl<W: Writer> Decorator<W> for LineBufferedWriter<W> { struct InternalBufferedWriter<W>(BufferedWriter<W>); impl<W: Reader> Reader for InternalBufferedWriter<W> { - fn read(&mut self, buf: &mut [u8]) -> Option<uint> { - self.inner.read(buf) - } - - fn eof(&mut self) -> bool { - self.inner.eof() - } + fn read(&mut self, buf: &mut 
[u8]) -> Option<uint> { self.inner.read(buf) } + fn eof(&mut self) -> bool { self.inner.eof() } } /// Wraps a Stream and buffers input and output to and from it @@ -311,35 +267,24 @@ impl<S: Stream> BufferedStream<S> { } } -impl<S: Stream> Reader for BufferedStream<S> { - fn read(&mut self, buf: &mut [u8]) -> Option<uint> { - self.inner.read(buf) - } +impl<S: Stream> Buffer for BufferedStream<S> { + fn fill<'a>(&'a mut self) -> &'a [u8] { self.inner.fill() } + fn consume(&mut self, amt: uint) { self.inner.consume(amt) } +} - fn eof(&mut self) -> bool { - self.inner.eof() - } +impl<S: Stream> Reader for BufferedStream<S> { + fn read(&mut self, buf: &mut [u8]) -> Option<uint> { self.inner.read(buf) } + fn eof(&mut self) -> bool { self.inner.eof() } } impl<S: Stream> Writer for BufferedStream<S> { - fn write(&mut self, buf: &[u8]) { - self.inner.inner.write(buf) - } - - fn flush(&mut self) { - self.inner.inner.flush() - } + fn write(&mut self, buf: &[u8]) { self.inner.inner.write(buf) } + fn flush(&mut self) { self.inner.inner.flush() } } impl<S: Stream> Decorator<S> for BufferedStream<S> { - fn inner(self) -> S { - self.inner.inner.inner() - } - - fn inner_ref<'a>(&'a self) -> &'a S { - self.inner.inner.inner_ref() - } - + fn inner(self) -> S { self.inner.inner.inner() } + fn inner_ref<'a>(&'a self) -> &'a S { self.inner.inner.inner_ref() } fn inner_mut_ref<'a>(&'a mut self) -> &'a mut S { self.inner.inner.inner_mut_ref() } diff --git a/src/libstd/io/mem.rs b/src/libstd/io/mem.rs index 73f38771ada..4b9c5ca5d4a 100644 --- a/src/libstd/io/mem.rs +++ b/src/libstd/io/mem.rs @@ -123,29 +123,18 @@ impl Reader for MemReader { impl Seek for MemReader { fn tell(&self) -> u64 { self.pos as u64 } - fn seek(&mut self, _pos: i64, _style: SeekStyle) { fail!() } } -impl Decorator<~[u8]> for MemReader { - - fn inner(self) -> ~[u8] { - match self { - MemReader { buf: buf, _ } => buf - } - } - - fn inner_ref<'a>(&'a self) -> &'a ~[u8] { - match *self { - MemReader { buf: ref buf, _ } 
=> buf - } - } +impl Buffer for MemReader { + fn fill<'a>(&'a mut self) -> &'a [u8] { self.buf.slice_from(self.pos) } + fn consume(&mut self, amt: uint) { self.pos += amt; } +} - fn inner_mut_ref<'a>(&'a mut self) -> &'a mut ~[u8] { - match *self { - MemReader { buf: ref mut buf, _ } => buf - } - } +impl Decorator<~[u8]> for MemReader { + fn inner(self) -> ~[u8] { self.buf } + fn inner_ref<'a>(&'a self) -> &'a ~[u8] { &self.buf } + fn inner_mut_ref<'a>(&'a mut self) -> &'a mut ~[u8] { &mut self.buf } } @@ -244,6 +233,11 @@ impl<'self> Seek for BufReader<'self> { fn seek(&mut self, _pos: i64, _style: SeekStyle) { fail!() } } +impl<'self> Buffer for BufReader<'self> { + fn fill<'a>(&'a mut self) -> &'a [u8] { self.buf.slice_from(self.pos) } + fn consume(&mut self, amt: uint) { self.pos += amt; } +} + ///Calls a function with a MemWriter and returns ///the writer's stored vector. pub fn with_mem_writer(writeFn:&fn(&mut MemWriter)) -> ~[u8] { @@ -394,4 +388,20 @@ mod test { let buf = with_mem_writer(|wr| wr.write([1,2,3,4,5,6,7])); assert_eq!(buf, ~[1,2,3,4,5,6,7]); } + + #[test] + fn test_read_char() { + let mut r = BufReader::new(bytes!("Việt")); + assert_eq!(r.read_char(), Some('V')); + assert_eq!(r.read_char(), Some('i')); + assert_eq!(r.read_char(), Some('ệ')); + assert_eq!(r.read_char(), Some('t')); + assert_eq!(r.read_char(), None); + } + + #[test] + fn test_read_bad_char() { + let mut r = BufReader::new(bytes!(0x80)); + assert_eq!(r.read_char(), None); + } } diff --git a/src/libstd/io/mod.rs b/src/libstd/io/mod.rs index 2aed3bdb849..c56189dbb2b 100644 --- a/src/libstd/io/mod.rs +++ b/src/libstd/io/mod.rs @@ -247,11 +247,12 @@ use iter::Iterator; use option::{Option, Some, None}; use path::Path; use result::{Ok, Err, Result}; +use str; use str::{StrSlice, OwnedStr}; use to_str::ToStr; use uint; use unstable::finally::Finally; -use vec::{OwnedVector, MutableVector}; +use vec::{OwnedVector, MutableVector, ImmutableVector, OwnedCopyableVector}; use vec; // 
Reexports @@ -960,6 +961,105 @@ pub trait Stream: Reader + Writer { } impl<T: Reader + Writer> Stream for T {} +/// A Buffer is a type of reader which has some form of internal buffering to +/// allow certain kinds of reading operations to be more optimized than others. +/// This type extends the `Reader` trait with a few methods that are not +/// possible to reasonably implement with purely a read interface. +pub trait Buffer: Reader { + /// Fills the internal buffer of this object, returning the buffer contents. + /// Note that none of the contents will be "read" in the sense that later + /// calling `read` may return the same contents. + /// + /// The `consume` function must be called with the number of bytes that are + /// consumed from this buffer returned to ensure that the bytes are never + /// returned twice. + /// + /// # Failure + /// + /// This function will raise on the `io_error` condition if a read error is + /// encountered. + fn fill<'a>(&'a mut self) -> &'a [u8]; + + /// Tells this buffer that `amt` bytes have been consumed from the buffer, + /// so they should no longer be returned in calls to `fill` or `read`. + fn consume(&mut self, amt: uint); + + /// Reads the next line of input, interpreted as a sequence of utf-8 + /// encoded unicode codepoints. If a newline is encountered, then the + /// newline is contained in the returned string. + /// + /// # Failure + /// + /// This function will raise on the `io_error` condition if a read error is + /// encountered. The task will also fail if sequence of bytes leading up to + /// the newline character are not valid utf-8. + fn read_line(&mut self) -> Option<~str> { + self.read_until('\n' as u8).map(str::from_utf8_owned) + } + + /// Reads a sequence of bytes leading up to a specified delimeter. Once the + /// specified byte is encountered, reading ceases and the bytes up to and + /// including the delimiter are returned. 
+ /// + /// # Failure + /// + /// This function will raise on the `io_error` condition if a read error is + /// encountered. + fn read_until(&mut self, byte: u8) -> Option<~[u8]> { + let mut res = ~[]; + let mut used; + loop { + { + let available = self.fill(); + match available.iter().position(|&b| b == byte) { + Some(i) => { + res.push_all(available.slice_to(i + 1)); + used = i + 1; + break + } + None => { + res.push_all(available); + used = available.len(); + } + } + } + if used == 0 { + break + } + self.consume(used); + } + self.consume(used); + return if res.len() == 0 {None} else {Some(res)}; + } + + /// Reads the next utf8-encoded character from the underlying stream. + /// + /// This will return `None` if the following sequence of bytes in the + /// stream are not a valid utf8-sequence, or if an I/O error is encountered. + /// + /// # Failure + /// + /// This function will raise on the `io_error` condition if a read error is + /// encountered. + fn read_char(&mut self) -> Option<char> { + let width = { + let available = self.fill(); + if available.len() == 0 { return None } // read error + str::utf8_char_width(available[0]) + }; + if width == 0 { return None } // not uf8 + let mut buf = [0, ..4]; + match self.read(buf.mut_slice_to(width)) { + Some(n) if n == width => {} + Some(*) | None => return None // read error + } + match str::from_utf8_slice_opt(buf.slice_to(width)) { + Some(s) => Some(s.char_at(0)), + None => None + } + } +} + pub enum SeekStyle { /// Seek from the beginning of the stream SeekSet, diff --git a/src/libstd/prelude.rs b/src/libstd/prelude.rs index d3797cf8099..60fe21bb8d2 100644 --- a/src/libstd/prelude.rs +++ b/src/libstd/prelude.rs @@ -67,7 +67,7 @@ pub use num::{Orderable, Signed, Unsigned, Round}; pub use num::{Primitive, Int, Float, ToStrRadix, ToPrimitive, FromPrimitive}; pub use path::{GenericPath, Path, PosixPath, WindowsPath}; pub use ptr::RawPtr; -pub use io::{Writer, Reader, Seek}; +pub use io::{Buffer, Writer, Reader, Seek}; 
pub use send_str::{SendStr, SendStrOwned, SendStrStatic, IntoSendStr}; pub use str::{Str, StrVector, StrSlice, OwnedStr}; pub use to_bytes::IterBytes; |
