about summary refs log tree commit diff
path: root/src/libstd/fileinput.rs
diff options
context:
space:
mode:
authorHuon Wilson <dbau.pp+github@gmail.com>2013-03-25 21:34:36 +1100
committerHuon Wilson <dbau.pp+github@gmail.com>2013-04-03 11:36:05 +1100
commit1e28d8fdb63a469ec9b1c8f2e79eae04f228b65e (patch)
treec3f165a9bc5a9d32bc0fd6aea36b4e470b437acc /src/libstd/fileinput.rs
parente4edfa046af08a345c607ec9c80426d9eabc7c82 (diff)
downloadrust-1e28d8fdb63a469ec9b1c8f2e79eae04f228b65e.tar.gz
rust-1e28d8fdb63a469ec9b1c8f2e79eae04f228b65e.zip
libstd: implement io::Reader for fileinput.
Diffstat (limited to 'src/libstd/fileinput.rs')
-rw-r--r--src/libstd/fileinput.rs397
1 files changed, 271 insertions, 126 deletions
diff --git a/src/libstd/fileinput.rs b/src/libstd/fileinput.rs
index 3f281150331..0056b34eccb 100644
--- a/src/libstd/fileinput.rs
+++ b/src/libstd/fileinput.rs
@@ -9,7 +9,7 @@
 // except according to those terms.
 
 /*!
-A convience device for iterating through the lines in a series of
+A library for iterating through the lines in a series of
 files. Very similar to [the Python module of the same
 name](http://docs.python.org/3.3/library/fileinput.html).
 
@@ -47,8 +47,9 @@ or a program that numbers lines after concatenating two files
                                    line));
     }
 
-The 2 `_vec` functions take a vec of file names (and empty means
-read from `stdin`), the other 2 use the command line arguments.
+The two `input_vec*` functions take a vec of file names (where empty
+means read from `stdin`), the other two functions use the command line
+arguments.
 
 # Advanced
 
@@ -56,22 +57,25 @@ For more complicated uses (e.g. if one needs to pause iteration and
 resume it later), a `FileInput` instance can be constructed via the
 `from_vec`, `from_vec_raw` and `from_args` functions.
 
-Once created, the `lines_each` and `lines_each_state` methods
-allow one to iterate on the lines (the latter provides more
-information about the position within the iteration to the caller.
+Once created, the `each_line` (from the `core::io::ReaderUtil` trait)
+and `each_line_state` methods allow one to iterate on the lines; the
+latter provides more information about the position within the
+iteration to the caller.
 
 It is possible (and safe) to skip lines and files using the
-`read_line` and `next_file` methods.
+`read_line` and `next_file` methods. Also, `FileInput` implements
+`core::io::Reader`, and the state will be updated correctly while
+using any of those methods.
 
-E.g. the following (pointless) program reads until an empty line,
-pauses for user input, skips the current file and then numbers the
-remaining lines (where the numbers are from the start of the file,
-rather than the total line count).
+E.g. the following program reads until an empty line, pauses for user
+input, skips the current file and then numbers the remaining lines
+(where the numbers are from the start of each file, rather than the
+total line count).
 
-    let mut in = FileInput::from_vec(pathify([~"a.txt", ~"b.txt", ~"c.txt"],
+    let in = FileInput::from_vec(pathify([~"a.txt", ~"b.txt", ~"c.txt"],
                                              true));
 
-    for in.lines_each |line| {
+    for in.each_line |line| {
         if line.is_empty() {
             break
         }
@@ -83,20 +87,23 @@ rather than the total line count).
     if io::stdin().read_line() == ~"yes" {
         in.next_file(); // skip!
 
-        for in.lines_each_state |line, state| {
+        for in.each_line_state |line, state| {
            io::println(fmt!("%u: %s", state.line_num_file,
                                       line))
         }
     }
 */
 
+#[allow(deprecated_mutable_fields)];
+
 use core::prelude::*;
 use core::io::ReaderUtil;
 
 /**
-A summary of the internal state of a FileInput object. `line_num` and
-`line_num_file` represent the number of lines read in total and in the
-current file respectively.
+A summary of the internal state of a `FileInput` object. `line_num`
+and `line_num_file` represent the number of lines read in total and in
+the current file respectively. `current_path` is `None` if the current
+file is `stdin`.
 */
 pub struct FileInputState {
     current_path: Option<Path>,
@@ -114,18 +121,32 @@ impl FileInputState {
     }
 }
 
-priv struct FileInput {
+struct FileInput_ {
     /**
     `Some(path)` is the file represented by `path`, `None` is
     `stdin`. Consumed as the files are read.
     */
-    files: ~[Option<Path>],
+    priv files: ~[Option<Path>],
     /**
     The current file: `Some(r)` for an open file, `None` before
     starting and after reading everything.
     */
-    current_reader: Option<@io::Reader>,
-    state: FileInputState
+    priv current_reader: Option<@io::Reader>,
+    priv state: FileInputState,
+
+    /**
+    Used to keep track of whether we need to insert the newline at the
+    end of a file that is missing it, which is needed to separate the
+    last and first lines.
+    */
+    priv previous_was_newline: bool
+}
+
+// XXX: remove this when Reader has &mut self. Should be removable via
+// "self.fi." -> "self." and renaming FileInput_. Documentation above
+// will likely have to be updated to use `let mut in = ...`.
+pub struct FileInput  {
+    priv mut fi: FileInput_
 }
 
 impl FileInput {
@@ -134,7 +155,7 @@ impl FileInput {
     vec means lines are read from `stdin` (use `from_vec_raw` to stop
     this behaviour). Any occurrence of `None` represents `stdin`.
     */
-    static pure fn from_vec(files: ~[Option<Path>]) -> FileInput {
+    pub fn from_vec(files: ~[Option<Path>]) -> FileInput {
         FileInput::from_vec_raw(
             if files.is_empty() {
                 ~[None]
@@ -147,31 +168,35 @@ impl FileInput {
     Identical to `from_vec`, but an empty `files` vec stays
     empty. (`None` is `stdin`.)
     */
-    static pure fn from_vec_raw(files: ~[Option<Path>])
+    pub fn from_vec_raw(files: ~[Option<Path>])
                                          -> FileInput {
-        FileInput {
-            files: files,
-            current_reader: None,
-            state: FileInputState {
-                current_path: None,
-                line_num: 0,
-                line_num_file: 0
+        FileInput{
+            fi: FileInput_ {
+                files: files,
+                current_reader: None,
+                state: FileInputState {
+                    current_path: None,
+                    line_num: 0,
+                    line_num_file: 0
+                },
+                // there was no previous unended line
+                previous_was_newline: true
             }
         }
     }
 
     /**
     Create a `FileInput` object from the command line
-    arguments. `-` represents `stdin`.
+    arguments. `"-"` represents `stdin`.
     */
-    static fn from_args() -> FileInput {
+    pub fn from_args() -> FileInput {
         let args = os::args(),
             pathed = pathify(args.tail(), true);
         FileInput::from_vec(pathed)
     }
 
     priv fn current_file_eof(&self) -> bool {
-        match self.current_reader {
+        match self.fi.current_reader {
             None => false,
             Some(r) => r.eof()
         }
@@ -180,110 +205,143 @@ impl FileInput {
     /**
     Skip to the next file in the queue. Can `fail` when opening
     a file.
+
+    Returns `false` if there are no more files, and `true` when it
+    successfully opens the next file.
     */
-    pub fn next_file(&mut self) {
+
+    pub fn next_file(&self) -> bool {
         // No more files
-        if self.files.is_empty() {
-            self.current_reader = None;
-            return;
+
+        // Compiler whines about "illegal borrow unless pure" for
+        // files.is_empty()
+        if unsafe { self.fi.files.is_empty() } {
+            self.fi.current_reader = None;
+            return false;
         }
 
-        let path_option = self.files.shift(),
+        let path_option = self.fi.files.shift(),
             file = match path_option {
                 None => io::stdin(),
                 Some(ref path) => io::file_reader(path).get()
             };
 
-        self.current_reader = Some(file);
-        self.state.current_path = path_option;
-        self.state.line_num_file = 0;
+        self.fi.current_reader = Some(file);
+        self.fi.state.current_path = path_option;
+        self.fi.state.line_num_file = 0;
+        true
     }
 
     /**
     Attempt to open the next file if there is none currently open,
     or if the current one is EOF'd.
+
+    Returns `true` if it had to move to the next file and did
+    so successfully.
     */
-    priv fn next_file_if_eof(&mut self) {
-        match self.current_reader {
+    priv fn next_file_if_eof(&self) -> bool {
+        match self.fi.current_reader {
             None => self.next_file(),
             Some(r) => {
                 if r.eof() {
                     self.next_file()
+                } else {
+                    false
                 }
             }
         }
     }
 
     /**
-    Read a single line. Returns `None` if there are no remaining lines
-    in any remaining file. (Automatically opens files as required, see
-    `next_file` for details.)
+    Apply `f` to each line successively, along with some state
+    (line numbers and file names, see documentation for
+    `FileInputState`). Otherwise identical to `each_line`.
+    */
+    pub fn each_line_state(&self,
+                            f: &fn(&str, FileInputState) -> bool) {
+         self.each_line(|line| f(line, copy self.fi.state));
+    }
 
-    (Name to avoid conflicting with `core::io::ReaderUtil::read_line`.)
+
+    /**
+    Retrieve the current `FileInputState` information.
     */
-    pub fn next_line(&mut self) -> Option<~str> {
+    pub fn state(&self) -> FileInputState {
+        copy self.fi.state
+    }
+}
+
+impl io::Reader for FileInput {
+    fn read_byte(&self) -> int {
         loop {
-            // iterate until there is a file that can be read from
-            self.next_file_if_eof();
-            match self.current_reader {
-                None => {
-                    // no file has any content
-                    return None;
-                },
+            let stepped = self.next_file_if_eof();
+
+            // if we moved to the next file, and the previous
+            // character wasn't \n, then there is an unfinished line
+            // from the previous file. This library models
+            // line-by-line processing and the trailing line of the
+            // previous file and the leading of the current file
+            // should be considered different, so we need to insert a
+            // fake line separator
+            if stepped && !self.fi.previous_was_newline {
+                self.fi.state.line_num += 1;
+                self.fi.state.line_num_file += 1;
+                self.fi.previous_was_newline = true;
+                return '\n' as int;
+            }
+
+            match self.fi.current_reader {
+                None => return -1,
                 Some(r) => {
-                    let l = r.read_line();
+                    let b = r.read_byte();
 
-                    // at the end of this file, and we read nothing, so
-                    // go to the next file
-                    if r.eof() && l.is_empty() {
+                    if b < 0 {
                         loop;
                     }
-                    self.state.line_num += 1;
-                    self.state.line_num_file += 1;
-                    return Some(l);
+
+                    if b == '\n' as int {
+                        self.fi.state.line_num += 1;
+                        self.fi.state.line_num_file += 1;
+                        self.fi.previous_was_newline = true;
+                    } else {
+                        self.fi.previous_was_newline = false;
+                    }
+
+                    return b;
                 }
             }
         }
     }
+    fn read(&self, buf: &mut [u8], len: uint) -> uint {
+        let mut count = 0;
+        while count < len {
+            let b = self.read_byte();
+            if b < 0 { break }
+
+            buf[count] = b as u8;
+            count += 1;
+        }
 
-    /**
-    Call `f` on the lines in the files in succession, stopping if
-    it ever returns `false`.
+        count
+    }
+    fn eof(&self) -> bool {
+        // we've run out of files, and current_reader is either None or eof.
 
-    State is preserved across calls.
+        // compiler whines about illegal borrows for files.is_empty()
+        (unsafe { self.fi.files.is_empty() }) &&
+            match self.fi.current_reader { None => true, Some(r) => r.eof() }
 
-    (The name is to avoid conflict with
-    `core::io::ReaderUtil::each_line`.)
-    */
-    pub fn lines_each(&mut self, f: &fn(~str) -> bool) {
-        loop {
-            match self.next_line() {
-                None => break,
-                Some(line) => {
-                    if !f(line) {
-                        break;
-                    }
-                }
-            }
+    }
+    fn seek(&self, offset: int, whence: io::SeekStyle) {
+        match self.fi.current_reader {
+            None => {},
+            Some(r) => r.seek(offset, whence)
         }
     }
-
-    /**
-    Apply `f` to each line successively, along with some state
-    (line numbers and file names, see documentation for
-    `FileInputState`). Otherwise identical to `lines_each`.
-    */
-    pub fn lines_each_state(&mut self,
-                            f: &fn(~str, &FileInputState) -> bool) {
-        loop {
-            match self.next_line() {
-                None => break,
-                Some(line) => {
-                    if !f(line, &self.state) {
-                        break;
-                    }
-                }
-            }
+    fn tell(&self) -> uint {
+        match self.fi.current_reader {
+            None => 0,
+            Some(r) => r.tell()
         }
     }
 }
@@ -291,7 +349,7 @@ impl FileInput {
 /**
 Convert a list of strings to an appropriate form for a `FileInput`
 instance. `stdin_hyphen` controls whether `-` represents `stdin` or
-not.
+a literal `-`.
 */
 // XXX: stupid, unclear name
 pub fn pathify(vec: &[~str], stdin_hyphen : bool) -> ~[Option<Path>] {
@@ -310,9 +368,9 @@ reading from `stdin`).
 
 Fails when attempting to read from a file that can't be opened.
 */
-pub fn input(f: &fn(~str) -> bool) {
+pub fn input(f: &fn(&str) -> bool) {
     let mut i = FileInput::from_args();
-    i.lines_each(f);
+    i.each_line(f);
 }
 
 /**
@@ -322,31 +380,31 @@ provided at each call.
 
 Fails when attempting to read from a file that can't be opened.
 */
-pub fn input_state(f: &fn(~str, &FileInputState) -> bool) {
+pub fn input_state(f: &fn(&str, FileInputState) -> bool) {
     let mut i = FileInput::from_args();
-    i.lines_each_state(f);
+    i.each_line_state(f);
 }
 
 /**
-Iterate over a vec of files (an empty vec implies just `stdin`).
+Iterate over a vector of files (an empty vector implies just `stdin`).
 
 Fails when attempting to read from a file that can't be opened.
 */
-pub fn input_vec(files: ~[Option<Path>], f: &fn(~str) -> bool) {
+pub fn input_vec(files: ~[Option<Path>], f: &fn(&str) -> bool) {
     let mut i = FileInput::from_vec(files);
-    i.lines_each(f);
+    i.each_line(f);
 }
 
 /**
-Iterate over a vec of files (an empty vec implies just `stdin`) with
-the current state of the iteration provided at each call.
+Iterate over a vector of files (an empty vector implies just `stdin`)
+with the current state of the iteration provided at each call.
 
 Fails when attempting to read from a file that can't be opened.
 */
 pub fn input_vec_state(files: ~[Option<Path>],
-                       f: &fn(~str, &FileInputState) -> bool) {
+                       f: &fn(&str, FileInputState) -> bool) {
     let mut i = FileInput::from_vec(files);
-    i.lines_each_state(f);
+    i.each_line_state(f);
 }
 
 #[cfg(test)]
@@ -371,11 +429,61 @@ mod test {
             paths = ~[Some(Path("some/path")),
                       Some(Path("some/other/path"))];
 
-        fail_unless!(pathify(strs, true) == paths);
-        fail_unless!(pathify(strs, false) == paths);
+        assert_eq!(pathify(strs, true), copy paths);
+        assert_eq!(pathify(strs, false), paths);
 
-        fail_unless!(pathify([~"-"], true) == ~[None]);
-        fail_unless!(pathify([~"-"], false) == ~[Some(Path("-"))]);
+        assert_eq!(pathify([~"-"], true), ~[None]);
+        assert_eq!(pathify([~"-"], false), ~[Some(Path("-"))]);
+    }
+
+    #[test]
+    fn test_fileinput_read_byte() {
+        let filenames = pathify(vec::from_fn(
+            3,
+            |i| fmt!("tmp/lib-fileinput-test-fileinput-read-byte-%u.tmp", i)), true);
+
+        // 3 files containing 0\n, 1\n, and 2\n respectively
+        for filenames.eachi |i, &filename| {
+            make_file(filename.get_ref(), ~[fmt!("%u", i)]);
+        }
+
+        let fi = FileInput::from_vec(copy filenames);
+
+        for "012".each_chari |line, c| {
+            assert_eq!(fi.read_byte(), c as int);
+            assert_eq!(fi.state().line_num, line);
+            assert_eq!(fi.state().line_num_file, 0);
+            assert_eq!(fi.read_byte(), '\n' as int);
+            assert_eq!(fi.state().line_num, line + 1);
+            assert_eq!(fi.state().line_num_file, 1);
+
+            assert_eq!(copy fi.state().current_path, copy filenames[line]);
+        }
+
+        assert_eq!(fi.read_byte(), -1);
+        fail_unless!(fi.eof());
+        assert_eq!(fi.state().line_num, 3)
+
+    }
+
+    #[test]
+    fn test_fileinput_read() {
+        let filenames = pathify(vec::from_fn(
+            3,
+            |i| fmt!("tmp/lib-fileinput-test-fileinput-read-%u.tmp", i)), true);
+
+        // 3 files containing 0\n, 1\n, and 2\n respectively
+        for filenames.eachi |i, &filename| {
+            make_file(filename.get_ref(), ~[fmt!("%u", i)]);
+        }
+
+        let fi = FileInput::from_vec(filenames);
+        let mut buf : ~[u8] = vec::from_elem(6, 0u8);
+        let count = fi.read(buf, 10);
+        assert_eq!(count, 6);
+        assert_eq!(buf, "0\n1\n2\n".to_bytes());
+        fail_unless!(fi.eof())
+        assert_eq!(fi.state().line_num, 3);
     }
 
     #[test]
@@ -388,47 +496,84 @@ mod test {
         for filenames.eachi |i, &filename| {
             let contents =
                 vec::from_fn(3, |j| fmt!("%u %u", i, j));
-            make_file(&filename.get(), contents);
+            make_file(filename.get_ref(), contents);
             all_lines.push_all(contents);
         }
 
         let mut read_lines = ~[];
         for input_vec(filenames) |line| {
-            read_lines.push(line);
+            read_lines.push(line.to_owned());
         }
-        fail_unless!(read_lines == all_lines);
+        assert_eq!(read_lines, all_lines);
     }
 
     #[test]
     fn test_input_vec_state() {
         let filenames = pathify(vec::from_fn(
             3,
-            |i|
-            fmt!("tmp/lib-fileinput-test-input-vec-state-%u.tmp", i)),true);
+            |i| fmt!("tmp/lib-fileinput-test-input-vec-state-%u.tmp", i)),true);
 
         for filenames.eachi |i, &filename| {
             let contents =
                 vec::from_fn(3, |j| fmt!("%u %u", i, j + 1));
-            make_file(&filename.get(), contents);
+            make_file(filename.get_ref(), contents);
         }
 
         for input_vec_state(filenames) |line, state| {
             let nums = str::split_char(line, ' ');
-
             let file_num = uint::from_str(nums[0]).get();
             let line_num = uint::from_str(nums[1]).get();
+            assert_eq!(line_num, state.line_num_file);
+            assert_eq!(file_num * 3 + line_num, state.line_num);
+        }
+    }
+
+    #[test]
+    fn test_empty_files() {
+        // An empty file in the middle of the list must be skipped
+        // without yielding a line, and `current_path` must always name
+        // the file the line actually came from.
+        //
+        // The fixture names are unique to this test: reusing the
+        // "next-file" names would let this test and `test_next_file`
+        // overwrite each other's files when the tests run concurrently.
+        let filenames = pathify(vec::from_fn(
+            3,
+            |i| fmt!("tmp/lib-fileinput-test-empty-files-%u.tmp", i)),true);
+
+        make_file(filenames[0].get_ref(), ~[~"1", ~"2"]);
+        make_file(filenames[1].get_ref(), ~[]);
+        make_file(filenames[2].get_ref(), ~[~"3", ~"4"]);
+
+        let mut count = 0;
+        for input_vec_state(copy filenames) |line, state| {
+            // lines "1"/"2" live in the first file, "3"/"4" in the third
+            let expected_path = match line {
+                "1" | "2" => copy filenames[0],
+                "3" | "4" => copy filenames[2],
+                _ => fail!(~"unexpected line")
+            };
+            assert_eq!(copy state.current_path, expected_path);
+            count += 1;
+        }
+        // exactly the four non-empty lines were seen
+        assert_eq!(count, 4);
+    }
 
-            fail_unless!(line_num == state.line_num_file);
-            fail_unless!(file_num * 3 + line_num == state.line_num);
+    #[test]
+    fn test_no_trailing_newline() {
+        let f1 = Some(Path("tmp/lib-fileinput-test-no-trailing-newline-1.tmp")),
+            f2 = Some(Path("tmp/lib-fileinput-test-no-trailing-newline-2.tmp"));
+
+        let wr = io::file_writer(f1.get_ref(), [io::Create, io::Truncate]).get();
+        wr.write_str("1\n2");
+        let wr = io::file_writer(f2.get_ref(), [io::Create, io::Truncate]).get();
+        wr.write_str("3\n4");
+
+        let mut lines = ~[];
+        for input_vec(~[f1, f2]) |line| {
+            lines.push(line.to_owned());
         }
+        assert_eq!(lines, ~[~"1", ~"2", ~"3", ~"4"]);
     }
 
+
     #[test]
     fn test_next_file() {
         let filenames = pathify(vec::from_fn(
             3,
-            |i|
-            fmt!("tmp/lib-fileinput-test-next-file-%u.tmp", i)),true);
+            |i| fmt!("tmp/lib-fileinput-test-next-file-%u.tmp", i)),true);
 
         for filenames.eachi |i, &filename| {
             let contents =
@@ -439,19 +584,19 @@ mod test {
         let mut in = FileInput::from_vec(filenames);
 
         // read once from 0
-        fail_unless!(in.next_line() == Some(~"0 1"));
+        assert_eq!(in.read_line(), ~"0 1");
         in.next_file(); // skip the rest of 0
 
         // read all lines from 1 (but don't read any from 2),
         for uint::range(1, 4) |i| {
-            fail_unless!(in.next_line() == Some(fmt!("1 %u", i)));
+            assert_eq!(in.read_line(), fmt!("1 %u", i));
         }
         // 1 is finished, but 2 hasn't been started yet, so this will
         // just "skip" to the beginning of 2 (Python's fileinput does
         // the same)
         in.next_file();
 
-        fail_unless!(in.next_line() == Some(~"2 1"));
+        assert_eq!(in.read_line(), ~"2 1");
     }
 
     #[test]