about summary refs log tree commit diff
path: root/src/libsyntax
diff options
context:
space:
mode:
author Paul Stansifer <paul.stansifer@gmail.com> 2012-06-01 15:52:07 -0700
committer Paul Stansifer <paul.stansifer@gmail.com> 2012-06-15 12:41:41 -0700
commit d619e8995f70556d58e57a699ba726c17f9e48ad (patch)
tree 0c83bf73c9a629c34c19e9267a8e897cdcb1c6cf /src/libsyntax
parent 1d25981c730f41591622a9c1a04cdd3de84e5a30 (diff)
download rust-d619e8995f70556d58e57a699ba726c17f9e48ad.tar.gz
rust-d619e8995f70556d58e57a699ba726c17f9e48ad.zip
Make token trees re-lexable.
Diffstat (limited to 'src/libsyntax')
-rw-r--r-- src/libsyntax/parse/lexer.rs 68
1 files changed, 66 insertions, 2 deletions
diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs
index e5be37dc4f0..bf151ca4a68 100644
--- a/src/libsyntax/parse/lexer.rs
+++ b/src/libsyntax/parse/lexer.rs
@@ -1,6 +1,7 @@
 import util::interner;
 import util::interner::intern;
 import diagnostic;
+import ast::{tt_delim,tt_flat};
 
 export reader, string_reader, new_string_reader, is_whitespace;
 export nextch, is_eof, bump, get_str_from;
@@ -14,6 +15,26 @@ iface reader {
     fn interner() -> @interner::interner<@str>;
 }
 
+enum tt_frame_up { /* to break a circularity */
+    tt_frame_up(option<tt_frame>)
+}
+
+type tt_frame = @{
+    /* invariant: readme[idx] is always a tt_flat */
+    readme: [ast::token_tree],
+    mut idx: uint,
+    up: tt_frame_up
+};
+
+type tt_reader = @{
+    mut cur: tt_frame,
+    interner: @interner::interner<@str>,
+    span_diagnostic: diagnostic::span_handler,
+    /* cached: */
+    mut cur_tok: token::token,
+    mut cur_chpos: uint
+};
+
 type string_reader = @{
     span_diagnostic: diagnostic::span_handler,
     src: @str,
@@ -39,13 +60,56 @@ impl string_reader_as_reader of reader for string_reader {
     }
     fn fatal(m: str) -> ! {
         self.span_diagnostic.span_fatal(
-            ast_util::mk_sp(self.chpos, self.chpos),
-            m)
+            ast_util::mk_sp(self.chpos, self.chpos), m)
     }
     fn chpos() -> uint { self.chpos }
     fn interner() -> @interner::interner<@str> { self.interner }
 }
 
+impl tt_reader_as_reader of reader for tt_reader {
+    fn is_eof() -> bool { self.cur_tok == token::EOF }
+    fn next_token() -> {tok: token::token, chpos: uint} {
+        let ret_val = { tok: self.cur_tok, chpos: self.cur_chpos };
+        self.cur.idx += 1u;
+        if self.cur.idx >= vec::len(self.cur.readme) {
+            /* done with this set; pop */
+            alt self.cur.up {
+              tt_frame_up(option::none) {
+                self.cur_tok = token::EOF;
+                ret ret_val;
+              }
+              tt_frame_up(option::some(tt_f)) {
+                self.cur = tt_f;
+                /* the above `if` would need to be a `while` if we didn't know
+                that the last thing in a `tt_delim` is always a `tt_flat` */
+                self.cur.idx += 1u;
+              }
+            }
+        }
+        /* if `tt_delim`s could be 0-length, we'd need to be able to switch
+        between popping and pushing until we got to an actual `tt_flat` */
+        loop { /* because it's easiest, this handles `tt_delim` not starting
+                  with a `tt_flat`, even though it won't happen */
+            alt self.cur.readme[self.cur.idx] {
+              tt_delim(tts) {
+                self.cur = @{readme: tts, mut idx: 0u,
+                             up: tt_frame_up(option::some(self.cur)) };
+              }
+              tt_flat(chpos, tok) {
+                self.cur_chpos = chpos; self.cur_tok = tok;
+                ret ret_val;
+              }
+          }
+        }
+    }
+    fn fatal(m: str) -> ! {
+        self.span_diagnostic.span_fatal(
+            ast_util::mk_sp(self.chpos(), self.chpos()), m);
+    }
+    fn chpos() -> uint { self.cur_chpos }
+    fn interner() -> @interner::interner<@str> { self.interner }
+}
+
 fn get_str_from(rdr: string_reader, start: uint) -> str unsafe {
     // I'm pretty skeptical about this subtraction. What if there's a
     // multi-byte character before the mark?