| field | value | date |
|---|---|---|
| author | Corey Richardson <corey@octayn.net> | 2014-07-04 22:30:39 -0700 |
| committer | Corey Richardson <corey@octayn.net> | 2014-07-09 00:06:29 -0700 |
| commit | f512779554a436d11dd9ffde4c198da6241dfd58 (patch) | |
| tree | 6431e33802c11c3ba27484028fb93d2451e3373e /src/libsyntax/parse/parser.rs | |
| parent | cc4213418e3ab225867d8e3911f592481b1bbffc (diff) | |
lexer: lex WS/COMMENT/SHEBANG rather than skipping
The lexer now categorizes every byte of its input according to the grammar, emitting WS, COMMENT, and SHEBANG tokens instead of silently discarding them. The parser skips over these trivia tokens while parsing, so they never appear in the input handed to syntax extensions.
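The new `real_token` helper in the diff below is essentially a skip-trivia loop over the lexer's output. As a rough illustration of the same idea, here is a minimal, self-contained sketch using made-up `Token` and `Lexer` types rather than libsyntax's own; only the shape of the loop mirrors the patch.

```rust
// A rough, self-contained model of the trivia-skipping idea, using made-up
// `Token` and `Lexer` types rather than libsyntax's own; only the shape of
// the skip loop mirrors the patch below.
use std::collections::VecDeque;

#[derive(Debug, PartialEq)]
enum Token {
    Ws,              // a run of whitespace
    Comment,         // any comment
    Shebang(String), // a `#!...` line at the start of a file
    Ident(String),   // everything else is collapsed into one variant here
    Eof,
}

struct Lexer {
    tokens: VecDeque<Token>, // pre-lexed stream, in source order
}

impl Lexer {
    // Every byte of the input is accounted for: trivia comes out as tokens too.
    fn next_token(&mut self) -> Token {
        self.tokens.pop_front().unwrap_or(Token::Eof)
    }
}

/// Return the next token the parser cares about, skipping trivia.
fn real_token(lexer: &mut Lexer) -> Token {
    loop {
        match lexer.next_token() {
            Token::Ws | Token::Comment | Token::Shebang(_) => continue,
            t => return t,
        }
    }
}

fn main() {
    let mut lexer = Lexer {
        tokens: VecDeque::from(vec![
            Token::Shebang("#!/usr/bin/env foo".into()),
            Token::Ws,
            Token::Ident("foo".into()),
            Token::Comment,
            Token::Ws,
            Token::Ident("bar".into()),
        ]),
    };
    // The parser only ever sees the identifiers and EOF.
    assert_eq!(real_token(&mut lexer), Token::Ident("foo".into()));
    assert_eq!(real_token(&mut lexer), Token::Ident("bar".into()));
    assert_eq!(real_token(&mut lexer), Token::Eof);
}
```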
Diffstat (limited to 'src/libsyntax/parse/parser.rs')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | src/libsyntax/parse/parser.rs | 20 |
1 file changed, 17 insertions, 3 deletions
```diff
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index e0bcb41a753..51f2c74d3ae 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -325,10 +325,24 @@ fn is_plain_ident_or_underscore(t: &token::Token) -> bool {
     is_plain_ident(t) || *t == token::UNDERSCORE
 }
 
+/// Get a token the parser cares about
+fn real_token(rdr: &mut Reader) -> TokenAndSpan {
+    let mut t = rdr.next_token();
+    loop {
+        match t.tok {
+            token::WS | token::COMMENT | token::SHEBANG(_) => {
+                t = rdr.next_token();
+            },
+            _ => break
+        }
+    }
+    t
+}
+
 impl<'a> Parser<'a> {
     pub fn new(sess: &'a ParseSess, cfg: ast::CrateConfig,
                mut rdr: Box<Reader>) -> Parser<'a> {
-        let tok0 = rdr.next_token();
+        let tok0 = real_token(rdr);
         let span = tok0.sp;
         let placeholder = TokenAndSpan {
             tok: token::UNDERSCORE,
@@ -864,7 +878,7 @@ impl<'a> Parser<'a> {
             None
         };
         let next = if self.buffer_start == self.buffer_end {
-            self.reader.next_token()
+            real_token(self.reader)
         } else {
             // Avoid token copies with `replace`.
             let buffer_start = self.buffer_start as uint;
@@ -908,7 +922,7 @@ impl<'a> Parser<'a> {
                       -> R {
         let dist = distance as int;
         while self.buffer_length() < dist {
-            self.buffer[self.buffer_end as uint] = self.reader.next_token();
+            self.buffer[self.buffer_end as uint] = real_token(self.reader);
             self.buffer_end = (self.buffer_end + 1) & 3;
         }
         f(&self.buffer[((self.buffer_start + dist - 1) & 3) as uint].tok)
```
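The two later hunks refill the parser's four-slot lookahead buffer through `real_token`, wrapping indices around the power-of-two capacity with `& 3`. Continuing the toy `Token`/`Lexer`/`real_token` types from the sketch above, a rough model of that ring-buffer lookahead might look like this; the `Lookahead` struct and its method names are invented for illustration, and the bookkeeping is simplified relative to the real parser (which pre-masks its indices, stores spans alongside tokens, and also consumes buffered tokens):

```rust
// Continues the toy `Token`/`Lexer`/`real_token` sketch above. `Lookahead`
// and its method names are invented for illustration; consuming tokens
// (advancing `start`) is left out of this sketch.
struct Lookahead {
    buffer: [Token; 4], // power-of-two capacity, so `& 3` wraps the indices
    start: usize,       // monotonically increasing; masked only when indexing
    end: usize,
}

impl Lookahead {
    fn new() -> Lookahead {
        Lookahead {
            buffer: [Token::Eof, Token::Eof, Token::Eof, Token::Eof],
            start: 0,
            end: 0,
        }
    }

    /// Ensure at least `dist` tokens are buffered, pulling them through
    /// `real_token` so trivia never enters the lookahead window.
    fn fill_to(&mut self, dist: usize, lexer: &mut Lexer) {
        debug_assert!(dist <= 4, "the window is only four tokens wide");
        while self.end - self.start < dist {
            self.buffer[self.end & 3] = real_token(lexer);
            self.end += 1;
        }
    }

    /// Peek `dist` tokens ahead (1-based, like the parser's `look_ahead`).
    fn peek(&mut self, dist: usize, lexer: &mut Lexer) -> &Token {
        self.fill_to(dist, lexer);
        &self.buffer[(self.start + dist - 1) & 3]
    }
}

fn demo(lexer: &mut Lexer) {
    let mut la = Lookahead::new();
    // Both buffered tokens were fetched via `real_token`, so neither can be
    // whitespace, a comment, or a shebang.
    println!("{:?}", la.peek(2, lexer));
    println!("{:?}", la.peek(1, lexer));
}
```

Because the capacity is a power of two, the `& 3` mask stands in for `% 4`, so wraparound needs no division; that is the same trick the patch keeps in `(self.buffer_end + 1) & 3`.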
