From f512779554a436d11dd9ffde4c198da6241dfd58 Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Fri, 4 Jul 2014 22:30:39 -0700 Subject: lexer: lex WS/COMMENT/SHEBANG rather than skipping Now, the lexer will categorize every byte in its input according to the grammar. The parser skips over these while parsing, thus avoiding their presence in the input to syntax extensions. --- src/libsyntax/parse/parser.rs | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'src/libsyntax/parse/parser.rs') diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index e0bcb41a753..51f2c74d3ae 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -325,10 +325,24 @@ fn is_plain_ident_or_underscore(t: &token::Token) -> bool { is_plain_ident(t) || *t == token::UNDERSCORE } +/// Get a token the parser cares about +fn real_token(rdr: &mut Reader) -> TokenAndSpan { + let mut t = rdr.next_token(); + loop { + match t.tok { + token::WS | token::COMMENT | token::SHEBANG(_) => { + t = rdr.next_token(); + }, + _ => break + } + } + t +} + impl<'a> Parser<'a> { pub fn new(sess: &'a ParseSess, cfg: ast::CrateConfig, mut rdr: Box) -> Parser<'a> { - let tok0 = rdr.next_token(); + let tok0 = real_token(rdr); let span = tok0.sp; let placeholder = TokenAndSpan { tok: token::UNDERSCORE, @@ -864,7 +878,7 @@ impl<'a> Parser<'a> { None }; let next = if self.buffer_start == self.buffer_end { - self.reader.next_token() + real_token(self.reader) } else { // Avoid token copies with `replace`. let buffer_start = self.buffer_start as uint; @@ -908,7 +922,7 @@ impl<'a> Parser<'a> { -> R { let dist = distance as int; while self.buffer_length() < dist { - self.buffer[self.buffer_end as uint] = self.reader.next_token(); + self.buffer[self.buffer_end as uint] = real_token(self.reader); self.buffer_end = (self.buffer_end + 1) & 3; } f(&self.buffer[((self.buffer_start + dist - 1) & 3) as uint].tok) -- cgit 1.4.1-3-g733a5