Diffstat (limited to 'src/libsyntax/parse')
-rw-r--r-- | src/libsyntax/parse/attr.rs           |   2
-rw-r--r-- | src/libsyntax/parse/lexer/comments.rs |   8
-rw-r--r-- | src/libsyntax/parse/lexer/mod.rs      | 134
-rw-r--r-- | src/libsyntax/parse/parser.rs         |  20
-rw-r--r-- | src/libsyntax/parse/token.rs          |  16
5 files changed, 118 insertions, 62 deletions
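The heart of the lexer change is the pair of predicates added to lexer/mod.rs: `///` opens a doc comment unless it is immediately followed by a fourth `/`, `/**` opens a block doc comment unless followed by a third `*`, and `//!` / `/*!` always do. For reference, here is a minimal standalone sketch of that rule, ported to modern Rust for illustration (the diff below is in the 2014 pre-1.0 dialect, and the real functions also emit debug! logging):

    // Sketch of the is_doc_comment / is_block_doc_comment predicates from
    // the diff below; the asserts exercise the edge cases.
    fn is_doc_comment(s: &str) -> bool {
        // "///" is a doc comment, but "////..." is plain; "//!" is always doc.
        (s.starts_with("///") && s.as_bytes().get(3) != Some(&b'/'))
            || s.starts_with("//!")
    }

    fn is_block_doc_comment(s: &str) -> bool {
        // "/**" is a doc comment, but "/***..." is plain; "/*!" is always doc.
        (s.starts_with("/**") && s.as_bytes().get(3) != Some(&b'*'))
            || s.starts_with("/*!")
    }

    fn main() {
        assert!(is_doc_comment("/// blah"));
        assert!(!is_doc_comment("//// decorative rule"));
        assert!(is_block_doc_comment("/*! inner doc */"));
        assert!(!is_block_doc_comment("/*** banner ***/"));
    }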
diff --git a/src/libsyntax/parse/attr.rs b/src/libsyntax/parse/attr.rs
index b2297ec770c..c227d8a0fed 100644
--- a/src/libsyntax/parse/attr.rs
+++ b/src/libsyntax/parse/attr.rs
@@ -34,7 +34,7 @@ impl<'a> ParserAttr for Parser<'a> {
     fn parse_outer_attributes(&mut self) -> Vec<ast::Attribute> {
         let mut attrs: Vec<ast::Attribute> = Vec::new();
         loop {
-            debug!("parse_outer_attributes: self.token={:?}",
+            debug!("parse_outer_attributes: self.token={}",
                    self.token);
             match self.token {
               token::POUND => {
diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs
index c5dd10382a9..3f3a8a723f1 100644
--- a/src/libsyntax/parse/lexer/comments.rs
+++ b/src/libsyntax/parse/lexer/comments.rs
@@ -13,7 +13,7 @@ use codemap::{BytePos, CharPos, CodeMap, Pos};
 use diagnostic;
 use parse::lexer::{is_whitespace, Reader};
 use parse::lexer::{StringReader, TokenAndSpan};
-use parse::lexer::{is_line_non_doc_comment, is_block_non_doc_comment};
+use parse::lexer::is_block_doc_comment;
 use parse::lexer;
 use parse::token;
 
@@ -42,9 +42,9 @@ pub struct Comment {
 }
 
 pub fn is_doc_comment(s: &str) -> bool {
-    (s.starts_with("///") && !is_line_non_doc_comment(s)) ||
+    (s.starts_with("///") && super::is_doc_comment(s)) ||
     s.starts_with("//!") ||
-    (s.starts_with("/**") && !is_block_non_doc_comment(s)) ||
+    (s.starts_with("/**") && is_block_doc_comment(s)) ||
     s.starts_with("/*!")
 }
 
@@ -260,7 +260,7 @@ fn read_block_comment(rdr: &mut StringReader,
             rdr.bump();
             rdr.bump();
         }
-        if !is_block_non_doc_comment(curr_line.as_slice()) {
+        if is_block_doc_comment(curr_line.as_slice()) {
            return
        }
        assert!(!curr_line.as_slice().contains_char('\n'));
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 61a37f77d34..947f3d59b86 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -187,7 +187,7 @@ impl<'a> StringReader<'a> {
     /// Advance peek_tok and peek_span to refer to the next token, and
     /// possibly update the interner.
     fn advance_token(&mut self) {
-        match self.consume_whitespace_and_comments() {
+        match self.scan_whitespace_or_comment() {
             Some(comment) => {
                 self.peek_span = comment.sp;
                 self.peek_tok = comment.tok;
             }
@@ -339,8 +339,7 @@ impl<'a> StringReader<'a> {
 
     /// PRECONDITION: self.curr is not whitespace
     /// Eats any kind of comment.
-    /// Returns a Some(sugared-doc-attr) if one exists, None otherwise
-    fn consume_any_line_comment(&mut self) -> Option<TokenAndSpan> {
+    fn scan_comment(&mut self) -> Option<TokenAndSpan> {
         match self.curr {
             Some(c) => {
                 if c.is_whitespace() {
@@ -375,28 +374,32 @@ impl<'a> StringReader<'a> {
                             }
                             self.bump();
                         }
-                        let ret = self.with_str_from(start_bpos, |string| {
+                        return self.with_str_from(start_bpos, |string| {
                             // but comments with only more "/"s are not
-                            if !is_line_non_doc_comment(string) {
-                                Some(TokenAndSpan{
-                                    tok: token::DOC_COMMENT(str_to_ident(string)),
-                                    sp: codemap::mk_sp(start_bpos, self.last_pos)
-                                })
+                            let tok = if is_doc_comment(string) {
+                                token::DOC_COMMENT(str_to_ident(string))
                             } else {
-                                None
-                            }
-                        });
+                                token::COMMENT
+                            };
 
-                        if ret.is_some() {
-                            return ret;
-                        }
+                            return Some(TokenAndSpan{
+                                tok: tok,
+                                sp: codemap::mk_sp(start_bpos, self.last_pos)
+                            });
+                        });
                     } else {
+                        let start_bpos = self.last_pos - BytePos(2);
                         while !self.curr_is('\n') && !self.is_eof() { self.bump(); }
+                        return Some(TokenAndSpan {
+                            tok: token::COMMENT,
+                            sp: codemap::mk_sp(start_bpos, self.last_pos)
+                        });
                     }
-                    // Restart whitespace munch.
-                    self.consume_whitespace_and_comments()
                 }
-                Some('*') => { self.bump(); self.bump(); self.consume_block_comment() }
+                Some('*') => {
+                    self.bump(); self.bump();
+                    self.scan_block_comment()
+                }
                 _ => None
             }
         } else if self.curr_is('#') {
@@ -412,9 +415,15 @@ impl<'a> StringReader<'a> {
                 let cmap = CodeMap::new();
                 cmap.files.borrow_mut().push(self.filemap.clone());
                 let loc = cmap.lookup_char_pos_adj(self.last_pos);
+                debug!("Skipping a shebang");
                 if loc.line == 1u && loc.col == CharPos(0u) {
+                    // FIXME: Add shebang "token", return it
+                    let start = self.last_pos;
                     while !self.curr_is('\n') && !self.is_eof() { self.bump(); }
-                    return self.consume_whitespace_and_comments();
+                    return Some(TokenAndSpan {
+                        tok: token::SHEBANG(self.ident_from(start)),
+                        sp: codemap::mk_sp(start, self.last_pos)
+                    });
                 }
             }
             None
@@ -423,15 +432,33 @@ impl<'a> StringReader<'a> {
         }
     }
 
-    /// EFFECT: eats whitespace and comments.
-    /// Returns a Some(sugared-doc-attr) if one exists, None otherwise.
-    fn consume_whitespace_and_comments(&mut self) -> Option<TokenAndSpan> {
-        while is_whitespace(self.curr) { self.bump(); }
-        return self.consume_any_line_comment();
+    /// If there is whitespace, shebang, or a comment, scan it. Otherwise,
+    /// return None.
+    fn scan_whitespace_or_comment(&mut self) -> Option<TokenAndSpan> {
+        match self.curr.unwrap_or('\0') {
+            // # to handle shebang at start of file -- this is the entry point
+            // for skipping over all "junk"
+            '/' | '#' => {
+                let c = self.scan_comment();
+                debug!("scanning a comment {}", c);
+                c
+            },
+            c if is_whitespace(Some(c)) => {
+                let start_bpos = self.last_pos;
+                while is_whitespace(self.curr) { self.bump(); }
+                let c = Some(TokenAndSpan {
+                    tok: token::WS,
+                    sp: codemap::mk_sp(start_bpos, self.last_pos)
+                });
+                debug!("scanning whitespace: {}", c);
+                c
+            },
+            _ => None
+        }
     }
 
     /// Might return a sugared-doc-attr
-    fn consume_block_comment(&mut self) -> Option<TokenAndSpan> {
+    fn scan_block_comment(&mut self) -> Option<TokenAndSpan> {
         // block comments starting with "/**" or "/*!" are doc-comments
         let is_doc_comment = self.curr_is('*') || self.curr_is('!');
         let start_bpos = self.last_pos - BytePos(2);
@@ -466,28 +493,23 @@ impl<'a> StringReader<'a> {
             self.bump();
         }
 
-        let res = if is_doc_comment {
-            self.with_str_from(start_bpos, |string| {
-                // but comments with only "*"s between two "/"s are not
-                if !is_block_non_doc_comment(string) {
-                    let string = if has_cr {
-                        self.translate_crlf(start_bpos, string,
-                                            "bare CR not allowed in block doc-comment")
-                    } else { string.into_maybe_owned() };
-                    Some(TokenAndSpan{
-                        tok: token::DOC_COMMENT(str_to_ident(string.as_slice())),
-                        sp: codemap::mk_sp(start_bpos, self.last_pos)
-                    })
-                } else {
-                    None
-                }
-            })
-        } else {
-            None
-        };
+        self.with_str_from(start_bpos, |string| {
+            // but comments with only "*"s between two "/"s are not
+            let tok = if is_block_doc_comment(string) {
+                let string = if has_cr {
+                    self.translate_crlf(start_bpos, string,
+                                        "bare CR not allowed in block doc-comment")
+                } else { string.into_maybe_owned() };
+                token::DOC_COMMENT(str_to_ident(string.as_slice()))
+            } else {
+                token::COMMENT
+            };
 
-        // restart whitespace munch.
-        if res.is_some() { res } else { self.consume_whitespace_and_comments() }
+            Some(TokenAndSpan{
+                tok: tok,
+                sp: codemap::mk_sp(start_bpos, self.last_pos)
+            })
+        })
     }
 
     /// Scan through any digits (base `radix`) or underscores, and return how
@@ -1242,12 +1264,18 @@ fn in_range(c: Option<char>, lo: char, hi: char) -> bool {
 
 fn is_dec_digit(c: Option<char>) -> bool { return in_range(c, '0', '9'); }
 
-pub fn is_line_non_doc_comment(s: &str) -> bool {
-    s.starts_with("////")
+pub fn is_doc_comment(s: &str) -> bool {
+    let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/')
+              || s.starts_with("//!");
+    debug!("is `{}` a doc comment? {}", s, res);
+    res
 }
 
-pub fn is_block_non_doc_comment(s: &str) -> bool {
-    s.starts_with("/***")
+pub fn is_block_doc_comment(s: &str) -> bool {
+    let res = (s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*')
+              || s.starts_with("/*!");
+    debug!("is `{}` a doc comment? {}", s, res);
+    res
 }
 
 fn ident_start(c: Option<char>) -> bool {
@@ -1383,9 +1411,9 @@ mod test {
     }
 
     #[test] fn line_doc_comments() {
-        assert!(!is_line_non_doc_comment("///"));
-        assert!(!is_line_non_doc_comment("/// blah"));
-        assert!(is_line_non_doc_comment("////"));
+        assert!(is_doc_comment("///"));
+        assert!(is_doc_comment("/// blah"));
+        assert!(!is_doc_comment("////"));
     }
 
     #[test] fn nested_block_comments() {
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index e0bcb41a753..51f2c74d3ae 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -325,10 +325,24 @@ fn is_plain_ident_or_underscore(t: &token::Token) -> bool {
     is_plain_ident(t) || *t == token::UNDERSCORE
 }
 
+/// Get a token the parser cares about
+fn real_token(rdr: &mut Reader) -> TokenAndSpan {
+    let mut t = rdr.next_token();
+    loop {
+        match t.tok {
+            token::WS | token::COMMENT | token::SHEBANG(_) => {
+                t = rdr.next_token();
+            },
+            _ => break
+        }
+    }
+    t
+}
+
 impl<'a> Parser<'a> {
     pub fn new(sess: &'a ParseSess, cfg: ast::CrateConfig,
                mut rdr: Box<Reader>) -> Parser<'a> {
-        let tok0 = rdr.next_token();
+        let tok0 = real_token(rdr);
         let span = tok0.sp;
         let placeholder = TokenAndSpan {
             tok: token::UNDERSCORE,
@@ -864,7 +878,7 @@ impl<'a> Parser<'a> {
             None
         };
         let next = if self.buffer_start == self.buffer_end {
-            self.reader.next_token()
+            real_token(self.reader)
         } else {
             // Avoid token copies with `replace`.
             let buffer_start = self.buffer_start as uint;
@@ -908,7 +922,7 @@ impl<'a> Parser<'a> {
                       -> R {
         let dist = distance as int;
         while self.buffer_length() < dist {
-            self.buffer[self.buffer_end as uint] = self.reader.next_token();
+            self.buffer[self.buffer_end as uint] = real_token(self.reader);
             self.buffer_end = (self.buffer_end + 1) & 3;
         }
         f(&self.buffer[((self.buffer_start + dist - 1) & 3) as uint].tok)
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs
index 83d373d033b..e65f9f208a3 100644
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -97,8 +97,18 @@ pub enum Token {
 
     /* For interpolation */
     INTERPOLATED(Nonterminal),
-
     DOC_COMMENT(Ident),
+
+    // Junk. These carry no data because we don't really care about the data
+    // they *would* carry, and don't really want to allocate a new ident for
+    // them. Instead, users could extract that from the associated span.
+
+    /// Whitespace
+    WS,
+    /// Comment
+    COMMENT,
+    SHEBANG(Ident),
+
     EOF,
 }
 
@@ -231,6 +241,10 @@ pub fn to_string(t: &Token) -> String {
       /* Other */
       DOC_COMMENT(s) => get_ident(s).get().to_string(),
       EOF => "<eof>".to_string(),
+      WS => " ".to_string(),
+      COMMENT => "/* */".to_string(),
+      SHEBANG(s) => format!("/* shebang: {}*/", s.as_str()),
+
       INTERPOLATED(ref nt) => {
         match nt {
             &NtExpr(ref e) => ::print::pprust::expr_to_string(&**e),
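The new `scan_whitespace_or_comment` entry point changes the lexer's contract: instead of silently swallowing junk, it returns a `WS` or `COMMENT` token whose span covers the skipped text. Below is a compilable sketch of that dispatch under simplified assumptions (modern Rust; a plain byte range stands in for libsyntax's spans, and only `//` comments are handled):

    // Simplified model of the lexer's new entry point: rather than skipping
    // junk, it reports a token plus the byte range it covers.
    #[derive(Debug, PartialEq)]
    enum Tok {
        Ws,
        Comment,
    }

    struct Scanner<'a> {
        src: &'a str,
        pos: usize, // byte offset of the next unread character
    }

    impl<'a> Scanner<'a> {
        // Scan one whitespace run or one `//` comment; None means the next
        // token is something the parser actually cares about.
        fn scan_whitespace_or_comment(&mut self) -> Option<(Tok, std::ops::Range<usize>)> {
            let start = self.pos;
            let rest = &self.src[self.pos..];
            if rest.starts_with("//") {
                // A line comment runs to the end of the line (or EOF).
                let len = rest.find('\n').unwrap_or(rest.len());
                self.pos += len;
                Some((Tok::Comment, start..self.pos))
            } else if rest.starts_with(|c: char| c.is_whitespace()) {
                // Collapse the entire whitespace run into a single WS token.
                let len = rest.find(|c: char| !c.is_whitespace()).unwrap_or(rest.len());
                self.pos += len;
                Some((Tok::Ws, start..self.pos))
            } else {
                None
            }
        }
    }

    fn main() {
        let mut s = Scanner { src: "  // hi\nfn", pos: 0 };
        assert_eq!(s.scan_whitespace_or_comment(), Some((Tok::Ws, 0..2)));
        assert_eq!(s.scan_whitespace_or_comment(), Some((Tok::Comment, 2..7)));
        assert_eq!(s.scan_whitespace_or_comment(), Some((Tok::Ws, 7..8)));
        assert_eq!(s.scan_whitespace_or_comment(), None); // "fn" is a real token
    }

Because the token carries only a span, a tool that wants the comment or whitespace text can slice it back out of the source, which is the rationale given in the token.rs comment above for making `WS` and `COMMENT` data-free.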
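On the consumer side, `real_token` in parser.rs is just a skip loop, so the parser's view of the token stream is unchanged. Here is a self-contained sketch of that loop, with toy token types standing in for libsyntax's (the enum and the stream closure are illustrative, not the real API):

    // Toy stand-ins for libsyntax's token types, for illustration only.
    #[derive(Debug, Clone, PartialEq)]
    enum Token {
        Ws,
        Comment,
        Shebang(String),
        Ident(String),
        Eof,
    }

    // Pull tokens until one the parser cares about appears, mirroring the
    // `real_token` skip loop that parser.rs adds in the diff.
    fn real_token(mut next: impl FnMut() -> Token) -> Token {
        loop {
            match next() {
                Token::Ws | Token::Comment | Token::Shebang(_) => continue,
                t => return t,
            }
        }
    }

    fn main() {
        let mut stream = vec![
            Token::Shebang("#!/usr/bin/env rust".to_string()),
            Token::Ws,
            Token::Comment,
            Token::Ident("fn".to_string()),
        ].into_iter();
        assert_eq!(real_token(|| stream.next().unwrap_or(Token::Eof)),
                   Token::Ident("fn".to_string()));
    }

One payoff of this split: pretty-printers and other tools can drive the same lexer and see every byte of the input, while the parser pays only the cost of this small loop at its token-fetching entry points.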