diff options
| author | Alex Crichton <alex@alexcrichton.com> | 2017-01-19 15:49:34 -0800 |
|---|---|---|
| committer | Alex Crichton <alex@alexcrichton.com> | 2017-01-20 08:35:47 -0800 |
| commit | 17294d98b915364f64a8a1cbc071aa6ee200a03b (patch) | |
| tree | 6a79c290e7f4ee37587b3b24fbd35a5f500a557b /src/libsyntax/parse/parser.rs | |
| parent | dd5d85ea761e2d570682fccdb8608319d6bd2bf4 (diff) | |
| parent | 0b9e26f390403aa95620d3b813f046732b371fb1 (diff) | |
| download | rust-17294d98b915364f64a8a1cbc071aa6ee200a03b.tar.gz rust-17294d98b915364f64a8a1cbc071aa6ee200a03b.zip | |
Rollup merge of #39118 - jseyfried:token_tree_based_parser, r=nrc
Refactor the parser to consume token trees

This is groundwork for efficiently parsing attribute proc macro invocations, bang macro invocations, and `TokenStream`-based attributes and fragment matchers. This improves parsing performance by 8-15% and expansion performance by 0-5% on a sampling of the compiler's crates.

r? @nrc
Diffstat (limited to 'src/libsyntax/parse/parser.rs')
| -rw-r--r-- | src/libsyntax/parse/parser.rs | 195 |
1 files changed, 38 insertions, 157 deletions
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index d1a683b0bd5..fd6abc58b63 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -46,7 +46,7 @@ use ext::tt::macro_parser; use parse; use parse::classify; use parse::common::SeqSep; -use parse::lexer::{Reader, TokenAndSpan}; +use parse::lexer::TokenAndSpan; use parse::obsolete::ObsoleteSyntax; use parse::token::{self, MatchNt, SubstNt}; use parse::{new_sub_parser_from_file, ParseSess, Directory, DirectoryOwnership}; @@ -156,22 +156,6 @@ enum PrevTokenKind { Other, } -// Simple circular buffer used for keeping few next tokens. -#[derive(Default)] -struct LookaheadBuffer { - buffer: [TokenAndSpan; LOOKAHEAD_BUFFER_CAPACITY], - start: usize, - end: usize, -} - -const LOOKAHEAD_BUFFER_CAPACITY: usize = 8; - -impl LookaheadBuffer { - fn len(&self) -> usize { - (LOOKAHEAD_BUFFER_CAPACITY + self.end - self.start) % LOOKAHEAD_BUFFER_CAPACITY - } -} - /* ident is handled by common.rs */ pub struct Parser<'a> { @@ -184,19 +168,14 @@ pub struct Parser<'a> { pub prev_span: Span, /// the previous token kind prev_token_kind: PrevTokenKind, - lookahead_buffer: LookaheadBuffer, - pub tokens_consumed: usize, pub restrictions: Restrictions, pub quote_depth: usize, // not (yet) related to the quasiquoter parsing_token_tree: bool, - pub reader: Box<Reader+'a>, /// The set of seen errors about obsolete syntax. Used to suppress /// extra detail when the same error is seen twice pub obsolete_set: HashSet<ObsoleteSyntax>, /// Used to determine the path to externally loaded source files pub directory: Directory, - /// Stack of open delimiters and their spans. Used for error message. - pub open_braces: Vec<(token::DelimToken, Span)>, /// Name of the root module this parser originated from. If `None`, then the /// name is not known. This does not change while the parser is descending /// into modules, and sub-parsers have new values for this name. 
@@ -204,7 +183,6 @@ pub struct Parser<'a> { pub expected_tokens: Vec<TokenType>, pub tts: Vec<(TokenTree, usize)>, pub desugar_doc_comments: bool, - pub allow_interpolated_tts: bool, } #[derive(PartialEq, Eq, Clone)] @@ -270,30 +248,31 @@ impl From<P<Expr>> for LhsExpr { impl<'a> Parser<'a> { pub fn new(sess: &'a ParseSess, - rdr: Box<Reader+'a>, + tokens: Vec<TokenTree>, directory: Option<Directory>, desugar_doc_comments: bool) -> Self { + let tt = TokenTree::Delimited(syntax_pos::DUMMY_SP, Rc::new(Delimited { + delim: token::NoDelim, + open_span: syntax_pos::DUMMY_SP, + tts: tokens, + close_span: syntax_pos::DUMMY_SP, + })); let mut parser = Parser { - reader: rdr, sess: sess, token: token::Underscore, span: syntax_pos::DUMMY_SP, prev_span: syntax_pos::DUMMY_SP, prev_token_kind: PrevTokenKind::Other, - lookahead_buffer: Default::default(), - tokens_consumed: 0, restrictions: Restrictions::empty(), quote_depth: 0, parsing_token_tree: false, obsolete_set: HashSet::new(), directory: Directory { path: PathBuf::new(), ownership: DirectoryOwnership::Owned }, - open_braces: Vec::new(), root_module_name: None, expected_tokens: Vec::new(), - tts: Vec::new(), + tts: if tt.len() > 0 { vec![(tt, 0)] } else { Vec::new() }, desugar_doc_comments: desugar_doc_comments, - allow_interpolated_tts: true, }; let tok = parser.next_tok(); @@ -309,8 +288,8 @@ impl<'a> Parser<'a> { } fn next_tok(&mut self) -> TokenAndSpan { - 'outer: loop { - let mut tok = if let Some((tts, i)) = self.tts.pop() { + loop { + let tok = if let Some((tts, i)) = self.tts.pop() { let tt = tts.get_tt(i); if i + 1 < tts.len() { self.tts.push((tts, i + 1)); @@ -322,28 +301,14 @@ impl<'a> Parser<'a> { continue } } else { - self.reader.real_token() + TokenAndSpan { tok: token::Eof, sp: self.span } }; - loop { - let nt = match tok.tok { - token::Interpolated(ref nt) => nt.clone(), - token::DocComment(name) if self.desugar_doc_comments => { - self.tts.push((TokenTree::Token(tok.sp, token::DocComment(name)), 0)); - 
continue 'outer - } - _ => return tok, - }; - match *nt { - token::NtTT(TokenTree::Token(sp, ref t)) => { - tok = TokenAndSpan { tok: t.clone(), sp: sp }; - } - token::NtTT(ref tt) => { - self.tts.push((tt.clone(), 0)); - continue 'outer - } - _ => return tok, + match tok.tok { + token::DocComment(name) if self.desugar_doc_comments => { + self.tts.push((TokenTree::Token(tok.sp, token::DocComment(name)), 0)); } + _ => return tok, } } } @@ -892,17 +857,9 @@ impl<'a> Parser<'a> { _ => PrevTokenKind::Other, }; - let next = if self.lookahead_buffer.start == self.lookahead_buffer.end { - self.next_tok() - } else { - // Avoid token copies with `replace`. - let old_start = self.lookahead_buffer.start; - self.lookahead_buffer.start = (old_start + 1) % LOOKAHEAD_BUFFER_CAPACITY; - mem::replace(&mut self.lookahead_buffer.buffer[old_start], Default::default()) - }; + let next = self.next_tok(); self.span = next.sp; self.token = next.tok; - self.tokens_consumed += 1; self.expected_tokens.clear(); // check after each token self.check_unknown_macro_variable(); @@ -935,18 +892,20 @@ impl<'a> Parser<'a> { F: FnOnce(&token::Token) -> R, { if dist == 0 { - f(&self.token) - } else if dist < LOOKAHEAD_BUFFER_CAPACITY { - while self.lookahead_buffer.len() < dist { - self.lookahead_buffer.buffer[self.lookahead_buffer.end] = self.next_tok(); - self.lookahead_buffer.end = - (self.lookahead_buffer.end + 1) % LOOKAHEAD_BUFFER_CAPACITY; - } - let index = (self.lookahead_buffer.start + dist - 1) % LOOKAHEAD_BUFFER_CAPACITY; - f(&self.lookahead_buffer.buffer[index].tok) - } else { - self.bug("lookahead distance is too large"); + return f(&self.token); + } + let mut tok = token::Eof; + if let Some(&(ref tts, mut i)) = self.tts.last() { + i += dist - 1; + if i < tts.len() { + tok = match tts.get_tt(i) { + TokenTree::Token(_, tok) => tok, + TokenTree::Delimited(_, delimited) => token::OpenDelim(delimited.delim), + TokenTree::Sequence(..) 
=> token::Dollar, + }; + } } + f(&tok) } pub fn fatal(&self, m: &str) -> DiagnosticBuilder<'a> { self.sess.span_diagnostic.struct_span_fatal(self.span, m) @@ -2743,94 +2702,28 @@ impl<'a> Parser<'a> { // whether something will be a nonterminal or a seq // yet. match self.token { - token::Eof => { - let mut err: DiagnosticBuilder<'a> = - self.diagnostic().struct_span_err(self.span, - "this file contains an un-closed delimiter"); - for &(_, sp) in &self.open_braces { - err.span_help(sp, "did you mean to close this delimiter?"); - } - - Err(err) - }, token::OpenDelim(delim) => { - if self.tts.last().map(|&(_, i)| i == 1).unwrap_or(false) { + if self.quote_depth == 0 && self.tts.last().map(|&(_, i)| i == 1).unwrap_or(false) { let tt = self.tts.pop().unwrap().0; self.bump(); - return Ok(if self.allow_interpolated_tts { - // avoid needlessly reparsing token trees in recursive macro expansions - TokenTree::Token(tt.span(), token::Interpolated(Rc::new(token::NtTT(tt)))) - } else { - tt - }); + return Ok(tt); } let parsing_token_tree = ::std::mem::replace(&mut self.parsing_token_tree, true); - // The span for beginning of the delimited section - let pre_span = self.span; - - // Parse the open delimiter. - self.open_braces.push((delim, self.span)); let open_span = self.span; self.bump(); - - // Parse the token trees within the delimiters. - // We stop at any delimiter so we can try to recover if the user - // uses an incorrect delimiter. let tts = self.parse_seq_to_before_tokens(&[&token::CloseDelim(token::Brace), &token::CloseDelim(token::Paren), &token::CloseDelim(token::Bracket)], SeqSep::none(), |p| p.parse_token_tree(), |mut e| e.emit()); + self.parsing_token_tree = parsing_token_tree; let close_span = self.span; - // Expand to cover the entire delimited token tree - let span = Span { hi: close_span.hi, ..pre_span }; - - match self.token { - // Correct delimiter. 
- token::CloseDelim(d) if d == delim => { - self.open_braces.pop().unwrap(); - - // Parse the close delimiter. - self.bump(); - } - // Incorrect delimiter. - token::CloseDelim(other) => { - let token_str = self.this_token_to_string(); - let mut err = self.diagnostic().struct_span_err(self.span, - &format!("incorrect close delimiter: `{}`", token_str)); - // This is a conservative error: only report the last unclosed delimiter. - // The previous unclosed delimiters could actually be closed! The parser - // just hasn't gotten to them yet. - if let Some(&(_, sp)) = self.open_braces.last() { - err.span_note(sp, "unclosed delimiter"); - }; - err.emit(); - - self.open_braces.pop().unwrap(); - - // If the incorrect delimiter matches an earlier opening - // delimiter, then don't consume it (it can be used to - // close the earlier one). Otherwise, consume it. - // E.g., we try to recover from: - // fn foo() { - // bar(baz( - // } // Incorrect delimiter but matches the earlier `{` - if !self.open_braces.iter().any(|&(b, _)| b == other) { - self.bump(); - } - } - token::Eof => { - // Silently recover, the EOF token will be seen again - // and an error emitted then. Thus we don't pop from - // self.open_braces here. - }, - _ => {} - } + self.bump(); - self.parsing_token_tree = parsing_token_tree; + let span = Span { lo: open_span.lo, ..close_span }; Ok(TokenTree::Delimited(span, Rc::new(Delimited { delim: delim, open_span: open_span, @@ -2838,21 +2731,9 @@ impl<'a> Parser<'a> { close_span: close_span, }))) }, - token::CloseDelim(_) => { - // An unexpected closing delimiter (i.e., there is no - // matching opening delimiter). - let token_str = self.this_token_to_string(); - let err = self.diagnostic().struct_span_err(self.span, - &format!("unexpected close delimiter: `{}`", token_str)); - Err(err) - }, - /* we ought to allow different depths of unquotation */ - token::Dollar | token::SubstNt(..) 
if self.quote_depth > 0 => { - self.parse_unquoted() - } - _ => { - Ok(TokenTree::Token(self.span, self.bump_and_get())) - } + token::CloseDelim(_) | token::Eof => unreachable!(), + token::Dollar | token::SubstNt(..) if self.quote_depth > 0 => self.parse_unquoted(), + _ => Ok(TokenTree::Token(self.span, self.bump_and_get())), } } |
