Diffstat (limited to 'compiler/rustc_parse/src/lib.rs')
-rw-r--r--   compiler/rustc_parse/src/lib.rs   61
1 file changed, 44 insertions, 17 deletions
diff --git a/compiler/rustc_parse/src/lib.rs b/compiler/rustc_parse/src/lib.rs
index 8a6b0230023..44999c9b63a 100644
--- a/compiler/rustc_parse/src/lib.rs
+++ b/compiler/rustc_parse/src/lib.rs
@@ -6,15 +6,18 @@
 #![feature(or_patterns)]
 
 use rustc_ast as ast;
+use rustc_ast::attr::HasAttrs;
 use rustc_ast::token::{self, DelimToken, Nonterminal, Token, TokenKind};
 use rustc_ast::tokenstream::{self, LazyTokenStream, TokenStream, TokenTree};
 use rustc_ast_pretty::pprust;
+use rustc_data_structures::fx::FxHashSet;
 use rustc_data_structures::sync::Lrc;
 use rustc_errors::{Diagnostic, FatalError, Level, PResult};
 use rustc_session::parse::ParseSess;
 use rustc_span::{symbol::kw, FileName, SourceFile, Span, DUMMY_SP};
 use smallvec::SmallVec;
 
+use std::cell::RefCell;
 use std::mem;
 use std::path::Path;
 use std::str;
@@ -249,29 +252,23 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> TokenStream
     // before we fall back to the stringification.
 
     let convert_tokens =
-        |tokens: &Option<LazyTokenStream>| tokens.as_ref().map(|t| t.create_token_stream());
+        |tokens: Option<&LazyTokenStream>| tokens.as_ref().map(|t| t.create_token_stream());
 
     let tokens = match *nt {
         Nonterminal::NtItem(ref item) => prepend_attrs(&item.attrs, item.tokens.as_ref()),
-        Nonterminal::NtBlock(ref block) => convert_tokens(&block.tokens),
-        Nonterminal::NtStmt(ref stmt) => {
-            // FIXME: We currently only collect tokens for `:stmt`
-            // matchers in `macro_rules!` macros. When we start collecting
-            // tokens for attributes on statements, we will need to prepend
-            // attributes here
-            convert_tokens(&stmt.tokens)
-        }
-        Nonterminal::NtPat(ref pat) => convert_tokens(&pat.tokens),
-        Nonterminal::NtTy(ref ty) => convert_tokens(&ty.tokens),
+        Nonterminal::NtBlock(ref block) => convert_tokens(block.tokens.as_ref()),
+        Nonterminal::NtStmt(ref stmt) => prepend_attrs(stmt.attrs(), stmt.tokens()),
+        Nonterminal::NtPat(ref pat) => convert_tokens(pat.tokens.as_ref()),
+        Nonterminal::NtTy(ref ty) => convert_tokens(ty.tokens.as_ref()),
         Nonterminal::NtIdent(ident, is_raw) => {
             Some(tokenstream::TokenTree::token(token::Ident(ident.name, is_raw), ident.span).into())
         }
         Nonterminal::NtLifetime(ident) => {
             Some(tokenstream::TokenTree::token(token::Lifetime(ident.name), ident.span).into())
         }
-        Nonterminal::NtMeta(ref attr) => convert_tokens(&attr.tokens),
-        Nonterminal::NtPath(ref path) => convert_tokens(&path.tokens),
-        Nonterminal::NtVis(ref vis) => convert_tokens(&vis.tokens),
+        Nonterminal::NtMeta(ref attr) => convert_tokens(attr.tokens.as_ref()),
+        Nonterminal::NtPath(ref path) => convert_tokens(path.tokens.as_ref()),
+        Nonterminal::NtVis(ref vis) => convert_tokens(vis.tokens.as_ref()),
         Nonterminal::NtTT(ref tt) => Some(tt.clone().into()),
         Nonterminal::NtExpr(ref expr) | Nonterminal::NtLiteral(ref expr) => {
             if expr.tokens.is_none() {
@@ -281,6 +278,25 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> TokenStream
         }
     };
 
+    // Caches the stringification of 'good' `TokenStreams` which passed
+    // `tokenstream_probably_equal_for_proc_macro`. This allows us to avoid
+    // repeatedly stringifying and comparing the same `TokenStream` for deeply
+    // nested nonterminals.
+    //
+    // We cache by the stringification instead of the `TokenStream` to avoid
+    // needing to implement `Hash` for `TokenStream`. Note that it's possible to
+    // have two distinct `TokenStream`s that stringify to the same result
+    // (e.g. if they differ only in hygiene information). However, any
+    // information lost during the stringification process is also intentionally
+    // ignored by `tokenstream_probably_equal_for_proc_macro`, so it's fine
+    // that a single cache entry may 'map' to multiple distinct `TokenStream`s.
+    //
+    // This is a temporary hack to prevent compilation blowup on certain inputs.
+    // The entire pretty-print/retokenize process will be removed soon.
+    thread_local! {
+        static GOOD_TOKEN_CACHE: RefCell<FxHashSet<String>> = Default::default();
+    }
+
     // FIXME(#43081): Avoid this pretty-print + reparse hack
     // Pretty-print the AST struct without inserting any parenthesis
     // beyond those explicitly written by the user (e.g. `ExpnKind::Paren`).
@@ -288,7 +304,7 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> TokenStream
     // ever used for a comparison against the capture tokenstream.
     let source = pprust::nonterminal_to_string_no_extra_parens(nt);
     let filename = FileName::macro_expansion_source_code(&source);
-    let reparsed_tokens = parse_stream_from_source_str(filename, source, sess, Some(span));
+    let reparsed_tokens = parse_stream_from_source_str(filename, source.clone(), sess, Some(span));
 
     // During early phases of the compiler the AST could get modified
     // directly (e.g., attributes added or removed) and the internal cache
@@ -314,8 +330,13 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> TokenStream
     // modifications, including adding/removing typically non-semantic
     // tokens such as extra braces and commas, don't happen.
     if let Some(tokens) = tokens {
+        if GOOD_TOKEN_CACHE.with(|cache| cache.borrow().contains(&source)) {
+            return tokens;
+        }
+
         // Compare with a non-relaxed delim match to start.
         if tokenstream_probably_equal_for_proc_macro(&tokens, &reparsed_tokens, sess, false) {
+            GOOD_TOKEN_CACHE.with(|cache| cache.borrow_mut().insert(source.clone()));
             return tokens;
         }
 
@@ -324,6 +345,11 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> TokenStream
         // token stream to match up with inserted parenthesis in the reparsed stream.
        let source_with_parens = pprust::nonterminal_to_string(nt);
         let filename_with_parens = FileName::macro_expansion_source_code(&source_with_parens);
+
+        if GOOD_TOKEN_CACHE.with(|cache| cache.borrow().contains(&source_with_parens)) {
+            return tokens;
+        }
+
         let reparsed_tokens_with_parens = parse_stream_from_source_str(
             filename_with_parens,
             source_with_parens,
@@ -339,6 +365,7 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> TokenStream
             sess,
             true,
         ) {
+            GOOD_TOKEN_CACHE.with(|cache| cache.borrow_mut().insert(source.clone()));
             return tokens;
         }
 
@@ -418,9 +445,9 @@ pub fn tokenstream_probably_equal_for_proc_macro(
         // to iterate breaking tokens multiple times. For example:
         // '[BinOpEq(Shr)] => [Gt, Ge] -> [Gt, Gt, Eq]'
         let mut token_trees: SmallVec<[_; 2]>;
-        if let TokenTree::Token(token) = &tree {
+        if let TokenTree::Token(token) = tree {
             let mut out = SmallVec::<[_; 2]>::new();
-            out.push(token.clone());
+            out.push(token);
             // Iterate to fixpoint:
             // * We start off with 'out' containing our initial token, and `temp` empty
             // * If we are able to break any tokens in `out`, then `out` will have
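
The heart of the change is the new thread-local GOOD_TOKEN_CACHE: once a pretty-printed nonterminal has passed `tokenstream_probably_equal_for_proc_macro`, its stringification is recorded, so deeply nested nonterminals are not re-stringified and re-compared on every expansion. Below is a minimal, self-contained sketch of that pattern outside of rustc. It uses the standard library's HashSet in place of rustc's FxHashSet, and `expensive_check` is a hypothetical stand-in for the real comparison, not rustc code.

use std::cell::RefCell;
use std::collections::HashSet;

// Thread-local set of stringifications that already passed the expensive
// comparison (the real patch uses rustc's FxHashSet for the same purpose).
thread_local! {
    static GOOD_TOKEN_CACHE: RefCell<HashSet<String>> = Default::default();
}

// Hypothetical stand-in for `tokenstream_probably_equal_for_proc_macro`.
fn expensive_check(source: &str) -> bool {
    !source.is_empty()
}

// Returns true if `source` passes the check, consulting the cache first so
// the same string is never checked twice on a given thread.
fn check_with_cache(source: &str) -> bool {
    if GOOD_TOKEN_CACHE.with(|cache| cache.borrow().contains(source)) {
        return true; // fast path: this stringification already passed
    }
    if expensive_check(source) {
        GOOD_TOKEN_CACHE.with(|cache| cache.borrow_mut().insert(source.to_owned()));
        return true;
    }
    false
}

fn main() {
    assert!(check_with_cache("a + b")); // slow path, populates the cache
    assert!(check_with_cache("a + b")); // fast path, cache hit
}

Keying the cache by the pretty-printed String sidesteps implementing `Hash` for `TokenStream`; as the patch comment notes, two streams that stringify identically can differ only in information (such as hygiene) that the comparison deliberately ignores anyway, so letting them share a cache entry is sound.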
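
The context of the final hunk describes breaking compound tokens to a fixpoint: BinOpEq(Shr), i.e. `>>=`, first becomes [Gt, Ge] and then [Gt, Gt, Eq]. Here is a rough sketch of that iterate-to-fixpoint idea using plain strings rather than rustc's Token type; `break_token` and its small table are illustrative assumptions, not the compiler's implementation.

// Splits a compound token into two parts, if it can be broken at all.
fn break_token(tok: &str) -> Option<(&'static str, &'static str)> {
    match tok {
        ">>=" => Some((">", ">=")), // BinOpEq(Shr) => [Gt, Ge]
        ">>" => Some((">", ">")),
        ">=" => Some((">", "=")),   // Ge => [Gt, Eq]
        _ => None,
    }
}

// Repeatedly breaks every token in `out` until a pass makes no change.
fn break_to_fixpoint(tok: &str) -> Vec<String> {
    let mut out = vec![tok.to_string()];
    loop {
        let mut temp = Vec::new();
        let mut changed = false;
        for t in &out {
            if let Some((a, b)) = break_token(t) {
                temp.push(a.to_string());
                temp.push(b.to_string());
                changed = true;
            } else {
                temp.push(t.clone());
            }
        }
        out = temp;
        if !changed {
            return out; // fixpoint reached: nothing left to break
        }
    }
}

fn main() {
    // `>>=` => [`>`, `>=`] => [`>`, `>`, `=`], matching the hunk's comment.
    assert_eq!(break_to_fixpoint(">>="), vec![">", ">", "="]);
}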
