From 7504b9bb96236a340c41c95f95cf3c4a09341afc Mon Sep 17 00:00:00 2001 From: Aaron Hill Date: Thu, 25 Mar 2021 18:05:49 -0400 Subject: Avoid double-collection for expression nonterminals --- compiler/rustc_parse/src/parser/expr.rs | 15 +++++++++++++++ compiler/rustc_parse/src/parser/mod.rs | 2 +- compiler/rustc_parse/src/parser/nonterminal.rs | 17 +---------------- 3 files changed, 17 insertions(+), 17 deletions(-) (limited to 'compiler/rustc_parse/src/parser') diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index d64e5173b92..fe190bfe9d9 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -92,6 +92,21 @@ impl<'a> Parser<'a> { self.parse_expr_res(Restrictions::empty(), None) } + /// Parses an expression, forcing tokens to be collected + pub fn parse_expr_force_collect(&mut self) -> PResult<'a, P> { + // If we have outer attributes, then the call to `collect_tokens_trailing_token` + // will be made for us. + if matches!(self.token.kind, TokenKind::Pound | TokenKind::DocComment(..)) { + self.parse_expr() + } else { + // If we don't have outer attributes, then we need to ensure + // that collection happens by using `collect_tokens_no_attrs`. + // Expression don't support custom inner attributes, so `parse_expr` + // will never try to collect tokens if we don't have outer attributes. + self.collect_tokens_no_attrs(|this| this.parse_expr()) + } + } + pub(super) fn parse_anon_const_expr(&mut self) -> PResult<'a, AnonConst> { self.parse_expr().map(|value| AnonConst { id: DUMMY_NODE_ID, value }) } diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 71103840f13..f0ee76d328c 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -987,7 +987,7 @@ impl<'a> Parser<'a> { } // Collect tokens because they are used during lowering to HIR. - let expr = self.collect_tokens_no_attrs(|this| this.parse_expr())?; + let expr = self.parse_expr_force_collect()?; let span = expr.span; match &expr.kind { diff --git a/compiler/rustc_parse/src/parser/nonterminal.rs b/compiler/rustc_parse/src/parser/nonterminal.rs index fc25e883666..0c49d103583 100644 --- a/compiler/rustc_parse/src/parser/nonterminal.rs +++ b/compiler/rustc_parse/src/parser/nonterminal.rs @@ -128,22 +128,7 @@ impl<'a> Parser<'a> { })?) } - // If there are attributes present, then `parse_expr` will end up collecting tokens, - // turning the outer `collect_tokens_no_attrs` into a no-op due to the already present - // tokens. If there are *not* attributes present, then the outer - // `collect_tokens_no_attrs` will ensure that we will end up collecting tokens for the - // expressions. - // - // This is less efficient than it could be, since the outer `collect_tokens_no_attrs` - // still needs to snapshot the `TokenCursor` before calling `parse_expr`, even when - // `parse_expr` will end up collecting tokens. Ideally, this would work more like - // `parse_item`, and take in a `ForceCollect` parameter. However, this would require - // adding a `ForceCollect` parameter in a bunch of places in expression parsing - // for little gain. If the perf impact from this turns out to be noticeable, we should - // revisit this apporach. - NonterminalKind::Expr => { - token::NtExpr(self.collect_tokens_no_attrs(|this| this.parse_expr())?) - } + NonterminalKind::Expr => token::NtExpr(self.parse_expr_force_collect()?), NonterminalKind::Literal => { // The `:literal` matcher does not support attributes token::NtLiteral( -- cgit 1.4.1-3-g733a5 From f94360fd83b49554b6c26999a0030e9cfe800f32 Mon Sep 17 00:00:00 2001 From: Aaron Hill Date: Fri, 26 Mar 2021 23:23:20 -0400 Subject: Always preserve `None`-delimited groups in a captured `TokenStream` Previously, we would silently remove any `None`-delimiters when capturing a `TokenStream`, 'flattenting' them to their inner tokens. This was not normally visible, since we usually have `TokenKind::Interpolated` (which gets converted to a `None`-delimited group during macro invocation) instead of an actual `None`-delimited group. However, there are a couple of cases where this becomes visible to proc-macros: 1. A cross-crate `macro_rules!` macro has a `None`-delimited group stored in its body (as a result of being produced by another `macro_rules!` macro). The cross-crate `macro_rules!` invocation can then expand to an attribute macro invocation, which needs to be able to see the `None`-delimited group. 2. A proc-macro can invoke an attribute proc-macro with its re-collected input. If there are any nonterminals present in the input, they will get re-collected to `None`-delimited groups, which will then get captured as part of the attribute macro invocation. Both of these cases are incredibly obscure, so there hopefully won't be any breakage. This change will allow more agressive 'flattenting' of nonterminals in #82608 without losing `None`-delimited groups. --- compiler/rustc_parse/src/parser/attr_wrapper.rs | 45 ++++++++++++++----- compiler/rustc_parse/src/parser/mod.rs | 25 ++++++++--- .../ui/proc-macro/auxiliary/nested-macro-rules.rs | 9 ++-- src/test/ui/proc-macro/nested-macro-rules.rs | 10 ++--- src/test/ui/proc-macro/nested-macro-rules.stdout | 52 ++++++++++++++++++++-- 5 files changed, 113 insertions(+), 28 deletions(-) (limited to 'compiler/rustc_parse/src/parser') diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index 7512f46988c..36a0fda6458 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -98,21 +98,46 @@ impl<'a> Parser<'a> { } impl CreateTokenStream for LazyTokenStreamImpl { fn create_token_stream(&self) -> TokenStream { - // The token produced by the final call to `next` or `next_desugared` - // was not actually consumed by the callback. The combination - // of chaining the initial token and using `take` produces the desired - // result - we produce an empty `TokenStream` if no calls were made, - // and omit the final token otherwise. + if self.num_calls == 0 { + return TokenStream::new(vec![]); + } + let mut cursor_snapshot = self.cursor_snapshot.clone(); - let tokens = std::iter::once(self.start_token.clone()) - .chain((0..self.num_calls).map(|_| { - if self.desugar_doc_comments { + // Don't skip `None` delimiters, since we want to pass them to + // proc macros. Normally, we'll end up capturing `TokenKind::Interpolated`, + // which gets converted to a `None`-delimited group when we invoke + // a proc-macro. However, it's possible to already have a `None`-delimited + // group in the stream (such as when parsing the output of a proc-macro, + // or in certain unusual cases with cross-crate `macro_rules!` macros). + cursor_snapshot.skip_none_delims = false; + + // The token produced by the final call to `next` or `next_desugared` + // was not actually consumed by the callback. + let num_calls = self.num_calls - 1; + let mut i = 0; + let tokens = + std::iter::once(self.start_token.clone()).chain(std::iter::from_fn(|| { + if i >= num_calls { + return None; + } + + let token = if self.desugar_doc_comments { cursor_snapshot.next_desugared() } else { cursor_snapshot.next() + }; + + // When the `LazyTokenStreamImpl` was original produced, we did *not* + // include `NoDelim` tokens in `num_calls`, since they are normally ignored + // by the parser. Therefore, we only increment our counter for other types of tokens. + if !matches!( + token.0.kind, + token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim) + ) { + i += 1; } - })) - .take(self.num_calls); + Some(token) + })); make_token_stream(tokens, self.append_unglued_token.clone()) } diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index f0ee76d328c..748a8e2bb49 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -172,6 +172,13 @@ struct TokenCursor { // appended to the captured stream when // we evaluate a `LazyTokenStream` append_unglued_token: Option, + // If `true`, skip the delimiters for `None`-delimited groups, + // and just yield the inner tokens. This is `true` during + // normal parsing, since the parser code is not currently prepared + // to handle `None` delimiters. When capturing a `TokenStream`, + // however, we want to handle `None`-delimiters, since + // proc-macros always see `None`-delimited groups. + skip_none_delims: bool, } #[derive(Clone)] @@ -184,13 +191,13 @@ struct TokenCursorFrame { } impl TokenCursorFrame { - fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream) -> Self { + fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream, skip_none_delims: bool) -> Self { TokenCursorFrame { delim, span, - open_delim: delim == token::NoDelim, + open_delim: delim == token::NoDelim && skip_none_delims, tree_cursor: tts.into_trees(), - close_delim: delim == token::NoDelim, + close_delim: delim == token::NoDelim && skip_none_delims, } } } @@ -218,7 +225,7 @@ impl TokenCursor { return (token, spacing); } TokenTree::Delimited(sp, delim, tts) => { - let frame = TokenCursorFrame::new(sp, delim, tts); + let frame = TokenCursorFrame::new(sp, delim, tts, self.skip_none_delims); self.stack.push(mem::replace(&mut self.frame, frame)); } } @@ -276,6 +283,7 @@ impl TokenCursor { .cloned() .collect::() }, + self.skip_none_delims, ), )); @@ -371,12 +379,19 @@ impl<'a> Parser<'a> { prev_token: Token::dummy(), restrictions: Restrictions::empty(), expected_tokens: Vec::new(), + // Skip over the delimiters for `None`-delimited groups token_cursor: TokenCursor { - frame: TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, tokens), + frame: TokenCursorFrame::new( + DelimSpan::dummy(), + token::NoDelim, + tokens, + /* skip_none_delims */ true, + ), stack: Vec::new(), num_next_calls: 0, desugar_doc_comments, append_unglued_token: None, + skip_none_delims: true, }, desugar_doc_comments, unmatched_angle_bracket_count: 0, diff --git a/src/test/ui/proc-macro/auxiliary/nested-macro-rules.rs b/src/test/ui/proc-macro/auxiliary/nested-macro-rules.rs index 52ebe8e7fb2..27676a5cb81 100644 --- a/src/test/ui/proc-macro/auxiliary/nested-macro-rules.rs +++ b/src/test/ui/proc-macro/auxiliary/nested-macro-rules.rs @@ -2,14 +2,15 @@ pub struct FirstStruct; #[macro_export] macro_rules! outer_macro { - ($name:ident) => { + ($name:ident, $attr_struct_name:ident) => { #[macro_export] macro_rules! inner_macro { - ($wrapper:ident) => { - $wrapper!($name) + ($bang_macro:ident, $attr_macro:ident) => { + $bang_macro!($name); + #[$attr_macro] struct $attr_struct_name {} } } } } -outer_macro!(FirstStruct); +outer_macro!(FirstStruct, FirstAttrStruct); diff --git a/src/test/ui/proc-macro/nested-macro-rules.rs b/src/test/ui/proc-macro/nested-macro-rules.rs index 2fef0e5fad0..25ffcfad7c7 100644 --- a/src/test/ui/proc-macro/nested-macro-rules.rs +++ b/src/test/ui/proc-macro/nested-macro-rules.rs @@ -1,7 +1,7 @@ // run-pass // aux-build:nested-macro-rules.rs // aux-build:test-macros.rs -// compile-flags: -Z span-debug +// compile-flags: -Z span-debug -Z macro-backtrace // edition:2018 #![no_std] // Don't load unnecessary hygiene information from std @@ -10,14 +10,14 @@ extern crate std; extern crate nested_macro_rules; extern crate test_macros; -use test_macros::print_bang; +use test_macros::{print_bang, print_attr}; use nested_macro_rules::FirstStruct; struct SecondStruct; fn main() { - nested_macro_rules::inner_macro!(print_bang); + nested_macro_rules::inner_macro!(print_bang, print_attr); - nested_macro_rules::outer_macro!(SecondStruct); - inner_macro!(print_bang); + nested_macro_rules::outer_macro!(SecondStruct, SecondAttrStruct); + inner_macro!(print_bang, print_attr); } diff --git a/src/test/ui/proc-macro/nested-macro-rules.stdout b/src/test/ui/proc-macro/nested-macro-rules.stdout index dcafe3b4bda..8292617fc16 100644 --- a/src/test/ui/proc-macro/nested-macro-rules.stdout +++ b/src/test/ui/proc-macro/nested-macro-rules.stdout @@ -5,10 +5,32 @@ PRINT-BANG INPUT (DEBUG): TokenStream [ stream: TokenStream [ Ident { ident: "FirstStruct", - span: $DIR/auxiliary/nested-macro-rules.rs:15:14: 15:25 (#7), + span: $DIR/auxiliary/nested-macro-rules.rs:16:14: 16:25 (#7), }, ], - span: $DIR/auxiliary/nested-macro-rules.rs:9:27: 9:32 (#6), + span: $DIR/auxiliary/nested-macro-rules.rs:9:30: 9:35 (#6), + }, +] +PRINT-ATTR INPUT (DISPLAY): struct FirstAttrStruct { } +PRINT-ATTR INPUT (DEBUG): TokenStream [ + Ident { + ident: "struct", + span: $DIR/auxiliary/nested-macro-rules.rs:10:32: 10:38 (#6), + }, + Group { + delimiter: None, + stream: TokenStream [ + Ident { + ident: "FirstAttrStruct", + span: $DIR/auxiliary/nested-macro-rules.rs:16:27: 16:42 (#7), + }, + ], + span: $DIR/auxiliary/nested-macro-rules.rs:10:39: 10:56 (#6), + }, + Group { + delimiter: Brace, + stream: TokenStream [], + span: $DIR/auxiliary/nested-macro-rules.rs:10:57: 10:59 (#6), }, ] PRINT-BANG INPUT (DISPLAY): SecondStruct @@ -18,9 +40,31 @@ PRINT-BANG INPUT (DEBUG): TokenStream [ stream: TokenStream [ Ident { ident: "SecondStruct", - span: $DIR/nested-macro-rules.rs:21:38: 21:50 (#13), + span: $DIR/nested-macro-rules.rs:21:38: 21:50 (#16), }, ], - span: $DIR/auxiliary/nested-macro-rules.rs:9:27: 9:32 (#12), + span: $DIR/auxiliary/nested-macro-rules.rs:9:30: 9:35 (#15), + }, +] +PRINT-ATTR INPUT (DISPLAY): struct SecondAttrStruct { } +PRINT-ATTR INPUT (DEBUG): TokenStream [ + Ident { + ident: "struct", + span: $DIR/auxiliary/nested-macro-rules.rs:10:32: 10:38 (#15), + }, + Group { + delimiter: None, + stream: TokenStream [ + Ident { + ident: "SecondAttrStruct", + span: $DIR/nested-macro-rules.rs:21:52: 21:68 (#16), + }, + ], + span: $DIR/auxiliary/nested-macro-rules.rs:10:39: 10:56 (#15), + }, + Group { + delimiter: Brace, + stream: TokenStream [], + span: $DIR/auxiliary/nested-macro-rules.rs:10:57: 10:59 (#15), }, ] -- cgit 1.4.1-3-g733a5