1 files changed, 107 insertions, 40 deletions
diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs
index 2fa25e40c6b..4b97c8b0a81 100644
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@@ -19,13 +19,16 @@ pub use path::PathStyle;
 
 use rustc_ast::ptr::P;
 use rustc_ast::token::{self, DelimToken, Token, TokenKind};
+use rustc_ast::tokenstream::AttributesData;
 use rustc_ast::tokenstream::{self, DelimSpan, Spacing};
-use rustc_ast::tokenstream::{TokenStream, TokenTree, TreeAndSpacing};
+use rustc_ast::tokenstream::{TokenStream, TokenTree};
+use rustc_ast::AttrId;
 use rustc_ast::DUMMY_NODE_ID;
 use rustc_ast::{self as ast, AnonConst, AstLike, AttrStyle, AttrVec, Const, CrateSugar, Extern};
 use rustc_ast::{Async, Expr, ExprKind, MacArgs, MacDelimiter, Mutability, StrLit, Unsafe};
 use rustc_ast::{Visibility, VisibilityKind};
 use rustc_ast_pretty::pprust;
+use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::sync::Lrc;
 use rustc_errors::PResult;
 use rustc_errors::{struct_span_err, Applicability, DiagnosticBuilder, FatalError};
@@ -34,6 +37,7 @@ use rustc_span::source_map::{Span, DUMMY_SP};
 use rustc_span::symbol::{kw, sym, Ident, Symbol};
 use tracing::debug;
 
+use std::ops::Range;
 use std::{cmp, mem, slice};
 
 bitflags::bitflags! {
@@ -64,6 +68,7 @@ pub enum ForceCollect {
     No,
 }
 
+#[derive(Debug, Eq, PartialEq)]
 pub enum TrailingToken {
     None,
     Semi,
@@ -111,6 +116,7 @@ pub struct Parser<'a> {
     pub token_spacing: Spacing,
     /// The previous token.
     pub prev_token: Token,
+    pub capture_cfg: bool,
     restrictions: Restrictions,
     expected_tokens: Vec<TokenType>,
     // Important: This must only be advanced from `next_tok`
@@ -134,6 +140,44 @@ pub struct Parser<'a> {
     pub last_type_ascription: Option<(Span, bool /* likely path typo */)>,
     /// If present, this `Parser` is not parsing Rust code but rather a macro call.
     subparser_name: Option<&'static str>,
+    capture_state: CaptureState,
+}
+
+/// Indicates a range of tokens that should be replaced by
+/// the tokens in the provided vector. This is used in two
+/// places during token collection:
+///
+/// 1. During the parsing of an AST node that may have a `#[derive]`
+/// attribute, we parse a nested AST node that has `#[cfg]` or `#[cfg_attr]`
+/// In this case, we use a `ReplaceRange` to replace the entire inner AST node
+/// with `FlatToken::AttrTarget`, allowing us to perform eager cfg-expansion
+/// on a `AttrAnnotatedTokenStream`
+///
+/// 2. When we parse an inner attribute while collecting tokens. We
+/// remove inner attributes from the token stream entirely, and
+/// instead track them through the `attrs` field on the AST node.
+/// This allows us to easily manipulate them (for example, removing
+/// the first macro inner attribute to invoke a proc-macro).
+/// When create a `TokenStream`, the inner attributes get inserted
+/// into the proper place in the token stream.
+pub type ReplaceRange = (Range<u32>, Vec<(FlatToken, Spacing)>);
+
+/// Controls how we capture tokens. Capturing can be expensive,
+/// so we try to avoid performing capturing in cases where
+/// we will never need a `AttrAnnotatedTokenStream`
+#[derive(Copy, Clone)]
+pub enum Capturing {
+    /// We aren't performing any capturing - this is the default mode.
+    No,
+    /// We are capturing tokens
+    Yes,
+}
+
+#[derive(Clone)]
+struct CaptureState {
+    capturing: Capturing,
+    replace_ranges: Vec<ReplaceRange>,
+    inner_attr_ranges: FxHashMap<AttrId, ReplaceRange>,
 }
 
 impl<'a> Drop for Parser<'a> {
@@ -167,18 +211,11 @@ struct TokenCursor {
     // want to capture just the first 'unglued' token.
     // For example, capturing the `Vec<u8>`
     // in `Option<Vec<u8>>` requires us to unglue
-    // the trailing `>>` token. The `append_unglued_token`
+    // the trailing `>>` token. The `break_last_token`
     // field is used to track this token - it gets
     // appended to the captured stream when
     // we evaluate a `LazyTokenStream`
-    append_unglued_token: Option<TreeAndSpacing>,
-    // If `true`, skip the delimiters for `None`-delimited groups,
-    // and just yield the inner tokens. This is `true` during
-    // normal parsing, since the parser code is not currently prepared
-    // to handle `None` delimiters. When capturing a `TokenStream`,
-    // however, we want to handle `None`-delimiters, since
-    // proc-macros always see `None`-delimited groups.
-    skip_none_delims: bool,
+    break_last_token: bool,
 }
 
 #[derive(Clone)]
@@ -191,13 +228,13 @@ struct TokenCursorFrame {
 }
 
 impl TokenCursorFrame {
-    fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream, skip_none_delims: bool) -> Self {
+    fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream) -> Self {
         TokenCursorFrame {
             delim,
             span,
-            open_delim: delim == token::NoDelim && skip_none_delims,
+            open_delim: false,
             tree_cursor: tts.into_trees(),
-            close_delim: delim == token::NoDelim && skip_none_delims,
+            close_delim: false,
         }
     }
 }
@@ -225,7 +262,7 @@ impl TokenCursor {
                     return (token, spacing);
                 }
                 TokenTree::Delimited(sp, delim, tts) => {
-                    let frame = TokenCursorFrame::new(sp, delim, tts, self.skip_none_delims);
+                    let frame = TokenCursorFrame::new(sp, delim, tts);
                     self.stack.push(mem::replace(&mut self.frame, frame));
                 }
             }
@@ -283,7 +320,6 @@ impl TokenCursor {
                         .cloned()
                         .collect::<TokenStream>()
                 },
-                self.skip_none_delims,
             ),
         ));
 
@@ -372,26 +408,24 @@ impl<'a> Parser<'a> {
         desugar_doc_comments: bool,
         subparser_name: Option<&'static str>,
     ) -> Self {
+        let mut start_frame = TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, tokens);
+        start_frame.open_delim = true;
+        start_frame.close_delim = true;
+
         let mut parser = Parser {
             sess,
             token: Token::dummy(),
             token_spacing: Spacing::Alone,
             prev_token: Token::dummy(),
+            capture_cfg: false,
             restrictions: Restrictions::empty(),
             expected_tokens: Vec::new(),
-            // Skip over the delimiters for `None`-delimited groups
             token_cursor: TokenCursor {
-                frame: TokenCursorFrame::new(
-                    DelimSpan::dummy(),
-                    token::NoDelim,
-                    tokens,
-                    /* skip_none_delims */ true,
-                ),
+                frame: start_frame,
                 stack: Vec::new(),
                 num_next_calls: 0,
                 desugar_doc_comments,
-                append_unglued_token: None,
-                skip_none_delims: true,
+                break_last_token: false,
             },
             desugar_doc_comments,
             unmatched_angle_bracket_count: 0,
@@ -400,6 +434,11 @@ impl<'a> Parser<'a> {
             last_unexpected_token_span: None,
             last_type_ascription: None,
             subparser_name,
+            capture_state: CaptureState {
+                capturing: Capturing::No,
+                replace_ranges: Vec::new(),
+                inner_attr_ranges: Default::default(),
+            },
         };
 
         // Make parser point to the first token.
@@ -409,21 +448,29 @@ impl<'a> Parser<'a> {
     }
 
     fn next_tok(&mut self, fallback_span: Span) -> (Token, Spacing) {
-        let (mut next, spacing) = if self.desugar_doc_comments {
-            self.token_cursor.next_desugared()
-        } else {
-            self.token_cursor.next()
-        };
-        self.token_cursor.num_next_calls += 1;
-        // We've retrieved an token from the underlying
-        // cursor, so we no longer need to worry about
-        // an unglued token. See `break_and_eat` for more details
-        self.token_cursor.append_unglued_token = None;
-        if next.span.is_dummy() {
-            // Tweak the location for better diagnostics, but keep syntactic context intact.
-            next.span = fallback_span.with_ctxt(next.span.ctxt());
+        loop {
+            let (mut next, spacing) = if self.desugar_doc_comments {
+                self.token_cursor.next_desugared()
+            } else {
+                self.token_cursor.next()
+            };
+            self.token_cursor.num_next_calls += 1;
+            // We've retrieved an token from the underlying
+            // cursor, so we no longer need to worry about
+            // an unglued token. See `break_and_eat` for more details
+            self.token_cursor.break_last_token = false;
+            if next.span.is_dummy() {
+                // Tweak the location for better diagnostics, but keep syntactic context intact.
+                next.span = fallback_span.with_ctxt(next.span.ctxt());
+            }
+            if matches!(
+                next.kind,
+                token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim)
+            ) {
+                continue;
+            }
+            return (next, spacing);
         }
-        (next, spacing)
     }
 
     pub fn unexpected<T>(&mut self) -> PResult<'a, T> {
@@ -621,8 +668,7 @@ impl<'a> Parser<'a> {
                 // If we consume any additional tokens, then this token
                 // is not needed (we'll capture the entire 'glued' token),
                 // and `next_tok` will set this field to `None`
-                self.token_cursor.append_unglued_token =
-                    Some((TokenTree::Token(self.token.clone()), Spacing::Alone));
+                self.token_cursor.break_last_token = true;
                 // Use the spacing of the glued token as the spacing
                 // of the unglued second token.
                 self.bump_with((Token::new(second, second_span), self.token_spacing));
@@ -1304,3 +1350,24 @@ pub fn emit_unclosed_delims(unclosed_delims: &mut Vec<UnmatchedBrace>, sess: &Pa
         }
     }
 }
+
+/// A helper struct used when building a `AttrAnnotatedTokenStream` from
+/// a `LazyTokenStream`. Both delimiter and non-delimited tokens
+/// are stored as `FlatToken::Token`. A vector of `FlatToken`s
+/// is then 'parsed' to build up a `AttrAnnotatedTokenStream` with nested
+/// `AttrAnnotatedTokenTree::Delimited` tokens
+#[derive(Debug, Clone)]
+pub enum FlatToken {
+    /// A token - this holds both delimiter (e.g. '{' and '}')
+    /// and non-delimiter tokens
+    Token(Token),
+    /// Holds the `AttributesData` for an AST node. The
+    /// `AttributesData` is inserted directly into the
+    /// constructed `AttrAnnotatedTokenStream` as
+    /// a `AttrAnnotatedTokenTree::Attributes`
+    AttrTarget(AttributesData),
+    /// A special 'empty' token that is ignored during the conversion
+    /// to a `AttrAnnotatedTokenStream`. This is used to simplify the
+    /// handling of replace ranges.
+    Empty,
+}