Diffstat (limited to 'compiler/rustc_parse/src/parser/mod.rs')
-rw-r--r--  compiler/rustc_parse/src/parser/mod.rs | 256
1 file changed, 115 insertions(+), 141 deletions(-)
diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs
index cb6be8f412c..cd61584a876 100644
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@@ -19,7 +19,7 @@ pub use pat::{CommaRecoveryMode, RecoverColon, RecoverComma};
 pub use path::PathStyle;
 
 use rustc_ast::ptr::P;
-use rustc_ast::token::{self, DelimToken, Nonterminal, Token, TokenKind};
+use rustc_ast::token::{self, Delimiter, Nonterminal, Token, TokenKind};
 use rustc_ast::tokenstream::AttributesData;
 use rustc_ast::tokenstream::{self, DelimSpan, Spacing};
 use rustc_ast::tokenstream::{TokenStream, TokenTree};
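
The only change in this hunk is the import: `token::DelimToken` has been renamed to `token::Delimiter` in `rustc_ast`. As a rough sketch (not the real definition, which lives in `rustc_ast::token`), the variants used throughout this diff look like this; `Invisible` replaces the old `NoDelim`:

    // Sketch of the variants that appear in this diff; see rustc_ast::token
    // for the actual enum.
    #[derive(Copy, Clone, PartialEq, Eq, Debug)]
    pub enum Delimiter {
        /// `( ... )`
        Parenthesis,
        /// `{ ... }`
        Brace,
        /// `[ ... ]`
        Bracket,
        /// An invisible grouping with no source characters; the old `NoDelim`.
        Invisible,
    }
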
@@ -123,8 +123,8 @@ pub struct Parser<'a> {
     pub capture_cfg: bool,
     restrictions: Restrictions,
     expected_tokens: Vec<TokenType>,
-    // Important: This must only be advanced from `next_tok`
-    // to ensure that `token_cursor.num_next_calls` is updated properly
+    // Important: This must only be advanced from `bump` to ensure that
+    // `token_cursor.num_next_calls` is updated properly.
     token_cursor: TokenCursor,
     desugar_doc_comments: bool,
     /// This field is used to keep track of how many left angle brackets we have seen. This is
@@ -150,6 +150,11 @@ pub struct Parser<'a> {
     pub current_closure: Option<ClosureSpans>,
 }
 
+// This type is used a lot, e.g. it's cloned when matching many declarative macro rules. Make sure
+// it doesn't unintentionally get bigger.
+#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
+rustc_data_structures::static_assert_size!(Parser<'_>, 328);
+
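
The assertion just added keeps `Parser` at a known size on 64-bit x86, since the type is cloned heavily during declarative-macro matching. A minimal sketch of how such a compile-time size check can be expressed (the exact expansion of `rustc_data_structures::static_assert_size!` is an assumption here):

    // If the measured size ever differs from the stated one, the two array
    // types no longer match and compilation fails.
    macro_rules! static_assert_size {
        ($ty:ty, $size:expr) => {
            const _: [(); $size] = [(); ::std::mem::size_of::<$ty>()];
        };
    }

    struct Example {
        a: u64,
        b: u64,
    }

    static_assert_size!(Example, 16);

    fn main() {}
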
 /// Stores span information about a closure.
 #[derive(Clone)]
 pub struct ClosureSpans {
@@ -203,12 +208,15 @@ impl<'a> Drop for Parser<'a> {
 
 #[derive(Clone)]
 struct TokenCursor {
+    // The current (innermost) frame. `frame` and `stack` could be combined,
+    // but keeping them separate is faster: it lets `frame` be accessed
+    // directly rather than via something like `stack.last().unwrap()` or
+    // `stack[stack.len() - 1]`.
     frame: TokenCursorFrame,
+    // Additional frames that enclose `frame`.
     stack: Vec<TokenCursorFrame>,
     desugar_doc_comments: bool,
-    // Counts the number of calls to `{,inlined_}next` or
-    // `{,inlined_}next_desugared`, depending on whether
-    // `desugar_doc_comments` is set.
+    // Counts the number of calls to `{,inlined_}next`.
     num_next_calls: usize,
     // During parsing, we may sometimes need to 'unglue' a
     // glued token into two component tokens
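
The comment on `frame` above justifies storing the innermost frame inline rather than only at the top of the `Vec`. A hypothetical micro-illustration of that layout choice (types here are invented for illustration):

    struct Frame(u32);

    // Combined: the innermost frame is only reachable through the Vec.
    struct CombinedCursor {
        stack: Vec<Frame>,
    }

    // Split, as in `TokenCursor` above: the hot path is a plain field access.
    struct SplitCursor {
        frame: Frame,
        stack: Vec<Frame>,
    }

    impl CombinedCursor {
        fn current(&self) -> &Frame {
            self.stack.last().unwrap() // length check + unwrap on every access
        }
    }

    impl SplitCursor {
        fn current(&self) -> &Frame {
            &self.frame // direct field access
        }
    }

    fn main() {}
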
@@ -236,75 +244,60 @@ struct TokenCursor {
 
 #[derive(Clone)]
 struct TokenCursorFrame {
-    delim: token::DelimToken,
-    span: DelimSpan,
-    open_delim: bool,
+    delim_sp: Option<(Delimiter, DelimSpan)>,
     tree_cursor: tokenstream::Cursor,
-    close_delim: bool,
 }
 
 impl TokenCursorFrame {
-    fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream) -> Self {
-        TokenCursorFrame {
-            delim,
-            span,
-            open_delim: false,
-            tree_cursor: tts.into_trees(),
-            close_delim: false,
-        }
+    fn new(delim_sp: Option<(Delimiter, DelimSpan)>, tts: TokenStream) -> Self {
+        TokenCursorFrame { delim_sp, tree_cursor: tts.into_trees() }
     }
 }
 
 impl TokenCursor {
-    fn next(&mut self) -> (Token, Spacing) {
-        self.inlined_next()
+    fn next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) {
+        self.inlined_next(desugar_doc_comments)
     }
 
     /// This always-inlined version should only be used on hot code paths.
     #[inline(always)]
-    fn inlined_next(&mut self) -> (Token, Spacing) {
+    fn inlined_next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) {
         loop {
-            let (tree, spacing) = if !self.frame.open_delim {
-                self.frame.open_delim = true;
-                TokenTree::open_tt(self.frame.span, self.frame.delim).into()
-            } else if let Some(tree) = self.frame.tree_cursor.next_with_spacing() {
-                tree
-            } else if !self.frame.close_delim {
-                self.frame.close_delim = true;
-                TokenTree::close_tt(self.frame.span, self.frame.delim).into()
+            // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
+            // #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions
+            // below can be removed.
+            if let Some((tree, spacing)) = self.frame.tree_cursor.next_with_spacing_ref() {
+                match tree {
+                    &TokenTree::Token(ref token) => match (desugar_doc_comments, token) {
+                        (true, &Token { kind: token::DocComment(_, attr_style, data), span }) => {
+                            return self.desugar(attr_style, data, span);
+                        }
+                        _ => return (token.clone(), *spacing),
+                    },
+                    &TokenTree::Delimited(sp, delim, ref tts) => {
+                        // The open delimiter is handled right here (returned below, or skipped if
+                        // invisible), so the frame no longer needs an `open_delim` flag.
+                        let frame = TokenCursorFrame::new(Some((delim, sp)), tts.clone());
+                        self.stack.push(mem::replace(&mut self.frame, frame));
+                        if delim != Delimiter::Invisible {
+                            return (Token::new(token::OpenDelim(delim), sp.open), Spacing::Alone);
+                        }
+                        // No open delimiter to return; continue on to the next iteration.
+                    }
+                };
             } else if let Some(frame) = self.stack.pop() {
+                if let Some((delim, span)) = self.frame.delim_sp && delim != Delimiter::Invisible {
+                    self.frame = frame;
+                    return (Token::new(token::CloseDelim(delim), span.close), Spacing::Alone);
+                }
                 self.frame = frame;
-                continue;
+                // No close delimiter to return; continue on to the next iteration.
             } else {
-                (TokenTree::Token(Token::new(token::Eof, DUMMY_SP)), Spacing::Alone)
-            };
-
-            match tree {
-                TokenTree::Token(token) => {
-                    return (token, spacing);
-                }
-                TokenTree::Delimited(sp, delim, tts) => {
-                    let frame = TokenCursorFrame::new(sp, delim, tts);
-                    self.stack.push(mem::replace(&mut self.frame, frame));
-                }
+                return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone);
             }
         }
     }
 
-    fn next_desugared(&mut self) -> (Token, Spacing) {
-        self.inlined_next_desugared()
-    }
-
-    /// This always-inlined version should only be used on hot code paths.
-    #[inline(always)]
-    fn inlined_next_desugared(&mut self) -> (Token, Spacing) {
-        let (data, attr_style, sp) = match self.inlined_next() {
-            (Token { kind: token::DocComment(_, attr_style, data), span }, _) => {
-                (data, attr_style, span)
-            }
-            tok => return tok,
-        };
-
+    fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> (Token, Spacing) {
         // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
         // required to wrap the text.
         let mut num_of_hashes = 0;
@@ -318,14 +311,14 @@ impl TokenCursor {
             num_of_hashes = cmp::max(num_of_hashes, count);
         }
 
-        let delim_span = DelimSpan::from_single(sp);
+        let delim_span = DelimSpan::from_single(span);
         let body = TokenTree::Delimited(
             delim_span,
-            token::Bracket,
+            Delimiter::Bracket,
             [
-                TokenTree::token(token::Ident(sym::doc, false), sp),
-                TokenTree::token(token::Eq, sp),
-                TokenTree::token(TokenKind::lit(token::StrRaw(num_of_hashes), data, None), sp),
+                TokenTree::token(token::Ident(sym::doc, false), span),
+                TokenTree::token(token::Eq, span),
+                TokenTree::token(TokenKind::lit(token::StrRaw(num_of_hashes), data, None), span),
             ]
             .iter()
             .cloned()
@@ -335,15 +328,14 @@ impl TokenCursor {
         self.stack.push(mem::replace(
             &mut self.frame,
             TokenCursorFrame::new(
-                delim_span,
-                token::NoDelim,
+                None,
                 if attr_style == AttrStyle::Inner {
-                    [TokenTree::token(token::Pound, sp), TokenTree::token(token::Not, sp), body]
+                    [TokenTree::token(token::Pound, span), TokenTree::token(token::Not, span), body]
                         .iter()
                         .cloned()
                         .collect::<TokenStream>()
                 } else {
-                    [TokenTree::token(token::Pound, sp), body]
+                    [TokenTree::token(token::Pound, span), body]
                         .iter()
                         .cloned()
                         .collect::<TokenStream>()
@@ -351,7 +343,7 @@ impl TokenCursor {
             ),
         ));
 
-        self.next()
+        self.next(/* desugar_doc_comments */ false)
     }
 }
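
`desugar` above turns a doc comment token into the equivalent `#[doc = ...]` / `#![doc = ...]` attribute tokens, choosing enough `#`s that the comment text can be carried in a raw string literal. A self-contained illustration of the hash-counting rule, mirrored on plain strings rather than token trees (the function name is invented; the loop mirrors the counting code in the hunk above):

    // `/// text`  desugars to  `#[doc = r"text"]`
    // `//! text`  desugars to  `#![doc = r"text"]`
    // with the raw string using the minimum number of `#`s such that no
    // `"#...#` run inside `text` can terminate the literal early.
    fn min_hashes(text: &str) -> usize {
        let mut num_of_hashes = 0;
        let mut count = 0;
        for ch in text.chars() {
            count = match ch {
                '"' => 1,
                '#' if count > 0 => count + 1,
                _ => 0,
            };
            num_of_hashes = num_of_hashes.max(count);
        }
        num_of_hashes
    }

    fn main() {
        assert_eq!(min_hashes("plain text"), 0);
        // The text contains the sequence `"#`, so two `#`s are needed.
        assert_eq!(min_hashes(r##"says "#hello"#"##), 2);
    }
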
 
@@ -436,10 +428,6 @@ impl<'a> Parser<'a> {
         desugar_doc_comments: bool,
         subparser_name: Option<&'static str>,
     ) -> Self {
-        let mut start_frame = TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, tokens);
-        start_frame.open_delim = true;
-        start_frame.close_delim = true;
-
         let mut parser = Parser {
             sess,
             token: Token::dummy(),
@@ -449,7 +437,7 @@ impl<'a> Parser<'a> {
             restrictions: Restrictions::empty(),
             expected_tokens: Vec::new(),
             token_cursor: TokenCursor {
-                frame: start_frame,
+                frame: TokenCursorFrame::new(None, tokens),
                 stack: Vec::new(),
                 num_next_calls: 0,
                 desugar_doc_comments,
@@ -476,33 +464,6 @@ impl<'a> Parser<'a> {
         parser
     }
 
-    #[inline]
-    fn next_tok(&mut self, fallback_span: Span) -> (Token, Spacing) {
-        loop {
-            let (mut next, spacing) = if self.desugar_doc_comments {
-                self.token_cursor.inlined_next_desugared()
-            } else {
-                self.token_cursor.inlined_next()
-            };
-            self.token_cursor.num_next_calls += 1;
-            // We've retrieved an token from the underlying
-            // cursor, so we no longer need to worry about
-            // an unglued token. See `break_and_eat` for more details
-            self.token_cursor.break_last_token = false;
-            if next.span.is_dummy() {
-                // Tweak the location for better diagnostics, but keep syntactic context intact.
-                next.span = fallback_span.with_ctxt(next.span.ctxt());
-            }
-            if matches!(
-                next.kind,
-                token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim)
-            ) {
-                continue;
-            }
-            return (next, spacing);
-        }
-    }
-
     pub fn unexpected<T>(&mut self) -> PResult<'a, T> {
         match self.expect_one_of(&[], &[]) {
             Err(e) => Err(e),
@@ -665,7 +626,7 @@ impl<'a> Parser<'a> {
         self.is_keyword_ahead(dist, &[kw::Const])
             && self.look_ahead(dist + 1, |t| match t.kind {
                 token::Interpolated(ref nt) => matches!(**nt, token::NtBlock(..)),
-                token::OpenDelim(DelimToken::Brace) => true,
+                token::OpenDelim(Delimiter::Brace) => true,
                 _ => false,
             })
     }
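
The two-token look-ahead above tells a `const` that begins an inline const block apart from one that begins an ordinary item. In surface syntax (the block form was an unstable feature at the time of this change):

    // `const` followed by an identifier: a const item.
    const LEN: usize = 4;

    fn main() {
        // `const` followed by `{` (or an interpolated block): an inline const
        // block expression, unstable when this diff was written:
        // let n = const { LEN * 2 };
        let _ = LEN;
    }
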
@@ -697,7 +658,7 @@ impl<'a> Parser<'a> {
                 //
                 // If we consume any additional tokens, then this token
                 // is not needed (we'll capture the entire 'glued' token),
-                // and `next_tok` will set this field to `None`
+                // and `bump` will set this field to `None`
                 self.token_cursor.break_last_token = true;
                 // Use the spacing of the glued token as the spacing
                 // of the unglued second token.
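
`break_last_token` exists because some source tokens are lexed as a single "glued" token that the parser later has to split. The classic case, shown only as motivation (the splitting itself happens in `break_and_eat`):

    // The trailing `>>` is lexed as one right-shift token; to close both
    // generic argument lists the parser must break it into two `>` tokens,
    // which is the "unglued token" situation referred to above.
    fn nested() -> Vec<Vec<u8>> {
        Vec::new()
    }

    fn main() {
        assert!(nested().is_empty());
    }
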
@@ -841,7 +802,7 @@ impl<'a> Parser<'a> {
                                     .span_suggestion_verbose(
                                         self.prev_token.span.shrink_to_hi().until(self.token.span),
                                         &msg,
-                                        " @ ".to_string(),
+                                        " @ ",
                                         Applicability::MaybeIncorrect,
                                     )
                                     .emit();
@@ -857,7 +818,7 @@ impl<'a> Parser<'a> {
                                         .span_suggestion_short(
                                             sp,
                                             &format!("missing `{}`", token_str),
-                                            token_str.into(),
+                                            token_str,
                                             Applicability::MaybeIncorrect,
                                         )
                                         .emit();
@@ -993,7 +954,7 @@ impl<'a> Parser<'a> {
 
     fn parse_delim_comma_seq<T>(
         &mut self,
-        delim: DelimToken,
+        delim: Delimiter,
         f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
     ) -> PResult<'a, (Vec<T>, bool)> {
         self.parse_unspanned_seq(
@@ -1008,7 +969,7 @@ impl<'a> Parser<'a> {
         &mut self,
         f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
     ) -> PResult<'a, (Vec<T>, bool)> {
-        self.parse_delim_comma_seq(token::Paren, f)
+        self.parse_delim_comma_seq(Delimiter::Parenthesis, f)
     }
 
     /// Advance the parser by one token using provided token as the next one.
@@ -1019,12 +980,6 @@ impl<'a> Parser<'a> {
     /// This always-inlined version should only be used on hot code paths.
     #[inline(always)]
     fn inlined_bump_with(&mut self, (next_token, next_spacing): (Token, Spacing)) {
-        // Bumping after EOF is a bad sign, usually an infinite loop.
-        if self.prev_token.kind == TokenKind::Eof {
-            let msg = "attempted to bump the parser past EOF (may be stuck in a loop)";
-            self.span_bug(self.token.span, msg);
-        }
-
         // Update the current and previous tokens.
         self.prev_token = mem::replace(&mut self.token, next_token);
         self.token_spacing = next_spacing;
@@ -1035,8 +990,24 @@ impl<'a> Parser<'a> {
 
     /// Advance the parser by one token.
     pub fn bump(&mut self) {
-        let next_token = self.next_tok(self.token.span);
-        self.inlined_bump_with(next_token);
+        // Note: destructuring here would give nicer code, but it was found in #96210 to be slower
+        // than `.0`/`.1` access.
+        let mut next = self.token_cursor.inlined_next(self.desugar_doc_comments);
+        self.token_cursor.num_next_calls += 1;
+        // We've retrieved a token from the underlying cursor, so we no longer
+        // need to worry about an unglued token. See `break_and_eat` for more
+        // details.
+        self.token_cursor.break_last_token = false;
+        if next.0.span.is_dummy() {
+            // Tweak the location for better diagnostics, but keep syntactic context intact.
+            let fallback_span = self.token.span;
+            next.0.span = fallback_span.with_ctxt(next.0.span.ctxt());
+        }
+        debug_assert!(!matches!(
+            next.0.kind,
+            token::OpenDelim(Delimiter::Invisible) | token::CloseDelim(Delimiter::Invisible)
+        ));
+        self.inlined_bump_with(next)
     }
 
     /// Look-ahead `dist` tokens of `self.token` and get access to that token there.
@@ -1047,10 +1018,10 @@ impl<'a> Parser<'a> {
         }
 
         let frame = &self.token_cursor.frame;
-        if frame.delim != DelimToken::NoDelim {
+        if let Some((delim, span)) = frame.delim_sp && delim != Delimiter::Invisible {
             let all_normal = (0..dist).all(|i| {
                 let token = frame.tree_cursor.look_ahead(i);
-                !matches!(token, Some(TokenTree::Delimited(_, DelimToken::NoDelim, _)))
+                !matches!(token, Some(TokenTree::Delimited(_, Delimiter::Invisible, _)))
             });
             if all_normal {
                 return match frame.tree_cursor.look_ahead(dist - 1) {
@@ -1060,7 +1031,7 @@ impl<'a> Parser<'a> {
                             looker(&Token::new(token::OpenDelim(*delim), dspan.open))
                         }
                     },
-                    None => looker(&Token::new(token::CloseDelim(frame.delim), frame.span.close)),
+                    None => looker(&Token::new(token::CloseDelim(delim), span.close)),
                 };
             }
         }
@@ -1069,10 +1040,10 @@ impl<'a> Parser<'a> {
         let mut i = 0;
         let mut token = Token::dummy();
         while i < dist {
-            token = cursor.next().0;
+            token = cursor.next(/* desugar_doc_comments */ false).0;
             if matches!(
                 token.kind,
-                token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim)
+                token::OpenDelim(Delimiter::Invisible) | token::CloseDelim(Delimiter::Invisible)
             ) {
                 continue;
             }
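
The slow path above clones the token cursor and advances the clone, so the parser's real position is untouched. A simplified analogue of that idea on an ordinary iterator (names invented for illustration; rustc's cursor is not an `Iterator`):

    fn peek_ahead<I: Iterator<Item = char> + Clone>(cursor: &I, dist: usize) -> Option<char> {
        // Work on a clone; the caller's cursor does not move.
        let mut lookahead = cursor.clone();
        lookahead.nth(dist)
    }

    fn main() {
        let cursor = "fn main()".chars();
        assert_eq!(peek_ahead(&cursor, 3), Some('m'));
        // The original cursor still starts at the first character.
        assert_eq!(cursor.clone().next(), Some('f'));
    }
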
@@ -1108,7 +1079,7 @@ impl<'a> Parser<'a> {
     /// Parses constness: `const` or nothing.
     fn parse_constness(&mut self) -> Const {
         // Avoid const blocks to be parsed as const items
-        if self.look_ahead(1, |t| t != &token::OpenDelim(DelimToken::Brace))
+        if self.look_ahead(1, |t| t != &token::OpenDelim(Delimiter::Brace))
             && self.eat_keyword(kw::Const)
         {
             Const::Yes(self.prev_token.uninterpolated_span())
@@ -1171,9 +1142,9 @@ impl<'a> Parser<'a> {
 
     fn parse_mac_args_common(&mut self, delimited_only: bool) -> PResult<'a, MacArgs> {
         Ok(
-            if self.check(&token::OpenDelim(DelimToken::Paren))
-                || self.check(&token::OpenDelim(DelimToken::Bracket))
-                || self.check(&token::OpenDelim(DelimToken::Brace))
+            if self.check(&token::OpenDelim(Delimiter::Parenthesis))
+                || self.check(&token::OpenDelim(Delimiter::Bracket))
+                || self.check(&token::OpenDelim(Delimiter::Brace))
             {
                 match self.parse_token_tree() {
                     TokenTree::Delimited(dspan, delim, tokens) =>
@@ -1217,24 +1188,27 @@ impl<'a> Parser<'a> {
     pub(crate) fn parse_token_tree(&mut self) -> TokenTree {
         match self.token.kind {
             token::OpenDelim(..) => {
-                let depth = self.token_cursor.stack.len();
-
-                // We keep advancing the token cursor until we hit
-                // the matching `CloseDelim` token.
-                while !(depth == self.token_cursor.stack.len()
-                    && matches!(self.token.kind, token::CloseDelim(_)))
-                {
+                // Grab the tokens from this frame.
+                let frame = &self.token_cursor.frame;
+                let stream = frame.tree_cursor.stream.clone();
+                let (delim, span) = frame.delim_sp.unwrap();
+
+                // Advance the token cursor through the entire delimited
+                // sequence. After getting the `OpenDelim` we are *within* the
+                // delimited sequence, i.e. at depth `d`. After getting the
+                // matching `CloseDelim` we are *after* the delimited sequence,
+                // i.e. at depth `d - 1`.
+                let target_depth = self.token_cursor.stack.len() - 1;
+                loop {
                     // Advance one token at a time, so `TokenCursor::next()`
                     // can capture these tokens if necessary.
                     self.bump();
+                    if self.token_cursor.stack.len() == target_depth {
+                        debug_assert!(matches!(self.token.kind, token::CloseDelim(_)));
+                        break;
+                    }
                 }
-                // We are still inside the frame corresponding
-                // to the delimited stream we captured, so grab
-                // the tokens from this frame.
-                let frame = &self.token_cursor.frame;
-                let stream = frame.tree_cursor.stream.clone();
-                let span = frame.span;
-                let delim = frame.delim;
+
                 // Consume close delimiter
                 self.bump();
                 TokenTree::Delimited(span, delim, stream)
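
The rewritten loop tracks how deep the token cursor's frame stack is, instead of waiting to see a `CloseDelim` at the starting depth. A simplified, self-contained analogue of the same depth-tracking idea, using bracket characters in place of token trees (the function is invented for illustration):

    // Return the delimited group starting at `start`: after consuming the
    // opening `(` we are inside the group (depth 1); when depth drops back
    // to 0 we have just consumed the matching `)`.
    fn delimited_group(chars: &[char], start: usize) -> &[char] {
        assert_eq!(chars[start], '(');
        let mut depth = 0usize;
        let mut i = start;
        loop {
            match chars[i] {
                '(' => depth += 1,
                ')' => {
                    depth -= 1;
                    if depth == 0 {
                        return &chars[start..=i];
                    }
                }
                _ => {}
            }
            i += 1;
        }
    }

    fn main() {
        let src: Vec<char> = "a (b (c) d) e".chars().collect();
        let group: String = delimited_group(&src, 2).iter().collect();
        assert_eq!(group, "(b (c) d)");
    }
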
@@ -1314,7 +1288,7 @@ impl<'a> Parser<'a> {
         }
         let lo = self.prev_token.span;
 
-        if self.check(&token::OpenDelim(token::Paren)) {
+        if self.check(&token::OpenDelim(Delimiter::Parenthesis)) {
             // We don't `self.bump()` the `(` yet because this might be a struct definition where
             // `()` or a tuple might be allowed. For example, `struct Struct(pub (), pub (usize));`.
             // Because of this, we only `bump` the `(` if we're assured it is appropriate to do so
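
The caution above exists because `pub (` is ambiguous: it may start a restricted visibility or a parenthesized tuple-field type. The forms involved, as ordinary Rust source (the branches that parse them follow in the next hunk):

    // Restricted visibilities:
    pub(crate) fn a() {}
    pub(self) fn b() {}
    mod outer {
        pub(super) fn c() {}
        pub mod inner {
            pub(in crate::outer) fn d() {}
        }
    }

    // The ambiguous case from the comment above: here `pub (...)` introduces
    // tuple-field types, not a visibility restriction, so the `(` must not be
    // consumed eagerly.
    pub struct Struct(pub (), pub (usize));

    fn main() {}
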
@@ -1325,7 +1299,7 @@ impl<'a> Parser<'a> {
                 // Parse `pub(crate)`.
                 self.bump(); // `(`
                 self.bump(); // `crate`
-                self.expect(&token::CloseDelim(token::Paren))?; // `)`
+                self.expect(&token::CloseDelim(Delimiter::Parenthesis))?; // `)`
                 let vis = VisibilityKind::Crate(CrateSugar::PubCrate);
                 return Ok(Visibility {
                     span: lo.to(self.prev_token.span),
@@ -1337,20 +1311,20 @@ impl<'a> Parser<'a> {
                 self.bump(); // `(`
                 self.bump(); // `in`
                 let path = self.parse_path(PathStyle::Mod)?; // `path`
-                self.expect(&token::CloseDelim(token::Paren))?; // `)`
+                self.expect(&token::CloseDelim(Delimiter::Parenthesis))?; // `)`
                 let vis = VisibilityKind::Restricted { path: P(path), id: ast::DUMMY_NODE_ID };
                 return Ok(Visibility {
                     span: lo.to(self.prev_token.span),
                     kind: vis,
                     tokens: None,
                 });
-            } else if self.look_ahead(2, |t| t == &token::CloseDelim(token::Paren))
+            } else if self.look_ahead(2, |t| t == &token::CloseDelim(Delimiter::Parenthesis))
                 && self.is_keyword_ahead(1, &[kw::Super, kw::SelfLower])
             {
                 // Parse `pub(self)` or `pub(super)`.
                 self.bump(); // `(`
                 let path = self.parse_path(PathStyle::Mod)?; // `super`/`self`
-                self.expect(&token::CloseDelim(token::Paren))?; // `)`
+                self.expect(&token::CloseDelim(Delimiter::Parenthesis))?; // `)`
                 let vis = VisibilityKind::Restricted { path: P(path), id: ast::DUMMY_NODE_ID };
                 return Ok(Visibility {
                     span: lo.to(self.prev_token.span),
@@ -1372,7 +1346,7 @@ impl<'a> Parser<'a> {
     fn recover_incorrect_vis_restriction(&mut self) -> PResult<'a, ()> {
         self.bump(); // `(`
         let path = self.parse_path(PathStyle::Mod)?;
-        self.expect(&token::CloseDelim(token::Paren))?; // `)`
+        self.expect(&token::CloseDelim(Delimiter::Parenthesis))?; // `)`
 
         let msg = "incorrect visibility restriction";
         let suggestion = r##"some possible visibility restrictions are:
@@ -1439,7 +1413,7 @@ impl<'a> Parser<'a> {
     fn is_import_coupler(&mut self) -> bool {
         self.check(&token::ModSep)
             && self.look_ahead(1, |t| {
-                *t == token::OpenDelim(token::Brace) || *t == token::BinOp(token::Star)
+                *t == token::OpenDelim(Delimiter::Brace) || *t == token::BinOp(token::Star)
             })
     }
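
`is_import_coupler` looks for the `::{` and `::*` forms that can end a `use` path. In surface syntax:

    // `::{` — a nested use-tree list; `::*` — a glob import.
    use std::collections::{HashMap, HashSet};
    use std::fmt::*;

    fn main() {
        let _map: HashMap<u8, u8> = HashMap::new();
        let _set: HashSet<u8> = HashSet::new();
        // `Display` is in scope via the glob import above.
        fn show<T: Display>(value: T) -> String {
            format!("{}", value)
        }
        assert_eq!(show(42), "42");
    }
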