Rollup merge of #107544 - nnethercote:improve-TokenCursor, r=petrochenkov

Improve `TokenCursor`. Some small improvements, for things that were bugging me. Best reviewed one commit at a time. r? ``@petrochenkov``
author: Dylan DPC <99973273+Dylan-DPC@users.noreply.github.com> 2023-02-03 23:04:51 +0530
committer: GitHub <noreply@github.com> 2023-02-03 23:04:51 +0530
commit: 815dc9c48097b4b31dd7d7a90e2fa0188895dc62 (patch)
tree: 09f441dd2590277d7e87ef4d414b91310d03453c /compiler/rustc_parse/src/parser
parent: d9db35785d33e2b6c6e9b4971dfdbe0984a69b9e (diff)
parent: a86fc727fa9b9fa1ac60b67147736783b3376e91 (diff)
download: rust-815dc9c48097b4b31dd7d7a90e2fa0188895dc62.tar.gz
rust-815dc9c48097b4b31dd7d7a90e2fa0188895dc62.zip
3 files changed, 61 insertions, 66 deletions
diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs
index b97f22417cb..dbd3b76786f 100644
--- a/compiler/rustc_parse/src/parser/attr_wrapper.rs
+++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs
@@ -469,6 +469,6 @@ mod size_asserts {
     use rustc_data_structures::static_assert_size;
     // tidy-alphabetical-start
     static_assert_size!(AttrWrapper, 16);
-    static_assert_size!(LazyAttrTokenStreamImpl, 144);
+    static_assert_size!(LazyAttrTokenStreamImpl, 120);
     // tidy-alphabetical-end
 }
diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs
index 3d42a9dcbbe..8a6436b041b 100644
--- a/compiler/rustc_parse/src/parser/expr.rs
+++ b/compiler/rustc_parse/src/parser/expr.rs
@@ -2141,7 +2141,7 @@ impl<'a> Parser<'a> {
         }
 
         if self.token.kind == TokenKind::Semi
-            && matches!(self.token_cursor.frame.delim_sp, Some((Delimiter::Parenthesis, _)))
+            && matches!(self.token_cursor.stack.last(), Some((_, Delimiter::Parenthesis, _)))
             && self.may_recover()
         {
             // It is likely that the closure body is a block but where the
diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs
index ffb23b50a16..2ea55f838a3 100644
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@@ -19,9 +19,8 @@ pub use path::PathStyle;
 
 use rustc_ast::ptr::P;
 use rustc_ast::token::{self, Delimiter, Nonterminal, Token, TokenKind};
-use rustc_ast::tokenstream::AttributesData;
-use rustc_ast::tokenstream::{self, DelimSpan, Spacing};
-use rustc_ast::tokenstream::{TokenStream, TokenTree};
+use rustc_ast::tokenstream::{AttributesData, DelimSpan, Spacing};
+use rustc_ast::tokenstream::{TokenStream, TokenTree, TokenTreeCursor};
 use rustc_ast::util::case::Case;
 use rustc_ast::AttrId;
 use rustc_ast::DUMMY_NODE_ID;
@@ -168,7 +167,7 @@ pub struct Parser<'a> {
 // This type is used a lot, e.g. it's cloned when matching many declarative macro rules with nonterminals. Make sure
 // it doesn't unintentionally get bigger.
 #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
-rustc_data_structures::static_assert_size!(Parser<'_>, 336);
+rustc_data_structures::static_assert_size!(Parser<'_>, 312);
 
 /// Stores span information about a closure.
 #[derive(Clone)]
@@ -221,18 +220,27 @@ impl<'a> Drop for Parser<'a> {
     }
 }
 
+/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that
+/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b)
+/// use this type to emit them as a linear sequence. But a linear sequence is
+/// what the parser expects, for the most part.
 #[derive(Clone)]
 struct TokenCursor {
-    // The current (innermost) frame. `frame` and `stack` could be combined,
-    // but it's faster to have them separately to access `frame` directly
-    // rather than via something like `stack.last().unwrap()` or
-    // `stack[stack.len() - 1]`.
-    frame: TokenCursorFrame,
-    // Additional frames that enclose `frame`.
-    stack: Vec<TokenCursorFrame>,
+    // Cursor for the current (innermost) token stream. The delimiters for this
+    // token stream are found in `self.stack.last()`; when that is `None` then
+    // we are in the outermost token stream which never has delimiters.
+    tree_cursor: TokenTreeCursor,
+
+    // Token streams surrounding the current one. The delimiters for `stack[n]`'s
+    // tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters
+    // because it's the outermost token stream which never has delimiters.
+    stack: Vec<(TokenTreeCursor, Delimiter, DelimSpan)>,
+
     desugar_doc_comments: bool,
+
     // Counts the number of calls to `{,inlined_}next`.
     num_next_calls: usize,
+
     // During parsing, we may sometimes need to 'unglue' a
     // glued token into two component tokens
     // (e.g. '>>' into '>' and '>'), so that the parser
@@ -257,18 +265,6 @@ struct TokenCursor {
     break_last_token: bool,
 }
 
-#[derive(Clone)]
-struct TokenCursorFrame {
-    delim_sp: Option<(Delimiter, DelimSpan)>,
-    tree_cursor: tokenstream::Cursor,
-}
-
-impl TokenCursorFrame {
-    fn new(delim_sp: Option<(Delimiter, DelimSpan)>, tts: TokenStream) -> Self {
-        TokenCursorFrame { delim_sp, tree_cursor: tts.into_trees() }
-    }
-}
-
 impl TokenCursor {
     fn next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) {
         self.inlined_next(desugar_doc_comments)
@@ -281,38 +277,47 @@ impl TokenCursor {
             // FIXME: we currently don't return `Delimiter` open/close delims. To fix #67062 we will
             // need to, whereupon the `delim != Delimiter::Invisible` conditions below can be
             // removed.
-            if let Some(tree) = self.frame.tree_cursor.next_ref() {
+            if let Some(tree) = self.tree_cursor.next_ref() {
                 match tree {
                     &TokenTree::Token(ref token, spacing) => match (desugar_doc_comments, token) {
                         (true, &Token { kind: token::DocComment(_, attr_style, data), span }) => {
-                            return self.desugar(attr_style, data, span);
+                            let desugared = self.desugar(attr_style, data, span);
+                            self.tree_cursor.replace_prev_and_rewind(desugared);
+                            // Continue to get the first token of the desugared doc comment.
+                        }
+                        _ => {
+                            debug_assert!(!matches!(
+                                token.kind,
+                                token::OpenDelim(_) | token::CloseDelim(_)
+                            ));
+                            return (token.clone(), spacing);
                         }
-                        _ => return (token.clone(), spacing),
                     },
                     &TokenTree::Delimited(sp, delim, ref tts) => {
-                        // Set `open_delim` to true here because we deal with it immediately.
-                        let frame = TokenCursorFrame::new(Some((delim, sp)), tts.clone());
-                        self.stack.push(mem::replace(&mut self.frame, frame));
+                        let trees = tts.clone().into_trees();
+                        self.stack.push((mem::replace(&mut self.tree_cursor, trees), delim, sp));
                         if delim != Delimiter::Invisible {
                             return (Token::new(token::OpenDelim(delim), sp.open), Spacing::Alone);
                         }
                         // No open delimiter to return; continue on to the next iteration.
                     }
                 };
-            } else if let Some(frame) = self.stack.pop() {
-                if let Some((delim, span)) = self.frame.delim_sp && delim != Delimiter::Invisible {
-                    self.frame = frame;
+            } else if let Some((tree_cursor, delim, span)) = self.stack.pop() {
+                // We have exhausted this token stream. Move back to its parent token stream.
+                self.tree_cursor = tree_cursor;
+                if delim != Delimiter::Invisible {
                     return (Token::new(token::CloseDelim(delim), span.close), Spacing::Alone);
                 }
-                self.frame = frame;
                 // No close delimiter to return; continue on to the next iteration.
             } else {
+                // We have exhausted the outermost token stream.
                 return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone);
             }
         }
     }
 
-    fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> (Token, Spacing) {
+    // Desugar a doc comment into something like `#[doc = r"foo"]`.
+    fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
         // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
         // required to wrap the text. E.g.
         // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
@@ -346,27 +351,15 @@ impl TokenCursor {
             .collect::<TokenStream>(),
         );
 
-        self.stack.push(mem::replace(
-            &mut self.frame,
-            TokenCursorFrame::new(
-                None,
-                if attr_style == AttrStyle::Inner {
-                    [
-                        TokenTree::token_alone(token::Pound, span),
-                        TokenTree::token_alone(token::Not, span),
-                        body,
-                    ]
-                    .into_iter()
-                    .collect::<TokenStream>()
-                } else {
-                    [TokenTree::token_alone(token::Pound, span), body]
-                        .into_iter()
-                        .collect::<TokenStream>()
-                },
-            ),
-        ));
-
-        self.next(/* desugar_doc_comments */ false)
+        if attr_style == AttrStyle::Inner {
+            vec![
+                TokenTree::token_alone(token::Pound, span),
+                TokenTree::token_alone(token::Not, span),
+                body,
+            ]
+        } else {
+            vec![TokenTree::token_alone(token::Pound, span), body]
+        }
     }
 }
 
@@ -475,7 +468,7 @@ impl<'a> Parser<'a> {
             restrictions: Restrictions::empty(),
             expected_tokens: Vec::new(),
             token_cursor: TokenCursor {
-                frame: TokenCursorFrame::new(None, tokens),
+                tree_cursor: tokens.into_trees(),
                 stack: Vec::new(),
                 num_next_calls: 0,
                 desugar_doc_comments,
@@ -1142,14 +1135,16 @@ impl<'a> Parser<'a> {
             return looker(&self.token);
         }
 
-        let frame = &self.token_cursor.frame;
-        if let Some((delim, span)) = frame.delim_sp && delim != Delimiter::Invisible {
+        let tree_cursor = &self.token_cursor.tree_cursor;
+        if let Some(&(_, delim, span)) = self.token_cursor.stack.last()
+            && delim != Delimiter::Invisible
+        {
             let all_normal = (0..dist).all(|i| {
-                let token = frame.tree_cursor.look_ahead(i);
+                let token = tree_cursor.look_ahead(i);
                 !matches!(token, Some(TokenTree::Delimited(_, Delimiter::Invisible, _)))
             });
             if all_normal {
-                return match frame.tree_cursor.look_ahead(dist - 1) {
+                return match tree_cursor.look_ahead(dist - 1) {
                     Some(tree) => match tree {
                         TokenTree::Token(token, _) => looker(token),
                         TokenTree::Delimited(dspan, delim, _) => {
@@ -1310,10 +1305,10 @@ impl<'a> Parser<'a> {
     pub(crate) fn parse_token_tree(&mut self) -> TokenTree {
         match self.token.kind {
             token::OpenDelim(..) => {
-                // Grab the tokens from this frame.
-                let frame = &self.token_cursor.frame;
-                let stream = frame.tree_cursor.stream.clone();
-                let (delim, span) = frame.delim_sp.unwrap();
+                // Grab the tokens within the delimiters.
+                let tree_cursor = &self.token_cursor.tree_cursor;
+                let stream = tree_cursor.stream.clone();
+                let (_, delim, span) = *self.token_cursor.stack.last().unwrap();
 
                 // Advance the token cursor through the entire delimited
                 // sequence. After getting the `OpenDelim` we are *within* the
author	Dylan DPC <99973273+Dylan-DPC@users.noreply.github.com>	2023-02-03 23:04:51 +0530
committer	GitHub <noreply@github.com>	2023-02-03 23:04:51 +0530
commit	815dc9c48097b4b31dd7d7a90e2fa0188895dc62 (patch)
tree	09f441dd2590277d7e87ef4d414b91310d03453c /compiler/rustc_parse/src/parser
parent	d9db35785d33e2b6c6e9b4971dfdbe0984a69b9e (diff)
parent	a86fc727fa9b9fa1ac60b67147736783b3376e91 (diff)
download	rust-815dc9c48097b4b31dd7d7a90e2fa0188895dc62.tar.gz rust-815dc9c48097b4b31dd7d7a90e2fa0188895dc62.zip