Auto merge of #114353 - nnethercote:parser-ast-cleanups, r=petrochenkov

Some parser and AST cleanups Things I found while looking closely at this code. r? `@petrochenkov`
author: bors <bors@rust-lang.org> 2023-08-03 04:26:42 +0000
committer: bors <bors@rust-lang.org> 2023-08-03 04:26:42 +0000
commit: fb31b3c34ef4a67bcd5b1dad919dfaa4cab2c569 (patch)
tree: 7ac3711aa1751e27e3bed79e7cd7f24a74031991 /compiler/rustc_parse/src
parent: d8bbef50bbad789e26219f4ec88b5d73b05570a3 (diff)
parent: d75ee2a6bcfc2ec9b21c0bb1ffaf34ff6b7161f1 (diff)
download: rust-fb31b3c34ef4a67bcd5b1dad919dfaa4cab2c569.tar.gz
rust-fb31b3c34ef4a67bcd5b1dad919dfaa4cab2c569.zip
7 files changed, 52 insertions, 82 deletions
diff --git a/compiler/rustc_parse/src/parser/attr.rs b/compiler/rustc_parse/src/parser/attr.rs
index ee0abba1c17..104de47b97d 100644
--- a/compiler/rustc_parse/src/parser/attr.rs
+++ b/compiler/rustc_parse/src/parser/attr.rs
@@ -36,7 +36,7 @@ impl<'a> Parser<'a> {
     pub(super) fn parse_outer_attributes(&mut self) -> PResult<'a, AttrWrapper> {
         let mut outer_attrs = ast::AttrVec::new();
         let mut just_parsed_doc_comment = false;
-        let start_pos = self.token_cursor.num_next_calls;
+        let start_pos = self.num_bump_calls;
         loop {
             let attr = if self.check(&token::Pound) {
                 let prev_outer_attr_sp = outer_attrs.last().map(|attr| attr.span);
@@ -277,7 +277,7 @@ impl<'a> Parser<'a> {
     pub(crate) fn parse_inner_attributes(&mut self) -> PResult<'a, ast::AttrVec> {
         let mut attrs = ast::AttrVec::new();
         loop {
-            let start_pos: u32 = self.token_cursor.num_next_calls.try_into().unwrap();
+            let start_pos: u32 = self.num_bump_calls.try_into().unwrap();
             // Only try to parse if it is an inner attribute (has `!`).
             let attr = if self.check(&token::Pound) && self.look_ahead(1, |t| t == &token::Not) {
                 Some(self.parse_attribute(InnerAttrPolicy::Permitted)?)
@@ -298,7 +298,7 @@ impl<'a> Parser<'a> {
                 None
             };
             if let Some(attr) = attr {
-                let end_pos: u32 = self.token_cursor.num_next_calls.try_into().unwrap();
+                let end_pos: u32 = self.num_bump_calls.try_into().unwrap();
                 // If we are currently capturing tokens, mark the location of this inner attribute.
                 // If capturing ends up creating a `LazyAttrTokenStream`, we will include
                 // this replace range with it, removing the inner attribute from the final
diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs
index 158ab2a2956..5d6c574baa6 100644
--- a/compiler/rustc_parse/src/parser/attr_wrapper.rs
+++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs
@@ -213,6 +213,7 @@ impl<'a> Parser<'a> {
 
         let start_token = (self.token.clone(), self.token_spacing);
         let cursor_snapshot = self.token_cursor.clone();
+        let start_pos = self.num_bump_calls;
 
         let has_outer_attrs = !attrs.attrs.is_empty();
         let prev_capturing = std::mem::replace(&mut self.capture_state.capturing, Capturing::Yes);
@@ -273,8 +274,7 @@ impl<'a> Parser<'a> {
 
         let replace_ranges_end = self.capture_state.replace_ranges.len();
 
-        let cursor_snapshot_next_calls = cursor_snapshot.num_next_calls;
-        let mut end_pos = self.token_cursor.num_next_calls;
+        let mut end_pos = self.num_bump_calls;
 
         let mut captured_trailing = false;
 
@@ -301,12 +301,12 @@ impl<'a> Parser<'a> {
         // then extend the range of captured tokens to include it, since the parser
         // was not actually bumped past it. When the `LazyAttrTokenStream` gets converted
         // into an `AttrTokenStream`, we will create the proper token.
-        if self.token_cursor.break_last_token {
+        if self.break_last_token {
             assert!(!captured_trailing, "Cannot set break_last_token and have trailing token");
             end_pos += 1;
         }
 
-        let num_calls = end_pos - cursor_snapshot_next_calls;
+        let num_calls = end_pos - start_pos;
 
         // If we have no attributes, then we will never need to
         // use any replace ranges.
@@ -316,7 +316,7 @@ impl<'a> Parser<'a> {
             // Grab any replace ranges that occur *inside* the current AST node.
             // We will perform the actual replacement when we convert the `LazyAttrTokenStream`
             // to an `AttrTokenStream`.
-            let start_calls: u32 = cursor_snapshot_next_calls.try_into().unwrap();
+            let start_calls: u32 = start_pos.try_into().unwrap();
             self.capture_state.replace_ranges[replace_ranges_start..replace_ranges_end]
                 .iter()
                 .cloned()
@@ -331,7 +331,7 @@ impl<'a> Parser<'a> {
             start_token,
             num_calls,
             cursor_snapshot,
-            break_last_token: self.token_cursor.break_last_token,
+            break_last_token: self.break_last_token,
             replace_ranges,
         });
 
@@ -359,14 +359,10 @@ impl<'a> Parser<'a> {
             // with a `FlatToken::AttrTarget`. If this AST node is inside an item
             // that has `#[derive]`, then this will allow us to cfg-expand this
             // AST node.
-            let start_pos =
-                if has_outer_attrs { attrs.start_pos } else { cursor_snapshot_next_calls };
+            let start_pos = if has_outer_attrs { attrs.start_pos } else { start_pos };
             let new_tokens = vec![(FlatToken::AttrTarget(attr_data), Spacing::Alone)];
 
-            assert!(
-                !self.token_cursor.break_last_token,
-                "Should not have unglued last token with cfg attr"
-            );
+            assert!(!self.break_last_token, "Should not have unglued last token with cfg attr");
             let range: Range<u32> = (start_pos.try_into().unwrap())..(end_pos.try_into().unwrap());
             self.capture_state.replace_ranges.push((range, new_tokens));
             self.capture_state.replace_ranges.extend(inner_attr_replace_ranges);
@@ -464,6 +460,6 @@ mod size_asserts {
     use rustc_data_structures::static_assert_size;
     // tidy-alphabetical-start
     static_assert_size!(AttrWrapper, 16);
-    static_assert_size!(LazyAttrTokenStreamImpl, 120);
+    static_assert_size!(LazyAttrTokenStreamImpl, 104);
     // tidy-alphabetical-end
 }
diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs
index 55f857aa31c..dc3b131e7f2 100644
--- a/compiler/rustc_parse/src/parser/expr.rs
+++ b/compiler/rustc_parse/src/parser/expr.rs
@@ -1167,7 +1167,7 @@ impl<'a> Parser<'a> {
             DestructuredFloat::TrailingDot(sym, sym_span, dot_span) => {
                 assert!(suffix.is_none());
                 // Analogous to `Self::break_and_eat`
-                self.token_cursor.break_last_token = true;
+                self.break_last_token = true;
                 // This might work, in cases like `1. 2`, and might not,
                 // in cases like `offset_of!(Ty, 1.)`. It depends on what comes
                 // after the float-like token, and therefore we have to make
@@ -2599,7 +2599,7 @@ impl<'a> Parser<'a> {
 
         // Recover from missing expression in `for` loop
         if matches!(expr.kind, ExprKind::Block(..))
-            && !matches!(self.token.kind, token::OpenDelim(token::Delimiter::Brace))
+            && !matches!(self.token.kind, token::OpenDelim(Delimiter::Brace))
             && self.may_recover()
         {
             self.sess
diff --git a/compiler/rustc_parse/src/parser/item.rs b/compiler/rustc_parse/src/parser/item.rs
index 1301ed3e388..e9cc858c8c5 100644
--- a/compiler/rustc_parse/src/parser/item.rs
+++ b/compiler/rustc_parse/src/parser/item.rs
@@ -9,12 +9,12 @@ use rustc_ast::ptr::P;
 use rustc_ast::token::{self, Delimiter, TokenKind};
 use rustc_ast::tokenstream::{DelimSpan, TokenStream, TokenTree};
 use rustc_ast::util::case::Case;
+use rustc_ast::MacCall;
 use rustc_ast::{self as ast, AttrVec, Attribute, DUMMY_NODE_ID};
 use rustc_ast::{Async, Const, Defaultness, IsAuto, Mutability, Unsafe, UseTree, UseTreeKind};
 use rustc_ast::{BindingAnnotation, Block, FnDecl, FnSig, Param, SelfKind};
 use rustc_ast::{EnumDef, FieldDef, Generics, TraitRef, Ty, TyKind, Variant, VariantData};
 use rustc_ast::{FnHeader, ForeignItem, Path, PathSegment, Visibility, VisibilityKind};
-use rustc_ast::{MacCall, MacDelimiter};
 use rustc_ast_pretty::pprust;
 use rustc_errors::{
     struct_span_err, Applicability, DiagnosticBuilder, ErrorGuaranteed, IntoDiagnostic, PResult,
@@ -1968,7 +1968,7 @@ impl<'a> Parser<'a> {
             let arrow = TokenTree::token_alone(token::FatArrow, pspan.between(bspan)); // `=>`
             let tokens = TokenStream::new(vec![params, arrow, body]);
             let dspan = DelimSpan::from_pair(pspan.shrink_to_lo(), bspan.shrink_to_hi());
-            P(DelimArgs { dspan, delim: MacDelimiter::Brace, tokens })
+            P(DelimArgs { dspan, delim: Delimiter::Brace, tokens })
         } else {
             return self.unexpected();
         };
diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs
index 57778d67098..c5b46b809b1 100644
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@@ -25,7 +25,7 @@ use rustc_ast::util::case::Case;
 use rustc_ast::AttrId;
 use rustc_ast::DUMMY_NODE_ID;
 use rustc_ast::{self as ast, AnonConst, Const, DelimArgs, Extern};
-use rustc_ast::{Async, AttrArgs, AttrArgsEq, Expr, ExprKind, MacDelimiter, Mutability, StrLit};
+use rustc_ast::{Async, AttrArgs, AttrArgsEq, Expr, ExprKind, Mutability, StrLit};
 use rustc_ast::{HasAttrs, HasTokens, Unsafe, Visibility, VisibilityKind};
 use rustc_ast_pretty::pprust;
 use rustc_data_structures::fx::FxHashMap;
@@ -135,9 +135,24 @@ pub struct Parser<'a> {
     pub capture_cfg: bool,
     restrictions: Restrictions,
     expected_tokens: Vec<TokenType>,
-    // Important: This must only be advanced from `bump` to ensure that
-    // `token_cursor.num_next_calls` is updated properly.
     token_cursor: TokenCursor,
+    // The number of calls to `bump`, i.e. the position in the token stream.
+    num_bump_calls: usize,
+    // During parsing we may sometimes need to 'unglue' a glued token into two
+    // component tokens (e.g. '>>' into '>' and '>), so the parser can consume
+    // them one at a time. This process bypasses the normal capturing mechanism
+    // (e.g. `num_bump_calls` will not be incremented), since the 'unglued'
+    // tokens due not exist in the original `TokenStream`.
+    //
+    // If we end up consuming both unglued tokens, this is not an issue. We'll
+    // end up capturing the single 'glued' token.
+    //
+    // However, sometimes we may want to capture just the first 'unglued'
+    // token. For example, capturing the `Vec<u8>` in `Option<Vec<u8>>`
+    // requires us to unglue the trailing `>>` token. The `break_last_token`
+    // field is used to track this token. It gets appended to the captured
+    // stream when we evaluate a `LazyAttrTokenStream`.
+    break_last_token: bool,
     /// This field is used to keep track of how many left angle brackets we have seen. This is
     /// required in order to detect extra leading left angle brackets (`<` characters) and error
     /// appropriately.
@@ -161,7 +176,7 @@ pub struct Parser<'a> {
 // This type is used a lot, e.g. it's cloned when matching many declarative macro rules with nonterminals. Make sure
 // it doesn't unintentionally get bigger.
 #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
-rustc_data_structures::static_assert_size!(Parser<'_>, 272);
+rustc_data_structures::static_assert_size!(Parser<'_>, 264);
 
 /// Stores span information about a closure.
 #[derive(Clone)]
@@ -223,32 +238,6 @@ struct TokenCursor {
     // tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters
     // because it's the outermost token stream which never has delimiters.
     stack: Vec<(TokenTreeCursor, Delimiter, DelimSpan)>,
-
-    // Counts the number of calls to `{,inlined_}next`.
-    num_next_calls: usize,
-
-    // During parsing, we may sometimes need to 'unglue' a
-    // glued token into two component tokens
-    // (e.g. '>>' into '>' and '>), so that the parser
-    // can consume them one at a time. This process
-    // bypasses the normal capturing mechanism
-    // (e.g. `num_next_calls` will not be incremented),
-    // since the 'unglued' tokens due not exist in
-    // the original `TokenStream`.
-    //
-    // If we end up consuming both unglued tokens,
-    // then this is not an issue - we'll end up
-    // capturing the single 'glued' token.
-    //
-    // However, in certain circumstances, we may
-    // want to capture just the first 'unglued' token.
-    // For example, capturing the `Vec<u8>`
-    // in `Option<Vec<u8>>` requires us to unglue
-    // the trailing `>>` token. The `break_last_token`
-    // field is used to track this token - it gets
-    // appended to the captured stream when
-    // we evaluate a `LazyAttrTokenStream`.
-    break_last_token: bool,
 }
 
 impl TokenCursor {
@@ -399,12 +388,9 @@ impl<'a> Parser<'a> {
             capture_cfg: false,
             restrictions: Restrictions::empty(),
             expected_tokens: Vec::new(),
-            token_cursor: TokenCursor {
-                tree_cursor: stream.into_trees(),
-                stack: Vec::new(),
-                num_next_calls: 0,
-                break_last_token: false,
-            },
+            token_cursor: TokenCursor { tree_cursor: stream.into_trees(), stack: Vec::new() },
+            num_bump_calls: 0,
+            break_last_token: false,
             unmatched_angle_bracket_count: 0,
             max_angle_bracket_count: 0,
             last_unexpected_token_span: None,
@@ -707,7 +693,7 @@ impl<'a> Parser<'a> {
                 // If we consume any additional tokens, then this token
                 // is not needed (we'll capture the entire 'glued' token),
                 // and `bump` will set this field to `None`
-                self.token_cursor.break_last_token = true;
+                self.break_last_token = true;
                 // Use the spacing of the glued token as the spacing
                 // of the unglued second token.
                 self.bump_with((Token::new(second, second_span), self.token_spacing));
@@ -1049,11 +1035,11 @@ impl<'a> Parser<'a> {
         // Note: destructuring here would give nicer code, but it was found in #96210 to be slower
         // than `.0`/`.1` access.
         let mut next = self.token_cursor.inlined_next();
-        self.token_cursor.num_next_calls += 1;
+        self.num_bump_calls += 1;
         // We've retrieved an token from the underlying
         // cursor, so we no longer need to worry about
         // an unglued token. See `break_and_eat` for more details
-        self.token_cursor.break_last_token = false;
+        self.break_last_token = false;
         if next.0.span.is_dummy() {
             // Tweak the location for better diagnostics, but keep syntactic context intact.
             let fallback_span = self.token.span;
@@ -1230,12 +1216,10 @@ impl<'a> Parser<'a> {
             || self.check(&token::OpenDelim(Delimiter::Brace));
 
         delimited.then(|| {
-            // We've confirmed above that there is a delimiter so unwrapping is OK.
             let TokenTree::Delimited(dspan, delim, tokens) = self.parse_token_tree() else {
                 unreachable!()
             };
-
-            DelimArgs { dspan, delim: MacDelimiter::from_token(delim).unwrap(), tokens }
+            DelimArgs { dspan, delim, tokens }
         })
     }
 
@@ -1251,12 +1235,11 @@ impl<'a> Parser<'a> {
     }
 
     /// Parses a single token tree from the input.
-    pub(crate) fn parse_token_tree(&mut self) -> TokenTree {
+    pub fn parse_token_tree(&mut self) -> TokenTree {
         match self.token.kind {
             token::OpenDelim(..) => {
                 // Grab the tokens within the delimiters.
-                let tree_cursor = &self.token_cursor.tree_cursor;
-                let stream = tree_cursor.stream.clone();
+                let stream = self.token_cursor.tree_cursor.stream.clone();
                 let (_, delim, span) = *self.token_cursor.stack.last().unwrap();
 
                 // Advance the token cursor through the entire delimited
@@ -1287,15 +1270,6 @@ impl<'a> Parser<'a> {
         }
     }
 
-    /// Parses a stream of tokens into a list of `TokenTree`s, up to EOF.
-    pub fn parse_all_token_trees(&mut self) -> PResult<'a, Vec<TokenTree>> {
-        let mut tts = Vec::new();
-        while self.token != token::Eof {
-            tts.push(self.parse_token_tree());
-        }
-        Ok(tts)
-    }
-
     pub fn parse_tokens(&mut self) -> TokenStream {
         let mut result = Vec::new();
         loop {
@@ -1455,7 +1429,7 @@ impl<'a> Parser<'a> {
     }
 
     pub fn approx_token_stream_pos(&self) -> usize {
-        self.token_cursor.num_next_calls
+        self.num_bump_calls
     }
 }
 
diff --git a/compiler/rustc_parse/src/parser/stmt.rs b/compiler/rustc_parse/src/parser/stmt.rs
index 1cdf2efa764..12c267351b9 100644
--- a/compiler/rustc_parse/src/parser/stmt.rs
+++ b/compiler/rustc_parse/src/parser/stmt.rs
@@ -193,10 +193,9 @@ impl<'a> Parser<'a> {
     /// At this point, the `!` token after the path has already been eaten.
     fn parse_stmt_mac(&mut self, lo: Span, attrs: AttrVec, path: ast::Path) -> PResult<'a, Stmt> {
         let args = self.parse_delim_args()?;
-        let delim = args.delim.to_token();
         let hi = self.prev_token.span;
 
-        let style = match delim {
+        let style = match args.delim {
             Delimiter::Brace => MacStmtStyle::Braces,
             _ => MacStmtStyle::NoBraces,
         };
diff --git a/compiler/rustc_parse/src/validate_attr.rs b/compiler/rustc_parse/src/validate_attr.rs
index 2011083019c..f7396598220 100644
--- a/compiler/rustc_parse/src/validate_attr.rs
+++ b/compiler/rustc_parse/src/validate_attr.rs
@@ -2,9 +2,10 @@
 
 use crate::{errors, parse_in};
 
+use rustc_ast::token::Delimiter;
 use rustc_ast::tokenstream::DelimSpan;
 use rustc_ast::MetaItemKind;
-use rustc_ast::{self as ast, AttrArgs, AttrArgsEq, Attribute, DelimArgs, MacDelimiter, MetaItem};
+use rustc_ast::{self as ast, AttrArgs, AttrArgsEq, Attribute, DelimArgs, MetaItem};
 use rustc_ast_pretty::pprust;
 use rustc_errors::{Applicability, FatalError, PResult};
 use rustc_feature::{AttributeTemplate, BuiltinAttribute, BUILTIN_ATTRIBUTE_MAP};
@@ -84,8 +85,8 @@ pub fn parse_meta<'a>(sess: &'a ParseSess, attr: &Attribute) -> PResult<'a, Meta
     })
 }
 
-pub fn check_meta_bad_delim(sess: &ParseSess, span: DelimSpan, delim: MacDelimiter) {
-    if let ast::MacDelimiter::Parenthesis = delim {
+pub fn check_meta_bad_delim(sess: &ParseSess, span: DelimSpan, delim: Delimiter) {
+    if let Delimiter::Parenthesis = delim {
         return;
     }
     sess.emit_err(errors::MetaBadDelim {
@@ -94,8 +95,8 @@ pub fn check_meta_bad_delim(sess: &ParseSess, span: DelimSpan, delim: MacDelimit
     });
 }
 
-pub fn check_cfg_attr_bad_delim(sess: &ParseSess, span: DelimSpan, delim: MacDelimiter) {
-    if let ast::MacDelimiter::Parenthesis = delim {
+pub fn check_cfg_attr_bad_delim(sess: &ParseSess, span: DelimSpan, delim: Delimiter) {
+    if let Delimiter::Parenthesis = delim {
         return;
     }
     sess.emit_err(errors::CfgAttrBadDelim {
author	bors <bors@rust-lang.org>	2023-08-03 04:26:42 +0000
committer	bors <bors@rust-lang.org>	2023-08-03 04:26:42 +0000
commit	fb31b3c34ef4a67bcd5b1dad919dfaa4cab2c569 (patch)
tree	7ac3711aa1751e27e3bed79e7cd7f24a74031991 /compiler/rustc_parse/src
parent	d8bbef50bbad789e26219f4ec88b5d73b05570a3 (diff)
parent	d75ee2a6bcfc2ec9b21c0bb1ffaf34ff6b7161f1 (diff)
download	rust-fb31b3c34ef4a67bcd5b1dad919dfaa4cab2c569.tar.gz rust-fb31b3c34ef4a67bcd5b1dad919dfaa4cab2c569.zip