Auto merge of #78296 - Aaron1011:fix/stmt-tokens, r=petrochenkov

Properly handle attributes on statements. We now collect tokens for the underlying node wrapped by `StmtKind` instead of storing tokens directly in `Stmt`. `LazyTokenStream` now supports capturing a trailing semicolon after it is initially constructed. This allows us to avoid refactoring statement parsing to wrap the parsing of the semicolon in `parse_tokens`. Attributes on item statements (e.g. `fn foo() { #[bar] struct MyStruct; }`) are now treated as item attributes, not statement attributes, which is consistent with how we handle attributes on other kinds of statements. The feature-gating code is adjusted so that proc-macro attributes are still allowed on item statements on stable. Two built-in macros (`#[global_allocator]` and `#[test]`) needed to be adjusted to support being passed `Annotatable::Stmt`.
author: bors <bors@rust-lang.org> 2020-11-28 07:48:56 +0000
committer: bors <bors@rust-lang.org> 2020-11-28 07:48:56 +0000
commit: 4ae328bef47dffcbf363e5ae873f419c06a5511d (patch)
tree: a4638ba9c7a64bae7fa30ec681219ef1a00dc3d3 /compiler/rustc_parse/src/parser
parent: f8e5209a21c698398a0f8c04af55e2ad6cbd113c (diff)
parent: 92bfa05b07090be1b3bf32af0c8947a0f0b387e8 (diff)
download: rust-4ae328bef47dffcbf363e5ae873f419c06a5511d.tar.gz
rust-4ae328bef47dffcbf363e5ae873f419c06a5511d.zip
3 files changed, 103 insertions, 44 deletions
diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs
index 40aa2db58c7..2a779c37b89 100644
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@@ -1213,14 +1213,20 @@ impl<'a> Parser<'a> {
         //
         // This also makes `Parser` very cheap to clone, since
         // there is no intermediate collection buffer to clone.
+        #[derive(Clone)]
         struct LazyTokenStreamImpl {
             start_token: (Token, Spacing),
             cursor_snapshot: TokenCursor,
             num_calls: usize,
             desugar_doc_comments: bool,
+            trailing_semi: bool,
         }
         impl CreateTokenStream for LazyTokenStreamImpl {
             fn create_token_stream(&self) -> TokenStream {
+                let mut num_calls = self.num_calls;
+                if self.trailing_semi {
+                    num_calls += 1;
+                }
                 // The token produced by the final call to `next` or `next_desugared`
                 // was not actually consumed by the callback. The combination
                 // of chaining the initial token and using `take` produces the desired
@@ -1228,17 +1234,25 @@ impl<'a> Parser<'a> {
                 // and omit the final token otherwise.
                 let mut cursor_snapshot = self.cursor_snapshot.clone();
                 let tokens = std::iter::once(self.start_token.clone())
-                    .chain((0..self.num_calls).map(|_| {
+                    .chain((0..num_calls).map(|_| {
                         if self.desugar_doc_comments {
                             cursor_snapshot.next_desugared()
                         } else {
                             cursor_snapshot.next()
                         }
                     }))
-                    .take(self.num_calls);
+                    .take(num_calls);
 
                 make_token_stream(tokens)
             }
+            fn add_trailing_semi(&self) -> Box<dyn CreateTokenStream> {
+                if self.trailing_semi {
+                    panic!("Called `add_trailing_semi` twice!");
+                }
+                let mut new = self.clone();
+                new.trailing_semi = true;
+                Box::new(new)
+            }
         }
 
         let lazy_impl = LazyTokenStreamImpl {
@@ -1246,6 +1260,7 @@ impl<'a> Parser<'a> {
             num_calls: self.token_cursor.num_next_calls - cursor_snapshot.num_next_calls,
             cursor_snapshot,
             desugar_doc_comments: self.desugar_doc_comments,
+            trailing_semi: false,
         };
         Ok((ret, Some(LazyTokenStream::new(lazy_impl))))
     }
diff --git a/compiler/rustc_parse/src/parser/nonterminal.rs b/compiler/rustc_parse/src/parser/nonterminal.rs
index ab88362dad9..c007f96a798 100644
--- a/compiler/rustc_parse/src/parser/nonterminal.rs
+++ b/compiler/rustc_parse/src/parser/nonterminal.rs
@@ -117,8 +117,8 @@ impl<'a> Parser<'a> {
                 let (stmt, tokens) = self.collect_tokens(|this| this.parse_stmt())?;
                 match stmt {
                     Some(mut s) => {
-                        if s.tokens.is_none() {
-                            s.tokens = tokens;
+                        if s.tokens().is_none() {
+                            s.set_tokens(tokens);
                         }
                         token::NtStmt(s)
                     }
diff --git a/compiler/rustc_parse/src/parser/stmt.rs b/compiler/rustc_parse/src/parser/stmt.rs
index 131ff1ae6b3..e974556f43a 100644
--- a/compiler/rustc_parse/src/parser/stmt.rs
+++ b/compiler/rustc_parse/src/parser/stmt.rs
@@ -7,8 +7,10 @@ use super::{BlockMode, Parser, Restrictions, SemiColonMode};
 use crate::maybe_whole;
 
 use rustc_ast as ast;
+use rustc_ast::attr::HasAttrs;
 use rustc_ast::ptr::P;
 use rustc_ast::token::{self, TokenKind};
+use rustc_ast::tokenstream::LazyTokenStream;
 use rustc_ast::util::classify;
 use rustc_ast::{AttrStyle, AttrVec, Attribute, MacCall, MacCallStmt, MacStmtStyle};
 use rustc_ast::{Block, BlockCheckMode, Expr, ExprKind, Local, Stmt, StmtKind, DUMMY_NODE_ID};
@@ -31,45 +33,75 @@ impl<'a> Parser<'a> {
     }
 
     fn parse_stmt_without_recovery(&mut self) -> PResult<'a, Option<Stmt>> {
-        maybe_whole!(self, NtStmt, |x| Some(x));
-
-        let attrs = self.parse_outer_attributes()?;
+        let mut attrs = self.parse_outer_attributes()?;
+        let has_attrs = !attrs.is_empty();
         let lo = self.token.span;
 
-        let stmt = if self.eat_keyword(kw::Let) {
-            self.parse_local_mk(lo, attrs.into())?
-        } else if self.is_kw_followed_by_ident(kw::Mut) {
-            self.recover_stmt_local(lo, attrs.into(), "missing keyword", "let mut")?
-        } else if self.is_kw_followed_by_ident(kw::Auto) {
-            self.bump(); // `auto`
-            let msg = "write `let` instead of `auto` to introduce a new variable";
-            self.recover_stmt_local(lo, attrs.into(), msg, "let")?
-        } else if self.is_kw_followed_by_ident(sym::var) {
-            self.bump(); // `var`
-            let msg = "write `let` instead of `var` to introduce a new variable";
-            self.recover_stmt_local(lo, attrs.into(), msg, "let")?
-        } else if self.check_path() && !self.token.is_qpath_start() && !self.is_path_start_item() {
-            // We have avoided contextual keywords like `union`, items with `crate` visibility,
-            // or `auto trait` items. We aim to parse an arbitrary path `a::b` but not something
-            // that starts like a path (1 token), but it fact not a path.
-            // Also, we avoid stealing syntax from `parse_item_`.
-            self.parse_stmt_path_start(lo, attrs)?
-        } else if let Some(item) = self.parse_item_common(attrs.clone(), false, true, |_| true)? {
-            // FIXME: Bad copy of attrs
-            self.mk_stmt(lo.to(item.span), StmtKind::Item(P(item)))
-        } else if self.eat(&token::Semi) {
-            // Do not attempt to parse an expression if we're done here.
-            self.error_outer_attrs(&attrs);
-            self.mk_stmt(lo, StmtKind::Empty)
-        } else if self.token != token::CloseDelim(token::Brace) {
-            // Remainder are line-expr stmts.
-            let e = self.parse_expr_res(Restrictions::STMT_EXPR, Some(attrs.into()))?;
-            self.mk_stmt(lo.to(e.span), StmtKind::Expr(e))
+        maybe_whole!(self, NtStmt, |stmt| {
+            let mut stmt = stmt;
+            stmt.visit_attrs(|stmt_attrs| {
+                mem::swap(stmt_attrs, &mut attrs);
+                stmt_attrs.extend(attrs);
+            });
+            Some(stmt)
+        });
+
+        let parse_stmt_inner = |this: &mut Self| {
+            let stmt = if this.eat_keyword(kw::Let) {
+                this.parse_local_mk(lo, attrs.into())?
+            } else if this.is_kw_followed_by_ident(kw::Mut) {
+                this.recover_stmt_local(lo, attrs.into(), "missing keyword", "let mut")?
+            } else if this.is_kw_followed_by_ident(kw::Auto) {
+                this.bump(); // `auto`
+                let msg = "write `let` instead of `auto` to introduce a new variable";
+                this.recover_stmt_local(lo, attrs.into(), msg, "let")?
+            } else if this.is_kw_followed_by_ident(sym::var) {
+                this.bump(); // `var`
+                let msg = "write `let` instead of `var` to introduce a new variable";
+                this.recover_stmt_local(lo, attrs.into(), msg, "let")?
+            } else if this.check_path()
+                && !this.token.is_qpath_start()
+                && !this.is_path_start_item()
+            {
+                // We have avoided contextual keywords like `union`, items with `crate` visibility,
+                // or `auto trait` items. We aim to parse an arbitrary path `a::b` but not something
+                // that starts like a path (1 token), but it fact not a path.
+                // Also, we avoid stealing syntax from `parse_item_`.
+                this.parse_stmt_path_start(lo, attrs)?
+            } else if let Some(item) =
+                this.parse_item_common(attrs.clone(), false, true, |_| true)?
+            {
+                // FIXME: Bad copy of attrs
+                this.mk_stmt(lo.to(item.span), StmtKind::Item(P(item)))
+            } else if this.eat(&token::Semi) {
+                // Do not attempt to parse an expression if we're done here.
+                this.error_outer_attrs(&attrs);
+                this.mk_stmt(lo, StmtKind::Empty)
+            } else if this.token != token::CloseDelim(token::Brace) {
+                // Remainder are line-expr stmts.
+                let e = this.parse_expr_res(Restrictions::STMT_EXPR, Some(attrs.into()))?;
+                this.mk_stmt(lo.to(e.span), StmtKind::Expr(e))
+            } else {
+                this.error_outer_attrs(&attrs);
+                return Ok(None);
+            };
+            Ok(Some(stmt))
+        };
+
+        let stmt = if has_attrs {
+            let (mut stmt, tokens) = self.collect_tokens(parse_stmt_inner)?;
+            if let Some(stmt) = &mut stmt {
+                // If we already have tokens (e.g. due to encountering an `NtStmt`),
+                // use those instead.
+                if stmt.tokens().is_none() {
+                    stmt.set_tokens(tokens);
+                }
+            }
+            stmt
         } else {
-            self.error_outer_attrs(&attrs);
-            return Ok(None);
+            parse_stmt_inner(self)?
         };
-        Ok(Some(stmt))
+        Ok(stmt)
     }
 
     fn parse_stmt_path_start(&mut self, lo: Span, attrs: Vec<Attribute>) -> PResult<'a, Stmt> {
@@ -107,7 +139,7 @@ impl<'a> Parser<'a> {
 
         let kind = if delim == token::Brace || self.token == token::Semi || self.token == token::Eof
         {
-            StmtKind::MacCall(P(MacCallStmt { mac, style, attrs }))
+            StmtKind::MacCall(P(MacCallStmt { mac, style, attrs, tokens: None }))
         } else {
             // Since none of the above applied, this is an expression statement macro.
             let e = self.mk_expr(lo.to(hi), ExprKind::MacCall(mac), AttrVec::new());
@@ -219,7 +251,7 @@ impl<'a> Parser<'a> {
             }
         };
         let hi = if self.token == token::Semi { self.token.span } else { self.prev_token.span };
-        Ok(P(ast::Local { ty, pat, init, id: DUMMY_NODE_ID, span: lo.to(hi), attrs }))
+        Ok(P(ast::Local { ty, pat, init, id: DUMMY_NODE_ID, span: lo.to(hi), attrs, tokens: None }))
     }
 
     /// Parses the RHS of a local variable declaration (e.g., '= 14;').
@@ -376,6 +408,12 @@ impl<'a> Parser<'a> {
             None => return Ok(None),
         };
 
+        let add_semi_token = |tokens: Option<&mut LazyTokenStream>| {
+            if let Some(tokens) = tokens {
+                *tokens = tokens.add_trailing_semi();
+            }
+        };
+
         let mut eat_semi = true;
         match stmt.kind {
             // Expression without semicolon.
@@ -417,6 +455,7 @@ impl<'a> Parser<'a> {
                     *expr = self.mk_expr_err(sp);
                 }
             }
+            StmtKind::Expr(_) | StmtKind::MacCall(_) => {}
             StmtKind::Local(ref mut local) => {
                 if let Err(e) = self.expect_semi() {
                     // We might be at the `,` in `let x = foo<bar, baz>;`. Try to recover.
@@ -430,13 +469,18 @@ impl<'a> Parser<'a> {
                     }
                 }
                 eat_semi = false;
+                // We just checked that there's a semicolon in the tokenstream,
+                // so capture it
+                add_semi_token(local.tokens.as_mut());
             }
-            StmtKind::Empty => eat_semi = false,
-            _ => {}
+            StmtKind::Empty | StmtKind::Item(_) | StmtKind::Semi(_) => eat_semi = false,
         }
 
         if eat_semi && self.eat(&token::Semi) {
             stmt = stmt.add_trailing_semicolon();
+            // We just checked that we have a semicolon in the tokenstream,
+            // so capture it
+            add_semi_token(stmt.tokens_mut());
         }
         stmt.span = stmt.span.to(self.prev_token.span);
         Ok(Some(stmt))
@@ -447,7 +491,7 @@ impl<'a> Parser<'a> {
     }
 
     pub(super) fn mk_stmt(&self, span: Span, kind: StmtKind) -> Stmt {
-        Stmt { id: DUMMY_NODE_ID, kind, span, tokens: None }
+        Stmt { id: DUMMY_NODE_ID, kind, span }
     }
 
     pub(super) fn mk_stmt_err(&self, span: Span) -> Stmt {
author	bors <bors@rust-lang.org>	2020-11-28 07:48:56 +0000
committer	bors <bors@rust-lang.org>	2020-11-28 07:48:56 +0000
commit	4ae328bef47dffcbf363e5ae873f419c06a5511d (patch)
tree	a4638ba9c7a64bae7fa30ec681219ef1a00dc3d3 /compiler/rustc_parse/src/parser
parent	f8e5209a21c698398a0f8c04af55e2ad6cbd113c (diff)
parent	92bfa05b07090be1b3bf32af0c8947a0f0b387e8 (diff)
download	rust-4ae328bef47dffcbf363e5ae873f419c06a5511d.tar.gz rust-4ae328bef47dffcbf363e5ae873f419c06a5511d.zip