author    Aaron Hill <aa1ronham@gmail.com>  2021-01-14 10:42:01 -0500
committer Aaron Hill <aa1ronham@gmail.com>  2021-01-22 00:33:03 -0500
commit    ccfc292999e133193f61feacd70031c90884636b (patch)
tree      7b6c4584b12203c6943d213abcbe31bf5f18d885
parent    dc1eee2f256efbd1d3b50b6b090232f81cac6d72 (diff)
Refactor token collection to capture trailing token immediately
-rw-r--r--  compiler/rustc_ast/src/tokenstream.rs   |  11
-rw-r--r--  compiler/rustc_parse/src/lib.rs         |   2
-rw-r--r--  compiler/rustc_parse/src/parser/item.rs |   4
-rw-r--r--  compiler/rustc_parse/src/parser/mod.rs  |  55
-rw-r--r--  compiler/rustc_parse/src/parser/stmt.rs | 170
5 files changed, 124 insertions(+), 118 deletions(-)
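
The commit replaces the retroactive `add_trailing_semi` hook on `LazyTokenStream` with a `TrailingToken` value returned by the parsing callback itself, so the collector can decide on the spot whether a trailing `;` belongs to the captured stream. Below is a minimal standalone sketch of that flow; the `Tok` and `Collector` types are invented stand-ins, not rustc's real parser machinery.

// Minimal model of "capture the trailing token immediately"; `Tok` and
// `Collector` are illustrative stand-ins, not rustc's real types.

#[derive(Clone, Debug, PartialEq)]
enum Tok {
    Ident(&'static str),
    Eq,
    Num(i64),
    Semi,
}

// Mirrors the `TrailingToken` enum added in parser/mod.rs.
enum TrailingToken {
    None,
    Semi,
}

struct Collector {
    toks: Vec<Tok>,
    pos: usize,
}

impl Collector {
    fn bump(&mut self) {
        self.pos += 1;
    }

    // Analogue of `collect_tokens_trailing_token`: run the callback, then use the
    // `TrailingToken` it returns to decide how many tokens were really captured.
    fn collect_tokens_trailing_token<R>(
        &mut self,
        f: impl FnOnce(&mut Self) -> (R, TrailingToken),
    ) -> (R, Vec<Tok>) {
        let start = self.pos;
        let (res, trailing) = f(self);
        let mut num_calls = self.pos - start;
        match trailing {
            TrailingToken::None => {}
            TrailingToken::Semi => {
                // The callback saw the `;` but did not consume it; extend the
                // captured range by one token so the semicolon is included now,
                // with no retroactive fixup later.
                assert_eq!(self.toks.get(self.pos), Some(&Tok::Semi));
                num_calls += 1;
            }
        }
        (res, self.toks[start..start + num_calls].to_vec())
    }
}

fn main() {
    // `let x = 1 ;` as a toy token stream.
    let mut p = Collector {
        toks: vec![Tok::Ident("let"), Tok::Ident("x"), Tok::Eq, Tok::Num(1), Tok::Semi],
        pos: 0,
    };
    let (_stmt, captured) = p.collect_tokens_trailing_token(|this| {
        // "Parse" the local by consuming everything up to (but not including) the `;`.
        while !matches!(this.toks.get(this.pos), None | Some(Tok::Semi)) {
            this.bump();
        }
        ("local stmt", TrailingToken::Semi)
    });
    assert_eq!(captured.last(), Some(&Tok::Semi));
    println!("captured {} tokens, including the trailing semicolon", captured.len());
}
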
diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs
index 00354b42ebb..9ac05f316f0 100644
--- a/compiler/rustc_ast/src/tokenstream.rs
+++ b/compiler/rustc_ast/src/tokenstream.rs
@@ -127,14 +127,10 @@ where
 }
 
 pub trait CreateTokenStream: sync::Send + sync::Sync {
-    fn add_trailing_semi(&self) -> Box<dyn CreateTokenStream>;
     fn create_token_stream(&self) -> TokenStream;
 }
 
 impl CreateTokenStream for TokenStream {
-    fn add_trailing_semi(&self) -> Box<dyn CreateTokenStream> {
-        panic!("Cannot call `add_trailing_semi` on a `TokenStream`!");
-    }
     fn create_token_stream(&self) -> TokenStream {
         self.clone()
     }
@@ -151,13 +147,6 @@ impl LazyTokenStream {
         LazyTokenStream(Lrc::new(Box::new(inner)))
     }
 
-    /// Extends the captured stream by one token,
-    /// which must be a trailing semicolon. This
-    /// affects the `TokenStream` created by `make_tokenstream`.
-    pub fn add_trailing_semi(&self) -> LazyTokenStream {
-        LazyTokenStream(Lrc::new(self.0.add_trailing_semi()))
-    }
-
     pub fn create_token_stream(&self) -> TokenStream {
         self.0.create_token_stream()
     }
diff --git a/compiler/rustc_parse/src/lib.rs b/compiler/rustc_parse/src/lib.rs
index 62fd6936d21..c365acac625 100644
--- a/compiler/rustc_parse/src/lib.rs
+++ b/compiler/rustc_parse/src/lib.rs
@@ -292,7 +292,7 @@ pub fn nt_to_tokenstream(
     } else if matches!(synthesize_tokens, CanSynthesizeMissingTokens::Yes) {
         return fake_token_stream(sess, nt);
     } else {
-        panic!("Missing tokens for nt {:?}", pprust::nonterminal_to_string(nt));
+        panic!("Missing tokens for nt at {:?}: {:?}", nt.span(), pprust::nonterminal_to_string(nt));
     }
 }
 
diff --git a/compiler/rustc_parse/src/parser/item.rs b/compiler/rustc_parse/src/parser/item.rs
index 28067f0216c..1ed4d39cd05 100644
--- a/compiler/rustc_parse/src/parser/item.rs
+++ b/compiler/rustc_parse/src/parser/item.rs
@@ -1,6 +1,6 @@
 use super::diagnostics::{dummy_arg, ConsumeClosingDelim, Error};
 use super::ty::{AllowPlus, RecoverQPath, RecoverReturnSign};
-use super::{FollowedByType, ForceCollect, Parser, PathStyle};
+use super::{FollowedByType, ForceCollect, Parser, PathStyle, TrailingToken};
 
 use crate::{maybe_collect_tokens, maybe_whole};
 
@@ -125,7 +125,7 @@ impl<'a> Parser<'a> {
         let item = maybe_collect_tokens!(self, force_collect, &attrs, |this: &mut Self| {
             let item = this.parse_item_common_(attrs, mac_allowed, attrs_allowed, req_name);
             unclosed_delims.append(&mut this.unclosed_delims);
-            item
+            Ok((item?, TrailingToken::None))
         })?;
 
         self.unclosed_delims.append(&mut unclosed_delims);
diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs
index c85b7a00732..b332005ca74 100644
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@@ -61,6 +61,11 @@ pub enum ForceCollect {
     No,
 }
 
+pub enum TrailingToken {
+    None,
+    Semi,
+}
+
 /// Like `maybe_whole_expr`, but for things other than expressions.
 #[macro_export]
 macro_rules! maybe_whole {
@@ -1225,6 +1230,13 @@ impl<'a> Parser<'a> {
         }
     }
 
+    pub fn collect_tokens<R: HasTokens>(
+        &mut self,
+        f: impl FnOnce(&mut Self) -> PResult<'a, R>,
+    ) -> PResult<'a, R> {
+        self.collect_tokens_trailing_token(|this| Ok((f(this)?, TrailingToken::None)))
+    }
+
     /// Records all tokens consumed by the provided callback,
     /// including the current token. These tokens are collected
     /// into a `LazyTokenStream`, and returned along with the result
@@ -1241,9 +1253,9 @@ impl<'a> Parser<'a> {
     /// This restriction shouldn't be an issue in practice,
     /// since this function is used to record the tokens for
     /// a parsed AST item, which always has matching delimiters.
-    pub fn collect_tokens<R: HasTokens>(
+    pub fn collect_tokens_trailing_token<R: HasTokens>(
         &mut self,
-        f: impl FnOnce(&mut Self) -> PResult<'a, R>,
+        f: impl FnOnce(&mut Self) -> PResult<'a, (R, TrailingToken)>,
     ) -> PResult<'a, R> {
         let start_token = (self.token.clone(), self.token_spacing);
         let cursor_snapshot = TokenCursor {
@@ -1256,7 +1268,7 @@ impl<'a> Parser<'a> {
             append_unglued_token: self.token_cursor.append_unglued_token.clone(),
         };
 
-        let mut ret = f(self)?;
+        let (mut ret, trailing_token) = f(self)?;
 
         // Produces a `TokenStream` on-demand. Using `cursor_snapshot`
         // and `num_calls`, we can reconstruct the `TokenStream` seen
@@ -1275,15 +1287,10 @@ impl<'a> Parser<'a> {
             cursor_snapshot: TokenCursor,
             num_calls: usize,
             desugar_doc_comments: bool,
-            trailing_semi: bool,
             append_unglued_token: Option<TreeAndSpacing>,
         }
         impl CreateTokenStream for LazyTokenStreamImpl {
             fn create_token_stream(&self) -> TokenStream {
-                let mut num_calls = self.num_calls;
-                if self.trailing_semi {
-                    num_calls += 1;
-                }
                 // The token produced by the final call to `next` or `next_desugared`
                 // was not actually consumed by the callback. The combination
                 // of chaining the initial token and using `take` produces the desired
@@ -1291,39 +1298,33 @@ impl<'a> Parser<'a> {
                 // and omit the final token otherwise.
                 let mut cursor_snapshot = self.cursor_snapshot.clone();
                 let tokens = std::iter::once(self.start_token.clone())
-                    .chain((0..num_calls).map(|_| {
+                    .chain((0..self.num_calls).map(|_| {
                         if self.desugar_doc_comments {
                             cursor_snapshot.next_desugared()
                         } else {
                             cursor_snapshot.next()
                         }
                     }))
-                    .take(num_calls);
+                    .take(self.num_calls);
 
                 make_token_stream(tokens, self.append_unglued_token.clone())
             }
-            fn add_trailing_semi(&self) -> Box<dyn CreateTokenStream> {
-                if self.trailing_semi {
-                    panic!("Called `add_trailing_semi` twice!");
-                }
-                if self.append_unglued_token.is_some() {
-                    panic!(
-                        "Cannot call `add_trailing_semi` when we have an unglued token {:?}",
-                        self.append_unglued_token
-                    );
-                }
-                let mut new = self.clone();
-                new.trailing_semi = true;
-                Box::new(new)
+        }
+
+        let mut num_calls = self.token_cursor.num_next_calls - cursor_snapshot.num_next_calls;
+        match trailing_token {
+            TrailingToken::None => {}
+            TrailingToken::Semi => {
+                assert_eq!(self.token.kind, token::Semi);
+                num_calls += 1;
             }
         }
 
         let lazy_impl = LazyTokenStreamImpl {
             start_token,
-            num_calls: self.token_cursor.num_next_calls - cursor_snapshot.num_next_calls,
+            num_calls,
             cursor_snapshot,
             desugar_doc_comments: self.desugar_doc_comments,
-            trailing_semi: false,
             append_unglued_token: self.token_cursor.append_unglued_token.clone(),
         };
         ret.finalize_tokens(LazyTokenStream::new(lazy_impl));
@@ -1427,9 +1428,9 @@ macro_rules! maybe_collect_tokens {
         if matches!($force_collect, ForceCollect::Yes)
             || $crate::parser::attr::maybe_needs_tokens($attrs)
         {
-            $self.collect_tokens($f)
+            $self.collect_tokens_trailing_token($f)
         } else {
-            $f($self)
+            Ok($f($self)?.0)
         }
     };
 }
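
The hunk above keeps the lazy-capture scheme intact: `LazyTokenStreamImpl` stores only a cursor snapshot plus the number of `next` calls the callback made (now already adjusted for the trailing token), and `create_token_stream` replays those calls on demand. A rough standalone model of that replay idea follows; the `CursorSnapshot` and `LazyTokens` types are invented for illustration, not rustc's `TokenCursor`/`LazyTokenStream`.

// Rough model of "snapshot + replay count" lazy token capture.

#[derive(Clone)]
struct CursorSnapshot {
    toks: Vec<String>, // the underlying token source
    pos: usize,        // where the cursor stood when collection started
}

impl CursorSnapshot {
    fn next(&mut self) -> Option<String> {
        let t = self.toks.get(self.pos).cloned();
        self.pos += 1;
        t
    }
}

// Analogue of `LazyTokenStreamImpl`: nothing is copied eagerly; the captured
// tokens are rebuilt only when someone actually asks for them.
struct LazyTokens {
    snapshot: CursorSnapshot,
    num_calls: usize, // how many tokens the parsing callback consumed
}

impl LazyTokens {
    fn create_token_stream(&self) -> Vec<String> {
        let mut cursor = self.snapshot.clone();
        (0..self.num_calls).filter_map(|_| cursor.next()).collect()
    }
}

fn main() {
    let snapshot = CursorSnapshot {
        toks: vec!["let", "x", "=", "1", ";"].into_iter().map(String::from).collect(),
        pos: 0,
    };
    // Suppose the callback consumed four tokens and reported `TrailingToken::Semi`,
    // so the collector bumped the count to five before building the lazy stream.
    let lazy = LazyTokens { snapshot, num_calls: 5 };
    assert_eq!(lazy.create_token_stream().join(" "), "let x = 1 ;");
}
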
diff --git a/compiler/rustc_parse/src/parser/stmt.rs b/compiler/rustc_parse/src/parser/stmt.rs
index da60ba8472b..8373f6acd7e 100644
--- a/compiler/rustc_parse/src/parser/stmt.rs
+++ b/compiler/rustc_parse/src/parser/stmt.rs
@@ -3,14 +3,13 @@ use super::diagnostics::{AttemptLocalParseRecovery, Error};
 use super::expr::LhsExpr;
 use super::pat::{GateOr, RecoverComma};
 use super::path::PathStyle;
-use super::{BlockMode, ForceCollect, Parser, Restrictions, SemiColonMode};
+use super::{BlockMode, ForceCollect, Parser, Restrictions, SemiColonMode, TrailingToken};
 use crate::{maybe_collect_tokens, maybe_whole};
 
 use rustc_ast as ast;
 use rustc_ast::attr::HasAttrs;
 use rustc_ast::ptr::P;
 use rustc_ast::token::{self, TokenKind};
-use rustc_ast::tokenstream::LazyTokenStream;
 use rustc_ast::util::classify;
 use rustc_ast::{AttrStyle, AttrVec, Attribute, MacCall, MacCallStmt, MacStmtStyle};
 use rustc_ast::{Block, BlockCheckMode, Expr, ExprKind, Local, Stmt, StmtKind, DUMMY_NODE_ID};
@@ -25,7 +24,7 @@ impl<'a> Parser<'a> {
     /// e.g., a `StmtKind::Semi` parses to a `StmtKind::Expr`, leaving the trailing `;` unconsumed.
     // Public for rustfmt usage.
     pub fn parse_stmt(&mut self, force_collect: ForceCollect) -> PResult<'a, Option<Stmt>> {
-        Ok(self.parse_stmt_without_recovery(force_collect).unwrap_or_else(|mut e| {
+        Ok(self.parse_stmt_without_recovery(false, force_collect).unwrap_or_else(|mut e| {
             e.emit();
             self.recover_stmt_(SemiColonMode::Break, BlockMode::Ignore);
             None
@@ -36,6 +35,7 @@ impl<'a> Parser<'a> {
     /// or not we have attributes
     fn parse_stmt_without_recovery(
         &mut self,
+        capture_semi: bool,
         force_collect: ForceCollect,
     ) -> PResult<'a, Option<Stmt>> {
         let mut attrs = self.parse_outer_attributes()?;
@@ -50,68 +50,77 @@ impl<'a> Parser<'a> {
             Some(stmt)
         });
 
-        maybe_collect_tokens!(self, force_collect, &attrs, |this: &mut Self| {
-            let stmt = if this.eat_keyword(kw::Let) {
-                this.parse_local_mk(lo, attrs.into())?
-            } else if this.is_kw_followed_by_ident(kw::Mut) {
-                this.recover_stmt_local(lo, attrs.into(), "missing keyword", "let mut")?
-            } else if this.is_kw_followed_by_ident(kw::Auto) {
-                this.bump(); // `auto`
-                let msg = "write `let` instead of `auto` to introduce a new variable";
-                this.recover_stmt_local(lo, attrs.into(), msg, "let")?
-            } else if this.is_kw_followed_by_ident(sym::var) {
-                this.bump(); // `var`
-                let msg = "write `let` instead of `var` to introduce a new variable";
-                this.recover_stmt_local(lo, attrs.into(), msg, "let")?
-            } else if this.check_path()
-                && !this.token.is_qpath_start()
-                && !this.is_path_start_item()
-            {
-                // We have avoided contextual keywords like `union`, items with `crate` visibility,
-                // or `auto trait` items. We aim to parse an arbitrary path `a::b` but not something
-                // that starts like a path (1 token), but it fact not a path.
-                // Also, we avoid stealing syntax from `parse_item_`.
-                this.parse_stmt_path_start(lo, attrs)?
-            } else if let Some(item) =
-                this.parse_item_common(attrs.clone(), false, true, |_| true, force_collect)?
-            {
-                // FIXME: Bad copy of attrs
-                this.mk_stmt(lo.to(item.span), StmtKind::Item(P(item)))
-            } else if this.eat(&token::Semi) {
-                // Do not attempt to parse an expression if we're done here.
-                this.error_outer_attrs(&attrs);
-                this.mk_stmt(lo, StmtKind::Empty)
-            } else if this.token != token::CloseDelim(token::Brace) {
-                // Remainder are line-expr stmts.
-                let e = this.parse_expr_res(Restrictions::STMT_EXPR, Some(attrs.into()))?;
-                this.mk_stmt(lo.to(e.span), StmtKind::Expr(e))
-            } else {
-                this.error_outer_attrs(&attrs);
-                return Ok(None);
-            };
-            Ok(Some(stmt))
-        })
+        Ok(Some(if self.token.is_keyword(kw::Let) {
+            self.parse_local_mk(lo, attrs.into(), capture_semi, force_collect)?
+        } else if self.is_kw_followed_by_ident(kw::Mut) {
+            self.recover_stmt_local(lo, attrs.into(), "missing keyword", "let mut")?
+        } else if self.is_kw_followed_by_ident(kw::Auto) {
+            self.bump(); // `auto`
+            let msg = "write `let` instead of `auto` to introduce a new variable";
+            self.recover_stmt_local(lo, attrs.into(), msg, "let")?
+        } else if self.is_kw_followed_by_ident(sym::var) {
+            self.bump(); // `var`
+            let msg = "write `let` instead of `var` to introduce a new variable";
+            self.recover_stmt_local(lo, attrs.into(), msg, "let")?
+        } else if self.check_path() && !self.token.is_qpath_start() && !self.is_path_start_item() {
+            // We have avoided contextual keywords like `union`, items with `crate` visibility,
+            // or `auto trait` items. We aim to parse an arbitrary path `a::b` but not something
+            // that starts like a path (1 token), but it fact not a path.
+            // Also, we avoid stealing syntax from `parse_item_`.
+            self.parse_stmt_path_start(lo, attrs, force_collect)?
+        } else if let Some(item) =
+            self.parse_item_common(attrs.clone(), false, true, |_| true, force_collect)?
+        {
+            // FIXME: Bad copy of attrs
+            self.mk_stmt(lo.to(item.span), StmtKind::Item(P(item)))
+        } else if self.eat(&token::Semi) {
+            // Do not attempt to parse an expression if we're done here.
+            self.error_outer_attrs(&attrs);
+            self.mk_stmt(lo, StmtKind::Empty)
+        } else if self.token != token::CloseDelim(token::Brace) {
+            // Remainder are line-expr stmts.
+            let e = self.parse_expr_res(Restrictions::STMT_EXPR, Some(attrs.into()))?;
+            self.mk_stmt(lo.to(e.span), StmtKind::Expr(e))
+        } else {
+            self.error_outer_attrs(&attrs);
+            return Ok(None);
+        }))
     }
 
-    fn parse_stmt_path_start(&mut self, lo: Span, attrs: Vec<Attribute>) -> PResult<'a, Stmt> {
-        let path = self.parse_path(PathStyle::Expr)?;
+    fn parse_stmt_path_start(
+        &mut self,
+        lo: Span,
+        attrs: Vec<Attribute>,
+        force_collect: ForceCollect,
+    ) -> PResult<'a, Stmt> {
+        maybe_collect_tokens!(self, force_collect, &attrs, |this: &mut Parser<'a>| {
+            let path = this.parse_path(PathStyle::Expr)?;
 
-        if self.eat(&token::Not) {
-            return self.parse_stmt_mac(lo, attrs.into(), path);
-        }
+            if this.eat(&token::Not) {
+                let stmt_mac = this.parse_stmt_mac(lo, attrs.into(), path)?;
+                if this.token == token::Semi {
+                    return Ok((stmt_mac, TrailingToken::Semi));
+                } else {
+                    return Ok((stmt_mac, TrailingToken::None));
+                }
+            }
 
-        let expr = if self.eat(&token::OpenDelim(token::Brace)) {
-            self.parse_struct_expr(path, AttrVec::new(), true)?
-        } else {
-            let hi = self.prev_token.span;
-            self.mk_expr(lo.to(hi), ExprKind::Path(None, path), AttrVec::new())
-        };
+            let expr = if this.eat(&token::OpenDelim(token::Brace)) {
+                this.parse_struct_expr(path, AttrVec::new(), true)?
+            } else {
+                let hi = this.prev_token.span;
+                this.mk_expr(lo.to(hi), ExprKind::Path(None, path), AttrVec::new())
+            };
 
-        let expr = self.with_res(Restrictions::STMT_EXPR, |this| {
-            let expr = this.parse_dot_or_call_expr_with(expr, lo, attrs.into())?;
-            this.parse_assoc_expr_with(0, LhsExpr::AlreadyParsed(expr))
-        })?;
-        Ok(self.mk_stmt(lo.to(self.prev_token.span), StmtKind::Expr(expr)))
+            let expr = this.with_res(Restrictions::STMT_EXPR, |this| {
+                let expr = this.parse_dot_or_call_expr_with(expr, lo, attrs.into())?;
+                this.parse_assoc_expr_with(0, LhsExpr::AlreadyParsed(expr))
+            })?;
+            Ok((
+                this.mk_stmt(lo.to(this.prev_token.span), StmtKind::Expr(expr)),
+                TrailingToken::None,
+            ))
+        })
     }
 
     /// Parses a statement macro `mac!(args)` provided a `path` representing `mac`.
@@ -159,15 +168,34 @@ impl<'a> Parser<'a> {
         msg: &str,
         sugg: &str,
     ) -> PResult<'a, Stmt> {
-        let stmt = self.parse_local_mk(lo, attrs)?;
+        let stmt = self.recover_local_after_let(lo, attrs)?;
         self.struct_span_err(lo, "invalid variable declaration")
             .span_suggestion(lo, msg, sugg.to_string(), Applicability::MachineApplicable)
             .emit();
         Ok(stmt)
     }
 
-    fn parse_local_mk(&mut self, lo: Span, attrs: AttrVec) -> PResult<'a, Stmt> {
-        let local = self.parse_local(attrs)?;
+    fn parse_local_mk(
+        &mut self,
+        lo: Span,
+        attrs: AttrVec,
+        capture_semi: bool,
+        force_collect: ForceCollect,
+    ) -> PResult<'a, Stmt> {
+        maybe_collect_tokens!(self, force_collect, &attrs, |this: &mut Parser<'a>| {
+            this.expect_keyword(kw::Let)?;
+            let local = this.parse_local(attrs.into())?;
+            let trailing = if capture_semi && this.token.kind == token::Semi {
+                TrailingToken::Semi
+            } else {
+                TrailingToken::None
+            };
+            Ok((this.mk_stmt(lo.to(this.prev_token.span), StmtKind::Local(local)), trailing))
+        })
+    }
+
+    fn recover_local_after_let(&mut self, lo: Span, attrs: AttrVec) -> PResult<'a, Stmt> {
+        let local = self.parse_local(attrs.into())?;
         Ok(self.mk_stmt(lo.to(self.prev_token.span), StmtKind::Local(local)))
     }
 
@@ -289,7 +317,7 @@ impl<'a> Parser<'a> {
         //      bar;
         //
         // which is valid in other languages, but not Rust.
-        match self.parse_stmt_without_recovery(ForceCollect::No) {
+        match self.parse_stmt_without_recovery(false, ForceCollect::No) {
             // If the next token is an open brace (e.g., `if a b {`), the place-
             // inside-a-block suggestion would be more likely wrong than right.
             Ok(Some(_))
@@ -392,17 +420,11 @@ impl<'a> Parser<'a> {
         // Skip looking for a trailing semicolon when we have an interpolated statement.
         maybe_whole!(self, NtStmt, |x| Some(x));
 
-        let mut stmt = match self.parse_stmt_without_recovery(ForceCollect::No)? {
+        let mut stmt = match self.parse_stmt_without_recovery(true, ForceCollect::No)? {
             Some(stmt) => stmt,
             None => return Ok(None),
         };
 
-        let add_semi_token = |tokens: Option<&mut LazyTokenStream>| {
-            if let Some(tokens) = tokens {
-                *tokens = tokens.add_trailing_semi();
-            }
-        };
-
         let mut eat_semi = true;
         match stmt.kind {
             // Expression without semicolon.
@@ -458,18 +480,12 @@ impl<'a> Parser<'a> {
                     }
                 }
                 eat_semi = false;
-                // We just checked that there's a semicolon in the tokenstream,
-                // so capture it
-                add_semi_token(local.tokens.as_mut());
             }
             StmtKind::Empty | StmtKind::Item(_) | StmtKind::Semi(_) => eat_semi = false,
         }
 
         if eat_semi && self.eat(&token::Semi) {
             stmt = stmt.add_trailing_semicolon();
-            // We just checked that we have a semicolon in the tokenstream,
-            // so capture it
-            add_semi_token(stmt.tokens_mut());
         }
         stmt.span = stmt.span.to(self.prev_token.span);
         Ok(Some(stmt))
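
With stmt.rs reworked this way, collection happens inside `parse_local_mk` and `parse_stmt_path_start`, both routed through `maybe_collect_tokens!`: tokens are captured only when collection is forced or the attributes might need them, and otherwise the callback simply runs and its `TrailingToken` marker is discarded. A rough, self-contained sketch of that dispatch; `Parser` and the `String` error type here are invented stand-ins for rustc's parser and `PResult`.

// Rough sketch of the "collect only when needed" dispatch.

enum ForceCollect {
    Yes,
    No,
}

enum TrailingToken {
    None,
    Semi,
}

struct Parser {
    captured: Option<Vec<String>>, // stand-in for a statement's LazyTokenStream
}

impl Parser {
    // Stand-in for `collect_tokens_trailing_token`: here we only record that a
    // capture happened; the real parser replays its token cursor lazily.
    fn collect_tokens_trailing_token<R>(
        &mut self,
        f: impl FnOnce(&mut Self) -> Result<(R, TrailingToken), String>,
    ) -> Result<R, String> {
        let (res, _trailing) = f(self)?;
        self.captured = Some(vec!["<captured tokens>".to_string()]);
        Ok(res)
    }

    // Analogue of the `maybe_collect_tokens!` macro: skip the bookkeeping
    // entirely when nobody can observe the captured tokens.
    fn maybe_collect_tokens<R>(
        &mut self,
        force: ForceCollect,
        attrs_need_tokens: bool,
        f: impl FnOnce(&mut Self) -> Result<(R, TrailingToken), String>,
    ) -> Result<R, String> {
        if matches!(force, ForceCollect::Yes) || attrs_need_tokens {
            self.collect_tokens_trailing_token(f)
        } else {
            Ok(f(self)?.0)
        }
    }
}

fn main() -> Result<(), String> {
    let mut p = Parser { captured: None };

    // No attributes and no forcing: the callback runs, nothing is captured.
    let stmt = p.maybe_collect_tokens(ForceCollect::No, false, |_this| {
        Ok(("let x = 1", TrailingToken::Semi))
    })?;
    assert!(p.captured.is_none());

    // Forced collection: tokens are captured regardless of the attributes.
    let item = p.maybe_collect_tokens(ForceCollect::Yes, false, |_this| {
        Ok(("fn f() {}", TrailingToken::None))
    })?;
    assert!(p.captured.is_some());

    println!("parsed `{}` and `{}`", stmt, item);
    Ok(())
}
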