Rollup merge of #65640 - estebank:recover-missing-semi, r=Centril

Use heuristics to recover parsing of missing `;` - Detect `,` and `:` typos where `;` was intended. - When the next token could have been the start of a new statement, detect a missing semicolon. Fix #48160, fix #44767 (after adding note about statements).
author: Mazdak Farrokhzad <twingoow@gmail.com> 2019-10-28 21:35:58 +0100
committer: GitHub <noreply@github.com> 2019-10-28 21:35:58 +0100
commit: 2fe6f22dea25dd7498b2acb0f0156563644a13f2 (patch)
tree: 1b750d9b784fc10e9690b0a6d853edc2c5d1832c /src/libsyntax/parse
parent: eec3a9c4af5067c37f5b681599a48b4e21098260 (diff)
parent: e8016c2b13a7e16b6eed9e30b4b6bfe304750566 (diff)
download: rust-2fe6f22dea25dd7498b2acb0f0156563644a13f2.tar.gz
rust-2fe6f22dea25dd7498b2acb0f0156563644a13f2.zip
4 files changed, 124 insertions, 107 deletions
diff --git a/src/libsyntax/parse/parser/diagnostics.rs b/src/libsyntax/parse/parser/diagnostics.rs
index 06982c789db..ab2b4519cb7 100644
--- a/src/libsyntax/parse/parser/diagnostics.rs
+++ b/src/libsyntax/parse/parser/diagnostics.rs
@@ -6,7 +6,7 @@ use crate::ast::{
     self, Param, BinOpKind, BindingMode, BlockCheckMode, Expr, ExprKind, Ident, Item, ItemKind,
     Mutability, Pat, PatKind, PathSegment, QSelf, Ty, TyKind,
 };
-use crate::parse::token::{self, TokenKind};
+use crate::parse::token::{self, TokenKind, token_can_begin_expr};
 use crate::print::pprust;
 use crate::ptr::P;
 use crate::symbol::{kw, sym};
@@ -274,23 +274,23 @@ impl<'a> Parser<'a> {
         expected.sort_by_cached_key(|x| x.to_string());
         expected.dedup();
         let expect = tokens_to_string(&expected[..]);
-        let actual = self.this_token_to_string();
+        let actual = self.this_token_descr();
         let (msg_exp, (label_sp, label_exp)) = if expected.len() > 1 {
             let short_expect = if expected.len() > 6 {
                 format!("{} possible tokens", expected.len())
             } else {
                 expect.clone()
             };
-            (format!("expected one of {}, found `{}`", expect, actual),
+            (format!("expected one of {}, found {}", expect, actual),
                 (self.sess.source_map().next_point(self.prev_span),
                 format!("expected one of {} here", short_expect)))
         } else if expected.is_empty() {
-            (format!("unexpected token: `{}`", actual),
+            (format!("unexpected token: {}", actual),
                 (self.prev_span, "unexpected token after this".to_string()))
         } else {
-            (format!("expected {}, found `{}`", expect, actual),
+            (format!("expected {}, found {}", expect, actual),
                 (self.sess.source_map().next_point(self.prev_span),
-                format!("expected {} here", expect)))
+                format!("expected {}", expect)))
         };
         self.last_unexpected_token_span = Some(self.token.span);
         let mut err = self.fatal(&msg_exp);
@@ -326,58 +326,28 @@ impl<'a> Parser<'a> {
             }
         }
 
-        let is_semi_suggestable = expected.iter().any(|t| match t {
-            TokenType::Token(token::Semi) => true, // We expect a `;` here.
-            _ => false,
-        }) && ( // A `;` would be expected before the current keyword.
-            self.token.is_keyword(kw::Break) ||
-            self.token.is_keyword(kw::Continue) ||
-            self.token.is_keyword(kw::For) ||
-            self.token.is_keyword(kw::If) ||
-            self.token.is_keyword(kw::Let) ||
-            self.token.is_keyword(kw::Loop) ||
-            self.token.is_keyword(kw::Match) ||
-            self.token.is_keyword(kw::Return) ||
-            self.token.is_keyword(kw::While)
-        );
         let sm = self.sess.source_map();
-        match (sm.lookup_line(self.token.span.lo()), sm.lookup_line(sp.lo())) {
-            (Ok(ref a), Ok(ref b)) if a.line != b.line && is_semi_suggestable => {
-                // The spans are in different lines, expected `;` and found `let` or `return`.
-                // High likelihood that it is only a missing `;`.
-                err.span_suggestion_short(
-                    label_sp,
-                    "a semicolon may be missing here",
-                    ";".to_string(),
-                    Applicability::MaybeIncorrect,
-                );
-                err.emit();
-                return Ok(true);
-            }
-            (Ok(ref a), Ok(ref b)) if a.line == b.line => {
-                // When the spans are in the same line, it means that the only content between
-                // them is whitespace, point at the found token in that case:
-                //
-                // X |     () => { syntax error };
-                //   |                    ^^^^^ expected one of 8 possible tokens here
-                //
-                // instead of having:
-                //
-                // X |     () => { syntax error };
-                //   |                   -^^^^^ unexpected token
-                //   |                   |
-                //   |                   expected one of 8 possible tokens here
-                err.span_label(self.token.span, label_exp);
-            }
-            _ if self.prev_span == syntax_pos::DUMMY_SP => {
-                // Account for macro context where the previous span might not be
-                // available to avoid incorrect output (#54841).
-                err.span_label(self.token.span, "unexpected token");
-            }
-            _ => {
-                err.span_label(sp, label_exp);
-                err.span_label(self.token.span, "unexpected token");
-            }
+        if self.prev_span == DUMMY_SP {
+            // Account for macro context where the previous span might not be
+            // available to avoid incorrect output (#54841).
+            err.span_label(self.token.span, label_exp);
+        } else if !sm.is_multiline(self.token.span.shrink_to_hi().until(sp.shrink_to_lo())) {
+            // When the spans are in the same line, it means that the only content between
+            // them is whitespace, point at the found token in that case:
+            //
+            // X |     () => { syntax error };
+            //   |                    ^^^^^ expected one of 8 possible tokens here
+            //
+            // instead of having:
+            //
+            // X |     () => { syntax error };
+            //   |                   -^^^^^ unexpected token
+            //   |                   |
+            //   |                   expected one of 8 possible tokens here
+            err.span_label(self.token.span, label_exp);
+        } else {
+            err.span_label(sp, label_exp);
+            err.span_label(self.token.span, "unexpected token");
         }
         self.maybe_annotate_with_ascription(&mut err, false);
         Err(err)
@@ -902,20 +872,64 @@ impl<'a> Parser<'a> {
             }
         }
         let sm = self.sess.source_map();
-        match (sm.lookup_line(prev_sp.lo()), sm.lookup_line(sp.lo())) {
-            (Ok(ref a), Ok(ref b)) if a.line == b.line => {
-                // When the spans are in the same line, it means that the only content
-                // between them is whitespace, point only at the found token.
-                err.span_label(sp, label_exp);
-            }
-            _ => {
-                err.span_label(prev_sp, label_exp);
-                err.span_label(sp, "unexpected token");
-            }
+        if !sm.is_multiline(prev_sp.until(sp)) {
+            // When the spans are in the same line, it means that the only content
+            // between them is whitespace, point only at the found token.
+            err.span_label(sp, label_exp);
+        } else {
+            err.span_label(prev_sp, label_exp);
+            err.span_label(sp, "unexpected token");
         }
         Err(err)
     }
 
+    pub(super) fn expect_semi(&mut self) -> PResult<'a, ()> {
+        if self.eat(&token::Semi) {
+            return Ok(());
+        }
+        let sm = self.sess.source_map();
+        let msg = format!("expected `;`, found `{}`", self.this_token_descr());
+        let appl = Applicability::MachineApplicable;
+        if self.token.span == DUMMY_SP || self.prev_span == DUMMY_SP {
+            // Likely inside a macro, can't provide meaninful suggestions.
+            return self.expect(&token::Semi).map(|_| ());
+        } else if !sm.is_multiline(self.prev_span.until(self.token.span)) {
+            // The current token is in the same line as the prior token, not recoverable.
+        } else if self.look_ahead(1, |t| t == &token::CloseDelim(token::Brace)
+            || token_can_begin_expr(t) && t.kind != token::Colon
+        ) && [token::Comma, token::Colon].contains(&self.token.kind) {
+            // Likely typo: `,` → `;` or `:` → `;`. This is triggered if the current token is
+            // either `,` or `:`, and the next token could either start a new statement or is a
+            // block close. For example:
+            //
+            //   let x = 32:
+            //   let y = 42;
+            self.bump();
+            let sp = self.prev_span;
+            self.struct_span_err(sp, &msg)
+                .span_suggestion(sp, "change this to `;`", ";".to_string(), appl)
+                .emit();
+            return Ok(())
+        } else if self.look_ahead(0, |t| t == &token::CloseDelim(token::Brace) || (
+                token_can_begin_expr(t)
+                && t != &token::Semi
+                && t != &token::Pound // Avoid triggering with too many trailing `#` in raw string.
+        )) {
+            // Missing semicolon typo. This is triggered if the next token could either start a
+            // new statement or is a block close. For example:
+            //
+            //   let x = 32
+            //   let y = 42;
+            let sp = self.prev_span.shrink_to_hi();
+            self.struct_span_err(sp, &msg)
+                .span_label(self.token.span, "unexpected token")
+                .span_suggestion_short(sp, "add `;` here", ";".to_string(), appl)
+                .emit();
+            return Ok(())
+        }
+        self.expect(&token::Semi).map(|_| ()) // Error unconditionally
+    }
+
     pub(super) fn parse_semi_or_incorrect_foreign_fn_body(
         &mut self,
         ident: &Ident,
@@ -943,7 +957,7 @@ impl<'a> Parser<'a> {
                 Err(mut err) => {
                     err.cancel();
                     mem::replace(self, parser_snapshot);
-                    self.expect(&token::Semi)?;
+                    self.expect_semi()?;
                 }
             }
         } else {
diff --git a/src/libsyntax/parse/parser/item.rs b/src/libsyntax/parse/parser/item.rs
index 506a1a2a27a..fe125336190 100644
--- a/src/libsyntax/parse/parser/item.rs
+++ b/src/libsyntax/parse/parser/item.rs
@@ -98,7 +98,7 @@ impl<'a> Parser<'a> {
         if self.eat_keyword(kw::Use) {
             // USE ITEM
             let item_ = ItemKind::Use(P(self.parse_use_tree()?));
-            self.expect(&token::Semi)?;
+            self.expect_semi()?;
 
             let span = lo.to(self.prev_span);
             let item = self.mk_item(span, Ident::invalid(), item_, vis, attrs);
@@ -526,7 +526,7 @@ impl<'a> Parser<'a> {
             // eat a matched-delimiter token tree:
             let (delim, tts) = self.expect_delimited_token_tree()?;
             if delim != MacDelimiter::Brace {
-                self.expect(&token::Semi)?;
+                self.expect_semi()?;
             }
 
             Ok(Some(Mac {
@@ -776,7 +776,7 @@ impl<'a> Parser<'a> {
         let typ = self.parse_ty()?;
         self.expect(&token::Eq)?;
         let expr = self.parse_expr()?;
-        self.expect(&token::Semi)?;
+        self.expect_semi()?;
         Ok((name, ImplItemKind::Const(typ, expr), Generics::default()))
     }
 
@@ -813,7 +813,7 @@ impl<'a> Parser<'a> {
 
             let bounds = self.parse_generic_bounds(None)?;
             tps.where_clause = self.parse_where_clause()?;
-            self.expect(&token::Semi)?;
+            self.expect_semi()?;
 
             let whole_span = lo.to(self.prev_span);
             if is_auto == IsAuto::Yes {
@@ -927,7 +927,7 @@ impl<'a> Parser<'a> {
         } else {
             None
         };
-        self.expect(&token::Semi)?;
+        self.expect_semi()?;
         Ok((ident, TraitItemKind::Const(ty, default), Generics::default()))
     }
 
@@ -951,7 +951,7 @@ impl<'a> Parser<'a> {
         } else {
             None
         };
-        self.expect(&token::Semi)?;
+        self.expect_semi()?;
 
         Ok((ident, TraitItemKind::Type(bounds, default), generics))
     }
@@ -1054,7 +1054,7 @@ impl<'a> Parser<'a> {
         } else {
             (orig_name, None)
         };
-        self.expect(&token::Semi)?;
+        self.expect_semi()?;
 
         let span = lo.to(self.prev_span);
         Ok(self.mk_item(span, item_name, ItemKind::ExternCrate(orig_name), visibility, attrs))
@@ -1217,7 +1217,7 @@ impl<'a> Parser<'a> {
         self.expect(&token::Colon)?;
         let ty = self.parse_ty()?;
         let hi = self.token.span;
-        self.expect(&token::Semi)?;
+        self.expect_semi()?;
         Ok(ForeignItem {
             ident,
             attrs,
@@ -1235,7 +1235,7 @@ impl<'a> Parser<'a> {
 
         let ident = self.parse_ident()?;
         let hi = self.token.span;
-        self.expect(&token::Semi)?;
+        self.expect_semi()?;
         Ok(ast::ForeignItem {
             ident,
             attrs,
@@ -1282,7 +1282,7 @@ impl<'a> Parser<'a> {
 
         self.expect(&token::Eq)?;
         let e = self.parse_expr()?;
-        self.expect(&token::Semi)?;
+        self.expect_semi()?;
         let item = match m {
             Some(m) => ItemKind::Static(ty, m, e),
             None => ItemKind::Const(ty, e),
@@ -1344,7 +1344,7 @@ impl<'a> Parser<'a> {
             let ty = self.parse_ty()?;
             AliasKind::Weak(ty)
         };
-        self.expect(&token::Semi)?;
+        self.expect_semi()?;
         Ok((ident, alias, tps))
     }
 
@@ -1468,7 +1468,7 @@ impl<'a> Parser<'a> {
         } else if self.token == token::OpenDelim(token::Paren) {
             let body = VariantData::Tuple(self.parse_tuple_struct_body()?, DUMMY_NODE_ID);
             generics.where_clause = self.parse_where_clause()?;
-            self.expect(&token::Semi)?;
+            self.expect_semi()?;
             body
         } else {
             let token_str = self.this_token_descr();
diff --git a/src/libsyntax/parse/parser/stmt.rs b/src/libsyntax/parse/parser/stmt.rs
index ea7e4c05ea1..4f51fefe66f 100644
--- a/src/libsyntax/parse/parser/stmt.rs
+++ b/src/libsyntax/parse/parser/stmt.rs
@@ -432,6 +432,7 @@ impl<'a> Parser<'a> {
             None => return Ok(None),
         };
 
+        let mut eat_semi = true;
         match stmt.kind {
             StmtKind::Expr(ref expr) if self.token != token::Eof => {
                 // expression without semicolon
@@ -453,13 +454,14 @@ impl<'a> Parser<'a> {
                 if macro_legacy_warnings && self.token != token::Semi {
                     self.warn_missing_semicolon();
                 } else {
-                    self.expect_one_of(&[], &[token::Semi])?;
+                    self.expect_semi()?;
+                    eat_semi = false;
                 }
             }
             _ => {}
         }
 
-        if self.eat(&token::Semi) {
+        if eat_semi && self.eat(&token::Semi) {
             stmt = stmt.add_trailing_semicolon();
         }
         stmt.span = stmt.span.to(self.prev_span);
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs
index 4a8b25c6107..03e77b199cc 100644
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -143,34 +143,35 @@ impl Lit {
 
 pub(crate) fn ident_can_begin_expr(name: ast::Name, span: Span, is_raw: bool) -> bool {
     let ident_token = Token::new(Ident(name, is_raw), span);
+    token_can_begin_expr(&ident_token)
+}
 
+pub(crate) fn token_can_begin_expr(ident_token: &Token) -> bool {
     !ident_token.is_reserved_ident() ||
     ident_token.is_path_segment_keyword() ||
-    [
-        kw::Async,
-
-        // FIXME: remove when `await!(..)` syntax is removed
-        // https://github.com/rust-lang/rust/issues/60610
-        kw::Await,
-
-        kw::Do,
-        kw::Box,
-        kw::Break,
-        kw::Continue,
-        kw::False,
-        kw::For,
-        kw::If,
-        kw::Let,
-        kw::Loop,
-        kw::Match,
-        kw::Move,
-        kw::Return,
-        kw::True,
-        kw::Unsafe,
-        kw::While,
-        kw::Yield,
-        kw::Static,
-    ].contains(&name)
+    match ident_token.kind {
+        TokenKind::Ident(ident, _) => [
+            kw::Async,
+            kw::Do,
+            kw::Box,
+            kw::Break,
+            kw::Continue,
+            kw::False,
+            kw::For,
+            kw::If,
+            kw::Let,
+            kw::Loop,
+            kw::Match,
+            kw::Move,
+            kw::Return,
+            kw::True,
+            kw::Unsafe,
+            kw::While,
+            kw::Yield,
+            kw::Static,
+        ].contains(&ident),
+        _=> false,
+    }
 }
 
 fn ident_can_begin_type(name: ast::Name, span: Span, is_raw: bool) -> bool {
author	Mazdak Farrokhzad <twingoow@gmail.com>	2019-10-28 21:35:58 +0100
committer	GitHub <noreply@github.com>	2019-10-28 21:35:58 +0100
commit	2fe6f22dea25dd7498b2acb0f0156563644a13f2 (patch)
tree	1b750d9b784fc10e9690b0a6d853edc2c5d1832c /src/libsyntax/parse
parent	eec3a9c4af5067c37f5b681599a48b4e21098260 (diff)
parent	e8016c2b13a7e16b6eed9e30b4b6bfe304750566 (diff)
download	rust-2fe6f22dea25dd7498b2acb0f0156563644a13f2.tar.gz rust-2fe6f22dea25dd7498b2acb0f0156563644a13f2.zip