about summary refs log tree commit diff
path: root/src/libsyntax
diff options
context:
space:
mode:
authorMazdak Farrokhzad <twingoow@gmail.com>2019-05-09 23:56:09 +0200
committerGitHub <noreply@github.com>2019-05-09 23:56:09 +0200
commit39edc68c690b2711def27f229ad3d113c55f6354 (patch)
treea0ca4ffa4446344ec7af9c8292ddf259c0b4e120 /src/libsyntax
parent62ab971bb7143601a8734a9d3f072232e46eb973 (diff)
parent54430ad53ab00bf86090a8d11844db1c40b2ca24 (diff)
downloadrust-39edc68c690b2711def27f229ad3d113c55f6354.tar.gz
rust-39edc68c690b2711def27f229ad3d113c55f6354.zip
Rollup merge of #60188 - estebank:recover-block, r=varkor
Identify when a stmt could have been parsed as an expr

There are some expressions that can be parsed as a statement without
a trailing semicolon depending on the context, which can lead to
confusing errors due to the same looking code being accepted in some
places and not others. Identify these cases and suggest enclosing in
parenthesis making the parse non-ambiguous without changing the
accepted grammar.

Fix #54186, cc #54482, fix #59975, fix #47287.
Diffstat (limited to 'src/libsyntax')
-rw-r--r--src/libsyntax/parse/lexer/mod.rs3
-rw-r--r--src/libsyntax/parse/mod.rs27
-rw-r--r--src/libsyntax/parse/parser.rs53
-rw-r--r--src/libsyntax/util/parser.rs25
4 files changed, 101 insertions, 7 deletions
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 3c342c2ff26..2882acb0e78 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -1598,7 +1598,7 @@ mod tests {
     use std::io;
     use std::path::PathBuf;
     use syntax_pos::{BytePos, Span, NO_EXPANSION};
-    use rustc_data_structures::fx::FxHashSet;
+    use rustc_data_structures::fx::{FxHashSet, FxHashMap};
     use rustc_data_structures::sync::Lock;
 
     fn mk_sess(sm: Lrc<SourceMap>) -> ParseSess {
@@ -1617,6 +1617,7 @@ mod tests {
             raw_identifier_spans: Lock::new(Vec::new()),
             registered_diagnostics: Lock::new(ErrorMap::new()),
             buffered_lints: Lock::new(vec![]),
+            ambiguous_block_expr_parse: Lock::new(FxHashMap::default()),
         }
     }
 
diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs
index 8c2fab9ada7..be44b964ba5 100644
--- a/src/libsyntax/parse/mod.rs
+++ b/src/libsyntax/parse/mod.rs
@@ -11,12 +11,12 @@ use crate::tokenstream::{TokenStream, TokenTree};
 use crate::diagnostics::plugin::ErrorMap;
 use crate::print::pprust::token_to_string;
 
-use errors::{FatalError, Level, Handler, ColorConfig, Diagnostic, DiagnosticBuilder};
+use errors::{Applicability, FatalError, Level, Handler, ColorConfig, Diagnostic, DiagnosticBuilder};
 use rustc_data_structures::sync::{Lrc, Lock};
 use syntax_pos::{Span, SourceFile, FileName, MultiSpan};
 use log::debug;
 
-use rustc_data_structures::fx::FxHashSet;
+use rustc_data_structures::fx::{FxHashSet, FxHashMap};
 use std::borrow::Cow;
 use std::path::{Path, PathBuf};
 use std::str;
@@ -52,6 +52,10 @@ pub struct ParseSess {
     included_mod_stack: Lock<Vec<PathBuf>>,
     source_map: Lrc<SourceMap>,
     pub buffered_lints: Lock<Vec<BufferedEarlyLint>>,
+    /// Contains the spans of block expressions that could have been incomplete based on the
+    /// operation token that followed it, but that the parser cannot identify without further
+    /// analysis.
+    pub ambiguous_block_expr_parse: Lock<FxHashMap<Span, Span>>,
 }
 
 impl ParseSess {
@@ -75,6 +79,7 @@ impl ParseSess {
             included_mod_stack: Lock::new(vec![]),
             source_map,
             buffered_lints: Lock::new(vec![]),
+            ambiguous_block_expr_parse: Lock::new(FxHashMap::default()),
         }
     }
 
@@ -98,6 +103,24 @@ impl ParseSess {
             });
         });
     }
+
+    /// Extend an error with a suggestion to wrap an expression with parentheses to allow the
+    /// parser to continue parsing the following operation as part of the same expression.
+    pub fn expr_parentheses_needed(
+        &self,
+        err: &mut DiagnosticBuilder<'_>,
+        span: Span,
+        alt_snippet: Option<String>,
+    ) {
+        if let Some(snippet) = self.source_map().span_to_snippet(span).ok().or(alt_snippet) {
+            err.span_suggestion(
+                span,
+                "parentheses are required to parse this as an expression",
+                format!("({})", snippet),
+                Applicability::MachineApplicable,
+            );
+        }
+    }
 }
 
 #[derive(Clone)]
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index f3eac71ee77..7b19bb760e0 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -186,6 +186,7 @@ enum PrevTokenKind {
     Interpolated,
     Eof,
     Ident,
+    BitOr,
     Other,
 }
 
@@ -1375,6 +1376,7 @@ impl<'a> Parser<'a> {
             token::DocComment(..) => PrevTokenKind::DocComment,
             token::Comma => PrevTokenKind::Comma,
             token::BinOp(token::Plus) => PrevTokenKind::Plus,
+            token::BinOp(token::Or) => PrevTokenKind::BitOr,
             token::Interpolated(..) => PrevTokenKind::Interpolated,
             token::Eof => PrevTokenKind::Eof,
             token::Ident(..) => PrevTokenKind::Ident,
@@ -2806,6 +2808,12 @@ impl<'a> Parser<'a> {
                             let msg = format!("expected expression, found {}",
                                               self.this_token_descr());
                             let mut err = self.fatal(&msg);
+                            let sp = self.sess.source_map().start_point(self.span);
+                            if let Some(sp) = self.sess.ambiguous_block_expr_parse.borrow()
+                                .get(&sp)
+                            {
+                                self.sess.expr_parentheses_needed(&mut err, *sp, None);
+                            }
                             err.span_label(self.span, "expected expression");
                             return Err(err);
                         }
@@ -2845,7 +2853,7 @@ impl<'a> Parser<'a> {
                     "struct literals are not allowed here",
                 );
                 err.multipart_suggestion(
-                    "surround the struct literal with parenthesis",
+                    "surround the struct literal with parentheses",
                     vec![
                         (lo.shrink_to_lo(), "(".to_string()),
                         (expr.span.shrink_to_hi(), ")".to_string()),
@@ -3506,9 +3514,42 @@ impl<'a> Parser<'a> {
             }
         };
 
-        if self.expr_is_complete(&lhs) {
-            // Semi-statement forms are odd. See https://github.com/rust-lang/rust/issues/29071
-            return Ok(lhs);
+        match (self.expr_is_complete(&lhs), AssocOp::from_token(&self.token)) {
+            (true, None) => {
+                // Semi-statement forms are odd. See https://github.com/rust-lang/rust/issues/29071
+                return Ok(lhs);
+            }
+            (false, _) => {} // continue parsing the expression
+            // An exhaustive check is done in the following block, but these are checked first
+            // because they *are* ambiguous but also reasonable looking incorrect syntax, so we
+            // want to keep their span info to improve diagnostics in these cases in a later stage.
+            (true, Some(AssocOp::Multiply)) | // `{ 42 } *foo = bar;` or `{ 42 } * 3`
+            (true, Some(AssocOp::Subtract)) | // `{ 42 } -5`
+            (true, Some(AssocOp::Add)) => { // `{ 42 } + 42
+                // These cases are ambiguous and can't be identified in the parser alone
+                let sp = self.sess.source_map().start_point(self.span);
+                self.sess.ambiguous_block_expr_parse.borrow_mut().insert(sp, lhs.span);
+                return Ok(lhs);
+            }
+            (true, Some(ref op)) if !op.can_continue_expr_unambiguously() => {
+                return Ok(lhs);
+            }
+            (true, Some(_)) => {
+                // We've found an expression that would be parsed as a statement, but the next
+                // token implies this should be parsed as an expression.
+                // For example: `if let Some(x) = x { x } else { 0 } / 2`
+                let mut err = self.sess.span_diagnostic.struct_span_err(self.span, &format!(
+                    "expected expression, found `{}`",
+                    pprust::token_to_string(&self.token),
+                ));
+                err.span_label(self.span, "expected expression");
+                self.sess.expr_parentheses_needed(
+                    &mut err,
+                    lhs.span,
+                    Some(pprust::expr_to_string(&lhs),
+                ));
+                err.emit();
+            }
         }
         self.expected_tokens.push(TokenType::Operator);
         while let Some(op) = AssocOp::from_token(&self.token) {
@@ -4819,6 +4860,10 @@ impl<'a> Parser<'a> {
                         );
                         let mut err = self.fatal(&msg);
                         err.span_label(self.span, format!("expected {}", expected));
+                        let sp = self.sess.source_map().start_point(self.span);
+                        if let Some(sp) = self.sess.ambiguous_block_expr_parse.borrow().get(&sp) {
+                            self.sess.expr_parentheses_needed(&mut err, *sp, None);
+                        }
                         return Err(err);
                     }
                 }
diff --git a/src/libsyntax/util/parser.rs b/src/libsyntax/util/parser.rs
index 80dabffaba9..86e89945afe 100644
--- a/src/libsyntax/util/parser.rs
+++ b/src/libsyntax/util/parser.rs
@@ -207,6 +207,31 @@ impl AssocOp {
             ObsoleteInPlace | Assign | AssignOp(_) | As | DotDot | DotDotEq | Colon => None
         }
     }
+
+    /// This operator could be used to follow a block unambiguously.
+    ///
+    /// This is used for error recovery at the moment, providing a suggestion to wrap blocks with
+    /// parentheses while having a high degree of confidence on the correctness of the suggestion.
+    pub fn can_continue_expr_unambiguously(&self) -> bool {
+        use AssocOp::*;
+        match self {
+            BitXor | // `{ 42 } ^ 3`
+            Assign | // `{ 42 } = { 42 }`
+            Divide | // `{ 42 } / 42`
+            Modulus | // `{ 42 } % 2`
+            ShiftRight | // `{ 42 } >> 2`
+            LessEqual | // `{ 42 } <= 3`
+            Greater | // `{ 42 } > 3`
+            GreaterEqual | // `{ 42 } >= 3`
+            AssignOp(_) | // `{ 42 } +=`
+            LAnd | // `{ 42 } &&foo`
+            As | // `{ 42 } as usize`
+            // Equal | // `{ 42 } == { 42 }`    Accepting these here would regress incorrect
+            // NotEqual | // `{ 42 } != { 42 }  struct literals parser recovery.
+            Colon => true, // `{ 42 }: usize`
+            _ => false,
+        }
+    }
 }
 
 pub const PREC_RESET: i8 = -100;