about summary refs log tree commit diff
diff options
context:
space:
mode:
authorGuillaume Gomez <guillaume1.gomez@gmail.com>2024-01-30 11:19:19 +0100
committerGitHub <noreply@github.com>2024-01-30 11:19:19 +0100
commitc00192ae2a9a20125df082576e2d6c0587da3836 (patch)
tree8fa69944fe8056d00e5831e984555efeed5236cf
parentd5e8d85249b86e1826bbfc1977759a5a182c6ea8 (diff)
parent306612ea60850249e0ad9e7a14fb1ce75fcd944e (diff)
downloadrust-c00192ae2a9a20125df082576e2d6c0587da3836.tar.gz
rust-c00192ae2a9a20125df082576e2d6c0587da3836.zip
Rollup merge of #120460 - nnethercote:fix-120397, r=compiler-errors
Be more careful about interpreting a label/lifetime as a mistyped char literal.

Currently the parser interprets any label/lifetime in certain positions as a mistyped char literal, on the assumption that the trailing single quote was accidentally omitted. In such cases it gives an error with a suggestion to add the trailing single quote, and then puts the appropriate char literal into the AST. This behaviour was introduced in #101293.

This is reasonable for a case like this:
```
let c = 'a;
```
because `'a'` is a valid char literal. It's less reasonable for a case like this:
```
let c = 'abc;
```
because `'abc'` is not a valid char literal.

Prior to #120329 this could result in some sub-optimal suggestions in error messages, but nothing else. But #120329 changed `LitKind::from_token_lit` to assume that the char/byte/string literals it receives are valid, and to assert if not. This is reasonable because the lexer does not produce invalid char/byte/string literals in general. But in this "interpret label/lifetime as unclosed char literal" case the parser can produce an invalid char literal with contents such as `abc`, which triggers an assertion failure.

This PR changes the parser so it's more cautious about interpreting labels/lifetimes as unclosed char literals.

Fixes #120397.

r? `@compiler-errors`
-rw-r--r--compiler/rustc_parse/src/parser/expr.rs31
-rw-r--r--compiler/rustc_parse/src/parser/pat.rs4
-rw-r--r--tests/ui/parser/label-is-actually-char.rs41
-rw-r--r--tests/ui/parser/label-is-actually-char.stderr73
4 files changed, 119 insertions, 30 deletions
diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs
index c395decab12..9f748e2a3fe 100644
--- a/compiler/rustc_parse/src/parser/expr.rs
+++ b/compiler/rustc_parse/src/parser/expr.rs
@@ -28,6 +28,7 @@ use rustc_data_structures::stack::ensure_sufficient_stack;
 use rustc_errors::{
     AddToDiagnostic, Applicability, Diagnostic, DiagnosticBuilder, PResult, StashKey,
 };
+use rustc_lexer::unescape::unescape_char;
 use rustc_macros::Subdiagnostic;
 use rustc_session::errors::{report_lit_error, ExprParenthesesNeeded};
 use rustc_session::lint::builtin::BREAK_WITH_LABEL_AND_LOOP;
@@ -1665,6 +1666,7 @@ impl<'a> Parser<'a> {
             && self.may_recover()
             && (matches!(self.token.kind, token::CloseDelim(_) | token::Comma)
                 || self.token.is_punct())
+            && could_be_unclosed_char_literal(label_.ident)
         {
             let (lit, _) =
                 self.recover_unclosed_char(label_.ident, Parser::mk_token_lit_char, |self_| {
@@ -1750,16 +1752,17 @@ impl<'a> Parser<'a> {
         Ok(expr)
     }
 
-    /// Emit an error when a char is parsed as a lifetime because of a missing quote.
+    /// Emit an error when a char is parsed as a lifetime or label because of a missing quote.
     pub(super) fn recover_unclosed_char<L>(
         &self,
-        lifetime: Ident,
+        ident: Ident,
         mk_lit_char: impl FnOnce(Symbol, Span) -> L,
         err: impl FnOnce(&Self) -> DiagnosticBuilder<'a>,
     ) -> L {
-        if let Some(diag) = self.dcx().steal_diagnostic(lifetime.span, StashKey::LifetimeIsChar) {
+        assert!(could_be_unclosed_char_literal(ident));
+        if let Some(diag) = self.dcx().steal_diagnostic(ident.span, StashKey::LifetimeIsChar) {
             diag.with_span_suggestion_verbose(
-                lifetime.span.shrink_to_hi(),
+                ident.span.shrink_to_hi(),
                 "add `'` to close the char literal",
                 "'",
                 Applicability::MaybeIncorrect,
@@ -1768,15 +1771,15 @@ impl<'a> Parser<'a> {
         } else {
             err(self)
                 .with_span_suggestion_verbose(
-                    lifetime.span.shrink_to_hi(),
+                    ident.span.shrink_to_hi(),
                     "add `'` to close the char literal",
                     "'",
                     Applicability::MaybeIncorrect,
                 )
                 .emit();
         }
-        let name = lifetime.without_first_quote().name;
-        mk_lit_char(name, lifetime.span)
+        let name = ident.without_first_quote().name;
+        mk_lit_char(name, ident.span)
     }
 
     /// Recover on the syntax `do catch { ... }` suggesting `try { ... }` instead.
@@ -2047,8 +2050,11 @@ impl<'a> Parser<'a> {
             let msg = format!("unexpected token: {}", super::token_descr(&token));
             self_.dcx().struct_span_err(token.span, msg)
         };
-        // On an error path, eagerly consider a lifetime to be an unclosed character lit
-        if self.token.is_lifetime() {
+        // On an error path, eagerly consider a lifetime to be an unclosed character lit, if that
+        // makes sense.
+        if let Some(ident) = self.token.lifetime()
+            && could_be_unclosed_char_literal(ident)
+        {
             let lt = self.expect_lifetime();
             Ok(self.recover_unclosed_char(lt.ident, mk_lit_char, err))
         } else {
@@ -3776,6 +3782,13 @@ impl<'a> Parser<'a> {
     }
 }
 
+/// Could this lifetime/label be an unclosed char literal? For example, `'a`
+/// could be, but `'abc` could not.
+pub(crate) fn could_be_unclosed_char_literal(ident: Ident) -> bool {
+    ident.name.as_str().starts_with('\'')
+        && unescape_char(ident.without_first_quote().name.as_str()).is_ok()
+}
+
 /// Used to forbid `let` expressions in certain syntactic locations.
 #[derive(Clone, Copy, Subdiagnostic)]
 pub(crate) enum ForbiddenLetReason {
diff --git a/compiler/rustc_parse/src/parser/pat.rs b/compiler/rustc_parse/src/parser/pat.rs
index d04921dde54..12260ec95a5 100644
--- a/compiler/rustc_parse/src/parser/pat.rs
+++ b/compiler/rustc_parse/src/parser/pat.rs
@@ -10,6 +10,7 @@ use crate::errors::{
     UnexpectedParenInRangePat, UnexpectedParenInRangePatSugg,
     UnexpectedVertVertBeforeFunctionParam, UnexpectedVertVertInPattern,
 };
+use crate::parser::expr::could_be_unclosed_char_literal;
 use crate::{maybe_recover_from_interpolated_ty_qpath, maybe_whole};
 use rustc_ast::mut_visit::{noop_visit_pat, MutVisitor};
 use rustc_ast::ptr::P;
@@ -535,11 +536,12 @@ impl<'a> Parser<'a> {
                     None => PatKind::Path(qself, path),
                 }
             }
-        } else if matches!(self.token.kind, token::Lifetime(_))
+        } else if let token::Lifetime(lt) = self.token.kind
             // In pattern position, we're totally fine with using "next token isn't colon"
             // as a heuristic. We could probably just always try to recover if it's a lifetime,
             // because we never have `'a: label {}` in a pattern position anyways, but it does
             // keep us from suggesting something like `let 'a: Ty = ..` => `let 'a': Ty = ..`
+            && could_be_unclosed_char_literal(Ident::with_dummy_span(lt))
             && !self.look_ahead(1, |token| matches!(token.kind, token::Colon))
         {
             // Recover a `'a` as a `'a'` literal
diff --git a/tests/ui/parser/label-is-actually-char.rs b/tests/ui/parser/label-is-actually-char.rs
index 183da603da4..74df898d191 100644
--- a/tests/ui/parser/label-is-actually-char.rs
+++ b/tests/ui/parser/label-is-actually-char.rs
@@ -1,16 +1,43 @@
+// Note: it's ok to interpret 'a as 'a', but but not ok to interpret 'abc as
+// 'abc' because 'abc' is not a valid char literal.
+
 fn main() {
     let c = 'a;
     //~^ ERROR expected `while`, `for`, `loop` or `{` after a label
     //~| HELP add `'` to close the char literal
-    match c {
+
+    let c = 'abc;
+    //~^ ERROR expected `while`, `for`, `loop` or `{` after a label
+    //~| ERROR expected expression, found `;`
+}
+
+fn f() {
+    match 'a' {
         'a'..='b => {}
         //~^ ERROR unexpected token: `'b`
         //~| HELP add `'` to close the char literal
-        _ => {}
+        'c'..='def => {}
+        //~^ ERROR unexpected token: `'def`
     }
-    let x = ['a, 'b];
-    //~^ ERROR expected `while`, `for`, `loop` or `{` after a label
-    //~| ERROR expected `while`, `for`, `loop` or `{` after a label
-    //~| HELP add `'` to close the char literal
-    //~| HELP add `'` to close the char literal
+}
+
+fn g() {
+   match 'g' {
+       'g => {}
+       //~^ ERROR expected pattern, found `=>`
+       //~| HELP add `'` to close the char literal
+       'hij => {}
+       //~^ ERROR expected pattern, found `'hij`
+       _ => {}
+   }
+}
+
+fn h() {
+   let x = ['a, 'b, 'cde];
+   //~^ ERROR expected `while`, `for`, `loop` or `{` after a label
+   //~| HELP add `'` to close the char literal
+   //~| ERROR expected `while`, `for`, `loop` or `{` after a label
+   //~| HELP add `'` to close the char literal
+   //~| ERROR expected `while`, `for`, `loop` or `{` after a label
+   //~| ERROR expected expression, found `]`
 }
diff --git a/tests/ui/parser/label-is-actually-char.stderr b/tests/ui/parser/label-is-actually-char.stderr
index 28c8d2ada3a..10a7e1803b5 100644
--- a/tests/ui/parser/label-is-actually-char.stderr
+++ b/tests/ui/parser/label-is-actually-char.stderr
@@ -1,5 +1,5 @@
 error: expected `while`, `for`, `loop` or `{` after a label
-  --> $DIR/label-is-actually-char.rs:2:15
+  --> $DIR/label-is-actually-char.rs:5:15
    |
 LL |     let c = 'a;
    |               ^ expected `while`, `for`, `loop` or `{` after a label
@@ -9,8 +9,20 @@ help: add `'` to close the char literal
 LL |     let c = 'a';
    |               +
 
+error: expected `while`, `for`, `loop` or `{` after a label
+  --> $DIR/label-is-actually-char.rs:9:17
+   |
+LL |     let c = 'abc;
+   |                 ^ expected `while`, `for`, `loop` or `{` after a label
+
+error: expected expression, found `;`
+  --> $DIR/label-is-actually-char.rs:9:17
+   |
+LL |     let c = 'abc;
+   |                 ^ expected expression
+
 error: unexpected token: `'b`
-  --> $DIR/label-is-actually-char.rs:6:15
+  --> $DIR/label-is-actually-char.rs:16:15
    |
 LL |         'a'..='b => {}
    |               ^^
@@ -20,27 +32,62 @@ help: add `'` to close the char literal
 LL |         'a'..='b' => {}
    |                 +
 
+error: unexpected token: `'def`
+  --> $DIR/label-is-actually-char.rs:19:15
+   |
+LL |         'c'..='def => {}
+   |               ^^^^
+
+error: expected pattern, found `=>`
+  --> $DIR/label-is-actually-char.rs:26:11
+   |
+LL |        'g => {}
+   |           ^^ expected pattern
+   |
+help: add `'` to close the char literal
+   |
+LL |        'g' => {}
+   |          +
+
+error: expected pattern, found `'hij`
+  --> $DIR/label-is-actually-char.rs:29:8
+   |
+LL |        'hij => {}
+   |        ^^^^ expected pattern
+
 error: expected `while`, `for`, `loop` or `{` after a label
-  --> $DIR/label-is-actually-char.rs:11:16
+  --> $DIR/label-is-actually-char.rs:36:15
    |
-LL |     let x = ['a, 'b];
-   |                ^ expected `while`, `for`, `loop` or `{` after a label
+LL |    let x = ['a, 'b, 'cde];
+   |               ^ expected `while`, `for`, `loop` or `{` after a label
    |
 help: add `'` to close the char literal
    |
-LL |     let x = ['a', 'b];
-   |                +
+LL |    let x = ['a', 'b, 'cde];
+   |               +
 
 error: expected `while`, `for`, `loop` or `{` after a label
-  --> $DIR/label-is-actually-char.rs:11:20
+  --> $DIR/label-is-actually-char.rs:36:19
    |
-LL |     let x = ['a, 'b];
-   |                    ^ expected `while`, `for`, `loop` or `{` after a label
+LL |    let x = ['a, 'b, 'cde];
+   |                   ^ expected `while`, `for`, `loop` or `{` after a label
    |
 help: add `'` to close the char literal
    |
-LL |     let x = ['a, 'b'];
-   |                    +
+LL |    let x = ['a, 'b', 'cde];
+   |                   +
+
+error: expected `while`, `for`, `loop` or `{` after a label
+  --> $DIR/label-is-actually-char.rs:36:25
+   |
+LL |    let x = ['a, 'b, 'cde];
+   |                         ^ expected `while`, `for`, `loop` or `{` after a label
+
+error: expected expression, found `]`
+  --> $DIR/label-is-actually-char.rs:36:25
+   |
+LL |    let x = ['a, 'b, 'cde];
+   |                         ^ expected expression
 
-error: aborting due to 4 previous errors
+error: aborting due to 11 previous errors