Use string literal directly when available in format

The previous implementation used the `Parser::parse_expr` function to extract the format expression. If the first comma following the format expression was mistakenly replaced with a dot, the next format argument was consumed by that function as well, because the result still looked like a syntactically valid expression; this led to incorrectly spanned error messages. The way the format expression is extracted has been changed: we first look at the first token of the first argument supplied to the `format!` macro call. If it is a string literal, it is used as the format expression immediately; otherwise we fall back to the original `parse_expr`-based method. This ensures that the parser won't consume too many tokens when a typo is made. A test has been added to ensure that the issue is properly fixed.
author: Sasha <sasha.pourcelot@protonmail.com> 2020-08-28 23:04:42 +0200
committer: Sasha <sasha.pourcelot@protonmail.com> 2020-08-30 22:09:58 +0200
commit: f6d18db402cfebcf5c1bdc8e730f3923b3bb0421 (patch)
tree: 75a6755f7e97b1a5e7da581a8361ee65193b6b25
parent: 85fbf49ce0e2274d0acf798f6e703747674feec3 (diff)
download: rust-f6d18db402cfebcf5c1bdc8e730f3923b3bb0421.tar.gz
rust-f6d18db402cfebcf5c1bdc8e730f3923b3bb0421.zip
4 files changed, 75 insertions, 2 deletions
diff --git a/compiler/rustc_builtin_macros/src/format.rs b/compiler/rustc_builtin_macros/src/format.rs
index 373277f525d..48506148ed9 100644
--- a/compiler/rustc_builtin_macros/src/format.rs
+++ b/compiler/rustc_builtin_macros/src/format.rs
@@ -135,7 +135,26 @@ fn parse_args<'a>(
         return Err(ecx.struct_span_err(sp, "requires at least a format string argument"));
     }
 
-    let fmtstr = p.parse_expr()?;
+    let first_token = &p.token;
+    let fmtstr = match first_token.kind {
+        token::TokenKind::Literal(token::Lit {
+            kind: token::LitKind::Str | token::LitKind::StrRaw(_),
+            ..
+        }) => {
+            // If the first token is a string literal, then a format expression
+            // is constructed from it.
+            //
+            // This allows us to properly handle cases when the first comma
+            // after the format string is mistakenly replaced with any operator,
+            // which causes the expression parser to eat too many tokens.
+            p.parse_literal_maybe_minus()?
+        }
+        _ => {
+            // Otherwise, we fall back to the expression parser.
+            p.parse_expr()?
+        }
+    };
+
     let mut first = true;
     let mut named = false;
 
diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs
index f022c628fe2..69d13b5cf53 100644
--- a/compiler/rustc_parse/src/parser/expr.rs
+++ b/compiler/rustc_parse/src/parser/expr.rs
@@ -1480,7 +1480,7 @@ impl<'a> Parser<'a> {
 
     /// Matches `'-' lit | lit` (cf. `ast_validation::AstValidator::check_expr_within_pat`).
     /// Keep this in sync with `Token::can_begin_literal_maybe_minus`.
-    pub(super) fn parse_literal_maybe_minus(&mut self) -> PResult<'a, P<Expr>> {
+    pub fn parse_literal_maybe_minus(&mut self) -> PResult<'a, P<Expr>> {
         maybe_whole_expr!(self);
 
         let lo = self.token.span;
diff --git a/src/test/ui/fmt/incorrect-first-separator.rs b/src/test/ui/fmt/incorrect-first-separator.rs
new file mode 100644
index 00000000000..0b097fdfab8
--- /dev/null
+++ b/src/test/ui/fmt/incorrect-first-separator.rs
@@ -0,0 +1,22 @@
+// Regression test for issue #75492:
+// https://github.com/rust-lang/rust/issues/75492
+
+use std::iter;
+
+fn main() {
+    format!("A number: {}". iter::once(42).next().unwrap());
+    //~^ ERROR expected token: `,`
+
+    // Other kinds of incorrect separators are also checked:
+
+    format!("A number: {}" / iter::once(42).next().unwrap());
+    //~^ ERROR expected token: `,`
+
+    format!("A number: {}"; iter::once(42).next().unwrap());
+    //~^ ERROR expected token: `,`
+
+    // Note: this character is a COMBINING COMMA BELOW Unicode character
+    format!("A number: {}" ̦ iter::once(42).next().unwrap());
+    //~^ ERROR expected token: `,`
+    //~^^ ERROR unknown start of token: \u{326}
+}
diff --git a/src/test/ui/fmt/incorrect-first-separator.stderr b/src/test/ui/fmt/incorrect-first-separator.stderr
new file mode 100644
index 00000000000..60d2a82855e
--- /dev/null
+++ b/src/test/ui/fmt/incorrect-first-separator.stderr
@@ -0,0 +1,32 @@
+error: unknown start of token: \u{326}
+  --> $DIR/incorrect-first-separator.rs:19:28
+   |
+LL |     format!("A number: {}" ̦ iter::once(42).next().unwrap());
+   |                            ^
+
+error: expected token: `,`
+  --> $DIR/incorrect-first-separator.rs:7:27
+   |
+LL |     format!("A number: {}". iter::once(42).next().unwrap());
+   |                           ^ expected `,`
+
+error: expected token: `,`
+  --> $DIR/incorrect-first-separator.rs:12:28
+   |
+LL |     format!("A number: {}" / iter::once(42).next().unwrap());
+   |                            ^ expected `,`
+
+error: expected token: `,`
+  --> $DIR/incorrect-first-separator.rs:15:27
+   |
+LL |     format!("A number: {}"; iter::once(42).next().unwrap());
+   |                           ^ expected `,`
+
+error: expected token: `,`
+  --> $DIR/incorrect-first-separator.rs:19:30
+   |
+LL |     format!("A number: {}" ̦ iter::once(42).next().unwrap());
+   |                             ^^^^ expected `,`
+
+error: aborting due to 5 previous errors
+
author	Sasha <sasha.pourcelot@protonmail.com>	2020-08-28 23:04:42 +0200
committer	Sasha <sasha.pourcelot@protonmail.com>	2020-08-30 22:09:58 +0200
commit	f6d18db402cfebcf5c1bdc8e730f3923b3bb0421 (patch)
tree	75a6755f7e97b1a5e7da581a8361ee65193b6b25
parent	85fbf49ce0e2274d0acf798f6e703747674feec3 (diff)
download	rust-f6d18db402cfebcf5c1bdc8e730f3923b3bb0421.tar.gz rust-f6d18db402cfebcf5c1bdc8e730f3923b3bb0421.zip