diff options
| author | Matthias Krüger <matthias.krueger@famsik.de> | 2024-08-09 18:25:00 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-08-09 18:25:00 +0200 |
| commit | 665a1a4b55560ceb61d77a4c2e2e52ea5cd63b53 (patch) | |
| tree | ece7979bec4d7a3129e0de6454b63fd45a9fc096 | |
| parent | 9eb77ac3e06d54aa94789982d8df499f649d6bc1 (diff) | |
| parent | d65f1316bba2b3cdccce3f0d7d3e2eb27b5fbe49 (diff) | |
| download | rust-665a1a4b55560ceb61d77a4c2e2e52ea5cd63b53.tar.gz rust-665a1a4b55560ceb61d77a4c2e2e52ea5cd63b53.zip | |
Rollup merge of #128865 - jieyouxu:unicurd, r=Urgau
Ensure let stmt compound assignment removal suggestion respect codepoint boundaries Previously we would try to issue a suggestion for `let x <op>= 1`, i.e. a compound assignment within a `let` binding, to remove the `<op>`. The suggestion code unfortunately incorrectly assumed that the `<op>` is an exactly-1-byte ASCII character, but this assumption is incorrect because we also recover Unicode-confusables like `➖=` as `-=`. In this example, the suggestion code used a `+ BytePos(1)` to calculate the span of the `<op>` codepoint that looks like `-` but the mult-byte Unicode look-alike would cause the suggested removal span to be inside a multi-byte codepoint boundary, triggering a codepoint boundary assertion. The fix is to use `SourceMap::start_point(token_span)` which properly accounts for codepoint boundaries. Fixes #128845. cc #128790 r? ````@fmease````
| -rw-r--r-- | compiler/rustc_parse/src/parser/stmt.rs | 8 | ||||
| -rw-r--r-- | tests/ui/parser/suggest-remove-compount-assign-let-ice.rs | 16 | ||||
| -rw-r--r-- | tests/ui/parser/suggest-remove-compount-assign-let-ice.stderr | 26 |
3 files changed, 48 insertions, 2 deletions
diff --git a/compiler/rustc_parse/src/parser/stmt.rs b/compiler/rustc_parse/src/parser/stmt.rs index b3efb87a4a2..a3b782d651d 100644 --- a/compiler/rustc_parse/src/parser/stmt.rs +++ b/compiler/rustc_parse/src/parser/stmt.rs @@ -408,10 +408,14 @@ impl<'a> Parser<'a> { fn parse_initializer(&mut self, eq_optional: bool) -> PResult<'a, Option<P<Expr>>> { let eq_consumed = match self.token.kind { token::BinOpEq(..) => { - // Recover `let x <op>= 1` as `let x = 1` + // Recover `let x <op>= 1` as `let x = 1` We must not use `+ BytePos(1)` here + // because `<op>` can be a multi-byte lookalike that was recovered, e.g. `➖=` (the + // `➖` is a U+2796 Heavy Minus Sign Unicode Character) that was recovered as a + // `-=`. + let extra_op_span = self.psess.source_map().start_point(self.token.span); self.dcx().emit_err(errors::CompoundAssignmentExpressionInLet { span: self.token.span, - suggestion: self.token.span.with_hi(self.token.span.lo() + BytePos(1)), + suggestion: extra_op_span, }); self.bump(); true diff --git a/tests/ui/parser/suggest-remove-compount-assign-let-ice.rs b/tests/ui/parser/suggest-remove-compount-assign-let-ice.rs new file mode 100644 index 00000000000..1affee5678e --- /dev/null +++ b/tests/ui/parser/suggest-remove-compount-assign-let-ice.rs @@ -0,0 +1,16 @@ +//! Previously we would try to issue a suggestion for `let x <op>= 1`, i.e. a compound assignment +//! within a `let` binding, to remove the `<op>`. The suggestion code unfortunately incorrectly +//! assumed that the `<op>` is an exactly-1-byte ASCII character, but this assumption is incorrect +//! because we also recover Unicode-confusables like `➖=` as `-=`. In this example, the suggestion +//! code used a `+ BytePos(1)` to calculate the span of the `<op>` codepoint that looks like `-` but +//! the mult-byte Unicode look-alike would cause the suggested removal span to be inside a +//! multi-byte codepoint boundary, triggering a codepoint boundary assertion. +//! +//! issue: rust-lang/rust#128845 + +fn main() { + // Adapted from #128845 but with irrelevant components removed and simplified. + let x ➖= 1; + //~^ ERROR unknown start of token: \u{2796} + //~| ERROR: can't reassign to an uninitialized variable +} diff --git a/tests/ui/parser/suggest-remove-compount-assign-let-ice.stderr b/tests/ui/parser/suggest-remove-compount-assign-let-ice.stderr new file mode 100644 index 00000000000..59716d69b50 --- /dev/null +++ b/tests/ui/parser/suggest-remove-compount-assign-let-ice.stderr @@ -0,0 +1,26 @@ +error: unknown start of token: \u{2796} + --> $DIR/suggest-remove-compount-assign-let-ice.rs:13:11 + | +LL | let x ➖= 1; + | ^^ + | +help: Unicode character '➖' (Heavy Minus Sign) looks like '-' (Minus/Hyphen), but it is not + | +LL | let x -= 1; + | ~ + +error: can't reassign to an uninitialized variable + --> $DIR/suggest-remove-compount-assign-let-ice.rs:13:11 + | +LL | let x ➖= 1; + | ^^^ + | + = help: if you meant to overwrite, remove the `let` binding +help: initialize the variable + | +LL - let x ➖= 1; +LL + let x = 1; + | + +error: aborting due to 2 previous errors + |
