about summary refs log tree commit diff
path: root/compiler/rustc_parse/src
diff options
context:
space:
mode:
authorMatthias Krüger <matthias.krueger@famsik.de>2024-08-09 18:25:00 +0200
committerGitHub <noreply@github.com>2024-08-09 18:25:00 +0200
commit665a1a4b55560ceb61d77a4c2e2e52ea5cd63b53 (patch)
treeece7979bec4d7a3129e0de6454b63fd45a9fc096 /compiler/rustc_parse/src
parent9eb77ac3e06d54aa94789982d8df499f649d6bc1 (diff)
parentd65f1316bba2b3cdccce3f0d7d3e2eb27b5fbe49 (diff)
downloadrust-665a1a4b55560ceb61d77a4c2e2e52ea5cd63b53.tar.gz
rust-665a1a4b55560ceb61d77a4c2e2e52ea5cd63b53.zip
Rollup merge of #128865 - jieyouxu:unicurd, r=Urgau
Ensure let stmt compound assignment removal suggestion respect codepoint boundaries

Previously we would try to issue a suggestion for `let x <op>= 1`, i.e.
a compound assignment within a `let` binding, to remove the `<op>`. The
suggestion code unfortunately incorrectly assumed that the `<op>` is an
exactly-1-byte ASCII character, but this assumption is incorrect because
we also recover Unicode-confusables like `➖=` as `-=`. In this example,
the suggestion code used a `+ BytePos(1)` to calculate the span of the
`<op>` codepoint that looks like `-` but the mult-byte Unicode
look-alike would cause the suggested removal span to be inside a
multi-byte codepoint boundary, triggering a codepoint boundary
assertion.

The fix is to use `SourceMap::start_point(token_span)` which properly accounts for codepoint boundaries.

Fixes #128845.

cc #128790

r? ````@fmease````
Diffstat (limited to 'compiler/rustc_parse/src')
-rw-r--r--compiler/rustc_parse/src/parser/stmt.rs8
1 files changed, 6 insertions, 2 deletions
diff --git a/compiler/rustc_parse/src/parser/stmt.rs b/compiler/rustc_parse/src/parser/stmt.rs
index b3efb87a4a2..a3b782d651d 100644
--- a/compiler/rustc_parse/src/parser/stmt.rs
+++ b/compiler/rustc_parse/src/parser/stmt.rs
@@ -408,10 +408,14 @@ impl<'a> Parser<'a> {
     fn parse_initializer(&mut self, eq_optional: bool) -> PResult<'a, Option<P<Expr>>> {
         let eq_consumed = match self.token.kind {
             token::BinOpEq(..) => {
-                // Recover `let x <op>= 1` as `let x = 1`
+                // Recover `let x <op>= 1` as `let x = 1` We must not use `+ BytePos(1)` here
+                // because `<op>` can be a multi-byte lookalike that was recovered, e.g. `➖=` (the
+                // `➖` is a U+2796 Heavy Minus Sign Unicode Character) that was recovered as a
+                // `-=`.
+                let extra_op_span = self.psess.source_map().start_point(self.token.span);
                 self.dcx().emit_err(errors::CompoundAssignmentExpressionInLet {
                     span: self.token.span,
-                    suggestion: self.token.span.with_hi(self.token.span.lo() + BytePos(1)),
+                    suggestion: extra_op_span,
                 });
                 self.bump();
                 true