diff options
| author | Matthias Krüger <matthias.krueger@famsik.de> | 2024-08-09 18:24:59 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-08-09 18:24:59 +0200 |
| commit | 9eb77ac3e06d54aa94789982d8df499f649d6bc1 (patch) | |
| tree | bc9ca014720e0a82dfa739b76cd751b2ab3b011b | |
| parent | 2e0c5adf04fd2d0a093d05c961c9bf7c99a0aecf (diff) | |
| parent | 879bfd7ad0f5f79e7bc90320dfb80dfabe91ac2b (diff) | |
| download | rust-9eb77ac3e06d54aa94789982d8df499f649d6bc1.tar.gz rust-9eb77ac3e06d54aa94789982d8df499f649d6bc1.zip | |
Rollup merge of #128864 - jieyouxu:funnicode, r=Urgau
Use `SourceMap::end_point` instead of `- BytePos(1)` in arg removal suggestion Previously, we tried to remove extra arg commas when providing extra arg removal suggestions. One of the edge cases is having to account for an arg that has a closing delimiter `)` following it. However, the previous suggestion code assumed that the delimiter is in fact exactly the 1-byte `)` character. This assumption was proven incorrect, because we recover from Unicode-confusable delimiters in the parser, which means that the ending delimiter could be a multi-byte codepoint that looks *like* a `)`. Subtracting 1 byte could land us in the middle of a codepoint, triggering a codepoint boundary assertion. This is fixed by using `SourceMap::end_point` which properly accounts for codepoint boundaries. Fixes #128717. cc @fmease and #128790
| -rw-r--r-- | compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs | 9 | ||||
| -rw-r--r-- | tests/ui/typeck/suggest-arg-comma-delete-ice.rs | 19 | ||||
| -rw-r--r-- | tests/ui/typeck/suggest-arg-comma-delete-ice.stderr | 38 |
3 files changed, 63 insertions, 3 deletions
diff --git a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs index cef003e0a43..89e7227eda2 100644 --- a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs +++ b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs @@ -22,7 +22,7 @@ use rustc_middle::ty::{self, IsSuggestable, Ty, TyCtxt}; use rustc_middle::{bug, span_bug}; use rustc_session::Session; use rustc_span::symbol::{kw, Ident}; -use rustc_span::{sym, BytePos, Span, DUMMY_SP}; +use rustc_span::{sym, Span, DUMMY_SP}; use rustc_trait_selection::error_reporting::infer::{FailureCode, ObligationCauseExt}; use rustc_trait_selection::infer::InferCtxtExt; use rustc_trait_selection::traits::{self, ObligationCauseCode, SelectionContext}; @@ -1140,8 +1140,11 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { .get(arg_idx + 1) .map(|&(_, sp)| sp) .unwrap_or_else(|| { - // Subtract one to move before `)` - call_expr.span.with_lo(call_expr.span.hi() - BytePos(1)) + // Try to move before `)`. Note that `)` here is not necessarily + // the latin right paren, it could be a Unicode-confusable that + // looks like a `)`, so we must not use `- BytePos(1)` + // manipulations here. + self.tcx().sess.source_map().end_point(call_expr.span) }); // Include next comma diff --git a/tests/ui/typeck/suggest-arg-comma-delete-ice.rs b/tests/ui/typeck/suggest-arg-comma-delete-ice.rs new file mode 100644 index 00000000000..48d02e13eca --- /dev/null +++ b/tests/ui/typeck/suggest-arg-comma-delete-ice.rs @@ -0,0 +1,19 @@ +//! Previously, we tried to remove extra arg commas when providing extra arg removal suggestions. +//! One of the edge cases is having to account for an arg that has a closing delimiter `)` +//! following it. However, the previous suggestion code assumed that the delimiter is in fact +//! exactly the 1-byte `)` character. This assumption was proven incorrect, because we recover +//! from Unicode-confusable delimiters in the parser, which means that the ending delimiter could be +//! 
a multi-byte codepoint that looks *like* a `)`. Subtracing 1 byte could land us in the middle of +//! a codepoint, triggering a codepoint boundary assertion. +//! +//! issue: rust-lang/rust#128717 + +fn main() { + // The following example has been modified from #128717 to remove irrelevant Unicode as they do + // not otherwise partake in the right delimiter calculation causing the codepoint boundary + // assertion. + main(rahh）; + //~^ ERROR unknown start of token + //~| ERROR this function takes 0 arguments but 1 argument was supplied + //~| ERROR cannot find value `rahh` in this scope +} diff --git a/tests/ui/typeck/suggest-arg-comma-delete-ice.stderr b/tests/ui/typeck/suggest-arg-comma-delete-ice.stderr new file mode 100644 index 00000000000..53608391f3c --- /dev/null +++ b/tests/ui/typeck/suggest-arg-comma-delete-ice.stderr @@ -0,0 +1,38 @@ +error: unknown start of token: \u{ff09} + --> $DIR/suggest-arg-comma-delete-ice.rs:15:14 + | +LL | main(rahh）; + | ^^ + | +help: Unicode character '）' (Fullwidth Right Parenthesis) looks like ')' (Right Parenthesis), but it is not + | +LL | main(rahh); + | ~ + +error[E0425]: cannot find value `rahh` in this scope + --> $DIR/suggest-arg-comma-delete-ice.rs:15:10 + | +LL | main(rahh）; + | ^^^^ not found in this scope + +error[E0061]: this function takes 0 arguments but 1 argument was supplied + --> $DIR/suggest-arg-comma-delete-ice.rs:15:5 + | +LL | main(rahh）; + | ^^^^ ---- unexpected argument + | +note: function defined here + --> $DIR/suggest-arg-comma-delete-ice.rs:11:4 + | +LL | fn main() { + | ^^^^ +help: remove the extra argument + | +LL - main(rahh）; +LL + main(); + | + +error: aborting due to 3 previous errors + +Some errors have detailed explanations: E0061, E0425. +For more information about an error, try `rustc --explain E0061`. |
