about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMatthias Krüger <matthias.krueger@famsik.de>2024-08-09 18:24:59 +0200
committerGitHub <noreply@github.com>2024-08-09 18:24:59 +0200
commit9eb77ac3e06d54aa94789982d8df499f649d6bc1 (patch)
treebc9ca014720e0a82dfa739b76cd751b2ab3b011b
parent2e0c5adf04fd2d0a093d05c961c9bf7c99a0aecf (diff)
parent879bfd7ad0f5f79e7bc90320dfb80dfabe91ac2b (diff)
downloadrust-9eb77ac3e06d54aa94789982d8df499f649d6bc1.tar.gz
rust-9eb77ac3e06d54aa94789982d8df499f649d6bc1.zip
Rollup merge of #128864 - jieyouxu:funnicode, r=Urgau
Use `SourceMap::end_point` instead of `- BytePos(1)` in arg removal suggestion

Previously, we tried to remove extra arg commas when providing extra arg removal suggestions. One of
the edge cases is having to account for an arg that has a closing delimiter `)` following it.
However, the previous suggestion code assumed that the delimiter is in fact exactly the 1-byte `)`
character. This assumption was proven incorrect, because we recover from Unicode-confusable
delimiters in the parser, which means that the ending delimiter could be a multi-byte codepoint
that looks *like* a `)`. Subtracing 1 byte could land us in the middle of a codepoint, triggering a
codepoint boundary assertion.

This is fixed by using `SourceMap::end_point` which properly accounts for codepoint boundaries.

Fixes #128717.

cc ````@fmease```` and #128790
-rw-r--r--compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs9
-rw-r--r--tests/ui/typeck/suggest-arg-comma-delete-ice.rs19
-rw-r--r--tests/ui/typeck/suggest-arg-comma-delete-ice.stderr38
3 files changed, 63 insertions, 3 deletions
diff --git a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs
index cef003e0a43..89e7227eda2 100644
--- a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs
+++ b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs
@@ -22,7 +22,7 @@ use rustc_middle::ty::{self, IsSuggestable, Ty, TyCtxt};
 use rustc_middle::{bug, span_bug};
 use rustc_session::Session;
 use rustc_span::symbol::{kw, Ident};
-use rustc_span::{sym, BytePos, Span, DUMMY_SP};
+use rustc_span::{sym, Span, DUMMY_SP};
 use rustc_trait_selection::error_reporting::infer::{FailureCode, ObligationCauseExt};
 use rustc_trait_selection::infer::InferCtxtExt;
 use rustc_trait_selection::traits::{self, ObligationCauseCode, SelectionContext};
@@ -1140,8 +1140,11 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
                                 .get(arg_idx + 1)
                                 .map(|&(_, sp)| sp)
                                 .unwrap_or_else(|| {
-                                    // Subtract one to move before `)`
-                                    call_expr.span.with_lo(call_expr.span.hi() - BytePos(1))
+                                    // Try to move before `)`. Note that `)` here is not necessarily
+                                    // the latin right paren, it could be a Unicode-confusable that
+                                    // looks like a `)`, so we must not use `- BytePos(1)`
+                                    // manipulations here.
+                                    self.tcx().sess.source_map().end_point(call_expr.span)
                                 });
 
                             // Include next comma
diff --git a/tests/ui/typeck/suggest-arg-comma-delete-ice.rs b/tests/ui/typeck/suggest-arg-comma-delete-ice.rs
new file mode 100644
index 00000000000..48d02e13eca
--- /dev/null
+++ b/tests/ui/typeck/suggest-arg-comma-delete-ice.rs
@@ -0,0 +1,19 @@
+//! Previously, we tried to remove extra arg commas when providing extra arg removal suggestions.
+//! One of the edge cases is having to account for an arg that has a closing delimiter `)`
+//! following it. However, the previous suggestion code assumed that the delimiter is in fact
+//! exactly the 1-byte `)` character. This assumption was proven incorrect, because we recover
+//! from Unicode-confusable delimiters in the parser, which means that the ending delimiter could be
+//! a multi-byte codepoint that looks *like* a `)`. Subtracing 1 byte could land us in the middle of
+//! a codepoint, triggering a codepoint boundary assertion.
+//!
+//! issue: rust-lang/rust#128717
+
+fn main() {
+    // The following example has been modified from #128717 to remove irrelevant Unicode as they do
+    // not otherwise partake in the right delimiter calculation causing the codepoint boundary
+    // assertion.
+    main(rahh);
+    //~^ ERROR unknown start of token
+    //~| ERROR this function takes 0 arguments but 1 argument was supplied
+    //~| ERROR cannot find value `rahh` in this scope
+}
diff --git a/tests/ui/typeck/suggest-arg-comma-delete-ice.stderr b/tests/ui/typeck/suggest-arg-comma-delete-ice.stderr
new file mode 100644
index 00000000000..53608391f3c
--- /dev/null
+++ b/tests/ui/typeck/suggest-arg-comma-delete-ice.stderr
@@ -0,0 +1,38 @@
+error: unknown start of token: \u{ff09}
+  --> $DIR/suggest-arg-comma-delete-ice.rs:15:14
+   |
+LL |     main(rahh);
+   |              ^^
+   |
+help: Unicode character ')' (Fullwidth Right Parenthesis) looks like ')' (Right Parenthesis), but it is not
+   |
+LL |     main(rahh);
+   |              ~
+
+error[E0425]: cannot find value `rahh` in this scope
+  --> $DIR/suggest-arg-comma-delete-ice.rs:15:10
+   |
+LL |     main(rahh);
+   |          ^^^^ not found in this scope
+
+error[E0061]: this function takes 0 arguments but 1 argument was supplied
+  --> $DIR/suggest-arg-comma-delete-ice.rs:15:5
+   |
+LL |     main(rahh);
+   |     ^^^^ ---- unexpected argument
+   |
+note: function defined here
+  --> $DIR/suggest-arg-comma-delete-ice.rs:11:4
+   |
+LL | fn main() {
+   |    ^^^^
+help: remove the extra argument
+   |
+LL -     main(rahh);
+LL +     main();
+   |
+
+error: aborting due to 3 previous errors
+
+Some errors have detailed explanations: E0061, E0425.
+For more information about an error, try `rustc --explain E0061`.