about summary refs log tree commit diff
diff options
context:
space:
mode:
authorDianQK <dianqk@dianqk.net>2024-12-20 21:46:59 +0800
committerGitHub <noreply@github.com>2024-12-20 21:46:59 +0800
commit1652e3a56073b81f4c990f9c560dbbbab288d4ef (patch)
treeee016791ec121e207cb68739156a2e43f1c9ffa1
parent8a1f8039a7ded79d3d4fe97b110016d89f2b11e2 (diff)
parent1e33dd17115ca948c6e5ebf319695ed64490b2bf (diff)
downloadrust-1652e3a56073b81f4c990f9c560dbbbab288d4ef.tar.gz
rust-1652e3a56073b81f4c990f9c560dbbbab288d4ef.zip
Rollup merge of #134366 - harrisonkaiser:no-break-space, r=davidtwco
Fix logical error with what text is considered whitespace.

There appears to be a logical issue around what counts as leading white-space. There is code which does a subtraction assuming that no errors will be reported inside the leading whitespace. However we compute the length of that whitespace with std::char::is_whitespace and not rustc_lexer::is_whitespace. The former will include a no-break space while later will excluded it. We can only safely make the assumption that no errors will be reported  in whitespace if it is all "Rust Standard" whitespace. Indeed an error does occur in unicode whitespace if it contains a no-break space. In that case the subtraction will cause a ICE (for a compiler in debug mode) as described in https://github.com/rust-lang/rust/issues/132918.
-rw-r--r--Cargo.lock1
-rw-r--r--compiler/rustc_errors/Cargo.toml1
-rw-r--r--compiler/rustc_errors/src/emitter.rs10
-rw-r--r--tests/ui/errors/emitter-overflow-bad-whitespace.rs11
-rw-r--r--tests/ui/errors/emitter-overflow-bad-whitespace.stderr13
5 files changed, 34 insertions, 2 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 3f05b44ccfa..1fd288af3a1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3722,6 +3722,7 @@ dependencies = [
  "rustc_fluent_macro",
  "rustc_hir",
  "rustc_index",
+ "rustc_lexer",
  "rustc_lint_defs",
  "rustc_macros",
  "rustc_serialize",
diff --git a/compiler/rustc_errors/Cargo.toml b/compiler/rustc_errors/Cargo.toml
index 06bae57638f..66b9adbead0 100644
--- a/compiler/rustc_errors/Cargo.toml
+++ b/compiler/rustc_errors/Cargo.toml
@@ -16,6 +16,7 @@ rustc_error_messages = { path = "../rustc_error_messages" }
 rustc_fluent_macro = { path = "../rustc_fluent_macro" }
 rustc_hir = { path = "../rustc_hir" }
 rustc_index = { path = "../rustc_index" }
+rustc_lexer = { path = "../rustc_lexer" }
 rustc_lint_defs = { path = "../rustc_lint_defs" }
 rustc_macros = { path = "../rustc_macros" }
 rustc_serialize = { path = "../rustc_serialize" }
diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs
index f63188402fe..977721a5b8a 100644
--- a/compiler/rustc_errors/src/emitter.rs
+++ b/compiler/rustc_errors/src/emitter.rs
@@ -19,6 +19,7 @@ use derive_setters::Setters;
 use rustc_data_structures::fx::{FxHashMap, FxIndexMap, FxIndexSet};
 use rustc_data_structures::sync::{DynSend, IntoDynSyncSend, Lrc};
 use rustc_error_messages::{FluentArgs, SpanLabel};
+use rustc_lexer;
 use rustc_lint_defs::pluralize;
 use rustc_span::hygiene::{ExpnKind, MacroKind};
 use rustc_span::source_map::SourceMap;
@@ -1698,9 +1699,14 @@ impl HumanEmitter {
                     if let Some(source_string) =
                         line.line_index.checked_sub(1).and_then(|l| file.get_line(l))
                     {
+                        // Whitespace can only be removed (aka considered leading)
+                        // if the lexer considers it whitespace.
+                        // non-rustc_lexer::is_whitespace() chars are reported as an
+                        // error (ex. no-break-spaces \u{a0}), and thus can't be considered
+                        // for removal during error reporting.
                         let leading_whitespace = source_string
                             .chars()
-                            .take_while(|c| c.is_whitespace())
+                            .take_while(|c| rustc_lexer::is_whitespace(*c))
                             .map(|c| {
                                 match c {
                                     // Tabs are displayed as 4 spaces
@@ -1709,7 +1715,7 @@ impl HumanEmitter {
                                 }
                             })
                             .sum();
-                        if source_string.chars().any(|c| !c.is_whitespace()) {
+                        if source_string.chars().any(|c| !rustc_lexer::is_whitespace(c)) {
                             whitespace_margin = min(whitespace_margin, leading_whitespace);
                         }
                     }
diff --git a/tests/ui/errors/emitter-overflow-bad-whitespace.rs b/tests/ui/errors/emitter-overflow-bad-whitespace.rs
new file mode 100644
index 00000000000..d024d1f3921
--- /dev/null
+++ b/tests/ui/errors/emitter-overflow-bad-whitespace.rs
@@ -0,0 +1,11 @@
+// Invalid whitespace (not listed here: https://doc.rust-lang.org/reference/whitespace.html
+// e.g. \u{a0}) before any other syntax on the line should not cause any integer overflow
+// in the emitter, even when the terminal width causes the line to be truncated.
+//
+// issue #132918
+
+//@ check-fail
+//@ needs-rustc-debug-assertions
+//@ compile-flags: --diagnostic-width=1
+                                         fn main() {              return;              }
+//~^ ERROR unknown start of token: \u{a0}
diff --git a/tests/ui/errors/emitter-overflow-bad-whitespace.stderr b/tests/ui/errors/emitter-overflow-bad-whitespace.stderr
new file mode 100644
index 00000000000..8c0c097bcb9
--- /dev/null
+++ b/tests/ui/errors/emitter-overflow-bad-whitespace.stderr
@@ -0,0 +1,13 @@
+error: unknown start of token: \u{a0}
+  --> $DIR/emitter-overflow-bad-whitespace.rs:10:1
+   |
+LL |     ...
+   | ^
+   |
+help: Unicode character ' ' (No-Break Space) looks like ' ' (Space), but it is not
+   |
+LL |                                          fn main() {              return;              }
+   | +
+
+error: aborting due to 1 previous error
+