about summary refs log tree commit diff
path: root/compiler/rustc_errors
diff options
context:
space:
mode:
authorDianQK <dianqk@dianqk.net>2024-12-20 21:46:59 +0800
committerGitHub <noreply@github.com>2024-12-20 21:46:59 +0800
commit1652e3a56073b81f4c990f9c560dbbbab288d4ef (patch)
treeee016791ec121e207cb68739156a2e43f1c9ffa1 /compiler/rustc_errors
parent8a1f8039a7ded79d3d4fe97b110016d89f2b11e2 (diff)
parent1e33dd17115ca948c6e5ebf319695ed64490b2bf (diff)
downloadrust-1652e3a56073b81f4c990f9c560dbbbab288d4ef.tar.gz
rust-1652e3a56073b81f4c990f9c560dbbbab288d4ef.zip
Rollup merge of #134366 - harrisonkaiser:no-break-space, r=davidtwco
Fix logical error with what text is considered whitespace.

There appears to be a logical issue around what counts as leading white-space. There is code which does a subtraction assuming that no errors will be reported inside the leading whitespace. However we compute the length of that whitespace with std::char::is_whitespace and not rustc_lexer::is_whitespace. The former will include a no-break space while later will excluded it. We can only safely make the assumption that no errors will be reported  in whitespace if it is all "Rust Standard" whitespace. Indeed an error does occur in unicode whitespace if it contains a no-break space. In that case the subtraction will cause a ICE (for a compiler in debug mode) as described in https://github.com/rust-lang/rust/issues/132918.
Diffstat (limited to 'compiler/rustc_errors')
-rw-r--r--compiler/rustc_errors/Cargo.toml1
-rw-r--r--compiler/rustc_errors/src/emitter.rs10
2 files changed, 9 insertions, 2 deletions
diff --git a/compiler/rustc_errors/Cargo.toml b/compiler/rustc_errors/Cargo.toml
index 06bae57638f..66b9adbead0 100644
--- a/compiler/rustc_errors/Cargo.toml
+++ b/compiler/rustc_errors/Cargo.toml
@@ -16,6 +16,7 @@ rustc_error_messages = { path = "../rustc_error_messages" }
 rustc_fluent_macro = { path = "../rustc_fluent_macro" }
 rustc_hir = { path = "../rustc_hir" }
 rustc_index = { path = "../rustc_index" }
+rustc_lexer = { path = "../rustc_lexer" }
 rustc_lint_defs = { path = "../rustc_lint_defs" }
 rustc_macros = { path = "../rustc_macros" }
 rustc_serialize = { path = "../rustc_serialize" }
diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs
index f63188402fe..977721a5b8a 100644
--- a/compiler/rustc_errors/src/emitter.rs
+++ b/compiler/rustc_errors/src/emitter.rs
@@ -19,6 +19,7 @@ use derive_setters::Setters;
 use rustc_data_structures::fx::{FxHashMap, FxIndexMap, FxIndexSet};
 use rustc_data_structures::sync::{DynSend, IntoDynSyncSend, Lrc};
 use rustc_error_messages::{FluentArgs, SpanLabel};
+use rustc_lexer;
 use rustc_lint_defs::pluralize;
 use rustc_span::hygiene::{ExpnKind, MacroKind};
 use rustc_span::source_map::SourceMap;
@@ -1698,9 +1699,14 @@ impl HumanEmitter {
                     if let Some(source_string) =
                         line.line_index.checked_sub(1).and_then(|l| file.get_line(l))
                     {
+                        // Whitespace can only be removed (aka considered leading)
+                        // if the lexer considers it whitespace.
+                        // non-rustc_lexer::is_whitespace() chars are reported as an
+                        // error (ex. no-break-spaces \u{a0}), and thus can't be considered
+                        // for removal during error reporting.
                         let leading_whitespace = source_string
                             .chars()
-                            .take_while(|c| c.is_whitespace())
+                            .take_while(|c| rustc_lexer::is_whitespace(*c))
                             .map(|c| {
                                 match c {
                                     // Tabs are displayed as 4 spaces
@@ -1709,7 +1715,7 @@ impl HumanEmitter {
                                 }
                             })
                             .sum();
-                        if source_string.chars().any(|c| !c.is_whitespace()) {
+                        if source_string.chars().any(|c| !rustc_lexer::is_whitespace(c)) {
                             whitespace_margin = min(whitespace_margin, leading_whitespace);
                         }
                     }