diff options
| author | Esteban Küber <esteban@kuber.com.ar> | 2024-07-25 18:38:28 +0000 |
|---|---|---|
| committer | Esteban Küber <esteban@kuber.com.ar> | 2024-07-29 18:40:58 +0000 |
| commit | 6d9ee6ee26c296d9361d5f0fb479b2462687fe4e (patch) | |
| tree | ee792787028161e6d1554df8dc7ed38e951081a6 /compiler/rustc_errors/src | |
| parent | 4db3d12e6f395babed53dee1d209a5c8699a5ae6 (diff) | |
| download | rust-6d9ee6ee26c296d9361d5f0fb479b2462687fe4e.tar.gz rust-6d9ee6ee26c296d9361d5f0fb479b2462687fe4e.zip | |
Change output normalization logic to be linear against size of output
Scan strings to be normalized for printing in a linear scan and collect the resulting `String` only once. Use a binary search when looking for chars to be replaced, instead of a `HashMap::get`.
Diffstat (limited to 'compiler/rustc_errors/src')
| -rw-r--r-- | compiler/rustc_errors/src/emitter.rs | 98 |
1 file changed, 52 insertions, 46 deletions
diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs index 73908e58085..d186996040b 100644 --- a/compiler/rustc_errors/src/emitter.rs +++ b/compiler/rustc_errors/src/emitter.rs @@ -16,6 +16,7 @@ use std::iter; use std::path::Path; use derive_setters::Setters; +use either::Either; use rustc_data_structures::fx::{FxHashMap, FxIndexMap, FxIndexSet}; use rustc_data_structures::sync::{DynSend, IntoDynSyncSend, Lrc}; use rustc_error_messages::{FluentArgs, SpanLabel}; @@ -2559,60 +2560,65 @@ fn num_decimal_digits(num: usize) -> usize { // We replace some characters so the CLI output is always consistent and underlines aligned. // Keep the following list in sync with `rustc_span::char_width`. +// ATTENTION: keep lexicographically sorted so that the binary search will work const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[ - ('\t', " "), // We do our own tab replacement - ('\u{200D}', ""), // Replace ZWJ with nothing for consistent terminal output of grapheme clusters. - ('\u{202A}', "�"), // The following unicode text flow control characters are inconsistently - ('\u{202B}', "�"), // supported across CLIs and can cause confusion due to the bytes on disk - ('\u{202D}', "�"), // not corresponding to the visible source code, so we replace them always. - ('\u{202E}', "�"), + // In terminals without Unicode support the following will be garbled, but in *all* terminals + // the underlying codepoint will be as well. We could gate this replacement behind a "unicode + // support" gate. 
+ ('\0', "␀"), + ('\u{1}', "␁"), + ('\u{2}', "␂"), + ('\u{3}', "␃"), + ('\u{4}', "␄"), + ('\u{5}', "␅"), + ('\u{6}', "␆"), + ('\u{7}', "␇"), + ('\u{8}', "␈"), + ('\t', " "), // We do our own tab replacement + ('\u{b}', "␋"), + ('\u{c}', "␌"), + ('\r', "␍"), + ('\u{e}', "␎"), + ('\u{f}', "␏"), + ('\u{10}', "␐"), + ('\u{11}', "␑"), + ('\u{12}', "␒"), + ('\u{13}', "␓"), + ('\u{14}', "␔"), + ('\u{15}', "␕"), + ('\u{16}', "␖"), + ('\u{17}', "␗"), + ('\u{18}', "␘"), + ('\u{19}', "␙"), + ('\u{1a}', "␚"), + ('\u{1b}', "␛"), + ('\u{1c}', "␜"), + ('\u{1d}', "␝"), + ('\u{1e}', "␞"), + ('\u{1f}', "␟"), + ('\u{7f}', "␡"), + ('\u{200d}', ""), // Replace ZWJ for consistent terminal output of grapheme clusters. + ('\u{202a}', "�"), // The following unicode text flow control characters are inconsistently + ('\u{202b}', "�"), // supported across CLIs and can cause confusion due to the bytes on disk + ('\u{202c}', "�"), // not corresponding to the visible source code, so we replace them always. + ('\u{202d}', "�"), + ('\u{202e}', "�"), ('\u{2066}', "�"), ('\u{2067}', "�"), ('\u{2068}', "�"), - ('\u{202C}', "�"), ('\u{2069}', "�"), - // In terminals without Unicode support the following will be garbled, but in *all* terminals - // the underlying codepoint will be as well. We could gate this replacement behind a "unicode - // support" gate. 
- ('\u{0000}', "␀"), - ('\u{0001}', "␁"), - ('\u{0002}', "␂"), - ('\u{0003}', "␃"), - ('\u{0004}', "␄"), - ('\u{0005}', "␅"), - ('\u{0006}', "␆"), - ('\u{0007}', "␇"), - ('\u{0008}', "␈"), - ('\u{000B}', "␋"), - ('\u{000C}', "␌"), - ('\u{000D}', "␍"), - ('\u{000E}', "␎"), - ('\u{000F}', "␏"), - ('\u{0010}', "␐"), - ('\u{0011}', "␑"), - ('\u{0012}', "␒"), - ('\u{0013}', "␓"), - ('\u{0014}', "␔"), - ('\u{0015}', "␕"), - ('\u{0016}', "␖"), - ('\u{0017}', "␗"), - ('\u{0018}', "␘"), - ('\u{0019}', "␙"), - ('\u{001A}', "␚"), - ('\u{001B}', "␛"), - ('\u{001C}', "␜"), - ('\u{001D}', "␝"), - ('\u{001E}', "␞"), - ('\u{001F}', "␟"), - ('\u{007F}', "␡"), ]; fn normalize_whitespace(str: &str) -> String { - let mut s = str.to_string(); - for (c, replacement) in OUTPUT_REPLACEMENTS { - s = s.replace(*c, replacement); - } - s + // Scan the input string for a character in the ordered table above. If it's present, replace + // it with its alternative string (it can be more than 1 char!). Otherwise, retain the input + // char. At the end, allocate all chars into a string in one operation. + str.chars() + .flat_map(|c| match OUTPUT_REPLACEMENTS.binary_search_by_key(&c, |(k, _)| *k) { + Ok(i) => Either::Left(OUTPUT_REPLACEMENTS[i].1.chars()), + _ => Either::Right([c].into_iter()), + }) + .collect() } fn draw_col_separator(buffer: &mut StyledBuffer, line: usize, col: usize) { |
