diff options
| author | Esteban Küber <esteban@kuber.com.ar> | 2024-07-09 16:46:09 +0000 | 
|---|---|---|
| committer | Esteban Küber <esteban@kuber.com.ar> | 2024-07-18 19:23:42 +0000 | 
| commit | 89f273f40dafb693139496ed6f914872b6533fa6 (patch) | |
| tree | 27db2fbc8d56fc1cc5bf9c1926d2d78df7134c93 | |
| parent | 5753b3067662e17a69b54b9418dbc37b73769a84 (diff) | |
| download | rust-89f273f40dafb693139496ed6f914872b6533fa6.tar.gz rust-89f273f40dafb693139496ed6f914872b6533fa6.zip | |
Replace ASCII control chars with Unicode Control Pictures
```
error: bare CR not allowed in doc-comment
  --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:3:32
   |
LL | /// doc comment with bare CR: '␍'
   |                                ^
```
| -rw-r--r-- | compiler/rustc_errors/src/emitter.rs | 69 | ||||
| -rw-r--r-- | tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.stderr | 14 | ||||
| -rw-r--r-- | tests/ui/parser/bad-char-literals.rs | bin | 496 -> 608 bytes | |||
| -rw-r--r-- | tests/ui/parser/bad-char-literals.stderr | 17 | ||||
| -rw-r--r-- | tests/ui/parser/issues/issue-66473.stderr | bin | 1061 -> 1209 bytes | |||
| -rw-r--r-- | tests/ui/parser/issues/issue-68629.stderr | bin | 944 -> 976 bytes | |||
| -rw-r--r-- | tests/ui/parser/issues/issue-68730.stderr | bin | 1266 -> 1294 bytes | |||
| -rw-r--r-- | tests/ui/parser/raw/raw-byte-string-literals.stderr | 2 | ||||
| -rw-r--r-- | tests/ui/parser/several-carriage-returns-in-doc-comment.stderr | 6 | ||||
| -rw-r--r-- | tests/ui/parser/trailing-carriage-return-in-string.stderr | 2 | ||||
| -rw-r--r-- | tests/ui/parser/utf16-be-without-bom.stderr | bin | 3641 -> 4029 bytes | |||
| -rw-r--r-- | tests/ui/parser/utf16-le-without-bom.stderr | bin | 3603 -> 3939 bytes | |||
| -rw-r--r-- | tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr | bin | 2028 -> 2040 bytes | |||
| -rw-r--r-- | tests/ui/str/str-escape.stderr | 2 | 
14 files changed, 81 insertions, 31 deletions
| diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs index aa47ca16676..95e1b5348b7 100644 --- a/compiler/rustc_errors/src/emitter.rs +++ b/compiler/rustc_errors/src/emitter.rs @@ -677,10 +677,7 @@ impl HumanEmitter { .skip(left) .take_while(|ch| { // Make sure that the trimming on the right will fall within the terminal width. - // FIXME: `unicode_width` sometimes disagrees with terminals on how wide a `char` - // is. For now, just accept that sometimes the code line will be longer than - // desired. - let next = unicode_width::UnicodeWidthChar::width(*ch).unwrap_or(1); + let next = char_width(*ch); if taken + next > right - left { return false; } @@ -742,11 +739,7 @@ impl HumanEmitter { let left = margin.left(source_string.len()); // Account for unicode characters of width !=0 that were removed. - let left = source_string - .chars() - .take(left) - .map(|ch| unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1)) - .sum(); + let left = source_string.chars().take(left).map(|ch| char_width(ch)).sum(); self.draw_line( buffer, @@ -2039,7 +2032,7 @@ impl HumanEmitter { let sub_len: usize = if is_whitespace_addition { &part.snippet } else { part.snippet.trim() } .chars() - .map(|ch| unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1)) + .map(|ch| char_width(ch)) .sum(); let offset: isize = offsets @@ -2076,11 +2069,8 @@ impl HumanEmitter { } // length of the code after substitution - let full_sub_len = part - .snippet - .chars() - .map(|ch| unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1)) - .sum::<usize>() as isize; + let full_sub_len = + part.snippet.chars().map(|ch| char_width(ch)).sum::<usize>() as isize; // length of the code to be substituted let snippet_len = span_end_pos as isize - span_start_pos as isize; @@ -2580,6 +2570,40 @@ const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[ ('\u{2068}', ""), ('\u{202C}', ""), ('\u{2069}', ""), + // In terminals without Unicode support the following will be garbled, but in 
*all* terminals + // the underlying codepoint will be as well. We could gate this replacement behind a "unicode + // support" gate. + ('\u{0000}', "␀"), + ('\u{0001}', "␁"), + ('\u{0002}', "␂"), + ('\u{0003}', "␃"), + ('\u{0004}', "␄"), + ('\u{0005}', "␅"), + ('\u{0006}', "␆"), + ('\u{0007}', "␇"), + ('\u{0008}', "␈"), + ('\u{000B}', "␋"), + ('\u{000C}', "␌"), + ('\u{000D}', "␍"), + ('\u{000E}', "␎"), + ('\u{000F}', "␏"), + ('\u{0010}', "␐"), + ('\u{0011}', "␑"), + ('\u{0012}', "␒"), + ('\u{0013}', "␓"), + ('\u{0014}', "␔"), + ('\u{0015}', "␕"), + ('\u{0016}', "␖"), + ('\u{0017}', "␗"), + ('\u{0018}', "␘"), + ('\u{0019}', "␙"), + ('\u{001A}', "␚"), + ('\u{001B}', "␛"), + ('\u{001C}', "␜"), + ('\u{001D}', "␝"), + ('\u{001E}', "␞"), + ('\u{001F}', "␟"), + ('\u{007F}', "␡"), ]; fn normalize_whitespace(str: &str) -> String { @@ -2590,6 +2614,21 @@ fn normalize_whitespace(str: &str) -> String { s } +fn char_width(ch: char) -> usize { + // FIXME: `unicode_width` sometimes disagrees with terminals on how wide a `char` is. For now, + // just accept that sometimes the code line will be longer than desired. 
+ match ch { + '\t' => 4, + '\u{0000}' | '\u{0001}' | '\u{0002}' | '\u{0003}' | '\u{0004}' | '\u{0005}' + | '\u{0006}' | '\u{0007}' | '\u{0008}' | '\u{000B}' | '\u{000C}' | '\u{000D}' + | '\u{000E}' | '\u{000F}' | '\u{0010}' | '\u{0011}' | '\u{0012}' | '\u{0013}' + | '\u{0014}' | '\u{0015}' | '\u{0016}' | '\u{0017}' | '\u{0018}' | '\u{0019}' + | '\u{001A}' | '\u{001B}' | '\u{001C}' | '\u{001D}' | '\u{001E}' | '\u{001F}' + | '\u{007F}' => 1, + _ => unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1), + } +} + fn draw_col_separator(buffer: &mut StyledBuffer, line: usize, col: usize) { buffer.puts(line, col, "| ", Style::LineNumber); } diff --git a/tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.stderr b/tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.stderr index da80991c727..841d5236ede 100644 --- a/tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.stderr +++ b/tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.stderr @@ -1,31 +1,31 @@ error: bare CR not allowed in doc-comment --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:3:32 | -LL | /// doc comment with bare CR: ' ' +LL | /// doc comment with bare CR: '␍' | ^ error: bare CR not allowed in block doc-comment --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:7:38 | -LL | /** block doc comment with bare CR: ' ' */ +LL | /** block doc comment with bare CR: '␍' */ | ^ error: bare CR not allowed in doc-comment --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:12:36 | -LL | //! doc comment with bare CR: ' ' +LL | //! doc comment with bare CR: '␍' | ^ error: bare CR not allowed in block doc-comment --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:15:42 | -LL | /*! block doc comment with bare CR: ' ' */ +LL | /*! 
block doc comment with bare CR: '␍' */ | ^ error: bare CR not allowed in string, use `\r` instead --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:19:18 | -LL | let _s = "foo bar"; +LL | let _s = "foo␍bar"; | ^ | help: escape the character @@ -36,13 +36,13 @@ LL | let _s = "foo\rbar"; error: bare CR not allowed in raw string --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:22:19 | -LL | let _s = r"bar foo"; +LL | let _s = r"bar␍foo"; | ^ error: unknown character escape: `\r` --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:25:19 | -LL | let _s = "foo\ bar"; +LL | let _s = "foo\␍bar"; | ^ unknown character escape | = help: this is an isolated carriage return; consider checking your editor and version control settings diff --git a/tests/ui/parser/bad-char-literals.rs b/tests/ui/parser/bad-char-literals.rs index 748b4a22253..c3d55d3f7e3 100644 --- a/tests/ui/parser/bad-char-literals.rs +++ b/tests/ui/parser/bad-char-literals.rs Binary files differdiff --git a/tests/ui/parser/bad-char-literals.stderr b/tests/ui/parser/bad-char-literals.stderr index 89253d7d4aa..38889da5da1 100644 --- a/tests/ui/parser/bad-char-literals.stderr +++ b/tests/ui/parser/bad-char-literals.stderr @@ -25,7 +25,7 @@ LL | '\n'; error: character constant must be escaped: `\r` --> $DIR/bad-char-literals.rs:15:6 | -LL | ' '; +LL | '␍'; | ^ | help: escape the character @@ -33,8 +33,19 @@ help: escape the character LL | '\r'; | ++ +error: character literal may only contain one codepoint + --> $DIR/bad-char-literals.rs:18:5 + | +LL | '-␀-'; + | ^^^^ + | +help: if you meant to write a string literal, use double quotes + | +LL | "-␀-"; + | ~ ~ + error: character constant must be escaped: `\t` - --> $DIR/bad-char-literals.rs:18:6 + --> $DIR/bad-char-literals.rs:21:6 | LL | ' '; | ^^^^ @@ -44,5 +55,5 @@ help: escape the character LL | '\t'; | ++ -error: aborting due to 4 previous errors +error: aborting due to 5 previous errors diff --git a/tests/ui/parser/issues/issue-66473.stderr 
b/tests/ui/parser/issues/issue-66473.stderr index 0e8b0a5da22..4be992d5846 100644 --- a/tests/ui/parser/issues/issue-66473.stderr +++ b/tests/ui/parser/issues/issue-66473.stderr Binary files differdiff --git a/tests/ui/parser/issues/issue-68629.stderr b/tests/ui/parser/issues/issue-68629.stderr index 2562baa1c49..ccb0624208b 100644 --- a/tests/ui/parser/issues/issue-68629.stderr +++ b/tests/ui/parser/issues/issue-68629.stderr Binary files differdiff --git a/tests/ui/parser/issues/issue-68730.stderr b/tests/ui/parser/issues/issue-68730.stderr index 5bca5bbebea..6025ea8c1ae 100644 --- a/tests/ui/parser/issues/issue-68730.stderr +++ b/tests/ui/parser/issues/issue-68730.stderr Binary files differdiff --git a/tests/ui/parser/raw/raw-byte-string-literals.stderr b/tests/ui/parser/raw/raw-byte-string-literals.stderr index a2f27d1ed70..a20ce845c32 100644 --- a/tests/ui/parser/raw/raw-byte-string-literals.stderr +++ b/tests/ui/parser/raw/raw-byte-string-literals.stderr @@ -1,7 +1,7 @@ error: bare CR not allowed in raw string --> $DIR/raw-byte-string-literals.rs:4:9 | -LL | br"a "; +LL | br"a␍"; | ^ error: non-ASCII character in raw byte string literal diff --git a/tests/ui/parser/several-carriage-returns-in-doc-comment.stderr b/tests/ui/parser/several-carriage-returns-in-doc-comment.stderr index 07066fc22e6..3150570e1c9 100644 --- a/tests/ui/parser/several-carriage-returns-in-doc-comment.stderr +++ b/tests/ui/parser/several-carriage-returns-in-doc-comment.stderr @@ -1,19 +1,19 @@ error: bare CR not allowed in doc-comment --> $DIR/several-carriage-returns-in-doc-comment.rs:6:12 | -LL | /// This do c comment contains three isolated `\r` symbols +LL | /// This do␍c comment contains ␍three isolated `\r`␍ symbols | ^ error: bare CR not allowed in doc-comment --> $DIR/several-carriage-returns-in-doc-comment.rs:6:32 | -LL | /// This do c comment contains three isolated `\r` symbols +LL | /// This do␍c comment contains ␍three isolated `\r`␍ symbols | ^ error: bare CR not allowed in 
doc-comment --> $DIR/several-carriage-returns-in-doc-comment.rs:6:52 | -LL | /// This do c comment contains three isolated `\r` symbols +LL | /// This do␍c comment contains ␍three isolated `\r`␍ symbols | ^ error: aborting due to 3 previous errors diff --git a/tests/ui/parser/trailing-carriage-return-in-string.stderr b/tests/ui/parser/trailing-carriage-return-in-string.stderr index fa2677921b3..c5949432af8 100644 --- a/tests/ui/parser/trailing-carriage-return-in-string.stderr +++ b/tests/ui/parser/trailing-carriage-return-in-string.stderr @@ -1,7 +1,7 @@ error: unknown character escape: `\r` --> $DIR/trailing-carriage-return-in-string.rs:10:25 | -LL | let bad = "This is \ a test"; +LL | let bad = "This is \␍ a test"; | ^ unknown character escape | = help: this is an isolated carriage return; consider checking your editor and version control settings diff --git a/tests/ui/parser/utf16-be-without-bom.stderr b/tests/ui/parser/utf16-be-without-bom.stderr index c041f3ecf53..28cf6d97e96 100644 --- a/tests/ui/parser/utf16-be-without-bom.stderr +++ b/tests/ui/parser/utf16-be-without-bom.stderr Binary files differdiff --git a/tests/ui/parser/utf16-le-without-bom.stderr b/tests/ui/parser/utf16-le-without-bom.stderr index cc2220441ac..53004ac942d 100644 --- a/tests/ui/parser/utf16-le-without-bom.stderr +++ b/tests/ui/parser/utf16-le-without-bom.stderr Binary files differdiff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr index a05dea3ff07..13cbd8370db 100644 --- a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr +++ b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr Binary files differdiff --git a/tests/ui/str/str-escape.stderr b/tests/ui/str/str-escape.stderr index c4aee2a110a..599c8de9757 100644 --- a/tests/ui/str/str-escape.stderr +++ b/tests/ui/str/str-escape.stderr @@ -22,7 +22,7 @@ warning: whitespace symbol '\u{c}' is not skipped | LL | let s = b"a\ | ________________^ -LL | | b"; 
+LL | | ␌b"; | | ^- whitespace symbol '\u{c}' is not skipped | |____| | | 
