about summary refs log tree commit diff
path: root/compiler/rustc_span/src
diff options
context:
space:
mode:
authorEsteban Küber <esteban@kuber.com.ar>2024-07-18 20:02:08 +0000
committerEsteban Küber <esteban@kuber.com.ar>2024-07-18 20:08:43 +0000
commit9dffe9573be86ac7302fb2f27219ef8f9c78dea7 (patch)
treeaf2758a3014b0ca805c82298b4bdcc0698388cb6 /compiler/rustc_span/src
parent2d7795dfb942d49d447837ef43db89216de15696 (diff)
downloadrust-9dffe9573be86ac7302fb2f27219ef8f9c78dea7.tar.gz
rust-9dffe9573be86ac7302fb2f27219ef8f9c78dea7.zip
Make unicode text flow control chars visible as �
We already point these out quite aggressively, telling people not to use them, but would normally be rendered as nothing. Having them visible will make it easier for people to actually deal with them.

```
error: unicode codepoint changing visible direction of text present in literal
  --> $DIR/unicode-control-codepoints.rs:26:22
   |
LL |     println!("{:?}", '�');
   |                      ^-^
   |                      ||
   |                      |'\u{202e}'
   |                      this literal contains an invisible unicode text flow control codepoint
   |
   = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
   = help: if their presence wasn't intentional, you can remove them
help: if you want to keep them but make them visible in your source code, you can escape them
   |
LL |     println!("{:?}", '\u{202e}');
   |                       ~~~~~~~~
```

vs the previous

```
error: unicode codepoint changing visible direction of text present in literal
  --> $DIR/unicode-control-codepoints.rs:26:22
   |
LL |     println!("{:?}", '');
   |                      ^-
   |                      ||
   |                      |'\u{202e}'
   |                      this literal contains an invisible unicode text flow control codepoint
   |
   = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
   = help: if their presence wasn't intentional, you can remove them
help: if you want to keep them but make them visible in your source code, you can escape them
   |
LL |     println!("{:?}", '\u{202e}');
   |                       ~~~~~~~~
```
Diffstat (limited to 'compiler/rustc_span/src')
-rw-r--r--compiler/rustc_span/src/lib.rs3
1 files changed, 2 insertions, 1 deletions
diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs
index ea57c1ce818..7c8ac3be4be 100644
--- a/compiler/rustc_span/src/lib.rs
+++ b/compiler/rustc_span/src/lib.rs
@@ -2093,7 +2093,8 @@ pub fn char_width(ch: char) -> usize {
         | '\u{000E}' | '\u{000F}' | '\u{0010}' | '\u{0011}' | '\u{0012}' | '\u{0013}'
         | '\u{0014}' | '\u{0015}' | '\u{0016}' | '\u{0017}' | '\u{0018}' | '\u{0019}'
         | '\u{001A}' | '\u{001B}' | '\u{001C}' | '\u{001D}' | '\u{001E}' | '\u{001F}'
-        | '\u{007F}' => 1,
+        | '\u{007F}' | '\u{202A}' | '\u{202B}' | '\u{202D}' | '\u{202E}' | '\u{2066}'
+        | '\u{2067}' | '\u{2068}' | '\u{202C}' | '\u{2069}' => 1,
         _ => unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1),
     }
 }