about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Michael Goulet <michael@errs.io>, 2024-12-31 04:53:00 +0000
committer: Michael Goulet <michael@errs.io>, 2024-12-31 04:53:00 +0000
commit: 54e33bbdeca62508a71c0e445f1d1c82eb0b48c3 (patch)
tree: cf4fd07d91400f184815ecdc4ea1c17ebca38130
parent: 4e5fec2f1ea4b1cfecaa14304c9f56de59b344cb (diff)
download: rust-54e33bbdeca62508a71c0e445f1d1c82eb0b48c3.tar.gz
download: rust-54e33bbdeca62508a71c0e445f1d1c82eb0b48c3.zip
Account for C string literals in HiddenUnicodeCodepoints lint
-rw-r--r-- compiler/rustc_lint/src/hidden_unicode_codepoints.rs | 24
-rw-r--r-- tests/ui/parser/unicode-control-codepoints.rs | 7
-rw-r--r-- tests/ui/parser/unicode-control-codepoints.stderr | 68
3 files changed, 76 insertions, 23 deletions
diff --git a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs b/compiler/rustc_lint/src/hidden_unicode_codepoints.rs
index 28368e1ab46..4a7e4bf75cf 100644
--- a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs
+++ b/compiler/rustc_lint/src/hidden_unicode_codepoints.rs
@@ -101,14 +101,28 @@ impl EarlyLintPass for HiddenUnicodeCodepoints {
                 if !contains_text_flow_control_chars(text.as_str()) {
                     return;
                 }
-                let padding = match token_lit.kind {
+                let (padding, point_at_inner_spans) = match token_lit.kind {
                     // account for `"` or `'`
-                    ast::token::LitKind::Str | ast::token::LitKind::Char => 1,
+                    ast::token::LitKind::Str | ast::token::LitKind::Char => (1, true),
+                    // account for `c"`
+                    ast::token::LitKind::CStr => (2, true),
                     // account for `r###"`
-                    ast::token::LitKind::StrRaw(n) => n as u32 + 2,
-                    _ => return,
+                    ast::token::LitKind::StrRaw(n) => (n as u32 + 2, true),
+                    // account for `cr###"`
+                    ast::token::LitKind::CStrRaw(n) => (n as u32 + 3, true),
+                    // suppress bad literals.
+                    ast::token::LitKind::Err(_) => return,
+                    // Be conservative just in case new literals do support these.
+                    _ => (0, false),
                 };
-                self.lint_text_direction_codepoint(cx, text, expr.span, padding, true, "literal");
+                self.lint_text_direction_codepoint(
+                    cx,
+                    text,
+                    expr.span,
+                    padding,
+                    point_at_inner_spans,
+                    "literal",
+                );
             }
             _ => {}
         };
diff --git a/tests/ui/parser/unicode-control-codepoints.rs b/tests/ui/parser/unicode-control-codepoints.rs
index df099bb62ad..c2b9a9911ac 100644
--- a/tests/ui/parser/unicode-control-codepoints.rs
+++ b/tests/ui/parser/unicode-control-codepoints.rs
@@ -1,3 +1,5 @@
+//@ edition: 2021
+
 fn main() {
     // if access_level != "us‫e‪r" { // Check if admin
     //~^ ERROR unicode codepoint changing visible direction of text present in comment
@@ -25,6 +27,11 @@ fn main() {
     //~| ERROR non-ASCII character in raw byte string literal
     println!("{:?}", '‮');
     //~^ ERROR unicode codepoint changing visible direction of text present in literal
+
+    let _ = c"‮";
+    //~^ ERROR unicode codepoint changing visible direction of text present in literal
+    let _ = cr#"‮"#;
+    //~^ ERROR unicode codepoint changing visible direction of text present in literal
 }
 
 //"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only */"
diff --git a/tests/ui/parser/unicode-control-codepoints.stderr b/tests/ui/parser/unicode-control-codepoints.stderr
index 28de4ae72ab..fa75df6a443 100644
--- a/tests/ui/parser/unicode-control-codepoints.stderr
+++ b/tests/ui/parser/unicode-control-codepoints.stderr
@@ -1,5 +1,5 @@
 error: unicode escape in byte string
-  --> $DIR/unicode-control-codepoints.rs:6:26
+  --> $DIR/unicode-control-codepoints.rs:8:26
    |
 LL |     println!("{:?}", b"us\u{202B}e\u{202A}r");
    |                          ^^^^^^^^ unicode escape in byte string
@@ -7,7 +7,7 @@ LL |     println!("{:?}", b"us\u{202B}e\u{202A}r");
    = help: unicode escape sequences cannot be used as a byte or in a byte string
 
 error: unicode escape in byte string
-  --> $DIR/unicode-control-codepoints.rs:6:35
+  --> $DIR/unicode-control-codepoints.rs:8:35
    |
 LL |     println!("{:?}", b"us\u{202B}e\u{202A}r");
    |                                   ^^^^^^^^ unicode escape in byte string
@@ -15,7 +15,7 @@ LL |     println!("{:?}", b"us\u{202B}e\u{202A}r");
    = help: unicode escape sequences cannot be used as a byte or in a byte string
 
 error: non-ASCII character in byte string literal
-  --> $DIR/unicode-control-codepoints.rs:16:26
+  --> $DIR/unicode-control-codepoints.rs:18:26
    |
 LL |     println!("{:?}", b"/*� } �if isAdmin� � begin admins only ");
    |                          ^ must be ASCII but is '\u{202e}'
@@ -26,7 +26,7 @@ LL |     println!("{:?}", b"/*\xE2\x80\xAE } �if isAdmin� � begin admins o
    |                          ~~~~~~~~~~~~
 
 error: non-ASCII character in byte string literal
-  --> $DIR/unicode-control-codepoints.rs:16:30
+  --> $DIR/unicode-control-codepoints.rs:18:30
    |
 LL |     println!("{:?}", b"/*� } �if isAdmin� � begin admins only ");
    |                              ^ must be ASCII but is '\u{2066}'
@@ -37,7 +37,7 @@ LL |     println!("{:?}", b"/*� } \xE2\x81\xA6if isAdmin� � begin admins o
    |                              ~~~~~~~~~~~~
 
 error: non-ASCII character in byte string literal
-  --> $DIR/unicode-control-codepoints.rs:16:41
+  --> $DIR/unicode-control-codepoints.rs:18:41
    |
 LL |     println!("{:?}", b"/*� } �if isAdmin� � begin admins only ");
    |                                         ^ must be ASCII but is '\u{2069}'
@@ -48,7 +48,7 @@ LL |     println!("{:?}", b"/*� } �if isAdmin\xE2\x81\xA9 � begin admins o
    |                                         ~~~~~~~~~~~~
 
 error: non-ASCII character in byte string literal
-  --> $DIR/unicode-control-codepoints.rs:16:43
+  --> $DIR/unicode-control-codepoints.rs:18:43
    |
 LL |     println!("{:?}", b"/*� } �if isAdmin� � begin admins only ");
    |                                           ^ must be ASCII but is '\u{2066}'
@@ -59,31 +59,31 @@ LL |     println!("{:?}", b"/*� } �if isAdmin� \xE2\x81\xA6 begin admins o
    |                                           ~~~~~~~~~~~~
 
 error: non-ASCII character in raw byte string literal
-  --> $DIR/unicode-control-codepoints.rs:21:29
+  --> $DIR/unicode-control-codepoints.rs:23:29
    |
 LL |     println!("{:?}", br##"/*� } �if isAdmin� � begin admins only "##);
    |                             ^ must be ASCII but is '\u{202e}'
 
 error: non-ASCII character in raw byte string literal
-  --> $DIR/unicode-control-codepoints.rs:21:33
+  --> $DIR/unicode-control-codepoints.rs:23:33
    |
 LL |     println!("{:?}", br##"/*� } �if isAdmin� � begin admins only "##);
    |                                 ^ must be ASCII but is '\u{2066}'
 
 error: non-ASCII character in raw byte string literal
-  --> $DIR/unicode-control-codepoints.rs:21:44
+  --> $DIR/unicode-control-codepoints.rs:23:44
    |
 LL |     println!("{:?}", br##"/*� } �if isAdmin� � begin admins only "##);
    |                                            ^ must be ASCII but is '\u{2069}'
 
 error: non-ASCII character in raw byte string literal
-  --> $DIR/unicode-control-codepoints.rs:21:46
+  --> $DIR/unicode-control-codepoints.rs:23:46
    |
 LL |     println!("{:?}", br##"/*� } �if isAdmin� � begin admins only "##);
    |                                              ^ must be ASCII but is '\u{2066}'
 
 error: unicode codepoint changing visible direction of text present in comment
-  --> $DIR/unicode-control-codepoints.rs:2:5
+  --> $DIR/unicode-control-codepoints.rs:4:5
    |
 LL |     // if access_level != "us�e�r" { // Check if admin
    |     ^^^^^^^^^^^^^^^^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^
@@ -97,7 +97,7 @@ LL |     // if access_level != "us�e�r" { // Check if admin
    = help: if their presence wasn't intentional, you can remove them
 
 error: unicode codepoint changing visible direction of text present in comment
-  --> $DIR/unicode-control-codepoints.rs:30:1
+  --> $DIR/unicode-control-codepoints.rs:37:1
    |
 LL | //"/*� } �if isAdmin� � begin admins only */"
    | ^^^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^
@@ -112,7 +112,7 @@ LL | //"/*� } �if isAdmin� � begin admins only */"
    = help: if their presence wasn't intentional, you can remove them
 
 error: unicode codepoint changing visible direction of text present in literal
-  --> $DIR/unicode-control-codepoints.rs:11:22
+  --> $DIR/unicode-control-codepoints.rs:13:22
    |
 LL |     println!("{:?}", "/*� } �if isAdmin� � begin admins only ");
    |                      ^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^
@@ -132,7 +132,7 @@ LL |     println!("{:?}", "/*\u{202e} } \u{2066}if isAdmin\u{2069} \u{2066} begi
    |                         ~~~~~~~~   ~~~~~~~~          ~~~~~~~~ ~~~~~~~~
 
 error: unicode codepoint changing visible direction of text present in literal
-  --> $DIR/unicode-control-codepoints.rs:14:22
+  --> $DIR/unicode-control-codepoints.rs:16:22
    |
 LL |     println!("{:?}", r##"/*� } �if isAdmin� � begin admins only "##);
    |                      ^^^^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^
@@ -151,7 +151,7 @@ LL |     println!("{:?}", r##"/*\u{202e} } \u{2066}if isAdmin\u{2069} \u{2066} b
    |                            ~~~~~~~~   ~~~~~~~~          ~~~~~~~~ ~~~~~~~~
 
 error: unicode codepoint changing visible direction of text present in literal
-  --> $DIR/unicode-control-codepoints.rs:26:22
+  --> $DIR/unicode-control-codepoints.rs:28:22
    |
 LL |     println!("{:?}", '�');
    |                      ^-^
@@ -166,8 +166,40 @@ help: if you want to keep them but make them visible in your source code, you ca
 LL |     println!("{:?}", '\u{202e}');
    |                       ~~~~~~~~
 
+error: unicode codepoint changing visible direction of text present in literal
+  --> $DIR/unicode-control-codepoints.rs:31:13
+   |
+LL |     let _ = c"�";
+   |             ^^-^
+   |             | |
+   |             | '\u{202e}'
+   |             this literal contains an invisible unicode text flow control codepoint
+   |
+   = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
+   = help: if their presence wasn't intentional, you can remove them
+help: if you want to keep them but make them visible in your source code, you can escape them
+   |
+LL |     let _ = c"\u{202e}";
+   |               ~~~~~~~~
+
+error: unicode codepoint changing visible direction of text present in literal
+  --> $DIR/unicode-control-codepoints.rs:33:13
+   |
+LL |     let _ = cr#"�"#;
+   |             ^^^^-^^
+   |             |   |
+   |             |   '\u{202e}'
+   |             this literal contains an invisible unicode text flow control codepoint
+   |
+   = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
+   = help: if their presence wasn't intentional, you can remove them
+help: if you want to keep them but make them visible in your source code, you can escape them
+   |
+LL |     let _ = cr#"\u{202e}"#;
+   |                 ~~~~~~~~
+
 error: unicode codepoint changing visible direction of text present in doc comment
-  --> $DIR/unicode-control-codepoints.rs:33:1
+  --> $DIR/unicode-control-codepoints.rs:40:1
    |
 LL | /**  '�'); */fn foo() {}
    | ^^^^^^^^^^^^^ this doc comment contains an invisible unicode text flow control codepoint
@@ -177,7 +209,7 @@ LL | /**  '�'); */fn foo() {}
    = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}'
 
 error: unicode codepoint changing visible direction of text present in doc comment
-  --> $DIR/unicode-control-codepoints.rs:36:1
+  --> $DIR/unicode-control-codepoints.rs:43:1
    |
 LL | / /**
 LL | |  *
@@ -188,5 +220,5 @@ LL | |  *  '�'); */fn bar() {}
    = note: if their presence wasn't intentional, you can remove them
    = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}'
 
-error: aborting due to 17 previous errors
+error: aborting due to 19 previous errors