about summary refs log tree commit diff
diff options
context:
space:
mode:
author Michael Goulet <michael@errs.io> 2024-12-31 05:03:22 +0000
committer Michael Goulet <michael@errs.io> 2024-12-31 05:03:22 +0000
commit ea291e5b5f5c2562fec89a11444e0dc4388565cf (patch)
tree 4005485bc538e6c7d4d9d0623fbd5190e567a941
parent c6afe82b8a3255145ba0eeeb49f8c590e38f38e2 (diff)
download rust-ea291e5b5f5c2562fec89a11444e0dc4388565cf.tar.gz
rust-ea291e5b5f5c2562fec89a11444e0dc4388565cf.zip
Account for format_args in HiddenUnicodeCodepoints lint
-rw-r--r-- compiler/rustc_ast/src/format.rs 5
-rw-r--r-- compiler/rustc_ast/src/mut_visit.rs 2
-rw-r--r-- compiler/rustc_ast/src/visit.rs 2
-rw-r--r-- compiler/rustc_builtin_macros/src/asm.rs 1
-rw-r--r-- compiler/rustc_builtin_macros/src/format.rs 14
-rw-r--r-- compiler/rustc_builtin_macros/src/util.rs 5
-rw-r--r-- compiler/rustc_lint/src/hidden_unicode_codepoints.rs 60
-rw-r--r-- tests/ui/parser/unicode-control-codepoints.rs 3
-rw-r--r-- tests/ui/parser/unicode-control-codepoints.stderr 24
9 files changed, 82 insertions, 34 deletions
diff --git a/compiler/rustc_ast/src/format.rs b/compiler/rustc_ast/src/format.rs
index de628f09853..b93846c1fe6 100644
--- a/compiler/rustc_ast/src/format.rs
+++ b/compiler/rustc_ast/src/format.rs
@@ -4,6 +4,7 @@ use rustc_span::{Ident, Span, Symbol};
 
 use crate::Expr;
 use crate::ptr::P;
+use crate::token::LitKind;
 
 // Definitions:
 //
@@ -45,6 +46,10 @@ pub struct FormatArgs {
     pub span: Span,
     pub template: Vec<FormatArgsPiece>,
     pub arguments: FormatArguments,
+    /// The raw, un-split format string literal, with no escaping or processing.
+    ///
+    /// Generally only useful for lints that care about the raw bytes the user wrote.
+    pub uncooked_fmt_str: (LitKind, Symbol),
 }
 
 /// A piece of a format template string.
diff --git a/compiler/rustc_ast/src/mut_visit.rs b/compiler/rustc_ast/src/mut_visit.rs
index 995924c2a29..04cdfc93dcb 100644
--- a/compiler/rustc_ast/src/mut_visit.rs
+++ b/compiler/rustc_ast/src/mut_visit.rs
@@ -1596,7 +1596,7 @@ fn walk_inline_asm_sym<T: MutVisitor>(
 
 fn walk_format_args<T: MutVisitor>(vis: &mut T, fmt: &mut FormatArgs) {
     // FIXME: visit the template exhaustively.
-    let FormatArgs { span, template: _, arguments } = fmt;
+    let FormatArgs { span, template: _, arguments, uncooked_fmt_str: _ } = fmt;
     for FormatArgument { kind, expr } in arguments.all_args_mut() {
         match kind {
             FormatArgumentKind::Named(ident) | FormatArgumentKind::Captured(ident) => {
diff --git a/compiler/rustc_ast/src/visit.rs b/compiler/rustc_ast/src/visit.rs
index c7cc772dabb..e99fc7b604e 100644
--- a/compiler/rustc_ast/src/visit.rs
+++ b/compiler/rustc_ast/src/visit.rs
@@ -1061,7 +1061,7 @@ pub fn walk_inline_asm_sym<'a, V: Visitor<'a>>(
 }
 
 pub fn walk_format_args<'a, V: Visitor<'a>>(visitor: &mut V, fmt: &'a FormatArgs) -> V::Result {
-    let FormatArgs { span: _, template: _, arguments } = fmt;
+    let FormatArgs { span: _, template: _, arguments, uncooked_fmt_str: _ } = fmt;
     for FormatArgument { kind, expr } in arguments.all_args() {
         match kind {
             FormatArgumentKind::Named(ident) | FormatArgumentKind::Captured(ident) => {
diff --git a/compiler/rustc_builtin_macros/src/asm.rs b/compiler/rustc_builtin_macros/src/asm.rs
index 238cc14ff0b..5062cf55bb9 100644
--- a/compiler/rustc_builtin_macros/src/asm.rs
+++ b/compiler/rustc_builtin_macros/src/asm.rs
@@ -531,6 +531,7 @@ fn expand_preparsed_asm(
             symbol: template_str,
             style: template_style,
             span: template_span,
+            ..
         } = {
             let ExpandResult::Ready(mac) = expr_to_spanned_string(ecx, template_expr, msg) else {
                 return ExpandResult::Retry(());
diff --git a/compiler/rustc_builtin_macros/src/format.rs b/compiler/rustc_builtin_macros/src/format.rs
index 5b3f08948a9..0112499c509 100644
--- a/compiler/rustc_builtin_macros/src/format.rs
+++ b/compiler/rustc_builtin_macros/src/format.rs
@@ -166,7 +166,12 @@ fn make_format_args(
 
     let MacroInput { fmtstr: efmt, mut args, is_direct_literal } = input;
 
-    let ExprToSpannedString { symbol: fmt_str, span: fmt_span, style: fmt_style } = {
+    let ExprToSpannedString {
+        symbol: fmt_str,
+        span: fmt_span,
+        style: fmt_style,
+        uncooked_symbol: uncooked_fmt_str,
+    } = {
         let ExpandResult::Ready(mac) = expr_to_spanned_string(ecx, efmt.clone(), msg) else {
             return ExpandResult::Retry(());
         };
@@ -584,7 +589,12 @@ fn make_format_args(
         }
     }
 
-    ExpandResult::Ready(Ok(FormatArgs { span: fmt_span, template, arguments: args }))
+    ExpandResult::Ready(Ok(FormatArgs {
+        span: fmt_span,
+        template,
+        arguments: args,
+        uncooked_fmt_str,
+    }))
 }
 
 fn invalid_placeholder_type_error(
diff --git a/compiler/rustc_builtin_macros/src/util.rs b/compiler/rustc_builtin_macros/src/util.rs
index 9162e94eddb..38fec2bff14 100644
--- a/compiler/rustc_builtin_macros/src/util.rs
+++ b/compiler/rustc_builtin_macros/src/util.rs
@@ -63,6 +63,10 @@ pub(crate) struct ExprToSpannedString {
     pub symbol: Symbol,
     pub style: ast::StrStyle,
     pub span: Span,
+    /// The raw string literal, with no escaping or processing.
+    ///
+    /// Generally only useful for lints that care about the raw bytes the user wrote.
+    pub uncooked_symbol: (ast::token::LitKind, Symbol),
 }
 
 /// - `Ok` is returned when the conversion to a string literal is unsuccessful,
@@ -100,6 +104,7 @@ pub(crate) fn expr_to_spanned_string<'a>(
                     symbol: s,
                     style,
                     span: expr.span,
+                    uncooked_symbol: (token_lit.kind, token_lit.symbol),
                 }));
             }
             Ok(ast::LitKind::ByteStr(..)) => {
diff --git a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs b/compiler/rustc_lint/src/hidden_unicode_codepoints.rs
index 4a7e4bf75cf..406aa1005df 100644
--- a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs
+++ b/compiler/rustc_lint/src/hidden_unicode_codepoints.rs
@@ -82,7 +82,36 @@ impl HiddenUnicodeCodepoints {
             sub,
         });
     }
+
+    fn check_literal(
+        &mut self,
+        cx: &EarlyContext<'_>,
+        text: Symbol,
+        lit_kind: ast::token::LitKind,
+        span: Span,
+        label: &'static str,
+    ) {
+        if !contains_text_flow_control_chars(text.as_str()) {
+            return;
+        }
+        let (padding, point_at_inner_spans) = match lit_kind {
+            // account for `"` or `'`
+            ast::token::LitKind::Str | ast::token::LitKind::Char => (1, true),
+            // account for `c"`
+            ast::token::LitKind::CStr => (2, true),
+            // account for `r###"`
+            ast::token::LitKind::StrRaw(n) => (n as u32 + 2, true),
+            // account for `cr###"`
+            ast::token::LitKind::CStrRaw(n) => (n as u32 + 3, true),
+            // suppress bad literals.
+            ast::token::LitKind::Err(_) => return,
+            // Be conservative just in case new literals do support these.
+            _ => (0, false),
+        };
+        self.lint_text_direction_codepoint(cx, text, span, padding, point_at_inner_spans, label);
+    }
 }
+
 impl EarlyLintPass for HiddenUnicodeCodepoints {
     fn check_attribute(&mut self, cx: &EarlyContext<'_>, attr: &ast::Attribute) {
         if let ast::AttrKind::DocComment(_, comment) = attr.kind {
@@ -97,32 +126,11 @@ impl EarlyLintPass for HiddenUnicodeCodepoints {
         // byte strings are already handled well enough by `EscapeError::NonAsciiCharInByteString`
         match &expr.kind {
             ast::ExprKind::Lit(token_lit) => {
-                let text = token_lit.symbol;
-                if !contains_text_flow_control_chars(text.as_str()) {
-                    return;
-                }
-                let (padding, point_at_inner_spans) = match token_lit.kind {
-                    // account for `"` or `'`
-                    ast::token::LitKind::Str | ast::token::LitKind::Char => (1, true),
-                    // account for `c"`
-                    ast::token::LitKind::CStr => (2, true),
-                    // account for `r###"`
-                    ast::token::LitKind::StrRaw(n) => (n as u32 + 2, true),
-                    // account for `cr###"`
-                    ast::token::LitKind::CStrRaw(n) => (n as u32 + 3, true),
-                    // suppress bad literals.
-                    ast::token::LitKind::Err(_) => return,
-                    // Be conservative just in case new literals do support these.
-                    _ => (0, false),
-                };
-                self.lint_text_direction_codepoint(
-                    cx,
-                    text,
-                    expr.span,
-                    padding,
-                    point_at_inner_spans,
-                    "literal",
-                );
+                self.check_literal(cx, token_lit.symbol, token_lit.kind, expr.span, "literal");
+            }
+            ast::ExprKind::FormatArgs(args) => {
+                let (lit_kind, text) = args.uncooked_fmt_str;
+                self.check_literal(cx, text, lit_kind, args.span, "format string");
             }
             _ => {}
         };
diff --git a/tests/ui/parser/unicode-control-codepoints.rs b/tests/ui/parser/unicode-control-codepoints.rs
index c2b9a9911ac..14e1cfe59d3 100644
--- a/tests/ui/parser/unicode-control-codepoints.rs
+++ b/tests/ui/parser/unicode-control-codepoints.rs
@@ -32,6 +32,9 @@ fn main() {
     //~^ ERROR unicode codepoint changing visible direction of text present in literal
     let _ = cr#"‮"#;
     //~^ ERROR unicode codepoint changing visible direction of text present in literal
+
+    println!("{{‮}}");
+    //~^ ERROR unicode codepoint changing visible direction of text present in format string
 }
 
 //"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only */"
diff --git a/tests/ui/parser/unicode-control-codepoints.stderr b/tests/ui/parser/unicode-control-codepoints.stderr
index fa75df6a443..2893194308e 100644
--- a/tests/ui/parser/unicode-control-codepoints.stderr
+++ b/tests/ui/parser/unicode-control-codepoints.stderr
@@ -97,7 +97,7 @@ LL |     // if access_level != "us�e�r" { // Check if admin
    = help: if their presence wasn't intentional, you can remove them
 
 error: unicode codepoint changing visible direction of text present in comment
-  --> $DIR/unicode-control-codepoints.rs:37:1
+  --> $DIR/unicode-control-codepoints.rs:40:1
    |
 LL | //"/*� } �if isAdmin� � begin admins only */"
    | ^^^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^
@@ -198,8 +198,24 @@ help: if you want to keep them but make them visible in your source code, you ca
 LL |     let _ = cr#"\u{202e}"#;
    |                 ~~~~~~~~
 
+error: unicode codepoint changing visible direction of text present in format string
+  --> $DIR/unicode-control-codepoints.rs:36:14
+   |
+LL |     println!("{{�}}");
+   |              ^^^-^^^
+   |              |  |
+   |              |  '\u{202e}'
+   |              this format string contains an invisible unicode text flow control codepoint
+   |
+   = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
+   = help: if their presence wasn't intentional, you can remove them
+help: if you want to keep them but make them visible in your source code, you can escape them
+   |
+LL |     println!("{{\u{202e}}}");
+   |                 ~~~~~~~~
+
 error: unicode codepoint changing visible direction of text present in doc comment
-  --> $DIR/unicode-control-codepoints.rs:40:1
+  --> $DIR/unicode-control-codepoints.rs:43:1
    |
 LL | /**  '�'); */fn foo() {}
    | ^^^^^^^^^^^^^ this doc comment contains an invisible unicode text flow control codepoint
@@ -209,7 +225,7 @@ LL | /**  '�'); */fn foo() {}
    = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}'
 
 error: unicode codepoint changing visible direction of text present in doc comment
-  --> $DIR/unicode-control-codepoints.rs:43:1
+  --> $DIR/unicode-control-codepoints.rs:46:1
    |
 LL | / /**
 LL | |  *
@@ -220,5 +236,5 @@ LL | |  *  '�'); */fn bar() {}
    = note: if their presence wasn't intentional, you can remove them
    = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}'
 
-error: aborting due to 19 previous errors
+error: aborting due to 20 previous errors