about summary refs log tree commit diff
path: root/compiler/rustc_lint/src
diff options
context:
space:
mode:
authorMatthias Krüger <476013+matthiaskrgr@users.noreply.github.com>2025-05-30 07:01:28 +0200
committerGitHub <noreply@github.com>2025-05-30 07:01:28 +0200
commit5fc3f26748d0913dfa56b19be3ccef2d7846d2d1 (patch)
tree6d6ba7cc9e8a9302b737eec769d11eac1f5be6da /compiler/rustc_lint/src
parent7aba37da44991e9046d1e09044c36da06a82b86c (diff)
parentf6520673fc28815f5beee5fbd48d4363462fb0f6 (diff)
downloadrust-5fc3f26748d0913dfa56b19be3ccef2d7846d2d1.tar.gz
rust-5fc3f26748d0913dfa56b19be3ccef2d7846d2d1.zip
Rollup merge of #141004 - matthewjasper:unicode-before-expansion, r=davidtwco
Report text_direction_codepoint_in_literal when parsing

The lint is now reported in code that gets removed/modified/duplicated by macro expansion, and spans are more accurate so we don't get ICEs from trying to split a span in the middle of a character.

This removes support for lint level attributes for `text_direction_codepoint_in_literal` except at the crate level, I don't think that there's an easy way around this when the lint can be reported on code that's removed by `cfg` or that is only in the input of a macro.

Fixes #140281
Diffstat (limited to 'compiler/rustc_lint/src')
-rw-r--r--compiler/rustc_lint/src/early/diagnostics.rs21
-rw-r--r--compiler/rustc_lint/src/hidden_unicode_codepoints.rs136
-rw-r--r--compiler/rustc_lint/src/lib.rs3
3 files changed, 21 insertions, 139 deletions
diff --git a/compiler/rustc_lint/src/early/diagnostics.rs b/compiler/rustc_lint/src/early/diagnostics.rs
index 646dc109805..71b621e8d20 100644
--- a/compiler/rustc_lint/src/early/diagnostics.rs
+++ b/compiler/rustc_lint/src/early/diagnostics.rs
@@ -187,6 +187,27 @@ pub fn decorate_builtin_lint(
                 lints::ReservedMultihash { suggestion }.decorate_lint(diag);
             }
         }
+        BuiltinLintDiag::HiddenUnicodeCodepoints {
+            label,
+            count,
+            span_label,
+            labels,
+            escape,
+            spans,
+        } => {
+            lints::HiddenUnicodeCodepointsDiag {
+                label: &label,
+                count,
+                span_label,
+                labels: labels.map(|spans| lints::HiddenUnicodeCodepointsDiagLabels { spans }),
+                sub: if escape {
+                    lints::HiddenUnicodeCodepointsDiagSub::Escape { spans }
+                } else {
+                    lints::HiddenUnicodeCodepointsDiagSub::NoEscape { spans }
+                },
+            }
+            .decorate_lint(diag);
+        }
         BuiltinLintDiag::UnusedBuiltinAttribute { attr_name, macro_name, invoc_span } => {
             lints::UnusedBuiltinAttribute { invoc_span, attr_name, macro_name }.decorate_lint(diag);
         }
diff --git a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs b/compiler/rustc_lint/src/hidden_unicode_codepoints.rs
deleted file mode 100644
index 491c2826baa..00000000000
--- a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs
+++ /dev/null
@@ -1,136 +0,0 @@
-use ast::util::unicode::{TEXT_FLOW_CONTROL_CHARS, contains_text_flow_control_chars};
-use rustc_ast as ast;
-use rustc_session::{declare_lint, declare_lint_pass};
-use rustc_span::{BytePos, Span, Symbol};
-
-use crate::lints::{
-    HiddenUnicodeCodepointsDiag, HiddenUnicodeCodepointsDiagLabels, HiddenUnicodeCodepointsDiagSub,
-};
-use crate::{EarlyContext, EarlyLintPass, LintContext};
-
-declare_lint! {
-    #[allow(text_direction_codepoint_in_literal)]
-    /// The `text_direction_codepoint_in_literal` lint detects Unicode codepoints that change the
-    /// visual representation of text on screen in a way that does not correspond to their on
-    /// memory representation.
-    ///
-    /// ### Explanation
-    ///
-    /// The unicode characters `\u{202A}`, `\u{202B}`, `\u{202D}`, `\u{202E}`, `\u{2066}`,
-    /// `\u{2067}`, `\u{2068}`, `\u{202C}` and `\u{2069}` make the flow of text on screen change
-    /// its direction on software that supports these codepoints. This makes the text "abc" display
-    /// as "cba" on screen. By leveraging software that supports these, people can write specially
-    /// crafted literals that make the surrounding code seem like it's performing one action, when
-    /// in reality it is performing another. Because of this, we proactively lint against their
-    /// presence to avoid surprises.
-    ///
-    /// ### Example
-    ///
-    /// ```rust,compile_fail
-    /// #![deny(text_direction_codepoint_in_literal)]
-    /// fn main() {
-    ///     println!("{:?}", '‮');
-    /// }
-    /// ```
-    ///
-    /// {{produces}}
-    ///
-    pub TEXT_DIRECTION_CODEPOINT_IN_LITERAL,
-    Deny,
-    "detect special Unicode codepoints that affect the visual representation of text on screen, \
-     changing the direction in which text flows",
-}
-
-declare_lint_pass!(HiddenUnicodeCodepoints => [TEXT_DIRECTION_CODEPOINT_IN_LITERAL]);
-
-impl HiddenUnicodeCodepoints {
-    fn lint_text_direction_codepoint(
-        &self,
-        cx: &EarlyContext<'_>,
-        text: Symbol,
-        span: Span,
-        padding: u32,
-        point_at_inner_spans: bool,
-        label: &str,
-    ) {
-        // Obtain the `Span`s for each of the forbidden chars.
-        let spans: Vec<_> = text
-            .as_str()
-            .char_indices()
-            .filter_map(|(i, c)| {
-                TEXT_FLOW_CONTROL_CHARS.contains(&c).then(|| {
-                    let lo = span.lo() + BytePos(i as u32 + padding);
-                    (c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32)))
-                })
-            })
-            .collect();
-
-        let count = spans.len();
-        let labels = point_at_inner_spans
-            .then_some(HiddenUnicodeCodepointsDiagLabels { spans: spans.clone() });
-        let sub = if point_at_inner_spans && !spans.is_empty() {
-            HiddenUnicodeCodepointsDiagSub::Escape { spans }
-        } else {
-            HiddenUnicodeCodepointsDiagSub::NoEscape { spans }
-        };
-
-        cx.emit_span_lint(
-            TEXT_DIRECTION_CODEPOINT_IN_LITERAL,
-            span,
-            HiddenUnicodeCodepointsDiag { label, count, span_label: span, labels, sub },
-        );
-    }
-
-    fn check_literal(
-        &mut self,
-        cx: &EarlyContext<'_>,
-        text: Symbol,
-        lit_kind: ast::token::LitKind,
-        span: Span,
-        label: &'static str,
-    ) {
-        if !contains_text_flow_control_chars(text.as_str()) {
-            return;
-        }
-        let (padding, point_at_inner_spans) = match lit_kind {
-            // account for `"` or `'`
-            ast::token::LitKind::Str | ast::token::LitKind::Char => (1, true),
-            // account for `c"`
-            ast::token::LitKind::CStr => (2, true),
-            // account for `r###"`
-            ast::token::LitKind::StrRaw(n) => (n as u32 + 2, true),
-            // account for `cr###"`
-            ast::token::LitKind::CStrRaw(n) => (n as u32 + 3, true),
-            // suppress bad literals.
-            ast::token::LitKind::Err(_) => return,
-            // Be conservative just in case new literals do support these.
-            _ => (0, false),
-        };
-        self.lint_text_direction_codepoint(cx, text, span, padding, point_at_inner_spans, label);
-    }
-}
-
-impl EarlyLintPass for HiddenUnicodeCodepoints {
-    fn check_attribute(&mut self, cx: &EarlyContext<'_>, attr: &ast::Attribute) {
-        if let ast::AttrKind::DocComment(_, comment) = attr.kind {
-            if contains_text_flow_control_chars(comment.as_str()) {
-                self.lint_text_direction_codepoint(cx, comment, attr.span, 0, false, "doc comment");
-            }
-        }
-    }
-
-    #[inline]
-    fn check_expr(&mut self, cx: &EarlyContext<'_>, expr: &ast::Expr) {
-        // byte strings are already handled well enough by `EscapeError::NonAsciiCharInByteString`
-        match &expr.kind {
-            ast::ExprKind::Lit(token_lit) => {
-                self.check_literal(cx, token_lit.symbol, token_lit.kind, expr.span, "literal");
-            }
-            ast::ExprKind::FormatArgs(args) => {
-                let (lit_kind, text) = args.uncooked_fmt_str;
-                self.check_literal(cx, text, lit_kind, args.span, "format string");
-            }
-            _ => {}
-        };
-    }
-}
diff --git a/compiler/rustc_lint/src/lib.rs b/compiler/rustc_lint/src/lib.rs
index ce290eab8e9..0a52e42e442 100644
--- a/compiler/rustc_lint/src/lib.rs
+++ b/compiler/rustc_lint/src/lib.rs
@@ -48,7 +48,6 @@ mod errors;
 mod expect;
 mod for_loops_over_fallibles;
 mod foreign_modules;
-pub mod hidden_unicode_codepoints;
 mod if_let_rescope;
 mod impl_trait_overcaptures;
 mod internal;
@@ -92,7 +91,6 @@ use deref_into_dyn_supertrait::*;
 use drop_forget_useless::*;
 use enum_intrinsics_non_enums::EnumIntrinsicsNonEnums;
 use for_loops_over_fallibles::*;
-use hidden_unicode_codepoints::*;
 use if_let_rescope::IfLetRescope;
 use impl_trait_overcaptures::ImplTraitOvercaptures;
 use internal::*;
@@ -177,7 +175,6 @@ early_lint_methods!(
             DeprecatedAttr: DeprecatedAttr::default(),
             WhileTrue: WhileTrue,
             NonAsciiIdents: NonAsciiIdents,
-            HiddenUnicodeCodepoints: HiddenUnicodeCodepoints,
             IncompleteInternalFeatures: IncompleteInternalFeatures,
             RedundantSemicolons: RedundantSemicolons,
             UnusedDocComment: UnusedDocComment,