about summary refs log tree commit diff
path: root/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'compiler')
-rw-r--r--compiler/rustc_lint/src/early/diagnostics.rs21
-rw-r--r--compiler/rustc_lint/src/hidden_unicode_codepoints.rs136
-rw-r--r--compiler/rustc_lint/src/lib.rs3
-rw-r--r--compiler/rustc_lint_defs/src/builtin.rs42
-rw-r--r--compiler/rustc_lint_defs/src/lib.rs8
-rw-r--r--compiler/rustc_parse/src/lexer/mod.rs89
6 files changed, 155 insertions, 144 deletions
diff --git a/compiler/rustc_lint/src/early/diagnostics.rs b/compiler/rustc_lint/src/early/diagnostics.rs
index 646dc109805..71b621e8d20 100644
--- a/compiler/rustc_lint/src/early/diagnostics.rs
+++ b/compiler/rustc_lint/src/early/diagnostics.rs
@@ -187,6 +187,27 @@ pub fn decorate_builtin_lint(
                 lints::ReservedMultihash { suggestion }.decorate_lint(diag);
             }
         }
+        BuiltinLintDiag::HiddenUnicodeCodepoints {
+            label,
+            count,
+            span_label,
+            labels,
+            escape,
+            spans,
+        } => {
+            lints::HiddenUnicodeCodepointsDiag {
+                label: &label,
+                count,
+                span_label,
+                labels: labels.map(|spans| lints::HiddenUnicodeCodepointsDiagLabels { spans }),
+                sub: if escape {
+                    lints::HiddenUnicodeCodepointsDiagSub::Escape { spans }
+                } else {
+                    lints::HiddenUnicodeCodepointsDiagSub::NoEscape { spans }
+                },
+            }
+            .decorate_lint(diag);
+        }
         BuiltinLintDiag::UnusedBuiltinAttribute { attr_name, macro_name, invoc_span } => {
             lints::UnusedBuiltinAttribute { invoc_span, attr_name, macro_name }.decorate_lint(diag);
         }
diff --git a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs b/compiler/rustc_lint/src/hidden_unicode_codepoints.rs
deleted file mode 100644
index 491c2826baa..00000000000
--- a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs
+++ /dev/null
@@ -1,136 +0,0 @@
-use ast::util::unicode::{TEXT_FLOW_CONTROL_CHARS, contains_text_flow_control_chars};
-use rustc_ast as ast;
-use rustc_session::{declare_lint, declare_lint_pass};
-use rustc_span::{BytePos, Span, Symbol};
-
-use crate::lints::{
-    HiddenUnicodeCodepointsDiag, HiddenUnicodeCodepointsDiagLabels, HiddenUnicodeCodepointsDiagSub,
-};
-use crate::{EarlyContext, EarlyLintPass, LintContext};
-
-declare_lint! {
-    #[allow(text_direction_codepoint_in_literal)]
-    /// The `text_direction_codepoint_in_literal` lint detects Unicode codepoints that change the
-    /// visual representation of text on screen in a way that does not correspond to their on
-    /// memory representation.
-    ///
-    /// ### Explanation
-    ///
-    /// The unicode characters `\u{202A}`, `\u{202B}`, `\u{202D}`, `\u{202E}`, `\u{2066}`,
-    /// `\u{2067}`, `\u{2068}`, `\u{202C}` and `\u{2069}` make the flow of text on screen change
-    /// its direction on software that supports these codepoints. This makes the text "abc" display
-    /// as "cba" on screen. By leveraging software that supports these, people can write specially
-    /// crafted literals that make the surrounding code seem like it's performing one action, when
-    /// in reality it is performing another. Because of this, we proactively lint against their
-    /// presence to avoid surprises.
-    ///
-    /// ### Example
-    ///
-    /// ```rust,compile_fail
-    /// #![deny(text_direction_codepoint_in_literal)]
-    /// fn main() {
-    ///     println!("{:?}", '‮');
-    /// }
-    /// ```
-    ///
-    /// {{produces}}
-    ///
-    pub TEXT_DIRECTION_CODEPOINT_IN_LITERAL,
-    Deny,
-    "detect special Unicode codepoints that affect the visual representation of text on screen, \
-     changing the direction in which text flows",
-}
-
-declare_lint_pass!(HiddenUnicodeCodepoints => [TEXT_DIRECTION_CODEPOINT_IN_LITERAL]);
-
-impl HiddenUnicodeCodepoints {
-    fn lint_text_direction_codepoint(
-        &self,
-        cx: &EarlyContext<'_>,
-        text: Symbol,
-        span: Span,
-        padding: u32,
-        point_at_inner_spans: bool,
-        label: &str,
-    ) {
-        // Obtain the `Span`s for each of the forbidden chars.
-        let spans: Vec<_> = text
-            .as_str()
-            .char_indices()
-            .filter_map(|(i, c)| {
-                TEXT_FLOW_CONTROL_CHARS.contains(&c).then(|| {
-                    let lo = span.lo() + BytePos(i as u32 + padding);
-                    (c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32)))
-                })
-            })
-            .collect();
-
-        let count = spans.len();
-        let labels = point_at_inner_spans
-            .then_some(HiddenUnicodeCodepointsDiagLabels { spans: spans.clone() });
-        let sub = if point_at_inner_spans && !spans.is_empty() {
-            HiddenUnicodeCodepointsDiagSub::Escape { spans }
-        } else {
-            HiddenUnicodeCodepointsDiagSub::NoEscape { spans }
-        };
-
-        cx.emit_span_lint(
-            TEXT_DIRECTION_CODEPOINT_IN_LITERAL,
-            span,
-            HiddenUnicodeCodepointsDiag { label, count, span_label: span, labels, sub },
-        );
-    }
-
-    fn check_literal(
-        &mut self,
-        cx: &EarlyContext<'_>,
-        text: Symbol,
-        lit_kind: ast::token::LitKind,
-        span: Span,
-        label: &'static str,
-    ) {
-        if !contains_text_flow_control_chars(text.as_str()) {
-            return;
-        }
-        let (padding, point_at_inner_spans) = match lit_kind {
-            // account for `"` or `'`
-            ast::token::LitKind::Str | ast::token::LitKind::Char => (1, true),
-            // account for `c"`
-            ast::token::LitKind::CStr => (2, true),
-            // account for `r###"`
-            ast::token::LitKind::StrRaw(n) => (n as u32 + 2, true),
-            // account for `cr###"`
-            ast::token::LitKind::CStrRaw(n) => (n as u32 + 3, true),
-            // suppress bad literals.
-            ast::token::LitKind::Err(_) => return,
-            // Be conservative just in case new literals do support these.
-            _ => (0, false),
-        };
-        self.lint_text_direction_codepoint(cx, text, span, padding, point_at_inner_spans, label);
-    }
-}
-
-impl EarlyLintPass for HiddenUnicodeCodepoints {
-    fn check_attribute(&mut self, cx: &EarlyContext<'_>, attr: &ast::Attribute) {
-        if let ast::AttrKind::DocComment(_, comment) = attr.kind {
-            if contains_text_flow_control_chars(comment.as_str()) {
-                self.lint_text_direction_codepoint(cx, comment, attr.span, 0, false, "doc comment");
-            }
-        }
-    }
-
-    #[inline]
-    fn check_expr(&mut self, cx: &EarlyContext<'_>, expr: &ast::Expr) {
-        // byte strings are already handled well enough by `EscapeError::NonAsciiCharInByteString`
-        match &expr.kind {
-            ast::ExprKind::Lit(token_lit) => {
-                self.check_literal(cx, token_lit.symbol, token_lit.kind, expr.span, "literal");
-            }
-            ast::ExprKind::FormatArgs(args) => {
-                let (lit_kind, text) = args.uncooked_fmt_str;
-                self.check_literal(cx, text, lit_kind, args.span, "format string");
-            }
-            _ => {}
-        };
-    }
-}
diff --git a/compiler/rustc_lint/src/lib.rs b/compiler/rustc_lint/src/lib.rs
index ce290eab8e9..0a52e42e442 100644
--- a/compiler/rustc_lint/src/lib.rs
+++ b/compiler/rustc_lint/src/lib.rs
@@ -48,7 +48,6 @@ mod errors;
 mod expect;
 mod for_loops_over_fallibles;
 mod foreign_modules;
-pub mod hidden_unicode_codepoints;
 mod if_let_rescope;
 mod impl_trait_overcaptures;
 mod internal;
@@ -92,7 +91,6 @@ use deref_into_dyn_supertrait::*;
 use drop_forget_useless::*;
 use enum_intrinsics_non_enums::EnumIntrinsicsNonEnums;
 use for_loops_over_fallibles::*;
-use hidden_unicode_codepoints::*;
 use if_let_rescope::IfLetRescope;
 use impl_trait_overcaptures::ImplTraitOvercaptures;
 use internal::*;
@@ -177,7 +175,6 @@ early_lint_methods!(
             DeprecatedAttr: DeprecatedAttr::default(),
             WhileTrue: WhileTrue,
             NonAsciiIdents: NonAsciiIdents,
-            HiddenUnicodeCodepoints: HiddenUnicodeCodepoints,
             IncompleteInternalFeatures: IncompleteInternalFeatures,
             RedundantSemicolons: RedundantSemicolons,
             UnusedDocComment: UnusedDocComment,
diff --git a/compiler/rustc_lint_defs/src/builtin.rs b/compiler/rustc_lint_defs/src/builtin.rs
index abf4840a026..7c7ba85d484 100644
--- a/compiler/rustc_lint_defs/src/builtin.rs
+++ b/compiler/rustc_lint_defs/src/builtin.rs
@@ -103,6 +103,7 @@ declare_lint_pass! {
         TAIL_EXPR_DROP_ORDER,
         TEST_UNSTABLE_LINT,
         TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
+        TEXT_DIRECTION_CODEPOINT_IN_LITERAL,
         TRIVIAL_CASTS,
         TRIVIAL_NUMERIC_CASTS,
         TYVAR_BEHIND_RAW_POINTER,
@@ -3782,7 +3783,6 @@ declare_lint! {
 }
 
 declare_lint! {
-    #[allow(text_direction_codepoint_in_literal)]
     /// The `text_direction_codepoint_in_comment` lint detects Unicode codepoints in comments that
     /// change the visual representation of text on screen in a way that does not correspond to
     /// their on memory representation.
@@ -3792,7 +3792,7 @@ declare_lint! {
     /// ```rust,compile_fail
     /// #![deny(text_direction_codepoint_in_comment)]
     /// fn main() {
-    ///     println!("{:?}"); // '‮');
+    #[doc = "    println!(\"{:?}\"); // '\u{202E}');"]
     /// }
     /// ```
     ///
@@ -3807,7 +3807,43 @@ declare_lint! {
     /// their use.
     pub TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
     Deny,
-    "invisible directionality-changing codepoints in comment"
+    "invisible directionality-changing codepoints in comment",
+    crate_level_only
+}
+
+declare_lint! {
+    /// The `text_direction_codepoint_in_literal` lint detects Unicode codepoints that change the
+    /// visual representation of text on screen in a way that does not correspond to their on
+    /// memory representation.
+    ///
+    /// ### Explanation
+    ///
+    /// The unicode characters `\u{202A}`, `\u{202B}`, `\u{202D}`, `\u{202E}`, `\u{2066}`,
+    /// `\u{2067}`, `\u{2068}`, `\u{202C}` and `\u{2069}` make the flow of text on screen change
+    /// its direction on software that supports these codepoints. This makes the text "abc" display
+    /// as "cba" on screen. By leveraging software that supports these, people can write specially
+    /// crafted literals that make the surrounding code seem like it's performing one action, when
+    /// in reality it is performing another. Because of this, we proactively lint against their
+    /// presence to avoid surprises.
+    ///
+    /// ### Example
+    ///
+    /// ```rust,compile_fail
+    /// #![deny(text_direction_codepoint_in_literal)]
+    /// fn main() {
+    // ` - convince tidy that backticks match
+    #[doc = "    println!(\"{:?}\", '\u{202E}');"]
+    // `
+    /// }
+    /// ```
+    ///
+    /// {{produces}}
+    ///
+    pub TEXT_DIRECTION_CODEPOINT_IN_LITERAL,
+    Deny,
+    "detect special Unicode codepoints that affect the visual representation of text on screen, \
+     changing the direction in which text flows",
+    crate_level_only
 }
 
 declare_lint! {
diff --git a/compiler/rustc_lint_defs/src/lib.rs b/compiler/rustc_lint_defs/src/lib.rs
index dc30532ba1a..6cbdc245d21 100644
--- a/compiler/rustc_lint_defs/src/lib.rs
+++ b/compiler/rustc_lint_defs/src/lib.rs
@@ -698,6 +698,14 @@ pub enum BuiltinLintDiag {
         is_string: bool,
         suggestion: Span,
     },
+    HiddenUnicodeCodepoints {
+        label: String,
+        count: usize,
+        span_label: Span,
+        labels: Option<Vec<(char, Span)>>,
+        escape: bool,
+        spans: Vec<(char, Span)>,
+    },
     TrailingMacro(bool, Ident),
     BreakWithLabelAndLoop(Span),
     UnicodeTextFlow(Span, String),
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 78c5742414b..2845bbed1c0 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -4,7 +4,7 @@ use diagnostics::make_unclosed_delims_error;
 use rustc_ast::ast::{self, AttrStyle};
 use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind};
 use rustc_ast::tokenstream::TokenStream;
-use rustc_ast::util::unicode::contains_text_flow_control_chars;
+use rustc_ast::util::unicode::{TEXT_FLOW_CONTROL_CHARS, contains_text_flow_control_chars};
 use rustc_errors::codes::*;
 use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey};
 use rustc_lexer::{
@@ -14,7 +14,7 @@ use rustc_literal_escaper::{EscapeError, Mode, unescape_mixed, unescape_unicode}
 use rustc_session::lint::BuiltinLintDiag;
 use rustc_session::lint::builtin::{
     RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
-    TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
+    TEXT_DIRECTION_CODEPOINT_IN_COMMENT, TEXT_DIRECTION_CODEPOINT_IN_LITERAL,
 };
 use rustc_session::parse::ParseSess;
 use rustc_span::{BytePos, Pos, Span, Symbol, sym};
@@ -174,6 +174,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                     // Opening delimiter of the length 3 is not included into the symbol.
                     let content_start = start + BytePos(3);
                     let content = self.str_from(content_start);
+                    self.lint_doc_comment_unicode_text_flow(start, content);
                     self.cook_doc_comment(content_start, content, CommentKind::Line, doc_style)
                 }
                 rustc_lexer::TokenKind::BlockComment { doc_style, terminated } => {
@@ -193,6 +194,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                     let content_start = start + BytePos(3);
                     let content_end = self.pos - BytePos(if terminated { 2 } else { 0 });
                     let content = self.str_from_to(content_start, content_end);
+                    self.lint_doc_comment_unicode_text_flow(start, content);
                     self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style)
                 }
                 rustc_lexer::TokenKind::Frontmatter { has_invalid_preceding_whitespace, invalid_infostring } => {
@@ -287,6 +289,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                     } else {
                         None
                     };
+                    self.lint_literal_unicode_text_flow(symbol, kind, self.mk_sp(start, self.pos), "literal");
                     token::Literal(token::Lit { kind, symbol, suffix })
                 }
                 rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
@@ -481,6 +484,88 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
         }
     }
 
+    fn lint_doc_comment_unicode_text_flow(&mut self, start: BytePos, content: &str) {
+        if contains_text_flow_control_chars(content) {
+            self.report_text_direction_codepoint(
+                content,
+                self.mk_sp(start, self.pos),
+                0,
+                false,
+                "doc comment",
+            );
+        }
+    }
+
+    fn lint_literal_unicode_text_flow(
+        &mut self,
+        text: Symbol,
+        lit_kind: token::LitKind,
+        span: Span,
+        label: &'static str,
+    ) {
+        if !contains_text_flow_control_chars(text.as_str()) {
+            return;
+        }
+        let (padding, point_at_inner_spans) = match lit_kind {
+            // account for `"` or `'`
+            token::LitKind::Str | token::LitKind::Char => (1, true),
+            // account for `c"`
+            token::LitKind::CStr => (2, true),
+            // account for `r###"`
+            token::LitKind::StrRaw(n) => (n as u32 + 2, true),
+            // account for `cr###"`
+            token::LitKind::CStrRaw(n) => (n as u32 + 3, true),
+            // suppress bad literals.
+            token::LitKind::Err(_) => return,
+            // Be conservative just in case new literals do support these.
+            _ => (0, false),
+        };
+        self.report_text_direction_codepoint(
+            text.as_str(),
+            span,
+            padding,
+            point_at_inner_spans,
+            label,
+        );
+    }
+
+    fn report_text_direction_codepoint(
+        &self,
+        text: &str,
+        span: Span,
+        padding: u32,
+        point_at_inner_spans: bool,
+        label: &str,
+    ) {
+        // Obtain the `Span`s for each of the forbidden chars.
+        let spans: Vec<_> = text
+            .char_indices()
+            .filter_map(|(i, c)| {
+                TEXT_FLOW_CONTROL_CHARS.contains(&c).then(|| {
+                    let lo = span.lo() + BytePos(i as u32 + padding);
+                    (c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32)))
+                })
+            })
+            .collect();
+
+        let count = spans.len();
+        let labels = point_at_inner_spans.then_some(spans.clone());
+
+        self.psess.buffer_lint(
+            TEXT_DIRECTION_CODEPOINT_IN_LITERAL,
+            span,
+            ast::CRATE_NODE_ID,
+            BuiltinLintDiag::HiddenUnicodeCodepoints {
+                label: label.to_string(),
+                count,
+                span_label: span,
+                labels,
+                escape: point_at_inner_spans && !spans.is_empty(),
+                spans,
+            },
+        );
+    }
+
     fn validate_frontmatter(
         &self,
         start: BytePos,