diff options
| author | Matthias Krüger <476013+matthiaskrgr@users.noreply.github.com> | 2025-05-30 07:01:28 +0200 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-05-30 07:01:28 +0200 | 
| commit | 5fc3f26748d0913dfa56b19be3ccef2d7846d2d1 (patch) | |
| tree | 6d6ba7cc9e8a9302b737eec769d11eac1f5be6da /compiler/rustc_lint/src | |
| parent | 7aba37da44991e9046d1e09044c36da06a82b86c (diff) | |
| parent | f6520673fc28815f5beee5fbd48d4363462fb0f6 (diff) | |
| download | rust-5fc3f26748d0913dfa56b19be3ccef2d7846d2d1.tar.gz rust-5fc3f26748d0913dfa56b19be3ccef2d7846d2d1.zip | |
Rollup merge of #141004 - matthewjasper:unicode-before-expansion, r=davidtwco
Report text_direction_codepoint_in_literal when parsing

The lint is now reported in code that gets removed/modified/duplicated by macro expansion, and spans are more accurate, so we don't get ICEs from trying to split a span in the middle of a character.

This removes support for lint level attributes for `text_direction_codepoint_in_literal` except at the crate level. I don't think that there's an easy way around this when the lint can be reported on code that's removed by `cfg` or that is only in the input of a macro.

Fixes #140281
Diffstat (limited to 'compiler/rustc_lint/src')
| -rw-r--r-- | compiler/rustc_lint/src/early/diagnostics.rs | 21 | ||||
| -rw-r--r-- | compiler/rustc_lint/src/hidden_unicode_codepoints.rs | 136 | ||||
| -rw-r--r-- | compiler/rustc_lint/src/lib.rs | 3 | 
3 files changed, 21 insertions, 139 deletions
| diff --git a/compiler/rustc_lint/src/early/diagnostics.rs b/compiler/rustc_lint/src/early/diagnostics.rs index 646dc109805..71b621e8d20 100644 --- a/compiler/rustc_lint/src/early/diagnostics.rs +++ b/compiler/rustc_lint/src/early/diagnostics.rs @@ -187,6 +187,27 @@ pub fn decorate_builtin_lint( lints::ReservedMultihash { suggestion }.decorate_lint(diag); } } + BuiltinLintDiag::HiddenUnicodeCodepoints { + label, + count, + span_label, + labels, + escape, + spans, + } => { + lints::HiddenUnicodeCodepointsDiag { + label: &label, + count, + span_label, + labels: labels.map(|spans| lints::HiddenUnicodeCodepointsDiagLabels { spans }), + sub: if escape { + lints::HiddenUnicodeCodepointsDiagSub::Escape { spans } + } else { + lints::HiddenUnicodeCodepointsDiagSub::NoEscape { spans } + }, + } + .decorate_lint(diag); + } BuiltinLintDiag::UnusedBuiltinAttribute { attr_name, macro_name, invoc_span } => { lints::UnusedBuiltinAttribute { invoc_span, attr_name, macro_name }.decorate_lint(diag); } diff --git a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs b/compiler/rustc_lint/src/hidden_unicode_codepoints.rs deleted file mode 100644 index 491c2826baa..00000000000 --- a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs +++ /dev/null @@ -1,136 +0,0 @@ -use ast::util::unicode::{TEXT_FLOW_CONTROL_CHARS, contains_text_flow_control_chars}; -use rustc_ast as ast; -use rustc_session::{declare_lint, declare_lint_pass}; -use rustc_span::{BytePos, Span, Symbol}; - -use crate::lints::{ - HiddenUnicodeCodepointsDiag, HiddenUnicodeCodepointsDiagLabels, HiddenUnicodeCodepointsDiagSub, -}; -use crate::{EarlyContext, EarlyLintPass, LintContext}; - -declare_lint! { - #[allow(text_direction_codepoint_in_literal)] - /// The `text_direction_codepoint_in_literal` lint detects Unicode codepoints that change the - /// visual representation of text on screen in a way that does not correspond to their on - /// memory representation. 
- /// - /// ### Explanation - /// - /// The unicode characters `\u{202A}`, `\u{202B}`, `\u{202D}`, `\u{202E}`, `\u{2066}`, - /// `\u{2067}`, `\u{2068}`, `\u{202C}` and `\u{2069}` make the flow of text on screen change - /// its direction on software that supports these codepoints. This makes the text "abc" display - /// as "cba" on screen. By leveraging software that supports these, people can write specially - /// crafted literals that make the surrounding code seem like it's performing one action, when - /// in reality it is performing another. Because of this, we proactively lint against their - /// presence to avoid surprises. - /// - /// ### Example - /// - /// ```rust,compile_fail - /// #![deny(text_direction_codepoint_in_literal)] - /// fn main() { - /// println!("{:?}", ''); - /// } - /// ``` - /// - /// {{produces}} - /// - pub TEXT_DIRECTION_CODEPOINT_IN_LITERAL, - Deny, - "detect special Unicode codepoints that affect the visual representation of text on screen, \ - changing the direction in which text flows", -} - -declare_lint_pass!(HiddenUnicodeCodepoints => [TEXT_DIRECTION_CODEPOINT_IN_LITERAL]); - -impl HiddenUnicodeCodepoints { - fn lint_text_direction_codepoint( - &self, - cx: &EarlyContext<'_>, - text: Symbol, - span: Span, - padding: u32, - point_at_inner_spans: bool, - label: &str, - ) { - // Obtain the `Span`s for each of the forbidden chars. 
- let spans: Vec<_> = text - .as_str() - .char_indices() - .filter_map(|(i, c)| { - TEXT_FLOW_CONTROL_CHARS.contains(&c).then(|| { - let lo = span.lo() + BytePos(i as u32 + padding); - (c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32))) - }) - }) - .collect(); - - let count = spans.len(); - let labels = point_at_inner_spans - .then_some(HiddenUnicodeCodepointsDiagLabels { spans: spans.clone() }); - let sub = if point_at_inner_spans && !spans.is_empty() { - HiddenUnicodeCodepointsDiagSub::Escape { spans } - } else { - HiddenUnicodeCodepointsDiagSub::NoEscape { spans } - }; - - cx.emit_span_lint( - TEXT_DIRECTION_CODEPOINT_IN_LITERAL, - span, - HiddenUnicodeCodepointsDiag { label, count, span_label: span, labels, sub }, - ); - } - - fn check_literal( - &mut self, - cx: &EarlyContext<'_>, - text: Symbol, - lit_kind: ast::token::LitKind, - span: Span, - label: &'static str, - ) { - if !contains_text_flow_control_chars(text.as_str()) { - return; - } - let (padding, point_at_inner_spans) = match lit_kind { - // account for `"` or `'` - ast::token::LitKind::Str | ast::token::LitKind::Char => (1, true), - // account for `c"` - ast::token::LitKind::CStr => (2, true), - // account for `r###"` - ast::token::LitKind::StrRaw(n) => (n as u32 + 2, true), - // account for `cr###"` - ast::token::LitKind::CStrRaw(n) => (n as u32 + 3, true), - // suppress bad literals. - ast::token::LitKind::Err(_) => return, - // Be conservative just in case new literals do support these. 
- _ => (0, false), - }; - self.lint_text_direction_codepoint(cx, text, span, padding, point_at_inner_spans, label); - } -} - -impl EarlyLintPass for HiddenUnicodeCodepoints { - fn check_attribute(&mut self, cx: &EarlyContext<'_>, attr: &ast::Attribute) { - if let ast::AttrKind::DocComment(_, comment) = attr.kind { - if contains_text_flow_control_chars(comment.as_str()) { - self.lint_text_direction_codepoint(cx, comment, attr.span, 0, false, "doc comment"); - } - } - } - - #[inline] - fn check_expr(&mut self, cx: &EarlyContext<'_>, expr: &ast::Expr) { - // byte strings are already handled well enough by `EscapeError::NonAsciiCharInByteString` - match &expr.kind { - ast::ExprKind::Lit(token_lit) => { - self.check_literal(cx, token_lit.symbol, token_lit.kind, expr.span, "literal"); - } - ast::ExprKind::FormatArgs(args) => { - let (lit_kind, text) = args.uncooked_fmt_str; - self.check_literal(cx, text, lit_kind, args.span, "format string"); - } - _ => {} - }; - } -} diff --git a/compiler/rustc_lint/src/lib.rs b/compiler/rustc_lint/src/lib.rs index ce290eab8e9..0a52e42e442 100644 --- a/compiler/rustc_lint/src/lib.rs +++ b/compiler/rustc_lint/src/lib.rs @@ -48,7 +48,6 @@ mod errors; mod expect; mod for_loops_over_fallibles; mod foreign_modules; -pub mod hidden_unicode_codepoints; mod if_let_rescope; mod impl_trait_overcaptures; mod internal; @@ -92,7 +91,6 @@ use deref_into_dyn_supertrait::*; use drop_forget_useless::*; use enum_intrinsics_non_enums::EnumIntrinsicsNonEnums; use for_loops_over_fallibles::*; -use hidden_unicode_codepoints::*; use if_let_rescope::IfLetRescope; use impl_trait_overcaptures::ImplTraitOvercaptures; use internal::*; @@ -177,7 +175,6 @@ early_lint_methods!( DeprecatedAttr: DeprecatedAttr::default(), WhileTrue: WhileTrue, NonAsciiIdents: NonAsciiIdents, - HiddenUnicodeCodepoints: HiddenUnicodeCodepoints, IncompleteInternalFeatures: IncompleteInternalFeatures, RedundantSemicolons: RedundantSemicolons, UnusedDocComment: UnusedDocComment, | 
