diff options
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/rustc_lint/src/early/diagnostics.rs | 21 | ||||
| -rw-r--r-- | compiler/rustc_lint/src/hidden_unicode_codepoints.rs | 136 | ||||
| -rw-r--r-- | compiler/rustc_lint/src/lib.rs | 3 | ||||
| -rw-r--r-- | compiler/rustc_lint_defs/src/builtin.rs | 42 | ||||
| -rw-r--r-- | compiler/rustc_lint_defs/src/lib.rs | 8 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/lexer/mod.rs | 89 |
6 files changed, 155 insertions, 144 deletions
diff --git a/compiler/rustc_lint/src/early/diagnostics.rs b/compiler/rustc_lint/src/early/diagnostics.rs index 646dc109805..71b621e8d20 100644 --- a/compiler/rustc_lint/src/early/diagnostics.rs +++ b/compiler/rustc_lint/src/early/diagnostics.rs @@ -187,6 +187,27 @@ pub fn decorate_builtin_lint( lints::ReservedMultihash { suggestion }.decorate_lint(diag); } } + BuiltinLintDiag::HiddenUnicodeCodepoints { + label, + count, + span_label, + labels, + escape, + spans, + } => { + lints::HiddenUnicodeCodepointsDiag { + label: &label, + count, + span_label, + labels: labels.map(|spans| lints::HiddenUnicodeCodepointsDiagLabels { spans }), + sub: if escape { + lints::HiddenUnicodeCodepointsDiagSub::Escape { spans } + } else { + lints::HiddenUnicodeCodepointsDiagSub::NoEscape { spans } + }, + } + .decorate_lint(diag); + } BuiltinLintDiag::UnusedBuiltinAttribute { attr_name, macro_name, invoc_span } => { lints::UnusedBuiltinAttribute { invoc_span, attr_name, macro_name }.decorate_lint(diag); } diff --git a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs b/compiler/rustc_lint/src/hidden_unicode_codepoints.rs deleted file mode 100644 index 491c2826baa..00000000000 --- a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs +++ /dev/null @@ -1,136 +0,0 @@ -use ast::util::unicode::{TEXT_FLOW_CONTROL_CHARS, contains_text_flow_control_chars}; -use rustc_ast as ast; -use rustc_session::{declare_lint, declare_lint_pass}; -use rustc_span::{BytePos, Span, Symbol}; - -use crate::lints::{ - HiddenUnicodeCodepointsDiag, HiddenUnicodeCodepointsDiagLabels, HiddenUnicodeCodepointsDiagSub, -}; -use crate::{EarlyContext, EarlyLintPass, LintContext}; - -declare_lint! { - #[allow(text_direction_codepoint_in_literal)] - /// The `text_direction_codepoint_in_literal` lint detects Unicode codepoints that change the - /// visual representation of text on screen in a way that does not correspond to their on - /// memory representation. - /// - /// ### Explanation - /// - /// The unicode characters `\u{202A}`, `\u{202B}`, `\u{202D}`, `\u{202E}`, `\u{2066}`, - /// `\u{2067}`, `\u{2068}`, `\u{202C}` and `\u{2069}` make the flow of text on screen change - /// its direction on software that supports these codepoints. This makes the text "abc" display - /// as "cba" on screen. By leveraging software that supports these, people can write specially - /// crafted literals that make the surrounding code seem like it's performing one action, when - /// in reality it is performing another. Because of this, we proactively lint against their - /// presence to avoid surprises. - /// - /// ### Example - /// - /// ```rust,compile_fail - /// #![deny(text_direction_codepoint_in_literal)] - /// fn main() { - /// println!("{:?}", ''); - /// } - /// ``` - /// - /// {{produces}} - /// - pub TEXT_DIRECTION_CODEPOINT_IN_LITERAL, - Deny, - "detect special Unicode codepoints that affect the visual representation of text on screen, \ - changing the direction in which text flows", -} - -declare_lint_pass!(HiddenUnicodeCodepoints => [TEXT_DIRECTION_CODEPOINT_IN_LITERAL]); - -impl HiddenUnicodeCodepoints { - fn lint_text_direction_codepoint( - &self, - cx: &EarlyContext<'_>, - text: Symbol, - span: Span, - padding: u32, - point_at_inner_spans: bool, - label: &str, - ) { - // Obtain the `Span`s for each of the forbidden chars. - let spans: Vec<_> = text - .as_str() - .char_indices() - .filter_map(|(i, c)| { - TEXT_FLOW_CONTROL_CHARS.contains(&c).then(|| { - let lo = span.lo() + BytePos(i as u32 + padding); - (c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32))) - }) - }) - .collect(); - - let count = spans.len(); - let labels = point_at_inner_spans - .then_some(HiddenUnicodeCodepointsDiagLabels { spans: spans.clone() }); - let sub = if point_at_inner_spans && !spans.is_empty() { - HiddenUnicodeCodepointsDiagSub::Escape { spans } - } else { - HiddenUnicodeCodepointsDiagSub::NoEscape { spans } - }; - - cx.emit_span_lint( - TEXT_DIRECTION_CODEPOINT_IN_LITERAL, - span, - HiddenUnicodeCodepointsDiag { label, count, span_label: span, labels, sub }, - ); - } - - fn check_literal( - &mut self, - cx: &EarlyContext<'_>, - text: Symbol, - lit_kind: ast::token::LitKind, - span: Span, - label: &'static str, - ) { - if !contains_text_flow_control_chars(text.as_str()) { - return; - } - let (padding, point_at_inner_spans) = match lit_kind { - // account for `"` or `'` - ast::token::LitKind::Str | ast::token::LitKind::Char => (1, true), - // account for `c"` - ast::token::LitKind::CStr => (2, true), - // account for `r###"` - ast::token::LitKind::StrRaw(n) => (n as u32 + 2, true), - // account for `cr###"` - ast::token::LitKind::CStrRaw(n) => (n as u32 + 3, true), - // suppress bad literals. - ast::token::LitKind::Err(_) => return, - // Be conservative just in case new literals do support these. - _ => (0, false), - }; - self.lint_text_direction_codepoint(cx, text, span, padding, point_at_inner_spans, label); - } -} - -impl EarlyLintPass for HiddenUnicodeCodepoints { - fn check_attribute(&mut self, cx: &EarlyContext<'_>, attr: &ast::Attribute) { - if let ast::AttrKind::DocComment(_, comment) = attr.kind { - if contains_text_flow_control_chars(comment.as_str()) { - self.lint_text_direction_codepoint(cx, comment, attr.span, 0, false, "doc comment"); - } - } - } - - #[inline] - fn check_expr(&mut self, cx: &EarlyContext<'_>, expr: &ast::Expr) { - // byte strings are already handled well enough by `EscapeError::NonAsciiCharInByteString` - match &expr.kind { - ast::ExprKind::Lit(token_lit) => { - self.check_literal(cx, token_lit.symbol, token_lit.kind, expr.span, "literal"); - } - ast::ExprKind::FormatArgs(args) => { - let (lit_kind, text) = args.uncooked_fmt_str; - self.check_literal(cx, text, lit_kind, args.span, "format string"); - } - _ => {} - }; - } -} diff --git a/compiler/rustc_lint/src/lib.rs b/compiler/rustc_lint/src/lib.rs index ce290eab8e9..0a52e42e442 100644 --- a/compiler/rustc_lint/src/lib.rs +++ b/compiler/rustc_lint/src/lib.rs @@ -48,7 +48,6 @@ mod errors; mod expect; mod for_loops_over_fallibles; mod foreign_modules; -pub mod hidden_unicode_codepoints; mod if_let_rescope; mod impl_trait_overcaptures; mod internal; @@ -92,7 +91,6 @@ use deref_into_dyn_supertrait::*; use drop_forget_useless::*; use enum_intrinsics_non_enums::EnumIntrinsicsNonEnums; use for_loops_over_fallibles::*; -use hidden_unicode_codepoints::*; use if_let_rescope::IfLetRescope; use impl_trait_overcaptures::ImplTraitOvercaptures; use internal::*; @@ -177,7 +175,6 @@ early_lint_methods!( DeprecatedAttr: DeprecatedAttr::default(), WhileTrue: WhileTrue, NonAsciiIdents: NonAsciiIdents, - HiddenUnicodeCodepoints: HiddenUnicodeCodepoints, IncompleteInternalFeatures: IncompleteInternalFeatures, RedundantSemicolons: RedundantSemicolons, UnusedDocComment: UnusedDocComment, diff --git a/compiler/rustc_lint_defs/src/builtin.rs b/compiler/rustc_lint_defs/src/builtin.rs index abf4840a026..7c7ba85d484 100644 --- a/compiler/rustc_lint_defs/src/builtin.rs +++ b/compiler/rustc_lint_defs/src/builtin.rs @@ -103,6 +103,7 @@ declare_lint_pass! { TAIL_EXPR_DROP_ORDER, TEST_UNSTABLE_LINT, TEXT_DIRECTION_CODEPOINT_IN_COMMENT, + TEXT_DIRECTION_CODEPOINT_IN_LITERAL, TRIVIAL_CASTS, TRIVIAL_NUMERIC_CASTS, TYVAR_BEHIND_RAW_POINTER, @@ -3782,7 +3783,6 @@ declare_lint! { } declare_lint! { - #[allow(text_direction_codepoint_in_literal)] /// The `text_direction_codepoint_in_comment` lint detects Unicode codepoints in comments that /// change the visual representation of text on screen in a way that does not correspond to /// their on memory representation. @@ -3792,7 +3792,7 @@ declare_lint! { /// ```rust,compile_fail /// #![deny(text_direction_codepoint_in_comment)] /// fn main() { - /// println!("{:?}"); // ''); + #[doc = " println!(\"{:?}\"); // '\u{202E}');"] /// } /// ``` /// @@ -3807,7 +3807,43 @@ declare_lint! { /// their use. pub TEXT_DIRECTION_CODEPOINT_IN_COMMENT, Deny, - "invisible directionality-changing codepoints in comment" + "invisible directionality-changing codepoints in comment", + crate_level_only +} + +declare_lint! { + /// The `text_direction_codepoint_in_literal` lint detects Unicode codepoints that change the + /// visual representation of text on screen in a way that does not correspond to their on + /// memory representation. + /// + /// ### Explanation + /// + /// The unicode characters `\u{202A}`, `\u{202B}`, `\u{202D}`, `\u{202E}`, `\u{2066}`, + /// `\u{2067}`, `\u{2068}`, `\u{202C}` and `\u{2069}` make the flow of text on screen change + /// its direction on software that supports these codepoints. This makes the text "abc" display + /// as "cba" on screen. By leveraging software that supports these, people can write specially + /// crafted literals that make the surrounding code seem like it's performing one action, when + /// in reality it is performing another. Because of this, we proactively lint against their + /// presence to avoid surprises. + /// + /// ### Example + /// + /// ```rust,compile_fail + /// #![deny(text_direction_codepoint_in_literal)] + /// fn main() { + // ` - convince tidy that backticks match + #[doc = " println!(\"{:?}\", '\u{202E}');"] + // ` + /// } + /// ``` + /// + /// {{produces}} + /// + pub TEXT_DIRECTION_CODEPOINT_IN_LITERAL, + Deny, + "detect special Unicode codepoints that affect the visual representation of text on screen, \ + changing the direction in which text flows", + crate_level_only } declare_lint! { diff --git a/compiler/rustc_lint_defs/src/lib.rs b/compiler/rustc_lint_defs/src/lib.rs index dc30532ba1a..6cbdc245d21 100644 --- a/compiler/rustc_lint_defs/src/lib.rs +++ b/compiler/rustc_lint_defs/src/lib.rs @@ -698,6 +698,14 @@ pub enum BuiltinLintDiag { is_string: bool, suggestion: Span, }, + HiddenUnicodeCodepoints { + label: String, + count: usize, + span_label: Span, + labels: Option<Vec<(char, Span)>>, + escape: bool, + spans: Vec<(char, Span)>, + }, TrailingMacro(bool, Ident), BreakWithLabelAndLoop(Span), UnicodeTextFlow(Span, String), diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 78c5742414b..2845bbed1c0 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -4,7 +4,7 @@ use diagnostics::make_unclosed_delims_error; use rustc_ast::ast::{self, AttrStyle}; use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind}; use rustc_ast::tokenstream::TokenStream; -use rustc_ast::util::unicode::contains_text_flow_control_chars; +use rustc_ast::util::unicode::{TEXT_FLOW_CONTROL_CHARS, contains_text_flow_control_chars}; use rustc_errors::codes::*; use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey}; use rustc_lexer::{ @@ -14,7 +14,7 @@ use rustc_literal_escaper::{EscapeError, Mode, unescape_mixed, unescape_unicode} use rustc_session::lint::BuiltinLintDiag; use rustc_session::lint::builtin::{ RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX, - TEXT_DIRECTION_CODEPOINT_IN_COMMENT, + TEXT_DIRECTION_CODEPOINT_IN_COMMENT, TEXT_DIRECTION_CODEPOINT_IN_LITERAL, }; use rustc_session::parse::ParseSess; use rustc_span::{BytePos, Pos, Span, Symbol, sym}; @@ -174,6 +174,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { // Opening delimiter of the length 3 is not included into the symbol. let content_start = start + BytePos(3); let content = self.str_from(content_start); + self.lint_doc_comment_unicode_text_flow(start, content); self.cook_doc_comment(content_start, content, CommentKind::Line, doc_style) } rustc_lexer::TokenKind::BlockComment { doc_style, terminated } => { @@ -193,6 +194,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { let content_start = start + BytePos(3); let content_end = self.pos - BytePos(if terminated { 2 } else { 0 }); let content = self.str_from_to(content_start, content_end); + self.lint_doc_comment_unicode_text_flow(start, content); self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style) } rustc_lexer::TokenKind::Frontmatter { has_invalid_preceding_whitespace, invalid_infostring } => { @@ -287,6 +289,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { } else { None }; + self.lint_literal_unicode_text_flow(symbol, kind, self.mk_sp(start, self.pos), "literal"); token::Literal(token::Lit { kind, symbol, suffix }) } rustc_lexer::TokenKind::Lifetime { starts_with_number } => { @@ -481,6 +484,88 @@ impl<'psess, 'src> Lexer<'psess, 'src> { } } + fn lint_doc_comment_unicode_text_flow(&mut self, start: BytePos, content: &str) { + if contains_text_flow_control_chars(content) { + self.report_text_direction_codepoint( + content, + self.mk_sp(start, self.pos), + 0, + false, + "doc comment", + ); + } + } + + fn lint_literal_unicode_text_flow( + &mut self, + text: Symbol, + lit_kind: token::LitKind, + span: Span, + label: &'static str, + ) { + if !contains_text_flow_control_chars(text.as_str()) { + return; + } + let (padding, point_at_inner_spans) = match lit_kind { + // account for `"` or `'` + token::LitKind::Str | token::LitKind::Char => (1, true), + // account for `c"` + token::LitKind::CStr => (2, true), + // account for `r###"` + token::LitKind::StrRaw(n) => (n as u32 + 2, true), + // account for `cr###"` + token::LitKind::CStrRaw(n) => (n as u32 + 3, true), + // suppress bad literals. + token::LitKind::Err(_) => return, + // Be conservative just in case new literals do support these. + _ => (0, false), + }; + self.report_text_direction_codepoint( + text.as_str(), + span, + padding, + point_at_inner_spans, + label, + ); + } + + fn report_text_direction_codepoint( + &self, + text: &str, + span: Span, + padding: u32, + point_at_inner_spans: bool, + label: &str, + ) { + // Obtain the `Span`s for each of the forbidden chars. + let spans: Vec<_> = text + .char_indices() + .filter_map(|(i, c)| { + TEXT_FLOW_CONTROL_CHARS.contains(&c).then(|| { + let lo = span.lo() + BytePos(i as u32 + padding); + (c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32))) + }) + }) + .collect(); + + let count = spans.len(); + let labels = point_at_inner_spans.then_some(spans.clone()); + + self.psess.buffer_lint( + TEXT_DIRECTION_CODEPOINT_IN_LITERAL, + span, + ast::CRATE_NODE_ID, + BuiltinLintDiag::HiddenUnicodeCodepoints { + label: label.to_string(), + count, + span_label: span, + labels, + escape: point_at_inner_spans && !spans.is_empty(), + spans, + }, + ); + } + fn validate_frontmatter( &self, start: BytePos, |
