diff options
| author | bors <bors@rust-lang.org> | 2021-11-01 03:51:06 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2021-11-01 03:51:06 +0000 |
| commit | 708d57e288d051a6238ed56039ffeac158e10e84 (patch) | |
| tree | f834a735f6c6d81803e35c728f9a4faafbd0f3c6 /compiler/rustc_lint/src | |
| parent | 7e4c9eebd82e9fa71f74626e5ba4e3494b8aba25 (diff) | |
| parent | a59d96ecf299c2c84eeda29ae89862fa7bc269c4 (diff) | |
| download | rust-708d57e288d051a6238ed56039ffeac158e10e84.tar.gz rust-708d57e288d051a6238ed56039ffeac158e10e84.zip | |
Auto merge of #90461 - pietroalbini:bidi-beta, r=nikomatsakis
[beta] Fix CVE-2021-42574 This PR implements new lints to mitigate the impact of [CVE-2021-42574], caused by the presence of bidirectional-override Unicode codepoints in the compiled source code. [See the advisory][advisory] for more information about the vulnerability. The changes in this PR will be released in tomorrow's beta release. [CVE-2021-42574]: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-42574 [advisory]: https://blog.rust-lang.org/2021/11/01/cve-2021-42574.html
Diffstat (limited to 'compiler/rustc_lint/src')
| -rw-r--r-- | compiler/rustc_lint/src/context.rs | 39 | ||||
| -rw-r--r-- | compiler/rustc_lint/src/hidden_unicode_codepoints.rs | 161 | ||||
| -rw-r--r-- | compiler/rustc_lint/src/lib.rs | 3 |
3 files changed, 202 insertions, 1 deletions
diff --git a/compiler/rustc_lint/src/context.rs b/compiler/rustc_lint/src/context.rs index d235b220944..6fd0a5b95f9 100644 --- a/compiler/rustc_lint/src/context.rs +++ b/compiler/rustc_lint/src/context.rs @@ -16,6 +16,7 @@ use self::TargetLint::*; +use crate::hidden_unicode_codepoints::UNICODE_TEXT_FLOW_CHARS; use crate::levels::{is_known_lint_tool, LintLevelsBuilder}; use crate::passes::{EarlyLintPassObject, LateLintPassObject}; use rustc_ast as ast; @@ -39,7 +40,7 @@ use rustc_session::lint::{BuiltinLintDiagnostics, ExternDepSpec}; use rustc_session::lint::{FutureIncompatibleInfo, Level, Lint, LintBuffer, LintId}; use rustc_session::Session; use rustc_span::lev_distance::find_best_match_for_name; -use rustc_span::{symbol::Symbol, MultiSpan, Span, DUMMY_SP}; +use rustc_span::{symbol::Symbol, BytePos, MultiSpan, Span, DUMMY_SP}; use rustc_target::abi; use tracing::debug; @@ -597,6 +598,42 @@ pub trait LintContext: Sized { // Now, set up surrounding context. let sess = self.sess(); match diagnostic { + BuiltinLintDiagnostics::UnicodeTextFlow(span, content) => { + let spans: Vec<_> = content + .char_indices() + .filter_map(|(i, c)| { + UNICODE_TEXT_FLOW_CHARS.contains(&c).then(|| { + let lo = span.lo() + BytePos(2 + i as u32); + (c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32))) + }) + }) + .collect(); + let (an, s) = match spans.len() { + 1 => ("an ", ""), + _ => ("", "s"), + }; + db.span_label(span, &format!( + "this comment contains {}invisible unicode text flow control codepoint{}", + an, + s, + )); + for (c, span) in &spans { + db.span_label(*span, format!("{:?}", c)); + } + db.note( + "these kind of unicode codepoints change the way text flows on \ + applications that support them, but can cause confusion because they \ + change the order of characters on the screen", + ); + if !spans.is_empty() { + db.multipart_suggestion_with_style( + "if their presence wasn't intentional, you can remove them", + spans.into_iter().map(|(_, span)| (span, "".to_string())).collect(), + Applicability::MachineApplicable, + SuggestionStyle::HideCodeAlways, + ); + } + }, BuiltinLintDiagnostics::Normal => (), BuiltinLintDiagnostics::BareTraitObject(span, is_global) => { let (sugg, app) = match sess.source_map().span_to_snippet(span) { diff --git a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs b/compiler/rustc_lint/src/hidden_unicode_codepoints.rs new file mode 100644 index 00000000000..1bcdcb806fc --- /dev/null +++ b/compiler/rustc_lint/src/hidden_unicode_codepoints.rs @@ -0,0 +1,161 @@ +use crate::{EarlyContext, EarlyLintPass, LintContext}; +use rustc_ast as ast; +use rustc_errors::{Applicability, SuggestionStyle}; +use rustc_span::{BytePos, Span, Symbol}; + +declare_lint! { + /// The `text_direction_codepoint_in_literal` lint detects Unicode codepoints that change the + /// visual representation of text on screen in a way that does not correspond to their on + /// memory representation. + /// + /// ### Explanation + /// + /// The unicode characters `\u{202A}`, `\u{202B}`, `\u{202D}`, `\u{202E}`, `\u{2066}`, + /// `\u{2067}`, `\u{2068}`, `\u{202C}` and `\u{2069}` make the flow of text on screen change + /// its direction on software that supports these codepoints. This makes the text "abc" display + /// as "cba" on screen. By leveraging software that supports these, people can write specially + /// crafted literals that make the surrounding code seem like it's performing one action, when + /// in reality it is performing another. Because of this, we proactively lint against their + /// presence to avoid surprises. + /// + /// ### Example + /// + /// ```rust,compile_fail + /// #![deny(text_direction_codepoint_in_literal)] + /// fn main() { + /// println!("{:?}", ''); + /// } + /// ``` + /// + /// {{produces}} + /// + pub TEXT_DIRECTION_CODEPOINT_IN_LITERAL, + Deny, + "detect special Unicode codepoints that affect the visual representation of text on screen, \ + changing the direction in which text flows", +} + +declare_lint_pass!(HiddenUnicodeCodepoints => [TEXT_DIRECTION_CODEPOINT_IN_LITERAL]); + +crate const UNICODE_TEXT_FLOW_CHARS: &[char] = &[ + '\u{202A}', '\u{202B}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}', '\u{202C}', + '\u{2069}', +]; + +impl HiddenUnicodeCodepoints { + fn lint_text_direction_codepoint( + &self, + cx: &EarlyContext<'_>, + text: Symbol, + span: Span, + padding: u32, + point_at_inner_spans: bool, + label: &str, + ) { + // Obtain the `Span`s for each of the forbidden chars. + let spans: Vec<_> = text + .as_str() + .char_indices() + .filter_map(|(i, c)| { + UNICODE_TEXT_FLOW_CHARS.contains(&c).then(|| { + let lo = span.lo() + BytePos(i as u32 + padding); + (c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32))) + }) + }) + .collect(); + + cx.struct_span_lint(TEXT_DIRECTION_CODEPOINT_IN_LITERAL, span, |lint| { + let mut err = lint.build(&format!( + "unicode codepoint changing visible direction of text present in {}", + label + )); + let (an, s) = match spans.len() { + 1 => ("an ", ""), + _ => ("", "s"), + }; + err.span_label( + span, + &format!( + "this {} contains {}invisible unicode text flow control codepoint{}", + label, an, s, + ), + ); + if point_at_inner_spans { + for (c, span) in &spans { + err.span_label(*span, format!("{:?}", c)); + } + } + err.note( + "these kind of unicode codepoints change the way text flows on applications that \ + support them, but can cause confusion because they change the order of \ + characters on the screen", + ); + if point_at_inner_spans && !spans.is_empty() { + err.multipart_suggestion_with_style( + "if their presence wasn't intentional, you can remove them", + spans.iter().map(|(_, span)| (*span, "".to_string())).collect(), + Applicability::MachineApplicable, + SuggestionStyle::HideCodeAlways, + ); + err.multipart_suggestion( + "if you want to keep them but make them visible in your source code, you can \ + escape them", + spans + .into_iter() + .map(|(c, span)| { + let c = format!("{:?}", c); + (span, c[1..c.len() - 1].to_string()) + }) + .collect(), + Applicability::MachineApplicable, + ); + } else { + // FIXME: in other suggestions we've reversed the inner spans of doc comments. We + // should do the same here to provide the same good suggestions as we do for + // literals above. + err.note("if their presence wasn't intentional, you can remove them"); + err.note(&format!( + "if you want to keep them but make them visible in your source code, you can \ + escape them: {}", + spans + .into_iter() + .map(|(c, _)| { format!("{:?}", c) }) + .collect::<Vec<String>>() + .join(", "), + )); + } + err.emit(); + }); + } +} +impl EarlyLintPass for HiddenUnicodeCodepoints { + fn check_attribute(&mut self, cx: &EarlyContext<'_>, attr: &ast::Attribute) { + if let ast::AttrKind::DocComment(_, comment) = attr.kind { + if comment.as_str().contains(UNICODE_TEXT_FLOW_CHARS) { + self.lint_text_direction_codepoint(cx, comment, attr.span, 0, false, "doc comment"); + } + } + } + + fn check_expr(&mut self, cx: &EarlyContext<'_>, expr: &ast::Expr) { + // byte strings are already handled well enough by `EscapeError::NonAsciiCharInByteString` + let (text, span, padding) = match &expr.kind { + ast::ExprKind::Lit(ast::Lit { token, kind, span }) => { + let text = token.symbol; + if !text.as_str().contains(UNICODE_TEXT_FLOW_CHARS) { + return; + } + let padding = match kind { + // account for `"` or `'` + ast::LitKind::Str(_, ast::StrStyle::Cooked) | ast::LitKind::Char(_) => 1, + // account for `r###"` + ast::LitKind::Str(_, ast::StrStyle::Raw(val)) => *val as u32 + 2, + _ => return, + }; + (text, span, padding) + } + _ => return, + }; + self.lint_text_direction_codepoint(cx, text, *span, padding, true, "literal"); + } +} diff --git a/compiler/rustc_lint/src/lib.rs b/compiler/rustc_lint/src/lib.rs index 6f684a0fe51..d98d65385e5 100644 --- a/compiler/rustc_lint/src/lib.rs +++ b/compiler/rustc_lint/src/lib.rs @@ -48,6 +48,7 @@ pub mod builtin; mod context; mod early; mod enum_intrinsics_non_enums; +pub mod hidden_unicode_codepoints; mod internal; mod late; mod levels; @@ -78,6 +79,7 @@ use rustc_span::Span; use array_into_iter::ArrayIntoIter; use builtin::*; use enum_intrinsics_non_enums::EnumIntrinsicsNonEnums; +use hidden_unicode_codepoints::*; use internal::*; use methods::*; use non_ascii_idents::*; @@ -129,6 +131,7 @@ macro_rules! early_lint_passes { DeprecatedAttr: DeprecatedAttr::new(), WhileTrue: WhileTrue, NonAsciiIdents: NonAsciiIdents, + HiddenUnicodeCodepoints: HiddenUnicodeCodepoints, IncompleteFeatures: IncompleteFeatures, RedundantSemicolons: RedundantSemicolons, UnusedDocComment: UnusedDocComment, |
