diff options
| author | bors <bors@rust-lang.org> | 2021-11-01 07:14:16 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2021-11-01 07:14:16 +0000 |
| commit | 59eed8a2aac0230a8b53e89d4e99d55912ba6b35 (patch) | |
| tree | 4c5617dfb93a6e1cf6381c445e6a6ea3016bfb48 /compiler/rustc_parse | |
| parent | 09c42c45858d5f3aedfa670698275303a3d19afa (diff) | |
| parent | 6552f7a75a4ee0f314ca5e87edc4c322d3f3eceb (diff) | |
| download | rust-1.56.1.tar.gz rust-1.56.1.zip | |
Auto merge of #90460 - pietroalbini:bidi-stable, r=nikomatsakis,pietroalbini 1.56.1
[stable] Fix CVE-2021-42574 and prepare Rust 1.56.1 This PR implements new lints to mitigate the impact of [CVE-2021-42574], caused by the presence of bidirectional-override Unicode codepoints in the compiled source code. [See the advisory][advisory] for more information about the vulnerability. The changes in this PR will be released later today as part of Rust 1.56.1. [CVE-2021-42574]: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-42574 [advisory]: https://blog.rust-lang.org/2021/11/01/cve-2021-42574.html
Diffstat (limited to 'compiler/rustc_parse')
| -rw-r--r-- | compiler/rustc_parse/Cargo.toml | 1 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/lexer/mod.rs | 40 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/lexer/unescape_error_reporting.rs | 20 |
3 files changed, 53 insertions, 8 deletions
diff --git a/compiler/rustc_parse/Cargo.toml b/compiler/rustc_parse/Cargo.toml index 1ca1a92252e..b67195e6e22 100644 --- a/compiler/rustc_parse/Cargo.toml +++ b/compiler/rustc_parse/Cargo.toml @@ -18,3 +18,4 @@ rustc_session = { path = "../rustc_session" } rustc_span = { path = "../rustc_span" } rustc_ast = { path = "../rustc_ast" } unicode-normalization = "0.1.11" +unicode-width = "0.1.4" diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 1e65cc27154..8e90f73b44e 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -4,7 +4,9 @@ use rustc_ast::tokenstream::{Spacing, TokenStream}; use rustc_errors::{error_code, Applicability, DiagnosticBuilder, FatalError, PResult}; use rustc_lexer::unescape::{self, Mode}; use rustc_lexer::{Base, DocStyle, RawStrError}; -use rustc_session::lint::builtin::RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX; +use rustc_session::lint::builtin::{ + TEXT_DIRECTION_CODEPOINT_IN_COMMENT, RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, +}; use rustc_session::lint::BuiltinLintDiagnostics; use rustc_session::parse::ParseSess; use rustc_span::symbol::{sym, Symbol}; @@ -129,6 +131,28 @@ impl<'a> StringReader<'a> { .struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c))) } + /// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly + /// complain about it. + fn lint_unicode_text_flow(&self, start: BytePos) { + // Opening delimiter of the length 2 is not included into the comment text. + let content_start = start + BytePos(2); + let content = self.str_from(content_start); + let span = self.mk_sp(start, self.pos); + const UNICODE_TEXT_FLOW_CHARS: &[char] = &[ + '\u{202A}', '\u{202B}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}', + '\u{202C}', '\u{2069}', + ]; + if content.contains(UNICODE_TEXT_FLOW_CHARS) { + self.sess.buffer_lint_with_diagnostic( + &TEXT_DIRECTION_CODEPOINT_IN_COMMENT, + span, + ast::CRATE_NODE_ID, + "unicode codepoint changing visible direction of text present in comment", + BuiltinLintDiagnostics::UnicodeTextFlow(span, content.to_string()), + ); + } + } + /// Turns simple `rustc_lexer::TokenKind` enum into a rich /// `rustc_ast::TokenKind`. This turns strings into interned /// symbols and runs additional validation. @@ -136,7 +160,12 @@ impl<'a> StringReader<'a> { Some(match token { rustc_lexer::TokenKind::LineComment { doc_style } => { // Skip non-doc comments - let doc_style = doc_style?; + let doc_style = if let Some(doc_style) = doc_style { + doc_style + } else { + self.lint_unicode_text_flow(start); + return None; + }; // Opening delimiter of the length 3 is not included into the symbol. let content_start = start + BytePos(3); @@ -158,7 +187,12 @@ impl<'a> StringReader<'a> { } // Skip non-doc comments - let doc_style = doc_style?; + let doc_style = if let Some(doc_style) = doc_style { + doc_style + } else { + self.lint_unicode_text_flow(start); + return None; + }; // Opening delimiter of the length 3 and closing delimiter of the length 2 // are not included into the symbol. diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index aa6b424ce2b..e26d094a0e2 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -154,12 +154,17 @@ pub(crate) fn emit_unescape_error( assert!(mode.is_bytes()); let (c, span) = last_char(); let mut err = handler.struct_span_err(span, "non-ASCII character in byte constant"); - err.span_label(span, "byte constant must be ASCII"); + let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { + format!(" but is {:?}", c) + } else { + String::new() + }; + err.span_label(span, &format!("byte constant must be ASCII{}", postfix)); if (c as u32) <= 0xFF { err.span_suggestion( span, &format!( - "if you meant to use the unicode code point for '{}', use a \\xHH escape", + "if you meant to use the unicode code point for {:?}, use a \\xHH escape", c ), format!("\\x{:X}", c as u32), @@ -173,7 +178,7 @@ pub(crate) fn emit_unescape_error( err.span_suggestion( span, &format!( - "if you meant to use the UTF-8 encoding of '{}', use \\xHH escapes", + "if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes", c ), utf8.as_bytes() @@ -187,10 +192,15 @@ pub(crate) fn emit_unescape_error( } EscapeError::NonAsciiCharInByteString => { assert!(mode.is_bytes()); - let (_c, span) = last_char(); + let (c, span) = last_char(); + let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { + format!(" but is {:?}", c) + } else { + String::new() + }; handler .struct_span_err(span, "raw byte string must be ASCII") - .span_label(span, "must be ASCII") + .span_label(span, &format!("must be ASCII{}", postfix)) .emit(); } EscapeError::OutOfRangeHexEscape => { |
