Auto merge of #90460 - pietroalbini:bidi-stable, r=nikomatsakis,pietroalbini 1.56.1

[stable] Fix CVE-2021-42574 and prepare Rust 1.56.1 This PR implements new lints to mitigate the impact of [CVE-2021-42574], caused by the presence of bidirectional-override Unicode codepoints in the compiled source code. [See the advisory][advisory] for more information about the vulnerability. The changes in this PR will be released later today as part of Rust 1.56.1. [CVE-2021-42574]: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-42574 [advisory]: https://blog.rust-lang.org/2021/11/01/cve-2021-42574.html
author: bors <bors@rust-lang.org> 2021-11-01 07:14:16 +0000
committer: bors <bors@rust-lang.org> 2021-11-01 07:14:16 +0000
commit: 59eed8a2aac0230a8b53e89d4e99d55912ba6b35 (patch)
tree: 4c5617dfb93a6e1cf6381c445e6a6ea3016bfb48 /compiler/rustc_parse
parent: 09c42c45858d5f3aedfa670698275303a3d19afa (diff)
parent: 6552f7a75a4ee0f314ca5e87edc4c322d3f3eceb (diff)
download: rust-1.56.1.tar.gz
rust-1.56.1.zip
3 files changed, 53 insertions, 8 deletions
diff --git a/compiler/rustc_parse/Cargo.toml b/compiler/rustc_parse/Cargo.toml
index 1ca1a92252e..b67195e6e22 100644
--- a/compiler/rustc_parse/Cargo.toml
+++ b/compiler/rustc_parse/Cargo.toml
@@ -18,3 +18,4 @@ rustc_session = { path = "../rustc_session" }
 rustc_span = { path = "../rustc_span" }
 rustc_ast = { path = "../rustc_ast" }
 unicode-normalization = "0.1.11"
+unicode-width = "0.1.4"
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 1e65cc27154..8e90f73b44e 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -4,7 +4,9 @@ use rustc_ast::tokenstream::{Spacing, TokenStream};
 use rustc_errors::{error_code, Applicability, DiagnosticBuilder, FatalError, PResult};
 use rustc_lexer::unescape::{self, Mode};
 use rustc_lexer::{Base, DocStyle, RawStrError};
-use rustc_session::lint::builtin::RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX;
+use rustc_session::lint::builtin::{
+    TEXT_DIRECTION_CODEPOINT_IN_COMMENT, RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
+};
 use rustc_session::lint::BuiltinLintDiagnostics;
 use rustc_session::parse::ParseSess;
 use rustc_span::symbol::{sym, Symbol};
@@ -129,6 +131,28 @@ impl<'a> StringReader<'a> {
             .struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
     }
 
+    /// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly
+    /// complain about it.
+    fn lint_unicode_text_flow(&self, start: BytePos) {
+        // Opening delimiter of the length 2 is not included into the comment text.
+        let content_start = start + BytePos(2);
+        let content = self.str_from(content_start);
+        let span = self.mk_sp(start, self.pos);
+        const UNICODE_TEXT_FLOW_CHARS: &[char] = &[
+            '\u{202A}', '\u{202B}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}',
+            '\u{202C}', '\u{2069}',
+        ];
+        if content.contains(UNICODE_TEXT_FLOW_CHARS) {
+            self.sess.buffer_lint_with_diagnostic(
+                &TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
+                span,
+                ast::CRATE_NODE_ID,
+                "unicode codepoint changing visible direction of text present in comment",
+                BuiltinLintDiagnostics::UnicodeTextFlow(span, content.to_string()),
+            );
+        }
+    }
+
     /// Turns simple `rustc_lexer::TokenKind` enum into a rich
     /// `rustc_ast::TokenKind`. This turns strings into interned
     /// symbols and runs additional validation.
@@ -136,7 +160,12 @@ impl<'a> StringReader<'a> {
         Some(match token {
             rustc_lexer::TokenKind::LineComment { doc_style } => {
                 // Skip non-doc comments
-                let doc_style = doc_style?;
+                let doc_style = if let Some(doc_style) = doc_style {
+                    doc_style
+                } else {
+                    self.lint_unicode_text_flow(start);
+                    return None;
+                };
 
                 // Opening delimiter of the length 3 is not included into the symbol.
                 let content_start = start + BytePos(3);
@@ -158,7 +187,12 @@ impl<'a> StringReader<'a> {
                 }
 
                 // Skip non-doc comments
-                let doc_style = doc_style?;
+                let doc_style = if let Some(doc_style) = doc_style {
+                    doc_style
+                } else {
+                    self.lint_unicode_text_flow(start);
+                    return None;
+                };
 
                 // Opening delimiter of the length 3 and closing delimiter of the length 2
                 // are not included into the symbol.
diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
index aa6b424ce2b..e26d094a0e2 100644
--- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
+++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
@@ -154,12 +154,17 @@ pub(crate) fn emit_unescape_error(
             assert!(mode.is_bytes());
             let (c, span) = last_char();
             let mut err = handler.struct_span_err(span, "non-ASCII character in byte constant");
-            err.span_label(span, "byte constant must be ASCII");
+            let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
+                format!(" but is {:?}", c)
+            } else {
+                String::new()
+            };
+            err.span_label(span, &format!("byte constant must be ASCII{}", postfix));
             if (c as u32) <= 0xFF {
                 err.span_suggestion(
                     span,
                     &format!(
-                        "if you meant to use the unicode code point for '{}', use a \\xHH escape",
+                        "if you meant to use the unicode code point for {:?}, use a \\xHH escape",
                         c
                     ),
                     format!("\\x{:X}", c as u32),
@@ -173,7 +178,7 @@ pub(crate) fn emit_unescape_error(
                 err.span_suggestion(
                     span,
                     &format!(
-                        "if you meant to use the UTF-8 encoding of '{}', use \\xHH escapes",
+                        "if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes",
                         c
                     ),
                     utf8.as_bytes()
@@ -187,10 +192,15 @@ pub(crate) fn emit_unescape_error(
         }
         EscapeError::NonAsciiCharInByteString => {
             assert!(mode.is_bytes());
-            let (_c, span) = last_char();
+            let (c, span) = last_char();
+            let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
+                format!(" but is {:?}", c)
+            } else {
+                String::new()
+            };
             handler
                 .struct_span_err(span, "raw byte string must be ASCII")
-                .span_label(span, "must be ASCII")
+                .span_label(span, &format!("must be ASCII{}", postfix))
                 .emit();
         }
         EscapeError::OutOfRangeHexEscape => {
author	bors <bors@rust-lang.org>	2021-11-01 07:14:16 +0000
committer	bors <bors@rust-lang.org>	2021-11-01 07:14:16 +0000
commit	59eed8a2aac0230a8b53e89d4e99d55912ba6b35 (patch)
tree	4c5617dfb93a6e1cf6381c445e6a6ea3016bfb48 /compiler/rustc_parse
parent	09c42c45858d5f3aedfa670698275303a3d19afa (diff)
parent	6552f7a75a4ee0f314ca5e87edc4c322d3f3eceb (diff)
download	rust-1.56.1.tar.gz rust-1.56.1.zip