summary refs log tree commit diff
path: root/compiler/rustc_parse/src/lexer
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/rustc_parse/src/lexer')
-rw-r--r--compiler/rustc_parse/src/lexer/mod.rs40
-rw-r--r--compiler/rustc_parse/src/lexer/unescape_error_reporting.rs20
2 files changed, 52 insertions, 8 deletions
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 1e65cc27154..8e90f73b44e 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -4,7 +4,9 @@ use rustc_ast::tokenstream::{Spacing, TokenStream};
 use rustc_errors::{error_code, Applicability, DiagnosticBuilder, FatalError, PResult};
 use rustc_lexer::unescape::{self, Mode};
 use rustc_lexer::{Base, DocStyle, RawStrError};
-use rustc_session::lint::builtin::RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX;
+use rustc_session::lint::builtin::{
+    TEXT_DIRECTION_CODEPOINT_IN_COMMENT, RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
+};
 use rustc_session::lint::BuiltinLintDiagnostics;
 use rustc_session::parse::ParseSess;
 use rustc_span::symbol::{sym, Symbol};
@@ -129,6 +131,28 @@ impl<'a> StringReader<'a> {
             .struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
     }
 
+    /// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly
+    /// complain about it.
+    fn lint_unicode_text_flow(&self, start: BytePos) {
+        // Opening delimiter of the length 2 is not included into the comment text.
+        let content_start = start + BytePos(2);
+        let content = self.str_from(content_start);
+        let span = self.mk_sp(start, self.pos);
+        const UNICODE_TEXT_FLOW_CHARS: &[char] = &[
+            '\u{202A}', '\u{202B}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}',
+            '\u{202C}', '\u{2069}',
+        ];
+        if content.contains(UNICODE_TEXT_FLOW_CHARS) {
+            self.sess.buffer_lint_with_diagnostic(
+                &TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
+                span,
+                ast::CRATE_NODE_ID,
+                "unicode codepoint changing visible direction of text present in comment",
+                BuiltinLintDiagnostics::UnicodeTextFlow(span, content.to_string()),
+            );
+        }
+    }
+
     /// Turns simple `rustc_lexer::TokenKind` enum into a rich
     /// `rustc_ast::TokenKind`. This turns strings into interned
     /// symbols and runs additional validation.
@@ -136,7 +160,12 @@ impl<'a> StringReader<'a> {
         Some(match token {
             rustc_lexer::TokenKind::LineComment { doc_style } => {
                 // Skip non-doc comments
-                let doc_style = doc_style?;
+                let doc_style = if let Some(doc_style) = doc_style {
+                    doc_style
+                } else {
+                    self.lint_unicode_text_flow(start);
+                    return None;
+                };
 
                 // Opening delimiter of the length 3 is not included into the symbol.
                 let content_start = start + BytePos(3);
@@ -158,7 +187,12 @@ impl<'a> StringReader<'a> {
                 }
 
                 // Skip non-doc comments
-                let doc_style = doc_style?;
+                let doc_style = if let Some(doc_style) = doc_style {
+                    doc_style
+                } else {
+                    self.lint_unicode_text_flow(start);
+                    return None;
+                };
 
                 // Opening delimiter of the length 3 and closing delimiter of the length 2
                 // are not included into the symbol.
diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
index aa6b424ce2b..e26d094a0e2 100644
--- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
+++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
@@ -154,12 +154,17 @@ pub(crate) fn emit_unescape_error(
             assert!(mode.is_bytes());
             let (c, span) = last_char();
             let mut err = handler.struct_span_err(span, "non-ASCII character in byte constant");
-            err.span_label(span, "byte constant must be ASCII");
+            let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
+                format!(" but is {:?}", c)
+            } else {
+                String::new()
+            };
+            err.span_label(span, &format!("byte constant must be ASCII{}", postfix));
             if (c as u32) <= 0xFF {
                 err.span_suggestion(
                     span,
                     &format!(
-                        "if you meant to use the unicode code point for '{}', use a \\xHH escape",
+                        "if you meant to use the unicode code point for {:?}, use a \\xHH escape",
                         c
                     ),
                     format!("\\x{:X}", c as u32),
@@ -173,7 +178,7 @@ pub(crate) fn emit_unescape_error(
                 err.span_suggestion(
                     span,
                     &format!(
-                        "if you meant to use the UTF-8 encoding of '{}', use \\xHH escapes",
+                        "if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes",
                         c
                     ),
                     utf8.as_bytes()
@@ -187,10 +192,15 @@ pub(crate) fn emit_unescape_error(
         }
         EscapeError::NonAsciiCharInByteString => {
             assert!(mode.is_bytes());
-            let (_c, span) = last_char();
+            let (c, span) = last_char();
+            let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
+                format!(" but is {:?}", c)
+            } else {
+                String::new()
+            };
             handler
                 .struct_span_err(span, "raw byte string must be ASCII")
-                .span_label(span, "must be ASCII")
+                .span_label(span, &format!("must be ASCII{}", postfix))
                 .emit();
         }
         EscapeError::OutOfRangeHexEscape => {