summary refs log tree commit diff
path: root/compiler/rustc_parse/src/lexer/mod.rs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/rustc_parse/src/lexer/mod.rs')
-rw-r--r--compiler/rustc_parse/src/lexer/mod.rs40
1 files changed, 37 insertions, 3 deletions
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 1e65cc27154..8e90f73b44e 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -4,7 +4,9 @@ use rustc_ast::tokenstream::{Spacing, TokenStream};
 use rustc_errors::{error_code, Applicability, DiagnosticBuilder, FatalError, PResult};
 use rustc_lexer::unescape::{self, Mode};
 use rustc_lexer::{Base, DocStyle, RawStrError};
-use rustc_session::lint::builtin::RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX;
+use rustc_session::lint::builtin::{
+    TEXT_DIRECTION_CODEPOINT_IN_COMMENT, RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
+};
 use rustc_session::lint::BuiltinLintDiagnostics;
 use rustc_session::parse::ParseSess;
 use rustc_span::symbol::{sym, Symbol};
@@ -129,6 +131,28 @@ impl<'a> StringReader<'a> {
             .struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
     }
 
+    /// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly
+    /// complain about it.
+    fn lint_unicode_text_flow(&self, start: BytePos) {
+        // Opening delimiter of the length 2 is not included into the comment text.
+        let content_start = start + BytePos(2);
+        let content = self.str_from(content_start);
+        let span = self.mk_sp(start, self.pos);
+        const UNICODE_TEXT_FLOW_CHARS: &[char] = &[
+            '\u{202A}', '\u{202B}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}',
+            '\u{202C}', '\u{2069}',
+        ];
+        if content.contains(UNICODE_TEXT_FLOW_CHARS) {
+            self.sess.buffer_lint_with_diagnostic(
+                &TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
+                span,
+                ast::CRATE_NODE_ID,
+                "unicode codepoint changing visible direction of text present in comment",
+                BuiltinLintDiagnostics::UnicodeTextFlow(span, content.to_string()),
+            );
+        }
+    }
+
     /// Turns simple `rustc_lexer::TokenKind` enum into a rich
     /// `rustc_ast::TokenKind`. This turns strings into interned
     /// symbols and runs additional validation.
@@ -136,7 +160,12 @@ impl<'a> StringReader<'a> {
         Some(match token {
             rustc_lexer::TokenKind::LineComment { doc_style } => {
                 // Skip non-doc comments
-                let doc_style = doc_style?;
+                let doc_style = if let Some(doc_style) = doc_style {
+                    doc_style
+                } else {
+                    self.lint_unicode_text_flow(start);
+                    return None;
+                };
 
                 // Opening delimiter of the length 3 is not included into the symbol.
                 let content_start = start + BytePos(3);
@@ -158,7 +187,12 @@ impl<'a> StringReader<'a> {
                 }
 
                 // Skip non-doc comments
-                let doc_style = doc_style?;
+                let doc_style = if let Some(doc_style) = doc_style {
+                    doc_style
+                } else {
+                    self.lint_unicode_text_flow(start);
+                    return None;
+                };
 
                 // Opening delimiter of the length 3 and closing delimiter of the length 2
                 // are not included into the symbol.