diff options
Diffstat (limited to 'compiler/rustc_parse/src/lexer/mod.rs')
| -rw-r--r-- | compiler/rustc_parse/src/lexer/mod.rs | 40 |
1 files changed, 37 insertions, 3 deletions
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 1e65cc27154..8e90f73b44e 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -4,7 +4,9 @@ use rustc_ast::tokenstream::{Spacing, TokenStream}; use rustc_errors::{error_code, Applicability, DiagnosticBuilder, FatalError, PResult}; use rustc_lexer::unescape::{self, Mode}; use rustc_lexer::{Base, DocStyle, RawStrError}; -use rustc_session::lint::builtin::RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX; +use rustc_session::lint::builtin::{ + TEXT_DIRECTION_CODEPOINT_IN_COMMENT, RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, +}; use rustc_session::lint::BuiltinLintDiagnostics; use rustc_session::parse::ParseSess; use rustc_span::symbol::{sym, Symbol}; @@ -129,6 +131,28 @@ impl<'a> StringReader<'a> { .struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c))) } + /// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly + /// complain about it. + fn lint_unicode_text_flow(&self, start: BytePos) { + // Opening delimiter of the length 2 is not included into the comment text. + let content_start = start + BytePos(2); + let content = self.str_from(content_start); + let span = self.mk_sp(start, self.pos); + const UNICODE_TEXT_FLOW_CHARS: &[char] = &[ + '\u{202A}', '\u{202B}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}', + '\u{202C}', '\u{2069}', + ]; + if content.contains(UNICODE_TEXT_FLOW_CHARS) { + self.sess.buffer_lint_with_diagnostic( + &TEXT_DIRECTION_CODEPOINT_IN_COMMENT, + span, + ast::CRATE_NODE_ID, + "unicode codepoint changing visible direction of text present in comment", + BuiltinLintDiagnostics::UnicodeTextFlow(span, content.to_string()), + ); + } + } + /// Turns simple `rustc_lexer::TokenKind` enum into a rich /// `rustc_ast::TokenKind`. This turns strings into interned /// symbols and runs additional validation. @@ -136,7 +160,12 @@ impl<'a> StringReader<'a> { Some(match token { rustc_lexer::TokenKind::LineComment { doc_style } => { // Skip non-doc comments - let doc_style = doc_style?; + let doc_style = if let Some(doc_style) = doc_style { + doc_style + } else { + self.lint_unicode_text_flow(start); + return None; + }; // Opening delimiter of the length 3 is not included into the symbol. let content_start = start + BytePos(3); @@ -158,7 +187,12 @@ impl<'a> StringReader<'a> { } // Skip non-doc comments - let doc_style = doc_style?; + let doc_style = if let Some(doc_style) = doc_style { + doc_style + } else { + self.lint_unicode_text_flow(start); + return None; + }; // Opening delimiter of the length 3 and closing delimiter of the length 2 // are not included into the symbol. |
