Merge from rustc

author: The Miri Cronjob Bot <miri@cron.bot> 2025-05-31 05:01:23 +0000
committer: The Miri Cronjob Bot <miri@cron.bot> 2025-05-31 05:01:23 +0000
commit: 449ec7b3fec9ec7fb8d6bcf56b269f4b92128e20 (patch)
tree: 5f34a0f675e03dd7056bd9dfd39758ea117ff087 /compiler/rustc_parse/src
parent: 34cdfb75c1c77a9aee0d26e1c046390ea42a2515 (diff)
parent: 7a7bcbbcdbf2845164a94377d0e0efebb737ffd3 (diff)
download: rust-449ec7b3fec9ec7fb8d6bcf56b269f4b92128e20.tar.gz
rust-449ec7b3fec9ec7fb8d6bcf56b269f4b92128e20.zip
2 files changed, 123 insertions, 20 deletions
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 78c5742414b..2845bbed1c0 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -4,7 +4,7 @@ use diagnostics::make_unclosed_delims_error;
 use rustc_ast::ast::{self, AttrStyle};
 use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind};
 use rustc_ast::tokenstream::TokenStream;
-use rustc_ast::util::unicode::contains_text_flow_control_chars;
+use rustc_ast::util::unicode::{TEXT_FLOW_CONTROL_CHARS, contains_text_flow_control_chars};
 use rustc_errors::codes::*;
 use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey};
 use rustc_lexer::{
@@ -14,7 +14,7 @@ use rustc_literal_escaper::{EscapeError, Mode, unescape_mixed, unescape_unicode}
 use rustc_session::lint::BuiltinLintDiag;
 use rustc_session::lint::builtin::{
     RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
-    TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
+    TEXT_DIRECTION_CODEPOINT_IN_COMMENT, TEXT_DIRECTION_CODEPOINT_IN_LITERAL,
 };
 use rustc_session::parse::ParseSess;
 use rustc_span::{BytePos, Pos, Span, Symbol, sym};
@@ -174,6 +174,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                     // Opening delimiter of the length 3 is not included into the symbol.
                     let content_start = start + BytePos(3);
                     let content = self.str_from(content_start);
+                    self.lint_doc_comment_unicode_text_flow(start, content);
                     self.cook_doc_comment(content_start, content, CommentKind::Line, doc_style)
                 }
                 rustc_lexer::TokenKind::BlockComment { doc_style, terminated } => {
@@ -193,6 +194,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                     let content_start = start + BytePos(3);
                     let content_end = self.pos - BytePos(if terminated { 2 } else { 0 });
                     let content = self.str_from_to(content_start, content_end);
+                    self.lint_doc_comment_unicode_text_flow(start, content);
                     self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style)
                 }
                 rustc_lexer::TokenKind::Frontmatter { has_invalid_preceding_whitespace, invalid_infostring } => {
@@ -287,6 +289,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                     } else {
                         None
                     };
+                    self.lint_literal_unicode_text_flow(symbol, kind, self.mk_sp(start, self.pos), "literal");
                     token::Literal(token::Lit { kind, symbol, suffix })
                 }
                 rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
@@ -481,6 +484,88 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
         }
     }
 
+    fn lint_doc_comment_unicode_text_flow(&mut self, start: BytePos, content: &str) {
+        if contains_text_flow_control_chars(content) {
+            self.report_text_direction_codepoint(
+                content,
+                self.mk_sp(start, self.pos),
+                0,
+                false,
+                "doc comment",
+            );
+        }
+    }
+
+    fn lint_literal_unicode_text_flow(
+        &mut self,
+        text: Symbol,
+        lit_kind: token::LitKind,
+        span: Span,
+        label: &'static str,
+    ) {
+        if !contains_text_flow_control_chars(text.as_str()) {
+            return;
+        }
+        let (padding, point_at_inner_spans) = match lit_kind {
+            // account for `"` or `'`
+            token::LitKind::Str | token::LitKind::Char => (1, true),
+            // account for `c"`
+            token::LitKind::CStr => (2, true),
+            // account for `r###"`
+            token::LitKind::StrRaw(n) => (n as u32 + 2, true),
+            // account for `cr###"`
+            token::LitKind::CStrRaw(n) => (n as u32 + 3, true),
+            // suppress bad literals.
+            token::LitKind::Err(_) => return,
+            // Be conservative just in case new literals do support these.
+            _ => (0, false),
+        };
+        self.report_text_direction_codepoint(
+            text.as_str(),
+            span,
+            padding,
+            point_at_inner_spans,
+            label,
+        );
+    }
+
+    fn report_text_direction_codepoint(
+        &self,
+        text: &str,
+        span: Span,
+        padding: u32,
+        point_at_inner_spans: bool,
+        label: &str,
+    ) {
+        // Obtain the `Span`s for each of the forbidden chars.
+        let spans: Vec<_> = text
+            .char_indices()
+            .filter_map(|(i, c)| {
+                TEXT_FLOW_CONTROL_CHARS.contains(&c).then(|| {
+                    let lo = span.lo() + BytePos(i as u32 + padding);
+                    (c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32)))
+                })
+            })
+            .collect();
+
+        let count = spans.len();
+        let labels = point_at_inner_spans.then_some(spans.clone());
+
+        self.psess.buffer_lint(
+            TEXT_DIRECTION_CODEPOINT_IN_LITERAL,
+            span,
+            ast::CRATE_NODE_ID,
+            BuiltinLintDiag::HiddenUnicodeCodepoints {
+                label: label.to_string(),
+                count,
+                span_label: span,
+                labels,
+                escape: point_at_inner_spans && !spans.is_empty(),
+                spans,
+            },
+        );
+    }
+
     fn validate_frontmatter(
         &self,
         start: BytePos,
diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs
index 2a7910a6af4..1a44f4af8a6 100644
--- a/compiler/rustc_parse/src/parser/expr.rs
+++ b/compiler/rustc_parse/src/parser/expr.rs
@@ -3311,26 +3311,44 @@ impl<'a> Parser<'a> {
                             let sm = this.psess.source_map();
                             if let Ok(expr_lines) = sm.span_to_lines(expr_span)
                                 && let Ok(arm_start_lines) = sm.span_to_lines(arm_start_span)
-                                && arm_start_lines.lines[0].end_col == expr_lines.lines[0].end_col
                                 && expr_lines.lines.len() == 2
                             {
-                                // We check whether there's any trailing code in the parse span,
-                                // if there isn't, we very likely have the following:
-                                //
-                                // X |     &Y => "y"
-                                //   |        --    - missing comma
-                                //   |        |
-                                //   |        arrow_span
-                                // X |     &X => "x"
-                                //   |      - ^^ self.token.span
-                                //   |      |
-                                //   |      parsed until here as `"y" & X`
-                                err.span_suggestion_short(
-                                    arm_start_span.shrink_to_hi(),
-                                    "missing a comma here to end this `match` arm",
-                                    ",",
-                                    Applicability::MachineApplicable,
-                                );
+                                if arm_start_lines.lines[0].end_col == expr_lines.lines[0].end_col {
+                                    // We check whether there's any trailing code in the parse span,
+                                    // if there isn't, we very likely have the following:
+                                    //
+                                    // X |     &Y => "y"
+                                    //   |        --    - missing comma
+                                    //   |        |
+                                    //   |        arrow_span
+                                    // X |     &X => "x"
+                                    //   |      - ^^ self.token.span
+                                    //   |      |
+                                    //   |      parsed until here as `"y" & X`
+                                    err.span_suggestion_short(
+                                        arm_start_span.shrink_to_hi(),
+                                        "missing a comma here to end this `match` arm",
+                                        ",",
+                                        Applicability::MachineApplicable,
+                                    );
+                                } else if arm_start_lines.lines[0].end_col + rustc_span::CharPos(1)
+                                    == expr_lines.lines[0].end_col
+                                {
+                                    // similar to the above, but we may typo a `.` or `/` at the end of the line
+                                    let comma_span = arm_start_span
+                                        .shrink_to_hi()
+                                        .with_hi(arm_start_span.hi() + rustc_span::BytePos(1));
+                                    if let Ok(res) = sm.span_to_snippet(comma_span)
+                                        && (res == "." || res == "/")
+                                    {
+                                        err.span_suggestion_short(
+                                            comma_span,
+                                            "you might have meant to write a `,` to end this `match` arm",
+                                            ",",
+                                            Applicability::MachineApplicable,
+                                        );
+                                    }
+                                }
                             }
                         } else {
                             err.span_label(
author	The Miri Cronjob Bot <miri@cron.bot>	2025-05-31 05:01:23 +0000
committer	The Miri Cronjob Bot <miri@cron.bot>	2025-05-31 05:01:23 +0000
commit	449ec7b3fec9ec7fb8d6bcf56b269f4b92128e20 (patch)
tree	5f34a0f675e03dd7056bd9dfd39758ea117ff087 /compiler/rustc_parse/src
parent	34cdfb75c1c77a9aee0d26e1c046390ea42a2515 (diff)
parent	7a7bcbbcdbf2845164a94377d0e0efebb737ffd3 (diff)
download	rust-449ec7b3fec9ec7fb8d6bcf56b269f4b92128e20.tar.gz rust-449ec7b3fec9ec7fb8d6bcf56b269f4b92128e20.zip