diff options
| author | The Miri Cronjob Bot <miri@cron.bot> | 2025-05-31 05:01:23 +0000 |
|---|---|---|
| committer | The Miri Cronjob Bot <miri@cron.bot> | 2025-05-31 05:01:23 +0000 |
| commit | 449ec7b3fec9ec7fb8d6bcf56b269f4b92128e20 (patch) | |
| tree | 5f34a0f675e03dd7056bd9dfd39758ea117ff087 /compiler/rustc_parse/src | |
| parent | 34cdfb75c1c77a9aee0d26e1c046390ea42a2515 (diff) | |
| parent | 7a7bcbbcdbf2845164a94377d0e0efebb737ffd3 (diff) | |
| download | rust-449ec7b3fec9ec7fb8d6bcf56b269f4b92128e20.tar.gz rust-449ec7b3fec9ec7fb8d6bcf56b269f4b92128e20.zip | |
Merge from rustc
Diffstat (limited to 'compiler/rustc_parse/src')
| -rw-r--r-- | compiler/rustc_parse/src/lexer/mod.rs | 89 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/expr.rs | 54 |
2 files changed, 123 insertions, 20 deletions
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 78c5742414b..2845bbed1c0 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -4,7 +4,7 @@ use diagnostics::make_unclosed_delims_error; use rustc_ast::ast::{self, AttrStyle}; use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind}; use rustc_ast::tokenstream::TokenStream; -use rustc_ast::util::unicode::contains_text_flow_control_chars; +use rustc_ast::util::unicode::{TEXT_FLOW_CONTROL_CHARS, contains_text_flow_control_chars}; use rustc_errors::codes::*; use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey}; use rustc_lexer::{ @@ -14,7 +14,7 @@ use rustc_literal_escaper::{EscapeError, Mode, unescape_mixed, unescape_unicode} use rustc_session::lint::BuiltinLintDiag; use rustc_session::lint::builtin::{ RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX, - TEXT_DIRECTION_CODEPOINT_IN_COMMENT, + TEXT_DIRECTION_CODEPOINT_IN_COMMENT, TEXT_DIRECTION_CODEPOINT_IN_LITERAL, }; use rustc_session::parse::ParseSess; use rustc_span::{BytePos, Pos, Span, Symbol, sym}; @@ -174,6 +174,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { // Opening delimiter of the length 3 is not included into the symbol. let content_start = start + BytePos(3); let content = self.str_from(content_start); + self.lint_doc_comment_unicode_text_flow(start, content); self.cook_doc_comment(content_start, content, CommentKind::Line, doc_style) } rustc_lexer::TokenKind::BlockComment { doc_style, terminated } => { @@ -193,6 +194,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { let content_start = start + BytePos(3); let content_end = self.pos - BytePos(if terminated { 2 } else { 0 }); let content = self.str_from_to(content_start, content_end); + self.lint_doc_comment_unicode_text_flow(start, content); self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style) } rustc_lexer::TokenKind::Frontmatter { has_invalid_preceding_whitespace, invalid_infostring } => { @@ -287,6 +289,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { } else { None }; + self.lint_literal_unicode_text_flow(symbol, kind, self.mk_sp(start, self.pos), "literal"); token::Literal(token::Lit { kind, symbol, suffix }) } rustc_lexer::TokenKind::Lifetime { starts_with_number } => { @@ -481,6 +484,88 @@ impl<'psess, 'src> Lexer<'psess, 'src> { } } + fn lint_doc_comment_unicode_text_flow(&mut self, start: BytePos, content: &str) { + if contains_text_flow_control_chars(content) { + self.report_text_direction_codepoint( + content, + self.mk_sp(start, self.pos), + 0, + false, + "doc comment", + ); + } + } + + fn lint_literal_unicode_text_flow( + &mut self, + text: Symbol, + lit_kind: token::LitKind, + span: Span, + label: &'static str, + ) { + if !contains_text_flow_control_chars(text.as_str()) { + return; + } + let (padding, point_at_inner_spans) = match lit_kind { + // account for `"` or `'` + token::LitKind::Str | token::LitKind::Char => (1, true), + // account for `c"` + token::LitKind::CStr => (2, true), + // account for `r###"` + token::LitKind::StrRaw(n) => (n as u32 + 2, true), + // account for `cr###"` + token::LitKind::CStrRaw(n) => (n as u32 + 3, true), + // suppress bad literals. + token::LitKind::Err(_) => return, + // Be conservative just in case new literals do support these. + _ => (0, false), + }; + self.report_text_direction_codepoint( + text.as_str(), + span, + padding, + point_at_inner_spans, + label, + ); + } + + fn report_text_direction_codepoint( + &self, + text: &str, + span: Span, + padding: u32, + point_at_inner_spans: bool, + label: &str, + ) { + // Obtain the `Span`s for each of the forbidden chars. + let spans: Vec<_> = text + .char_indices() + .filter_map(|(i, c)| { + TEXT_FLOW_CONTROL_CHARS.contains(&c).then(|| { + let lo = span.lo() + BytePos(i as u32 + padding); + (c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32))) + }) + }) + .collect(); + + let count = spans.len(); + let labels = point_at_inner_spans.then_some(spans.clone()); + + self.psess.buffer_lint( + TEXT_DIRECTION_CODEPOINT_IN_LITERAL, + span, + ast::CRATE_NODE_ID, + BuiltinLintDiag::HiddenUnicodeCodepoints { + label: label.to_string(), + count, + span_label: span, + labels, + escape: point_at_inner_spans && !spans.is_empty(), + spans, + }, + ); + } + fn validate_frontmatter( &self, start: BytePos, diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index 2a7910a6af4..1a44f4af8a6 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -3311,26 +3311,44 @@ impl<'a> Parser<'a> { let sm = this.psess.source_map(); if let Ok(expr_lines) = sm.span_to_lines(expr_span) && let Ok(arm_start_lines) = sm.span_to_lines(arm_start_span) - && arm_start_lines.lines[0].end_col == expr_lines.lines[0].end_col && expr_lines.lines.len() == 2 { - // We check whether there's any trailing code in the parse span, - // if there isn't, we very likely have the following: - // - // X | &Y => "y" - // | -- - missing comma - // | | - // | arrow_span - // X | &X => "x" - // | - ^^ self.token.span - // | | - // | parsed until here as `"y" & X` - err.span_suggestion_short( - arm_start_span.shrink_to_hi(), - "missing a comma here to end this `match` arm", - ",", - Applicability::MachineApplicable, - ); + if arm_start_lines.lines[0].end_col == expr_lines.lines[0].end_col { + // We check whether there's any trailing code in the parse span, + // if there isn't, we very likely have the following: + // + // X | &Y => "y" + // | -- - missing comma + // | | + // | arrow_span + // X | &X => "x" + // | - ^^ self.token.span + // | | + // | parsed until here as `"y" & X` + err.span_suggestion_short( + arm_start_span.shrink_to_hi(), + "missing a comma here to end this `match` arm", + ",", + Applicability::MachineApplicable, + ); + } else if arm_start_lines.lines[0].end_col + rustc_span::CharPos(1) + == expr_lines.lines[0].end_col + { + // similar to the above, but we may typo a `.` or `/` at the end of the line + let comma_span = arm_start_span + .shrink_to_hi() + .with_hi(arm_start_span.hi() + rustc_span::BytePos(1)); + if let Ok(res) = sm.span_to_snippet(comma_span) + && (res == "." || res == "/") + { + err.span_suggestion_short( + comma_span, + "you might have meant to write a `,` to end this `match` arm", + ",", + Applicability::MachineApplicable, + ); + } + } } } else { err.span_label( |
