diff options
| author | Deadbeef <ent3rm4n@gmail.com> | 2025-04-12 15:53:46 +0000 |
|---|---|---|
| committer | Deadbeef <ent3rm4n@gmail.com> | 2025-05-05 23:10:08 +0800 |
| commit | 662182637e100642d1e5de7cb193da837a14ae48 (patch) | |
| tree | 28e183216387d8d8cd939404a0c2fc01efaf450f /compiler/rustc_parse/src/lexer/mod.rs | |
| parent | 0c33fe2c3d3eecadd17a84b110bb067288a64f1c (diff) | |
| download | rust-662182637e100642d1e5de7cb193da837a14ae48.tar.gz rust-662182637e100642d1e5de7cb193da837a14ae48.zip | |
Implement RFC 3503: frontmatters
Supercedes #137193
Diffstat (limited to 'compiler/rustc_parse/src/lexer/mod.rs')
| -rw-r--r-- | compiler/rustc_parse/src/lexer/mod.rs | 106 |
1 files changed, 99 insertions, 7 deletions
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index e8a5cae54cf..78c5742414b 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -7,7 +7,9 @@ use rustc_ast::tokenstream::TokenStream; use rustc_ast::util::unicode::contains_text_flow_control_chars; use rustc_errors::codes::*; use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey}; -use rustc_lexer::{Base, Cursor, DocStyle, LiteralKind, RawStrError}; +use rustc_lexer::{ + Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_whitespace, +}; use rustc_literal_escaper::{EscapeError, Mode, unescape_mixed, unescape_unicode}; use rustc_session::lint::BuiltinLintDiag; use rustc_session::lint::builtin::{ @@ -15,7 +17,7 @@ use rustc_session::lint::builtin::{ TEXT_DIRECTION_CODEPOINT_IN_COMMENT, }; use rustc_session::parse::ParseSess; -use rustc_span::{BytePos, Pos, Span, Symbol}; +use rustc_span::{BytePos, Pos, Span, Symbol, sym}; use tracing::debug; use crate::errors; @@ -56,7 +58,7 @@ pub(crate) fn lex_token_trees<'psess, 'src>( start_pos = start_pos + BytePos::from_usize(shebang_len); } - let cursor = Cursor::new(src); + let cursor = Cursor::new(src, FrontmatterAllowed::Yes); let mut lexer = Lexer { psess, start_pos, @@ -193,6 +195,11 @@ impl<'psess, 'src> Lexer<'psess, 'src> { let content = self.str_from_to(content_start, content_end); self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style) } + rustc_lexer::TokenKind::Frontmatter { has_invalid_preceding_whitespace, invalid_infostring } => { + self.validate_frontmatter(start, has_invalid_preceding_whitespace, invalid_infostring); + preceded_by_whitespace = true; + continue; + } rustc_lexer::TokenKind::Whitespace => { preceded_by_whitespace = true; continue; @@ -256,7 +263,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { // was consumed. let lit_start = start + BytePos(prefix_len); self.pos = lit_start; - self.cursor = Cursor::new(&str_before[prefix_len as usize..]); + self.cursor = Cursor::new(&str_before[prefix_len as usize..], FrontmatterAllowed::No); self.report_unknown_prefix(start); let prefix_span = self.mk_sp(start, lit_start); return (Token::new(self.ident(start), prefix_span), preceded_by_whitespace); @@ -361,7 +368,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { // Reset the state so we just lex the `'r`. let lt_start = start + BytePos(2); self.pos = lt_start; - self.cursor = Cursor::new(&str_before[2 as usize..]); + self.cursor = Cursor::new(&str_before[2 as usize..], FrontmatterAllowed::No); let lifetime_name = self.str_from(start); let ident = Symbol::intern(lifetime_name); @@ -474,6 +481,91 @@ impl<'psess, 'src> Lexer<'psess, 'src> { } } + fn validate_frontmatter( + &self, + start: BytePos, + has_invalid_preceding_whitespace: bool, + invalid_infostring: bool, + ) { + let s = self.str_from(start); + let real_start = s.find("---").unwrap(); + let frontmatter_opening_pos = BytePos(real_start as u32) + start; + let s_new = &s[real_start..]; + let within = s_new.trim_start_matches('-'); + let len_opening = s_new.len() - within.len(); + + let frontmatter_opening_end_pos = frontmatter_opening_pos + BytePos(len_opening as u32); + if has_invalid_preceding_whitespace { + let line_start = + BytePos(s[..real_start].rfind("\n").map_or(0, |i| i as u32 + 1)) + start; + let span = self.mk_sp(line_start, frontmatter_opening_end_pos); + let label_span = self.mk_sp(line_start, frontmatter_opening_pos); + self.dcx().emit_err(errors::FrontmatterInvalidOpeningPrecedingWhitespace { + span, + note_span: label_span, + }); + } + + if invalid_infostring { + let line_end = s[real_start..].find('\n').unwrap_or(s[real_start..].len()); + let span = self.mk_sp( + frontmatter_opening_end_pos, + frontmatter_opening_pos + BytePos(line_end as u32), + ); + self.dcx().emit_err(errors::FrontmatterInvalidInfostring { span }); + } + + let last_line_start = within.rfind('\n').map_or(0, |i| i + 1); + let last_line = &within[last_line_start..]; + let last_line_trimmed = last_line.trim_start_matches(is_whitespace); + let last_line_start_pos = frontmatter_opening_end_pos + BytePos(last_line_start as u32); + + let frontmatter_span = self.mk_sp(frontmatter_opening_pos, self.pos); + self.psess.gated_spans.gate(sym::frontmatter, frontmatter_span); + + if !last_line_trimmed.starts_with("---") { + let label_span = self.mk_sp(frontmatter_opening_pos, frontmatter_opening_end_pos); + self.dcx().emit_err(errors::FrontmatterUnclosed { + span: frontmatter_span, + note_span: label_span, + }); + return; + } + + if last_line_trimmed.len() != last_line.len() { + let line_end = last_line_start_pos + BytePos(last_line.len() as u32); + let span = self.mk_sp(last_line_start_pos, line_end); + let whitespace_end = + last_line_start_pos + BytePos((last_line.len() - last_line_trimmed.len()) as u32); + let label_span = self.mk_sp(last_line_start_pos, whitespace_end); + self.dcx().emit_err(errors::FrontmatterInvalidClosingPrecedingWhitespace { + span, + note_span: label_span, + }); + } + + let rest = last_line_trimmed.trim_start_matches('-'); + let len_close = last_line_trimmed.len() - rest.len(); + if len_close != len_opening { + let span = self.mk_sp(frontmatter_opening_pos, self.pos); + let opening = self.mk_sp(frontmatter_opening_pos, frontmatter_opening_end_pos); + let last_line_close_pos = last_line_start_pos + BytePos(len_close as u32); + let close = self.mk_sp(last_line_start_pos, last_line_close_pos); + self.dcx().emit_err(errors::FrontmatterLengthMismatch { + span, + opening, + close, + len_opening, + len_close, + }); + } + + if !rest.trim_matches(is_whitespace).is_empty() { + let span = self.mk_sp(last_line_start_pos, self.pos); + self.dcx().emit_err(errors::FrontmatterExtraCharactersAfterClose { span }); + } + } + fn cook_doc_comment( &self, content_start: BytePos, @@ -839,7 +931,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { let space_pos = start + BytePos(1); let space_span = self.mk_sp(space_pos, space_pos); - let mut cursor = Cursor::new(str_before); + let mut cursor = Cursor::new(str_before, FrontmatterAllowed::No); let (is_string, span, unterminated) = match cursor.guarded_double_quoted_string() { Some(rustc_lexer::GuardedStr { n_hashes, terminated, token_len }) => { @@ -905,7 +997,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { // For backwards compatibility, roll back to after just the first `#` // and return the `Pound` token. self.pos = start + BytePos(1); - self.cursor = Cursor::new(&str_before[1..]); + self.cursor = Cursor::new(&str_before[1..], FrontmatterAllowed::No); token::Pound } } |
