about summary refs log tree commit diff
path: root/compiler/rustc_parse/src/lexer/mod.rs
diff options
context:
space:
mode:
authorDeadbeef <ent3rm4n@gmail.com>2025-04-12 15:53:46 +0000
committerDeadbeef <ent3rm4n@gmail.com>2025-05-05 23:10:08 +0800
commit662182637e100642d1e5de7cb193da837a14ae48 (patch)
tree28e183216387d8d8cd939404a0c2fc01efaf450f /compiler/rustc_parse/src/lexer/mod.rs
parent0c33fe2c3d3eecadd17a84b110bb067288a64f1c (diff)
downloadrust-662182637e100642d1e5de7cb193da837a14ae48.tar.gz
rust-662182637e100642d1e5de7cb193da837a14ae48.zip
Implement RFC 3503: frontmatters
Supercedes #137193
Diffstat (limited to 'compiler/rustc_parse/src/lexer/mod.rs')
-rw-r--r--compiler/rustc_parse/src/lexer/mod.rs106
1 files changed, 99 insertions, 7 deletions
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index e8a5cae54cf..78c5742414b 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -7,7 +7,9 @@ use rustc_ast::tokenstream::TokenStream;
 use rustc_ast::util::unicode::contains_text_flow_control_chars;
 use rustc_errors::codes::*;
 use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey};
-use rustc_lexer::{Base, Cursor, DocStyle, LiteralKind, RawStrError};
+use rustc_lexer::{
+    Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_whitespace,
+};
 use rustc_literal_escaper::{EscapeError, Mode, unescape_mixed, unescape_unicode};
 use rustc_session::lint::BuiltinLintDiag;
 use rustc_session::lint::builtin::{
@@ -15,7 +17,7 @@ use rustc_session::lint::builtin::{
     TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
 };
 use rustc_session::parse::ParseSess;
-use rustc_span::{BytePos, Pos, Span, Symbol};
+use rustc_span::{BytePos, Pos, Span, Symbol, sym};
 use tracing::debug;
 
 use crate::errors;
@@ -56,7 +58,7 @@ pub(crate) fn lex_token_trees<'psess, 'src>(
         start_pos = start_pos + BytePos::from_usize(shebang_len);
     }
 
-    let cursor = Cursor::new(src);
+    let cursor = Cursor::new(src, FrontmatterAllowed::Yes);
     let mut lexer = Lexer {
         psess,
         start_pos,
@@ -193,6 +195,11 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                     let content = self.str_from_to(content_start, content_end);
                     self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style)
                 }
+                rustc_lexer::TokenKind::Frontmatter { has_invalid_preceding_whitespace, invalid_infostring } => {
+                    self.validate_frontmatter(start, has_invalid_preceding_whitespace, invalid_infostring);
+                    preceded_by_whitespace = true;
+                    continue;
+                }
                 rustc_lexer::TokenKind::Whitespace => {
                     preceded_by_whitespace = true;
                     continue;
@@ -256,7 +263,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                     // was consumed.
                     let lit_start = start + BytePos(prefix_len);
                     self.pos = lit_start;
-                    self.cursor = Cursor::new(&str_before[prefix_len as usize..]);
+                    self.cursor = Cursor::new(&str_before[prefix_len as usize..], FrontmatterAllowed::No);
                     self.report_unknown_prefix(start);
                     let prefix_span = self.mk_sp(start, lit_start);
                     return (Token::new(self.ident(start), prefix_span), preceded_by_whitespace);
@@ -361,7 +368,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                         // Reset the state so we just lex the `'r`.
                         let lt_start = start + BytePos(2);
                         self.pos = lt_start;
-                        self.cursor = Cursor::new(&str_before[2 as usize..]);
+                        self.cursor = Cursor::new(&str_before[2 as usize..], FrontmatterAllowed::No);
 
                         let lifetime_name = self.str_from(start);
                         let ident = Symbol::intern(lifetime_name);
@@ -474,6 +481,91 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
         }
     }
 
+    fn validate_frontmatter(
+        &self,
+        start: BytePos,
+        has_invalid_preceding_whitespace: bool,
+        invalid_infostring: bool,
+    ) {
+        let s = self.str_from(start);
+        let real_start = s.find("---").unwrap();
+        let frontmatter_opening_pos = BytePos(real_start as u32) + start;
+        let s_new = &s[real_start..];
+        let within = s_new.trim_start_matches('-');
+        let len_opening = s_new.len() - within.len();
+
+        let frontmatter_opening_end_pos = frontmatter_opening_pos + BytePos(len_opening as u32);
+        if has_invalid_preceding_whitespace {
+            let line_start =
+                BytePos(s[..real_start].rfind("\n").map_or(0, |i| i as u32 + 1)) + start;
+            let span = self.mk_sp(line_start, frontmatter_opening_end_pos);
+            let label_span = self.mk_sp(line_start, frontmatter_opening_pos);
+            self.dcx().emit_err(errors::FrontmatterInvalidOpeningPrecedingWhitespace {
+                span,
+                note_span: label_span,
+            });
+        }
+
+        if invalid_infostring {
+            let line_end = s[real_start..].find('\n').unwrap_or(s[real_start..].len());
+            let span = self.mk_sp(
+                frontmatter_opening_end_pos,
+                frontmatter_opening_pos + BytePos(line_end as u32),
+            );
+            self.dcx().emit_err(errors::FrontmatterInvalidInfostring { span });
+        }
+
+        let last_line_start = within.rfind('\n').map_or(0, |i| i + 1);
+        let last_line = &within[last_line_start..];
+        let last_line_trimmed = last_line.trim_start_matches(is_whitespace);
+        let last_line_start_pos = frontmatter_opening_end_pos + BytePos(last_line_start as u32);
+
+        let frontmatter_span = self.mk_sp(frontmatter_opening_pos, self.pos);
+        self.psess.gated_spans.gate(sym::frontmatter, frontmatter_span);
+
+        if !last_line_trimmed.starts_with("---") {
+            let label_span = self.mk_sp(frontmatter_opening_pos, frontmatter_opening_end_pos);
+            self.dcx().emit_err(errors::FrontmatterUnclosed {
+                span: frontmatter_span,
+                note_span: label_span,
+            });
+            return;
+        }
+
+        if last_line_trimmed.len() != last_line.len() {
+            let line_end = last_line_start_pos + BytePos(last_line.len() as u32);
+            let span = self.mk_sp(last_line_start_pos, line_end);
+            let whitespace_end =
+                last_line_start_pos + BytePos((last_line.len() - last_line_trimmed.len()) as u32);
+            let label_span = self.mk_sp(last_line_start_pos, whitespace_end);
+            self.dcx().emit_err(errors::FrontmatterInvalidClosingPrecedingWhitespace {
+                span,
+                note_span: label_span,
+            });
+        }
+
+        let rest = last_line_trimmed.trim_start_matches('-');
+        let len_close = last_line_trimmed.len() - rest.len();
+        if len_close != len_opening {
+            let span = self.mk_sp(frontmatter_opening_pos, self.pos);
+            let opening = self.mk_sp(frontmatter_opening_pos, frontmatter_opening_end_pos);
+            let last_line_close_pos = last_line_start_pos + BytePos(len_close as u32);
+            let close = self.mk_sp(last_line_start_pos, last_line_close_pos);
+            self.dcx().emit_err(errors::FrontmatterLengthMismatch {
+                span,
+                opening,
+                close,
+                len_opening,
+                len_close,
+            });
+        }
+
+        if !rest.trim_matches(is_whitespace).is_empty() {
+            let span = self.mk_sp(last_line_start_pos, self.pos);
+            self.dcx().emit_err(errors::FrontmatterExtraCharactersAfterClose { span });
+        }
+    }
+
     fn cook_doc_comment(
         &self,
         content_start: BytePos,
@@ -839,7 +931,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
         let space_pos = start + BytePos(1);
         let space_span = self.mk_sp(space_pos, space_pos);
 
-        let mut cursor = Cursor::new(str_before);
+        let mut cursor = Cursor::new(str_before, FrontmatterAllowed::No);
 
         let (is_string, span, unterminated) = match cursor.guarded_double_quoted_string() {
             Some(rustc_lexer::GuardedStr { n_hashes, terminated, token_len }) => {
@@ -905,7 +997,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
             // For backwards compatibility, roll back to after just the first `#`
             // and return the `Pound` token.
             self.pos = start + BytePos(1);
-            self.cursor = Cursor::new(&str_before[1..]);
+            self.cursor = Cursor::new(&str_before[1..], FrontmatterAllowed::No);
             token::Pound
         }
     }