diff options
| author | Matthias Krüger <476013+matthiaskrgr@users.noreply.github.com> | 2025-07-11 07:35:21 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-07-11 07:35:21 +0200 |
| commit | dbd2f303953e66db33f15bb78e2fbf23ca557b13 (patch) | |
| tree | d1b1b0fc1dcc92075d3d909641b0376e7feb7aa0 /compiler | |
| parent | 140f2fa5ae32814cd3915adbb86af8b93efce9c4 (diff) | |
| parent | a11ee5614c052d6339c56c2673780468c96dbc16 (diff) | |
| download | rust-dbd2f303953e66db33f15bb78e2fbf23ca557b13.tar.gz rust-dbd2f303953e66db33f15bb78e2fbf23ca557b13.zip | |
Rollup merge of #143708 - epage:pretty, r=compiler-errors
fix: Include frontmatter in -Zunpretty output. In the implementation (rust-lang/rust#140035), this was left as an open question for the tracking issue (rust-lang/rust#136889). My assumption is that this should be carried over. The test was carried over from rust-lang/rust#137193, which was superseded by rust-lang/rust#140035. Thankfully, either way, `-Zunpretty` is unstable and we can always change it even if we stabilize frontmatter.
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/rustc_ast_pretty/src/pprust/state.rs | 10 | ||||
| -rw-r--r-- | compiler/rustc_lexer/src/lib.rs | 29 | ||||
| -rw-r--r-- | compiler/rustc_lexer/src/tests.rs | 87 |
3 files changed, 113 insertions, 13 deletions
diff --git a/compiler/rustc_ast_pretty/src/pprust/state.rs b/compiler/rustc_ast_pretty/src/pprust/state.rs index aff98c63bcb..def0cb74d29 100644 --- a/compiler/rustc_ast_pretty/src/pprust/state.rs +++ b/compiler/rustc_ast_pretty/src/pprust/state.rs @@ -120,7 +120,7 @@ fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment> pos += shebang_len; } - for token in rustc_lexer::tokenize(&text[pos..]) { + for token in rustc_lexer::tokenize(&text[pos..], rustc_lexer::FrontmatterAllowed::Yes) { let token_text = &text[pos..pos + token.len as usize]; match token.kind { rustc_lexer::TokenKind::Whitespace => { @@ -171,6 +171,14 @@ fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment> }) } } + rustc_lexer::TokenKind::Frontmatter { .. } => { + code_to_the_left = false; + comments.push(Comment { + style: CommentStyle::Isolated, + lines: vec![token_text.to_string()], + pos: start_bpos + BytePos(pos as u32), + }); + } _ => { code_to_the_left = true; }
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index e30dbe80248..e80196ed567 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -273,14 +273,15 @@ pub fn strip_shebang(input: &str) -> Option<usize> { if let Some(input_tail) = input.strip_prefix("#!") { // Ok, this is a shebang but if the next non-whitespace token is `[`, // then it may be valid Rust code, so consider it Rust code. - let next_non_whitespace_token = tokenize(input_tail).map(|tok| tok.kind).find(|tok| { - !matches!( - tok, - TokenKind::Whitespace - | TokenKind::LineComment { doc_style: None } - | TokenKind::BlockComment { doc_style: None, .. } - ) - }); + let next_non_whitespace_token = tokenize(input_tail, FrontmatterAllowed::No).map(|tok| tok.kind).find(|tok| { + !matches!( + tok, + TokenKind::Whitespace + | TokenKind::LineComment { doc_style: None } + | TokenKind::BlockComment { doc_style: None, .. } + ) + }); if next_non_whitespace_token != Some(TokenKind::OpenBracket) { // No other choice than to consider this a shebang. return Some(2 + input_tail.lines().next().unwrap_or_default().len()); @@ -303,8 +304,16 @@ pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError> } /// Creates an iterator that produces tokens from the input string. -pub fn tokenize(input: &str) -> impl Iterator<Item = Token> { - let mut cursor = Cursor::new(input, FrontmatterAllowed::No); +/// +/// When parsing a full Rust document, +/// first [`strip_shebang`] and then allow frontmatters with [`FrontmatterAllowed::Yes`]. +/// +/// When tokenizing a slice of a document, be sure to disallow frontmatters with [`FrontmatterAllowed::No`] +pub fn tokenize( + input: &str, + frontmatter_allowed: FrontmatterAllowed, +) -> impl Iterator<Item = Token> { + let mut cursor = Cursor::new(input, frontmatter_allowed); std::iter::from_fn(move || { let token = cursor.advance_token(); if token.kind != TokenKind::Eof { Some(token) } else { None }
diff --git a/compiler/rustc_lexer/src/tests.rs b/compiler/rustc_lexer/src/tests.rs index fc8d9b9d57b..a7357ba38c8 100644 --- a/compiler/rustc_lexer/src/tests.rs +++ b/compiler/rustc_lexer/src/tests.rs @@ -124,8 +124,9 @@ fn test_valid_shebang() { assert_eq!(strip_shebang(input), None); } -fn check_lexing(src: &str, expect: Expect) { - let actual: String = tokenize(src).map(|token| format!("{:?}\n", token)).collect(); +fn check_lexing(src: &str, frontmatter_allowed: FrontmatterAllowed, expect: Expect) { + let actual: String = + tokenize(src, frontmatter_allowed).map(|token| format!("{:?}\n", token)).collect(); expect.assert_eq(&actual) } @@ -133,6 +134,7 @@ fn check_lexing(src: &str, expect: Expect) { fn smoke_test() { check_lexing( "/* my source file */ fn main() { println!(\"zebra\"); }\n", + FrontmatterAllowed::No, expect![[r#" Token { kind: BlockComment { doc_style: None, terminated: true }, len: 20 } Token { kind: Whitespace, len: 1 } @@ -171,6 +173,7 @@ fn comment_flavors() { /** outer doc block */ /*! inner doc block */ ", + FrontmatterAllowed::No, expect![[r#" Token { kind: Whitespace, len: 1 } Token { kind: LineComment { doc_style: None }, len: 7 } @@ -199,6 +202,7 @@ fn comment_flavors() { fn nested_block_comments() { check_lexing( "/* /* */ */'a'", + FrontmatterAllowed::No, expect![[r#" Token { kind: BlockComment { doc_style: None, terminated: true }, len: 11 } Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 } @@ -210,6 +214,7 @@ fn nested_block_comments() { fn characters() { check_lexing( "'a' ' ' '\\n'", + FrontmatterAllowed::No, expect![[r#" Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 } Token { kind: Whitespace, len: 1 } @@ -224,6 +229,7 @@ fn characters() { fn lifetime() { check_lexing( "'abc", + FrontmatterAllowed::No, expect![[r#" Token { kind: Lifetime { starts_with_number: false }, len: 4 } "#]], @@ -234,6 +240,7 @@ fn lifetime() { fn raw_string() { check_lexing( "r###\"\"#a\\b\x00c\"\"###", + FrontmatterAllowed::No, expect![[r#" Token { kind: Literal { kind: RawStr { n_hashes: Some(3) }, suffix_start: 17 }, len: 17 } "#]], @@ -257,6 +264,7 @@ b"a" r###"raw"###suffix br###"raw"###suffix "####, + FrontmatterAllowed::No, expect![[r#" Token { kind: Whitespace, len: 1 } Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 } @@ -286,3 +294,78 @@ br###"raw"###suffix "#]], ) } + +#[test] +fn frontmatter_allowed() { + check_lexing( + r#" +---cargo +[dependencies] +clap = "4" +--- + +fn main() {} +"#, + FrontmatterAllowed::Yes, + expect![[r#" + Token { kind: Whitespace, len: 1 } + Token { kind: Frontmatter { has_invalid_preceding_whitespace: false, invalid_infostring: false }, len: 38 } + Token { kind: Whitespace, len: 2 } + Token { kind: Ident, len: 2 } + Token { kind: Whitespace, len: 1 } + Token { kind: Ident, len: 4 } + Token { kind: OpenParen, len: 1 } + Token { kind: CloseParen, len: 1 } + Token { kind: Whitespace, len: 1 } + Token { kind: OpenBrace, len: 1 } + Token { kind: CloseBrace, len: 1 } + Token { kind: Whitespace, len: 1 } + "#]], + ) +} + +#[test] +fn frontmatter_disallowed() { + check_lexing( + r#" +---cargo +[dependencies] +clap = "4" +--- + +fn main() {} +"#, + FrontmatterAllowed::No, + expect![[r#" + Token { kind: Whitespace, len: 1 } + Token { kind: Minus, len: 1 } + Token { kind: Minus, len: 1 } + Token { kind: Minus, len: 1 } + Token { kind: Ident, len: 5 } + Token { kind: Whitespace, len: 1 } + Token { kind: OpenBracket, len: 1 } + Token { kind: Ident, len: 12 } + Token { kind: CloseBracket, len: 1 } + Token { kind: Whitespace, len: 1 } + Token { kind: Ident, len: 4 } + Token { kind: Whitespace, len: 1 } + Token { kind: Eq, len: 1 } + Token { kind: Whitespace, len: 1 } + Token { kind: Literal { kind: Str { terminated: true }, suffix_start: 3 }, len: 3 } + Token { kind: Whitespace, len: 1 } + Token { kind: Minus, len: 1 } + Token { kind: Minus, len: 1 } + Token { kind: Minus, len: 1 } + Token { kind: Whitespace, len: 2 } + Token { kind: Ident, len: 2 } + Token { kind: Whitespace, len: 1 } + Token { kind: Ident, len: 4 } + Token { kind: OpenParen, len: 1 } + Token { kind: CloseParen, len: 1 } + Token { kind: Whitespace, len: 1 } + Token { kind: OpenBrace, len: 1 } + Token { kind: CloseBrace, len: 1 } + Token { kind: Whitespace, len: 1 } + "#]], + ) +}
