diff options
| author | Matthias Krüger <476013+matthiaskrgr@users.noreply.github.com> | 2025-07-11 07:35:21 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-07-11 07:35:21 +0200 |
| commit | dbd2f303953e66db33f15bb78e2fbf23ca557b13 (patch) | |
| tree | d1b1b0fc1dcc92075d3d909641b0376e7feb7aa0 /compiler | |
| parent | 140f2fa5ae32814cd3915adbb86af8b93efce9c4 (diff) | |
| parent | a11ee5614c052d6339c56c2673780468c96dbc16 (diff) | |
| download | rust-dbd2f303953e66db33f15bb78e2fbf23ca557b13.tar.gz rust-dbd2f303953e66db33f15bb78e2fbf23ca557b13.zip | |
Rollup merge of #143708 - epage:pretty, r=compiler-errors
fix: Include frontmatter in -Zunpretty output. In the implementation (rust-lang/rust#140035), this was left as an open question for the tracking issue (rust-lang/rust#136889). My assumption is that this should be carried over. The test was carried over from rust-lang/rust#137193, which was superseded by rust-lang/rust#140035. Thankfully, either way, `-Zunpretty` is unstable and we can always change it even if we stabilize frontmatter.
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/rustc_ast_pretty/src/pprust/state.rs | 10 | ||||
| -rw-r--r-- | compiler/rustc_lexer/src/lib.rs | 29 | ||||
| -rw-r--r-- | compiler/rustc_lexer/src/tests.rs | 87 |
3 files changed, 113 insertions, 13 deletions
diff --git a/compiler/rustc_ast_pretty/src/pprust/state.rs b/compiler/rustc_ast_pretty/src/pprust/state.rs index aff98c63bcb..def0cb74d29 100644 --- a/compiler/rustc_ast_pretty/src/pprust/state.rs +++ b/compiler/rustc_ast_pretty/src/pprust/state.rs @@ -120,7 +120,7 @@ fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment> pos += shebang_len; } - for token in rustc_lexer::tokenize(&text[pos..]) { + for token in rustc_lexer::tokenize(&text[pos..], rustc_lexer::FrontmatterAllowed::Yes) { let token_text = &text[pos..pos + token.len as usize]; match token.kind { rustc_lexer::TokenKind::Whitespace => { @@ -171,6 +171,14 @@ fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment> }) } } + rustc_lexer::TokenKind::Frontmatter { .. } => { + code_to_the_left = false; + comments.push(Comment { + style: CommentStyle::Isolated, + lines: vec![token_text.to_string()], + pos: start_bpos + BytePos(pos as u32), + }); + } _ => { code_to_the_left = true; }
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index e30dbe80248..e80196ed567 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -273,14 +273,15 @@ pub fn strip_shebang(input: &str) -> Option<usize> { if let Some(input_tail) = input.strip_prefix("#!") { // Ok, this is a shebang but if the next non-whitespace token is `[`, // then it may be valid Rust code, so consider it Rust code. - let next_non_whitespace_token = tokenize(input_tail).map(|tok| tok.kind).find(|tok| { - !matches!( - tok, - TokenKind::Whitespace - | TokenKind::LineComment { doc_style: None } - | TokenKind::BlockComment { doc_style: None, .. } - ) - }); + let next_non_whitespace_token = tokenize(input_tail, FrontmatterAllowed::No).map(|tok| tok.kind).find(|tok| { + !matches!( + tok, + TokenKind::Whitespace + | TokenKind::LineComment { doc_style: None } + | TokenKind::BlockComment { doc_style: None, .. } + ) + }); if next_non_whitespace_token != Some(TokenKind::OpenBracket) { // No other choice than to consider this a shebang. return Some(2 + input_tail.lines().next().unwrap_or_default().len()); @@ -303,8 +304,16 @@ pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError> } /// Creates an iterator that produces tokens from the input string. -pub fn tokenize(input: &str) -> impl Iterator<Item = Token> { - let mut cursor = Cursor::new(input, FrontmatterAllowed::No); +/// +/// When parsing a full Rust document, +/// first [`strip_shebang`] and then allow frontmatters with [`FrontmatterAllowed::Yes`]. +/// +/// When tokenizing a slice of a document, be sure to disallow frontmatters with [`FrontmatterAllowed::No`] +pub fn tokenize( + input: &str, + frontmatter_allowed: FrontmatterAllowed, +) -> impl Iterator<Item = Token> { + let mut cursor = Cursor::new(input, frontmatter_allowed); std::iter::from_fn(move || { let token = cursor.advance_token(); if token.kind != TokenKind::Eof { Some(token) } else { None }
diff --git a/compiler/rustc_lexer/src/tests.rs b/compiler/rustc_lexer/src/tests.rs index fc8d9b9d57b..a7357ba38c8 100644 --- a/compiler/rustc_lexer/src/tests.rs +++ b/compiler/rustc_lexer/src/tests.rs @@ -124,8 +124,9 @@ fn test_valid_shebang() { assert_eq!(strip_shebang(input), None); } -fn check_lexing(src: &str, expect: Expect) { - let actual: String = tokenize(src).map(|token| format!("{:?}\n", token)).collect(); +fn check_lexing(src: &str, frontmatter_allowed: FrontmatterAllowed, expect: Expect) { + let actual: String = + tokenize(src, frontmatter_allowed).map(|token| format!("{:?}\n", token)).collect(); expect.assert_eq(&actual) } @@ -133,6 +134,7 @@ fn check_lexing(src: &str, expect: Expect) { fn smoke_test() { check_lexing( "/* my source file */ fn main() { println!(\"zebra\"); }\n", + FrontmatterAllowed::No, expect![[r#" Token { kind: BlockComment { doc_style: None, terminated: true }, len: 20 } Token { kind: Whitespace, len: 1 } @@ -171,6 +173,7 @@ fn comment_flavors() { /** outer doc block */ /*! inner doc block */ ", + FrontmatterAllowed::No, expect![[r#" Token { kind: Whitespace, len: 1 } Token { kind: LineComment { doc_style: None }, len: 7 } @@ -199,6 +202,7 @@ fn comment_flavors() { fn nested_block_comments() { check_lexing( "/* /* */ */'a'", + FrontmatterAllowed::No, expect![[r#" Token { kind: BlockComment { doc_style: None, terminated: true }, len: 11 } Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 } @@ -210,6 +214,7 @@ fn nested_block_comments() { fn characters() { check_lexing( "'a' ' ' '\\n'", + FrontmatterAllowed::No, expect![[r#" Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 } Token { kind: Whitespace, len: 1 } @@ -224,6 +229,7 @@ fn characters() { fn lifetime() { check_lexing( "'abc", + FrontmatterAllowed::No, expect![[r#" Token { kind: Lifetime { starts_with_number: false }, len: 4 } "#]], @@ -234,6 +240,7 @@ fn lifetime() { fn raw_string() { check_lexing( "r###\"\"#a\\b\x00c\"\"###", + FrontmatterAllowed::No, expect![[r#" Token { kind: Literal { kind: RawStr { n_hashes: Some(3) }, suffix_start: 17 }, len: 17 } "#]], @@ -257,6 +264,7 @@ b"a" r###"raw"###suffix br###"raw"###suffix "####, + FrontmatterAllowed::No, expect![[r#" Token { kind: Whitespace, len: 1 } Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 } @@ -286,3 +294,78 @@ br###"raw"###suffix "#]], ) } + +#[test] +fn frontmatter_allowed() { + check_lexing( + r#" +---cargo +[dependencies] +clap = "4" +--- + +fn main() {} +"#, + FrontmatterAllowed::Yes, + expect![[r#" + Token { kind: Whitespace, len: 1 } + Token { kind: Frontmatter { has_invalid_preceding_whitespace: false, invalid_infostring: false }, len: 38 } + Token { kind: Whitespace, len: 2 } + Token { kind: Ident, len: 2 } + Token { kind: Whitespace, len: 1 } + Token { kind: Ident, len: 4 } + Token { kind: OpenParen, len: 1 } + Token { kind: CloseParen, len: 1 } + Token { kind: Whitespace, len: 1 } + Token { kind: OpenBrace, len: 1 } + Token { kind: CloseBrace, len: 1 } + Token { kind: Whitespace, len: 1 } + "#]], + ) +} + +#[test] +fn frontmatter_disallowed() { + check_lexing( + r#" +---cargo +[dependencies] +clap = "4" +--- + +fn main() {} +"#, + FrontmatterAllowed::No, + expect![[r#" + Token { kind: Whitespace, len: 1 } + Token { kind: Minus, len: 1 } + Token { kind: Minus, len: 1 } + Token { kind: Minus, len: 1 } + Token { kind: Ident, len: 5 } + Token { kind: Whitespace, len: 1 } + Token { kind: OpenBracket, len: 1 } + Token { kind: Ident, len: 12 } + Token { kind: CloseBracket, len: 1 } + Token { kind: Whitespace, len: 1 } + Token { kind: Ident, len: 4 } + Token { kind: Whitespace, len: 1 } + Token { kind: Eq, len: 1 } + Token { kind: Whitespace, len: 1 } + Token { kind: Literal { kind: Str { terminated: true }, suffix_start: 3 }, len: 3 } + Token { kind: Whitespace, len: 1 } + Token { kind: Minus, len: 1 } + Token { kind: Minus, len: 1 } + Token { kind: Minus, len: 1 } + Token { kind: Whitespace, len: 2 } + Token { kind: Ident, len: 2 } + Token { kind: Whitespace, len: 1 } + Token { kind: Ident, len: 4 } + Token { kind: OpenParen, len: 1 } + Token { kind: CloseParen, len: 1 } + Token { kind: Whitespace, len: 1 } + Token { kind: OpenBrace, len: 1 } + Token { kind: CloseBrace, len: 1 } + Token { kind: Whitespace, len: 1 } + "#]], + ) +}
