| field     | value                                                          | date                      |
|-----------|----------------------------------------------------------------|---------------------------|
| author    | Ed Page <eopage@gmail.com>                                     | 2025-07-09 16:40:22 -0500 |
| committer | Ed Page <eopage@gmail.com>                                     | 2025-07-09 16:42:27 -0500 |
| commit    | 45a1e492b1adfc6f7664d1da736dff147e5c3168 (patch)               |                           |
| tree      | 8e7a7a4cd1084e16fae200a80d5bafae5a8c6547 /compiler/rustc_lexer |                           |
| parent    | 425cd0f571b3888be81ebad295af8e3c903fb244 (diff)                |                           |
| download  | rust-45a1e492b1adfc6f7664d1da736dff147e5c3168.tar.gz, rust-45a1e492b1adfc6f7664d1da736dff147e5c3168.zip | |
feat(lexer): Allow including frontmatter with 'tokenize'
Diffstat (limited to 'compiler/rustc_lexer')

| mode       | file                              | lines changed |
|------------|-----------------------------------|---------------|
| -rw-r--r-- | compiler/rustc_lexer/src/lib.rs   | 29            |
| -rw-r--r-- | compiler/rustc_lexer/src/tests.rs | 3             |

2 files changed, 21 insertions, 11 deletions
```diff
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index e30dbe80248..e80196ed567 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -273,14 +273,15 @@ pub fn strip_shebang(input: &str) -> Option<usize> {
     if let Some(input_tail) = input.strip_prefix("#!") {
         // Ok, this is a shebang but if the next non-whitespace token is `[`,
         // then it may be valid Rust code, so consider it Rust code.
-        let next_non_whitespace_token = tokenize(input_tail).map(|tok| tok.kind).find(|tok| {
-            !matches!(
-                tok,
-                TokenKind::Whitespace
-                    | TokenKind::LineComment { doc_style: None }
-                    | TokenKind::BlockComment { doc_style: None, .. }
-            )
-        });
+        let next_non_whitespace_token =
+            tokenize(input_tail, FrontmatterAllowed::No).map(|tok| tok.kind).find(|tok| {
+                !matches!(
+                    tok,
+                    TokenKind::Whitespace
+                        | TokenKind::LineComment { doc_style: None }
+                        | TokenKind::BlockComment { doc_style: None, .. }
+                )
+            });
         if next_non_whitespace_token != Some(TokenKind::OpenBracket) {
             // No other choice than to consider this a shebang.
             return Some(2 + input_tail.lines().next().unwrap_or_default().len());
@@ -303,8 +304,16 @@ pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError>
 }
 
 /// Creates an iterator that produces tokens from the input string.
-pub fn tokenize(input: &str) -> impl Iterator<Item = Token> {
-    let mut cursor = Cursor::new(input, FrontmatterAllowed::No);
+///
+/// When parsing a full Rust document,
+/// first [`strip_shebang`] and then allow frontmatters with [`FrontmatterAllowed::Yes`].
+///
+/// When tokenizing a slice of a document, be sure to disallow frontmatters with [`FrontmatterAllowed::No`]
+pub fn tokenize(
+    input: &str,
+    frontmatter_allowed: FrontmatterAllowed,
+) -> impl Iterator<Item = Token> {
+    let mut cursor = Cursor::new(input, frontmatter_allowed);
     std::iter::from_fn(move || {
         let token = cursor.advance_token();
         if token.kind != TokenKind::Eof { Some(token) } else { None }
diff --git a/compiler/rustc_lexer/src/tests.rs b/compiler/rustc_lexer/src/tests.rs
index fc8d9b9d57b..a9fcb481759 100644
--- a/compiler/rustc_lexer/src/tests.rs
+++ b/compiler/rustc_lexer/src/tests.rs
@@ -125,7 +125,8 @@ fn test_valid_shebang() {
 }
 
 fn check_lexing(src: &str, expect: Expect) {
-    let actual: String = tokenize(src).map(|token| format!("{:?}\n", token)).collect();
+    let actual: String =
+        tokenize(src, FrontmatterAllowed::No).map(|token| format!("{:?}\n", token)).collect();
     expect.assert_eq(&actual)
 }
 
```
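Below is a minimal sketch, not part of the commit, of how a caller might drive the changed `tokenize` signature following the guidance in the new doc comment: strip the shebang first, lex the full document with `FrontmatterAllowed::Yes`, and keep `FrontmatterAllowed::No` when lexing a slice (as `check_lexing` does in the tests). The `source` string and the standalone `main` are illustrative, and it assumes `rustc_lexer` is available as a library dependency.

```rust
use rustc_lexer::{strip_shebang, tokenize, FrontmatterAllowed};

fn main() {
    // Illustrative cargo-script-style document: shebang, frontmatter, code.
    let source = "#!/usr/bin/env cargo\n\
                  ---\n\
                  [dependencies]\n\
                  ---\n\
                  fn main() {}\n";

    // Full document: strip the shebang first, per the new doc comment...
    let body_start = strip_shebang(source).unwrap_or(0);

    // ...then lex the remainder with frontmatter allowed.
    for token in tokenize(&source[body_start..], FrontmatterAllowed::Yes) {
        println!("{:?} ({} bytes)", token.kind, token.len);
    }

    // Arbitrary slice of a document: frontmatter stays disallowed.
    let _tokens: Vec<_> = tokenize("fn main() {}", FrontmatterAllowed::No).collect();
}
```

Note that the flag keeps frontmatter handling opt-in: `strip_shebang`'s own probe of the text after `#!` passes `FrontmatterAllowed::No`, since that tail is never the start of a document where a frontmatter block could legally appear.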
