diff options
| author | Nicholas Nethercote <n.nethercote@gmail.com> | 2023-08-08 11:43:44 +1000 |
|---|---|---|
| committer | Nicholas Nethercote <n.nethercote@gmail.com> | 2023-12-11 09:19:09 +1100 |
| commit | 925f7fad576b6a8659d93846faf8d9610e59bab0 (patch) | |
| tree | f2c496128bcb5e75ed3ad46d7671c9aa3123b61b /compiler/rustc_parse/src | |
| parent | 7e452c123c5acea813130a9519b34f83795cb856 (diff) | |
| download | rust-925f7fad576b6a8659d93846faf8d9610e59bab0.tar.gz rust-925f7fad576b6a8659d93846faf8d9610e59bab0.zip | |
Improve `print_tts` by changing `tokenstream::Spacing`.
`tokenstream::Spacing` appears on all `TokenTree::Token` instances, both punct and non-punct. Its current usage: - `Joint` means "can join with the next token *and* that token is a punct". - `Alone` means "cannot join with the next token *or* can join with the next token but that token is not a punct". The fact that `Alone` is used for two different cases is awkward. This commit augments `tokenstream::Spacing` with a new variant `JointHidden`, resulting in: - `Joint` means "can join with the next token *and* that token is a punct". - `JointHidden` means "can join with the next token *and* that token is a not a punct". - `Alone` means "cannot join with the next token". This *drastically* improves the output of `print_tts`. For example, this: ``` stringify!(let a: Vec<u32> = vec![];) ``` currently produces this string: ``` let a : Vec < u32 > = vec! [] ; ``` With this PR, it now produces this string: ``` let a: Vec<u32> = vec![] ; ``` (The space after the `]` is because `TokenTree::Delimited` currently doesn't have spacing information. The subsequent commit fixes this.) The new `print_tts` doesn't replicate original code perfectly. E.g. multiple space characters will be condensed into a single space character. But it's much improved. `print_tts` still produces the old, uglier output for code produced by proc macros. Because we have to translate the generated code from `proc_macro::Spacing` to the more expressive `token::Spacing`, which results in too much `proc_macro::Along` usage and no `proc_macro::JointHidden` usage. So `space_between` still exists and is used by `print_tts` in conjunction with the `Spacing` field. This change will also help with the removal of `Token::Interpolated`. Currently interpolated tokens are pretty-printed nicely via AST pretty printing. `Token::Interpolated` removal will mean they get printed with `print_tts`. Without this change, that would result in much uglier output for code produced by decl macro expansions. With this change, AST pretty printing and `print_tts` produce similar results. The commit also tweaks the comments on `proc_macro::Spacing`. In particular, it refers to "compound tokens" rather than "multi-char operators" because lifetimes aren't operators.
Diffstat (limited to 'compiler/rustc_parse/src')
| -rw-r--r-- | compiler/rustc_parse/src/lexer/tokentrees.rs | 9 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/mod.rs | 17 |
2 files changed, 19 insertions, 7 deletions
diff --git a/compiler/rustc_parse/src/lexer/tokentrees.rs b/compiler/rustc_parse/src/lexer/tokentrees.rs index e646f5dfd85..a80e8fac178 100644 --- a/compiler/rustc_parse/src/lexer/tokentrees.rs +++ b/compiler/rustc_parse/src/lexer/tokentrees.rs @@ -67,8 +67,13 @@ impl<'a> TokenTreesReader<'a> { } else if let Some(glued) = self.token.glue(&next_tok) { self.token = glued; } else { - let this_spacing = - if next_tok.is_punct() { Spacing::Joint } else { Spacing::Alone }; + let this_spacing = if next_tok.is_punct() { + Spacing::Joint + } else if next_tok.kind == token::Eof { + Spacing::Alone + } else { + Spacing::JointHidden + }; break (this_spacing, next_tok); } }; diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 7a306823ed4..29709d92fad 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -130,7 +130,7 @@ pub struct Parser<'a> { pub sess: &'a ParseSess, /// The current token. pub token: Token, - /// The spacing for the current token + /// The spacing for the current token. pub token_spacing: Spacing, /// The previous token. pub prev_token: Token, @@ -268,6 +268,8 @@ impl TokenCursor { let trees = tts.clone().into_trees(); self.stack.push((mem::replace(&mut self.tree_cursor, trees), delim, sp)); if delim != Delimiter::Invisible { + // FIXME: add two `Spacing` fields to `TokenTree::Delimited` + // and use the open delim one here. return (Token::new(token::OpenDelim(delim), sp.open), Spacing::Alone); } // No open delimiter to return; continue on to the next iteration. @@ -277,11 +279,15 @@ impl TokenCursor { // We have exhausted this token stream. Move back to its parent token stream. self.tree_cursor = tree_cursor; if delim != Delimiter::Invisible { + // FIXME: add two `Spacing` fields to `TokenTree::Delimited` and + // use the close delim one here. return (Token::new(token::CloseDelim(delim), span.close), Spacing::Alone); } // No close delimiter to return; continue on to the next iteration. } else { - // We have exhausted the outermost token stream. + // We have exhausted the outermost token stream. The use of + // `Spacing::Alone` is arbitrary and immaterial, because the + // `Eof` token's spacing is never used. return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone); } } @@ -699,8 +705,8 @@ impl<'a> Parser<'a> { // is not needed (we'll capture the entire 'glued' token), // and `bump` will set this field to `None` self.break_last_token = true; - // Use the spacing of the glued token as the spacing - // of the unglued second token. + // Use the spacing of the glued token as the spacing of the + // unglued second token. self.bump_with((Token::new(second, second_span), self.token_spacing)); true } @@ -1312,8 +1318,9 @@ impl<'a> Parser<'a> { } token::CloseDelim(_) | token::Eof => unreachable!(), _ => { + let prev_spacing = self.token_spacing; self.bump(); - TokenTree::Token(self.prev_token.clone(), Spacing::Alone) + TokenTree::Token(self.prev_token.clone(), prev_spacing) } } } |
