about summary refs log tree commit diff
path: root/compiler/rustc_parse/src
diff options
context:
space:
mode:
authorNicholas Nethercote <n.nethercote@gmail.com>2023-08-08 11:43:44 +1000
committerNicholas Nethercote <n.nethercote@gmail.com>2023-12-11 09:19:09 +1100
commit925f7fad576b6a8659d93846faf8d9610e59bab0 (patch)
treef2c496128bcb5e75ed3ad46d7671c9aa3123b61b /compiler/rustc_parse/src
parent7e452c123c5acea813130a9519b34f83795cb856 (diff)
downloadrust-925f7fad576b6a8659d93846faf8d9610e59bab0.tar.gz
rust-925f7fad576b6a8659d93846faf8d9610e59bab0.zip
Improve `print_tts` by changing `tokenstream::Spacing`.
`tokenstream::Spacing` appears on all `TokenTree::Token` instances,
both punct and non-punct. Its current usage:
- `Joint` means "can join with the next token *and* that token is a
  punct".
- `Alone` means "cannot join with the next token *or* can join with the
  next token but that token is not a punct".

The fact that `Alone` is used for two different cases is awkward.
This commit augments `tokenstream::Spacing` with a new variant
`JointHidden`, resulting in:
- `Joint` means "can join with the next token *and* that token is a
  punct".
- `JointHidden` means "can join with the next token *and* that token is a
  not a punct".
- `Alone` means "cannot join with the next token".

This *drastically* improves the output of `print_tts`. For example,
this:
```
stringify!(let a: Vec<u32> = vec![];)
```
currently produces this string:
```
let a : Vec < u32 > = vec! [] ;
```
With this PR, it now produces this string:
```
let a: Vec<u32> = vec![] ;
```
(The space after the `]` is because `TokenTree::Delimited` currently
doesn't have spacing information. The subsequent commit fixes this.)

The new `print_tts` doesn't replicate original code perfectly. E.g.
multiple space characters will be condensed into a single space
character. But it's much improved.

`print_tts` still produces the old, uglier output for code produced by
proc macros. Because we have to translate the generated code from
`proc_macro::Spacing` to the more expressive `token::Spacing`, which
results in too much `proc_macro::Along` usage and no
`proc_macro::JointHidden` usage. So `space_between` still exists and
is used by `print_tts` in conjunction with the `Spacing` field.

This change will also help with the removal of `Token::Interpolated`.
Currently interpolated tokens are pretty-printed nicely via AST pretty
printing. `Token::Interpolated` removal will mean they get printed with
`print_tts`. Without this change, that would result in much uglier
output for code produced by decl macro expansions. With this change, AST
pretty printing and `print_tts` produce similar results.

The commit also tweaks the comments on `proc_macro::Spacing`. In
particular, it refers to "compound tokens" rather than "multi-char
operators" because lifetimes aren't operators.
Diffstat (limited to 'compiler/rustc_parse/src')
-rw-r--r--compiler/rustc_parse/src/lexer/tokentrees.rs9
-rw-r--r--compiler/rustc_parse/src/parser/mod.rs17
2 files changed, 19 insertions, 7 deletions
diff --git a/compiler/rustc_parse/src/lexer/tokentrees.rs b/compiler/rustc_parse/src/lexer/tokentrees.rs
index e646f5dfd85..a80e8fac178 100644
--- a/compiler/rustc_parse/src/lexer/tokentrees.rs
+++ b/compiler/rustc_parse/src/lexer/tokentrees.rs
@@ -67,8 +67,13 @@ impl<'a> TokenTreesReader<'a> {
                         } else if let Some(glued) = self.token.glue(&next_tok) {
                             self.token = glued;
                         } else {
-                            let this_spacing =
-                                if next_tok.is_punct() { Spacing::Joint } else { Spacing::Alone };
+                            let this_spacing = if next_tok.is_punct() {
+                                Spacing::Joint
+                            } else if next_tok.kind == token::Eof {
+                                Spacing::Alone
+                            } else {
+                                Spacing::JointHidden
+                            };
                             break (this_spacing, next_tok);
                         }
                     };
diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs
index 7a306823ed4..29709d92fad 100644
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@@ -130,7 +130,7 @@ pub struct Parser<'a> {
     pub sess: &'a ParseSess,
     /// The current token.
     pub token: Token,
-    /// The spacing for the current token
+    /// The spacing for the current token.
     pub token_spacing: Spacing,
     /// The previous token.
     pub prev_token: Token,
@@ -268,6 +268,8 @@ impl TokenCursor {
                         let trees = tts.clone().into_trees();
                         self.stack.push((mem::replace(&mut self.tree_cursor, trees), delim, sp));
                         if delim != Delimiter::Invisible {
+                            // FIXME: add two `Spacing` fields to `TokenTree::Delimited`
+                            // and use the open delim one here.
                             return (Token::new(token::OpenDelim(delim), sp.open), Spacing::Alone);
                         }
                         // No open delimiter to return; continue on to the next iteration.
@@ -277,11 +279,15 @@ impl TokenCursor {
                 // We have exhausted this token stream. Move back to its parent token stream.
                 self.tree_cursor = tree_cursor;
                 if delim != Delimiter::Invisible {
+                    // FIXME: add two `Spacing` fields to `TokenTree::Delimited` and
+                    // use the close delim one here.
                     return (Token::new(token::CloseDelim(delim), span.close), Spacing::Alone);
                 }
                 // No close delimiter to return; continue on to the next iteration.
             } else {
-                // We have exhausted the outermost token stream.
+                // We have exhausted the outermost token stream. The use of
+                // `Spacing::Alone` is arbitrary and immaterial, because the
+                // `Eof` token's spacing is never used.
                 return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone);
             }
         }
@@ -699,8 +705,8 @@ impl<'a> Parser<'a> {
                 // is not needed (we'll capture the entire 'glued' token),
                 // and `bump` will set this field to `None`
                 self.break_last_token = true;
-                // Use the spacing of the glued token as the spacing
-                // of the unglued second token.
+                // Use the spacing of the glued token as the spacing of the
+                // unglued second token.
                 self.bump_with((Token::new(second, second_span), self.token_spacing));
                 true
             }
@@ -1312,8 +1318,9 @@ impl<'a> Parser<'a> {
             }
             token::CloseDelim(_) | token::Eof => unreachable!(),
             _ => {
+                let prev_spacing = self.token_spacing;
                 self.bump();
-                TokenTree::Token(self.prev_token.clone(), Spacing::Alone)
+                TokenTree::Token(self.prev_token.clone(), prev_spacing)
             }
         }
     }