about summary refs log tree commit diff
path: root/compiler/rustc_ast_pretty/src/pprust/state.rs
diff options
context:
space:
mode:
author Nicholas Nethercote <n.nethercote@gmail.com> 2023-08-08 11:43:44 +1000
committer Nicholas Nethercote <n.nethercote@gmail.com> 2023-12-11 09:19:09 +1100
commit 925f7fad576b6a8659d93846faf8d9610e59bab0 (patch)
tree f2c496128bcb5e75ed3ad46d7671c9aa3123b61b /compiler/rustc_ast_pretty/src/pprust/state.rs
parent 7e452c123c5acea813130a9519b34f83795cb856 (diff)
download rust-925f7fad576b6a8659d93846faf8d9610e59bab0.tar.gz
rust-925f7fad576b6a8659d93846faf8d9610e59bab0.zip
Improve `print_tts` by changing `tokenstream::Spacing`.
`tokenstream::Spacing` appears on all `TokenTree::Token` instances,
both punct and non-punct. Its current usage:
- `Joint` means "can join with the next token *and* that token is a
  punct".
- `Alone` means "cannot join with the next token *or* can join with the
  next token but that token is not a punct".

The fact that `Alone` is used for two different cases is awkward.
This commit augments `tokenstream::Spacing` with a new variant
`JointHidden`, resulting in:
- `Joint` means "can join with the next token *and* that token is a
  punct".
- `JointHidden` means "can join with the next token *and* that token is
  not a punct".
- `Alone` means "cannot join with the next token".

This *drastically* improves the output of `print_tts`. For example,
this:
```
stringify!(let a: Vec<u32> = vec![];)
```
currently produces this string:
```
let a : Vec < u32 > = vec! [] ;
```
With this PR, it now produces this string:
```
let a: Vec<u32> = vec![] ;
```
(The space after the `]` is because `TokenTree::Delimited` currently
doesn't have spacing information. The subsequent commit fixes this.)

The new `print_tts` doesn't replicate original code perfectly. E.g.
multiple space characters will be condensed into a single space
character. But it's much improved.

`print_tts` still produces the old, uglier output for code produced by
proc macros, because we have to translate the generated code from
`proc_macro::Spacing` to the more expressive `tokenstream::Spacing`,
which results in too much `Alone` usage and no `JointHidden` usage. So
`space_between` still exists and is used by `print_tts` in conjunction
with the `Spacing` field.

This change will also help with the removal of `Token::Interpolated`.
Currently interpolated tokens are pretty-printed nicely via AST pretty
printing. `Token::Interpolated` removal will mean they get printed with
`print_tts`. Without this change, that would result in much uglier
output for code produced by decl macro expansions. With this change, AST
pretty printing and `print_tts` produce similar results.

The commit also tweaks the comments on `proc_macro::Spacing`. In
particular, it refers to "compound tokens" rather than "multi-char
operators" because lifetimes aren't operators.
Diffstat (limited to 'compiler/rustc_ast_pretty/src/pprust/state.rs')
-rw-r--r-- compiler/rustc_ast_pretty/src/pprust/state.rs | 29
1 files changed, 23 insertions, 6 deletions
diff --git a/compiler/rustc_ast_pretty/src/pprust/state.rs b/compiler/rustc_ast_pretty/src/pprust/state.rs
index ff36e6c2845..8bec7647587 100644
--- a/compiler/rustc_ast_pretty/src/pprust/state.rs
+++ b/compiler/rustc_ast_pretty/src/pprust/state.rs
@@ -10,7 +10,7 @@ use crate::pp::{self, Breaks};
 use rustc_ast::attr::AttrIdGenerator;
 use rustc_ast::ptr::P;
 use rustc_ast::token::{self, BinOpToken, CommentKind, Delimiter, Nonterminal, Token, TokenKind};
-use rustc_ast::tokenstream::{TokenStream, TokenTree};
+use rustc_ast::tokenstream::{Spacing, TokenStream, TokenTree};
 use rustc_ast::util::classify;
 use rustc_ast::util::comments::{gather_comments, Comment, CommentStyle};
 use rustc_ast::util::parser;
@@ -509,14 +509,15 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
     /// appropriate macro, transcribe back into the grammar we just parsed from,
     /// and then pretty-print the resulting AST nodes (so, e.g., we print
     /// expression arguments as expressions). It can be done! I think.
-    fn print_tt(&mut self, tt: &TokenTree, convert_dollar_crate: bool) {
+    fn print_tt(&mut self, tt: &TokenTree, convert_dollar_crate: bool) -> Spacing {
         match tt {
-            TokenTree::Token(token, _) => {
+            TokenTree::Token(token, spacing) => {
                 let token_str = self.token_to_string_ext(token, convert_dollar_crate);
                 self.word(token_str);
                 if let token::DocComment(..) = token.kind {
                     self.hardbreak()
                 }
+                *spacing
             }
             TokenTree::Delimited(dspan, delim, tts) => {
                 self.print_mac_common(
@@ -528,6 +529,9 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
                     convert_dollar_crate,
                     dspan.entire(),
                 );
+                // FIXME: add two `Spacing` fields to `TokenTree::Delimited`
+                // and use the close delim one here.
+                Spacing::Alone
             }
         }
     }
@@ -535,9 +539,20 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
     fn print_tts(&mut self, tts: &TokenStream, convert_dollar_crate: bool) {
         let mut iter = tts.trees().peekable();
         while let Some(tt) = iter.next() {
-            self.print_tt(tt, convert_dollar_crate);
+            let spacing = self.print_tt(tt, convert_dollar_crate);
             if let Some(next) = iter.peek() {
-                if space_between(tt, next) {
+                // Should we print a space after `tt`? There are two guiding
+                // factors.
+                // - `spacing` is the more important and accurate one. Most
+                //   tokens have good spacing information, and
+                //   `Joint`/`JointHidden` get used a lot.
+                // - `space_between` is the backup. Code produced by proc
+                //   macros has worse spacing information, with no
+                //   `JointHidden` usage and too much `Alone` usage, which
+                //   would result in over-spaced output such as
+                //   `( x () , y . z )`. `space_between` avoids some of the
+                //   excess whitespace.
+                if spacing == Spacing::Alone && space_between(tt, next) {
                     self.space();
                 }
             }
@@ -1797,7 +1812,9 @@ impl<'a> State<'a> {
     }
 
     pub(crate) fn tt_to_string(&self, tt: &TokenTree) -> String {
-        Self::to_string(|s| s.print_tt(tt, false))
+        Self::to_string(|s| {
+            s.print_tt(tt, false);
+        })
     }
 
     pub(crate) fn tts_to_string(&self, tokens: &TokenStream) -> String {