Improve `print_tts` by changing `tokenstream::Spacing`.

`tokenstream::Spacing` appears on all `TokenTree::Token` instances, both punct and non-punct. Its current usage: - `Joint` means "can join with the next token *and* that token is a punct". - `Alone` means "cannot join with the next token *or* can join with the next token but that token is not a punct". The fact that `Alone` is used for two different cases is awkward. This commit augments `tokenstream::Spacing` with a new variant `JointHidden`, resulting in: - `Joint` means "can join with the next token *and* that token is a punct". - `JointHidden` means "can join with the next token *and* that token is a not a punct". - `Alone` means "cannot join with the next token". This *drastically* improves the output of `print_tts`. For example, this: ``` stringify!(let a: Vec<u32> = vec![];) ``` currently produces this string: ``` let a : Vec < u32 > = vec! [] ; ``` With this PR, it now produces this string: ``` let a: Vec<u32> = vec![] ; ``` (The space after the `]` is because `TokenTree::Delimited` currently doesn't have spacing information. The subsequent commit fixes this.) The new `print_tts` doesn't replicate original code perfectly. E.g. multiple space characters will be condensed into a single space character. But it's much improved. `print_tts` still produces the old, uglier output for code produced by proc macros. Because we have to translate the generated code from `proc_macro::Spacing` to the more expressive `token::Spacing`, which results in too much `proc_macro::Along` usage and no `proc_macro::JointHidden` usage. So `space_between` still exists and is used by `print_tts` in conjunction with the `Spacing` field. This change will also help with the removal of `Token::Interpolated`. Currently interpolated tokens are pretty-printed nicely via AST pretty printing. `Token::Interpolated` removal will mean they get printed with `print_tts`. Without this change, that would result in much uglier output for code produced by decl macro expansions. With this change, AST pretty printing and `print_tts` produce similar results. The commit also tweaks the comments on `proc_macro::Spacing`. In particular, it refers to "compound tokens" rather than "multi-char operators" because lifetimes aren't operators.
author: Nicholas Nethercote <n.nethercote@gmail.com> 2023-08-08 11:43:44 +1000
committer: Nicholas Nethercote <n.nethercote@gmail.com> 2023-12-11 09:19:09 +1100
commit: 925f7fad576b6a8659d93846faf8d9610e59bab0 (patch)
tree: f2c496128bcb5e75ed3ad46d7671c9aa3123b61b /compiler/rustc_ast_pretty/src/pprust/state.rs
parent: 7e452c123c5acea813130a9519b34f83795cb856 (diff)
download: rust-925f7fad576b6a8659d93846faf8d9610e59bab0.tar.gz
rust-925f7fad576b6a8659d93846faf8d9610e59bab0.zip
1 files changed, 23 insertions, 6 deletions
diff --git a/compiler/rustc_ast_pretty/src/pprust/state.rs b/compiler/rustc_ast_pretty/src/pprust/state.rs
index ff36e6c2845..8bec7647587 100644
--- a/compiler/rustc_ast_pretty/src/pprust/state.rs
+++ b/compiler/rustc_ast_pretty/src/pprust/state.rs
@@ -10,7 +10,7 @@ use crate::pp::{self, Breaks};
 use rustc_ast::attr::AttrIdGenerator;
 use rustc_ast::ptr::P;
 use rustc_ast::token::{self, BinOpToken, CommentKind, Delimiter, Nonterminal, Token, TokenKind};
-use rustc_ast::tokenstream::{TokenStream, TokenTree};
+use rustc_ast::tokenstream::{Spacing, TokenStream, TokenTree};
 use rustc_ast::util::classify;
 use rustc_ast::util::comments::{gather_comments, Comment, CommentStyle};
 use rustc_ast::util::parser;
@@ -509,14 +509,15 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
     /// appropriate macro, transcribe back into the grammar we just parsed from,
     /// and then pretty-print the resulting AST nodes (so, e.g., we print
     /// expression arguments as expressions). It can be done! I think.
-    fn print_tt(&mut self, tt: &TokenTree, convert_dollar_crate: bool) {
+    fn print_tt(&mut self, tt: &TokenTree, convert_dollar_crate: bool) -> Spacing {
         match tt {
-            TokenTree::Token(token, _) => {
+            TokenTree::Token(token, spacing) => {
                 let token_str = self.token_to_string_ext(token, convert_dollar_crate);
                 self.word(token_str);
                 if let token::DocComment(..) = token.kind {
                     self.hardbreak()
                 }
+                *spacing
             }
             TokenTree::Delimited(dspan, delim, tts) => {
                 self.print_mac_common(
@@ -528,6 +529,9 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
                     convert_dollar_crate,
                     dspan.entire(),
                 );
+                // FIXME: add two `Spacing` fields to `TokenTree::Delimited`
+                // and use the close delim one here.
+                Spacing::Alone
             }
         }
     }
@@ -535,9 +539,20 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
     fn print_tts(&mut self, tts: &TokenStream, convert_dollar_crate: bool) {
         let mut iter = tts.trees().peekable();
         while let Some(tt) = iter.next() {
-            self.print_tt(tt, convert_dollar_crate);
+            let spacing = self.print_tt(tt, convert_dollar_crate);
             if let Some(next) = iter.peek() {
-                if space_between(tt, next) {
+                // Should we print a space after `tt`? There are two guiding
+                // factors.
+                // - `spacing` is the more important and accurate one. Most
+                //   tokens have good spacing information, and
+                //   `Joint`/`JointHidden` get used a lot.
+                // - `space_between` is the backup. Code produced by proc
+                //   macros has worse spacing information, with no
+                //   `JointHidden` usage and too much `Alone` usage, which
+                //   would result in over-spaced output such as
+                //   `( x () , y . z )`. `space_between` avoids some of the
+                //   excess whitespace.
+                if spacing == Spacing::Alone && space_between(tt, next) {
                     self.space();
                 }
             }
@@ -1797,7 +1812,9 @@ impl<'a> State<'a> {
     }
 
     pub(crate) fn tt_to_string(&self, tt: &TokenTree) -> String {
-        Self::to_string(|s| s.print_tt(tt, false))
+        Self::to_string(|s| {
+            s.print_tt(tt, false);
+        })
     }
 
     pub(crate) fn tts_to_string(&self, tokens: &TokenStream) -> String {
author	Nicholas Nethercote <n.nethercote@gmail.com>	2023-08-08 11:43:44 +1000
committer	Nicholas Nethercote <n.nethercote@gmail.com>	2023-12-11 09:19:09 +1100
commit	925f7fad576b6a8659d93846faf8d9610e59bab0 (patch)
tree	f2c496128bcb5e75ed3ad46d7671c9aa3123b61b /compiler/rustc_ast_pretty/src/pprust/state.rs
parent	7e452c123c5acea813130a9519b34f83795cb856 (diff)
download	rust-925f7fad576b6a8659d93846faf8d9610e59bab0.tar.gz rust-925f7fad576b6a8659d93846faf8d9610e59bab0.zip