about summary refs log tree commit diff
path: root/compiler/rustc_expand/src
diff options
context:
space:
mode:
author: Nicholas Nethercote <n.nethercote@gmail.com> 2023-08-08 11:43:44 +1000
committer: Nicholas Nethercote <n.nethercote@gmail.com> 2023-12-11 09:19:09 +1100
commit: 925f7fad576b6a8659d93846faf8d9610e59bab0 (patch)
tree: f2c496128bcb5e75ed3ad46d7671c9aa3123b61b /compiler/rustc_expand/src
parent: 7e452c123c5acea813130a9519b34f83795cb856 (diff)
download: rust-925f7fad576b6a8659d93846faf8d9610e59bab0.tar.gz
download: rust-925f7fad576b6a8659d93846faf8d9610e59bab0.zip
Improve `print_tts` by changing `tokenstream::Spacing`.
`tokenstream::Spacing` appears on all `TokenTree::Token` instances,
both punct and non-punct. Its current usage:
- `Joint` means "can join with the next token *and* that token is a
  punct".
- `Alone` means "cannot join with the next token *or* can join with the
  next token but that token is not a punct".

The fact that `Alone` is used for two different cases is awkward.
This commit augments `tokenstream::Spacing` with a new variant
`JointHidden`, resulting in:
- `Joint` means "can join with the next token *and* that token is a
  punct".
- `JointHidden` means "can join with the next token *and* that token is
  not a punct".
- `Alone` means "cannot join with the next token".

This *drastically* improves the output of `print_tts`. For example,
this:
```
stringify!(let a: Vec<u32> = vec![];)
```
currently produces this string:
```
let a : Vec < u32 > = vec! [] ;
```
With this PR, it now produces this string:
```
let a: Vec<u32> = vec![] ;
```
(The space after the `]` is because `TokenTree::Delimited` currently
doesn't have spacing information. The subsequent commit fixes this.)

The new `print_tts` doesn't replicate original code perfectly. E.g.
multiple space characters will be condensed into a single space
character. But it's much improved.

`print_tts` still produces the old, uglier output for code produced by
proc macros, because we have to translate the generated code from
`proc_macro::Spacing` to the more expressive `tokenstream::Spacing`, which
results in too much `Alone` usage and no `JointHidden` usage. So
`space_between` still exists and is used by `print_tts` in conjunction
with the `Spacing` field.

This change will also help with the removal of `Token::Interpolated`.
Currently interpolated tokens are pretty-printed nicely via AST pretty
printing. `Token::Interpolated` removal will mean they get printed with
`print_tts`. Without this change, that would result in much uglier
output for code produced by decl macro expansions. With this change, AST
pretty printing and `print_tts` produce similar results.

The commit also tweaks the comments on `proc_macro::Spacing`. In
particular, it refers to "compound tokens" rather than "multi-char
operators" because lifetimes aren't operators.
Diffstat (limited to 'compiler/rustc_expand/src')
-rw-r--r--  compiler/rustc_expand/src/config.rs             | 26
-rw-r--r--  compiler/rustc_expand/src/mbe/transcribe.rs     |  2
-rw-r--r--  compiler/rustc_expand/src/parse/tests.rs        | 20
-rw-r--r--  compiler/rustc_expand/src/proc_macro_server.rs  | 40
4 files changed, 61 insertions, 27 deletions
diff --git a/compiler/rustc_expand/src/config.rs b/compiler/rustc_expand/src/config.rs
index 5ccef343b17..3106fad0432 100644
--- a/compiler/rustc_expand/src/config.rs
+++ b/compiler/rustc_expand/src/config.rs
@@ -372,16 +372,6 @@ impl<'a> StripUnconfigured<'a> {
         };
         let pound_span = pound_token.span;
 
-        let mut trees = vec![AttrTokenTree::Token(pound_token, Spacing::Alone)];
-        if attr.style == AttrStyle::Inner {
-            // For inner attributes, we do the same thing for the `!` in `#![some_attr]`
-            let TokenTree::Token(bang_token @ Token { kind: TokenKind::Not, .. }, _) =
-                orig_trees.next().unwrap().clone()
-            else {
-                panic!("Bad tokens for attribute {attr:?}");
-            };
-            trees.push(AttrTokenTree::Token(bang_token, Spacing::Alone));
-        }
         // We don't really have a good span to use for the synthesized `[]`
         // in `#[attr]`, so just use the span of the `#` token.
         let bracket_group = AttrTokenTree::Delimited(
@@ -392,7 +382,21 @@ impl<'a> StripUnconfigured<'a> {
                 .unwrap_or_else(|| panic!("Missing tokens for {item:?}"))
                 .to_attr_token_stream(),
         );
-        trees.push(bracket_group);
+        let trees = if attr.style == AttrStyle::Inner {
+            // For inner attributes, we do the same thing for the `!` in `#![some_attr]`
+            let TokenTree::Token(bang_token @ Token { kind: TokenKind::Not, .. }, _) =
+                orig_trees.next().unwrap().clone()
+            else {
+                panic!("Bad tokens for attribute {attr:?}");
+            };
+            vec![
+                AttrTokenTree::Token(pound_token, Spacing::Joint),
+                AttrTokenTree::Token(bang_token, Spacing::JointHidden),
+                bracket_group,
+            ]
+        } else {
+            vec![AttrTokenTree::Token(pound_token, Spacing::JointHidden), bracket_group]
+        };
         let tokens = Some(LazyAttrTokenStream::new(AttrTokenStream::new(trees)));
         let attr = attr::mk_attr_from_item(
             &self.sess.parse_sess.attr_id_generator,
diff --git a/compiler/rustc_expand/src/mbe/transcribe.rs b/compiler/rustc_expand/src/mbe/transcribe.rs
index bc03fc0d1b1..854f551ea1b 100644
--- a/compiler/rustc_expand/src/mbe/transcribe.rs
+++ b/compiler/rustc_expand/src/mbe/transcribe.rs
@@ -240,7 +240,7 @@ pub(super) fn transcribe<'a>(
                     // with modified syntax context. (I believe this supports nested macros).
                     marker.visit_span(&mut sp);
                     marker.visit_ident(&mut original_ident);
-                    result.push(TokenTree::token_alone(token::Dollar, sp));
+                    result.push(TokenTree::token_joint_hidden(token::Dollar, sp));
                     result.push(TokenTree::Token(
                         Token::from_ast_ident(original_ident),
                         Spacing::Alone,
diff --git a/compiler/rustc_expand/src/parse/tests.rs b/compiler/rustc_expand/src/parse/tests.rs
index bdc20882a9d..a6c6d82b575 100644
--- a/compiler/rustc_expand/src/parse/tests.rs
+++ b/compiler/rustc_expand/src/parse/tests.rs
@@ -116,27 +116,29 @@ fn string_to_tts_macro() {
 #[test]
 fn string_to_tts_1() {
     create_default_session_globals_then(|| {
-        let tts = string_to_stream("fn a (b : i32) { b; }".to_string());
+        let tts = string_to_stream("fn a(b: i32) { b; }".to_string());
 
         let expected = TokenStream::new(vec![
             TokenTree::token_alone(token::Ident(kw::Fn, false), sp(0, 2)),
-            TokenTree::token_alone(token::Ident(Symbol::intern("a"), false), sp(3, 4)),
+            TokenTree::token_joint_hidden(token::Ident(Symbol::intern("a"), false), sp(3, 4)),
             TokenTree::Delimited(
-                DelimSpan::from_pair(sp(5, 6), sp(13, 14)),
+                DelimSpan::from_pair(sp(4, 5), sp(11, 12)),
                 Delimiter::Parenthesis,
                 TokenStream::new(vec![
-                    TokenTree::token_alone(token::Ident(Symbol::intern("b"), false), sp(6, 7)),
-                    TokenTree::token_alone(token::Colon, sp(8, 9)),
-                    TokenTree::token_alone(token::Ident(sym::i32, false), sp(10, 13)),
+                    TokenTree::token_joint(token::Ident(Symbol::intern("b"), false), sp(5, 6)),
+                    TokenTree::token_alone(token::Colon, sp(6, 7)),
+                    // `JointHidden` because the `i32` is immediately followed by the `)`.
+                    TokenTree::token_joint_hidden(token::Ident(sym::i32, false), sp(8, 11)),
                 ])
                 .into(),
             ),
             TokenTree::Delimited(
-                DelimSpan::from_pair(sp(15, 16), sp(20, 21)),
+                DelimSpan::from_pair(sp(13, 14), sp(18, 19)),
                 Delimiter::Brace,
                 TokenStream::new(vec![
-                    TokenTree::token_joint(token::Ident(Symbol::intern("b"), false), sp(17, 18)),
-                    TokenTree::token_alone(token::Semi, sp(18, 19)),
+                    TokenTree::token_joint(token::Ident(Symbol::intern("b"), false), sp(15, 16)),
+                    // `Alone` because the `;` is followed by whitespace.
+                    TokenTree::token_alone(token::Semi, sp(16, 17)),
                 ])
                 .into(),
             ),
diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs
index b057a645f81..b3b78330801 100644
--- a/compiler/rustc_expand/src/proc_macro_server.rs
+++ b/compiler/rustc_expand/src/proc_macro_server.rs
@@ -5,7 +5,7 @@ use pm::bridge::{
 use pm::{Delimiter, Level};
 use rustc_ast as ast;
 use rustc_ast::token;
-use rustc_ast::tokenstream::{self, Spacing::*, TokenStream};
+use rustc_ast::tokenstream::{self, Spacing, TokenStream};
 use rustc_ast::util::literal::escape_byte_str_symbol;
 use rustc_ast_pretty::pprust;
 use rustc_data_structures::fx::FxHashMap;
@@ -111,7 +111,22 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> for Vec<TokenTree<TokenStre
                     }));
                     continue;
                 }
-                tokenstream::TokenTree::Token(token, spacing) => (token, spacing == Joint),
+                tokenstream::TokenTree::Token(token, spacing) => {
+                    // Do not be tempted to check here that the `spacing`
+                    // values are "correct" w.r.t. the token stream (e.g. that
+                    // `Spacing::Joint` is actually followed by a `Punct` token
+                    // tree). Because the problem in #76399 was introduced that
+                    // way.
+                    //
+                    // This is where the `Hidden` in `JointHidden` applies,
+                    // because the jointness is effectively hidden from proc
+                    // macros.
+                    let joint = match spacing {
+                        Spacing::Alone | Spacing::JointHidden => false,
+                        Spacing::Joint => true,
+                    };
+                    (token, joint)
+                }
             };
 
             // Split the operator into one or more `Punct`s, one per character.
@@ -133,7 +148,8 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> for Vec<TokenTree<TokenStre
                     } else {
                         span
                     };
-                    TokenTree::Punct(Punct { ch, joint: if is_final { joint } else { true }, span })
+                    let joint = if is_final { joint } else { true };
+                    TokenTree::Punct(Punct { ch, joint, span })
                 }));
             };
 
@@ -268,6 +284,10 @@ impl ToInternal<SmallVec<[tokenstream::TokenTree; 2]>>
     fn to_internal(self) -> SmallVec<[tokenstream::TokenTree; 2]> {
         use rustc_ast::token::*;
 
+        // The code below is conservative and uses `token_alone` in most
+        // places. When the resulting code is pretty-printed by `print_tts` it
+        // ends up with spaces between most tokens, which is safe but ugly.
+        // It's hard in general to do better when working at the token level.
         let (tree, rustc) = self;
         match tree {
             TokenTree::Punct(Punct { ch, joint, span }) => {
@@ -296,6 +316,11 @@ impl ToInternal<SmallVec<[tokenstream::TokenTree; 2]>>
                     b'\'' => SingleQuote,
                     _ => unreachable!(),
                 };
+                // We never produce `token::Spacing::JointHidden` here, which
+                // means the pretty-printing of code produced by proc macros is
+                // ugly, with lots of whitespace between tokens. This is
+                // unavoidable because `proc_macro::Spacing` only applies to
+                // `Punct` token trees.
                 smallvec![if joint {
                     tokenstream::TokenTree::token_joint(kind, span)
                 } else {
@@ -322,7 +347,7 @@ impl ToInternal<SmallVec<[tokenstream::TokenTree; 2]>>
                 let minus = BinOp(BinOpToken::Minus);
                 let symbol = Symbol::intern(&symbol.as_str()[1..]);
                 let integer = TokenKind::lit(token::Integer, symbol, suffix);
-                let a = tokenstream::TokenTree::token_alone(minus, span);
+                let a = tokenstream::TokenTree::token_joint_hidden(minus, span);
                 let b = tokenstream::TokenTree::token_alone(integer, span);
                 smallvec![a, b]
             }
@@ -335,7 +360,7 @@ impl ToInternal<SmallVec<[tokenstream::TokenTree; 2]>>
                 let minus = BinOp(BinOpToken::Minus);
                 let symbol = Symbol::intern(&symbol.as_str()[1..]);
                 let float = TokenKind::lit(token::Float, symbol, suffix);
-                let a = tokenstream::TokenTree::token_alone(minus, span);
+                let a = tokenstream::TokenTree::token_joint_hidden(minus, span);
                 let b = tokenstream::TokenTree::token_alone(float, span);
                 smallvec![a, b]
             }
@@ -546,7 +571,10 @@ impl server::TokenStream for Rustc<'_, '_> {
                         Ok(Self::TokenStream::from_iter([
                             // FIXME: The span of the `-` token is lost when
                             // parsing, so we cannot faithfully recover it here.
-                            tokenstream::TokenTree::token_alone(token::BinOp(token::Minus), e.span),
+                            tokenstream::TokenTree::token_joint_hidden(
+                                token::BinOp(token::Minus),
+                                e.span,
+                            ),
                             tokenstream::TokenTree::token_alone(token::Literal(*token_lit), e.span),
                         ]))
                     }