diff options
| author | Nicholas Nethercote <n.nethercote@gmail.com> | 2023-08-08 11:43:44 +1000 |
|---|---|---|
| committer | Nicholas Nethercote <n.nethercote@gmail.com> | 2023-12-11 09:19:09 +1100 |
| commit | 925f7fad576b6a8659d93846faf8d9610e59bab0 (patch) | |
| tree | f2c496128bcb5e75ed3ad46d7671c9aa3123b61b /compiler/rustc_expand/src | |
| parent | 7e452c123c5acea813130a9519b34f83795cb856 (diff) | |
| download | rust-925f7fad576b6a8659d93846faf8d9610e59bab0.tar.gz rust-925f7fad576b6a8659d93846faf8d9610e59bab0.zip | |
Improve `print_tts` by changing `tokenstream::Spacing`.
`tokenstream::Spacing` appears on all `TokenTree::Token` instances, both punct and non-punct. Its current usage: - `Joint` means "can join with the next token *and* that token is a punct". - `Alone` means "cannot join with the next token *or* can join with the next token but that token is not a punct". The fact that `Alone` is used for two different cases is awkward. This commit augments `tokenstream::Spacing` with a new variant `JointHidden`, resulting in: - `Joint` means "can join with the next token *and* that token is a punct". - `JointHidden` means "can join with the next token *and* that token is not a punct". - `Alone` means "cannot join with the next token". This *drastically* improves the output of `print_tts`. For example, this: ``` stringify!(let a: Vec<u32> = vec![];) ``` currently produces this string: ``` let a : Vec < u32 > = vec! [] ; ``` With this PR, it now produces this string: ``` let a: Vec<u32> = vec![] ; ``` (The space after the `]` is because `TokenTree::Delimited` currently doesn't have spacing information. The subsequent commit fixes this.) The new `print_tts` doesn't replicate the original code perfectly. E.g. multiple space characters will be condensed into a single space character. But it's much improved. `print_tts` still produces the old, uglier output for code produced by proc macros, because we have to translate the generated code from `proc_macro::Spacing` to the more expressive `token::Spacing`, which results in too much `token::Spacing::Alone` usage and no `token::Spacing::JointHidden` usage. So `space_between` still exists and is used by `print_tts` in conjunction with the `Spacing` field. This change will also help with the removal of `Token::Interpolated`. Currently interpolated tokens are pretty-printed nicely via AST pretty printing. `Token::Interpolated` removal will mean they get printed with `print_tts`. Without this change, that would result in much uglier output for code produced by decl macro expansions.
With this change, AST pretty printing and `print_tts` produce similar results. The commit also tweaks the comments on `proc_macro::Spacing`. In particular, it refers to "compound tokens" rather than "multi-char operators" because lifetimes aren't operators.
Diffstat (limited to 'compiler/rustc_expand/src')
| -rw-r--r-- | compiler/rustc_expand/src/config.rs | 26 | ||||
| -rw-r--r-- | compiler/rustc_expand/src/mbe/transcribe.rs | 2 | ||||
| -rw-r--r-- | compiler/rustc_expand/src/parse/tests.rs | 20 | ||||
| -rw-r--r-- | compiler/rustc_expand/src/proc_macro_server.rs | 40 |
4 files changed, 61 insertions, 27 deletions
diff --git a/compiler/rustc_expand/src/config.rs b/compiler/rustc_expand/src/config.rs index 5ccef343b17..3106fad0432 100644 --- a/compiler/rustc_expand/src/config.rs +++ b/compiler/rustc_expand/src/config.rs @@ -372,16 +372,6 @@ impl<'a> StripUnconfigured<'a> { }; let pound_span = pound_token.span; - let mut trees = vec![AttrTokenTree::Token(pound_token, Spacing::Alone)]; - if attr.style == AttrStyle::Inner { - // For inner attributes, we do the same thing for the `!` in `#![some_attr]` - let TokenTree::Token(bang_token @ Token { kind: TokenKind::Not, .. }, _) = - orig_trees.next().unwrap().clone() - else { - panic!("Bad tokens for attribute {attr:?}"); - }; - trees.push(AttrTokenTree::Token(bang_token, Spacing::Alone)); - } // We don't really have a good span to use for the synthesized `[]` // in `#[attr]`, so just use the span of the `#` token. let bracket_group = AttrTokenTree::Delimited( @@ -392,7 +382,21 @@ impl<'a> StripUnconfigured<'a> { .unwrap_or_else(|| panic!("Missing tokens for {item:?}")) .to_attr_token_stream(), ); - trees.push(bracket_group); + let trees = if attr.style == AttrStyle::Inner { + // For inner attributes, we do the same thing for the `!` in `#![some_attr]` + let TokenTree::Token(bang_token @ Token { kind: TokenKind::Not, .. 
}, _) = + orig_trees.next().unwrap().clone() + else { + panic!("Bad tokens for attribute {attr:?}"); + }; + vec![ + AttrTokenTree::Token(pound_token, Spacing::Joint), + AttrTokenTree::Token(bang_token, Spacing::JointHidden), + bracket_group, + ] + } else { + vec![AttrTokenTree::Token(pound_token, Spacing::JointHidden), bracket_group] + }; let tokens = Some(LazyAttrTokenStream::new(AttrTokenStream::new(trees))); let attr = attr::mk_attr_from_item( &self.sess.parse_sess.attr_id_generator, diff --git a/compiler/rustc_expand/src/mbe/transcribe.rs b/compiler/rustc_expand/src/mbe/transcribe.rs index bc03fc0d1b1..854f551ea1b 100644 --- a/compiler/rustc_expand/src/mbe/transcribe.rs +++ b/compiler/rustc_expand/src/mbe/transcribe.rs @@ -240,7 +240,7 @@ pub(super) fn transcribe<'a>( // with modified syntax context. (I believe this supports nested macros). marker.visit_span(&mut sp); marker.visit_ident(&mut original_ident); - result.push(TokenTree::token_alone(token::Dollar, sp)); + result.push(TokenTree::token_joint_hidden(token::Dollar, sp)); result.push(TokenTree::Token( Token::from_ast_ident(original_ident), Spacing::Alone, diff --git a/compiler/rustc_expand/src/parse/tests.rs b/compiler/rustc_expand/src/parse/tests.rs index bdc20882a9d..a6c6d82b575 100644 --- a/compiler/rustc_expand/src/parse/tests.rs +++ b/compiler/rustc_expand/src/parse/tests.rs @@ -116,27 +116,29 @@ fn string_to_tts_macro() { #[test] fn string_to_tts_1() { create_default_session_globals_then(|| { - let tts = string_to_stream("fn a (b : i32) { b; }".to_string()); + let tts = string_to_stream("fn a(b: i32) { b; }".to_string()); let expected = TokenStream::new(vec![ TokenTree::token_alone(token::Ident(kw::Fn, false), sp(0, 2)), - TokenTree::token_alone(token::Ident(Symbol::intern("a"), false), sp(3, 4)), + TokenTree::token_joint_hidden(token::Ident(Symbol::intern("a"), false), sp(3, 4)), TokenTree::Delimited( - DelimSpan::from_pair(sp(5, 6), sp(13, 14)), + DelimSpan::from_pair(sp(4, 5), sp(11, 12)), 
Delimiter::Parenthesis, TokenStream::new(vec![ - TokenTree::token_alone(token::Ident(Symbol::intern("b"), false), sp(6, 7)), - TokenTree::token_alone(token::Colon, sp(8, 9)), - TokenTree::token_alone(token::Ident(sym::i32, false), sp(10, 13)), + TokenTree::token_joint(token::Ident(Symbol::intern("b"), false), sp(5, 6)), + TokenTree::token_alone(token::Colon, sp(6, 7)), + // `JointHidden` because the `i32` is immediately followed by the `)`. + TokenTree::token_joint_hidden(token::Ident(sym::i32, false), sp(8, 11)), ]) .into(), ), TokenTree::Delimited( - DelimSpan::from_pair(sp(15, 16), sp(20, 21)), + DelimSpan::from_pair(sp(13, 14), sp(18, 19)), Delimiter::Brace, TokenStream::new(vec![ - TokenTree::token_joint(token::Ident(Symbol::intern("b"), false), sp(17, 18)), - TokenTree::token_alone(token::Semi, sp(18, 19)), + TokenTree::token_joint(token::Ident(Symbol::intern("b"), false), sp(15, 16)), + // `Alone` because the `;` is followed by whitespace. + TokenTree::token_alone(token::Semi, sp(16, 17)), ]) .into(), ), diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs index b057a645f81..b3b78330801 100644 --- a/compiler/rustc_expand/src/proc_macro_server.rs +++ b/compiler/rustc_expand/src/proc_macro_server.rs @@ -5,7 +5,7 @@ use pm::bridge::{ use pm::{Delimiter, Level}; use rustc_ast as ast; use rustc_ast::token; -use rustc_ast::tokenstream::{self, Spacing::*, TokenStream}; +use rustc_ast::tokenstream::{self, Spacing, TokenStream}; use rustc_ast::util::literal::escape_byte_str_symbol; use rustc_ast_pretty::pprust; use rustc_data_structures::fx::FxHashMap; @@ -111,7 +111,22 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> for Vec<TokenTree<TokenStre })); continue; } - tokenstream::TokenTree::Token(token, spacing) => (token, spacing == Joint), + tokenstream::TokenTree::Token(token, spacing) => { + // Do not be tempted to check here that the `spacing` + // values are "correct" w.r.t. the token stream (e.g. 
that + // `Spacing::Joint` is actually followed by a `Punct` token + // tree). Because the problem in #76399 was introduced that + // way. + // + // This is where the `Hidden` in `JointHidden` applies, + // because the jointness is effectively hidden from proc + // macros. + let joint = match spacing { + Spacing::Alone | Spacing::JointHidden => false, + Spacing::Joint => true, + }; + (token, joint) + } }; // Split the operator into one or more `Punct`s, one per character. @@ -133,7 +148,8 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> for Vec<TokenTree<TokenStre } else { span }; - TokenTree::Punct(Punct { ch, joint: if is_final { joint } else { true }, span }) + let joint = if is_final { joint } else { true }; + TokenTree::Punct(Punct { ch, joint, span }) })); }; @@ -268,6 +284,10 @@ impl ToInternal<SmallVec<[tokenstream::TokenTree; 2]>> fn to_internal(self) -> SmallVec<[tokenstream::TokenTree; 2]> { use rustc_ast::token::*; + // The code below is conservative and uses `token_alone` in most + // places. When the resulting code is pretty-printed by `print_tts` it + // ends up with spaces between most tokens, which is safe but ugly. + // It's hard in general to do better when working at the token level. let (tree, rustc) = self; match tree { TokenTree::Punct(Punct { ch, joint, span }) => { @@ -296,6 +316,11 @@ impl ToInternal<SmallVec<[tokenstream::TokenTree; 2]>> b'\'' => SingleQuote, _ => unreachable!(), }; + // We never produce `token::Spacing::JointHidden` here, which + // means the pretty-printing of code produced by proc macros is + // ugly, with lots of whitespace between tokens. This is + // unavoidable because `proc_macro::Spacing` only applies to + // `Punct` token trees. 
smallvec![if joint { tokenstream::TokenTree::token_joint(kind, span) } else { @@ -322,7 +347,7 @@ impl ToInternal<SmallVec<[tokenstream::TokenTree; 2]>> let minus = BinOp(BinOpToken::Minus); let symbol = Symbol::intern(&symbol.as_str()[1..]); let integer = TokenKind::lit(token::Integer, symbol, suffix); - let a = tokenstream::TokenTree::token_alone(minus, span); + let a = tokenstream::TokenTree::token_joint_hidden(minus, span); let b = tokenstream::TokenTree::token_alone(integer, span); smallvec![a, b] } @@ -335,7 +360,7 @@ impl ToInternal<SmallVec<[tokenstream::TokenTree; 2]>> let minus = BinOp(BinOpToken::Minus); let symbol = Symbol::intern(&symbol.as_str()[1..]); let float = TokenKind::lit(token::Float, symbol, suffix); - let a = tokenstream::TokenTree::token_alone(minus, span); + let a = tokenstream::TokenTree::token_joint_hidden(minus, span); let b = tokenstream::TokenTree::token_alone(float, span); smallvec![a, b] } @@ -546,7 +571,10 @@ impl server::TokenStream for Rustc<'_, '_> { Ok(Self::TokenStream::from_iter([ // FIXME: The span of the `-` token is lost when // parsing, so we cannot faithfully recover it here. - tokenstream::TokenTree::token_alone(token::BinOp(token::Minus), e.span), + tokenstream::TokenTree::token_joint_hidden( + token::BinOp(token::Minus), + e.span, + ), tokenstream::TokenTree::token_alone(token::Literal(*token_lit), e.span), ])) } |
