Clarify spacing computation.

The spacing computation is done in two parts. In the first part, `next_token` and `bump` use `Spacing::Alone` to mean "preceded by whitespace" and `Spacing::Joint` to mean the opposite. In the second part, `parse_token_tree_other` adjusts the `spacing` value to mean the usual thing (i.e. "is the following token joinable punctuation?"). This shift in meaning is very confusing, and it took me some time to understand what was going on. This commit changes the first part to use a bool and adds some comments, which makes things much clearer.
author: Nicholas Nethercote <n.nethercote@gmail.com> 2022-09-21 16:38:28 +1000
committer: Nicholas Nethercote <n.nethercote@gmail.com> 2022-09-26 13:21:26 +1000
commit: d7928a92e588e11a6c7145cde9ccfd75c4c7cc01 (patch)
tree: c89337945640fe2c616ec882f6e26a4f031c75f3 /compiler/rustc_parse/src
parent: 9640d1c02354dc3167f775e56629aaf8974e78f7 (diff)
download: rust-d7928a92e588e11a6c7145cde9ccfd75c4c7cc01.tar.gz
rust-d7928a92e588e11a6c7145cde9ccfd75c4c7cc01.zip
2 files changed, 20 insertions, 13 deletions
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 0e8a739fb62..7d5f736a6f4 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -1,7 +1,7 @@
 use crate::lexer::unicode_chars::UNICODE_ARRAY;
 use rustc_ast::ast::{self, AttrStyle};
 use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind};
-use rustc_ast::tokenstream::{Spacing, TokenStream};
+use rustc_ast::tokenstream::TokenStream;
 use rustc_ast::util::unicode::contains_text_flow_control_chars;
 use rustc_errors::{error_code, Applicability, DiagnosticBuilder, ErrorGuaranteed, PResult};
 use rustc_lexer::unescape::{self, Mode};
@@ -67,9 +67,10 @@ impl<'a> StringReader<'a> {
         self.override_span.unwrap_or_else(|| Span::with_root_ctxt(lo, hi))
     }
 
-    /// Returns the next token, and info about preceding whitespace, if any.
-    fn next_token(&mut self) -> (Spacing, Token) {
-        let mut spacing = Spacing::Joint;
+    /// Returns the next token, paired with a bool indicating if the token was
+    /// preceded by whitespace.
+    fn next_token(&mut self) -> (Token, bool) {
+        let mut preceded_by_whitespace = false;
 
         // Skip trivial (whitespace & comments) tokens
         loop {
@@ -78,7 +79,7 @@ impl<'a> StringReader<'a> {
 
             if text.is_empty() {
                 let span = self.mk_sp(self.pos, self.pos);
-                return (spacing, Token::new(token::Eof, span));
+                return (Token::new(token::Eof, span), preceded_by_whitespace);
             }
 
             let token = rustc_lexer::first_token(text);
@@ -91,9 +92,9 @@ impl<'a> StringReader<'a> {
             match self.cook_lexer_token(token.kind, start) {
                 Some(kind) => {
                     let span = self.mk_sp(start, self.pos);
-                    return (spacing, Token::new(kind, span));
+                    return (Token::new(kind, span), preceded_by_whitespace);
                 }
-                None => spacing = Spacing::Alone,
+                None => preceded_by_whitespace = true,
             }
         }
     }
diff --git a/compiler/rustc_parse/src/lexer/tokentrees.rs b/compiler/rustc_parse/src/lexer/tokentrees.rs
index 3372544a579..fe95742972f 100644
--- a/compiler/rustc_parse/src/lexer/tokentrees.rs
+++ b/compiler/rustc_parse/src/lexer/tokentrees.rs
@@ -246,16 +246,22 @@ impl<'a> TokenTreesReader<'a> {
 
     #[inline]
     fn parse_token_tree_other(&mut self) -> TokenTree {
+        // `spacing` for the returned token is determined by the next token:
+        // its kind and its `preceded_by_whitespace` status.
         let tok = self.token.take();
-        let mut spacing = self.bump();
-        if !self.token.is_op() {
-            spacing = Spacing::Alone;
-        }
+        let is_next_tok_preceded_by_whitespace = self.bump();
+        let spacing = if is_next_tok_preceded_by_whitespace || !self.token.is_op() {
+            Spacing::Alone
+        } else {
+            Spacing::Joint
+        };
         TokenTree::Token(tok, spacing)
     }
 
-    fn bump(&mut self) -> Spacing {
-        let (spacing, token) = self.string_reader.next_token();
+    // Set `self.token` to the next token. Returns a bool indicating if that
+    // token was preceded by whitespace.
+    fn bump(&mut self) -> bool {
+        let (token, spacing) = self.string_reader.next_token();
         self.token = token;
         spacing
     }
author	Nicholas Nethercote <n.nethercote@gmail.com>	2022-09-21 16:38:28 +1000
committer	Nicholas Nethercote <n.nethercote@gmail.com>	2022-09-26 13:21:26 +1000
commit	d7928a92e588e11a6c7145cde9ccfd75c4c7cc01 (patch)
tree	c89337945640fe2c616ec882f6e26a4f031c75f3 /compiler/rustc_parse/src
parent	9640d1c02354dc3167f775e56629aaf8974e78f7 (diff)
download	rust-d7928a92e588e11a6c7145cde9ccfd75c4c7cc01.tar.gz rust-d7928a92e588e11a6c7145cde9ccfd75c4c7cc01.zip