path: root/compiler/rustc_parse/src/parser/mod.rs
author     Nicholas Nethercote <n.nethercote@gmail.com>  2024-09-19 19:32:17 +1000
committer  Nicholas Nethercote <n.nethercote@gmail.com>  2024-09-23 09:14:30 +1000
commit     73cc5751773d4c49cc9d938548762520037926ba
tree       f8387e17f2c33ee1775d211646b38d5a9a7827a7
parent     f79a912d9edc3ad4db910c0e93672ed5c65133fa
Fix `break_last_token`.
It currently doesn't handle the three-char tokens `>>=` and `<<=`
correctly. These can be broken twice, resulting in three individual
tokens. This is a latent bug that currently doesn't cause any problems,
but does cause problems for #124141, because that PR increases the usage
of lazy token streams.
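To make the failure mode concrete, here is a minimal standalone sketch of the
double break. The `Tok` type and `break_once` helper are made up for
illustration; they stand in for rustc's `TokenKind` and its token-splitting
logic, and `<<=` behaves symmetrically.

    // Toy model: breaking a glued token splits off its first character.
    #[derive(Debug, PartialEq)]
    enum Tok {
        Gt,     // `>`
        Eq,     // `=`
        GtEq,   // `>=`
        GtGt,   // `>>`
        GtGtEq, // `>>=`
    }

    // Hypothetical analogue of the parser's token splitting: break a glued
    // token into its first character and the (possibly still glued) rest.
    fn break_once(t: &Tok) -> Option<(Tok, Tok)> {
        match t {
            Tok::GtGtEq => Some((Tok::Gt, Tok::GtEq)),
            Tok::GtGt => Some((Tok::Gt, Tok::Gt)),
            Tok::GtEq => Some((Tok::Gt, Tok::Eq)),
            _ => None,
        }
    }

    fn main() {
        // Two-char glued tokens need at most one break.
        assert_eq!(break_once(&Tok::GtGt), Some((Tok::Gt, Tok::Gt)));

        // `>>=` -> `>` + `>=` -> `>` + `>` + `=`: two breaks, three tokens.
        // A boolean records that *a* break happened, but not that two did,
        // which is why the field changed below becomes a counter.
        let (first, rest) = break_once(&Tok::GtGtEq).unwrap();
        let (second, third) = break_once(&rest).unwrap();
        assert_eq!((first, second, third), (Tok::Gt, Tok::Gt, Tok::Eq));
    }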
Diffstat (limited to 'compiler/rustc_parse/src/parser/mod.rs')
-rw-r--r--  compiler/rustc_parse/src/parser/mod.rs | 39
1 file changed, 21 insertions(+), 18 deletions(-)
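The comment rewritten in the first hunk below describes how broken tokens
interact with token capture. As a rough sketch of that bookkeeping (toy types
only; `LazyStream` and its `evaluate` method are made up, not rustc's
`LazyAttrTokenStream`): the captured range can end partway through a glued
token, and the break count says how many leading pieces of that token were
actually consumed inside the capture.

    // Toy sketch: a capture whose last token may have been broken.
    struct LazyStream {
        tokens: Vec<String>,   // captured tokens; the last may be glued
        break_last_token: u32, // 0, 1, or 2 breaks applied to the last token
    }

    impl LazyStream {
        fn evaluate(&self) -> Vec<String> {
            let mut out = self.tokens.clone();
            if self.break_last_token > 0 {
                // Keep only the first `break_last_token` characters of the
                // glued last token: those are the pieces consumed inside
                // this capture; the rest belongs to the enclosing parse.
                let glued = out.pop().unwrap();
                let keep = self.break_last_token as usize;
                out.push(glued[..keep].to_string());
            }
            out
        }
    }

    fn main() {
        // Capturing `Vec<u8>` out of `Option<Vec<u8>>`: the trailing `>>`
        // was broken once, and only its first `>` belongs to the capture.
        let s = LazyStream {
            tokens: vec!["Vec".into(), "<".into(), "u8".into(), ">>".into()],
            break_last_token: 1,
        };
        assert_eq!(s.evaluate(), ["Vec", "<", "u8", ">"]);
    }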
diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs
index 9d9265d5318..ca0838a7929 100644
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@@ -146,21 +146,25 @@ pub struct Parser<'a> {
     token_cursor: TokenCursor,
     // The number of calls to `bump`, i.e. the position in the token stream.
     num_bump_calls: u32,
-    // During parsing we may sometimes need to 'unglue' a glued token into two
-    // component tokens (e.g. '>>' into '>' and '>), so the parser can consume
-    // them one at a time. This process bypasses the normal capturing mechanism
-    // (e.g. `num_bump_calls` will not be incremented), since the 'unglued'
-    // tokens due not exist in the original `TokenStream`.
+    // During parsing we may sometimes need to "unglue" a glued token into two
+    // or three component tokens (e.g. `>>` into `>` and `>`, or `>>=` into `>`
+    // and `>` and `=`), so the parser can consume them one at a time. This
+    // process bypasses the normal capturing mechanism (e.g. `num_bump_calls`
+    // will not be incremented), since the "unglued" tokens do not exist in
+    // the original `TokenStream`.
     //
-    // If we end up consuming both unglued tokens, this is not an issue. We'll
-    // end up capturing the single 'glued' token.
+    // If we end up consuming all the component tokens, this is not an issue,
+    // because we'll end up capturing the single "glued" token.
     //
-    // However, sometimes we may want to capture just the first 'unglued'
+    // However, sometimes we may not want to capture every component
     // token. For example, capturing the `Vec<u8>` in `Option<Vec<u8>>`
     // requires us to unglue the trailing `>>` token. The `break_last_token`
-    // field is used to track this token. It gets appended to the captured
+    // field is used to track these tokens. They get appended to the captured
     // stream when we evaluate a `LazyAttrTokenStream`.
-    break_last_token: bool,
+    //
+    // This value is always 0, 1, or 2. It can only reach 2 when splitting
+    // `>>=` or `<<=`.
+    break_last_token: u32,
     /// This field is used to keep track of how many left angle brackets we have seen. This is
     /// required in order to detect extra leading left angle brackets (`<` characters) and error
     /// appropriately.
@@ -453,7 +457,7 @@ impl<'a> Parser<'a> {
             expected_tokens: Vec::new(),
             token_cursor: TokenCursor { tree_cursor: stream.into_trees(), stack: Vec::new() },
             num_bump_calls: 0,
-            break_last_token: false,
+            break_last_token: 0,
             unmatched_angle_bracket_count: 0,
             angle_bracket_nesting: 0,
             last_unexpected_token_span: None,
@@ -773,7 +777,7 @@ impl<'a> Parser<'a> {
             self.bump();
             return true;
         }
-        match self.token.kind.break_two_token_op() {
+        match self.token.kind.break_two_token_op(1) {
             Some((first, second)) if first == expected => {
                 let first_span = self.psess.source_map().start_point(self.token.span);
                 let second_span = self.token.span.with_lo(first_span.hi());
@@ -783,8 +787,8 @@ impl<'a> Parser<'a> {
                 //
                 // If we consume any additional tokens, then this token
                 // is not needed (we'll capture the entire 'glued' token),
-                // and `bump` will set this field to `None`
-                self.break_last_token = true;
+                // and `bump` will set this field to 0.
+                self.break_last_token += 1;
                 // Use the spacing of the glued token as the spacing of the
                 // unglued second token.
                 self.bump_with((Token::new(second, second_span), self.token_spacing));
@@ -1148,10 +1152,9 @@ impl<'a> Parser<'a> {
         // than `.0`/`.1` access.
         let mut next = self.token_cursor.inlined_next();
         self.num_bump_calls += 1;
-        // We've retrieved an token from the underlying
-        // cursor, so we no longer need to worry about
-        // an unglued token. See `break_and_eat` for more details
-        self.break_last_token = false;
+        // We got a token from the underlying cursor and no longer need to
+        // worry about an unglued token. See `break_and_eat` for more details.
+        self.break_last_token = 0;
         if next.0.span.is_dummy() {
             // Tweak the location for better diagnostics, but keep syntactic context intact.
             let fallback_span = self.token.span;
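For context, this is the kind of source (a made-up example) that forces the
double break: in a type position the lexer glues `>>=` into a single token,
and the parser must unglue it twice, once per closing angle bracket, before
treating `=` as the start of the initializer.

    fn main() {
        // The lexer produces one `>>=` token here, but the parser needs
        // `>`, `>`, `=`. It breaks `>>=` into `>` and `>=` while closing
        // the inner `Vec<..>`, then `>=` into `>` and `=` while closing
        // the outer one, so `break_last_token` reaches 2.
        let x: Vec<Vec<u8>>= vec![vec![1, 2, 3]];
        println!("{x:?}");
    }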