about summary refs log tree commit diff
diff options
context:
space:
mode:
authorNicholas Nethercote <n.nethercote@gmail.com>2024-08-20 17:47:53 +1000
committerNicholas Nethercote <n.nethercote@gmail.com>2024-08-23 14:40:08 +1000
commit1ae521e9d57ba3a67b1007204da2836d8b19b4a2 (patch)
tree5ea26dfc9754198bb0b6861a95fef8dd5b976fef
parent312ecdb2ed37249ad539bc0732278b80b4f3c7f8 (diff)
downloadrust-1ae521e9d57ba3a67b1007204da2836d8b19b4a2.tar.gz
rust-1ae521e9d57ba3a67b1007204da2836d8b19b4a2.zip
Return earlier in some cases in `collect_token`.
This example triggers an assertion failure:
```
fn f() -> u32 {
    #[cfg_eval] #[cfg(not(FALSE))] 0
}
```
The sequence of events:
- `configure_annotatable` calls `parse_expr_force_collect`, which calls
  `collect_tokens`.
- Within that, we end up in `parse_expr_dot_or_call`, which again calls
  `collect_tokens`.
  - The return value of the `f` call is the expression `0`.
  - This inner call collects tokens for `0` (parser range 10..11) and
    creates a replacement covering `#[cfg(not(FALSE))] 0` (parser range
    0..11).
- We return to the outer `collect_tokens` call. The return value of the
  `f` call is *again* the expression `0`, again with the range 10..11,
  but the replacement from earlier covers the range 0..11. The code
  mistakenly assumes that any attributes from an inner `collect_tokens`
  call fit entirely within the body of the result of an outer
  `collect_tokens` call. So it adjusts the replacement parser range
  0..11 to a node range by subtracting 10, resulting in -10..1. This is
  an invalid range and triggers an assertion failure.

It's tricky to follow, but basically things get complicated when an AST
node is returned from an inner `collect_tokens` call and then returned
again from an outer `collect_token` node without being wrapped in any
kind of additional layer.

This commit changes `collect_tokens` to return early in some extra cases,
avoiding the construction of lazy tokens. In the example above, the
outer `collect_tokens` returns earlier because the `0` token already has
tokens and `self.capture_state.capturing` is `Capturing::No`. This early
return avoids the creation of the invalid range and the assertion
failure.

Fixes #129166. Note: these invalid ranges have been happening for a long
time. #128725 looks like it's at fault only because it introduced the
assertion that catches the invalid ranges.
-rw-r--r--compiler/rustc_parse/src/parser/attr_wrapper.rs39
-rw-r--r--tests/crashes/129166.rs7
-rw-r--r--tests/ui/conditional-compilation/invalid-node-range-issue-129166.rs11
-rw-r--r--tests/ui/conditional-compilation/invalid-node-range-issue-129166.stderr8
4 files changed, 41 insertions, 24 deletions
diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs
index 67d1e410c58..81b683705f3 100644
--- a/compiler/rustc_parse/src/parser/attr_wrapper.rs
+++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs
@@ -234,6 +234,8 @@ impl<'a> Parser<'a> {
         force_collect: ForceCollect,
         f: impl FnOnce(&mut Self, AttrVec) -> PResult<'a, (R, Trailing, UsePreAttrPos)>,
     ) -> PResult<'a, R> {
+        let possible_capture_mode = self.capture_cfg;
+
         // We must collect if anything could observe the collected tokens, i.e.
         // if any of the following conditions hold.
         // - We are force collecting tokens (because force collection requires
@@ -244,9 +246,9 @@ impl<'a> Parser<'a> {
             // - Our target supports custom inner attributes (custom
             //   inner attribute invocation might require token capturing).
             || R::SUPPORTS_CUSTOM_INNER_ATTRS
-            // - We are in `capture_cfg` mode (which requires tokens if
+            // - We are in "possible capture mode" (which requires tokens if
             //   the parsed node has `#[cfg]` or `#[cfg_attr]` attributes).
-            || self.capture_cfg;
+            || possible_capture_mode;
         if !needs_collection {
             return Ok(f(self, attrs.attrs)?.0);
         }
@@ -267,7 +269,7 @@ impl<'a> Parser<'a> {
             res?
         };
 
-        // When we're not in `capture_cfg` mode, then skip collecting and
+        // When we're not in "definite capture mode", then skip collecting and
         // return early if either of the following conditions hold.
         // - `None`: Our target doesn't support tokens at all (e.g. `NtIdent`).
         // - `Some(Some(_))`: Our target already has tokens set (e.g. we've
@@ -278,7 +280,10 @@ impl<'a> Parser<'a> {
         // Note that this check is independent of `force_collect`. There's no
         // need to collect tokens when we don't support tokens or already have
         // tokens.
-        if !self.capture_cfg && matches!(ret.tokens_mut(), None | Some(Some(_))) {
+        let definite_capture_mode = self.capture_cfg
+            && matches!(self.capture_state.capturing, Capturing::Yes)
+            && has_cfg_or_cfg_attr(ret.attrs());
+        if !definite_capture_mode && matches!(ret.tokens_mut(), None | Some(Some(_))) {
             return Ok(ret);
         }
 
@@ -298,11 +303,11 @@ impl<'a> Parser<'a> {
             //   the earlier `needs_tokens` check, and we don't need to
             //   check `R::SUPPORTS_CUSTOM_INNER_ATTRS`.)
             || needs_tokens(ret.attrs())
-            // - We are in `capture_cfg` mode and there are `#[cfg]` or
-            //   `#[cfg_attr]` attributes. (During normal non-`capture_cfg`
-            //   parsing, we don't need any special capturing for those
-            //   attributes, because they're builtin.)
-            || (self.capture_cfg && has_cfg_or_cfg_attr(ret.attrs()));
+            // - We are in "definite capture mode", which requires that there
+            //   are `#[cfg]` or `#[cfg_attr]` attributes. (During normal
+            //   non-`capture_cfg` parsing, we don't need any special capturing
+            //   for those attributes, because they're builtin.)
+            || definite_capture_mode;
         if !needs_collection {
             return Ok(ret);
         }
@@ -399,20 +404,18 @@ impl<'a> Parser<'a> {
             break_last_token: self.break_last_token,
             node_replacements,
         });
+        let mut tokens_used = false;
 
         // If we support tokens and don't already have them, store the newly captured tokens.
         if let Some(target_tokens @ None) = ret.tokens_mut() {
+            tokens_used = true;
             *target_tokens = Some(tokens.clone());
         }
 
-        // If `capture_cfg` is set and we're inside a recursive call to
-        // `collect_tokens`, then we need to register a replace range if we
-        // have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager
-        // cfg-expansion on the captured token stream.
-        if self.capture_cfg
-            && matches!(self.capture_state.capturing, Capturing::Yes)
-            && has_cfg_or_cfg_attr(ret.attrs())
-        {
+        // If in "definite capture mode" we need to register a replace range
+        // for the `#[cfg]` and/or `#[cfg_attr]` attrs. This allows us to run
+        // eager cfg-expansion on the captured token stream.
+        if definite_capture_mode {
             assert!(!self.break_last_token, "Should not have unglued last token with cfg attr");
 
             // What is the status here when parsing the example code at the top of this method?
@@ -430,6 +433,7 @@ impl<'a> Parser<'a> {
             let start_pos =
                 if has_outer_attrs { attrs.start_pos.unwrap() } else { collect_pos.start_pos };
             let target = AttrsTarget { attrs: ret.attrs().iter().cloned().collect(), tokens };
+            tokens_used = true;
             self.capture_state
                 .parser_replacements
                 .push((ParserRange(start_pos..end_pos), Some(target)));
@@ -439,6 +443,7 @@ impl<'a> Parser<'a> {
             self.capture_state.parser_replacements.clear();
             self.capture_state.inner_attr_parser_ranges.clear();
         }
+        assert!(tokens_used); // check we didn't create `tokens` unnecessarily
         Ok(ret)
     }
 }
diff --git a/tests/crashes/129166.rs b/tests/crashes/129166.rs
deleted file mode 100644
index d3635d410db..00000000000
--- a/tests/crashes/129166.rs
+++ /dev/null
@@ -1,7 +0,0 @@
-//@ known-bug: rust-lang/rust#129166
-
-fn main() {
-    #[cfg_eval]
-    #[cfg]
-    0
-}
diff --git a/tests/ui/conditional-compilation/invalid-node-range-issue-129166.rs b/tests/ui/conditional-compilation/invalid-node-range-issue-129166.rs
new file mode 100644
index 00000000000..794e6fad3fc
--- /dev/null
+++ b/tests/ui/conditional-compilation/invalid-node-range-issue-129166.rs
@@ -0,0 +1,11 @@
+// This was triggering an assertion failure in `NodeRange::new`.
+
+#![feature(cfg_eval)]
+#![feature(stmt_expr_attributes)]
+
+fn f() -> u32 {
+    #[cfg_eval] #[cfg(not(FALSE))] 0
+    //~^ ERROR removing an expression is not supported in this position
+}
+
+fn main() {}
diff --git a/tests/ui/conditional-compilation/invalid-node-range-issue-129166.stderr b/tests/ui/conditional-compilation/invalid-node-range-issue-129166.stderr
new file mode 100644
index 00000000000..0699e182bd5
--- /dev/null
+++ b/tests/ui/conditional-compilation/invalid-node-range-issue-129166.stderr
@@ -0,0 +1,8 @@
+error: removing an expression is not supported in this position
+  --> $DIR/invalid-node-range-issue-129166.rs:7:17
+   |
+LL |     #[cfg_eval] #[cfg(not(FALSE))] 0
+   |                 ^^^^^^^^^^^^^^^^^^
+
+error: aborting due to 1 previous error
+