Auto merge of #85211 - Aaron1011:metadata-invalid-span, r=michaelwoerister

Preserve `SyntaxContext` for invalid/dummy spans in crate metadata Fixes #85197 We already preserved the `SyntaxContext` for invalid/dummy spans in the incremental cache, but we weren't doing the same for crate metadata. If an invalid (lo/hi from different files) span is written to the incremental cache, we will decode it with a 'dummy' location, but keep the original `SyntaxContext`. Since the crate metadata encoder was only checking for `DUMMY_SP` (dummy location + root `SyntaxContext`), the metadata encoder would treat it as a normal span, encoding the `SyntaxContext`. As a result, the final span encoded to the metadata would change across sessions, even if the crate itself was unchanged. This could lead to an 'unstable fingerprint' ICE under the following conditions: 1. We compile a crate with an invalid span using incremental compilation. The metadata encoder discards the `SyntaxContext` since the span is invalid, while the incremental cache encoder preserves the `SyntaxContext` 2. From another crate, we execute a foreign query, decoding the invalid span from the metadata as `DUMMY_SP` (e.g. with `SyntaxContext::root()`). This span gets hashed into the query fingerprint. So far, this has always happened through the `optimized_mir` query. 3. We recompile the first crate using our populated incremental cache, without changing anything. We load the (previously) invalid span from our incremental cache - it gets converted to a span with a dummy (but valid) location, along with the original `SyntaxContext`. This span gets written out to the crate metadata - since it now has a valid location, we preserve its `SyntaxContext`. 4. We recompile the second crate, again using a populated incremental cache. We now re-run the foreign query `optimized_mir` - the foreign crate hash is unchanged, but we end up decoding a different span (it now ha a non-root `SyntaxContext`). This results in the fingerprint changing, resulting in an ICE. This PR updates our encoding of spans in the crate metadata to mirror the encoding of spans into the incremental cache. We now always encode a `SyntaxContext`, and encode location information for spans with a non-dummy location.
author: bors <bors@rust-lang.org> 2021-05-14 16:58:30 +0000
committer: bors <bors@rust-lang.org> 2021-05-14 16:58:30 +0000
commit: 1025db84a68b948139b5adcd55da31bce32da8f3 (patch)
tree: 67d5db064ab62d2206d0a7bcf34172865263bb0a
parent: 75da570d784a798a34ff1e5048cd9a6a2fb23170 (diff)
parent: cdca3c81c1eed6f09733beea4dc2517e6f3d71f6 (diff)
download: rust-1025db84a68b948139b5adcd55da31bce32da8f3.tar.gz
rust-1025db84a68b948139b5adcd55da31bce32da8f3.zip
7 files changed, 113 insertions, 45 deletions
diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs
index de5279c4a8d..b27eef376c4 100644
--- a/compiler/rustc_metadata/src/rmeta/decoder.rs
+++ b/compiler/rustc_metadata/src/rmeta/decoder.rs
@@ -406,17 +406,17 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for ExpnId {
 
 impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Span {
     fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Result<Span, String> {
+        let ctxt = SyntaxContext::decode(decoder)?;
         let tag = u8::decode(decoder)?;
 
-        if tag == TAG_INVALID_SPAN {
-            return Ok(DUMMY_SP);
+        if tag == TAG_PARTIAL_SPAN {
+            return Ok(DUMMY_SP.with_ctxt(ctxt));
         }
 
         debug_assert!(tag == TAG_VALID_SPAN_LOCAL || tag == TAG_VALID_SPAN_FOREIGN);
 
         let lo = BytePos::decode(decoder)?;
         let len = BytePos::decode(decoder)?;
-        let ctxt = SyntaxContext::decode(decoder)?;
         let hi = lo + len;
 
         let sess = if let Some(sess) = decoder.sess {
diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs
index 19c713665c7..29fcbffa0b9 100644
--- a/compiler/rustc_metadata/src/rmeta/encoder.rs
+++ b/compiler/rustc_metadata/src/rmeta/encoder.rs
@@ -187,11 +187,48 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for ExpnId {
 
 impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
     fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) -> opaque::EncodeResult {
-        if *self == rustc_span::DUMMY_SP {
-            return TAG_INVALID_SPAN.encode(s);
+        let span = self.data();
+
+        // Don't serialize any `SyntaxContext`s from a proc-macro crate,
+        // since we don't load proc-macro dependencies during serialization.
+        // This means that any hygiene information from macros used *within*
+        // a proc-macro crate (e.g. invoking a macro that expands to a proc-macro
+        // definition) will be lost.
+        //
+        // This can show up in two ways:
+        //
+        // 1. Any hygiene information associated with identifier of
+        // a proc macro (e.g. `#[proc_macro] pub fn $name`) will be lost.
+        // Since proc-macros can only be invoked from a different crate,
+        // real code should never need to care about this.
+        //
+        // 2. Using `Span::def_site` or `Span::mixed_site` will not
+        // include any hygiene information associated with the definition
+        // site. This means that a proc-macro cannot emit a `$crate`
+        // identifier which resolves to one of its dependencies,
+        // which also should never come up in practice.
+        //
+        // Additionally, this affects `Span::parent`, and any other
+        // span inspection APIs that would otherwise allow traversing
+        // the `SyntaxContexts` associated with a span.
+        //
+        // None of these user-visible effects should result in any
+        // cross-crate inconsistencies (getting one behavior in the same
+        // crate, and a different behavior in another crate) due to the
+        // limited surface that proc-macros can expose.
+        //
+        // IMPORTANT: If this is ever changed, be sure to update
+        // `rustc_span::hygiene::raw_encode_expn_id` to handle
+        // encoding `ExpnData` for proc-macro crates.
+        if s.is_proc_macro {
+            SyntaxContext::root().encode(s)?;
+        } else {
+            span.ctxt.encode(s)?;
         }
 
-        let span = self.data();
+        if self.is_dummy() {
+            return TAG_PARTIAL_SPAN.encode(s);
+        }
 
         // The Span infrastructure should make sure that this invariant holds:
         debug_assert!(span.lo <= span.hi);
@@ -206,7 +243,7 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
         if !s.source_file_cache.0.contains(span.hi) {
             // Unfortunately, macro expansion still sometimes generates Spans
             // that malformed in this way.
-            return TAG_INVALID_SPAN.encode(s);
+            return TAG_PARTIAL_SPAN.encode(s);
         }
 
         let source_files = s.required_source_files.as_mut().expect("Already encoded SourceMap!");
@@ -262,43 +299,6 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
         let len = hi - lo;
         len.encode(s)?;
 
-        // Don't serialize any `SyntaxContext`s from a proc-macro crate,
-        // since we don't load proc-macro dependencies during serialization.
-        // This means that any hygiene information from macros used *within*
-        // a proc-macro crate (e.g. invoking a macro that expands to a proc-macro
-        // definition) will be lost.
-        //
-        // This can show up in two ways:
-        //
-        // 1. Any hygiene information associated with identifier of
-        // a proc macro (e.g. `#[proc_macro] pub fn $name`) will be lost.
-        // Since proc-macros can only be invoked from a different crate,
-        // real code should never need to care about this.
-        //
-        // 2. Using `Span::def_site` or `Span::mixed_site` will not
-        // include any hygiene information associated with the definition
-        // site. This means that a proc-macro cannot emit a `$crate`
-        // identifier which resolves to one of its dependencies,
-        // which also should never come up in practice.
-        //
-        // Additionally, this affects `Span::parent`, and any other
-        // span inspection APIs that would otherwise allow traversing
-        // the `SyntaxContexts` associated with a span.
-        //
-        // None of these user-visible effects should result in any
-        // cross-crate inconsistencies (getting one behavior in the same
-        // crate, and a different behavior in another crate) due to the
-        // limited surface that proc-macros can expose.
-        //
-        // IMPORTANT: If this is ever changed, be sure to update
-        // `rustc_span::hygiene::raw_encode_expn_id` to handle
-        // encoding `ExpnData` for proc-macro crates.
-        if s.is_proc_macro {
-            SyntaxContext::root().encode(s)?;
-        } else {
-            span.ctxt.encode(s)?;
-        }
-
         if tag == TAG_VALID_SPAN_FOREIGN {
             // This needs to be two lines to avoid holding the `s.source_file_cache`
             // while calling `cnum.encode(s)`
diff --git a/compiler/rustc_metadata/src/rmeta/mod.rs b/compiler/rustc_metadata/src/rmeta/mod.rs
index 6b375045f14..1fb68fd5fc3 100644
--- a/compiler/rustc_metadata/src/rmeta/mod.rs
+++ b/compiler/rustc_metadata/src/rmeta/mod.rs
@@ -451,4 +451,4 @@ struct GeneratorData<'tcx> {
 // Tags used for encoding Spans:
 const TAG_VALID_SPAN_LOCAL: u8 = 0;
 const TAG_VALID_SPAN_FOREIGN: u8 = 1;
-const TAG_INVALID_SPAN: u8 = 2;
+const TAG_PARTIAL_SPAN: u8 = 2;
diff --git a/src/test/incremental/issue-85197-invalid-span/auxiliary/invalid-span-helper-lib.rs b/src/test/incremental/issue-85197-invalid-span/auxiliary/invalid-span-helper-lib.rs
new file mode 100644
index 00000000000..2453af5b6b4
--- /dev/null
+++ b/src/test/incremental/issue-85197-invalid-span/auxiliary/invalid-span-helper-lib.rs
@@ -0,0 +1,11 @@
+// revisions: rpass1 rpass2
+
+extern crate respan;
+
+#[macro_use]
+#[path = "invalid-span-helper-mod.rs"]
+mod invalid_span_helper_mod;
+
+// Invoke a macro from a different file - this
+// allows us to get tokens with spans from different files
+helper!(1);
diff --git a/src/test/incremental/issue-85197-invalid-span/auxiliary/invalid-span-helper-mod.rs b/src/test/incremental/issue-85197-invalid-span/auxiliary/invalid-span-helper-mod.rs
new file mode 100644
index 00000000000..747174b1ebf
--- /dev/null
+++ b/src/test/incremental/issue-85197-invalid-span/auxiliary/invalid-span-helper-mod.rs
@@ -0,0 +1,14 @@
+#[macro_export]
+macro_rules! helper {
+    // Use `:tt` instead of `:ident` so that we don't get a `None`-delimited group
+    ($first:tt) => {
+        pub fn foo<T>() {
+            // The span of `$first` comes from another file,
+            // so the expression `1 + $first` ends up with an
+            // 'invalid' span that starts and ends in different files.
+            // We use the `respan!` macro to give all tokens the same
+            // `SyntaxContext`, so that the parser will try to merge the spans.
+            respan::respan!(let a = 1 + $first;);
+        }
+    }
+}
diff --git a/src/test/incremental/issue-85197-invalid-span/auxiliary/respan.rs b/src/test/incremental/issue-85197-invalid-span/auxiliary/respan.rs
new file mode 100644
index 00000000000..5088eab6294
--- /dev/null
+++ b/src/test/incremental/issue-85197-invalid-span/auxiliary/respan.rs
@@ -0,0 +1,19 @@
+// force-host
+// no-prefer-dynamic
+
+#![crate_type = "proc-macro"]
+
+extern crate proc_macro;
+use proc_macro::TokenStream;
+
+
+/// Copies the resolution information (the `SyntaxContext`) of the first
+/// token to all other tokens in the stream. Does not recurse into groups.
+#[proc_macro]
+pub fn respan(input: TokenStream) -> TokenStream {
+    let first_span = input.clone().into_iter().next().unwrap().span();
+    input.into_iter().map(|mut tree| {
+        tree.set_span(tree.span().resolved_at(first_span));
+        tree
+    }).collect()
+}
diff --git a/src/test/incremental/issue-85197-invalid-span/invalid_span_main.rs b/src/test/incremental/issue-85197-invalid-span/invalid_span_main.rs
new file mode 100644
index 00000000000..f358460b338
--- /dev/null
+++ b/src/test/incremental/issue-85197-invalid-span/invalid_span_main.rs
@@ -0,0 +1,24 @@
+// revisions: rpass1 rpass2
+// aux-build:respan.rs
+// aux-build:invalid-span-helper-lib.rs
+
+// This issue has several different parts. The high level idea is:
+// 1. We create an 'invalid' span with the help of the `respan` proc-macro,
+// The compiler attempts to prevent the creation of invalid spans by
+// refusing to join spans with different `SyntaxContext`s. We work around
+// this by applying the same `SyntaxContext` to the span of every token,
+// using `Span::resolved_at`
+// 2. We using this invalid span in the body of a function, causing it to get
+// encoded into the `optimized_mir`
+// 3. We call the function from a different crate - since the function is generic,
+// monomorphization runs, causing `optimized_mir` to get called.
+// 4. We re-run compilation using our populated incremental cache, but without
+// making any changes. When we recompile the crate containing our generic function
+// (`invalid_span_helper_lib`), we load the span from the incremental cache, and
+// write it into the crate metadata.
+
+extern crate invalid_span_helper_lib;
+
+fn main() {
+    invalid_span_helper_lib::foo::<u8>();
+}
author	bors <bors@rust-lang.org>	2021-05-14 16:58:30 +0000
committer	bors <bors@rust-lang.org>	2021-05-14 16:58:30 +0000
commit	1025db84a68b948139b5adcd55da31bce32da8f3 (patch)
tree	67d5db064ab62d2206d0a7bcf34172865263bb0a
parent	75da570d784a798a34ff1e5048cd9a6a2fb23170 (diff)
parent	cdca3c81c1eed6f09733beea4dc2517e6f3d71f6 (diff)
download	rust-1025db84a68b948139b5adcd55da31bce32da8f3.tar.gz rust-1025db84a68b948139b5adcd55da31bce32da8f3.zip