about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2021-06-22 14:53:58 +0000
committerbors <bors@rust-lang.org>2021-06-22 14:53:58 +0000
commit80926fc409d671e7da13f08c90642b1e71f800d9 (patch)
tree726ca593e13f10dfc8c0ec3eb38aa40c2e27403a
parent3487be11d5f3c9afc4d8e44438cdd2af1e98c859 (diff)
parentc3c4ab5ed204b09a8446f9abbdb4d49b278756be (diff)
downloadrust-80926fc409d671e7da13f08c90642b1e71f800d9.tar.gz
rust-80926fc409d671e7da13f08c90642b1e71f800d9.zip
Auto merge of #86368 - michaelwoerister:lexing-ice, r=davidtwco
Disambiguate between SourceFiles from different crates even if they have the same path

This PR fixes an ICE that can occur when the compiler encounters a source file that is part of both the local crate and an upstream crate:

1. While importing source files from an upstream crate the compiler creates a `SourceFile` entry for `foo.rs` in the `SourceMap`. Since this is an imported source file its `src` field is `None`.
2. At a later point the parser encounters `foo.rs` again. It tells the `SourceMap` to load the file but because we already have an entry for `foo.rs` the `SourceMap` will return the existing version with `src == None`.
3. The parser proceeds under the assumption that `src.is_some()` and panics when actually trying to use the file's contents.

This PR fixes the issue by adding the source file's associated `CrateNum` to the `SourceMap`'s interning key. As a consequence the two instances of the file will each have a separate entry in the `SourceMap`. They just happen to share the same file path. This approach seemed less problematic to me than trying to mutate the `SourceFile` after it had already been created.

Another, more involved, approach might be to merge the `src` and the `external_src` field.

Fixes #85955
-rw-r--r--compiler/rustc_middle/src/ty/query/on_disk_cache.rs38
-rw-r--r--compiler/rustc_span/src/source_map.rs41
-rw-r--r--src/test/ui/include-macros/auxiliary/same-file-in-two-crates-aux.rs4
-rw-r--r--src/test/ui/include-macros/same-file-in-two-crates.rs21
4 files changed, 89 insertions, 15 deletions
diff --git a/compiler/rustc_middle/src/ty/query/on_disk_cache.rs b/compiler/rustc_middle/src/ty/query/on_disk_cache.rs
index ebaef347f42..6df8e64fada 100644
--- a/compiler/rustc_middle/src/ty/query/on_disk_cache.rs
+++ b/compiler/rustc_middle/src/ty/query/on_disk_cache.rs
@@ -54,7 +54,7 @@ pub struct OnDiskCache<'sess> {
     cnum_map: OnceCell<UnhashMap<StableCrateId, CrateNum>>,
 
     source_map: &'sess SourceMap,
-    file_index_to_stable_id: FxHashMap<SourceFileIndex, StableSourceFileId>,
+    file_index_to_stable_id: FxHashMap<SourceFileIndex, EncodedSourceFileId>,
 
     // Caches that are populated lazily during decoding.
     file_index_to_file: Lock<FxHashMap<SourceFileIndex, Lrc<SourceFile>>>,
@@ -111,7 +111,7 @@ pub struct OnDiskCache<'sess> {
 // This type is used only for serialization and deserialization.
 #[derive(Encodable, Decodable)]
 struct Footer {
-    file_index_to_stable_id: FxHashMap<SourceFileIndex, StableSourceFileId>,
+    file_index_to_stable_id: FxHashMap<SourceFileIndex, EncodedSourceFileId>,
     query_result_index: EncodedQueryResultIndex,
     diagnostics_index: EncodedQueryResultIndex,
     // The location of all allocations.
@@ -157,6 +157,32 @@ crate struct RawDefId {
     pub index: u32,
 }
 
+/// An `EncodedSourceFileId` is the same as a `StableSourceFileId` except that
+/// the source crate is represented as a [StableCrateId] instead of as a
+/// `CrateNum`. This way `EncodedSourceFileId` can be encoded and decoded
+/// without any additional context, i.e. with a simple `opaque::Decoder` (which
+/// is the only thing available when decoding the cache's [Footer].
+#[derive(Encodable, Decodable, Clone, Debug)]
+struct EncodedSourceFileId {
+    file_name_hash: u64,
+    stable_crate_id: StableCrateId,
+}
+
+impl EncodedSourceFileId {
+    fn translate(&self, cnum_map: &UnhashMap<StableCrateId, CrateNum>) -> StableSourceFileId {
+        let cnum = cnum_map[&self.stable_crate_id];
+        StableSourceFileId { file_name_hash: self.file_name_hash, cnum }
+    }
+
+    fn new(tcx: TyCtxt<'_>, file: &SourceFile) -> EncodedSourceFileId {
+        let source_file_id = StableSourceFileId::new(file);
+        EncodedSourceFileId {
+            file_name_hash: source_file_id.file_name_hash,
+            stable_crate_id: tcx.stable_crate_id(source_file_id.cnum),
+        }
+    }
+}
+
 impl<'sess> OnDiskCache<'sess> {
     /// Creates a new `OnDiskCache` instance from the serialized data in `data`.
     pub fn new(sess: &'sess Session, data: Vec<u8>, start_pos: usize) -> Self {
@@ -238,7 +264,8 @@ impl<'sess> OnDiskCache<'sess> {
                     let index = SourceFileIndex(index as u32);
                     let file_ptr: *const SourceFile = &**file as *const _;
                     file_to_file_index.insert(file_ptr, index);
-                    file_index_to_stable_id.insert(index, StableSourceFileId::new(&file));
+                    let source_file_id = EncodedSourceFileId::new(tcx, &file);
+                    file_index_to_stable_id.insert(index, source_file_id);
                 }
 
                 (file_to_file_index, file_index_to_stable_id)
@@ -605,7 +632,7 @@ pub struct CacheDecoder<'a, 'tcx> {
     source_map: &'a SourceMap,
     cnum_map: &'a UnhashMap<StableCrateId, CrateNum>,
     file_index_to_file: &'a Lock<FxHashMap<SourceFileIndex, Lrc<SourceFile>>>,
-    file_index_to_stable_id: &'a FxHashMap<SourceFileIndex, StableSourceFileId>,
+    file_index_to_stable_id: &'a FxHashMap<SourceFileIndex, EncodedSourceFileId>,
     alloc_decoding_session: AllocDecodingSession<'a>,
     syntax_contexts: &'a FxHashMap<u32, AbsoluteBytePos>,
     expn_data: &'a FxHashMap<u32, AbsoluteBytePos>,
@@ -618,6 +645,7 @@ impl<'a, 'tcx> CacheDecoder<'a, 'tcx> {
             ref file_index_to_file,
             ref file_index_to_stable_id,
             ref source_map,
+            ref cnum_map,
             ..
         } = *self;
 
@@ -625,7 +653,7 @@ impl<'a, 'tcx> CacheDecoder<'a, 'tcx> {
             .borrow_mut()
             .entry(index)
             .or_insert_with(|| {
-                let stable_id = file_index_to_stable_id[&index];
+                let stable_id = file_index_to_stable_id[&index].translate(cnum_map);
                 source_map
                     .source_file_by_stable_id(stable_id)
                     .expect("failed to lookup `SourceFile` in new context")
diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs
index 1d45cd172b3..32031ac7071 100644
--- a/compiler/rustc_span/src/source_map.rs
+++ b/compiler/rustc_span/src/source_map.rs
@@ -117,25 +117,42 @@ impl FileLoader for RealFileLoader {
     }
 }
 
-// This is a `SourceFile` identifier that is used to correlate `SourceFile`s between
-// subsequent compilation sessions (which is something we need to do during
-// incremental compilation).
+/// This is a [SourceFile] identifier that is used to correlate source files between
+/// subsequent compilation sessions (which is something we need to do during
+/// incremental compilation).
+///
+/// The [StableSourceFileId] also contains the CrateNum of the crate the source
+/// file was originally parsed for. This way we get two separate entries in
+/// the [SourceMap] if the same file is part of both the local and an upstream
+/// crate. Trying to only have one entry for both cases is problematic because
+/// at the point where we discover that there's a local use of the file in
+/// addition to the upstream one, we might already have made decisions based on
+/// the assumption that it's an upstream file. Treating the two files as
+/// different has no real downsides.
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Encodable, Decodable, Debug)]
-pub struct StableSourceFileId(u128);
+pub struct StableSourceFileId {
+    // A hash of the source file's FileName. This is hash so that it's size
+    // is more predictable than if we included the actual FileName value.
+    pub file_name_hash: u64,
+
+    // The CrateNum of the crate this source file was originally parsed for.
+    // We cannot include this information in the hash because at the time
+    // of hashing we don't have the context to map from the CrateNum's numeric
+    // value to a StableCrateId.
+    pub cnum: CrateNum,
+}
 
 // FIXME: we need a more globally consistent approach to the problem solved by
 // StableSourceFileId, perhaps built atop source_file.name_hash.
 impl StableSourceFileId {
     pub fn new(source_file: &SourceFile) -> StableSourceFileId {
-        StableSourceFileId::new_from_name(&source_file.name)
+        StableSourceFileId::new_from_name(&source_file.name, source_file.cnum)
     }
 
-    fn new_from_name(name: &FileName) -> StableSourceFileId {
+    fn new_from_name(name: &FileName, cnum: CrateNum) -> StableSourceFileId {
         let mut hasher = StableHasher::new();
-
         name.hash(&mut hasher);
-
-        StableSourceFileId(hasher.finish())
+        StableSourceFileId { file_name_hash: hasher.finish(), cnum }
     }
 }
 
@@ -274,7 +291,7 @@ impl SourceMap {
         // be empty, so the working directory will be used.
         let (filename, _) = self.path_mapping.map_filename_prefix(&filename);
 
-        let file_id = StableSourceFileId::new_from_name(&filename);
+        let file_id = StableSourceFileId::new_from_name(&filename, LOCAL_CRATE);
 
         let lrc_sf = match self.source_file_by_stable_id(file_id) {
             Some(lrc_sf) => lrc_sf,
@@ -288,6 +305,10 @@ impl SourceMap {
                     self.hash_kind,
                 ));
 
+                // Let's make sure the file_id we generated above actually matches
+                // the ID we generate for the SourceFile we just created.
+                debug_assert_eq!(StableSourceFileId::new(&source_file), file_id);
+
                 let mut files = self.files.borrow_mut();
 
                 files.source_files.push(source_file.clone());
diff --git a/src/test/ui/include-macros/auxiliary/same-file-in-two-crates-aux.rs b/src/test/ui/include-macros/auxiliary/same-file-in-two-crates-aux.rs
new file mode 100644
index 00000000000..7b680bce49e
--- /dev/null
+++ b/src/test/ui/include-macros/auxiliary/same-file-in-two-crates-aux.rs
@@ -0,0 +1,4 @@
+#[inline]
+pub fn some_function() -> u32 {
+    1
+}
diff --git a/src/test/ui/include-macros/same-file-in-two-crates.rs b/src/test/ui/include-macros/same-file-in-two-crates.rs
new file mode 100644
index 00000000000..f49efa2cf8a
--- /dev/null
+++ b/src/test/ui/include-macros/same-file-in-two-crates.rs
@@ -0,0 +1,21 @@
+// This test makes sure that the compiler can handle the same source file to be
+// part of the local crate *and* an upstream crate. This can happen, for example,
+// when there is some auto-generated code that is part of both a library and an
+// accompanying integration test.
+//
+// The test uses include!() to include a source file that is also part of
+// an upstream crate.
+//
+// This is a regression test for https://github.com/rust-lang/rust/issues/85955.
+
+// check-pass
+// compile-flags: --crate-type=rlib
+// aux-build:same-file-in-two-crates-aux.rs
+extern crate same_file_in_two_crates_aux;
+
+pub fn foo() -> u32 {
+    same_file_in_two_crates_aux::some_function() +
+    some_function()
+}
+
+include!("./auxiliary/same-file-in-two-crates-aux.rs");