Auto merge of #119139 - michaelwoerister:cleanup-stable-source-file-id, r=cjgillot

Unify SourceFile::name_hash and StableSourceFileId This PR adapts the existing `StableSourceFileId` type so that it can be used instead of the `name_hash` field of `SourceFile`. This simplifies a few things that were kind of duplicated before. The PR should also fix issues https://github.com/rust-lang/rust/issues/112700 and https://github.com/rust-lang/rust/issues/115835, but I was not able to reproduce these issues in a regression test. As far as I can tell, the root cause of these issues is that the id of the originating crate is not hashed in the `HashStable` impl of `Span` and thus cache entries that should have been considered invalidated were loaded. After this PR, the `stable_id` field of `SourceFile` includes information about the originating crate, so that ICE should not occur anymore.
author: bors <bors@rust-lang.org> 2023-12-24 21:58:39 +0000
committer: bors <bors@rust-lang.org> 2023-12-24 21:58:39 +0000
commit: bf8716f1cd6416266807706bcae0ecb2e51c9d4a (patch)
tree: dbac1f4295bceef5d0e41dc8903e6b4a7c515a92 /compiler/rustc_span/src
parent: e87ccb8676be9ab641849a2539b215d0c9027237 (diff)
parent: fa8ef253720446959a7bab4593c62a1819563cbc (diff)
download: rust-bf8716f1cd6416266807706bcae0ecb2e51c9d4a.tar.gz
rust-bf8716f1cd6416266807706bcae0ecb2e51c9d4a.zip
3 files changed, 81 insertions, 64 deletions
diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs
index cc3f0962d6c..8f64eed9a87 100644
--- a/compiler/rustc_span/src/lib.rs
+++ b/compiler/rustc_span/src/lib.rs
@@ -58,7 +58,7 @@ pub use hygiene::{DesugaringKind, ExpnKind, MacroKind};
 pub use hygiene::{ExpnData, ExpnHash, ExpnId, LocalExpnId, SyntaxContext};
 use rustc_data_structures::stable_hasher::HashingControls;
 pub mod def_id;
-use def_id::{CrateNum, DefId, DefPathHash, LocalDefId, LOCAL_CRATE};
+use def_id::{CrateNum, DefId, DefPathHash, LocalDefId, StableCrateId, LOCAL_CRATE};
 pub mod edit_distance;
 mod span_encoding;
 pub use span_encoding::{Span, DUMMY_SP};
@@ -1333,8 +1333,10 @@ pub struct SourceFile {
     pub non_narrow_chars: Vec<NonNarrowChar>,
     /// Locations of characters removed during normalization.
     pub normalized_pos: Vec<NormalizedPos>,
-    /// A hash of the filename, used for speeding up hashing in incremental compilation.
-    pub name_hash: Hash128,
+    /// A hash of the filename & crate-id, used for uniquely identifying source
+    /// files within the crate graph and for speeding up hashing in incremental
+    /// compilation.
+    pub stable_id: StableSourceFileId,
     /// Indicates which crate this `SourceFile` was imported from.
     pub cnum: CrateNum,
 }
@@ -1352,7 +1354,7 @@ impl Clone for SourceFile {
             multibyte_chars: self.multibyte_chars.clone(),
             non_narrow_chars: self.non_narrow_chars.clone(),
             normalized_pos: self.normalized_pos.clone(),
-            name_hash: self.name_hash,
+            stable_id: self.stable_id,
             cnum: self.cnum,
         }
     }
@@ -1426,7 +1428,7 @@ impl<S: Encoder> Encodable<S> for SourceFile {
 
         self.multibyte_chars.encode(s);
         self.non_narrow_chars.encode(s);
-        self.name_hash.encode(s);
+        self.stable_id.encode(s);
         self.normalized_pos.encode(s);
         self.cnum.encode(s);
     }
@@ -1453,7 +1455,7 @@ impl<D: Decoder> Decodable<D> for SourceFile {
         };
         let multibyte_chars: Vec<MultiByteChar> = Decodable::decode(d);
         let non_narrow_chars: Vec<NonNarrowChar> = Decodable::decode(d);
-        let name_hash = Decodable::decode(d);
+        let stable_id = Decodable::decode(d);
         let normalized_pos: Vec<NormalizedPos> = Decodable::decode(d);
         let cnum: CrateNum = Decodable::decode(d);
         SourceFile {
@@ -1469,7 +1471,7 @@ impl<D: Decoder> Decodable<D> for SourceFile {
             multibyte_chars,
             non_narrow_chars,
             normalized_pos,
-            name_hash,
+            stable_id,
             cnum,
         }
     }
@@ -1481,6 +1483,66 @@ impl fmt::Debug for SourceFile {
     }
 }
 
+/// This is a [SourceFile] identifier that is used to correlate source files between
+/// subsequent compilation sessions (which is something we need to do during
+/// incremental compilation).
+///
+/// It is a hash value (so we can efficiently consume it when stable-hashing
+/// spans) that consists of the `FileName` and the `StableCrateId` of the crate
+/// the source file is from. The crate id is needed because sometimes the
+/// `FileName` is not unique within the crate graph (think `src/lib.rs`, for
+/// example).
+///
+/// The way the crate-id part is handled is a bit special: source files of the
+/// local crate are hashed as `(filename, None)`, while source files from
+/// upstream crates have a hash of `(filename, Some(stable_crate_id))`. This
+/// is because SourceFiles for the local crate are allocated very early in the
+/// compilation process when the `StableCrateId` is not yet known. If, due to
+/// some refactoring of the compiler, the `StableCrateId` of the local crate
+/// were to become available, it would be better to uniformely make this a
+/// hash of `(filename, stable_crate_id)`.
+///
+/// When `SourceFile`s are exported in crate metadata, the `StableSourceFileId`
+/// is updated to incorporate the `StableCrateId` of the exporting crate.
+#[derive(
+    Debug,
+    Clone,
+    Copy,
+    Hash,
+    PartialEq,
+    Eq,
+    HashStable_Generic,
+    Encodable,
+    Decodable,
+    Default,
+    PartialOrd,
+    Ord
+)]
+pub struct StableSourceFileId(Hash128);
+
+impl StableSourceFileId {
+    fn from_filename_in_current_crate(filename: &FileName) -> Self {
+        Self::from_filename_and_stable_crate_id(filename, None)
+    }
+
+    pub fn from_filename_for_export(
+        filename: &FileName,
+        local_crate_stable_crate_id: StableCrateId,
+    ) -> Self {
+        Self::from_filename_and_stable_crate_id(filename, Some(local_crate_stable_crate_id))
+    }
+
+    fn from_filename_and_stable_crate_id(
+        filename: &FileName,
+        stable_crate_id: Option<StableCrateId>,
+    ) -> Self {
+        let mut hasher = StableHasher::new();
+        filename.hash(&mut hasher);
+        stable_crate_id.hash(&mut hasher);
+        StableSourceFileId(hasher.finish())
+    }
+}
+
 impl SourceFile {
     pub fn new(
         name: FileName,
@@ -1491,11 +1553,7 @@ impl SourceFile {
         let src_hash = SourceFileHash::new(hash_kind, &src);
         let normalized_pos = normalize_src(&mut src);
 
-        let name_hash = {
-            let mut hasher: StableHasher = StableHasher::new();
-            name.hash(&mut hasher);
-            hasher.finish()
-        };
+        let stable_id = StableSourceFileId::from_filename_in_current_crate(&name);
         let source_len = src.len();
         let source_len = u32::try_from(source_len).map_err(|_| OffsetOverflowError)?;
 
@@ -1513,7 +1571,7 @@ impl SourceFile {
             multibyte_chars,
             non_narrow_chars,
             normalized_pos,
-            name_hash,
+            stable_id,
             cnum: LOCAL_CRATE,
         })
     }
@@ -2213,7 +2271,7 @@ where
         };
 
         Hash::hash(&TAG_VALID_SPAN, hasher);
-        Hash::hash(&file.name_hash, hasher);
+        Hash::hash(&file.stable_id, hasher);
 
         // Hash both the length and the end location (line/column) of a span. If we
         // hash only the length, for example, then two otherwise equal spans with
diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs
index cb10e6bf2ba..c61dbcaae95 100644
--- a/compiler/rustc_span/src/source_map.rs
+++ b/compiler/rustc_span/src/source_map.rs
@@ -13,7 +13,6 @@ use crate::*;
 use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::sync::{IntoDynSyncSend, MappedReadGuard, ReadGuard, RwLock};
 use std::fs;
-use std::hash::Hash;
 use std::io::{self, BorrowedBuf, Read};
 use std::path::{self};
 
@@ -152,45 +151,6 @@ impl FileLoader for RealFileLoader {
     }
 }
 
-/// This is a [SourceFile] identifier that is used to correlate source files between
-/// subsequent compilation sessions (which is something we need to do during
-/// incremental compilation).
-///
-/// The [StableSourceFileId] also contains the CrateNum of the crate the source
-/// file was originally parsed for. This way we get two separate entries in
-/// the [SourceMap] if the same file is part of both the local and an upstream
-/// crate. Trying to only have one entry for both cases is problematic because
-/// at the point where we discover that there's a local use of the file in
-/// addition to the upstream one, we might already have made decisions based on
-/// the assumption that it's an upstream file. Treating the two files as
-/// different has no real downsides.
-#[derive(Copy, Clone, PartialEq, Eq, Hash, Encodable, Decodable, Debug)]
-pub struct StableSourceFileId {
-    /// A hash of the source file's [`FileName`]. This is hash so that it's size
-    /// is more predictable than if we included the actual [`FileName`] value.
-    pub file_name_hash: Hash64,
-
-    /// The [`CrateNum`] of the crate this source file was originally parsed for.
-    /// We cannot include this information in the hash because at the time
-    /// of hashing we don't have the context to map from the [`CrateNum`]'s numeric
-    /// value to a `StableCrateId`.
-    pub cnum: CrateNum,
-}
-
-// FIXME: we need a more globally consistent approach to the problem solved by
-// StableSourceFileId, perhaps built atop source_file.name_hash.
-impl StableSourceFileId {
-    pub fn new(source_file: &SourceFile) -> StableSourceFileId {
-        StableSourceFileId::new_from_name(&source_file.name, source_file.cnum)
-    }
-
-    fn new_from_name(name: &FileName, cnum: CrateNum) -> StableSourceFileId {
-        let mut hasher = StableHasher::new();
-        name.hash(&mut hasher);
-        StableSourceFileId { file_name_hash: hasher.finish(), cnum }
-    }
-}
-
 // _____________________________________________________________________________
 // SourceMap
 //
@@ -320,17 +280,17 @@ impl SourceMap {
         // be empty, so the working directory will be used.
         let (filename, _) = self.path_mapping.map_filename_prefix(&filename);
 
-        let file_id = StableSourceFileId::new_from_name(&filename, LOCAL_CRATE);
-        match self.source_file_by_stable_id(file_id) {
+        let stable_id = StableSourceFileId::from_filename_in_current_crate(&filename);
+        match self.source_file_by_stable_id(stable_id) {
             Some(lrc_sf) => Ok(lrc_sf),
             None => {
                 let source_file = SourceFile::new(filename, src, self.hash_kind)?;
 
                 // Let's make sure the file_id we generated above actually matches
                 // the ID we generate for the SourceFile we just created.
-                debug_assert_eq!(StableSourceFileId::new(&source_file), file_id);
+                debug_assert_eq!(source_file.stable_id, stable_id);
 
-                self.register_source_file(file_id, source_file)
+                self.register_source_file(stable_id, source_file)
             }
         }
     }
@@ -343,7 +303,7 @@ impl SourceMap {
         &self,
         filename: FileName,
         src_hash: SourceFileHash,
-        name_hash: Hash128,
+        stable_id: StableSourceFileId,
         source_len: u32,
         cnum: CrateNum,
         file_local_lines: FreezeLock<SourceFileLines>,
@@ -368,12 +328,11 @@ impl SourceMap {
             multibyte_chars,
             non_narrow_chars,
             normalized_pos,
-            name_hash,
+            stable_id,
             cnum,
         };
 
-        let file_id = StableSourceFileId::new(&source_file);
-        self.register_source_file(file_id, source_file)
+        self.register_source_file(stable_id, source_file)
             .expect("not enough address space for imported source file")
     }
 
diff --git a/compiler/rustc_span/src/source_map/tests.rs b/compiler/rustc_span/src/source_map/tests.rs
index 113ca493d36..130522a302d 100644
--- a/compiler/rustc_span/src/source_map/tests.rs
+++ b/compiler/rustc_span/src/source_map/tests.rs
@@ -234,14 +234,14 @@ fn t10() {
         multibyte_chars,
         non_narrow_chars,
         normalized_pos,
-        name_hash,
+        stable_id,
         ..
     } = (*src_file).clone();
 
     let imported_src_file = sm.new_imported_source_file(
         name,
         src_hash,
-        name_hash,
+        stable_id,
         source_len.to_u32(),
         CrateNum::new(0),
         FreezeLock::new(lines.read().clone()),
author	bors <bors@rust-lang.org>	2023-12-24 21:58:39 +0000
committer	bors <bors@rust-lang.org>	2023-12-24 21:58:39 +0000
commit	bf8716f1cd6416266807706bcae0ecb2e51c9d4a (patch)
tree	dbac1f4295bceef5d0e41dc8903e6b4a7c515a92 /compiler/rustc_span/src
parent	e87ccb8676be9ab641849a2539b215d0c9027237 (diff)
parent	fa8ef253720446959a7bab4593c62a1819563cbc (diff)
download	rust-bf8716f1cd6416266807706bcae0ecb2e51c9d4a.tar.gz rust-bf8716f1cd6416266807706bcae0ecb2e51c9d4a.zip