diff options
| author | bors <bors@rust-lang.org> | 2023-12-24 21:58:39 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2023-12-24 21:58:39 +0000 |
| commit | bf8716f1cd6416266807706bcae0ecb2e51c9d4a (patch) | |
| tree | dbac1f4295bceef5d0e41dc8903e6b4a7c515a92 /compiler/rustc_span/src | |
| parent | e87ccb8676be9ab641849a2539b215d0c9027237 (diff) | |
| parent | fa8ef253720446959a7bab4593c62a1819563cbc (diff) | |
| download | rust-bf8716f1cd6416266807706bcae0ecb2e51c9d4a.tar.gz rust-bf8716f1cd6416266807706bcae0ecb2e51c9d4a.zip | |
Auto merge of #119139 - michaelwoerister:cleanup-stable-source-file-id, r=cjgillot
Unify SourceFile::name_hash and StableSourceFileId This PR adapts the existing `StableSourceFileId` type so that it can be used instead of the `name_hash` field of `SourceFile`. This simplifies a few things that were kind of duplicated before. The PR should also fix issues https://github.com/rust-lang/rust/issues/112700 and https://github.com/rust-lang/rust/issues/115835, but I was not able to reproduce these issues in a regression test. As far as I can tell, the root cause of these issues is that the id of the originating crate is not hashed in the `HashStable` impl of `Span` and thus cache entries that should have been considered invalidated were loaded. After this PR, the `stable_id` field of `SourceFile` includes information about the originating crate, so that ICE should not occur anymore.
Diffstat (limited to 'compiler/rustc_span/src')
| -rw-r--r-- | compiler/rustc_span/src/lib.rs | 86 | ||||
| -rw-r--r-- | compiler/rustc_span/src/source_map.rs | 55 | ||||
| -rw-r--r-- | compiler/rustc_span/src/source_map/tests.rs | 4 |
3 files changed, 81 insertions, 64 deletions
diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index cc3f0962d6c..8f64eed9a87 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -58,7 +58,7 @@ pub use hygiene::{DesugaringKind, ExpnKind, MacroKind}; pub use hygiene::{ExpnData, ExpnHash, ExpnId, LocalExpnId, SyntaxContext}; use rustc_data_structures::stable_hasher::HashingControls; pub mod def_id; -use def_id::{CrateNum, DefId, DefPathHash, LocalDefId, LOCAL_CRATE}; +use def_id::{CrateNum, DefId, DefPathHash, LocalDefId, StableCrateId, LOCAL_CRATE}; pub mod edit_distance; mod span_encoding; pub use span_encoding::{Span, DUMMY_SP}; @@ -1333,8 +1333,10 @@ pub struct SourceFile { pub non_narrow_chars: Vec<NonNarrowChar>, /// Locations of characters removed during normalization. pub normalized_pos: Vec<NormalizedPos>, - /// A hash of the filename, used for speeding up hashing in incremental compilation. - pub name_hash: Hash128, + /// A hash of the filename & crate-id, used for uniquely identifying source + /// files within the crate graph and for speeding up hashing in incremental + /// compilation. + pub stable_id: StableSourceFileId, /// Indicates which crate this `SourceFile` was imported from. pub cnum: CrateNum, } @@ -1352,7 +1354,7 @@ impl Clone for SourceFile { multibyte_chars: self.multibyte_chars.clone(), non_narrow_chars: self.non_narrow_chars.clone(), normalized_pos: self.normalized_pos.clone(), - name_hash: self.name_hash, + stable_id: self.stable_id, cnum: self.cnum, } } @@ -1426,7 +1428,7 @@ impl<S: Encoder> Encodable<S> for SourceFile { self.multibyte_chars.encode(s); self.non_narrow_chars.encode(s); - self.name_hash.encode(s); + self.stable_id.encode(s); self.normalized_pos.encode(s); self.cnum.encode(s); } @@ -1453,7 +1455,7 @@ impl<D: Decoder> Decodable<D> for SourceFile { }; let multibyte_chars: Vec<MultiByteChar> = Decodable::decode(d); let non_narrow_chars: Vec<NonNarrowChar> = Decodable::decode(d); - let name_hash = Decodable::decode(d); + let stable_id = Decodable::decode(d); let normalized_pos: Vec<NormalizedPos> = Decodable::decode(d); let cnum: CrateNum = Decodable::decode(d); SourceFile { @@ -1469,7 +1471,7 @@ impl<D: Decoder> Decodable<D> for SourceFile { multibyte_chars, non_narrow_chars, normalized_pos, - name_hash, + stable_id, cnum, } } @@ -1481,6 +1483,66 @@ impl fmt::Debug for SourceFile { } } +/// This is a [SourceFile] identifier that is used to correlate source files between +/// subsequent compilation sessions (which is something we need to do during +/// incremental compilation). +/// +/// It is a hash value (so we can efficiently consume it when stable-hashing +/// spans) that consists of the `FileName` and the `StableCrateId` of the crate +/// the source file is from. The crate id is needed because sometimes the +/// `FileName` is not unique within the crate graph (think `src/lib.rs`, for +/// example). +/// +/// The way the crate-id part is handled is a bit special: source files of the +/// local crate are hashed as `(filename, None)`, while source files from +/// upstream crates have a hash of `(filename, Some(stable_crate_id))`. This +/// is because SourceFiles for the local crate are allocated very early in the +/// compilation process when the `StableCrateId` is not yet known. If, due to +/// some refactoring of the compiler, the `StableCrateId` of the local crate +/// were to become available, it would be better to uniformely make this a +/// hash of `(filename, stable_crate_id)`. +/// +/// When `SourceFile`s are exported in crate metadata, the `StableSourceFileId` +/// is updated to incorporate the `StableCrateId` of the exporting crate. +#[derive( + Debug, + Clone, + Copy, + Hash, + PartialEq, + Eq, + HashStable_Generic, + Encodable, + Decodable, + Default, + PartialOrd, + Ord +)] +pub struct StableSourceFileId(Hash128); + +impl StableSourceFileId { + fn from_filename_in_current_crate(filename: &FileName) -> Self { + Self::from_filename_and_stable_crate_id(filename, None) + } + + pub fn from_filename_for_export( + filename: &FileName, + local_crate_stable_crate_id: StableCrateId, + ) -> Self { + Self::from_filename_and_stable_crate_id(filename, Some(local_crate_stable_crate_id)) + } + + fn from_filename_and_stable_crate_id( + filename: &FileName, + stable_crate_id: Option<StableCrateId>, + ) -> Self { + let mut hasher = StableHasher::new(); + filename.hash(&mut hasher); + stable_crate_id.hash(&mut hasher); + StableSourceFileId(hasher.finish()) + } +} + impl SourceFile { pub fn new( name: FileName, @@ -1491,11 +1553,7 @@ impl SourceFile { let src_hash = SourceFileHash::new(hash_kind, &src); let normalized_pos = normalize_src(&mut src); - let name_hash = { - let mut hasher: StableHasher = StableHasher::new(); - name.hash(&mut hasher); - hasher.finish() - }; + let stable_id = StableSourceFileId::from_filename_in_current_crate(&name); let source_len = src.len(); let source_len = u32::try_from(source_len).map_err(|_| OffsetOverflowError)?; @@ -1513,7 +1571,7 @@ impl SourceFile { multibyte_chars, non_narrow_chars, normalized_pos, - name_hash, + stable_id, cnum: LOCAL_CRATE, }) } @@ -2213,7 +2271,7 @@ where }; Hash::hash(&TAG_VALID_SPAN, hasher); - Hash::hash(&file.name_hash, hasher); + Hash::hash(&file.stable_id, hasher); // Hash both the length and the end location (line/column) of a span. If we // hash only the length, for example, then two otherwise equal spans with diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs index cb10e6bf2ba..c61dbcaae95 100644 --- a/compiler/rustc_span/src/source_map.rs +++ b/compiler/rustc_span/src/source_map.rs @@ -13,7 +13,6 @@ use crate::*; use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::sync::{IntoDynSyncSend, MappedReadGuard, ReadGuard, RwLock}; use std::fs; -use std::hash::Hash; use std::io::{self, BorrowedBuf, Read}; use std::path::{self}; @@ -152,45 +151,6 @@ impl FileLoader for RealFileLoader { } } -/// This is a [SourceFile] identifier that is used to correlate source files between -/// subsequent compilation sessions (which is something we need to do during -/// incremental compilation). -/// -/// The [StableSourceFileId] also contains the CrateNum of the crate the source -/// file was originally parsed for. This way we get two separate entries in -/// the [SourceMap] if the same file is part of both the local and an upstream -/// crate. Trying to only have one entry for both cases is problematic because -/// at the point where we discover that there's a local use of the file in -/// addition to the upstream one, we might already have made decisions based on -/// the assumption that it's an upstream file. Treating the two files as -/// different has no real downsides. -#[derive(Copy, Clone, PartialEq, Eq, Hash, Encodable, Decodable, Debug)] -pub struct StableSourceFileId { - /// A hash of the source file's [`FileName`]. This is hash so that it's size - /// is more predictable than if we included the actual [`FileName`] value. - pub file_name_hash: Hash64, - - /// The [`CrateNum`] of the crate this source file was originally parsed for. - /// We cannot include this information in the hash because at the time - /// of hashing we don't have the context to map from the [`CrateNum`]'s numeric - /// value to a `StableCrateId`. - pub cnum: CrateNum, -} - -// FIXME: we need a more globally consistent approach to the problem solved by -// StableSourceFileId, perhaps built atop source_file.name_hash. -impl StableSourceFileId { - pub fn new(source_file: &SourceFile) -> StableSourceFileId { - StableSourceFileId::new_from_name(&source_file.name, source_file.cnum) - } - - fn new_from_name(name: &FileName, cnum: CrateNum) -> StableSourceFileId { - let mut hasher = StableHasher::new(); - name.hash(&mut hasher); - StableSourceFileId { file_name_hash: hasher.finish(), cnum } - } -} - // _____________________________________________________________________________ // SourceMap // @@ -320,17 +280,17 @@ impl SourceMap { // be empty, so the working directory will be used. let (filename, _) = self.path_mapping.map_filename_prefix(&filename); - let file_id = StableSourceFileId::new_from_name(&filename, LOCAL_CRATE); - match self.source_file_by_stable_id(file_id) { + let stable_id = StableSourceFileId::from_filename_in_current_crate(&filename); + match self.source_file_by_stable_id(stable_id) { Some(lrc_sf) => Ok(lrc_sf), None => { let source_file = SourceFile::new(filename, src, self.hash_kind)?; // Let's make sure the file_id we generated above actually matches // the ID we generate for the SourceFile we just created. - debug_assert_eq!(StableSourceFileId::new(&source_file), file_id); + debug_assert_eq!(source_file.stable_id, stable_id); - self.register_source_file(file_id, source_file) + self.register_source_file(stable_id, source_file) } } } @@ -343,7 +303,7 @@ impl SourceMap { &self, filename: FileName, src_hash: SourceFileHash, - name_hash: Hash128, + stable_id: StableSourceFileId, source_len: u32, cnum: CrateNum, file_local_lines: FreezeLock<SourceFileLines>, @@ -368,12 +328,11 @@ impl SourceMap { multibyte_chars, non_narrow_chars, normalized_pos, - name_hash, + stable_id, cnum, }; - let file_id = StableSourceFileId::new(&source_file); - self.register_source_file(file_id, source_file) + self.register_source_file(stable_id, source_file) .expect("not enough address space for imported source file") } diff --git a/compiler/rustc_span/src/source_map/tests.rs b/compiler/rustc_span/src/source_map/tests.rs index 113ca493d36..130522a302d 100644 --- a/compiler/rustc_span/src/source_map/tests.rs +++ b/compiler/rustc_span/src/source_map/tests.rs @@ -234,14 +234,14 @@ fn t10() { multibyte_chars, non_narrow_chars, normalized_pos, - name_hash, + stable_id, .. } = (*src_file).clone(); let imported_src_file = sm.new_imported_source_file( name, src_hash, - name_hash, + stable_id, source_len.to_u32(), CrateNum::new(0), FreezeLock::new(lines.read().clone()), |
