From 960893c50a61e6eec6635d8f18b586e3ce0865d9 Mon Sep 17 00:00:00 2001 From: Michael Woerister Date: Tue, 20 Jul 2021 13:59:12 +0200 Subject: Store DefPathHash->DefIndex map in on-disk-hash-table format in crate metadata. This encoding allows for random access without an expensive upfront decoding state which in turn allows simplifying the DefPathIndex lookup logic without regressing performance. --- compiler/rustc_metadata/src/creader.rs | 2 +- compiler/rustc_metadata/src/rmeta/decoder.rs | 75 ++++++---------------- .../src/rmeta/decoder/cstore_impl.rs | 16 ++--- .../rustc_metadata/src/rmeta/def_path_hash_map.rs | 60 +++++++++++++++++ compiler/rustc_metadata/src/rmeta/encoder.rs | 13 ++++ compiler/rustc_metadata/src/rmeta/mod.rs | 4 ++ 6 files changed, 104 insertions(+), 66 deletions(-) create mode 100644 compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs (limited to 'compiler/rustc_metadata') diff --git a/compiler/rustc_metadata/src/creader.rs b/compiler/rustc_metadata/src/creader.rs index 99030febb73..48d8cdf57dc 100644 --- a/compiler/rustc_metadata/src/creader.rs +++ b/compiler/rustc_metadata/src/creader.rs @@ -45,7 +45,7 @@ pub struct CStore { /// This map is used to verify we get no hash conflicts between /// `StableCrateId` values. - stable_crate_ids: FxHashMap, + pub(crate) stable_crate_ids: FxHashMap, /// Unused externs of the crate unused_externs: Vec, diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index 06b3ef2add2..d925e3102f2 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -95,10 +95,8 @@ crate struct CrateMetadata { raw_proc_macros: Option<&'static [ProcMacro]>, /// Source maps for code from the crate. source_map_import_info: OnceCell>, - /// For every definition in this crate, maps its `DefPathHash` to its - /// `DefIndex`. See `raw_def_id_to_def_id` for more details about how - /// this is used. - def_path_hash_map: OnceCell>, + /// For every definition in this crate, maps its `DefPathHash` to its `DefIndex`. + def_path_hash_map: DefPathHashMap<'static>, /// Likewise for ExpnHash. expn_hash_map: OnceCell>, /// Used for decoding interpret::AllocIds in a cached & thread-safe manner. @@ -320,6 +318,11 @@ impl<'a, 'tcx> DecodeContext<'a, 'tcx> { self.lazy_state = LazyState::Previous(NonZeroUsize::new(position + min_size).unwrap()); Ok(Lazy::from_position_and_meta(NonZeroUsize::new(position).unwrap(), meta)) } + + #[inline] + pub fn read_raw_bytes(&mut self, len: usize) -> &'a [u8] { + self.opaque.read_raw_bytes(len) + } } impl<'a, 'tcx> TyDecoder<'tcx> for DecodeContext<'a, 'tcx> { @@ -1596,58 +1599,6 @@ impl<'a, 'tcx> CrateMetadataRef<'a> { .or_insert_with(|| self.root.tables.def_keys.get(self, index).unwrap().decode(self)) } - /// Finds the corresponding `DefId` for the provided `DefPathHash`, if it exists. - /// This is used by incremental compilation to map a serialized `DefPathHash` to - /// its `DefId` in the current session. - /// Normally, only one 'main' crate will change between incremental compilation sessions: - /// all dependencies will be completely unchanged. In this case, we can avoid - /// decoding every `DefPathHash` in the crate, since the `DefIndex` from the previous - /// session will still be valid. If our 'guess' is wrong (the `DefIndex` no longer exists, - /// or has a different `DefPathHash`, then we need to decode all `DefPathHashes` to determine - /// the correct mapping). - fn def_path_hash_to_def_id( - &self, - krate: CrateNum, - index_guess: u32, - hash: DefPathHash, - ) -> Option { - let def_index_guess = DefIndex::from_u32(index_guess); - let old_hash = self - .root - .tables - .def_path_hashes - .get(self, def_index_guess) - .map(|lazy| lazy.decode(self)); - - // Fast path: the definition and its index is unchanged from the - // previous compilation session. There is no need to decode anything - // else - if old_hash == Some(hash) { - return Some(DefId { krate, index: def_index_guess }); - } - - let is_proc_macro = self.is_proc_macro_crate(); - - // Slow path: We need to find out the new `DefIndex` of the provided - // `DefPathHash`, if its still exists. This requires decoding every `DefPathHash` - // stored in this crate. - let map = self.cdata.def_path_hash_map.get_or_init(|| { - let end_id = self.root.tables.def_path_hashes.size() as u32; - let mut map = UnhashMap::with_capacity_and_hasher(end_id as usize, Default::default()); - for i in 0..end_id { - let def_index = DefIndex::from_u32(i); - // There may be gaps in the encoded table if we're decoding a proc-macro crate - if let Some(hash) = self.root.tables.def_path_hashes.get(self, def_index) { - map.insert(hash.decode(self), def_index); - } else if !is_proc_macro { - panic!("Missing def_path_hashes entry for {:?}", def_index); - } - } - map - }); - map.get(&hash).map(|index| DefId { krate, index: *index }) - } - // Returns the path leading to the thing with this `id`. fn def_path(&self, id: DefIndex) -> DefPath { debug!("def_path(cnum={:?}, id={:?})", self.cnum, id); @@ -1670,6 +1621,11 @@ impl<'a, 'tcx> CrateMetadataRef<'a> { self.def_path_hash_unlocked(index, &mut def_path_hashes) } + #[inline] + fn def_path_hash_to_def_index(&self, hash: DefPathHash) -> Option { + self.def_path_hash_map.def_path_hash_to_def_index(&hash) + } + fn expn_hash_to_expn_id(&self, index_guess: u32, hash: ExpnHash) -> ExpnId { debug_assert_eq!(ExpnId::from_hash(hash), None); let index_guess = ExpnIndex::from_u32(index_guess); @@ -1936,13 +1892,18 @@ impl CrateMetadata { let alloc_decoding_state = AllocDecodingState::new(root.interpret_alloc_index.decode(&blob).collect()); let dependencies = Lock::new(cnum_map.iter().cloned().collect()); + + // Pre-decode the DefPathHash->DefIndex table. This is a cheap operation + // that does not copy any data. It just does some data verification. + let def_path_hash_map = root.def_path_hash_map.decode(&blob); + CrateMetadata { blob, root, trait_impls, raw_proc_macros, source_map_import_info: OnceCell::new(), - def_path_hash_map: Default::default(), + def_path_hash_map, expn_hash_map: Default::default(), alloc_decoding_state, cnum, diff --git a/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs b/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs index a01eaf68f01..4885de103a0 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs @@ -498,6 +498,10 @@ impl CrateStore for CStore { self.get_crate_data(cnum).root.stable_crate_id } + fn stable_crate_id_to_crate_num(&self, stable_crate_id: StableCrateId) -> CrateNum { + self.stable_crate_ids[&stable_crate_id] + } + /// Returns the `DefKey` for a given `DefId`. This indicates the /// parent `DefId` as well as some idea of what kind of data the /// `DefId` refers to. @@ -513,14 +517,10 @@ impl CrateStore for CStore { self.get_crate_data(def.krate).def_path_hash(def.index) } - // See `CrateMetadataRef::def_path_hash_to_def_id` for more details - fn def_path_hash_to_def_id( - &self, - cnum: CrateNum, - index_guess: u32, - hash: DefPathHash, - ) -> Option { - self.get_crate_data(cnum).def_path_hash_to_def_id(cnum, index_guess, hash) + fn def_path_hash_to_def_id(&self, cnum: CrateNum, hash: DefPathHash) -> Option { + self.get_crate_data(cnum) + .def_path_hash_to_def_index(hash) + .map(|index| DefId { krate: cnum, index }) } fn expn_hash_to_expn_id(&self, cnum: CrateNum, index_guess: u32, hash: ExpnHash) -> ExpnId { diff --git a/compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs b/compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs new file mode 100644 index 00000000000..3f500fce334 --- /dev/null +++ b/compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs @@ -0,0 +1,60 @@ +use crate::rmeta::DecodeContext; +use crate::rmeta::EncodeContext; +use crate::rmeta::MetadataBlob; +use rustc_data_structures::owning_ref::OwningRef; +use rustc_hir::def_path_hash_map::{ + Config as HashMapConfig, DefPathHashMap as DefPathHashMapInner, +}; +use rustc_serialize::{opaque, Decodable, Decoder, Encodable, Encoder}; +use rustc_span::def_id::{DefIndex, DefPathHash}; + +crate enum DefPathHashMap<'tcx> { + OwnedFromMetadata(odht::HashTable>), + BorrowedFromTcx(&'tcx DefPathHashMapInner), +} + +impl DefPathHashMap<'tcx> { + #[inline] + pub fn def_path_hash_to_def_index(&self, def_path_hash: &DefPathHash) -> Option { + match *self { + DefPathHashMap::OwnedFromMetadata(ref map) => map.get(def_path_hash), + DefPathHashMap::BorrowedFromTcx(_) => { + panic!("DefPathHashMap::BorrowedFromTcx variant only exists for serialization") + } + } + } +} + +impl<'a, 'tcx> Encodable> for DefPathHashMap<'tcx> { + fn encode(&self, e: &mut EncodeContext<'a, 'tcx>) -> opaque::EncodeResult { + match *self { + DefPathHashMap::BorrowedFromTcx(def_path_hash_map) => { + let bytes = def_path_hash_map.raw_bytes(); + e.emit_usize(bytes.len())?; + e.emit_raw_bytes(bytes) + } + DefPathHashMap::OwnedFromMetadata(_) => { + panic!("DefPathHashMap::OwnedFromMetadata variant only exists for deserialization") + } + } + } +} + +impl<'a, 'tcx> Decodable> for DefPathHashMap<'static> { + fn decode(d: &mut DecodeContext<'a, 'tcx>) -> Result, String> { + // Import TyDecoder so we can access the DecodeContext::position() method + use crate::rustc_middle::ty::codec::TyDecoder; + + let len = d.read_usize()?; + let pos = d.position(); + let o = OwningRef::new(d.blob().clone()).map(|x| &x[pos..pos + len]); + + // Although we already have the data we need via the OwningRef, we still need + // to advance the DecodeContext's position so it's in a valid state after + // the method. We use read_raw_bytes() for that. + let _ = d.read_raw_bytes(len); + + let inner = odht::HashTable::from_raw_bytes(o).map_err(|e| format!("{}", e))?; + Ok(DefPathHashMap::OwnedFromMetadata(inner)) + } +} diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index b0d22037f21..5a901e33df9 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -1,3 +1,4 @@ +use crate::rmeta::def_path_hash_map::DefPathHashMap; use crate::rmeta::table::{FixedSizeEncoding, TableBuilder}; use crate::rmeta::*; @@ -472,6 +473,12 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { } } + fn encode_def_path_hash_map(&mut self) -> Lazy> { + self.lazy(DefPathHashMap::BorrowedFromTcx( + self.tcx.resolutions(()).definitions.def_path_hash_to_def_index_map(), + )) + } + fn encode_source_map(&mut self) -> Lazy<[rustc_span::SourceFile]> { let source_map = self.tcx.sess.source_map(); let all_source_files = source_map.files(); @@ -675,6 +682,10 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { let (syntax_contexts, expn_data, expn_hashes) = self.encode_hygiene(); let hygiene_bytes = self.position() - i; + i = self.position(); + let def_path_hash_map = self.encode_def_path_hash_map(); + let def_path_hash_map_bytes = self.position() - i; + // Encode source_map. This needs to be done last, // since encoding `Span`s tells us which `SourceFiles` we actually // need to encode. @@ -722,6 +733,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { syntax_contexts, expn_data, expn_hashes, + def_path_hash_map, }); let total_bytes = self.position(); @@ -744,6 +756,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { eprintln!(" impl bytes: {}", impl_bytes); eprintln!(" exp. symbols bytes: {}", exported_symbols_bytes); eprintln!(" def-path table bytes: {}", def_path_table_bytes); + eprintln!(" def-path hashes bytes: {}", def_path_hash_map_bytes); eprintln!(" proc-macro-data-bytes: {}", proc_macro_data_bytes); eprintln!(" mir bytes: {}", mir_bytes); eprintln!(" item bytes: {}", item_bytes); diff --git a/compiler/rustc_metadata/src/rmeta/mod.rs b/compiler/rustc_metadata/src/rmeta/mod.rs index 1f307f3fdee..d47537b86ab 100644 --- a/compiler/rustc_metadata/src/rmeta/mod.rs +++ b/compiler/rustc_metadata/src/rmeta/mod.rs @@ -1,4 +1,5 @@ use decoder::Metadata; +use def_path_hash_map::DefPathHashMap; use table::{Table, TableBuilder}; use rustc_ast::{self as ast, MacroDef}; @@ -35,6 +36,7 @@ use encoder::EncodeContext; use rustc_span::hygiene::SyntaxContextData; mod decoder; +mod def_path_hash_map; mod encoder; mod table; @@ -231,6 +233,8 @@ crate struct CrateRoot<'tcx> { expn_data: ExpnDataTable, expn_hashes: ExpnHashTable, + def_path_hash_map: Lazy>, + source_map: Lazy<[rustc_span::SourceFile]>, compiler_builtins: bool, -- cgit 1.4.1-3-g733a5