about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Mark Rousskov <mark.simulacrum@gmail.com> 2023-12-22 21:42:49 -0500
committer: Mark Rousskov <mark.simulacrum@gmail.com> 2023-12-23 14:10:28 -0500
commit: 6630d690859d882b2528a39317a701da64fe1203 (patch)
tree: 46b62bb8bc3550aba198ab860a866c8982ac7fbb
parent: edcbcc768a484d52deb315e7c583fe4b2ab4f25b (diff)
download: rust-6630d690859d882b2528a39317a701da64fe1203.tar.gz
          rust-6630d690859d882b2528a39317a701da64fe1203.zip
Specialize DefPathHash table to skip crate IDs
Instead, we store just the crate-local part of the hash as a bare u64.
On decoding, we recombine it with the crate's StableCrateId, which is
stored separately in metadata. The end result is that we save
~8 bytes per DefIndex in metadata size.

One key detail here is that we no longer distinguish in encoded metadata
between present and non-present DefPathHashes. It used to be highly
likely that we could distinguish them, as we used DefPathHash::default(),
an all-zero representation. However, in theory even that was fallible, as
nothing strictly prevents the StableCrateId from being zero.
-rw-r--r-- compiler/rustc_metadata/src/rmeta/decoder.rs | 14
-rw-r--r-- compiler/rustc_metadata/src/rmeta/encoder.rs | 4
-rw-r--r-- compiler/rustc_metadata/src/rmeta/mod.rs | 7
-rw-r--r-- compiler/rustc_metadata/src/rmeta/table.rs | 23
-rw-r--r-- compiler/rustc_span/src/def_id.rs | 2
5 files changed, 19 insertions, 31 deletions
diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs
index 281a0eafee1..b5e251f3c59 100644
--- a/compiler/rustc_metadata/src/rmeta/decoder.rs
+++ b/compiler/rustc_metadata/src/rmeta/decoder.rs
@@ -6,6 +6,7 @@ use crate::rmeta::*;
 
 use rustc_ast as ast;
 use rustc_data_structures::captures::Captures;
+use rustc_data_structures::fingerprint::Fingerprint;
 use rustc_data_structures::owned_slice::OwnedSlice;
 use rustc_data_structures::sync::{AppendOnlyVec, AtomicBool, Lock, Lrc, OnceLock};
 use rustc_data_structures::unhash::UnhashMap;
@@ -1489,9 +1490,16 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
         index: DefIndex,
         def_path_hashes: &mut FxHashMap<DefIndex, DefPathHash>,
     ) -> DefPathHash {
-        *def_path_hashes
-            .entry(index)
-            .or_insert_with(|| self.root.tables.def_path_hashes.get(self, index))
+        *def_path_hashes.entry(index).or_insert_with(|| {
+            // This is a hack to workaround the fact that we can't easily encode/decode a Hash64
+            // into the FixedSizeEncoding, as Hash64 lacks a Default impl. A future refactor to
+            // relax the Default restriction will likely fix this.
+            let fingerprint = Fingerprint::new(
+                self.root.stable_crate_id.as_u64(),
+                self.root.tables.def_path_hashes.get(self, index),
+            );
+            DefPathHash::new(self.root.stable_crate_id, fingerprint.split().1)
+        })
     }
 
     #[inline]
diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs
index 057fb15ac3b..c8681f647c9 100644
--- a/compiler/rustc_metadata/src/rmeta/encoder.rs
+++ b/compiler/rustc_metadata/src/rmeta/encoder.rs
@@ -467,13 +467,13 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
                 let def_key = self.lazy(table.def_key(def_index));
                 let def_path_hash = table.def_path_hash(def_index);
                 self.tables.def_keys.set_some(def_index, def_key);
-                self.tables.def_path_hashes.set(def_index, def_path_hash);
+                self.tables.def_path_hashes.set(def_index, def_path_hash.local_hash().as_u64());
             }
         } else {
             for (def_index, def_key, def_path_hash) in table.enumerated_keys_and_path_hashes() {
                 let def_key = self.lazy(def_key);
                 self.tables.def_keys.set_some(def_index, def_key);
-                self.tables.def_path_hashes.set(def_index, *def_path_hash);
+                self.tables.def_path_hashes.set(def_index, def_path_hash.local_hash().as_u64());
             }
         }
     }
diff --git a/compiler/rustc_metadata/src/rmeta/mod.rs b/compiler/rustc_metadata/src/rmeta/mod.rs
index 235f0e35cae..905218e2e1b 100644
--- a/compiler/rustc_metadata/src/rmeta/mod.rs
+++ b/compiler/rustc_metadata/src/rmeta/mod.rs
@@ -386,7 +386,12 @@ define_tables! {
     is_type_alias_impl_trait: Table<DefIndex, bool>,
     type_alias_is_lazy: Table<DefIndex, bool>,
     attr_flags: Table<DefIndex, AttrFlags>,
-    def_path_hashes: Table<DefIndex, DefPathHash>,
+    // The u64 is the crate-local part of the DefPathHash. All hashes in this crate have the same
+    // StableCrateId, so we omit encoding those into the table.
+    //
+    // Note also that this table is fully populated (no gaps) as every DefIndex should have a
+    // corresponding DefPathHash.
+    def_path_hashes: Table<DefIndex, u64>,
     explicit_item_bounds: Table<DefIndex, LazyArray<(ty::Clause<'static>, Span)>>,
     inferred_outlives_of: Table<DefIndex, LazyArray<(ty::Clause<'static>, Span)>>,
     inherent_impls: Table<DefIndex, LazyArray<DefIndex>>,
diff --git a/compiler/rustc_metadata/src/rmeta/table.rs b/compiler/rustc_metadata/src/rmeta/table.rs
index 667fc301991..d53ee4836bb 100644
--- a/compiler/rustc_metadata/src/rmeta/table.rs
+++ b/compiler/rustc_metadata/src/rmeta/table.rs
@@ -1,6 +1,5 @@
 use crate::rmeta::*;
 
-use rustc_data_structures::fingerprint::Fingerprint;
 use rustc_hir::def::CtorOf;
 use rustc_index::Idx;
 
@@ -44,12 +43,6 @@ impl<T> IsDefault for LazyArray<T> {
     }
 }
 
-impl IsDefault for DefPathHash {
-    fn is_default(&self) -> bool {
-        self.0 == Fingerprint::ZERO
-    }
-}
-
 impl IsDefault for UnusedGenericParams {
     fn is_default(&self) -> bool {
         // UnusedGenericParams encodes the *un*usedness as a bitset.
@@ -234,22 +227,6 @@ fixed_size_enum! {
     }
 }
 
-// We directly encode `DefPathHash` because a `LazyValue` would incur a 25% cost.
-impl FixedSizeEncoding for DefPathHash {
-    type ByteArray = [u8; 16];
-
-    #[inline]
-    fn from_bytes(b: &[u8; 16]) -> Self {
-        DefPathHash(Fingerprint::from_le_bytes(*b))
-    }
-
-    #[inline]
-    fn write_to_bytes(self, b: &mut [u8; 16]) {
-        debug_assert!(!self.is_default());
-        *b = self.0.to_le_bytes();
-    }
-}
-
 // We directly encode RawDefId because using a `LazyValue` would incur a 50% overhead in the worst case.
 impl FixedSizeEncoding for Option<RawDefId> {
     type ByteArray = [u8; 8];
diff --git a/compiler/rustc_span/src/def_id.rs b/compiler/rustc_span/src/def_id.rs
index b2d51ac6c0d..e397fab5459 100644
--- a/compiler/rustc_span/src/def_id.rs
+++ b/compiler/rustc_span/src/def_id.rs
@@ -114,8 +114,6 @@ impl DefPathHash {
     }
 
     /// Returns the crate-local part of the [DefPathHash].
-    ///
-    /// Used for tests.
     #[inline]
     pub fn local_hash(&self) -> Hash64 {
         self.0.split().1