about summary refs log tree commit diff
diff options
context:
space:
mode:
author: bors <bors@rust-lang.org> 2024-01-16 21:58:10 +0000
committer: bors <bors@rust-lang.org> 2024-01-16 21:58:10 +0000
commit: 098d4fd74c078b12bfc2e9438a2a04bc18b393bc (patch)
tree: 4f3a771afa0dfa7f119fe68f0f4fd35c2b8094eb
parent: 92f2e0aa62113a5f31076a9414daca55722556cf (diff)
parent: 37849643c670f7f0809948986329b04ce5c90405 (diff)
download: rust-098d4fd74c078b12bfc2e9438a2a04bc18b393bc.tar.gz
download: rust-098d4fd74c078b12bfc2e9438a2a04bc18b393bc.zip
Auto merge of #119977 - Mark-Simulacrum:defid-cache, r=cjgillot
Cache local DefId-keyed queries without hashing

This caches local DefId-keyed queries using just an IndexVec. This costs ~5% extra max-rss at most but brings a significant runtime improvement: up to 13% fewer cycles (mean: 4%) on primary benchmarks. It's possible that further tweaks could reduce the memory overhead, particularly eliminating the present set in non-incremental mode or storing it inline (skip list?) with the main data, but this win seems worth landing despite the increased memory.

We tried applying this scheme to all keys in the [first perf run] but found that it carried a significant memory hit (50%). Instruction/cycle counts were also much more mixed, though that may have been due to the lack of the present-set optimization (needed for fast iter() calls in incremental scenarios).

Closes https://github.com/rust-lang/rust/issues/45275

[first perf run]: https://perf.rust-lang.org/compare.html?start=30dfb9e046aeb878db04332c74de76e52fb7db10&end=6235575300d8e6e2cc6f449cb9048722ef43f9c7&stat=instructions:u
-rw-r--r--  compiler/rustc_middle/src/query/keys.rs  3
-rw-r--r--  compiler/rustc_query_system/src/query/caches.rs  79
-rw-r--r--  compiler/rustc_query_system/src/query/mod.rs  3
3 files changed, 82 insertions, 3 deletions
diff --git a/compiler/rustc_middle/src/query/keys.rs b/compiler/rustc_middle/src/query/keys.rs
index 945f17d5df2..69d3974184d 100644
--- a/compiler/rustc_middle/src/query/keys.rs
+++ b/compiler/rustc_middle/src/query/keys.rs
@@ -9,6 +9,7 @@ use crate::ty::{self, Ty, TyCtxt};
 use crate::ty::{GenericArg, GenericArgsRef};
 use rustc_hir::def_id::{CrateNum, DefId, LocalDefId, LocalModDefId, ModDefId, LOCAL_CRATE};
 use rustc_hir::hir_id::{HirId, OwnerId};
+use rustc_query_system::query::DefIdCacheSelector;
 use rustc_query_system::query::{DefaultCacheSelector, SingleCacheSelector, VecCacheSelector};
 use rustc_span::symbol::{Ident, Symbol};
 use rustc_span::{Span, DUMMY_SP};
@@ -152,7 +153,7 @@ impl Key for LocalDefId {
 }
 
 impl Key for DefId {
-    type CacheSelector = DefaultCacheSelector<Self>;
+    type CacheSelector = DefIdCacheSelector;
 
     fn default_span(&self, tcx: TyCtxt<'_>) -> Span {
         tcx.def_span(*self)
diff --git a/compiler/rustc_query_system/src/query/caches.rs b/compiler/rustc_query_system/src/query/caches.rs
index 0240f012da0..674a0984ae9 100644
--- a/compiler/rustc_query_system/src/query/caches.rs
+++ b/compiler/rustc_query_system/src/query/caches.rs
@@ -2,8 +2,11 @@ use crate::dep_graph::DepNodeIndex;
 
 use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::sharded::{self, Sharded};
-use rustc_data_structures::sync::OnceLock;
+use rustc_data_structures::sync::{Lock, OnceLock};
+use rustc_hir::def_id::LOCAL_CRATE;
 use rustc_index::{Idx, IndexVec};
+use rustc_span::def_id::DefId;
+use rustc_span::def_id::DefIndex;
 use std::fmt::Debug;
 use std::hash::Hash;
 use std::marker::PhantomData;
@@ -148,6 +151,8 @@ where
 
     #[inline(always)]
     fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> {
+        // FIXME: lock_shard_by_hash will use high bits which are usually zero in the index() passed
+        // here. This makes sharding essentially useless, always selecting the zero'th shard.
         let lock = self.cache.lock_shard_by_hash(key.index() as u64);
         if let Some(Some(value)) = lock.get(*key) { Some(*value) } else { None }
     }
@@ -168,3 +173,75 @@ where
         }
     }
 }
+
+pub struct DefIdCacheSelector;
+
+impl<'tcx, V: 'tcx> CacheSelector<'tcx, V> for DefIdCacheSelector {
+    type Cache = DefIdCache<V>
+    where
+        V: Copy;
+}
+
+pub struct DefIdCache<V> {
+    /// Stores the local DefIds in a dense map. Local queries are much more often dense, so this is
+    /// a win over hashing query keys at marginal memory cost (~5% at most) compared to FxHashMap.
+    ///
+    /// The second element of the tuple is the set of keys actually present in the IndexVec, used
+    /// for faster iteration in `iter()`.
+    // FIXME: This may want to be sharded, like VecCache. However *how* to shard an IndexVec isn't
+    // super clear; VecCache is effectively not sharded today (see FIXME there). For now just omit
+    // that complexity here.
+    local: Lock<(IndexVec<DefIndex, Option<(V, DepNodeIndex)>>, Vec<DefIndex>)>,
+    foreign: DefaultCache<DefId, V>,
+}
+
+impl<V> Default for DefIdCache<V> {
+    fn default() -> Self {
+        DefIdCache { local: Default::default(), foreign: Default::default() }
+    }
+}
+
+impl<V> QueryCache for DefIdCache<V>
+where
+    V: Copy,
+{
+    type Key = DefId;
+    type Value = V;
+
+    #[inline(always)]
+    fn lookup(&self, key: &DefId) -> Option<(V, DepNodeIndex)> {
+        if key.krate == LOCAL_CRATE {
+            let cache = self.local.lock();
+            cache.0.get(key.index).and_then(|v| *v)
+        } else {
+            self.foreign.lookup(key)
+        }
+    }
+
+    #[inline]
+    fn complete(&self, key: DefId, value: V, index: DepNodeIndex) {
+        if key.krate == LOCAL_CRATE {
+            let mut cache = self.local.lock();
+            let (cache, present) = &mut *cache;
+            let slot = cache.ensure_contains_elem(key.index, Default::default);
+            if slot.is_none() {
+                // FIXME: Only store the present set when running in incremental mode. `iter` is not
+                // used outside of saving caches to disk and self-profile.
+                present.push(key.index);
+            }
+            *slot = Some((value, index));
+        } else {
+            self.foreign.complete(key, value, index)
+        }
+    }
+
+    fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
+        let guard = self.local.lock();
+        let (cache, present) = &*guard;
+        for &idx in present.iter() {
+            let value = cache[idx].unwrap();
+            f(&DefId { krate: LOCAL_CRATE, index: idx }, &value.0, value.1);
+        }
+        self.foreign.iter(f);
+    }
+}
diff --git a/compiler/rustc_query_system/src/query/mod.rs b/compiler/rustc_query_system/src/query/mod.rs
index 9ff04c4e910..eecbf86c173 100644
--- a/compiler/rustc_query_system/src/query/mod.rs
+++ b/compiler/rustc_query_system/src/query/mod.rs
@@ -10,7 +10,8 @@ pub use self::job::{
 
 mod caches;
 pub use self::caches::{
-    CacheSelector, DefaultCacheSelector, QueryCache, SingleCacheSelector, VecCacheSelector,
+    CacheSelector, DefIdCacheSelector, DefaultCacheSelector, QueryCache, SingleCacheSelector,
+    VecCacheSelector,
 };
 
 mod config;