Auto merge of #107833 - Zoxc:arena-query-clean, r=cjgillot

Factor query arena allocation out from query caches This moves the logic for arena allocation out from the query caches into conditional code in the query system. The specialized arena caches are removed. A new `QuerySystem` type is added in `rustc_middle` which contains the arenas, providers and query caches. Performance seems to be slightly regressed: <table><tr><td rowspan="2">Benchmark</td><td colspan="1"><b>Before</b></th><td colspan="2"><b>After</b></th></tr><tr><td align="right">Time</td><td align="right">Time</td><td align="right">%</th></tr><tr><td>🟣 <b>clap</b>:check</td><td align="right">1.8053s</td><td align="right">1.8109s</td><td align="right"> 0.31%</td></tr><tr><td>🟣 <b>hyper</b>:check</td><td align="right">0.2600s</td><td align="right">0.2597s</td><td align="right"> -0.10%</td></tr><tr><td>🟣 <b>regex</b>:check</td><td align="right">0.9973s</td><td align="right">1.0006s</td><td align="right"> 0.34%</td></tr><tr><td>🟣 <b>syn</b>:check</td><td align="right">1.6048s</td><td align="right">1.6051s</td><td align="right"> 0.02%</td></tr><tr><td>🟣 <b>syntex_syntax</b>:check</td><td align="right">6.2992s</td><td align="right">6.3159s</td><td align="right"> 0.26%</td></tr><tr><td>Total</td><td align="right">10.9664s</td><td align="right">10.9922s</td><td align="right"> 0.23%</td></tr><tr><td>Summary</td><td align="right">1.0000s</td><td align="right">1.0017s</td><td align="right"> 0.17%</td></tr></table> Incremental performance is a bit worse: <table><tr><td rowspan="2">Benchmark</td><td colspan="1"><b>Before</b></th><td colspan="2"><b>After</b></th></tr><tr><td align="right">Time</td><td align="right">Time</td><td align="right">%</th></tr><tr><td>🟣 <b>clap</b>:check:initial</td><td align="right">2.2103s</td><td align="right">2.2247s</td><td align="right"> 0.65%</td></tr><tr><td>🟣 <b>hyper</b>:check:initial</td><td align="right">0.3335s</td><td align="right">0.3349s</td><td align="right"> 0.41%</td></tr><tr><td>🟣 <b>regex</b>:check:initial</td><td align="right">1.2597s</td><td align="right">1.2650s</td><td align="right"> 0.42%</td></tr><tr><td>🟣 <b>syn</b>:check:initial</td><td align="right">2.0521s</td><td align="right">2.0613s</td><td align="right"> 0.45%</td></tr><tr><td>🟣 <b>syntex_syntax</b>:check:initial</td><td align="right">7.8275s</td><td align="right">7.8583s</td><td align="right"> 0.39%</td></tr><tr><td>Total</td><td align="right">13.6832s</td><td align="right">13.7442s</td><td align="right"> 0.45%</td></tr><tr><td>Summary</td><td align="right">1.0000s</td><td align="right">1.0046s</td><td align="right"> 0.46%</td></tr></table> It does seem like LLVM optimizers struggle a bit with the current state of the query system. Based on top of https://github.com/rust-lang/rust/pull/107782 and https://github.com/rust-lang/rust/pull/107802. r? `@cjgillot`
author: bors <bors@rust-lang.org> 2023-02-16 22:10:10 +0000
committer: bors <bors@rust-lang.org> 2023-02-16 22:10:10 +0000
commit: 947b696ce0ce42c98b8fb82ffa0735ade051466c (patch)
tree: 35768557e506f47a11e729260131ab417143b1c7 /compiler/rustc_query_system/src
parent: 9a7cc6c32f1a690f86827e4724bcda85e506ef35 (diff)
parent: caf29b27277fd4fca4b372938e2fecf12dd12c78 (diff)
download: rust-947b696ce0ce42c98b8fb82ffa0735ade051466c.tar.gz
rust-947b696ce0ce42c98b8fb82ffa0735ade051466c.zip
3 files changed, 30 insertions, 198 deletions
diff --git a/compiler/rustc_query_system/src/query/caches.rs b/compiler/rustc_query_system/src/query/caches.rs
index 81c7e4673d4..e840108bdd8 100644
--- a/compiler/rustc_query_system/src/query/caches.rs
+++ b/compiler/rustc_query_system/src/query/caches.rs
@@ -1,12 +1,10 @@
 use crate::dep_graph::DepNodeIndex;
 
-use rustc_arena::TypedArena;
 use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::sharded;
 #[cfg(parallel_compiler)]
 use rustc_data_structures::sharded::Sharded;
 use rustc_data_structures::sync::Lock;
-use rustc_data_structures::sync::WorkerLocal;
 use rustc_index::vec::{Idx, IndexVec};
 use std::fmt::Debug;
 use std::hash::Hash;
@@ -16,12 +14,10 @@ pub trait CacheSelector<'tcx, V> {
     type Cache
     where
         V: Copy;
-    type ArenaCache;
 }
 
 pub trait QueryStorage {
-    type Value: Debug;
-    type Stored: Copy;
+    type Value: Copy;
 }
 
 pub trait QueryCache: QueryStorage + Sized {
@@ -31,9 +27,9 @@ pub trait QueryCache: QueryStorage + Sized {
     /// It returns the shard index and a lock guard to the shard,
     /// which will be used if the query is not in the cache and we need
     /// to compute it.
-    fn lookup(&self, key: &Self::Key) -> Option<(Self::Stored, DepNodeIndex)>;
+    fn lookup(&self, key: &Self::Key) -> Option<(Self::Value, DepNodeIndex)>;
 
-    fn complete(&self, key: Self::Key, value: Self::Value, index: DepNodeIndex) -> Self::Stored;
+    fn complete(&self, key: Self::Key, value: Self::Value, index: DepNodeIndex);
 
     fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex));
 }
@@ -44,7 +40,6 @@ impl<'tcx, K: Eq + Hash, V: 'tcx> CacheSelector<'tcx, V> for DefaultCacheSelecto
     type Cache = DefaultCache<K, V>
     where
         V: Copy;
-    type ArenaCache = ArenaCache<'tcx, K, V>;
 }
 
 pub struct DefaultCache<K, V> {
@@ -62,7 +57,6 @@ impl<K, V> Default for DefaultCache<K, V> {
 
 impl<K: Eq + Hash, V: Copy + Debug> QueryStorage for DefaultCache<K, V> {
     type Value = V;
-    type Stored = V;
 }
 
 impl<K, V> QueryCache for DefaultCache<K, V>
@@ -85,7 +79,7 @@ where
     }
 
     #[inline]
-    fn complete(&self, key: K, value: V, index: DepNodeIndex) -> Self::Stored {
+    fn complete(&self, key: K, value: V, index: DepNodeIndex) {
         #[cfg(parallel_compiler)]
         let mut lock = self.cache.get_shard_by_value(&key).lock();
         #[cfg(not(parallel_compiler))]
@@ -93,7 +87,6 @@ where
         // We may be overwriting another value. This is all right, since the dep-graph
         // will check that the fingerprint matches.
         lock.insert(key, (value, index));
-        value
     }
 
     fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
@@ -122,7 +115,6 @@ impl<'tcx, V: 'tcx> CacheSelector<'tcx, V> for SingleCacheSelector {
     type Cache = SingleCache<V>
     where
         V: Copy;
-    type ArenaCache = ArenaCache<'tcx, (), V>;
 }
 
 pub struct SingleCache<V> {
@@ -137,7 +129,6 @@ impl<V> Default for SingleCache<V> {
 
 impl<V: Copy + Debug> QueryStorage for SingleCache<V> {
     type Value = V;
-    type Stored = V;
 }
 
 impl<V> QueryCache for SingleCache<V>
@@ -152,9 +143,8 @@ where
     }
 
     #[inline]
-    fn complete(&self, _key: (), value: V, index: DepNodeIndex) -> Self::Stored {
+    fn complete(&self, _key: (), value: V, index: DepNodeIndex) {
         *self.cache.lock() = Some((value, index));
-        value
     }
 
     fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
@@ -162,85 +152,12 @@ where
     }
 }
 
-pub struct ArenaCache<'tcx, K, V> {
-    arena: WorkerLocal<TypedArena<(V, DepNodeIndex)>>,
-    #[cfg(parallel_compiler)]
-    cache: Sharded<FxHashMap<K, &'tcx (V, DepNodeIndex)>>,
-    #[cfg(not(parallel_compiler))]
-    cache: Lock<FxHashMap<K, &'tcx (V, DepNodeIndex)>>,
-}
-
-impl<'tcx, K, V> Default for ArenaCache<'tcx, K, V> {
-    fn default() -> Self {
-        ArenaCache { arena: WorkerLocal::new(|_| TypedArena::default()), cache: Default::default() }
-    }
-}
-
-impl<'tcx, K: Eq + Hash, V: Debug + 'tcx> QueryStorage for ArenaCache<'tcx, K, V> {
-    type Value = V;
-    type Stored = &'tcx V;
-}
-
-impl<'tcx, K, V: 'tcx> QueryCache for ArenaCache<'tcx, K, V>
-where
-    K: Eq + Hash + Clone + Debug,
-    V: Debug,
-{
-    type Key = K;
-
-    #[inline(always)]
-    fn lookup(&self, key: &K) -> Option<(&'tcx V, DepNodeIndex)> {
-        let key_hash = sharded::make_hash(key);
-        #[cfg(parallel_compiler)]
-        let lock = self.cache.get_shard_by_hash(key_hash).lock();
-        #[cfg(not(parallel_compiler))]
-        let lock = self.cache.lock();
-        let result = lock.raw_entry().from_key_hashed_nocheck(key_hash, key);
-
-        if let Some((_, value)) = result { Some((&value.0, value.1)) } else { None }
-    }
-
-    #[inline]
-    fn complete(&self, key: K, value: V, index: DepNodeIndex) -> Self::Stored {
-        let value = self.arena.alloc((value, index));
-        let value = unsafe { &*(value as *const _) };
-        #[cfg(parallel_compiler)]
-        let mut lock = self.cache.get_shard_by_value(&key).lock();
-        #[cfg(not(parallel_compiler))]
-        let mut lock = self.cache.lock();
-        // We may be overwriting another value. This is all right, since the dep-graph
-        // will check that the fingerprint matches.
-        lock.insert(key, value);
-        &value.0
-    }
-
-    fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
-        #[cfg(parallel_compiler)]
-        {
-            let shards = self.cache.lock_shards();
-            for shard in shards.iter() {
-                for (k, v) in shard.iter() {
-                    f(k, &v.0, v.1);
-                }
-            }
-        }
-        #[cfg(not(parallel_compiler))]
-        {
-            let map = self.cache.lock();
-            for (k, v) in map.iter() {
-                f(k, &v.0, v.1);
-            }
-        }
-    }
-}
-
 pub struct VecCacheSelector<K>(PhantomData<K>);
 
 impl<'tcx, K: Idx, V: 'tcx> CacheSelector<'tcx, V> for VecCacheSelector<K> {
     type Cache = VecCache<K, V>
     where
         V: Copy;
-    type ArenaCache = VecArenaCache<'tcx, K, V>;
 }
 
 pub struct VecCache<K: Idx, V> {
@@ -258,7 +175,6 @@ impl<K: Idx, V> Default for VecCache<K, V> {
 
 impl<K: Eq + Idx, V: Copy + Debug> QueryStorage for VecCache<K, V> {
     type Value = V;
-    type Stored = V;
 }
 
 impl<K, V> QueryCache for VecCache<K, V>
@@ -278,87 +194,12 @@ where
     }
 
     #[inline]
-    fn complete(&self, key: K, value: V, index: DepNodeIndex) -> Self::Stored {
+    fn complete(&self, key: K, value: V, index: DepNodeIndex) {
         #[cfg(parallel_compiler)]
         let mut lock = self.cache.get_shard_by_hash(key.index() as u64).lock();
         #[cfg(not(parallel_compiler))]
         let mut lock = self.cache.lock();
         lock.insert(key, (value, index));
-        value
-    }
-
-    fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
-        #[cfg(parallel_compiler)]
-        {
-            let shards = self.cache.lock_shards();
-            for shard in shards.iter() {
-                for (k, v) in shard.iter_enumerated() {
-                    if let Some(v) = v {
-                        f(&k, &v.0, v.1);
-                    }
-                }
-            }
-        }
-        #[cfg(not(parallel_compiler))]
-        {
-            let map = self.cache.lock();
-            for (k, v) in map.iter_enumerated() {
-                if let Some(v) = v {
-                    f(&k, &v.0, v.1);
-                }
-            }
-        }
-    }
-}
-
-pub struct VecArenaCache<'tcx, K: Idx, V> {
-    arena: WorkerLocal<TypedArena<(V, DepNodeIndex)>>,
-    #[cfg(parallel_compiler)]
-    cache: Sharded<IndexVec<K, Option<&'tcx (V, DepNodeIndex)>>>,
-    #[cfg(not(parallel_compiler))]
-    cache: Lock<IndexVec<K, Option<&'tcx (V, DepNodeIndex)>>>,
-}
-
-impl<'tcx, K: Idx, V> Default for VecArenaCache<'tcx, K, V> {
-    fn default() -> Self {
-        VecArenaCache {
-            arena: WorkerLocal::new(|_| TypedArena::default()),
-            cache: Default::default(),
-        }
-    }
-}
-
-impl<'tcx, K: Eq + Idx, V: Debug + 'tcx> QueryStorage for VecArenaCache<'tcx, K, V> {
-    type Value = V;
-    type Stored = &'tcx V;
-}
-
-impl<'tcx, K, V: 'tcx> QueryCache for VecArenaCache<'tcx, K, V>
-where
-    K: Eq + Idx + Clone + Debug,
-    V: Debug,
-{
-    type Key = K;
-
-    #[inline(always)]
-    fn lookup(&self, key: &K) -> Option<(&'tcx V, DepNodeIndex)> {
-        #[cfg(parallel_compiler)]
-        let lock = self.cache.get_shard_by_hash(key.index() as u64).lock();
-        #[cfg(not(parallel_compiler))]
-        let lock = self.cache.lock();
-        if let Some(Some(value)) = lock.get(*key) { Some((&value.0, value.1)) } else { None }
-    }
-
-    #[inline]
-    fn complete(&self, key: K, value: V, index: DepNodeIndex) -> Self::Stored {
-        let value = self.arena.alloc((value, index));
-        let value = unsafe { &*(value as *const _) };
-        #[cfg(parallel_compiler)]
-        let mut lock = self.cache.get_shard_by_hash(key.index() as u64).lock();
-        #[cfg(not(parallel_compiler))]
-        let mut lock = self.cache.lock();
-        lock.insert(key, value);
-        &value.0
     }
 
     fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
diff --git a/compiler/rustc_query_system/src/query/config.rs b/compiler/rustc_query_system/src/query/config.rs
index a28e45a5c08..56247e827a2 100644
--- a/compiler/rustc_query_system/src/query/config.rs
+++ b/compiler/rustc_query_system/src/query/config.rs
@@ -20,10 +20,9 @@ pub trait QueryConfig<Qcx: QueryContext> {
     const NAME: &'static str;
 
     type Key: DepNodeParams<Qcx::DepContext> + Eq + Hash + Clone + Debug;
-    type Value: Debug;
-    type Stored: Debug + Copy + std::borrow::Borrow<Self::Value>;
+    type Value: Debug + Copy;
 
-    type Cache: QueryCache<Key = Self::Key, Stored = Self::Stored, Value = Self::Value>;
+    type Cache: QueryCache<Key = Self::Key, Value = Self::Value>;
 
     // Don't use this method to access query results, instead use the methods on TyCtxt
     fn query_state<'a>(tcx: Qcx) -> &'a QueryState<Self::Key, Qcx::DepKind>
@@ -38,9 +37,9 @@ pub trait QueryConfig<Qcx: QueryContext> {
     fn cache_on_disk(tcx: Qcx::DepContext, key: &Self::Key) -> bool;
 
     // Don't use this method to compute query results, instead use the methods on TyCtxt
-    fn execute_query(tcx: Qcx::DepContext, k: Self::Key) -> Self::Stored;
+    fn execute_query(tcx: Qcx::DepContext, k: Self::Key) -> Self::Value;
 
-    fn compute(tcx: Qcx, key: &Self::Key) -> fn(Qcx::DepContext, Self::Key) -> Self::Value;
+    fn compute(tcx: Qcx, key: Self::Key) -> Self::Value;
 
     fn try_load_from_disk(qcx: Qcx, idx: &Self::Key) -> TryLoadFromDisk<Qcx, Self>;
 
diff --git a/compiler/rustc_query_system/src/query/plumbing.rs b/compiler/rustc_query_system/src/query/plumbing.rs
index ed66d1929c5..57217fb681a 100644
--- a/compiler/rustc_query_system/src/query/plumbing.rs
+++ b/compiler/rustc_query_system/src/query/plumbing.rs
@@ -246,7 +246,7 @@ where
 
     /// Completes the query by updating the query cache with the `result`,
     /// signals the waiter and forgets the JobOwner, so it won't poison the query
-    fn complete<C>(self, cache: &C, result: C::Value, dep_node_index: DepNodeIndex) -> C::Stored
+    fn complete<C>(self, cache: &C, result: C::Value, dep_node_index: DepNodeIndex)
     where
         C: QueryCache<Key = K>,
     {
@@ -257,23 +257,19 @@ where
         // Forget ourself so our destructor won't poison the query
         mem::forget(self);
 
-        let (job, result) = {
-            let job = {
-                #[cfg(parallel_compiler)]
-                let mut lock = state.active.get_shard_by_value(&key).lock();
-                #[cfg(not(parallel_compiler))]
-                let mut lock = state.active.lock();
-                match lock.remove(&key).unwrap() {
-                    QueryResult::Started(job) => job,
-                    QueryResult::Poisoned => panic!(),
-                }
-            };
-            let result = cache.complete(key, result, dep_node_index);
-            (job, result)
+        let job = {
+            #[cfg(parallel_compiler)]
+            let mut lock = state.active.get_shard_by_value(&key).lock();
+            #[cfg(not(parallel_compiler))]
+            let mut lock = state.active.lock();
+            match lock.remove(&key).unwrap() {
+                QueryResult::Started(job) => job,
+                QueryResult::Poisoned => panic!(),
+            }
         };
+        cache.complete(key, result, dep_node_index);
 
         job.signal_complete();
-        result
     }
 }
 
@@ -336,7 +332,7 @@ where
 /// which will be used if the query is not in the cache and we need
 /// to compute it.
 #[inline]
-pub fn try_get_cached<Tcx, C>(tcx: Tcx, cache: &C, key: &C::Key) -> Option<C::Stored>
+pub fn try_get_cached<Tcx, C>(tcx: Tcx, cache: &C, key: &C::Key) -> Option<C::Value>
 where
     C: QueryCache,
     Tcx: DepContext,
@@ -358,7 +354,7 @@ fn try_execute_query<Q, Qcx>(
     span: Span,
     key: Q::Key,
     dep_node: Option<DepNode<Qcx::DepKind>>,
-) -> (Q::Stored, Option<DepNodeIndex>)
+) -> (Q::Value, Option<DepNodeIndex>)
 where
     Q: QueryConfig<Qcx>,
     Qcx: QueryContext,
@@ -390,7 +386,7 @@ where
                     );
                 }
             }
-            let result = job.complete(cache, result, dep_node_index);
+            job.complete(cache, result, dep_node_index);
             (result, Some(dep_node_index))
         }
         TryGetJob::Cycle(error) => {
@@ -426,9 +422,7 @@ where
     // Fast path for when incr. comp. is off.
     if !dep_graph.is_fully_enabled() {
         let prof_timer = qcx.dep_context().profiler().query_provider();
-        let result = qcx.start_query(job_id, Q::DEPTH_LIMIT, None, || {
-            Q::compute(qcx, &key)(*qcx.dep_context(), key)
-        });
+        let result = qcx.start_query(job_id, Q::DEPTH_LIMIT, None, || Q::compute(qcx, key));
         let dep_node_index = dep_graph.next_virtual_depnode_index();
         prof_timer.finish_with_query_invocation_id(dep_node_index.into());
         return (result, dep_node_index);
@@ -454,17 +448,15 @@ where
     let (result, dep_node_index) =
         qcx.start_query(job_id, Q::DEPTH_LIMIT, Some(&diagnostics), || {
             if Q::ANON {
-                return dep_graph.with_anon_task(*qcx.dep_context(), Q::DEP_KIND, || {
-                    Q::compute(qcx, &key)(*qcx.dep_context(), key)
-                });
+                return dep_graph
+                    .with_anon_task(*qcx.dep_context(), Q::DEP_KIND, || Q::compute(qcx, key));
             }
 
             // `to_dep_node` is expensive for some `DepKind`s.
             let dep_node =
                 dep_node_opt.unwrap_or_else(|| Q::construct_dep_node(*qcx.dep_context(), &key));
 
-            let task = Q::compute(qcx, &key);
-            dep_graph.with_task(dep_node, *qcx.dep_context(), key, task, Q::HASH_RESULT)
+            dep_graph.with_task(dep_node, qcx, key, Q::compute, Q::HASH_RESULT)
         });
 
     prof_timer.finish_with_query_invocation_id(dep_node_index.into());
@@ -555,7 +547,7 @@ where
     let prof_timer = qcx.dep_context().profiler().query_provider();
 
     // The dep-graph for this computation is already in-place.
-    let result = dep_graph.with_ignore(|| Q::compute(qcx, key)(*qcx.dep_context(), key.clone()));
+    let result = dep_graph.with_ignore(|| Q::compute(qcx, key.clone()));
 
     prof_timer.finish_with_query_invocation_id(dep_node_index.into());
 
@@ -727,7 +719,7 @@ pub enum QueryMode {
     Ensure,
 }
 
-pub fn get_query<Q, Qcx, D>(qcx: Qcx, span: Span, key: Q::Key, mode: QueryMode) -> Option<Q::Stored>
+pub fn get_query<Q, Qcx, D>(qcx: Qcx, span: Span, key: Q::Key, mode: QueryMode) -> Option<Q::Value>
 where
     D: DepKind,
     Q: QueryConfig<Qcx>,
author	bors <bors@rust-lang.org>	2023-02-16 22:10:10 +0000
committer	bors <bors@rust-lang.org>	2023-02-16 22:10:10 +0000
commit	947b696ce0ce42c98b8fb82ffa0735ade051466c (patch)
tree	35768557e506f47a11e729260131ab417143b1c7 /compiler/rustc_query_system/src
parent	9a7cc6c32f1a690f86827e4724bcda85e506ef35 (diff)
parent	caf29b27277fd4fca4b372938e2fecf12dd12c78 (diff)
download	rust-947b696ce0ce42c98b8fb82ffa0735ade051466c.tar.gz rust-947b696ce0ce42c98b8fb82ffa0735ade051466c.zip