diff options
Diffstat (limited to 'compiler/rustc_query_system/src/dep_graph')
| -rw-r--r-- | compiler/rustc_query_system/src/dep_graph/graph.rs | 143 | ||||
| -rw-r--r-- | compiler/rustc_query_system/src/dep_graph/mod.rs | 4 | ||||
| -rw-r--r-- | compiler/rustc_query_system/src/dep_graph/serialized.rs | 17 |
3 files changed, 115 insertions, 49 deletions
diff --git a/compiler/rustc_query_system/src/dep_graph/graph.rs b/compiler/rustc_query_system/src/dep_graph/graph.rs index bfd8b320715..de5bbacf274 100644 --- a/compiler/rustc_query_system/src/dep_graph/graph.rs +++ b/compiler/rustc_query_system/src/dep_graph/graph.rs @@ -7,7 +7,8 @@ use std::sync::atomic::{AtomicU32, Ordering}; use rustc_data_structures::fingerprint::{Fingerprint, PackedFingerprint}; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; -use rustc_data_structures::profiling::{QueryInvocationId, SelfProfilerRef}; +use rustc_data_structures::outline; +use rustc_data_structures::profiling::QueryInvocationId; use rustc_data_structures::sharded::{self, ShardedHashMap}; use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; use rustc_data_structures::sync::{AtomicU64, Lock}; @@ -16,6 +17,7 @@ use rustc_errors::DiagInner; use rustc_index::IndexVec; use rustc_macros::{Decodable, Encodable}; use rustc_serialize::opaque::{FileEncodeResult, FileEncoder}; +use rustc_session::Session; use tracing::{debug, instrument}; #[cfg(debug_assertions)] use {super::debug::EdgeFilter, std::env}; @@ -117,7 +119,7 @@ where impl<D: Deps> DepGraph<D> { pub fn new( - profiler: &SelfProfilerRef, + session: &Session, prev_graph: Arc<SerializedDepGraph>, prev_work_products: WorkProductMap, encoder: FileEncoder, @@ -127,7 +129,7 @@ impl<D: Deps> DepGraph<D> { let prev_graph_node_count = prev_graph.node_count(); let current = CurrentDepGraph::new( - profiler, + session, prev_graph_node_count, encoder, record_graph, @@ -138,7 +140,7 @@ impl<D: Deps> DepGraph<D> { let colors = DepNodeColorMap::new(prev_graph_node_count); // Instantiate a dependy-less node only once for anonymous queries. - let _green_node_index = current.intern_new_node( + let _green_node_index = current.alloc_new_node( DepNode { kind: D::DEP_KIND_NULL, hash: current.anon_id_seed.into() }, EdgesVec::new(), Fingerprint::ZERO, @@ -351,12 +353,13 @@ impl<D: Deps> DepGraphData<D> { // in `DepGraph::try_mark_green()`. // 2. Two distinct query keys get mapped to the same `DepNode` // (see for example #48923). - assert!( - !self.dep_node_exists(&key), - "forcing query with already existing `DepNode`\n\ + self.assert_dep_node_not_yet_allocated_in_current_session(&key, || { + format!( + "forcing query with already existing `DepNode`\n\ - query-key: {arg:?}\n\ - dep-node: {key:?}" - ); + ) + }); let with_deps = |task_deps| D::with_deps(task_deps, || task(cx, arg)); let (result, edges) = if cx.dep_context().is_eval_always(key.kind) { @@ -436,7 +439,16 @@ impl<D: Deps> DepGraphData<D> { hash: self.current.anon_id_seed.combine(hasher.finish()).into(), }; - self.current.intern_new_node(target_dep_node, task_deps, Fingerprint::ZERO) + // The DepNodes generated by the process above are not unique. 2 queries could + // have exactly the same dependencies. However, deserialization does not handle + // duplicated nodes, so we do the deduplication here directly. + // + // As anonymous nodes are a small quantity compared to the full dep-graph, the + // memory impact of this `anon_node_to_index` map remains tolerable, and helps + // us avoid useless growth of the graph with almost-equivalent nodes. + self.current.anon_node_to_index.get_or_insert_with(target_dep_node, || { + self.current.alloc_new_node(target_dep_node, task_deps, Fingerprint::ZERO) + }) } }; @@ -637,20 +649,24 @@ impl<D: Deps> DepGraph<D> { } impl<D: Deps> DepGraphData<D> { - #[inline] - fn dep_node_index_of_opt(&self, dep_node: &DepNode) -> Option<DepNodeIndex> { + fn assert_dep_node_not_yet_allocated_in_current_session<S: std::fmt::Display>( + &self, + dep_node: &DepNode, + msg: impl FnOnce() -> S, + ) { if let Some(prev_index) = self.previous.node_to_index_opt(dep_node) { - self.current.prev_index_to_index.lock()[prev_index] - } else { - self.current.new_node_to_index.get(dep_node) + let current = self.current.prev_index_to_index.lock()[prev_index]; + assert!(current.is_none(), "{}", msg()) + } else if let Some(nodes_newly_allocated_in_current_session) = + &self.current.nodes_newly_allocated_in_current_session + { + outline(|| { + let seen = nodes_newly_allocated_in_current_session.lock().contains_key(dep_node); + assert!(!seen, "{}", msg()); + }); } } - #[inline] - fn dep_node_exists(&self, dep_node: &DepNode) -> bool { - self.dep_node_index_of_opt(dep_node).is_some() - } - fn node_color(&self, dep_node: &DepNode) -> Option<DepNodeColor> { if let Some(prev_index) = self.previous.node_to_index_opt(dep_node) { self.colors.get(prev_index) @@ -734,11 +750,6 @@ impl<D: Deps> DepGraphData<D> { } impl<D: Deps> DepGraph<D> { - #[inline] - pub fn dep_node_exists(&self, dep_node: &DepNode) -> bool { - self.data.as_ref().is_some_and(|data| data.dep_node_exists(dep_node)) - } - /// Checks whether a previous work product exists for `v` and, if /// so, return the path that leads to it. Used to skip doing work. pub fn previous_work_product(&self, v: &WorkProductId) -> Option<WorkProduct> { @@ -964,6 +975,16 @@ impl<D: Deps> DepGraph<D> { self.node_color(dep_node).is_some_and(|c| c.is_green()) } + pub fn assert_dep_node_not_yet_allocated_in_current_session<S: std::fmt::Display>( + &self, + dep_node: &DepNode, + msg: impl FnOnce() -> S, + ) { + if let Some(data) = &self.data { + data.assert_dep_node_not_yet_allocated_in_current_session(dep_node, msg) + } + } + /// This method loads all on-disk cacheable query results into memory, so /// they can be written out to the new cache file again. Most query results /// will already be in memory but in the case where we marked something as @@ -1069,24 +1090,24 @@ rustc_index::newtype_index! { /// largest in the compiler. /// /// For this reason, we avoid storing `DepNode`s more than once as map -/// keys. The `new_node_to_index` map only contains nodes not in the previous +/// keys. The `anon_node_to_index` map only contains nodes of anonymous queries not in the previous /// graph, and we map nodes in the previous graph to indices via a two-step /// mapping. `SerializedDepGraph` maps from `DepNode` to `SerializedDepNodeIndex`, /// and the `prev_index_to_index` vector (which is more compact and faster than /// using a map) maps from `SerializedDepNodeIndex` to `DepNodeIndex`. /// -/// This struct uses three locks internally. The `data`, `new_node_to_index`, +/// This struct uses three locks internally. The `data`, `anon_node_to_index`, /// and `prev_index_to_index` fields are locked separately. Operations that take /// a `DepNodeIndex` typically just access the `data` field. /// /// We only need to manipulate at most two locks simultaneously: -/// `new_node_to_index` and `data`, or `prev_index_to_index` and `data`. When -/// manipulating both, we acquire `new_node_to_index` or `prev_index_to_index` +/// `anon_node_to_index` and `data`, or `prev_index_to_index` and `data`. When +/// manipulating both, we acquire `anon_node_to_index` or `prev_index_to_index` /// first, and `data` second. pub(super) struct CurrentDepGraph<D: Deps> { encoder: GraphEncoder<D>, - new_node_to_index: ShardedHashMap<DepNode, DepNodeIndex>, prev_index_to_index: Lock<IndexVec<SerializedDepNodeIndex, Option<DepNodeIndex>>>, + anon_node_to_index: ShardedHashMap<DepNode, DepNodeIndex>, /// This is used to verify that fingerprints do not change between the creation of a node /// and its recomputation. @@ -1098,6 +1119,14 @@ pub(super) struct CurrentDepGraph<D: Deps> { #[cfg(debug_assertions)] forbidden_edge: Option<EdgeFilter>, + /// Used to verify the absence of hash collisions among DepNodes. + /// This field is only `Some` if the `-Z incremental_verify_ich` option is present + /// or if `debug_assertions` are enabled. + /// + /// The map contains all DepNodes that have been allocated in the current session so far and + /// for which there is no equivalent in the previous session. + nodes_newly_allocated_in_current_session: Option<Lock<FxHashMap<DepNode, DepNodeIndex>>>, + /// Anonymous `DepNode`s are nodes whose IDs we compute from the list of /// their edges. This has the beneficial side-effect that multiple anonymous /// nodes can be coalesced into one without changing the semantics of the @@ -1119,7 +1148,7 @@ pub(super) struct CurrentDepGraph<D: Deps> { impl<D: Deps> CurrentDepGraph<D> { fn new( - profiler: &SelfProfilerRef, + session: &Session, prev_graph_node_count: usize, encoder: FileEncoder, record_graph: bool, @@ -1145,16 +1174,20 @@ impl<D: Deps> CurrentDepGraph<D> { let new_node_count_estimate = 102 * prev_graph_node_count / 100 + 200; + let new_node_dbg = + session.opts.unstable_opts.incremental_verify_ich || cfg!(debug_assertions); + CurrentDepGraph { encoder: GraphEncoder::new( encoder, prev_graph_node_count, record_graph, record_stats, - profiler, + &session.prof, previous, ), - new_node_to_index: ShardedHashMap::with_capacity( + anon_node_to_index: ShardedHashMap::with_capacity( + // FIXME: The count estimate is off as anon nodes are only a portion of the nodes. new_node_count_estimate / sharded::shards(), ), prev_index_to_index: Lock::new(IndexVec::from_elem_n(None, prev_graph_node_count)), @@ -1163,6 +1196,12 @@ impl<D: Deps> CurrentDepGraph<D> { forbidden_edge, #[cfg(debug_assertions)] fingerprints: Lock::new(IndexVec::from_elem_n(None, new_node_count_estimate)), + nodes_newly_allocated_in_current_session: new_node_dbg.then(|| { + Lock::new(FxHashMap::with_capacity_and_hasher( + new_node_count_estimate, + Default::default(), + )) + }), total_read_count: AtomicU64::new(0), total_duplicate_read_count: AtomicU64::new(0), } @@ -1180,19 +1219,31 @@ impl<D: Deps> CurrentDepGraph<D> { /// Writes the node to the current dep-graph and allocates a `DepNodeIndex` for it. /// Assumes that this is a node that has no equivalent in the previous dep-graph. #[inline(always)] - fn intern_new_node( + fn alloc_new_node( &self, key: DepNode, edges: EdgesVec, current_fingerprint: Fingerprint, ) -> DepNodeIndex { - let dep_node_index = self - .new_node_to_index - .get_or_insert_with(key, || self.encoder.send(key, current_fingerprint, edges)); + let dep_node_index = self.encoder.send(key, current_fingerprint, edges); #[cfg(debug_assertions)] self.record_edge(dep_node_index, key, current_fingerprint); + if let Some(ref nodes_newly_allocated_in_current_session) = + self.nodes_newly_allocated_in_current_session + { + outline(|| { + if nodes_newly_allocated_in_current_session + .lock() + .insert(key, dep_node_index) + .is_some() + { + panic!("Found duplicate dep-node {key:?}"); + } + }); + } + dep_node_index } @@ -1247,7 +1298,7 @@ impl<D: Deps> CurrentDepGraph<D> { let fingerprint = fingerprint.unwrap_or(Fingerprint::ZERO); // This is a new node: it didn't exist in the previous compilation session. - let dep_node_index = self.intern_new_node(key, edges, fingerprint); + let dep_node_index = self.alloc_new_node(key, edges, fingerprint); (dep_node_index, None) } @@ -1286,7 +1337,10 @@ impl<D: Deps> CurrentDepGraph<D> { ) { let node = &prev_graph.index_to_node(prev_index); debug_assert!( - !self.new_node_to_index.get(node).is_some(), + !self + .nodes_newly_allocated_in_current_session + .as_ref() + .map_or(false, |set| set.lock().contains_key(node)), "node from previous graph present in new node collection" ); } @@ -1408,13 +1462,12 @@ fn panic_on_forbidden_read<D: Deps>(data: &DepGraphData<D>, dep_node_index: DepN } } - if dep_node.is_none() { - // Try to find it among the new nodes - for shard in data.current.new_node_to_index.lock_shards() { - if let Some((node, _)) = shard.iter().find(|(_, index)| *index == dep_node_index) { - dep_node = Some(*node); - break; - } + if dep_node.is_none() + && let Some(nodes) = &data.current.nodes_newly_allocated_in_current_session + { + // Try to find it among the nodes allocated so far in this session + if let Some((node, _)) = nodes.lock().iter().find(|&(_, index)| *index == dep_node_index) { + dep_node = Some(*node); } } diff --git a/compiler/rustc_query_system/src/dep_graph/mod.rs b/compiler/rustc_query_system/src/dep_graph/mod.rs index e3d64d1c0f8..4eeb6078d14 100644 --- a/compiler/rustc_query_system/src/dep_graph/mod.rs +++ b/compiler/rustc_query_system/src/dep_graph/mod.rs @@ -100,6 +100,8 @@ pub trait Deps { where OP: for<'a> FnOnce(TaskDepsRef<'a>); + fn name(&self, dep_kind: DepKind) -> &'static str; + /// We use this for most things when incr. comp. is turned off. const DEP_KIND_NULL: DepKind; @@ -154,7 +156,7 @@ pub enum FingerprintStyle { impl FingerprintStyle { #[inline] - pub fn reconstructible(self) -> bool { + pub const fn reconstructible(self) -> bool { match self { FingerprintStyle::DefPathHash | FingerprintStyle::Unit | FingerprintStyle::HirId => { true diff --git a/compiler/rustc_query_system/src/dep_graph/serialized.rs b/compiler/rustc_query_system/src/dep_graph/serialized.rs index 2c6fd7d494f..f4b2cf631ed 100644 --- a/compiler/rustc_query_system/src/dep_graph/serialized.rs +++ b/compiler/rustc_query_system/src/dep_graph/serialized.rs @@ -179,8 +179,8 @@ fn mask(bits: usize) -> usize { } impl SerializedDepGraph { - #[instrument(level = "debug", skip(d))] - pub fn decode<D: Deps>(d: &mut MemDecoder<'_>) -> Arc<SerializedDepGraph> { + #[instrument(level = "debug", skip(d, deps))] + pub fn decode<D: Deps>(d: &mut MemDecoder<'_>, deps: &D) -> Arc<SerializedDepGraph> { // The last 16 bytes are the node count and edge count. debug!("position: {:?}", d.position()); let (node_count, edge_count) = @@ -252,7 +252,18 @@ impl SerializedDepGraph { .collect(); for (idx, node) in nodes.iter_enumerated() { - index[node.kind.as_usize()].insert(node.hash, idx); + if index[node.kind.as_usize()].insert(node.hash, idx).is_some() { + // Side effect nodes can have duplicates + if node.kind != D::DEP_KIND_SIDE_EFFECT { + let name = deps.name(node.kind); + panic!( + "Error: A dep graph node ({name}) does not have an unique index. \ + Running a clean build on a nightly compiler with `-Z incremental-verify-ich` \ + can help narrow down the issue for reporting. A clean build may also work around the issue.\n + DepNode: {node:?}" + ) + } + } } Arc::new(SerializedDepGraph { |
