about summary refs log tree commit diff
diff options
context:
space:
mode:
authorTyson Nottingham <tgnottingham@gmail.com>2020-11-30 21:07:08 -0800
committerTyson Nottingham <tgnottingham@gmail.com>2020-12-22 14:12:57 -0800
commitdd1ab840d279c96adf5d7778670f14a6398b595a (patch)
treede9c50cc2fb99a0c5feb5189026fd85f74beacdb
parentea47269f5f50cd7804141ba00811b79be1e156c6 (diff)
downloadrust-dd1ab840d279c96adf5d7778670f14a6398b595a.tar.gz
rust-dd1ab840d279c96adf5d7778670f14a6398b595a.zip
rustc_query_system: use more space-efficient edges representation
Use single vector of edges rather than per-node vector. There is a small
hit to instruction counts (< 0.5%), but the memory savings make up for it.
-rw-r--r--compiler/rustc_query_system/src/dep_graph/graph.rs137
-rw-r--r--compiler/rustc_query_system/src/dep_graph/query.rs18
2 files changed, 103 insertions, 52 deletions
diff --git a/compiler/rustc_query_system/src/dep_graph/graph.rs b/compiler/rustc_query_system/src/dep_graph/graph.rs
index 24508a52373..076dcb4fed1 100644
--- a/compiler/rustc_query_system/src/dep_graph/graph.rs
+++ b/compiler/rustc_query_system/src/dep_graph/graph.rs
@@ -15,6 +15,7 @@ use std::env;
 use std::hash::Hash;
 use std::marker::PhantomData;
 use std::mem;
+use std::ops::Range;
 use std::sync::atomic::Ordering::Relaxed;
 
 use super::debug::EdgeFilter;
@@ -143,42 +144,48 @@ impl<K: DepKind> DepGraph<K> {
         let node_count = data.hybrid_indices.len();
 
         let mut nodes = Vec::with_capacity(node_count);
-        let mut edges = Vec::with_capacity(edge_count);
-
-        for (index, &hybrid_index) in data.hybrid_indices.iter_enumerated() {
-            let src = index.index();
+        let mut edge_list_indices = Vec::with_capacity(node_count);
+        let mut edge_list_data = Vec::with_capacity(edge_count);
+        edge_list_data.extend(data.unshared_edges.iter().map(|i| i.index()));
 
+        for &hybrid_index in data.hybrid_indices.iter() {
             match hybrid_index.into() {
                 HybridIndex::New(new_index) => {
-                    let new = &data.new;
-                    nodes.push(new.nodes[new_index]);
-                    edges.extend(new.edges[new_index].iter().map(|dst| (src, dst.index())));
+                    nodes.push(data.new.nodes[new_index]);
+                    let edges = &data.new.edges[new_index];
+                    edge_list_indices.push((edges.start.index(), edges.end.index()));
                 }
                 HybridIndex::Red(red_index) => {
-                    let red = &data.red;
-                    nodes.push(previous.index_to_node(red.node_indices[red_index]));
-                    edges.extend(red.edges[red_index].iter().map(|dst| (src, dst.index())));
+                    nodes.push(previous.index_to_node(data.red.node_indices[red_index]));
+                    let edges = &data.red.edges[red_index];
+                    edge_list_indices.push((edges.start.index(), edges.end.index()));
                 }
                 HybridIndex::LightGreen(lg_index) => {
-                    let lg = &data.light_green;
-                    nodes.push(previous.index_to_node(lg.node_indices[lg_index]));
-                    edges.extend(lg.edges[lg_index].iter().map(|dst| (src, dst.index())));
+                    nodes.push(previous.index_to_node(data.light_green.node_indices[lg_index]));
+                    let edges = &data.light_green.edges[lg_index];
+                    edge_list_indices.push((edges.start.index(), edges.end.index()));
                 }
                 HybridIndex::DarkGreen(prev_index) => {
                     nodes.push(previous.index_to_node(prev_index));
+
                     let edges_iter = previous
                         .edge_targets_from(prev_index)
                         .iter()
-                        .map(|&dst| (src, prev_index_to_index[dst].unwrap().index()));
-                    edges.extend(edges_iter);
+                        .map(|&dst| prev_index_to_index[dst].unwrap().index());
+
+                    let start = edge_list_data.len();
+                    edge_list_data.extend(edges_iter);
+                    let end = edge_list_data.len();
+                    edge_list_indices.push((start, end));
                 }
             }
         }
 
         debug_assert_eq!(nodes.len(), node_count);
-        debug_assert_eq!(edges.len(), edge_count);
+        debug_assert_eq!(edge_list_indices.len(), node_count);
+        debug_assert_eq!(edge_list_data.len(), edge_count);
 
-        DepGraphQuery::new(&nodes[..], &edges[..])
+        DepGraphQuery::new(&nodes[..], &edge_list_indices[..], &edge_list_data[..])
     }
 
     pub fn assert_ignored(&self) {
@@ -561,11 +568,7 @@ impl<K: DepKind> DepGraph<K> {
         let previous = &data.previous;
         let data = data.current.data.lock();
 
-        // Linearly scanning each collection is a bit faster than scanning
-        // `hybrid_indices` and bouncing around the different collections.
-        let mut edge_count = data.new.edges.iter().map(|e| e.len()).sum::<usize>()
-            + data.red.edges.iter().map(|e| e.len()).sum::<usize>()
-            + data.light_green.edges.iter().map(|e| e.len()).sum::<usize>();
+        let mut edge_count = data.unshared_edges.len();
 
         for &hybrid_index in data.hybrid_indices.iter() {
             if let HybridIndex::DarkGreen(prev_index) = hybrid_index.into() {
@@ -591,17 +594,7 @@ impl<K: DepKind> DepGraph<K> {
         let mut fingerprints = IndexVec::with_capacity(node_count);
         let mut edge_list_indices = IndexVec::with_capacity(node_count);
         let mut edge_list_data = Vec::with_capacity(edge_count);
-
-        fn add_edges<'a, I: Iterator<Item = &'a DepNodeIndex>>(
-            edge_list_indices: &mut IndexVec<SerializedDepNodeIndex, (u32, u32)>,
-            edge_list_data: &mut Vec<SerializedDepNodeIndex>,
-            iter: I,
-        ) {
-            let start = edge_list_data.len() as u32;
-            edge_list_data.extend(iter.map(|i| SDNI::new(i.index())));
-            let end = edge_list_data.len() as u32;
-            edge_list_indices.push((start, end));
-        };
+        edge_list_data.extend(data.unshared_edges.iter().map(|i| SDNI::new(i.index())));
 
         for &hybrid_index in data.hybrid_indices.iter() {
             match hybrid_index.into() {
@@ -609,28 +602,36 @@ impl<K: DepKind> DepGraph<K> {
                     let new = &data.new;
                     nodes.push(new.nodes[i]);
                     fingerprints.push(new.fingerprints[i]);
-                    add_edges(&mut edge_list_indices, &mut edge_list_data, new.edges[i].iter());
+                    let edges = &new.edges[i];
+                    edge_list_indices.push((edges.start.as_u32(), edges.end.as_u32()));
                 }
                 HybridIndex::Red(i) => {
                     let red = &data.red;
                     nodes.push(previous.index_to_node(red.node_indices[i]));
                     fingerprints.push(red.fingerprints[i]);
-                    add_edges(&mut edge_list_indices, &mut edge_list_data, red.edges[i].iter());
+                    let edges = &red.edges[i];
+                    edge_list_indices.push((edges.start.as_u32(), edges.end.as_u32()));
                 }
                 HybridIndex::LightGreen(i) => {
                     let lg = &data.light_green;
                     nodes.push(previous.index_to_node(lg.node_indices[i]));
                     fingerprints.push(previous.fingerprint_by_index(lg.node_indices[i]));
-                    add_edges(&mut edge_list_indices, &mut edge_list_data, lg.edges[i].iter());
+                    let edges = &lg.edges[i];
+                    edge_list_indices.push((edges.start.as_u32(), edges.end.as_u32()));
                 }
                 HybridIndex::DarkGreen(prev_index) => {
                     nodes.push(previous.index_to_node(prev_index));
                     fingerprints.push(previous.fingerprint_by_index(prev_index));
+
                     let edges_iter = previous
                         .edge_targets_from(prev_index)
                         .iter()
                         .map(|&dst| prev_index_to_index[dst].as_ref().unwrap());
-                    add_edges(&mut edge_list_indices, &mut edge_list_data, edges_iter);
+
+                    let start = edge_list_data.len() as u32;
+                    edge_list_data.extend(edges_iter.map(|i| SDNI::new(i.index())));
+                    let end = edge_list_data.len() as u32;
+                    edge_list_indices.push((start, end));
                 }
             }
         }
@@ -1125,6 +1126,11 @@ impl From<CompressedHybridIndex> for HybridIndex {
     }
 }
 
+// Index type for `DepNodeData`'s edges.
+rustc_index::newtype_index! {
+    struct EdgeIndex { .. }
+}
+
 /// Data for nodes in the current graph, divided into different collections
 /// based on their presence in the previous graph, and if present, their color.
 /// We divide nodes this way because different types of nodes are able to share
@@ -1171,6 +1177,16 @@ struct DepNodeData<K> {
     /// Data for nodes in previous graph that have been marked light green.
     light_green: LightGreenDepNodeData,
 
+    // Edges for all nodes other than dark-green ones. Edges for each node
+    // occupy a contiguous region of this collection, which a node can reference
+    // using two indices. Storing edges this way rather than using an `EdgesVec`
+    // for each node reduces memory consumption by a not insignificant amount
+    // when compiling large crates. The downside is that we have to copy into
+    // this collection the edges from the `EdgesVec`s that are built up during
+    // query execution. But this is mostly balanced out by the more efficient
+    // implementation of `DepGraph::serialize` enabled by this representation.
+    unshared_edges: IndexVec<EdgeIndex, DepNodeIndex>,
+
     /// Mapping from `DepNodeIndex` to an index into a collection above.
     /// Indicates which of the above collections contains a node's data.
     ///
@@ -1186,7 +1202,7 @@ struct DepNodeData<K> {
 /// the previous graph, so we must store all of such a node's data here.
 struct NewDepNodeData<K> {
     nodes: IndexVec<NewDepNodeIndex, DepNode<K>>,
-    edges: IndexVec<NewDepNodeIndex, EdgesVec>,
+    edges: IndexVec<NewDepNodeIndex, Range<EdgeIndex>>,
     fingerprints: IndexVec<NewDepNodeIndex, Fingerprint>,
 }
 
@@ -1195,7 +1211,7 @@ struct NewDepNodeData<K> {
 /// fingerprint is known to be different, so we store the latter two directly.
 struct RedDepNodeData {
     node_indices: IndexVec<RedDepNodeIndex, SerializedDepNodeIndex>,
-    edges: IndexVec<RedDepNodeIndex, EdgesVec>,
+    edges: IndexVec<RedDepNodeIndex, Range<EdgeIndex>>,
     fingerprints: IndexVec<RedDepNodeIndex, Fingerprint>,
 }
 
@@ -1205,7 +1221,7 @@ struct RedDepNodeData {
 /// graph, but the edges may be different, so we store them directly.
 struct LightGreenDepNodeData {
     node_indices: IndexVec<LightGreenDepNodeIndex, SerializedDepNodeIndex>,
-    edges: IndexVec<LightGreenDepNodeIndex, EdgesVec>,
+    edges: IndexVec<LightGreenDepNodeIndex, Range<EdgeIndex>>,
 }
 
 /// `CurrentDepGraph` stores the dependency graph for the current session. It
@@ -1294,11 +1310,27 @@ impl<K: DepKind> CurrentDepGraph<K> {
         // not be enough. The allocation for red and green node data doesn't
         // include a constant, as we don't want to allocate anything for these
         // structures during full incremental builds, where they aren't used.
+        //
+        // These estimates are based on the distribution of node and edge counts
+        // seen in rustc-perf benchmarks, adjusted somewhat to account for the
+        // fact that these benchmarks aren't perfectly representative.
+        //
+        // FIXME Use a collection type that doesn't copy node and edge data and
+        // grow multiplicatively on reallocation. Without such a collection or
+        // solution having the same effect, there is a performance hazard here
+        // in both time and space, as growing these collections means copying a
+        // large amount of data and doubling already large buffer capacities. A
+        // solution for this will also mean that it's less important to get
+        // these estimates right.
         let new_node_count_estimate = (prev_graph_node_count * 2) / 100 + 200;
         let red_node_count_estimate = (prev_graph_node_count * 3) / 100;
         let light_green_node_count_estimate = (prev_graph_node_count * 25) / 100;
         let total_node_count_estimate = prev_graph_node_count + new_node_count_estimate;
 
+        let average_edges_per_node_estimate = 6;
+        let unshared_edge_count_estimate = average_edges_per_node_estimate
+            * (new_node_count_estimate + red_node_count_estimate + light_green_node_count_estimate);
+
         // We store a large collection of these in `prev_index_to_index` during
         // non-full incremental builds, and want to ensure that the element size
         // doesn't inadvertently increase.
@@ -1320,6 +1352,7 @@ impl<K: DepKind> CurrentDepGraph<K> {
                     node_indices: IndexVec::with_capacity(light_green_node_count_estimate),
                     edges: IndexVec::with_capacity(light_green_node_count_estimate),
                 },
+                unshared_edges: IndexVec::with_capacity(unshared_edge_count_estimate),
                 hybrid_indices: IndexVec::with_capacity(total_node_count_estimate),
             }),
             new_node_to_index: Sharded::new(|| {
@@ -1352,9 +1385,9 @@ impl<K: DepKind> CurrentDepGraph<K> {
         match self.new_node_to_index.get_shard_by_value(&dep_node).lock().entry(dep_node) {
             Entry::Occupied(entry) => *entry.get(),
             Entry::Vacant(entry) => {
-                let mut data = self.data.lock();
+                let data = &mut *self.data.lock();
                 let new_index = data.new.nodes.push(dep_node);
-                data.new.edges.push(edges);
+                add_edges(&mut data.unshared_edges, &mut data.new.edges, edges);
                 data.new.fingerprints.push(fingerprint);
                 let dep_node_index = data.hybrid_indices.push(new_index.into());
                 entry.insert(dep_node_index);
@@ -1377,9 +1410,9 @@ impl<K: DepKind> CurrentDepGraph<K> {
         match prev_index_to_index[prev_index] {
             Some(dep_node_index) => dep_node_index,
             None => {
-                let mut data = self.data.lock();
+                let data = &mut *self.data.lock();
                 let red_index = data.red.node_indices.push(prev_index);
-                data.red.edges.push(edges);
+                add_edges(&mut data.unshared_edges, &mut data.red.edges, edges);
                 data.red.fingerprints.push(fingerprint);
                 let dep_node_index = data.hybrid_indices.push(red_index.into());
                 prev_index_to_index[prev_index] = Some(dep_node_index);
@@ -1401,9 +1434,9 @@ impl<K: DepKind> CurrentDepGraph<K> {
         match prev_index_to_index[prev_index] {
             Some(dep_node_index) => dep_node_index,
             None => {
-                let mut data = self.data.lock();
+                let data = &mut *self.data.lock();
                 let light_green_index = data.light_green.node_indices.push(prev_index);
-                data.light_green.edges.push(edges);
+                add_edges(&mut data.unshared_edges, &mut data.light_green.edges, edges);
                 let dep_node_index = data.hybrid_indices.push(light_green_index.into());
                 prev_index_to_index[prev_index] = Some(dep_node_index);
                 dep_node_index
@@ -1445,6 +1478,18 @@ impl<K: DepKind> CurrentDepGraph<K> {
     }
 }
 
+#[inline]
+fn add_edges<I: Idx>(
+    edges: &mut IndexVec<EdgeIndex, DepNodeIndex>,
+    edge_indices: &mut IndexVec<I, Range<EdgeIndex>>,
+    new_edges: EdgesVec,
+) {
+    let start = edges.next_index();
+    edges.extend(new_edges);
+    let end = edges.next_index();
+    edge_indices.push(start..end);
+}
+
 /// The capacity of the `reads` field `SmallVec`
 const TASK_DEPS_READS_CAP: usize = 8;
 type EdgesVec = SmallVec<[DepNodeIndex; TASK_DEPS_READS_CAP]>;
diff --git a/compiler/rustc_query_system/src/dep_graph/query.rs b/compiler/rustc_query_system/src/dep_graph/query.rs
index 82e99d64afe..cc25d08cb54 100644
--- a/compiler/rustc_query_system/src/dep_graph/query.rs
+++ b/compiler/rustc_query_system/src/dep_graph/query.rs
@@ -9,17 +9,23 @@ pub struct DepGraphQuery<K> {
 }
 
 impl<K: DepKind> DepGraphQuery<K> {
-    pub fn new(nodes: &[DepNode<K>], edges: &[(usize, usize)]) -> DepGraphQuery<K> {
-        let mut graph = Graph::with_capacity(nodes.len(), edges.len());
+    pub fn new(
+        nodes: &[DepNode<K>],
+        edge_list_indices: &[(usize, usize)],
+        edge_list_data: &[usize],
+    ) -> DepGraphQuery<K> {
+        let mut graph = Graph::with_capacity(nodes.len(), edge_list_data.len());
         let mut indices = FxHashMap::default();
         for node in nodes {
             indices.insert(*node, graph.add_node(*node));
         }
 
-        for &(source, target) in edges {
-            let source = indices[&nodes[source]];
-            let target = indices[&nodes[target]];
-            graph.add_edge(source, target, ());
+        for (source, &(start, end)) in edge_list_indices.iter().enumerate() {
+            for &target in &edge_list_data[start..end] {
+                let source = indices[&nodes[source]];
+                let target = indices[&nodes[target]];
+                graph.add_edge(source, target, ());
+            }
         }
 
         DepGraphQuery { graph, indices }