about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2019-06-22 15:12:15 +0000
committerbors <bors@rust-lang.org>2019-06-22 15:12:15 +0000
commit4a365a29d64bec75d107214319a129ba68fc12a3 (patch)
tree53411659f0e073468d798001b442873717ebf65b
parentd4d5d67c1c20b9599c812ab4d926ab4fa9fe6935 (diff)
parent3f28811774da8ee16bd6391a0e66d23e6962485f (diff)
downloadrust-4a365a29d64bec75d107214319a129ba68fc12a3.tar.gz
rust-4a365a29d64bec75d107214319a129ba68fc12a3.zip
Auto merge of #61020 - HeroicKatora:master, r=matthewjasper
librustc_data_structures: Speedup union of sparse and dense hybrid set

This optimization speeds up the union of a hybrid bitset when that
switches it from a sparse representation to a dense bitset. It now
clones the dense bitset and integrate only the spare elements instead of
densifying the sparse bitset, initializing all elements, and then a
union on two dense bitset, touching all words a second time.

It's not completely certain if the added complexity is worth it but I would
like to hear some feedback in any case. Benchmark results from my machine:

```
Now:  bit_set::union_hybrid_sparse_to_dense ... bench:          72 ns/iter (+/- 5)
Previous: bit_set::union_hybrid_sparse_to_dense ... bench:          90 ns/iter (+/- 6)
```

This being the second iteration of trying to improve the speed, since I missed the return value in the first, and forgot to run the relevant tests. Oops.
-rw-r--r--src/librustc_data_structures/bit_set.rs147
1 files changed, 143 insertions, 4 deletions
diff --git a/src/librustc_data_structures/bit_set.rs b/src/librustc_data_structures/bit_set.rs
index 7a11ca07007..3430d83f23f 100644
--- a/src/librustc_data_structures/bit_set.rs
+++ b/src/librustc_data_structures/bit_set.rs
@@ -5,6 +5,10 @@ use std::iter;
 use std::marker::PhantomData;
 use std::mem;
 use std::slice;
+#[cfg(test)]
+extern crate test;
+#[cfg(test)]
+use test::Bencher;
 
 pub type Word = u64;
 pub const WORD_BYTES: usize = mem::size_of::<Word>();
@@ -177,6 +181,45 @@ impl<T: Idx> BitSet<T> {
         // Note: we currently don't bother trying to make a Sparse set.
         HybridBitSet::Dense(self.to_owned())
     }
+
+    /// Set `self = self | other`. In contrast to `union` returns `true` if the set contains at
+    /// least one bit that is not in `other` (i.e. `other` is not a superset of `self`).
+    ///
+    /// This is an optimization for union of a hybrid bitset.
+    fn reverse_union_sparse(&mut self, sparse: &SparseBitSet<T>) -> bool {
+        assert!(sparse.domain_size == self.domain_size);
+        self.clear_excess_bits();
+
+        let mut not_already = false;
+        // Index of the current word not yet merged.
+        let mut current_index = 0;
+        // Mask of bits that came from the sparse set in the current word.
+        let mut new_bit_mask = 0;
+        for (word_index, mask) in sparse.iter().map(|x| word_index_and_mask(*x)) {
+            // Next bit is in a word not inspected yet.
+            if word_index > current_index {
+                self.words[current_index] |= new_bit_mask;
+                // Were there any bits in the old word that did not occur in the sparse set?
+                not_already |= (self.words[current_index] ^ new_bit_mask) != 0;
+                // Check all words we skipped for any set bit.
+                not_already |= self.words[current_index+1..word_index].iter().any(|&x| x != 0);
+                // Update next word.
+                current_index = word_index;
+                // Reset bit mask, no bits have been merged yet.
+                new_bit_mask = 0;
+            }
+            // Add bit and mark it as coming from the sparse set.
+            // self.words[word_index] |= mask;
+            new_bit_mask |= mask;
+        }
+        self.words[current_index] |= new_bit_mask;
+        // Any bits in the last inspected word that were not in the sparse set?
+        not_already |= (self.words[current_index] ^ new_bit_mask) != 0;
+        // Any bits in the tail? Note `clear_excess_bits` before.
+        not_already |= self.words[current_index+1..].iter().any(|&x| x != 0);
+
+        not_already
+    }
 }
 
 /// This is implemented by all the bitsets so that BitSet::union() can be
@@ -514,10 +557,22 @@ impl<T: Idx> HybridBitSet<T> {
                         changed
                     }
                     HybridBitSet::Dense(other_dense) => {
-                        // `self` is sparse and `other` is dense. Densify
-                        // `self` and then do the bitwise union.
-                        let mut new_dense = self_sparse.to_dense();
-                        let changed = new_dense.union(other_dense);
+                        // `self` is sparse and `other` is dense. To
+                        // merge them, we have two available strategies:
+                        // * Densify `self` then merge other
+                        // * Clone other then integrate bits from `self`
+                        // The second strategy requires dedicated method
+                        // since the usual `union` returns the wrong
+                        // result. In the dedicated case the computation
+                        // is slightly faster if the bits of the sparse
+                        // bitset map to only few words of the dense
+                        // representation, i.e. indices are near each
+                        // other.
+                        //
+                        // Benchmarking seems to suggest that the second
+                        // option is worth it.
+                        let mut new_dense = other_dense.clone();
+                        let changed = new_dense.reverse_union_sparse(self_sparse);
                         *self = HybridBitSet::Dense(new_dense);
                         changed
                     }
@@ -1214,3 +1269,87 @@ fn sparse_matrix_iter() {
     }
     assert!(iter.next().is_none());
 }
+
+/// Merge dense hybrid set into empty sparse hybrid set.
+#[bench]
+fn union_hybrid_sparse_empty_to_dense(b: &mut Bencher) {
+    let mut pre_dense: HybridBitSet<usize> = HybridBitSet::new_empty(256);
+    for i in 0..10 {
+        assert!(pre_dense.insert(i));
+    }
+    let pre_sparse: HybridBitSet<usize> = HybridBitSet::new_empty(256);
+    b.iter(|| {
+        let dense = pre_dense.clone();
+        let mut sparse = pre_sparse.clone();
+        sparse.union(&dense);
+    })
+}
+
+/// Merge dense hybrid set into full hybrid set with same indices.
+#[bench]
+fn union_hybrid_sparse_full_to_dense(b: &mut Bencher) {
+    let mut pre_dense: HybridBitSet<usize> = HybridBitSet::new_empty(256);
+    for i in 0..10 {
+        assert!(pre_dense.insert(i));
+    }
+    let mut pre_sparse: HybridBitSet<usize> = HybridBitSet::new_empty(256);
+    for i in 0..SPARSE_MAX {
+        assert!(pre_sparse.insert(i));
+    }
+    b.iter(|| {
+        let dense = pre_dense.clone();
+        let mut sparse = pre_sparse.clone();
+        sparse.union(&dense);
+    })
+}
+
+/// Merge dense hybrid set into full hybrid set with indices over the whole domain.
+#[bench]
+fn union_hybrid_sparse_domain_to_dense(b: &mut Bencher) {
+    let mut pre_dense: HybridBitSet<usize> = HybridBitSet::new_empty(SPARSE_MAX*64);
+    for i in 0..10 {
+        assert!(pre_dense.insert(i));
+    }
+    let mut pre_sparse: HybridBitSet<usize> = HybridBitSet::new_empty(SPARSE_MAX*64);
+    for i in 0..SPARSE_MAX {
+        assert!(pre_sparse.insert(i*64));
+    }
+    b.iter(|| {
+        let dense = pre_dense.clone();
+        let mut sparse = pre_sparse.clone();
+        sparse.union(&dense);
+    })
+}
+
+/// Merge dense hybrid set into empty hybrid set where the domain is very small.
+#[bench]
+fn union_hybrid_sparse_empty_small_domain(b: &mut Bencher) {
+    let mut pre_dense: HybridBitSet<usize> = HybridBitSet::new_empty(SPARSE_MAX);
+    for i in 0..SPARSE_MAX {
+        assert!(pre_dense.insert(i));
+    }
+    let pre_sparse: HybridBitSet<usize> = HybridBitSet::new_empty(SPARSE_MAX);
+    b.iter(|| {
+        let dense = pre_dense.clone();
+        let mut sparse = pre_sparse.clone();
+        sparse.union(&dense);
+    })
+}
+
+/// Merge dense hybrid set into full hybrid set where the domain is very small.
+#[bench]
+fn union_hybrid_sparse_full_small_domain(b: &mut Bencher) {
+    let mut pre_dense: HybridBitSet<usize> = HybridBitSet::new_empty(SPARSE_MAX);
+    for i in 0..SPARSE_MAX {
+        assert!(pre_dense.insert(i));
+    }
+    let mut pre_sparse: HybridBitSet<usize> = HybridBitSet::new_empty(SPARSE_MAX);
+    for i in 0..SPARSE_MAX {
+        assert!(pre_sparse.insert(i));
+    }
+    b.iter(|| {
+        let dense = pre_dense.clone();
+        let mut sparse = pre_sparse.clone();
+        sparse.union(&dense);
+    })
+}