about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2015-05-18 19:54:24 +0000
committerbors <bors@rust-lang.org>2015-05-18 19:54:24 +0000
commit4daa62a55f906bd7ec7ad265cb23d49d9d0db66a (patch)
tree151e41c763c3f5344f91fd5e8683858c91b6bc93
parent2dd5ad0be84f8d14dc357fb35a4b21fe5a34030a (diff)
parent307fab1aa7540adf8856670c5a1cf44ff508ae2f (diff)
downloadrust-4daa62a55f906bd7ec7ad265cb23d49d9d0db66a.tar.gz
rust-4daa62a55f906bd7ec7ad265cb23d49d9d0db66a.zip
Auto merge of #25230 - rayglover:patch-bitset, r=Gankro
Some modest running-time improvements to `std::collections::BitSet` on bit-sets of varying set-membership densities. This is work originally from [here](https://github.com/rayglover/alt_collections). (Benchmarks copied below)
```
std::collections::BitSet / alt_collections::BitSet

copy_dense         ... 3.08x
copy_sparse        ... 4.22x
count_dense        ... 11.01x
count_sparse       ... 8.11x
from_bytes         ... 1.47x
intersect_dense    ... 6.54x
intersect_sparse   ... 4.37x
union_dense        ... 5.53x
union_sparse       ... 5.60x
```

The exception is `from_bytes`, which I've left unaltered since the optimization is rather obscure.

Compiling with the cpu feature `popcnt` gave a further ~10% improvement on my machine, but this wasn't factored in to the benchmarks above.

Similar improvements could be made to `BitVec`, although that would probably require more substantial changes.

criticism welcome!
-rw-r--r--src/libcollections/bit.rs157
1 files changed, 83 insertions, 74 deletions
diff --git a/src/libcollections/bit.rs b/src/libcollections/bit.rs
index 8ec4a68f2b1..c06cbdb4179 100644
--- a/src/libcollections/bit.rs
+++ b/src/libcollections/bit.rs
@@ -1555,7 +1555,7 @@ impl BitSet {
     #[inline]
     #[stable(feature = "rust1", since = "1.0.0")]
     pub fn iter(&self) -> bit_set::Iter {
-        SetIter {set: self, next_idx: 0}
+        SetIter(BlockIter::from_blocks(self.bit_vec.blocks()))
     }
 
     /// Iterator over each usize stored in `self` union `other`.
@@ -1580,13 +1580,11 @@ impl BitSet {
     pub fn union<'a>(&'a self, other: &'a BitSet) -> Union<'a> {
         fn or(w1: u32, w2: u32) -> u32 { w1 | w2 }
 
-        Union(TwoBitPositions {
-            set: self,
-            other: other,
+        Union(BlockIter::from_blocks(TwoBitPositions {
+            set: self.bit_vec.blocks(),
+            other: other.bit_vec.blocks(),
             merge: or,
-            current_word: 0,
-            next_idx: 0
-        })
+        }))
     }
 
     /// Iterator over each usize stored in `self` intersect `other`.
@@ -1611,13 +1609,12 @@ impl BitSet {
     pub fn intersection<'a>(&'a self, other: &'a BitSet) -> Intersection<'a> {
         fn bitand(w1: u32, w2: u32) -> u32 { w1 & w2 }
         let min = cmp::min(self.bit_vec.len(), other.bit_vec.len());
-        Intersection(TwoBitPositions {
-            set: self,
-            other: other,
+
+        Intersection(BlockIter::from_blocks(TwoBitPositions {
+            set: self.bit_vec.blocks(),
+            other: other.bit_vec.blocks(),
             merge: bitand,
-            current_word: 0,
-            next_idx: 0
-        }.take(min))
+        }).take(min))
     }
 
     /// Iterator over each usize stored in the `self` setminus `other`.
@@ -1649,13 +1646,11 @@ impl BitSet {
     pub fn difference<'a>(&'a self, other: &'a BitSet) -> Difference<'a> {
         fn diff(w1: u32, w2: u32) -> u32 { w1 & !w2 }
 
-        Difference(TwoBitPositions {
-            set: self,
-            other: other,
+        Difference(BlockIter::from_blocks(TwoBitPositions {
+            set: self.bit_vec.blocks(),
+            other: other.bit_vec.blocks(),
             merge: diff,
-            current_word: 0,
-            next_idx: 0
-        })
+        }))
     }
 
     /// Iterator over each usize stored in the symmetric difference of `self` and `other`.
@@ -1681,13 +1676,11 @@ impl BitSet {
     pub fn symmetric_difference<'a>(&'a self, other: &'a BitSet) -> SymmetricDifference<'a> {
         fn bitxor(w1: u32, w2: u32) -> u32 { w1 ^ w2 }
 
-        SymmetricDifference(TwoBitPositions {
-            set: self,
-            other: other,
+        SymmetricDifference(BlockIter::from_blocks(TwoBitPositions {
+            set: self.bit_vec.blocks(),
+            other: other.bit_vec.blocks(),
             merge: bitxor,
-            current_word: 0,
-            next_idx: 0
-        })
+        }))
     }
 
     /// Unions in-place with the specified other bit vector.
@@ -1994,99 +1987,115 @@ impl hash::Hash for BitSet {
     }
 }
 
-/// An iterator for `BitSet`.
 #[derive(Clone)]
 #[stable(feature = "rust1", since = "1.0.0")]
-pub struct SetIter<'a> {
-    set: &'a BitSet,
-    next_idx: usize
+struct BlockIter<T> where T: Iterator<Item=u32> {
+    head: u32,
+    head_offset: usize,
+    tail: T,
+}
+
+impl<'a, T> BlockIter<T> where T: Iterator<Item=u32> {
+    fn from_blocks(mut blocks: T) -> BlockIter<T> {
+        let h = blocks.next().unwrap_or(0);
+        BlockIter {tail: blocks, head: h, head_offset: 0}
+    }
 }
 
 /// An iterator combining two `BitSet` iterators.
 #[derive(Clone)]
 struct TwoBitPositions<'a> {
-    set: &'a BitSet,
-    other: &'a BitSet,
+    set: Blocks<'a>,
+    other: Blocks<'a>,
     merge: fn(u32, u32) -> u32,
-    current_word: u32,
-    next_idx: usize
 }
 
+/// An iterator for `BitSet`.
+#[derive(Clone)]
+#[stable(feature = "rust1", since = "1.0.0")]
+pub struct SetIter<'a>(BlockIter<Blocks<'a>>);
 #[derive(Clone)]
 #[stable(feature = "rust1", since = "1.0.0")]
-pub struct Union<'a>(TwoBitPositions<'a>);
+pub struct Union<'a>(BlockIter<TwoBitPositions<'a>>);
 #[derive(Clone)]
 #[stable(feature = "rust1", since = "1.0.0")]
-pub struct Intersection<'a>(Take<TwoBitPositions<'a>>);
+pub struct Intersection<'a>(Take<BlockIter<TwoBitPositions<'a>>>);
 #[derive(Clone)]
 #[stable(feature = "rust1", since = "1.0.0")]
-pub struct Difference<'a>(TwoBitPositions<'a>);
+pub struct Difference<'a>(BlockIter<TwoBitPositions<'a>>);
 #[derive(Clone)]
 #[stable(feature = "rust1", since = "1.0.0")]
-pub struct SymmetricDifference<'a>(TwoBitPositions<'a>);
+pub struct SymmetricDifference<'a>(BlockIter<TwoBitPositions<'a>>);
 
 #[stable(feature = "rust1", since = "1.0.0")]
-impl<'a> Iterator for SetIter<'a> {
+impl<'a, T> Iterator for BlockIter<T> where T: Iterator<Item=u32> {
     type Item = usize;
 
     fn next(&mut self) -> Option<usize> {
-        while self.next_idx < self.set.bit_vec.len() {
-            let idx = self.next_idx;
-            self.next_idx += 1;
-
-            if self.set.contains(&idx) {
-                return Some(idx);
+        while self.head == 0 {
+            match self.tail.next() {
+                Some(w) => self.head = w,
+                None => return None
             }
+            self.head_offset += u32::BITS;
         }
 
-        return None;
+        // from the current block, isolate the
+        // LSB and subtract 1, producing k:
+        // a block with a number of set bits
+        // equal to the index of the LSB
+        let k = (self.head & (!self.head + 1)) - 1;
+        // update block, removing the LSB
+        self.head &= self.head - 1;
+        // return offset + (index of LSB)
+        Some(self.head_offset + (u32::count_ones(k) as usize))
     }
 
     #[inline]
     fn size_hint(&self) -> (usize, Option<usize>) {
-        (0, Some(self.set.bit_vec.len() - self.next_idx))
+        match self.tail.size_hint() {
+            (_, Some(h)) => (0, Some(1 + h * (u32::BITS as usize))),
+            _ => (0, None)
+        }
     }
 }
 
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<'a> Iterator for TwoBitPositions<'a> {
-    type Item = usize;
-
-    fn next(&mut self) -> Option<usize> {
-        while self.next_idx < self.set.bit_vec.len() ||
-              self.next_idx < self.other.bit_vec.len() {
-            let bit_idx = self.next_idx % u32::BITS;
-            if bit_idx == 0 {
-                let s_bit_vec = &self.set.bit_vec;
-                let o_bit_vec = &self.other.bit_vec;
-                // Merging the two words is a bit of an awkward dance since
-                // one BitVec might be longer than the other
-                let word_idx = self.next_idx / u32::BITS;
-                let w1 = if word_idx < s_bit_vec.storage.len() {
-                             s_bit_vec.storage[word_idx]
-                         } else { 0 };
-                let w2 = if word_idx < o_bit_vec.storage.len() {
-                             o_bit_vec.storage[word_idx]
-                         } else { 0 };
-                self.current_word = (self.merge)(w1, w2);
-            }
-
-            self.next_idx += 1;
-            if self.current_word & (1 << bit_idx) != 0 {
-                return Some(self.next_idx - 1);
-            }
+    type Item = u32;
+
+    fn next(&mut self) -> Option<u32> {
+        match (self.set.next(), self.other.next()) {
+            (Some(a), Some(b)) => Some((self.merge)(a, b)),
+            (Some(a), None) => Some((self.merge)(a, 0)),
+            (None, Some(b)) => Some((self.merge)(0, b)),
+            _ => return None
         }
-        return None;
     }
 
     #[inline]
     fn size_hint(&self) -> (usize, Option<usize>) {
-        let cap = cmp::max(self.set.bit_vec.len(), self.other.bit_vec.len());
-        (0, Some(cap - self.next_idx))
+        let (a, au) = self.set.size_hint();
+        let (b, bu) = self.other.size_hint();
+
+        let upper = match (au, bu) {
+            (Some(au), Some(bu)) => Some(cmp::max(au, bu)),
+            _ => None
+        };
+
+        (cmp::max(a, b), upper)
     }
 }
 
 #[stable(feature = "rust1", since = "1.0.0")]
+impl<'a> Iterator for SetIter<'a> {
+    type Item = usize;
+
+    #[inline] fn next(&mut self) -> Option<usize> { self.0.next() }
+    #[inline] fn size_hint(&self) -> (usize, Option<usize>) { self.0.size_hint() }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
 impl<'a> Iterator for Union<'a> {
     type Item = usize;