diff options
Diffstat (limited to 'compiler/rustc_index')
| -rw-r--r-- | compiler/rustc_index/src/bit_set.rs | 385 | ||||
| -rw-r--r-- | compiler/rustc_index/src/bit_set/tests.rs | 6 | 
2 files changed, 273 insertions, 118 deletions
| diff --git a/compiler/rustc_index/src/bit_set.rs b/compiler/rustc_index/src/bit_set.rs index de6fa132ca0..aba1e938296 100644 --- a/compiler/rustc_index/src/bit_set.rs +++ b/compiler/rustc_index/src/bit_set.rs @@ -296,6 +296,111 @@ impl<T: Idx> From<GrowableBitSet<T>> for BitSet<T> { } } +impl<T> Clone for BitSet<T> { + fn clone(&self) -> Self { + BitSet { domain_size: self.domain_size, words: self.words.clone(), marker: PhantomData } + } + + fn clone_from(&mut self, from: &Self) { + self.domain_size = from.domain_size; + self.words.clone_from(&from.words); + } +} + +impl<T: Idx> fmt::Debug for BitSet<T> { + fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { + w.debug_list().entries(self.iter()).finish() + } +} + +impl<T: Idx> ToString for BitSet<T> { + fn to_string(&self) -> String { + let mut result = String::new(); + let mut sep = '['; + + // Note: this is a little endian printout of bytes. + + // i tracks how many bits we have printed so far. + let mut i = 0; + for word in &self.words { + let mut word = *word; + for _ in 0..WORD_BYTES { + // for each byte in `word`: + let remain = self.domain_size - i; + // If less than a byte remains, then mask just that many bits. + let mask = if remain <= 8 { (1 << remain) - 1 } else { 0xFF }; + assert!(mask <= 0xFF); + let byte = word & mask; + + result.push_str(&format!("{sep}{byte:02x}")); + + if remain <= 8 { + break; + } + word >>= 8; + i += 8; + sep = '-'; + } + sep = '|'; + } + result.push(']'); + + result + } +} + +pub struct BitIter<'a, T: Idx> { + /// A copy of the current word, but with any already-visited bits cleared. + /// (This lets us use `trailing_zeros()` to find the next set bit.) When it + /// is reduced to 0, we move onto the next word. + word: Word, + + /// The offset (measured in bits) of the current word. + offset: usize, + + /// Underlying iterator over the words. + iter: slice::Iter<'a, Word>, + + marker: PhantomData<T>, +} + +impl<'a, T: Idx> BitIter<'a, T> { + #[inline] + fn new(words: &'a [Word]) -> BitIter<'a, T> { + // We initialize `word` and `offset` to degenerate values. On the first + // call to `next()` we will fall through to getting the first word from + // `iter`, which sets `word` to the first word (if there is one) and + // `offset` to 0. Doing it this way saves us from having to maintain + // additional state about whether we have started. + BitIter { + word: 0, + offset: usize::MAX - (WORD_BITS - 1), + iter: words.iter(), + marker: PhantomData, + } + } +} + +impl<'a, T: Idx> Iterator for BitIter<'a, T> { + type Item = T; + fn next(&mut self) -> Option<T> { + loop { + if self.word != 0 { + // Get the position of the next set bit in the current word, + // then clear the bit. + let bit_pos = self.word.trailing_zeros() as usize; + self.word ^= 1 << bit_pos; + return Some(T::new(bit_pos + self.offset)); + } + + // Move onto the next word. `wrapping_add()` is needed to handle + // the degenerate initial value given to `offset` in `new()`. + self.word = *self.iter.next()?; + self.offset = self.offset.wrapping_add(WORD_BITS); + } + } +} + /// A fixed-size bitset type with a partially dense, partially sparse /// representation. The bitset is broken into chunks, and chunks that are all /// zeros or all ones are represented and handled very efficiently. @@ -305,6 +410,9 @@ impl<T: Idx> From<GrowableBitSet<T>> for BitSet<T> { /// some stretches with lots of 0s and 1s mixed in a way that causes trouble /// for `IntervalSet`. /// +/// Best used via `MixedBitSet`, rather than directly, because `MixedBitSet` +/// has better performance for small bitsets. +/// /// `T` is an index type, typically a newtyped `usize` wrapper, but it can also /// just be `usize`. /// @@ -958,117 +1066,12 @@ fn sequential_update<T: Idx>( it.fold(false, |changed, elem| self_update(elem) | changed) } -impl<T> Clone for BitSet<T> { - fn clone(&self) -> Self { - BitSet { domain_size: self.domain_size, words: self.words.clone(), marker: PhantomData } - } - - fn clone_from(&mut self, from: &Self) { - self.domain_size = from.domain_size; - self.words.clone_from(&from.words); - } -} - -impl<T: Idx> fmt::Debug for BitSet<T> { - fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { - w.debug_list().entries(self.iter()).finish() - } -} - impl<T: Idx> fmt::Debug for ChunkedBitSet<T> { fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { w.debug_list().entries(self.iter()).finish() } } -impl<T: Idx> ToString for BitSet<T> { - fn to_string(&self) -> String { - let mut result = String::new(); - let mut sep = '['; - - // Note: this is a little endian printout of bytes. - - // i tracks how many bits we have printed so far. - let mut i = 0; - for word in &self.words { - let mut word = *word; - for _ in 0..WORD_BYTES { - // for each byte in `word`: - let remain = self.domain_size - i; - // If less than a byte remains, then mask just that many bits. - let mask = if remain <= 8 { (1 << remain) - 1 } else { 0xFF }; - assert!(mask <= 0xFF); - let byte = word & mask; - - result.push_str(&format!("{sep}{byte:02x}")); - - if remain <= 8 { - break; - } - word >>= 8; - i += 8; - sep = '-'; - } - sep = '|'; - } - result.push(']'); - - result - } -} - -pub struct BitIter<'a, T: Idx> { - /// A copy of the current word, but with any already-visited bits cleared. - /// (This lets us use `trailing_zeros()` to find the next set bit.) When it - /// is reduced to 0, we move onto the next word. - word: Word, - - /// The offset (measured in bits) of the current word. - offset: usize, - - /// Underlying iterator over the words. - iter: slice::Iter<'a, Word>, - - marker: PhantomData<T>, -} - -impl<'a, T: Idx> BitIter<'a, T> { - #[inline] - fn new(words: &'a [Word]) -> BitIter<'a, T> { - // We initialize `word` and `offset` to degenerate values. On the first - // call to `next()` we will fall through to getting the first word from - // `iter`, which sets `word` to the first word (if there is one) and - // `offset` to 0. Doing it this way saves us from having to maintain - // additional state about whether we have started. - BitIter { - word: 0, - offset: usize::MAX - (WORD_BITS - 1), - iter: words.iter(), - marker: PhantomData, - } - } -} - -impl<'a, T: Idx> Iterator for BitIter<'a, T> { - type Item = T; - fn next(&mut self) -> Option<T> { - loop { - if self.word != 0 { - // Get the position of the next set bit in the current word, - // then clear the bit. - let bit_pos = self.word.trailing_zeros() as usize; - self.word ^= 1 << bit_pos; - return Some(T::new(bit_pos + self.offset)); - } - - // Move onto the next word. `wrapping_add()` is needed to handle - // the degenerate initial value given to `offset` in `new()`. - self.word = *self.iter.next()?; - self.offset = self.offset.wrapping_add(WORD_BITS); - } - } -} - #[inline] fn bitwise<Op>(out_vec: &mut [Word], in_vec: &[Word], op: Op) -> bool where @@ -1106,6 +1109,158 @@ where false } +/// A bitset with a mixed representation, using `BitSet` for small and medium +/// bitsets, and `ChunkedBitSet` for large bitsets, i.e. those with enough bits +/// for at least two chunks. This is a good choice for many bitsets that can +/// have large domain sizes (e.g. 5000+). +/// +/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also +/// just be `usize`. +/// +/// All operations that involve an element will panic if the element is equal +/// to or greater than the domain size. All operations that involve two bitsets +/// will panic if the bitsets have differing domain sizes. +#[derive(PartialEq, Eq)] +pub enum MixedBitSet<T> { + Small(BitSet<T>), + Large(ChunkedBitSet<T>), +} + +impl<T> MixedBitSet<T> { + pub fn domain_size(&self) -> usize { + match self { + MixedBitSet::Small(set) => set.domain_size(), + MixedBitSet::Large(set) => set.domain_size(), + } + } +} + +impl<T: Idx> MixedBitSet<T> { + #[inline] + pub fn new_empty(domain_size: usize) -> MixedBitSet<T> { + if domain_size <= CHUNK_BITS { + MixedBitSet::Small(BitSet::new_empty(domain_size)) + } else { + MixedBitSet::Large(ChunkedBitSet::new_empty(domain_size)) + } + } + + #[inline] + pub fn is_empty(&self) -> bool { + match self { + MixedBitSet::Small(set) => set.is_empty(), + MixedBitSet::Large(set) => set.is_empty(), + } + } + + #[inline] + pub fn contains(&self, elem: T) -> bool { + match self { + MixedBitSet::Small(set) => set.contains(elem), + MixedBitSet::Large(set) => set.contains(elem), + } + } + + #[inline] + pub fn insert(&mut self, elem: T) -> bool { + match self { + MixedBitSet::Small(set) => set.insert(elem), + MixedBitSet::Large(set) => set.insert(elem), + } + } + + pub fn insert_all(&mut self) { + match self { + MixedBitSet::Small(set) => set.insert_all(), + MixedBitSet::Large(set) => set.insert_all(), + } + } + + #[inline] + pub fn remove(&mut self, elem: T) -> bool { + match self { + MixedBitSet::Small(set) => set.remove(elem), + MixedBitSet::Large(set) => set.remove(elem), + } + } + + pub fn iter(&self) -> MixedBitIter<'_, T> { + match self { + MixedBitSet::Small(set) => MixedBitIter::Small(set.iter()), + MixedBitSet::Large(set) => MixedBitIter::Large(set.iter()), + } + } + + bit_relations_inherent_impls! {} +} + +impl<T> Clone for MixedBitSet<T> { + fn clone(&self) -> Self { + match self { + MixedBitSet::Small(set) => MixedBitSet::Small(set.clone()), + MixedBitSet::Large(set) => MixedBitSet::Large(set.clone()), + } + } + + /// WARNING: this implementation of clone_from may panic if the two + /// bitsets have different domain sizes. This constraint is not inherent to + /// `clone_from`, but it works with the existing call sites and allows a + /// faster implementation, which is important because this function is hot. + fn clone_from(&mut self, from: &Self) { + match (self, from) { + (MixedBitSet::Small(set), MixedBitSet::Small(from)) => set.clone_from(from), + (MixedBitSet::Large(set), MixedBitSet::Large(from)) => set.clone_from(from), + _ => panic!("MixedBitSet size mismatch"), + } + } +} + +impl<T: Idx> BitRelations<MixedBitSet<T>> for MixedBitSet<T> { + fn union(&mut self, other: &MixedBitSet<T>) -> bool { + match (self, other) { + (MixedBitSet::Small(set), MixedBitSet::Small(other)) => set.union(other), + (MixedBitSet::Large(set), MixedBitSet::Large(other)) => set.union(other), + _ => panic!("MixedBitSet size mismatch"), + } + } + + fn subtract(&mut self, other: &MixedBitSet<T>) -> bool { + match (self, other) { + (MixedBitSet::Small(set), MixedBitSet::Small(other)) => set.subtract(other), + (MixedBitSet::Large(set), MixedBitSet::Large(other)) => set.subtract(other), + _ => panic!("MixedBitSet size mismatch"), + } + } + + fn intersect(&mut self, _other: &MixedBitSet<T>) -> bool { + unimplemented!("implement if/when necessary"); + } +} + +impl<T: Idx> fmt::Debug for MixedBitSet<T> { + fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + MixedBitSet::Small(set) => set.fmt(w), + MixedBitSet::Large(set) => set.fmt(w), + } + } +} + +pub enum MixedBitIter<'a, T: Idx> { + Small(BitIter<'a, T>), + Large(ChunkedBitIter<'a, T>), +} + +impl<'a, T: Idx> Iterator for MixedBitIter<'a, T> { + type Item = T; + fn next(&mut self) -> Option<T> { + match self { + MixedBitIter::Small(iter) => iter.next(), + MixedBitIter::Large(iter) => iter.next(), + } + } +} + /// A resizable bitset type with a dense representation. /// /// `T` is an index type, typically a newtyped `usize` wrapper, but it can also @@ -1374,7 +1529,7 @@ impl<R: Idx, C: Idx> fmt::Debug for BitMatrix<R, C> { /// sparse representation. /// /// Initially, every row has no explicit representation. If any bit within a -/// row is set, the entire row is instantiated as `Some(<ChunkedBitSet>)`. +/// row is set, the entire row is instantiated as `Some(<BitSet>)`. /// Furthermore, any previously uninstantiated rows prior to it will be /// instantiated as `None`. Those prior rows may themselves become fully /// instantiated later on if any of their bits are set. @@ -1388,7 +1543,7 @@ where C: Idx, { num_columns: usize, - rows: IndexVec<R, Option<ChunkedBitSet<C>>>, + rows: IndexVec<R, Option<BitSet<C>>>, } impl<R: Idx, C: Idx> SparseBitMatrix<R, C> { @@ -1397,10 +1552,10 @@ impl<R: Idx, C: Idx> SparseBitMatrix<R, C> { Self { num_columns, rows: IndexVec::new() } } - fn ensure_row(&mut self, row: R) -> &mut ChunkedBitSet<C> { - // Instantiate any missing rows up to and including row `row` with an empty ChunkedBitSet. - // Then replace row `row` with a full ChunkedBitSet if necessary. - self.rows.get_or_insert_with(row, || ChunkedBitSet::new_empty(self.num_columns)) + fn ensure_row(&mut self, row: R) -> &mut BitSet<C> { + // Instantiate any missing rows up to and including row `row` with an empty `BitSet`. + // Then replace row `row` with a full `BitSet` if necessary. + self.rows.get_or_insert_with(row, || BitSet::new_empty(self.num_columns)) } /// Sets the cell at `(row, column)` to true. Put another way, insert @@ -1474,7 +1629,7 @@ impl<R: Idx, C: Idx> SparseBitMatrix<R, C> { self.row(row).into_iter().flat_map(|r| r.iter()) } - pub fn row(&self, row: R) -> Option<&ChunkedBitSet<C>> { + pub fn row(&self, row: R) -> Option<&BitSet<C>> { self.rows.get(row)?.as_ref() } @@ -1484,7 +1639,7 @@ impl<R: Idx, C: Idx> SparseBitMatrix<R, C> { /// Returns true if the row was changed. pub fn intersect_row<Set>(&mut self, row: R, set: &Set) -> bool where - ChunkedBitSet<C>: BitRelations<Set>, + BitSet<C>: BitRelations<Set>, { match self.rows.get_mut(row) { Some(Some(row)) => row.intersect(set), @@ -1498,7 +1653,7 @@ impl<R: Idx, C: Idx> SparseBitMatrix<R, C> { /// Returns true if the row was changed. pub fn subtract_row<Set>(&mut self, row: R, set: &Set) -> bool where - ChunkedBitSet<C>: BitRelations<Set>, + BitSet<C>: BitRelations<Set>, { match self.rows.get_mut(row) { Some(Some(row)) => row.subtract(set), @@ -1512,7 +1667,7 @@ impl<R: Idx, C: Idx> SparseBitMatrix<R, C> { /// Returns true if the row was changed. pub fn union_row<Set>(&mut self, row: R, set: &Set) -> bool where - ChunkedBitSet<C>: BitRelations<Set>, + BitSet<C>: BitRelations<Set>, { self.ensure_row(row).union(set) } diff --git a/compiler/rustc_index/src/bit_set/tests.rs b/compiler/rustc_index/src/bit_set/tests.rs index 3f9198ce37f..f6142323979 100644 --- a/compiler/rustc_index/src/bit_set/tests.rs +++ b/compiler/rustc_index/src/bit_set/tests.rs @@ -503,15 +503,15 @@ fn sparse_matrix_operations() { matrix.insert(2, 99); matrix.insert(4, 0); - let mut disjoint: ChunkedBitSet<usize> = ChunkedBitSet::new_empty(100); + let mut disjoint: BitSet<usize> = BitSet::new_empty(100); disjoint.insert(33); - let mut superset = ChunkedBitSet::new_empty(100); + let mut superset = BitSet::new_empty(100); superset.insert(22); superset.insert(75); superset.insert(33); - let mut subset = ChunkedBitSet::new_empty(100); + let mut subset = BitSet::new_empty(100); subset.insert(22); // SparseBitMatrix::remove | 
