| field | value |
|---|---|
| author | Nicholas Nethercote <n.nethercote@gmail.com>, 2022-02-10 00:47:48 +1100 |
| committer | Nicholas Nethercote <n.nethercote@gmail.com>, 2022-02-23 10:18:49 +1100 |
| commit | 36b495f3cf23a1f235482ce7f81f0f4be614bb85 (patch) |
| tree | bb651118e987cbe7467f088e84811ae93a676954 /compiler/rustc_index/src |
| parent | 523a1b1d388bfe82a5d0540b876d9428b6dccc9c (diff) |
Introduce `ChunkedBitSet` and use it for some dataflow analyses.
This reduces peak memory usage significantly for some programs with very large functions, such as:

- `keccak`, `unicode_normalization`, and `match-stress-enum`, from the `rustc-perf` benchmark suite;
- `http-0.2.6` from crates.io.

The new type is used in the analyses where the bitsets can get huge (e.g. 10s of thousands of bits): `MaybeInitializedPlaces`, `MaybeUninitializedPlaces`, and `EverInitializedPlaces`.

Some refactoring was required in `rustc_mir_dataflow`. All existing analysis domains are either `BitSet` or a trivial wrapper around `BitSet`, and access in a few places is done via `Borrow<BitSet>` or `BorrowMut<BitSet>`. Now that some of these domains are `ChunkedBitSet`, that no longer works. So this commit replaces the `Borrow`/`BorrowMut` usage with a new trait `BitSetExt` containing the needed bitset operations. The impls just forward these to the underlying bitset type. This required fiddling with trait bounds in a few places.

The commit also:

- Moves `static_assert_size` from `rustc_data_structures` to `rustc_index` so it can be used in the latter; the former now re-exports it so existing users are unaffected.
- Factors out some common "clear excess bits in the final word" functionality in `bit_set.rs`.
- Uses `fill` in a few places instead of loops.
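The `BitSetExt` trait itself lands in `rustc_mir_dataflow`, outside the directory shown below, so its definition does not appear in this diff. The following self-contained sketch (illustrative names only, not the commit's actual code) shows the shape of the substitution the message describes: a small trait listing just the operations the dataflow framework needs, implemented by each concrete bitset type, instead of a `Borrow<BitSet<T>>`/`BorrowMut<BitSet<T>>` bound that only a dense bitset can satisfy.

```rust
// Sketch only: `BitSetOps`, `DenseSet`, and `apply_gen` are hypothetical
// stand-ins, not rustc's `BitSetExt`, `BitSet`, or engine code.
trait BitSetOps {
    fn contains(&self, elem: usize) -> bool;
    fn insert(&mut self, elem: usize) -> bool;
}

// A dense word-based set, analogous in spirit to `BitSet`.
struct DenseSet {
    words: Vec<u64>,
}

impl BitSetOps for DenseSet {
    fn contains(&self, elem: usize) -> bool {
        self.words[elem / 64] & (1 << (elem % 64)) != 0
    }
    fn insert(&mut self, elem: usize) -> bool {
        let (word, mask) = (elem / 64, 1u64 << (elem % 64));
        let changed = self.words[word] & mask == 0;
        self.words[word] |= mask;
        changed
    }
}

// The framework is generic over the trait, so a chunked implementation can
// be swapped in later simply by adding another `impl BitSetOps for ...`.
fn apply_gen<D: BitSetOps>(domain: &mut D, gen_bits: &[usize]) {
    for &e in gen_bits {
        domain.insert(e);
    }
}

fn main() {
    let mut d = DenseSet { words: vec![0; 2] };
    apply_gen(&mut d, &[3, 70]);
    assert!(d.contains(3) && d.contains(70) && !d.contains(4));
}
```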
Diffstat (limited to 'compiler/rustc_index/src')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | compiler/rustc_index/src/bit_set.rs | 498 |
| -rw-r--r-- | compiler/rustc_index/src/bit_set/tests.rs | 195 |
| -rw-r--r-- | compiler/rustc_index/src/lib.rs | 12 |
3 files changed, 677 insertions, 28 deletions
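To make the chunked layout in the diff below easier to follow, here is the element-to-chunk/word/bit arithmetic as a runnable check (constants copied from the diff; the `main` scaffolding is mine). Element 5000 lands in chunk 2, word 14, bit 8, which is exactly the `0x0100` word asserted for `b10000` in the new test.

```rust
const WORD_BITS: usize = 64;
const CHUNK_WORDS: usize = 32;
const CHUNK_BITS: usize = CHUNK_WORDS * WORD_BITS; // 2048

fn main() {
    let elem = 5000usize;
    let chunk_index = elem / CHUNK_BITS; // 5000 / 2048 = 2
    let chunk_elem = elem % CHUNK_BITS; // 5000 - 4096 = 904
    let word_index = chunk_elem / WORD_BITS; // 904 / 64 = 14
    let mask = 1u64 << (chunk_elem % WORD_BITS); // bit 8
    assert_eq!((chunk_index, word_index, mask), (2, 14, 0x0100));
}
```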
diff --git a/compiler/rustc_index/src/bit_set.rs b/compiler/rustc_index/src/bit_set.rs
index 7f376c5fbe5..12bde029494 100644
--- a/compiler/rustc_index/src/bit_set.rs
+++ b/compiler/rustc_index/src/bit_set.rs
@@ -5,16 +5,37 @@
 use std::iter;
 use std::marker::PhantomData;
 use std::mem;
 use std::ops::{BitAnd, BitAndAssign, BitOrAssign, Bound, Not, Range, RangeBounds, Shl};
+use std::rc::Rc;
 use std::slice;

 use rustc_macros::{Decodable, Encodable};

+use Chunk::*;
+
 #[cfg(test)]
 mod tests;

-pub type Word = u64;
-pub const WORD_BYTES: usize = mem::size_of::<Word>();
-pub const WORD_BITS: usize = WORD_BYTES * 8;
+type Word = u64;
+const WORD_BYTES: usize = mem::size_of::<Word>();
+const WORD_BITS: usize = WORD_BYTES * 8;
+
+// The choice of chunk size has some trade-offs.
+//
+// A big chunk size tends to favour cases where many large `ChunkedBitSet`s are
+// present, because they require fewer `Chunk`s, reducing the number of
+// allocations and reducing peak memory usage. Also, fewer chunk operations are
+// required, though more of them might be `Mixed`.
+//
+// A small chunk size tends to favour cases where many small `ChunkedBitSet`s
+// are present, because less space is wasted at the end of the final chunk (if
+// it's not full).
+const CHUNK_WORDS: usize = 32;
+const CHUNK_BITS: usize = CHUNK_WORDS * WORD_BITS; // 2048 bits
+
+/// ChunkSize is small to keep `Chunk` small. The static assertion ensures it's
+/// not too small.
+type ChunkSize = u16;
+const _: () = assert!(CHUNK_BITS <= ChunkSize::MAX as usize);

 pub trait BitRelations<Rhs> {
     fn union(&mut self, other: &Rhs) -> bool;
@@ -121,19 +142,12 @@ impl<T: Idx> BitSet<T> {
     /// Clear all elements.
     #[inline]
     pub fn clear(&mut self) {
-        for word in &mut self.words {
-            *word = 0;
-        }
+        self.words.fill(0);
     }

     /// Clear excess bits in the final word.
     fn clear_excess_bits(&mut self) {
-        let num_bits_in_final_word = self.domain_size % WORD_BITS;
-        if num_bits_in_final_word > 0 {
-            let mask = (1 << num_bits_in_final_word) - 1;
-            let final_word_idx = self.words.len() - 1;
-            self.words[final_word_idx] &= mask;
-        }
+        clear_excess_bits_in_final_word(self.domain_size, &mut self.words);
     }

     /// Count the number of set bits in the set.
@@ -203,9 +217,7 @@ impl<T: Idx> BitSet<T> {

     /// Sets all bits to true.
     pub fn insert_all(&mut self) {
-        for word in &mut self.words {
-            *word = !0;
-        }
+        self.words.fill(!0);
         self.clear_excess_bits();
     }

@@ -328,6 +340,407 @@ impl<T: Idx> BitRelations<BitSet<T>> for BitSet<T> {
     }
 }

+/// A fixed-size bitset type with a partially dense, partially sparse
+/// representation. The bitset is broken into chunks, and chunks that are all
+/// zeros or all ones are represented and handled very efficiently.
+///
+/// This type is especially efficient for sets that typically have a large
+/// `domain_size` with significant stretches of all zeros or all ones, and also
+/// some stretches with lots of 0s and 1s mixed in a way that causes trouble
+/// for `IntervalSet`.
+///
+/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also
+/// just be `usize`.
+///
+/// All operations that involve an element will panic if the element is equal
+/// to or greater than the domain size. All operations that involve two bitsets
+/// will panic if the bitsets have differing domain sizes.
+#[derive(Debug, PartialEq, Eq)]
+pub struct ChunkedBitSet<T> {
+    domain_size: usize,
+
+    /// The chunks. Each one contains exactly CHUNK_BITS values, except the
+    /// last one which contains 1..=CHUNK_BITS values.
+    chunks: Box<[Chunk]>,
+
+    marker: PhantomData<T>,
+}
+
+// Note: the chunk domain size is duplicated in each variant. This is a bit
+// inconvenient, but it allows the type size to be smaller than if we had an
+// outer struct containing a chunk domain size plus the `Chunk`, because the
+// compiler can place the chunk domain size after the tag.
+#[derive(Clone, Debug, PartialEq, Eq)]
+enum Chunk {
+    /// A chunk that is all zeros; we don't represent the zeros explicitly.
+    Zeros(ChunkSize),
+
+    /// A chunk that is all ones; we don't represent the ones explicitly.
+    Ones(ChunkSize),
+
+    /// A chunk that has a mix of zeros and ones, which are represented
+    /// explicitly and densely. It never has all zeros or all ones.
+    ///
+    /// If this is the final chunk there may be excess, unused words. This
+    /// turns out to be both simpler and have better performance than
+    /// allocating the minimum number of words, largely because we avoid having
+    /// to store the length, which would make this type larger. These excess
+    /// words are always zero, as are any excess bits in the final in-use
+    /// word.
+    ///
+    /// The second field is the count of 1s set in the chunk, and must satisfy
+    /// `0 < count < chunk_domain_size`.
+    ///
+    /// The words are within an `Rc` because it's surprisingly common to
+    /// duplicate an entire chunk, e.g. in `ChunkedBitSet::clone_from()`, or
+    /// when a `Mixed` chunk is union'd into a `Zeros` chunk. When we do need
+    /// to modify a chunk we use `Rc::make_mut`.
+    Mixed(ChunkSize, ChunkSize, Rc<[Word; CHUNK_WORDS]>),
+}
+
+// This type is used a lot. Make sure it doesn't unintentionally get bigger.
+#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
+crate::static_assert_size!(Chunk, 16);
+
+impl<T> ChunkedBitSet<T> {
+    pub fn domain_size(&self) -> usize {
+        self.domain_size
+    }
+
+    #[cfg(test)]
+    fn assert_valid(&self) {
+        if self.domain_size == 0 {
+            assert!(self.chunks.is_empty());
+            return;
+        }
+
+        assert!((self.chunks.len() - 1) * CHUNK_BITS <= self.domain_size);
+        assert!(self.chunks.len() * CHUNK_BITS >= self.domain_size);
+        for chunk in self.chunks.iter() {
+            chunk.assert_valid();
+        }
+    }
+}
+
+impl<T: Idx> ChunkedBitSet<T> {
+    /// Creates a new bitset with a given `domain_size` and chunk kind.
+    fn new(domain_size: usize, is_empty: bool) -> Self {
+        let chunks = if domain_size == 0 {
+            Box::new([])
+        } else {
+            // All the chunks have a chunk_domain_size of `CHUNK_BITS` except
+            // the final one.
+            let final_chunk_domain_size = {
+                let n = domain_size % CHUNK_BITS;
+                if n == 0 { CHUNK_BITS } else { n }
+            };
+            let mut chunks =
+                vec![Chunk::new(CHUNK_BITS, is_empty); num_chunks(domain_size)].into_boxed_slice();
+            *chunks.last_mut().unwrap() = Chunk::new(final_chunk_domain_size, is_empty);
+            chunks
+        };
+        ChunkedBitSet { domain_size, chunks, marker: PhantomData }
+    }
+
+    /// Creates a new, empty bitset with a given `domain_size`.
+    #[inline]
+    pub fn new_empty(domain_size: usize) -> Self {
+        ChunkedBitSet::new(domain_size, /* is_empty */ true)
+    }
+
+    /// Creates a new, filled bitset with a given `domain_size`.
+    #[inline]
+    pub fn new_filled(domain_size: usize) -> Self {
+        ChunkedBitSet::new(domain_size, /* is_empty */ false)
+    }
+
+    #[cfg(test)]
+    fn chunks(&self) -> &[Chunk] {
+        &self.chunks
+    }
+
+    /// Count the number of bits in the set.
+    pub fn count(&self) -> usize {
+        self.chunks.iter().map(|chunk| chunk.count()).sum()
+    }
+
+    /// Returns `true` if `self` contains `elem`.
+    #[inline]
+    pub fn contains(&self, elem: T) -> bool {
+        assert!(elem.index() < self.domain_size);
+        let chunk = &self.chunks[chunk_index(elem)];
+        match &chunk {
+            Zeros(_) => false,
+            Ones(_) => true,
+            Mixed(_, _, words) => {
+                let (word_index, mask) = chunk_word_index_and_mask(elem);
+                (words[word_index] & mask) != 0
+            }
+        }
+    }
+
+    /// Insert `elem`. Returns whether the set has changed.
+    pub fn insert(&mut self, elem: T) -> bool {
+        assert!(elem.index() < self.domain_size);
+        let chunk_index = chunk_index(elem);
+        let chunk = &mut self.chunks[chunk_index];
+        match *chunk {
+            Zeros(chunk_domain_size) => {
+                if chunk_domain_size > 1 {
+                    // We take some effort to avoid copying the words.
+                    let words = Rc::<[Word; CHUNK_WORDS]>::new_zeroed();
+                    // SAFETY: `words` can safely be all zeroes.
+                    let mut words = unsafe { words.assume_init() };
+                    let words_ref = Rc::get_mut(&mut words).unwrap();
+
+                    let (word_index, mask) = chunk_word_index_and_mask(elem);
+                    words_ref[word_index] |= mask;
+                    *chunk = Mixed(chunk_domain_size, 1, words);
+                } else {
+                    *chunk = Ones(chunk_domain_size);
+                }
+                true
+            }
+            Ones(_) => false,
+            Mixed(chunk_domain_size, ref mut count, ref mut words) => {
+                // We skip all the work if the bit is already set.
+                let (word_index, mask) = chunk_word_index_and_mask(elem);
+                if (words[word_index] & mask) == 0 {
+                    *count += 1;
+                    if *count < chunk_domain_size {
+                        let words = Rc::make_mut(words);
+                        words[word_index] |= mask;
+                    } else {
+                        *chunk = Ones(chunk_domain_size);
+                    }
+                    true
+                } else {
+                    false
+                }
+            }
+        }
+    }
+
+    /// Sets all bits to true.
+    pub fn insert_all(&mut self) {
+        for chunk in self.chunks.iter_mut() {
+            *chunk = match *chunk {
+                Zeros(chunk_domain_size)
+                | Ones(chunk_domain_size)
+                | Mixed(chunk_domain_size, ..) => Ones(chunk_domain_size),
+            }
+        }
+    }
+
+    /// Returns `true` if the set has changed.
+    pub fn remove(&mut self, elem: T) -> bool {
+        assert!(elem.index() < self.domain_size);
+        let chunk_index = chunk_index(elem);
+        let chunk = &mut self.chunks[chunk_index];
+        match *chunk {
+            Zeros(_) => false,
+            Ones(chunk_domain_size) => {
+                if chunk_domain_size > 1 {
+                    // We take some effort to avoid copying the words.
+                    let words = Rc::<[Word; CHUNK_WORDS]>::new_zeroed();
+                    // SAFETY: `words` can safely be all zeroes.
+                    let mut words = unsafe { words.assume_init() };
+                    let words_ref = Rc::get_mut(&mut words).unwrap();
+
+                    // Set only the bits in use.
+                    let num_words = num_words(chunk_domain_size as usize);
+                    words_ref[..num_words].fill(!0);
+                    clear_excess_bits_in_final_word(
+                        chunk_domain_size as usize,
+                        &mut words_ref[..num_words],
+                    );
+                    let (word_index, mask) = chunk_word_index_and_mask(elem);
+                    words_ref[word_index] &= !mask;
+                    *chunk = Mixed(chunk_domain_size, chunk_domain_size - 1, words);
+                } else {
+                    *chunk = Zeros(chunk_domain_size);
+                }
+                true
+            }
+            Mixed(chunk_domain_size, ref mut count, ref mut words) => {
+                // We skip all the work if the bit is already clear.
+                let (word_index, mask) = chunk_word_index_and_mask(elem);
+                if (words[word_index] & mask) != 0 {
+                    *count -= 1;
+                    if *count > 0 {
+                        let words = Rc::make_mut(words);
+                        words[word_index] &= !mask;
+                    } else {
+                        *chunk = Zeros(chunk_domain_size);
+                    }
+                    true
+                } else {
+                    false
+                }
+            }
+        }
+    }
+
+    bit_relations_inherent_impls! {}
+}
+
+impl<T: Idx> BitRelations<ChunkedBitSet<T>> for ChunkedBitSet<T> {
+    fn union(&mut self, other: &ChunkedBitSet<T>) -> bool {
+        assert_eq!(self.domain_size, other.domain_size);
+        debug_assert_eq!(self.chunks.len(), other.chunks.len());
+
+        let mut changed = false;
+        for (mut self_chunk, other_chunk) in self.chunks.iter_mut().zip(other.chunks.iter()) {
+            match (&mut self_chunk, &other_chunk) {
+                (_, Zeros(_)) | (Ones(_), _) => {}
+                (Zeros(self_chunk_domain_size), Ones(other_chunk_domain_size))
+                | (Mixed(self_chunk_domain_size, ..), Ones(other_chunk_domain_size))
+                | (Zeros(self_chunk_domain_size), Mixed(other_chunk_domain_size, ..)) => {
+                    // `other_chunk` fully overwrites `self_chunk`
+                    debug_assert_eq!(self_chunk_domain_size, other_chunk_domain_size);
+                    *self_chunk = other_chunk.clone();
+                    changed = true;
+                }
+                (
+                    Mixed(
+                        self_chunk_domain_size,
+                        ref mut self_chunk_count,
+                        ref mut self_chunk_words,
+                    ),
+                    Mixed(_other_chunk_domain_size, _other_chunk_count, other_chunk_words),
+                ) => {
+                    // First check if the operation would change
+                    // `self_chunk.words`. If not, we can avoid allocating some
+                    // words, and this happens often enough that it's a
+                    // performance win. Also, we only need to operate on the
+                    // in-use words, hence the slicing.
+                    let op = |a, b| a | b;
+                    let num_words = num_words(*self_chunk_domain_size as usize);
+                    if bitwise_changes(
+                        &self_chunk_words[0..num_words],
+                        &other_chunk_words[0..num_words],
+                        op,
+                    ) {
+                        let self_chunk_words = Rc::make_mut(self_chunk_words);
+                        let has_changed = bitwise(
+                            &mut self_chunk_words[0..num_words],
+                            &other_chunk_words[0..num_words],
+                            op,
+                        );
+                        debug_assert!(has_changed);
+                        *self_chunk_count = self_chunk_words[0..num_words]
+                            .iter()
+                            .map(|w| w.count_ones() as ChunkSize)
+                            .sum();
+                        if *self_chunk_count == *self_chunk_domain_size {
+                            *self_chunk = Ones(*self_chunk_domain_size);
+                        }
+                        changed = true;
+                    }
+                }
+            }
+        }
+        changed
+    }
+
+    fn subtract(&mut self, _other: &ChunkedBitSet<T>) -> bool {
+        unimplemented!("implement if/when necessary");
+    }
+
+    fn intersect(&mut self, _other: &ChunkedBitSet<T>) -> bool {
+        unimplemented!("implement if/when necessary");
+    }
+}
+
+impl<T: Idx> BitRelations<HybridBitSet<T>> for ChunkedBitSet<T> {
+    fn union(&mut self, other: &HybridBitSet<T>) -> bool {
+        // FIXME: this is slow if `other` is dense, and could easily be
+        // improved, but it hasn't been a problem in practice so far.
+        assert_eq!(self.domain_size, other.domain_size());
+        sequential_update(|elem| self.insert(elem), other.iter())
+    }
+
+    fn subtract(&mut self, other: &HybridBitSet<T>) -> bool {
+        // FIXME: this is slow if `other` is dense, and could easily be
+        // improved, but it hasn't been a problem in practice so far.
+        assert_eq!(self.domain_size, other.domain_size());
+        sequential_update(|elem| self.remove(elem), other.iter())
+    }
+
+    fn intersect(&mut self, _other: &HybridBitSet<T>) -> bool {
+        unimplemented!("implement if/when necessary");
+    }
+}
+
+impl<T> Clone for ChunkedBitSet<T> {
+    fn clone(&self) -> Self {
+        ChunkedBitSet {
+            domain_size: self.domain_size,
+            chunks: self.chunks.clone(),
+            marker: PhantomData,
+        }
+    }
+
+    /// WARNING: this implementation of clone_from will panic if the two
+    /// bitsets have different domain sizes. This constraint is not inherent to
+    /// `clone_from`, but it works with the existing call sites and allows a
+    /// faster implementation, which is important because this function is hot.
+    fn clone_from(&mut self, from: &Self) {
+        assert_eq!(self.domain_size, from.domain_size);
+        debug_assert_eq!(self.chunks.len(), from.chunks.len());
+
+        self.chunks.clone_from(&from.chunks)
+    }
+}
+
+impl Chunk {
+    #[cfg(test)]
+    fn assert_valid(&self) {
+        match *self {
+            Zeros(chunk_domain_size) | Ones(chunk_domain_size) => {
+                assert!(chunk_domain_size as usize <= CHUNK_BITS);
+            }
+            Mixed(chunk_domain_size, count, ref words) => {
+                assert!(chunk_domain_size as usize <= CHUNK_BITS);
+                assert!(0 < count && count < chunk_domain_size);
+
+                // Check the number of set bits matches `count`.
+                assert_eq!(
+                    words.iter().map(|w| w.count_ones() as ChunkSize).sum::<ChunkSize>(),
+                    count
+                );
+
+                // Check the not-in-use words are all zeroed.
+                let num_words = num_words(chunk_domain_size as usize);
+                if num_words < CHUNK_WORDS {
+                    assert_eq!(
+                        words[num_words..]
+                            .iter()
+                            .map(|w| w.count_ones() as ChunkSize)
+                            .sum::<ChunkSize>(),
+                        0
+                    );
+                }
+            }
+        }
+    }
+
+    fn new(chunk_domain_size: usize, is_empty: bool) -> Self {
+        debug_assert!(chunk_domain_size <= CHUNK_BITS);
+        let chunk_domain_size = chunk_domain_size as ChunkSize;
+        if is_empty { Zeros(chunk_domain_size) } else { Ones(chunk_domain_size) }
+    }
+
+    /// Count the number of 1s in the chunk.
+    fn count(&self) -> usize {
+        match *self {
+            Zeros(_) => 0,
+            Ones(chunk_domain_size) => chunk_domain_size as usize,
+            Mixed(_, count, _) => count as usize,
+        }
+    }
+}
+
 // Applies a function to mutate a bitset, and returns true if any
 // of the applications return true
 fn sequential_update<T: Idx>(
@@ -642,6 +1055,23 @@ where
     changed != 0
 }

+/// Does this bitwise operation change `out_vec`?
+#[inline]
+fn bitwise_changes<Op>(out_vec: &[Word], in_vec: &[Word], op: Op) -> bool
+where
+    Op: Fn(Word, Word) -> Word,
+{
+    assert_eq!(out_vec.len(), in_vec.len());
+    for (out_elem, in_elem) in iter::zip(out_vec, in_vec) {
+        let old_val = *out_elem;
+        let new_val = op(old_val, *in_elem);
+        if old_val != new_val {
+            return true;
+        }
+    }
+    false
+}
+
 const SPARSE_MAX: usize = 8;

 /// A fixed-size bitset type with a sparse representation and a maximum of
@@ -1136,18 +1566,7 @@ impl<R: Idx, C: Idx> BitMatrix<R, C> {
         for index in start..end {
             words[index] = !0;
         }
-        self.clear_excess_bits(row);
-    }
-
-    /// Clear excess bits in the final word of the row.
-    fn clear_excess_bits(&mut self, row: R) {
-        let num_bits_in_final_word = self.num_columns % WORD_BITS;
-        if num_bits_in_final_word > 0 {
-            let mask = (1 << num_bits_in_final_word) - 1;
-            let (_, end) = self.range(row);
-            let final_word_idx = end - 1;
-            self.words[final_word_idx] &= mask;
-        }
+        clear_excess_bits_in_final_word(self.num_columns, &mut self.words[..end]);
     }

     /// Gets a slice of the underlying words.
@@ -1340,6 +1759,12 @@ fn num_words<T: Idx>(domain_size: T) -> usize {
 }

 #[inline]
+fn num_chunks<T: Idx>(domain_size: T) -> usize {
+    assert!(domain_size.index() > 0);
+    (domain_size.index() + CHUNK_BITS - 1) / CHUNK_BITS
+}
+
+#[inline]
 fn word_index_and_mask<T: Idx>(elem: T) -> (usize, Word) {
     let elem = elem.index();
     let word_index = elem / WORD_BITS;
@@ -1348,6 +1773,25 @@ fn word_index_and_mask<T: Idx>(elem: T) -> (usize, Word) {
 }

 #[inline]
+fn chunk_index<T: Idx>(elem: T) -> usize {
+    elem.index() / CHUNK_BITS
+}
+
+#[inline]
+fn chunk_word_index_and_mask<T: Idx>(elem: T) -> (usize, Word) {
+    let chunk_elem = elem.index() % CHUNK_BITS;
+    word_index_and_mask(chunk_elem)
+}
+
+fn clear_excess_bits_in_final_word(domain_size: usize, words: &mut [Word]) {
+    let num_bits_in_final_word = domain_size % WORD_BITS;
+    if num_bits_in_final_word > 0 {
+        let mask = (1 << num_bits_in_final_word) - 1;
+        words[words.len() - 1] &= mask;
+    }
+}
+
+#[inline]
 fn max_bit(word: Word) -> usize {
     WORD_BITS - 1 - word.leading_zeros() as usize
 }
diff --git a/compiler/rustc_index/src/bit_set/tests.rs b/compiler/rustc_index/src/bit_set/tests.rs
index e2b07305c96..eec7dab5189 100644
--- a/compiler/rustc_index/src/bit_set/tests.rs
+++ b/compiler/rustc_index/src/bit_set/tests.rs
@@ -148,6 +148,201 @@ fn hybrid_bitset() {
 }

 #[test]
+fn chunked_bitset() {
+    let mut b0 = ChunkedBitSet::<usize>::new_empty(0);
+    let b0b = b0.clone();
+    assert_eq!(b0, ChunkedBitSet { domain_size: 0, chunks: Box::new([]), marker: PhantomData });
+
+    // There are no valid insert/remove/contains operations on a 0-domain
+    // bitset, but we can test `union`.
+    b0.assert_valid();
+    assert!(!b0.union(&b0b));
+    assert_eq!(b0.chunks(), vec![]);
+    assert_eq!(b0.count(), 0);
+    b0.assert_valid();
+
+    //-----------------------------------------------------------------------
+
+    let mut b1 = ChunkedBitSet::<usize>::new_empty(1);
+    assert_eq!(
+        b1,
+        ChunkedBitSet { domain_size: 1, chunks: Box::new([Zeros(1)]), marker: PhantomData }
+    );
+
+    b1.assert_valid();
+    assert!(!b1.contains(0));
+    assert_eq!(b1.count(), 0);
+    assert!(b1.insert(0));
+    assert!(b1.contains(0));
+    assert_eq!(b1.count(), 1);
+    assert_eq!(b1.chunks(), [Ones(1)]);
+    assert!(!b1.insert(0));
+    assert!(b1.remove(0));
+    assert!(!b1.contains(0));
+    assert_eq!(b1.count(), 0);
+    assert_eq!(b1.chunks(), [Zeros(1)]);
+    b1.assert_valid();
+
+    //-----------------------------------------------------------------------
+
+    let mut b100 = ChunkedBitSet::<usize>::new_filled(100);
+    assert_eq!(
+        b100,
+        ChunkedBitSet { domain_size: 100, chunks: Box::new([Ones(100)]), marker: PhantomData }
+    );
+
+    b100.assert_valid();
+    for i in 0..100 {
+        assert!(b100.contains(i));
+    }
+    assert_eq!(b100.count(), 100);
+    assert!(b100.remove(3));
+    assert!(b100.insert(3));
+    assert_eq!(b100.chunks(), vec![Ones(100)]);
+    assert!(
+        b100.remove(20) && b100.remove(30) && b100.remove(40) && b100.remove(99) && b100.insert(30)
+    );
+    assert_eq!(b100.count(), 97);
+    assert!(!b100.contains(20) && b100.contains(30) && !b100.contains(99) && b100.contains(50));
+    assert_eq!(
+        b100.chunks(),
+        vec![Mixed(
+            100,
+            97,
+            #[rustfmt::skip]
+            Rc::new([
+                0b11111111_11111111_11111110_11111111_11111111_11101111_11111111_11111111,
+                0b00000000_00000000_00000000_00000111_11111111_11111111_11111111_11111111,
+                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                0, 0, 0, 0, 0,
+            ])
+        )],
+    );
+    b100.assert_valid();
+    let mut num_removed = 0;
+    for i in 0..100 {
+        if b100.remove(i) {
+            num_removed += 1;
+        }
+    }
+    assert_eq!(num_removed, 97);
+    assert_eq!(b100.chunks(), vec![Zeros(100)]);
+    b100.assert_valid();
+
+    //-----------------------------------------------------------------------
+
+    let mut b2548 = ChunkedBitSet::<usize>::new_empty(2548);
+    assert_eq!(
+        b2548,
+        ChunkedBitSet {
+            domain_size: 2548,
+            chunks: Box::new([Zeros(2048), Zeros(500)]),
+            marker: PhantomData,
+        }
+    );
+
+    b2548.assert_valid();
+    b2548.insert(14);
+    b2548.remove(14);
+    assert_eq!(b2548.chunks(), vec![Zeros(2048), Zeros(500)]);
+    b2548.insert_all();
+    for i in 0..2548 {
+        assert!(b2548.contains(i));
+    }
+    assert_eq!(b2548.count(), 2548);
+    assert_eq!(b2548.chunks(), vec![Ones(2048), Ones(500)]);
+    b2548.assert_valid();
+
+    //-----------------------------------------------------------------------
+
+    let mut b4096 = ChunkedBitSet::<usize>::new_empty(4096);
+    assert_eq!(
+        b4096,
+        ChunkedBitSet {
+            domain_size: 4096,
+            chunks: Box::new([Zeros(2048), Zeros(2048)]),
+            marker: PhantomData,
+        }
+    );
+
+    b4096.assert_valid();
+    for i in 0..4096 {
+        assert!(!b4096.contains(i));
+    }
+    assert!(b4096.insert(0) && b4096.insert(4095) && !b4096.insert(4095));
+    assert!(
+        b4096.contains(0) && !b4096.contains(2047) && !b4096.contains(2048) && b4096.contains(4095)
+    );
+    assert_eq!(
+        b4096.chunks(),
+        #[rustfmt::skip]
+        vec![
+            Mixed(2048, 1, Rc::new([
+                1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+            ])),
+            Mixed(2048, 1, Rc::new([
+                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x8000_0000_0000_0000
+            ])),
+        ],
+    );
+    assert_eq!(b4096.count(), 2);
+    b4096.assert_valid();
+
+    //-----------------------------------------------------------------------
+
+    let mut b10000 = ChunkedBitSet::<usize>::new_empty(10000);
+    assert_eq!(
+        b10000,
+        ChunkedBitSet {
+            domain_size: 10000,
+            chunks: Box::new([Zeros(2048), Zeros(2048), Zeros(2048), Zeros(2048), Zeros(1808),]),
+            marker: PhantomData,
+        }
+    );
+
+    b10000.assert_valid();
+    assert!(b10000.insert(3000) && b10000.insert(5000));
+    assert_eq!(
+        b10000.chunks(),
+        #[rustfmt::skip]
+        vec![
+            Zeros(2048),
+            Mixed(2048, 1, Rc::new([
+                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0100_0000_0000_0000, 0,
+                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            ])),
+            Mixed(2048, 1, Rc::new([
+                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0100, 0,
+                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            ])),
+            Zeros(2048),
+            Zeros(1808),
+        ],
+    );
+    let mut b10000b = ChunkedBitSet::<usize>::new_empty(10000);
+    b10000b.clone_from(&b10000);
+    assert_eq!(b10000, b10000b);
+    for i in 6000..7000 {
+        b10000b.insert(i);
+    }
+    assert_eq!(b10000b.count(), 1002);
+    b10000b.assert_valid();
+    b10000b.clone_from(&b10000);
+    assert_eq!(b10000b.count(), 2);
+    for i in 2000..8000 {
+        b10000b.insert(i);
+    }
+    b10000.union(&b10000b);
+    assert_eq!(b10000.count(), 6000);
+    b10000.union(&b10000b);
+    assert_eq!(b10000.count(), 6000);
+    b10000.assert_valid();
+    b10000b.assert_valid();
+}
+
+#[test]
 fn grow() {
     let mut set: GrowableBitSet<usize> = GrowableBitSet::with_capacity(65);
     for index in 0..65 {
diff --git a/compiler/rustc_index/src/lib.rs b/compiler/rustc_index/src/lib.rs
index 7919e409253..0cfb7bd1106 100644
--- a/compiler/rustc_index/src/lib.rs
+++ b/compiler/rustc_index/src/lib.rs
@@ -1,11 +1,21 @@
 #![feature(allow_internal_unstable)]
 #![feature(bench_black_box)]
 #![feature(extend_one)]
+#![feature(let_else)]
 #![feature(min_specialization)]
+#![feature(new_uninit)]
 #![feature(step_trait)]
+#![feature(stmt_expr_attributes)]
 #![feature(test)]
-#![feature(let_else)]

 pub mod bit_set;
 pub mod interval;
 pub mod vec;
+
+/// Type size assertion. The first argument is a type and the second argument is its expected size.
+#[macro_export]
+macro_rules! static_assert_size {
+    ($ty:ty, $size:expr) => {
+        const _: [(); $size] = [(); ::std::mem::size_of::<$ty>()];
+    };
+}
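For reference, the relocated `static_assert_size` macro works by compile-time array-length equality: if the type's actual size differs from the expected value, the `const` item fails to type-check. The enum below is a hypothetical stand-in with the same shape as `Chunk` (two `u16` payload fields plus one pointer-sized `Rc`), not the rustc type; on a 64-bit target the tag and the two `u16`s pack into the first 8 bytes ahead of the pointer, which is what the commit's comment on `Chunk` alludes to.

```rust
use std::rc::Rc;

// Same definition as the macro added to lib.rs above.
macro_rules! static_assert_size {
    ($ty:ty, $size:expr) => {
        const _: [(); $size] = [(); ::std::mem::size_of::<$ty>()];
    };
}

// Hypothetical stand-in mirroring `Chunk`'s shape; not the rustc type.
#[allow(dead_code)]
enum Demo {
    Zeros(u16),
    Ones(u16),
    Mixed(u16, u16, Rc<[u64; 32]>),
}

// Enum layout is compiler-chosen, hence the target guard, as in the commit.
#[cfg(target_pointer_width = "64")]
static_assert_size!(Demo, 16);

fn main() {}
```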
