diff options
| author | Nicholas Nethercote <n.nethercote@gmail.com> | 2024-12-05 11:15:59 +1100 |
|---|---|---|
| committer | Nicholas Nethercote <n.nethercote@gmail.com> | 2024-12-05 20:07:25 +1100 |
| commit | 6ee1a7aaa02f7e7713c8b60d785610983dc69ee7 (patch) | |
| tree | c159f95a3fb0745e097dde52bece0d24cf6828be | |
| parent | dff5ce68816edaf8a8740db60a8aa735641939f7 (diff) | |
| download | rust-6ee1a7aaa02f7e7713c8b60d785610983dc69ee7.tar.gz rust-6ee1a7aaa02f7e7713c8b60d785610983dc69ee7.zip | |
Introduce `MixedBitSet`.
It just uses `BitSet` for small/medium sizes (<= 2048 bits) and `ChunkedBitSet` for larger sizes. This is good because `ChunkedBitSet` is slow and memory-hungry at smaller sizes.
| -rw-r--r-- | compiler/rustc_index/src/bit_set.rs | 155 | ||||
| -rw-r--r-- | compiler/rustc_mir_dataflow/src/framework/fmt.rs | 22 | ||||
| -rw-r--r-- | compiler/rustc_mir_dataflow/src/framework/lattice.rs | 8 | ||||
| -rw-r--r-- | compiler/rustc_mir_dataflow/src/framework/mod.rs | 18 |
4 files changed, 200 insertions, 3 deletions
diff --git a/compiler/rustc_index/src/bit_set.rs b/compiler/rustc_index/src/bit_set.rs index 8693c731dd0..41bd47ea6d0 100644 --- a/compiler/rustc_index/src/bit_set.rs +++ b/compiler/rustc_index/src/bit_set.rs @@ -410,6 +410,9 @@ impl<'a, T: Idx> Iterator for BitIter<'a, T> { /// some stretches with lots of 0s and 1s mixed in a way that causes trouble /// for `IntervalSet`. /// +/// Best used via `MixedBitSet`, rather than directly, because `MixedBitSet` +/// has better performance for small bitsets. +/// /// `T` is an index type, typically a newtyped `usize` wrapper, but it can also /// just be `usize`. /// @@ -1106,6 +1109,158 @@ where false } +/// A bitset with a mixed representation, using `BitSet` for small and medium +/// bitsets, and `ChunkedBitSet` for large bitsets, i.e. those with enough bits +/// for at least two chunks. This is a good choice for many bitsets that can +/// have large domain sizes (e.g. 5000+). +/// +/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also +/// just be `usize`. +/// +/// All operations that involve an element will panic if the element is equal +/// to or greater than the domain size. All operations that involve two bitsets +/// will panic if the bitsets have differing domain sizes. +#[derive(PartialEq, Eq)] +pub enum MixedBitSet<T> { + Small(BitSet<T>), + Large(ChunkedBitSet<T>), +} + +impl<T> MixedBitSet<T> { + pub fn domain_size(&self) -> usize { + match self { + MixedBitSet::Small(set) => set.domain_size(), + MixedBitSet::Large(set) => set.domain_size(), + } + } +} + +impl<T: Idx> MixedBitSet<T> { + #[inline] + pub fn new_empty(domain_size: usize) -> MixedBitSet<T> { + if domain_size <= CHUNK_BITS { + MixedBitSet::Small(BitSet::new_empty(domain_size)) + } else { + MixedBitSet::Large(ChunkedBitSet::new_empty(domain_size)) + } + } + + #[inline] + pub fn is_empty(&self) -> bool { + match self { + MixedBitSet::Small(set) => set.is_empty(), + MixedBitSet::Large(set) => set.is_empty(), + } + } + + #[inline] + pub fn contains(&self, elem: T) -> bool { + match self { + MixedBitSet::Small(set) => set.contains(elem), + MixedBitSet::Large(set) => set.contains(elem), + } + } + + #[inline] + pub fn insert(&mut self, elem: T) -> bool { + match self { + MixedBitSet::Small(set) => set.insert(elem), + MixedBitSet::Large(set) => set.insert(elem), + } + } + + pub fn insert_all(&mut self) { + match self { + MixedBitSet::Small(set) => set.insert_all(), + MixedBitSet::Large(set) => set.insert_all(), + } + } + + #[inline] + pub fn remove(&mut self, elem: T) -> bool { + match self { + MixedBitSet::Small(set) => set.remove(elem), + MixedBitSet::Large(set) => set.remove(elem), + } + } + + pub fn iter(&self) -> MixedBitIter<'_, T> { + match self { + MixedBitSet::Small(set) => MixedBitIter::Small(set.iter()), + MixedBitSet::Large(set) => MixedBitIter::Large(set.iter()), + } + } + + bit_relations_inherent_impls! {} +} + +impl<T> Clone for MixedBitSet<T> { + fn clone(&self) -> Self { + match self { + MixedBitSet::Small(set) => MixedBitSet::Small(set.clone()), + MixedBitSet::Large(set) => MixedBitSet::Large(set.clone()), + } + } + + /// WARNING: this implementation of clone_from may panic if the two + /// bitsets have different domain sizes. This constraint is not inherent to + /// `clone_from`, but it works with the existing call sites and allows a + /// faster implementation, which is important because this function is hot. + fn clone_from(&mut self, from: &Self) { + match (self, from) { + (MixedBitSet::Small(set), MixedBitSet::Small(from)) => set.clone_from(from), + (MixedBitSet::Large(set), MixedBitSet::Large(from)) => set.clone_from(from), + _ => panic!("MixedBitSet size mismatch"), + } + } +} + +impl<T: Idx> BitRelations<MixedBitSet<T>> for MixedBitSet<T> { + fn union(&mut self, other: &MixedBitSet<T>) -> bool { + match (self, other) { + (MixedBitSet::Small(set), MixedBitSet::Small(other)) => set.union(other), + (MixedBitSet::Large(set), MixedBitSet::Large(other)) => set.union(other), + _ => panic!("MixedBitSet size mismatch"), + } + } + + fn subtract(&mut self, other: &MixedBitSet<T>) -> bool { + match (self, other) { + (MixedBitSet::Small(set), MixedBitSet::Small(other)) => set.subtract(other), + (MixedBitSet::Large(set), MixedBitSet::Large(other)) => set.subtract(other), + _ => panic!("MixedBitSet size mismatch"), + } + } + + fn intersect(&mut self, _other: &MixedBitSet<T>) -> bool { + unimplemented!("implement if/when necessary"); + } +} + +impl<T: Idx> fmt::Debug for MixedBitSet<T> { + fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + MixedBitSet::Small(set) => set.fmt(w), + MixedBitSet::Large(set) => set.fmt(w), + } + } +} + +pub enum MixedBitIter<'a, T: Idx> { + Small(BitIter<'a, T>), + Large(ChunkedBitIter<'a, T>), +} + +impl<'a, T: Idx> Iterator for MixedBitIter<'a, T> { + type Item = T; + fn next(&mut self) -> Option<T> { + match self { + MixedBitIter::Small(iter) => iter.next(), + MixedBitIter::Large(iter) => iter.next(), + } + } +} + /// A resizable bitset type with a dense representation. /// /// `T` is an index type, typically a newtyped `usize` wrapper, but it can also diff --git a/compiler/rustc_mir_dataflow/src/framework/fmt.rs b/compiler/rustc_mir_dataflow/src/framework/fmt.rs index dc176ba2d03..186172b3b86 100644 --- a/compiler/rustc_mir_dataflow/src/framework/fmt.rs +++ b/compiler/rustc_mir_dataflow/src/framework/fmt.rs @@ -4,7 +4,7 @@ use std::fmt; use rustc_index::Idx; -use rustc_index::bit_set::{BitSet, ChunkedBitSet}; +use rustc_index::bit_set::{BitSet, ChunkedBitSet, MixedBitSet}; use super::lattice::MaybeReachable; @@ -127,6 +127,26 @@ where } } +impl<T, C> DebugWithContext<C> for MixedBitSet<T> +where + T: Idx + DebugWithContext<C>, +{ + fn fmt_with(&self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + MixedBitSet::Small(set) => set.fmt_with(ctxt, f), + MixedBitSet::Large(set) => set.fmt_with(ctxt, f), + } + } + + fn fmt_diff_with(&self, old: &Self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match (self, old) { + (MixedBitSet::Small(set), MixedBitSet::Small(old)) => set.fmt_diff_with(old, ctxt, f), + (MixedBitSet::Large(set), MixedBitSet::Large(old)) => set.fmt_diff_with(old, ctxt, f), + _ => panic!("MixedBitSet size mismatch"), + } + } +} + impl<S, C> DebugWithContext<C> for MaybeReachable<S> where S: DebugWithContext<C>, diff --git a/compiler/rustc_mir_dataflow/src/framework/lattice.rs b/compiler/rustc_mir_dataflow/src/framework/lattice.rs index e2b56aedca3..852099e2ac8 100644 --- a/compiler/rustc_mir_dataflow/src/framework/lattice.rs +++ b/compiler/rustc_mir_dataflow/src/framework/lattice.rs @@ -40,7 +40,7 @@ use std::iter; -use rustc_index::bit_set::{BitSet, ChunkedBitSet}; +use rustc_index::bit_set::{BitSet, ChunkedBitSet, MixedBitSet}; use rustc_index::{Idx, IndexVec}; use crate::framework::BitSetExt; @@ -132,6 +132,12 @@ impl<T: Idx> JoinSemiLattice for ChunkedBitSet<T> { } } +impl<T: Idx> JoinSemiLattice for MixedBitSet<T> { + fn join(&mut self, other: &Self) -> bool { + self.union(other) + } +} + /// Extends a type `T` with top and bottom elements to make it a partially ordered set in which no /// value of `T` is comparable with any other. /// diff --git a/compiler/rustc_mir_dataflow/src/framework/mod.rs b/compiler/rustc_mir_dataflow/src/framework/mod.rs index b9407882ec5..40fb22014e5 100644 --- a/compiler/rustc_mir_dataflow/src/framework/mod.rs +++ b/compiler/rustc_mir_dataflow/src/framework/mod.rs @@ -35,7 +35,7 @@ use std::cmp::Ordering; use rustc_data_structures::work_queue::WorkQueue; -use rustc_index::bit_set::{BitSet, ChunkedBitSet}; +use rustc_index::bit_set::{BitSet, ChunkedBitSet, MixedBitSet}; use rustc_index::{Idx, IndexVec}; use rustc_middle::bug; use rustc_middle::mir::{self, BasicBlock, CallReturnPlaces, Location, TerminatorEdges, traversal}; @@ -77,6 +77,12 @@ impl<T: Idx> BitSetExt<T> for ChunkedBitSet<T> { } } +impl<T: Idx> BitSetExt<T> for MixedBitSet<T> { + fn contains(&self, elem: T) -> bool { + self.contains(elem) + } +} + /// A dataflow problem with an arbitrarily complex transfer function. /// /// This trait specifies the lattice on which this analysis operates (the domain), its @@ -337,6 +343,16 @@ impl<T: Idx> GenKill<T> for ChunkedBitSet<T> { } } +impl<T: Idx> GenKill<T> for MixedBitSet<T> { + fn gen_(&mut self, elem: T) { + self.insert(elem); + } + + fn kill(&mut self, elem: T) { + self.remove(elem); + } +} + impl<T, S: GenKill<T>> GenKill<T> for MaybeReachable<S> { fn gen_(&mut self, elem: T) { match self { |
