about summary refs log tree commit diff
diff options
context:
space:
mode:
authorNicholas Nethercote <n.nethercote@gmail.com>2024-12-05 11:15:59 +1100
committerNicholas Nethercote <n.nethercote@gmail.com>2024-12-05 20:07:25 +1100
commit6ee1a7aaa02f7e7713c8b60d785610983dc69ee7 (patch)
treec159f95a3fb0745e097dde52bece0d24cf6828be
parentdff5ce68816edaf8a8740db60a8aa735641939f7 (diff)
downloadrust-6ee1a7aaa02f7e7713c8b60d785610983dc69ee7.tar.gz
rust-6ee1a7aaa02f7e7713c8b60d785610983dc69ee7.zip
Introduce `MixedBitSet`.
It just uses `BitSet` for small/medium sizes (<= 2048 bits) and
`ChunkedBitSet` for larger sizes. This is good because `ChunkedBitSet`
is slow and memory-hungry at smaller sizes.
-rw-r--r--compiler/rustc_index/src/bit_set.rs155
-rw-r--r--compiler/rustc_mir_dataflow/src/framework/fmt.rs22
-rw-r--r--compiler/rustc_mir_dataflow/src/framework/lattice.rs8
-rw-r--r--compiler/rustc_mir_dataflow/src/framework/mod.rs18
4 files changed, 200 insertions, 3 deletions
diff --git a/compiler/rustc_index/src/bit_set.rs b/compiler/rustc_index/src/bit_set.rs
index 8693c731dd0..41bd47ea6d0 100644
--- a/compiler/rustc_index/src/bit_set.rs
+++ b/compiler/rustc_index/src/bit_set.rs
@@ -410,6 +410,9 @@ impl<'a, T: Idx> Iterator for BitIter<'a, T> {
 /// some stretches with lots of 0s and 1s mixed in a way that causes trouble
 /// for `IntervalSet`.
 ///
+/// Best used via `MixedBitSet`, rather than directly, because `MixedBitSet`
+/// has better performance for small bitsets.
+///
 /// `T` is an index type, typically a newtyped `usize` wrapper, but it can also
 /// just be `usize`.
 ///
@@ -1106,6 +1109,158 @@ where
     false
 }
 
+/// A bitset with a mixed representation, using `BitSet` for small and medium
+/// bitsets, and `ChunkedBitSet` for large bitsets, i.e. those with enough bits
+/// for at least two chunks. This is a good choice for many bitsets that can
+/// have large domain sizes (e.g. 5000+).
+///
+/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also
+/// just be `usize`.
+///
+/// All operations that involve an element will panic if the element is equal
+/// to or greater than the domain size. All operations that involve two bitsets
+/// will panic if the bitsets have differing domain sizes.
+#[derive(PartialEq, Eq)]
+pub enum MixedBitSet<T> {
+    Small(BitSet<T>),
+    Large(ChunkedBitSet<T>),
+}
+
+impl<T> MixedBitSet<T> {
+    pub fn domain_size(&self) -> usize {
+        match self {
+            MixedBitSet::Small(set) => set.domain_size(),
+            MixedBitSet::Large(set) => set.domain_size(),
+        }
+    }
+}
+
+impl<T: Idx> MixedBitSet<T> {
+    #[inline]
+    pub fn new_empty(domain_size: usize) -> MixedBitSet<T> {
+        if domain_size <= CHUNK_BITS {
+            MixedBitSet::Small(BitSet::new_empty(domain_size))
+        } else {
+            MixedBitSet::Large(ChunkedBitSet::new_empty(domain_size))
+        }
+    }
+
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        match self {
+            MixedBitSet::Small(set) => set.is_empty(),
+            MixedBitSet::Large(set) => set.is_empty(),
+        }
+    }
+
+    #[inline]
+    pub fn contains(&self, elem: T) -> bool {
+        match self {
+            MixedBitSet::Small(set) => set.contains(elem),
+            MixedBitSet::Large(set) => set.contains(elem),
+        }
+    }
+
+    #[inline]
+    pub fn insert(&mut self, elem: T) -> bool {
+        match self {
+            MixedBitSet::Small(set) => set.insert(elem),
+            MixedBitSet::Large(set) => set.insert(elem),
+        }
+    }
+
+    pub fn insert_all(&mut self) {
+        match self {
+            MixedBitSet::Small(set) => set.insert_all(),
+            MixedBitSet::Large(set) => set.insert_all(),
+        }
+    }
+
+    #[inline]
+    pub fn remove(&mut self, elem: T) -> bool {
+        match self {
+            MixedBitSet::Small(set) => set.remove(elem),
+            MixedBitSet::Large(set) => set.remove(elem),
+        }
+    }
+
+    pub fn iter(&self) -> MixedBitIter<'_, T> {
+        match self {
+            MixedBitSet::Small(set) => MixedBitIter::Small(set.iter()),
+            MixedBitSet::Large(set) => MixedBitIter::Large(set.iter()),
+        }
+    }
+
+    bit_relations_inherent_impls! {}
+}
+
+impl<T> Clone for MixedBitSet<T> {
+    fn clone(&self) -> Self {
+        match self {
+            MixedBitSet::Small(set) => MixedBitSet::Small(set.clone()),
+            MixedBitSet::Large(set) => MixedBitSet::Large(set.clone()),
+        }
+    }
+
+    /// WARNING: this implementation of clone_from may panic if the two
+    /// bitsets have different domain sizes. This constraint is not inherent to
+    /// `clone_from`, but it works with the existing call sites and allows a
+    /// faster implementation, which is important because this function is hot.
+    fn clone_from(&mut self, from: &Self) {
+        match (self, from) {
+            (MixedBitSet::Small(set), MixedBitSet::Small(from)) => set.clone_from(from),
+            (MixedBitSet::Large(set), MixedBitSet::Large(from)) => set.clone_from(from),
+            _ => panic!("MixedBitSet size mismatch"),
+        }
+    }
+}
+
+impl<T: Idx> BitRelations<MixedBitSet<T>> for MixedBitSet<T> {
+    fn union(&mut self, other: &MixedBitSet<T>) -> bool {
+        match (self, other) {
+            (MixedBitSet::Small(set), MixedBitSet::Small(other)) => set.union(other),
+            (MixedBitSet::Large(set), MixedBitSet::Large(other)) => set.union(other),
+            _ => panic!("MixedBitSet size mismatch"),
+        }
+    }
+
+    fn subtract(&mut self, other: &MixedBitSet<T>) -> bool {
+        match (self, other) {
+            (MixedBitSet::Small(set), MixedBitSet::Small(other)) => set.subtract(other),
+            (MixedBitSet::Large(set), MixedBitSet::Large(other)) => set.subtract(other),
+            _ => panic!("MixedBitSet size mismatch"),
+        }
+    }
+
+    fn intersect(&mut self, _other: &MixedBitSet<T>) -> bool {
+        unimplemented!("implement if/when necessary");
+    }
+}
+
+impl<T: Idx> fmt::Debug for MixedBitSet<T> {
+    fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            MixedBitSet::Small(set) => set.fmt(w),
+            MixedBitSet::Large(set) => set.fmt(w),
+        }
+    }
+}
+
+pub enum MixedBitIter<'a, T: Idx> {
+    Small(BitIter<'a, T>),
+    Large(ChunkedBitIter<'a, T>),
+}
+
+impl<'a, T: Idx> Iterator for MixedBitIter<'a, T> {
+    type Item = T;
+    fn next(&mut self) -> Option<T> {
+        match self {
+            MixedBitIter::Small(iter) => iter.next(),
+            MixedBitIter::Large(iter) => iter.next(),
+        }
+    }
+}
+
 /// A resizable bitset type with a dense representation.
 ///
 /// `T` is an index type, typically a newtyped `usize` wrapper, but it can also
diff --git a/compiler/rustc_mir_dataflow/src/framework/fmt.rs b/compiler/rustc_mir_dataflow/src/framework/fmt.rs
index dc176ba2d03..186172b3b86 100644
--- a/compiler/rustc_mir_dataflow/src/framework/fmt.rs
+++ b/compiler/rustc_mir_dataflow/src/framework/fmt.rs
@@ -4,7 +4,7 @@
 use std::fmt;
 
 use rustc_index::Idx;
-use rustc_index::bit_set::{BitSet, ChunkedBitSet};
+use rustc_index::bit_set::{BitSet, ChunkedBitSet, MixedBitSet};
 
 use super::lattice::MaybeReachable;
 
@@ -127,6 +127,26 @@ where
     }
 }
 
+impl<T, C> DebugWithContext<C> for MixedBitSet<T>
+where
+    T: Idx + DebugWithContext<C>,
+{
+    fn fmt_with(&self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            MixedBitSet::Small(set) => set.fmt_with(ctxt, f),
+            MixedBitSet::Large(set) => set.fmt_with(ctxt, f),
+        }
+    }
+
+    fn fmt_diff_with(&self, old: &Self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match (self, old) {
+            (MixedBitSet::Small(set), MixedBitSet::Small(old)) => set.fmt_diff_with(old, ctxt, f),
+            (MixedBitSet::Large(set), MixedBitSet::Large(old)) => set.fmt_diff_with(old, ctxt, f),
+            _ => panic!("MixedBitSet size mismatch"),
+        }
+    }
+}
+
 impl<S, C> DebugWithContext<C> for MaybeReachable<S>
 where
     S: DebugWithContext<C>,
diff --git a/compiler/rustc_mir_dataflow/src/framework/lattice.rs b/compiler/rustc_mir_dataflow/src/framework/lattice.rs
index e2b56aedca3..852099e2ac8 100644
--- a/compiler/rustc_mir_dataflow/src/framework/lattice.rs
+++ b/compiler/rustc_mir_dataflow/src/framework/lattice.rs
@@ -40,7 +40,7 @@
 
 use std::iter;
 
-use rustc_index::bit_set::{BitSet, ChunkedBitSet};
+use rustc_index::bit_set::{BitSet, ChunkedBitSet, MixedBitSet};
 use rustc_index::{Idx, IndexVec};
 
 use crate::framework::BitSetExt;
@@ -132,6 +132,12 @@ impl<T: Idx> JoinSemiLattice for ChunkedBitSet<T> {
     }
 }
 
+impl<T: Idx> JoinSemiLattice for MixedBitSet<T> {
+    fn join(&mut self, other: &Self) -> bool {
+        self.union(other)
+    }
+}
+
 /// Extends a type `T` with top and bottom elements to make it a partially ordered set in which no
 /// value of `T` is comparable with any other.
 ///
diff --git a/compiler/rustc_mir_dataflow/src/framework/mod.rs b/compiler/rustc_mir_dataflow/src/framework/mod.rs
index b9407882ec5..40fb22014e5 100644
--- a/compiler/rustc_mir_dataflow/src/framework/mod.rs
+++ b/compiler/rustc_mir_dataflow/src/framework/mod.rs
@@ -35,7 +35,7 @@
 use std::cmp::Ordering;
 
 use rustc_data_structures::work_queue::WorkQueue;
-use rustc_index::bit_set::{BitSet, ChunkedBitSet};
+use rustc_index::bit_set::{BitSet, ChunkedBitSet, MixedBitSet};
 use rustc_index::{Idx, IndexVec};
 use rustc_middle::bug;
 use rustc_middle::mir::{self, BasicBlock, CallReturnPlaces, Location, TerminatorEdges, traversal};
@@ -77,6 +77,12 @@ impl<T: Idx> BitSetExt<T> for ChunkedBitSet<T> {
     }
 }
 
+impl<T: Idx> BitSetExt<T> for MixedBitSet<T> {
+    fn contains(&self, elem: T) -> bool {
+        self.contains(elem)
+    }
+}
+
 /// A dataflow problem with an arbitrarily complex transfer function.
 ///
 /// This trait specifies the lattice on which this analysis operates (the domain), its
@@ -337,6 +343,16 @@ impl<T: Idx> GenKill<T> for ChunkedBitSet<T> {
     }
 }
 
+impl<T: Idx> GenKill<T> for MixedBitSet<T> {
+    fn gen_(&mut self, elem: T) {
+        self.insert(elem);
+    }
+
+    fn kill(&mut self, elem: T) {
+        self.remove(elem);
+    }
+}
+
 impl<T, S: GenKill<T>> GenKill<T> for MaybeReachable<S> {
     fn gen_(&mut self, elem: T) {
         match self {