about summary refs log tree commit diff
path: root/compiler/rustc_data_structures
diff options
context:
space:
mode:
authorCamille Gillot <gillot.camille@gmail.com>2025-08-22 02:22:51 +0000
committerCamille Gillot <gillot.camille@gmail.com>2025-08-22 02:22:51 +0000
commit689171d38eaca74015b96d43bca5d30f2850c24a (patch)
tree92e1d29c2c5750e8bfd324a1c7f689755d04add8 /compiler/rustc_data_structures
parent040a98af70f0a7da03f3d5356531b28a2a7a77e4 (diff)
downloadrust-689171d38eaca74015b96d43bca5d30f2850c24a.tar.gz
rust-689171d38eaca74015b96d43bca5d30f2850c24a.zip
Uplift rustc_mir_transform::coverage::counters::union_find to rustc_data_structures.
Diffstat (limited to 'compiler/rustc_data_structures')
-rw-r--r--compiler/rustc_data_structures/src/lib.rs1
-rw-r--r--compiler/rustc_data_structures/src/union_find.rs96
-rw-r--r--compiler/rustc_data_structures/src/union_find/tests.rs32
3 files changed, 129 insertions, 0 deletions
diff --git a/compiler/rustc_data_structures/src/lib.rs b/compiler/rustc_data_structures/src/lib.rs
index 53178d09348..17da3ea83c8 100644
--- a/compiler/rustc_data_structures/src/lib.rs
+++ b/compiler/rustc_data_structures/src/lib.rs
@@ -77,6 +77,7 @@ pub mod thinvec;
 pub mod thousands;
 pub mod transitive_relation;
 pub mod unhash;
+pub mod union_find;
 pub mod unord;
 pub mod vec_cache;
 pub mod work_queue;
diff --git a/compiler/rustc_data_structures/src/union_find.rs b/compiler/rustc_data_structures/src/union_find.rs
new file mode 100644
index 00000000000..ef73cd7ab40
--- /dev/null
+++ b/compiler/rustc_data_structures/src/union_find.rs
@@ -0,0 +1,96 @@
+use std::cmp::Ordering;
+use std::mem;
+
+use rustc_index::{Idx, IndexVec};
+
+#[cfg(test)]
+mod tests;
+
+/// Simple implementation of a union-find data structure, i.e. a disjoint-set
+/// forest.
+#[derive(Debug)]
+pub struct UnionFind<Key: Idx> {
+    table: IndexVec<Key, UnionFindEntry<Key>>,
+}
+
+#[derive(Debug)]
+struct UnionFindEntry<Key> {
+    /// Transitively points towards the "root" of the set containing this key.
+    ///
+    /// Invariant: A root key is its own parent.
+    parent: Key,
+    /// When merging two "root" keys, their ranks determine which key becomes
+    /// the new root, to prevent the parent tree from becoming unnecessarily
+    /// tall. See [`UnionFind::unify`] for details.
+    rank: u32,
+}
+
+impl<Key: Idx> UnionFind<Key> {
+    /// Creates a new disjoint-set forest containing the keys `0..num_keys`.
+    /// Initially, every key is part of its own one-element set.
+    pub fn new(num_keys: usize) -> Self {
+        // Initially, every key is the root of its own set, so its parent is itself.
+        Self { table: IndexVec::from_fn_n(|key| UnionFindEntry { parent: key, rank: 0 }, num_keys) }
+    }
+
+    /// Returns the "root" key of the disjoint-set containing the given key.
+    /// If two keys have the same root, they belong to the same set.
+    ///
+    /// Also updates internal data structures to make subsequent `find`
+    /// operations faster.
+    pub fn find(&mut self, key: Key) -> Key {
+        // Loop until we find a key that is its own parent.
+        let mut curr = key;
+        while let parent = self.table[curr].parent
+            && curr != parent
+        {
+            // Perform "path compression" by peeking one layer ahead, and
+            // setting the current key's parent to that value.
+            // (This works even when `parent` is the root of its set, because
+            // of the invariant that a root is its own parent.)
+            let parent_parent = self.table[parent].parent;
+            self.table[curr].parent = parent_parent;
+
+            // Advance by one step and continue.
+            curr = parent;
+        }
+        curr
+    }
+
+    /// Merges the set containing `a` and the set containing `b` into one set.
+    ///
+    /// Returns the common root of both keys, after the merge.
+    pub fn unify(&mut self, a: Key, b: Key) -> Key {
+        let mut a = self.find(a);
+        let mut b = self.find(b);
+
+        // If both keys have the same root, they're already in the same set,
+        // so there's nothing more to do.
+        if a == b {
+            return a;
+        };
+
+        // Ensure that `a` has strictly greater rank, swapping if necessary.
+        // If both keys have the same rank, increment the rank of `a` so that
+        // future unifications will also prefer `a`, leading to flatter trees.
+        match Ord::cmp(&self.table[a].rank, &self.table[b].rank) {
+            Ordering::Less => mem::swap(&mut a, &mut b),
+            Ordering::Equal => self.table[a].rank += 1,
+            Ordering::Greater => {}
+        }
+
+        debug_assert!(self.table[a].rank > self.table[b].rank);
+        debug_assert_eq!(self.table[b].parent, b);
+
+        // Make `a` the parent of `b`.
+        self.table[b].parent = a;
+
+        a
+    }
+
+    /// Takes a "snapshot" of the current state of this disjoint-set forest, in
+    /// the form of a vector that directly maps each key to its current root.
+    pub fn snapshot(&mut self) -> IndexVec<Key, Key> {
+        self.table.indices().map(|key| self.find(key)).collect()
+    }
+}
diff --git a/compiler/rustc_data_structures/src/union_find/tests.rs b/compiler/rustc_data_structures/src/union_find/tests.rs
new file mode 100644
index 00000000000..34a4e4f8e6e
--- /dev/null
+++ b/compiler/rustc_data_structures/src/union_find/tests.rs
@@ -0,0 +1,32 @@
+use super::UnionFind;
+
+#[test]
+fn empty() {
+    let mut sets = UnionFind::<u32>::new(10);
+
+    for i in 1..10 {
+        assert_eq!(sets.find(i), i);
+    }
+}
+
+#[test]
+fn transitive() {
+    let mut sets = UnionFind::<u32>::new(10);
+
+    sets.unify(3, 7);
+    sets.unify(4, 2);
+
+    assert_eq!(sets.find(7), sets.find(3));
+    assert_eq!(sets.find(2), sets.find(4));
+    assert_ne!(sets.find(3), sets.find(4));
+
+    sets.unify(7, 4);
+
+    assert_eq!(sets.find(7), sets.find(3));
+    assert_eq!(sets.find(2), sets.find(4));
+    assert_eq!(sets.find(3), sets.find(4));
+
+    for i in [0, 1, 5, 6, 8, 9] {
+        assert_eq!(sets.find(i), i);
+    }
+}