about summary refs log tree commit diff
path: root/compiler/rustc_data_structures/src
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2022-02-15 11:59:37 +0000
committerbors <bors@rust-lang.org>2022-02-15 11:59:37 +0000
commit55697574915ca58c3fcd7b1c854c1c93e002dc85 (patch)
tree43f8669ead4f7dbe2812b96a39d1d52bffc62e62 /compiler/rustc_data_structures/src
parent6421a499a50adbaa7b5d0234bdd4817d970f0933 (diff)
parent80632de4a1f9d1c0dfe16170fc079e940f42776a (diff)
downloadrust-55697574915ca58c3fcd7b1c854c1c93e002dc85.tar.gz
rust-55697574915ca58c3fcd7b1c854c1c93e002dc85.zip
Auto merge of #93148 - nnethercote:Uniq, r=fee1-dead
Overhaul interning.

A number of types are interned and `eq` and `hash` are implemented on
the pointer rather than the contents. But this is not well enforced
within the type system like you might expect.

This PR introduces a new type `Interned` which encapsulates this concept
more rigorously, and uses it to convert a couple of the less common
interned types.

r? `@fee1-dead`
Diffstat (limited to 'compiler/rustc_data_structures/src')
-rw-r--r--compiler/rustc_data_structures/src/intern.rs98
-rw-r--r--compiler/rustc_data_structures/src/intern/tests.rs59
-rw-r--r--compiler/rustc_data_structures/src/lib.rs3
-rw-r--r--compiler/rustc_data_structures/src/ptr_key.rs37
4 files changed, 159 insertions, 38 deletions
diff --git a/compiler/rustc_data_structures/src/intern.rs b/compiler/rustc_data_structures/src/intern.rs
new file mode 100644
index 00000000000..c79a5ebf093
--- /dev/null
+++ b/compiler/rustc_data_structures/src/intern.rs
@@ -0,0 +1,98 @@
+use std::cmp::Ordering;
+use std::hash::{Hash, Hasher};
+use std::ops::Deref;
+use std::ptr;
+
+mod private {
+    #[derive(Clone, Copy, Debug)]
+    pub struct PrivateZst;
+}
+
+/// A reference to a value that is interned, and is known to be unique.
+///
+/// Note that it is possible to have a `T` and a `Interned<T>` that are (or
+/// refer to) equal but different values. But if you have two different
+/// `Interned<T>`s, they both refer to the same value, at a single location in
+/// memory. This means that equality and hashing can be done on the value's
+/// address rather than the value's contents, which can improve performance.
+///
+/// The `PrivateZst` field means you can pattern match with `Interned(v, _)`
+/// but you can only construct a `Interned` with `new_unchecked`, and not
+/// directly.
+#[derive(Debug)]
+#[cfg_attr(not(bootstrap), rustc_pass_by_value)]
+pub struct Interned<'a, T>(pub &'a T, pub private::PrivateZst);
+
+impl<'a, T> Interned<'a, T> {
+    /// Create a new `Interned` value. The value referred to *must* be interned
+    /// and thus be unique, and it *must* remain unique in the future. This
+    /// function has `_unchecked` in the name but is not `unsafe`, because if
+    /// the uniqueness condition is violated condition it will cause incorrect
+    /// behaviour but will not affect memory safety.
+    #[inline]
+    pub const fn new_unchecked(t: &'a T) -> Self {
+        Interned(t, private::PrivateZst)
+    }
+}
+
+impl<'a, T> Clone for Interned<'a, T> {
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+impl<'a, T> Copy for Interned<'a, T> {}
+
+impl<'a, T> Deref for Interned<'a, T> {
+    type Target = T;
+
+    #[inline]
+    fn deref(&self) -> &T {
+        self.0
+    }
+}
+
+impl<'a, T> PartialEq for Interned<'a, T> {
+    #[inline]
+    fn eq(&self, other: &Self) -> bool {
+        // Pointer equality implies equality, due to the uniqueness constraint.
+        ptr::eq(self.0, other.0)
+    }
+}
+
+impl<'a, T> Eq for Interned<'a, T> {}
+
+// In practice you can't intern any `T` that doesn't implement `Eq`, because
+// that's needed for hashing. Therefore, we won't be interning any `T` that
+// implements `PartialOrd` without also implementing `Ord`. So we can have the
+// bound `T: Ord` here and avoid duplication with the `Ord` impl below.
+impl<'a, T: Ord> PartialOrd for Interned<'a, T> {
+    fn partial_cmp(&self, other: &Interned<'a, T>) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl<'a, T: Ord> Ord for Interned<'a, T> {
+    fn cmp(&self, other: &Interned<'a, T>) -> Ordering {
+        // Pointer equality implies equality, due to the uniqueness constraint,
+        // but the contents must be compared otherwise.
+        if ptr::eq(self.0, other.0) {
+            Ordering::Equal
+        } else {
+            let res = self.0.cmp(&other.0);
+            debug_assert_ne!(res, Ordering::Equal);
+            res
+        }
+    }
+}
+
+impl<'a, T> Hash for Interned<'a, T> {
+    #[inline]
+    fn hash<H: Hasher>(&self, s: &mut H) {
+        // Pointer hashing is sufficient, due to the uniqueness constraint.
+        ptr::hash(self.0, s)
+    }
+}
+
+#[cfg(test)]
+mod tests;
diff --git a/compiler/rustc_data_structures/src/intern/tests.rs b/compiler/rustc_data_structures/src/intern/tests.rs
new file mode 100644
index 00000000000..09810a0850e
--- /dev/null
+++ b/compiler/rustc_data_structures/src/intern/tests.rs
@@ -0,0 +1,59 @@
+use super::*;
+use std::cmp::Ordering;
+
+#[derive(Debug)]
+struct S(u32);
+
+impl PartialEq for S {
+    fn eq(&self, _other: &Self) -> bool {
+        panic!("shouldn't be called");
+    }
+}
+
+impl Eq for S {}
+
+impl PartialOrd for S {
+    fn partial_cmp(&self, other: &S) -> Option<Ordering> {
+        // The `==` case should be handled by `Interned`.
+        assert_ne!(self.0, other.0);
+        self.0.partial_cmp(&other.0)
+    }
+}
+
+impl Ord for S {
+    fn cmp(&self, other: &S) -> Ordering {
+        // The `==` case should be handled by `Interned`.
+        assert_ne!(self.0, other.0);
+        self.0.cmp(&other.0)
+    }
+}
+
+#[test]
+fn test_uniq() {
+    let s1 = S(1);
+    let s2 = S(2);
+    let s3 = S(3);
+    let s4 = S(1); // violates uniqueness
+
+    let v1 = Interned::new_unchecked(&s1);
+    let v2 = Interned::new_unchecked(&s2);
+    let v3a = Interned::new_unchecked(&s3);
+    let v3b = Interned::new_unchecked(&s3);
+    let v4 = Interned::new_unchecked(&s4); // violates uniqueness
+
+    assert_ne!(v1, v2);
+    assert_ne!(v2, v3a);
+    assert_eq!(v1, v1);
+    assert_eq!(v3a, v3b);
+    assert_ne!(v1, v4); // same content but different addresses: not equal
+
+    assert_eq!(v1.cmp(&v2), Ordering::Less);
+    assert_eq!(v3a.cmp(&v2), Ordering::Greater);
+    assert_eq!(v1.cmp(&v1), Ordering::Equal); // only uses Interned::eq, not S::cmp
+    assert_eq!(v3a.cmp(&v3b), Ordering::Equal); // only uses Interned::eq, not S::cmp
+
+    assert_eq!(v1.partial_cmp(&v2), Some(Ordering::Less));
+    assert_eq!(v3a.partial_cmp(&v2), Some(Ordering::Greater));
+    assert_eq!(v1.partial_cmp(&v1), Some(Ordering::Equal)); // only uses Interned::eq, not S::cmp
+    assert_eq!(v3a.partial_cmp(&v3b), Some(Ordering::Equal)); // only uses Interned::eq, not S::cmp
+}
diff --git a/compiler/rustc_data_structures/src/lib.rs b/compiler/rustc_data_structures/src/lib.rs
index 205f1cd77c0..80f83140f4b 100644
--- a/compiler/rustc_data_structures/src/lib.rs
+++ b/compiler/rustc_data_structures/src/lib.rs
@@ -21,6 +21,7 @@
 #![feature(type_alias_impl_trait)]
 #![feature(new_uninit)]
 #![feature(once_cell)]
+#![feature(rustc_attrs)]
 #![feature(test)]
 #![feature(thread_id_value)]
 #![feature(vec_into_raw_parts)]
@@ -68,12 +69,12 @@ pub mod flock;
 pub mod functor;
 pub mod fx;
 pub mod graph;
+pub mod intern;
 pub mod jobserver;
 pub mod macros;
 pub mod map_in_place;
 pub mod obligation_forest;
 pub mod owning_ref;
-pub mod ptr_key;
 pub mod sip128;
 pub mod small_c_str;
 pub mod snapshot_map;
diff --git a/compiler/rustc_data_structures/src/ptr_key.rs b/compiler/rustc_data_structures/src/ptr_key.rs
deleted file mode 100644
index 440ccb05d86..00000000000
--- a/compiler/rustc_data_structures/src/ptr_key.rs
+++ /dev/null
@@ -1,37 +0,0 @@
-use std::ops::Deref;
-use std::{hash, ptr};
-
-/// A wrapper around reference that compares and hashes like a pointer.
-/// Can be used as a key in sets/maps indexed by pointers to avoid `unsafe`.
-#[derive(Debug)]
-pub struct PtrKey<'a, T>(pub &'a T);
-
-impl<'a, T> Clone for PtrKey<'a, T> {
-    fn clone(&self) -> Self {
-        *self
-    }
-}
-
-impl<'a, T> Copy for PtrKey<'a, T> {}
-
-impl<'a, T> PartialEq for PtrKey<'a, T> {
-    fn eq(&self, rhs: &Self) -> bool {
-        ptr::eq(self.0, rhs.0)
-    }
-}
-
-impl<'a, T> Eq for PtrKey<'a, T> {}
-
-impl<'a, T> hash::Hash for PtrKey<'a, T> {
-    fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
-        (self.0 as *const T).hash(hasher)
-    }
-}
-
-impl<'a, T> Deref for PtrKey<'a, T> {
-    type Target = T;
-
-    fn deref(&self) -> &Self::Target {
-        self.0
-    }
-}