diff options
| author | bors <bors@rust-lang.org> | 2022-02-15 11:59:37 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2022-02-15 11:59:37 +0000 |
| commit | 55697574915ca58c3fcd7b1c854c1c93e002dc85 (patch) | |
| tree | 43f8669ead4f7dbe2812b96a39d1d52bffc62e62 /compiler/rustc_data_structures/src | |
| parent | 6421a499a50adbaa7b5d0234bdd4817d970f0933 (diff) | |
| parent | 80632de4a1f9d1c0dfe16170fc079e940f42776a (diff) | |
| download | rust-55697574915ca58c3fcd7b1c854c1c93e002dc85.tar.gz rust-55697574915ca58c3fcd7b1c854c1c93e002dc85.zip | |
Auto merge of #93148 - nnethercote:Uniq, r=fee1-dead
Overhaul interning. A number of types are interned and `eq` and `hash` are implemented on the pointer rather than the contents. But this is not well enforced within the type system like you might expect. This PR introduces a new type `Interned` which encapsulates this concept more rigorously, and uses it to convert a couple of the less common interned types. r? `@fee1-dead`
Diffstat (limited to 'compiler/rustc_data_structures/src')
| -rw-r--r-- | compiler/rustc_data_structures/src/intern.rs | 98 | ||||
| -rw-r--r-- | compiler/rustc_data_structures/src/intern/tests.rs | 59 | ||||
| -rw-r--r-- | compiler/rustc_data_structures/src/lib.rs | 3 | ||||
| -rw-r--r-- | compiler/rustc_data_structures/src/ptr_key.rs | 37 |
4 files changed, 159 insertions, 38 deletions
diff --git a/compiler/rustc_data_structures/src/intern.rs b/compiler/rustc_data_structures/src/intern.rs new file mode 100644 index 00000000000..c79a5ebf093 --- /dev/null +++ b/compiler/rustc_data_structures/src/intern.rs @@ -0,0 +1,98 @@ +use std::cmp::Ordering; +use std::hash::{Hash, Hasher}; +use std::ops::Deref; +use std::ptr; + +mod private { + #[derive(Clone, Copy, Debug)] + pub struct PrivateZst; +} + +/// A reference to a value that is interned, and is known to be unique. +/// +/// Note that it is possible to have a `T` and a `Interned<T>` that are (or +/// refer to) equal but different values. But if you have two different +/// `Interned<T>`s, they both refer to the same value, at a single location in +/// memory. This means that equality and hashing can be done on the value's +/// address rather than the value's contents, which can improve performance. +/// +/// The `PrivateZst` field means you can pattern match with `Interned(v, _)` +/// but you can only construct a `Interned` with `new_unchecked`, and not +/// directly. +#[derive(Debug)] +#[cfg_attr(not(bootstrap), rustc_pass_by_value)] +pub struct Interned<'a, T>(pub &'a T, pub private::PrivateZst); + +impl<'a, T> Interned<'a, T> { + /// Create a new `Interned` value. The value referred to *must* be interned + /// and thus be unique, and it *must* remain unique in the future. This + /// function has `_unchecked` in the name but is not `unsafe`, because if + /// the uniqueness condition is violated condition it will cause incorrect + /// behaviour but will not affect memory safety. + #[inline] + pub const fn new_unchecked(t: &'a T) -> Self { + Interned(t, private::PrivateZst) + } +} + +impl<'a, T> Clone for Interned<'a, T> { + fn clone(&self) -> Self { + *self + } +} + +impl<'a, T> Copy for Interned<'a, T> {} + +impl<'a, T> Deref for Interned<'a, T> { + type Target = T; + + #[inline] + fn deref(&self) -> &T { + self.0 + } +} + +impl<'a, T> PartialEq for Interned<'a, T> { + #[inline] + fn eq(&self, other: &Self) -> bool { + // Pointer equality implies equality, due to the uniqueness constraint. + ptr::eq(self.0, other.0) + } +} + +impl<'a, T> Eq for Interned<'a, T> {} + +// In practice you can't intern any `T` that doesn't implement `Eq`, because +// that's needed for hashing. Therefore, we won't be interning any `T` that +// implements `PartialOrd` without also implementing `Ord`. So we can have the +// bound `T: Ord` here and avoid duplication with the `Ord` impl below. +impl<'a, T: Ord> PartialOrd for Interned<'a, T> { + fn partial_cmp(&self, other: &Interned<'a, T>) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl<'a, T: Ord> Ord for Interned<'a, T> { + fn cmp(&self, other: &Interned<'a, T>) -> Ordering { + // Pointer equality implies equality, due to the uniqueness constraint, + // but the contents must be compared otherwise. + if ptr::eq(self.0, other.0) { + Ordering::Equal + } else { + let res = self.0.cmp(&other.0); + debug_assert_ne!(res, Ordering::Equal); + res + } + } +} + +impl<'a, T> Hash for Interned<'a, T> { + #[inline] + fn hash<H: Hasher>(&self, s: &mut H) { + // Pointer hashing is sufficient, due to the uniqueness constraint. + ptr::hash(self.0, s) + } +} + +#[cfg(test)] +mod tests; diff --git a/compiler/rustc_data_structures/src/intern/tests.rs b/compiler/rustc_data_structures/src/intern/tests.rs new file mode 100644 index 00000000000..09810a0850e --- /dev/null +++ b/compiler/rustc_data_structures/src/intern/tests.rs @@ -0,0 +1,59 @@ +use super::*; +use std::cmp::Ordering; + +#[derive(Debug)] +struct S(u32); + +impl PartialEq for S { + fn eq(&self, _other: &Self) -> bool { + panic!("shouldn't be called"); + } +} + +impl Eq for S {} + +impl PartialOrd for S { + fn partial_cmp(&self, other: &S) -> Option<Ordering> { + // The `==` case should be handled by `Interned`. + assert_ne!(self.0, other.0); + self.0.partial_cmp(&other.0) + } +} + +impl Ord for S { + fn cmp(&self, other: &S) -> Ordering { + // The `==` case should be handled by `Interned`. + assert_ne!(self.0, other.0); + self.0.cmp(&other.0) + } +} + +#[test] +fn test_uniq() { + let s1 = S(1); + let s2 = S(2); + let s3 = S(3); + let s4 = S(1); // violates uniqueness + + let v1 = Interned::new_unchecked(&s1); + let v2 = Interned::new_unchecked(&s2); + let v3a = Interned::new_unchecked(&s3); + let v3b = Interned::new_unchecked(&s3); + let v4 = Interned::new_unchecked(&s4); // violates uniqueness + + assert_ne!(v1, v2); + assert_ne!(v2, v3a); + assert_eq!(v1, v1); + assert_eq!(v3a, v3b); + assert_ne!(v1, v4); // same content but different addresses: not equal + + assert_eq!(v1.cmp(&v2), Ordering::Less); + assert_eq!(v3a.cmp(&v2), Ordering::Greater); + assert_eq!(v1.cmp(&v1), Ordering::Equal); // only uses Interned::eq, not S::cmp + assert_eq!(v3a.cmp(&v3b), Ordering::Equal); // only uses Interned::eq, not S::cmp + + assert_eq!(v1.partial_cmp(&v2), Some(Ordering::Less)); + assert_eq!(v3a.partial_cmp(&v2), Some(Ordering::Greater)); + assert_eq!(v1.partial_cmp(&v1), Some(Ordering::Equal)); // only uses Interned::eq, not S::cmp + assert_eq!(v3a.partial_cmp(&v3b), Some(Ordering::Equal)); // only uses Interned::eq, not S::cmp +} diff --git a/compiler/rustc_data_structures/src/lib.rs b/compiler/rustc_data_structures/src/lib.rs index 205f1cd77c0..80f83140f4b 100644 --- a/compiler/rustc_data_structures/src/lib.rs +++ b/compiler/rustc_data_structures/src/lib.rs @@ -21,6 +21,7 @@ #![feature(type_alias_impl_trait)] #![feature(new_uninit)] #![feature(once_cell)] +#![feature(rustc_attrs)] #![feature(test)] #![feature(thread_id_value)] #![feature(vec_into_raw_parts)] @@ -68,12 +69,12 @@ pub mod flock; pub mod functor; pub mod fx; pub mod graph; +pub mod intern; pub mod jobserver; pub mod macros; pub mod map_in_place; pub mod obligation_forest; pub mod owning_ref; -pub mod ptr_key; pub mod sip128; pub mod small_c_str; pub mod snapshot_map; diff --git a/compiler/rustc_data_structures/src/ptr_key.rs b/compiler/rustc_data_structures/src/ptr_key.rs deleted file mode 100644 index 440ccb05d86..00000000000 --- a/compiler/rustc_data_structures/src/ptr_key.rs +++ /dev/null @@ -1,37 +0,0 @@ -use std::ops::Deref; -use std::{hash, ptr}; - -/// A wrapper around reference that compares and hashes like a pointer. -/// Can be used as a key in sets/maps indexed by pointers to avoid `unsafe`. -#[derive(Debug)] -pub struct PtrKey<'a, T>(pub &'a T); - -impl<'a, T> Clone for PtrKey<'a, T> { - fn clone(&self) -> Self { - *self - } -} - -impl<'a, T> Copy for PtrKey<'a, T> {} - -impl<'a, T> PartialEq for PtrKey<'a, T> { - fn eq(&self, rhs: &Self) -> bool { - ptr::eq(self.0, rhs.0) - } -} - -impl<'a, T> Eq for PtrKey<'a, T> {} - -impl<'a, T> hash::Hash for PtrKey<'a, T> { - fn hash<H: hash::Hasher>(&self, hasher: &mut H) { - (self.0 as *const T).hash(hasher) - } -} - -impl<'a, T> Deref for PtrKey<'a, T> { - type Target = T; - - fn deref(&self) -> &Self::Target { - self.0 - } -} |
