Use sharded maps for interning

author: John Kåre Alsaker <john.kare.alsaker@gmail.com> 2019-06-12 14:39:12 +0200
committer: John Kåre Alsaker <john.kare.alsaker@gmail.com> 2019-07-19 23:37:48 +0200
commit: 0e73386a667498414df76f1b6f3bc2471845fbf6 (patch)
tree: 922cc5d73f9a79a36643c3d388faea4de72fb2f3 /src/librustc_data_structures
parent: e3cebcb3bd4ffaf86bb0cdfd2af5b7e698717b01 (diff)
download: rust-0e73386a667498414df76f1b6f3bc2471845fbf6.tar.gz
rust-0e73386a667498414df76f1b6f3bc2471845fbf6.zip
3 files changed, 129 insertions, 59 deletions
diff --git a/src/librustc_data_structures/interner.rs b/src/librustc_data_structures/interner.rs
deleted file mode 100644
index 36ccbb704a7..00000000000
--- a/src/librustc_data_structures/interner.rs
+++ /dev/null
@@ -1,58 +0,0 @@
-use std::hash::Hash;
-use std::hash::BuildHasher;
-use std::hash::Hasher;
-use std::collections::HashMap;
-use std::collections::hash_map::RawEntryMut;
-use std::borrow::Borrow;
-
-pub trait HashInterner<K: Eq + Hash> {
-    fn intern_ref<Q: ?Sized, F: FnOnce() -> K>(&mut self, value: &Q, make: F) -> K
-        where K: Borrow<Q>,
-              Q: Hash + Eq;
-
-    fn intern<Q, F: FnOnce(Q) -> K>(&mut self, value: Q, make: F) -> K
-        where K: Borrow<Q>,
-              Q: Hash + Eq;
-}
-
-impl<K: Eq + Hash + Copy, S: BuildHasher> HashInterner<K> for HashMap<K, (), S> {
-    #[inline]
-    fn intern_ref<Q: ?Sized, F: FnOnce() -> K>(&mut self, value: &Q, make: F) -> K
-        where K: Borrow<Q>,
-              Q: Hash + Eq
-    {
-        let mut hasher = self.hasher().build_hasher();
-        value.hash(&mut hasher);
-        let hash = hasher.finish();
-        let entry = self.raw_entry_mut().from_key_hashed_nocheck(hash, value);
-
-        match entry {
-            RawEntryMut::Occupied(e) => *e.key(),
-            RawEntryMut::Vacant(e) => {
-                let v = make();
-                e.insert_hashed_nocheck(hash, v, ());
-                v
-            }
-        }
-    }
-
-    #[inline]
-    fn intern<Q, F: FnOnce(Q) -> K>(&mut self, value: Q, make: F) -> K
-        where K: Borrow<Q>,
-              Q: Hash + Eq
-    {
-        let mut hasher = self.hasher().build_hasher();
-        value.hash(&mut hasher);
-        let hash = hasher.finish();
-        let entry = self.raw_entry_mut().from_key_hashed_nocheck(hash, &value);
-
-        match entry {
-            RawEntryMut::Occupied(e) => *e.key(),
-            RawEntryMut::Vacant(e) => {
-                let v = make(value);
-                e.insert_hashed_nocheck(hash, v, ());
-                v
-            }
-        }
-    }
-}
diff --git a/src/librustc_data_structures/lib.rs b/src/librustc_data_structures/lib.rs
index b479643a5e8..a2407681e6d 100644
--- a/src/librustc_data_structures/lib.rs
+++ b/src/librustc_data_structures/lib.rs
@@ -77,7 +77,6 @@ pub mod flock;
 pub mod fx;
 pub mod graph;
 pub mod indexed_vec;
-pub mod interner;
 pub mod jobserver;
 pub mod obligation_forest;
 pub mod owning_ref;
@@ -89,6 +88,7 @@ pub use ena::snapshot_vec;
 pub mod sorted_map;
 #[macro_use] pub mod stable_hasher;
 pub mod sync;
+pub mod sharded;
 pub mod tiny_list;
 pub mod thin_vec;
 pub mod transitive_relation;
diff --git a/src/librustc_data_structures/sharded.rs b/src/librustc_data_structures/sharded.rs
new file mode 100644
index 00000000000..31cb22098b8
--- /dev/null
+++ b/src/librustc_data_structures/sharded.rs
@@ -0,0 +1,128 @@
+use std::hash::{Hasher, Hash};
+use std::mem;
+use std::borrow::Borrow;
+use std::collections::hash_map::RawEntryMut;
+use crate::fx::{FxHasher, FxHashMap};
+use crate::sync::{Lock, LockGuard};
+
+#[derive(Clone, Default)]
+#[cfg_attr(parallel_compiler, repr(align(64)))]
+struct CacheAligned<T>(T);
+
+#[cfg(parallel_compiler)]
+// 32 shards is sufficient to reduce contention on an 8-core Ryzen 7 1700,
+// but this should be tested on higher core count CPUs. How the `Sharded` type gets used
+// may also affect the ideal nunber of shards.
+const SHARD_BITS: usize = 5;
+
+#[cfg(not(parallel_compiler))]
+const SHARD_BITS: usize = 0;
+
+const SHARDS: usize = 1 << SHARD_BITS;
+
+/// An array of cache-line aligned inner locked structures with convenience methods.
+#[derive(Clone)]
+pub struct Sharded<T> {
+    shards: [CacheAligned<Lock<T>>; SHARDS],
+}
+
+impl<T: Default> Default for Sharded<T> {
+    #[inline]
+    fn default() -> Self {
+        let mut shards: mem::MaybeUninit<[CacheAligned<Lock<T>>; SHARDS]> =
+            mem::MaybeUninit::uninit();
+        let first = shards.as_mut_ptr() as *mut CacheAligned<Lock<T>>;
+        unsafe {
+            for i in 0..SHARDS {
+                first.add(i).write(CacheAligned(Lock::new(T::default())));
+            }
+            Sharded {
+                shards: shards.assume_init(),
+            }
+        }
+    }
+}
+
+impl<T> Sharded<T> {
+    #[inline]
+    pub fn get_shard_by_value<K: Hash + ?Sized>(&self, val: &K) -> &Lock<T> {
+        if SHARDS == 1 {
+            &self.shards[0].0
+        } else {
+            self.get_shard_by_hash(make_hash(val))
+        }
+    }
+
+    #[inline]
+    pub fn get_shard_by_hash(&self, hash: u64) -> &Lock<T> {
+        let hash_len = mem::size_of::<usize>();
+        // Ignore the top 7 bits as hashbrown uses these and get the next SHARD_BITS highest bits.
+        // hashbrown also uses the lowest bits, so we can't use those
+        let bits = (hash >> (hash_len * 8 - 7 - SHARD_BITS)) as usize;
+        let i = bits % SHARDS;
+        &self.shards[i].0
+    }
+
+    pub fn lock_shards(&self) -> Vec<LockGuard<'_, T>> {
+        (0..SHARDS).map(|i| self.shards[i].0.lock()).collect()
+    }
+
+    pub fn try_lock_shards(&self) -> Option<Vec<LockGuard<'_, T>>> {
+        (0..SHARDS).map(|i| self.shards[i].0.try_lock()).collect()
+    }
+}
+
+pub type ShardedHashMap<K, V> = Sharded<FxHashMap<K, V>>;
+
+impl<K: Eq + Hash, V> ShardedHashMap<K, V> {
+    pub fn len(&self) -> usize {
+        self.lock_shards().iter().map(|shard| shard.len()).sum()
+    }
+}
+
+impl<K: Eq + Hash + Copy> ShardedHashMap<K, ()> {
+    #[inline]
+    pub fn intern_ref<Q: ?Sized>(&self, value: &Q, make: impl FnOnce() -> K) -> K
+        where K: Borrow<Q>,
+              Q: Hash + Eq
+    {
+        let hash = make_hash(value);
+        let mut shard = self.get_shard_by_hash(hash).lock();
+        let entry = shard.raw_entry_mut().from_key_hashed_nocheck(hash, value);
+
+        match entry {
+            RawEntryMut::Occupied(e) => *e.key(),
+            RawEntryMut::Vacant(e) => {
+                let v = make();
+                e.insert_hashed_nocheck(hash, v, ());
+                v
+            }
+        }
+    }
+
+    #[inline]
+    pub fn intern<Q>(&self, value: Q, make: impl FnOnce(Q) -> K) -> K
+        where K: Borrow<Q>,
+              Q: Hash + Eq
+    {
+        let hash = make_hash(&value);
+        let mut shard = self.get_shard_by_hash(hash).lock();
+        let entry = shard.raw_entry_mut().from_key_hashed_nocheck(hash, &value);
+
+        match entry {
+            RawEntryMut::Occupied(e) => *e.key(),
+            RawEntryMut::Vacant(e) => {
+                let v = make(value);
+                e.insert_hashed_nocheck(hash, v, ());
+                v
+            }
+        }
+    }
+}
+
+#[inline]
+fn make_hash<K: Hash + ?Sized>(val: &K) -> u64 {
+    let mut state = FxHasher::default();
+    val.hash(&mut state);
+    state.finish()
+}
author	John Kåre Alsaker <john.kare.alsaker@gmail.com>	2019-06-12 14:39:12 +0200
committer	John Kåre Alsaker <john.kare.alsaker@gmail.com>	2019-07-19 23:37:48 +0200
commit	0e73386a667498414df76f1b6f3bc2471845fbf6 (patch)
tree	922cc5d73f9a79a36643c3d388faea4de72fb2f3 /src/librustc_data_structures
parent	e3cebcb3bd4ffaf86bb0cdfd2af5b7e698717b01 (diff)
download	rust-0e73386a667498414df76f1b6f3bc2471845fbf6.tar.gz rust-0e73386a667498414df76f1b6f3bc2471845fbf6.zip