Diffstat (limited to 'compiler/rustc_data_structures')
-rw-r--r--   compiler/rustc_data_structures/src/lib.rs             |   2
-rw-r--r--   compiler/rustc_data_structures/src/vec_cache.rs       | 324
-rw-r--r--   compiler/rustc_data_structures/src/vec_cache/tests.rs |  95
3 files changed, 421 insertions, 0 deletions
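The new vec_cache module below maps a flat u32 key onto one of 21 lazily allocated buckets: the first two buckets hold 2^12 entries each, and every later bucket doubles in size. The following is a minimal, standalone sketch of that index math written separately from the patch; the `split_key` helper and the `main` driver are illustrative names only and do not appear in the diff.

// Illustrative re-derivation of the bucket math used by `SlotIndex::from_index`
// in the new module (the `split_key` name is made up; it is not in the diff).
fn split_key(idx: u32) -> (usize, usize) {
    // ilog2 of the key picks the power-of-two band it falls in; keys below
    // 2^12 all share bucket 0.
    let log = idx.checked_ilog2().unwrap_or(0) as usize;
    if log <= 11 {
        // Bucket 0 holds 2^12 entries and starts at flat index 0.
        (0, idx as usize)
    } else {
        // Bucket `log - 11` holds 2^log entries and starts at flat index 2^log.
        (log - 11, idx as usize - (1usize << log))
    }
}

fn main() {
    assert_eq!(split_key(0), (0, 0));
    assert_eq!(split_key(4095), (0, 4095)); // last entry of bucket 0
    assert_eq!(split_key(4096), (1, 0)); // bucket 1 also holds 2^12 entries
    assert_eq!(split_key(8191), (1, 4095));
    assert_eq!(split_key(8192), (2, 0)); // from here buckets double in size
    assert_eq!(split_key(u32::MAX), (20, 2_147_483_647)); // last slot of bucket 20
}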
diff --git a/compiler/rustc_data_structures/src/lib.rs b/compiler/rustc_data_structures/src/lib.rs
index bede4c49703..65d586124b3 100644
--- a/compiler/rustc_data_structures/src/lib.rs
+++ b/compiler/rustc_data_structures/src/lib.rs
@@ -21,6 +21,7 @@
 #![feature(auto_traits)]
 #![feature(cfg_match)]
 #![feature(core_intrinsics)]
+#![feature(dropck_eyepatch)]
 #![feature(extend_one)]
 #![feature(file_buffered)]
 #![feature(hash_raw_entry)]
@@ -78,6 +79,7 @@ pub mod thinvec;
 pub mod transitive_relation;
 pub mod unhash;
 pub mod unord;
+pub mod vec_cache;
 pub mod work_queue;
 
 mod atomic_ref;
diff --git a/compiler/rustc_data_structures/src/vec_cache.rs b/compiler/rustc_data_structures/src/vec_cache.rs
new file mode 100644
index 00000000000..eb251b587c8
--- /dev/null
+++ b/compiler/rustc_data_structures/src/vec_cache.rs
@@ -0,0 +1,324 @@
+//! VecCache maintains a mapping from K -> (V, I) pairs. K and I must be roughly u32-sized, and V
+//! must be Copy.
+//!
+//! VecCache supports efficient concurrent put/get across the key space, with write-once semantics
+//! (i.e., a given key can only be put once). Subsequent puts will panic.
+//!
+//! This is currently used for query caching.
+
+use std::fmt::Debug;
+use std::marker::PhantomData;
+use std::sync::atomic::{AtomicPtr, AtomicU32, AtomicUsize, Ordering};
+
+use rustc_index::Idx;
+
+struct Slot<V> {
+    // We never construct &Slot<V> so it's fine for this to not be in an UnsafeCell.
+    value: V,
+    // This is both an index and a once-lock.
+    //
+    // 0: not yet initialized.
+    // 1: lock held, initializing.
+    // 2..u32::MAX - 2: initialized.
+    index_and_lock: AtomicU32,
+}
+
+/// This uniquely identifies a single `Slot<V>` entry in the buckets map, and provides accessors
+/// for either getting the value or putting a value.
+#[derive(Copy, Clone, Debug)]
+struct SlotIndex {
+    // the index of the bucket in VecCache (0 to 20)
+    bucket_idx: usize,
+    // number of entries in that bucket
+    entries: usize,
+    // the index of the slot within the bucket
+    index_in_bucket: usize,
+}
+
+// This makes sure the counts are consistent with what we allocate, precomputing each bucket at
+// compile time. Visiting all powers of two is enough to hit all the buckets.
+//
+// We confirm counts are accurate in the slot_index_exhaustive test.
+const ENTRIES_BY_BUCKET: [usize; 21] = {
+    let mut entries = [0; 21];
+    let mut key = 0;
+    loop {
+        let si = SlotIndex::from_index(key);
+        entries[si.bucket_idx] = si.entries;
+        if key == 0 {
+            key = 1;
+        } else if key == (1 << 31) {
+            break;
+        } else {
+            key <<= 1;
+        }
+    }
+    entries
+};
+
+impl SlotIndex {
+    // This unpacks a flat u32 index into identifying which bucket it belongs to and the offset
+    // within that bucket. As noted in the VecCache docs, buckets double in size with each index.
+    // Typically that would mean 32 buckets (2^0 + 2^1 + ... + 2^31, roughly the u32 range), but
+    // to reduce the size of the VecCache struct and avoid uselessly small allocations, we instead
+    // have the first bucket hold 2**12 entries. To simplify the math, the second bucket also has
+    // 2**12 entries, and buckets double from there.
+    //
+    // We assert that [0, 2**32 - 1] uniquely map through this function to individual, consecutive
+    // slots (see `slot_index_exhaustive` in tests).
+    #[inline]
+    const fn from_index(idx: u32) -> Self {
+        let mut bucket = match idx.checked_ilog2() {
+            Some(x) => x as usize,
+            None => 0,
+        };
+        let entries;
+        let running_sum;
+        if bucket <= 11 {
+            entries = 1 << 12;
+            running_sum = 0;
+            bucket = 0;
+        } else {
+            entries = 1 << bucket;
+            running_sum = entries;
+            bucket = bucket - 11;
+        }
+        SlotIndex { bucket_idx: bucket, entries, index_in_bucket: idx as usize - running_sum }
+    }
+
+    // SAFETY: Buckets must be managed solely by functions here (i.e., get/put on SlotIndex) and
+    // `self` comes from SlotIndex::from_index.
+    #[inline]
+    unsafe fn get<V: Copy>(&self, buckets: &[AtomicPtr<Slot<V>>; 21]) -> Option<(V, u32)> {
+        // SAFETY: `bucket_idx` is ilog2(u32).saturating_sub(11), which is at most 20, i.e.,
+        // in-bounds of buckets. See `from_index` for computation.
+        let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
+        let ptr = bucket.load(Ordering::Acquire);
+        // Bucket is not yet initialized: then we obviously won't find this entry in that bucket.
+        if ptr.is_null() {
+            return None;
+        }
+        assert!(self.index_in_bucket < self.entries);
+        // SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
+        // must be inbounds.
+        let slot = unsafe { ptr.add(self.index_in_bucket) };
+
+        // SAFETY: initialized bucket has zeroed all memory within the bucket, so we are valid for
+        // AtomicU32 access.
+        let index_and_lock = unsafe { &(*slot).index_and_lock };
+        let current = index_and_lock.load(Ordering::Acquire);
+        let index = match current {
+            0 => return None,
+            // Treat "initializing" as actually just not initialized at all.
+            // The only reason this is a separate state is that `complete` calls could race and
+            // we can't allow that, but from load perspective there's no difference.
+            1 => return None,
+            _ => current - 2,
+        };
+
+        // SAFETY:
+        // * slot is a valid pointer (buckets are always valid for the index we get).
+        // * value is initialized since we saw a >= 2 index above.
+        // * `V: Copy`, so safe to read.
+        let value = unsafe { (*slot).value };
+        Some((value, index))
+    }
+
+    fn bucket_ptr<V>(&self, bucket: &AtomicPtr<Slot<V>>) -> *mut Slot<V> {
+        let ptr = bucket.load(Ordering::Acquire);
+        if ptr.is_null() { self.initialize_bucket(bucket) } else { ptr }
+    }
+
+    #[cold]
+    fn initialize_bucket<V>(&self, bucket: &AtomicPtr<Slot<V>>) -> *mut Slot<V> {
+        static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
+
+        // If we are initializing the bucket, then acquire a global lock.
+        //
+        // This path is quite cold, so it's cheap to use a global lock. This ensures that we never
+        // have multiple allocations for the same bucket.
+        let _allocator_guard = LOCK.lock().unwrap_or_else(|e| e.into_inner());
+
+        let ptr = bucket.load(Ordering::Acquire);
+
+        // OK, now under the allocator lock, if we're still null then it's definitely us that will
+        // initialize this bucket.
+        if ptr.is_null() {
+            let bucket_layout =
+                std::alloc::Layout::array::<Slot<V>>(self.entries as usize).unwrap();
+            // This is more of a sanity check -- this code is very cold, so it's safe to pay a
+            // little extra cost here.
+            assert!(bucket_layout.size() > 0);
+            // SAFETY: Just checked that size is non-zero.
+            let allocated = unsafe { std::alloc::alloc_zeroed(bucket_layout).cast::<Slot<V>>() };
+            if allocated.is_null() {
+                std::alloc::handle_alloc_error(bucket_layout);
+            }
+            bucket.store(allocated, Ordering::Release);
+            allocated
+        } else {
+            // Otherwise some other thread initialized this bucket after we took the lock. In that
+            // case, just return early.
+            ptr
+        }
+    }
+
+    /// Returns true if this successfully put into the map.
+    #[inline]
+    fn put<V>(&self, buckets: &[AtomicPtr<Slot<V>>; 21], value: V, extra: u32) -> bool {
+        // SAFETY: `bucket_idx` is ilog2(u32).saturating_sub(11), which is at most 20, i.e.,
+        // in-bounds of buckets.
+        let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
+        let ptr = self.bucket_ptr(bucket);
+
+        assert!(self.index_in_bucket < self.entries);
+        // SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
+        // must be inbounds.
+        let slot = unsafe { ptr.add(self.index_in_bucket) };
+
+        // SAFETY: initialized bucket has zeroed all memory within the bucket, so we are valid for
+        // AtomicU32 access.
+        let index_and_lock = unsafe { &(*slot).index_and_lock };
+        match index_and_lock.compare_exchange(0, 1, Ordering::AcqRel, Ordering::Acquire) {
+            Ok(_) => {
+                // We have acquired the initialization lock. It is our job to write `value` and
+                // then set the lock to the real index.
+
+                unsafe {
+                    (&raw mut (*slot).value).write(value);
+                }
+
+                index_and_lock.store(extra.checked_add(2).unwrap(), Ordering::Release);
+
+                true
+            }
+
+            // Treat "initializing" as the caller's fault. Callers are responsible for ensuring
+            // that there are no races on initialization. In the compiler's current usage for
+            // query caches, that's the "active query map" which ensures each query actually runs
+            // once (even if concurrently started).
+            Err(1) => panic!("caller raced calls to put()"),
+
+            // This slot was already populated. Also ignore; currently this is the same as
+            // "initializing".
+            Err(_) => false,
+        }
+    }
+}
+
+pub struct VecCache<K: Idx, V, I> {
+    // Entries per bucket:
+    // Bucket 0:  4096        2^12
+    // Bucket 1:  4096        2^12
+    // Bucket 2:  8192
+    // Bucket 3:  16384
+    // ...
+    // Bucket 19: 1073741824
+    // Bucket 20: 2147483648
+    // The total number of entries if all buckets are initialized is 2^32.
+    buckets: [AtomicPtr<Slot<V>>; 21],
+
+    // In the compiler's current usage these are only *read* during incremental and
+    // self-profiling. They are an optimization over iterating the full buckets array.
+    present: [AtomicPtr<Slot<()>>; 21],
+    len: AtomicUsize,
+
+    key: PhantomData<(K, I)>,
+}
+
+impl<K: Idx, V, I> Default for VecCache<K, V, I> {
+    fn default() -> Self {
+        VecCache {
+            buckets: Default::default(),
+            key: PhantomData,
+            len: Default::default(),
+            present: Default::default(),
+        }
+    }
+}
+
+// SAFETY: No access to `V` is made.
+unsafe impl<K: Idx, #[may_dangle] V, I> Drop for VecCache<K, V, I> {
+    fn drop(&mut self) {
+        // We have unique ownership, so no locks etc. are needed. Since `K` and `V` are both
+        // `Copy`, we are also guaranteed to just need to deallocate any large arrays (not
+        // iterate over contents).
+        //
+        // Confirm no need to deallocate individual entries. Note that `V: Copy` is asserted on
+        // insert/lookup but not necessarily construction, primarily to avoid annoyingly
+        // propagating the bounds into struct definitions everywhere.
+        assert!(!std::mem::needs_drop::<K>());
+        assert!(!std::mem::needs_drop::<V>());
+
+        for (idx, bucket) in self.buckets.iter().enumerate() {
+            let bucket = bucket.load(Ordering::Acquire);
+            if !bucket.is_null() {
+                let layout = std::alloc::Layout::array::<Slot<V>>(ENTRIES_BY_BUCKET[idx]).unwrap();
+                unsafe {
+                    std::alloc::dealloc(bucket.cast(), layout);
+                }
+            }
+        }
+
+        for (idx, bucket) in self.present.iter().enumerate() {
+            let bucket = bucket.load(Ordering::Acquire);
+            if !bucket.is_null() {
+                let layout = std::alloc::Layout::array::<Slot<()>>(ENTRIES_BY_BUCKET[idx]).unwrap();
+                unsafe {
+                    std::alloc::dealloc(bucket.cast(), layout);
+                }
+            }
+        }
+    }
+}
+
+impl<K, V, I> VecCache<K, V, I>
+where
+    K: Eq + Idx + Copy + Debug,
+    V: Copy,
+    I: Idx + Copy,
+{
+    #[inline(always)]
+    pub fn lookup(&self, key: &K) -> Option<(V, I)> {
+        let key = u32::try_from(key.index()).unwrap();
+        let slot_idx = SlotIndex::from_index(key);
+        match unsafe { slot_idx.get(&self.buckets) } {
+            Some((value, idx)) => Some((value, I::new(idx as usize))),
+            None => None,
+        }
+    }
+
+    #[inline]
+    pub fn complete(&self, key: K, value: V, index: I) {
+        let key = u32::try_from(key.index()).unwrap();
+        let slot_idx = SlotIndex::from_index(key);
+        if slot_idx.put(&self.buckets, value, index.index() as u32) {
+            let present_idx = self.len.fetch_add(1, Ordering::Relaxed);
+            let slot = SlotIndex::from_index(present_idx as u32);
+            // We should always be uniquely putting due to `len` fetch_add returning unique
+            // values.
+            assert!(slot.put(&self.present, (), key));
+        }
+    }
+
+    pub fn iter(&self, f: &mut dyn FnMut(&K, &V, I)) {
+        for idx in 0..self.len.load(Ordering::Acquire) {
+            let key = SlotIndex::from_index(idx as u32);
+            match unsafe { key.get(&self.present) } {
+                // This shouldn't happen in our current usage (iter is really only used long
+                // after queries are done running), but if we hit this in practice it's probably
+                // fine to just break early.
+                None => unreachable!(),
+                Some(((), key)) => {
+                    let key = K::new(key as usize);
+                    // unwrap() is OK: present entries are always written only after we put the
+                    // real entry.
+                    let value = self.lookup(&key).unwrap();
+                    f(&key, &value.0, value.1);
+                }
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests;
diff --git a/compiler/rustc_data_structures/src/vec_cache/tests.rs b/compiler/rustc_data_structures/src/vec_cache/tests.rs
new file mode 100644
index 00000000000..a05f2741362
--- /dev/null
+++ b/compiler/rustc_data_structures/src/vec_cache/tests.rs
@@ -0,0 +1,95 @@
+use super::*;
+
+#[test]
+#[cfg(not(miri))]
+fn vec_cache_empty() {
+    let cache: VecCache<u32, u32, u32> = VecCache::default();
+    for key in 0..u32::MAX {
+        assert!(cache.lookup(&key).is_none());
+    }
+}
+
+#[test]
+fn vec_cache_insert_and_check() {
+    let cache: VecCache<u32, u32, u32> = VecCache::default();
+    cache.complete(0, 1, 2);
+    assert_eq!(cache.lookup(&0), Some((1, 2)));
+}
+
+#[test]
+fn sparse_inserts() {
+    let cache: VecCache<u32, u8, u32> = VecCache::default();
+    let end = if cfg!(target_pointer_width = "64") && cfg!(target_os = "linux") {
+        // On 64-bit systems with demand-paged memory we should be able to sparsely allocate all
+        // of the pages needed for these inserts cheaply (without needing to actually have
+        // gigabytes of resident memory).
+        31
+    } else {
+        // Otherwise, still run the test but scaled back:
+        //
+        // Each slot is 8 bytes (a u8 value padded alongside an AtomicU32), so 2^25 entries (on
+        // systems that eagerly commit memory, e.g. Windows) will mean 256 megabytes of allocated
+        // memory. Going beyond that is probably not reasonable for tests.
+        25
+    };
+    for shift in 0..end {
+        let key = 1u32 << shift;
+        cache.complete(key, shift, key);
+        assert_eq!(cache.lookup(&key), Some((shift, key)));
+    }
+}
+
+#[test]
+fn concurrent_stress_check() {
+    let cache: VecCache<u32, u32, u32> = VecCache::default();
+    std::thread::scope(|s| {
+        for idx in 0..100 {
+            let cache = &cache;
+            s.spawn(move || {
+                cache.complete(idx, idx, idx);
+            });
+        }
+    });
+
+    for idx in 0..100 {
+        assert_eq!(cache.lookup(&idx), Some((idx, idx)));
+    }
+}
+
+#[test]
+fn slot_entries_table() {
+    assert_eq!(ENTRIES_BY_BUCKET, [
+        4096, 4096, 8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576, 2097152, 4194304,
+        8388608, 16777216, 33554432, 67108864, 134217728, 268435456, 536870912, 1073741824,
+        2147483648
+    ]);
+}
+
+#[test]
+#[cfg(not(miri))]
+fn slot_index_exhaustive() {
+    let mut buckets = [0u32; 21];
+    for idx in 0..=u32::MAX {
+        buckets[SlotIndex::from_index(idx).bucket_idx] += 1;
+    }
+    let mut prev = None::<SlotIndex>;
+    for idx in 0..=u32::MAX {
+        let slot_idx = SlotIndex::from_index(idx);
+        if let Some(p) = prev {
+            if p.bucket_idx == slot_idx.bucket_idx {
+                assert_eq!(p.index_in_bucket + 1, slot_idx.index_in_bucket);
+            } else {
+                assert_eq!(slot_idx.index_in_bucket, 0);
+            }
+        } else {
+            assert_eq!(idx, 0);
+            assert_eq!(slot_idx.index_in_bucket, 0);
+            assert_eq!(slot_idx.bucket_idx, 0);
+        }
+
+        assert_eq!(buckets[slot_idx.bucket_idx], slot_idx.entries as u32);
+        assert_eq!(ENTRIES_BY_BUCKET[slot_idx.bucket_idx], slot_idx.entries, "{}", idx);
+
+        prev = Some(slot_idx);
+    }
+}
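For a sense of how the cache is driven, here is a minimal usage sketch mirroring the unit tests above. It assumes a crate that can depend on rustc_data_structures and uses the `Idx` impl for `u32` provided by rustc_index for all three type parameters; it is a sketch, not part of the patch or of the compiler's query-system integration.

// Minimal usage sketch, mirroring the unit tests above (not part of the patch).
use rustc_data_structures::vec_cache::VecCache;

fn main() {
    let cache: VecCache<u32, u32, u32> = VecCache::default();

    // Write-once: the first `complete` for a key records its (value, index) pair.
    cache.complete(7, 42, 3);
    assert_eq!(cache.lookup(&7), Some((42, 3)));

    // Keys that were never completed simply miss.
    assert_eq!(cache.lookup(&8), None);

    // `iter` walks only completed entries, via the `present` side table.
    let mut seen = Vec::new();
    cache.iter(&mut |key, value, index| seen.push((*key, *value, index)));
    assert_eq!(seen, vec![(7, 42, 3)]);
}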
