diff options
| author | Mark Rousskov <mark.simulacrum@gmail.com> | 2024-05-05 19:57:24 -0400 |
|---|---|---|
| committer | Mark Rousskov <mark.simulacrum@gmail.com> | 2024-11-15 18:20:32 -0500 |
| commit | da58efb11df229eac6eb727c2884546310f9ffde (patch) | |
| tree | 91897cf85ecde09693842fad8144b6059614c2dc /compiler/rustc_data_structures/src/vec_cache | |
| parent | b73478b6ee1ed915ac3727da02d4675835588538 (diff) | |
| download | rust-da58efb11df229eac6eb727c2884546310f9ffde.tar.gz rust-da58efb11df229eac6eb727c2884546310f9ffde.zip | |
Improve VecCache under parallel frontend
This replaces the single Vec allocation with a series of progressively larger buckets. With the cfg for parallel enabled but with -Zthreads=1, this looks like a slight regression in i-count and cycle counts (<0.1%). With the parallel frontend at -Zthreads=4, this is an improvement (-5% wall-time from 5.788 to 5.4688 on libcore) than our current Lock-based approach, likely due to reducing the bouncing of the cache line holding the lock. At -Zthreads=32 it's a huge improvement (-46%: 8.829 -> 4.7319 seconds).
Diffstat (limited to 'compiler/rustc_data_structures/src/vec_cache')
| -rw-r--r-- | compiler/rustc_data_structures/src/vec_cache/tests.rs | 95 |
1 files changed, 95 insertions, 0 deletions
diff --git a/compiler/rustc_data_structures/src/vec_cache/tests.rs b/compiler/rustc_data_structures/src/vec_cache/tests.rs new file mode 100644 index 00000000000..a05f2741362 --- /dev/null +++ b/compiler/rustc_data_structures/src/vec_cache/tests.rs @@ -0,0 +1,95 @@ +use super::*; + +#[test] +#[cfg(not(miri))] +fn vec_cache_empty() { + let cache: VecCache<u32, u32, u32> = VecCache::default(); + for key in 0..u32::MAX { + assert!(cache.lookup(&key).is_none()); + } +} + +#[test] +fn vec_cache_insert_and_check() { + let cache: VecCache<u32, u32, u32> = VecCache::default(); + cache.complete(0, 1, 2); + assert_eq!(cache.lookup(&0), Some((1, 2))); +} + +#[test] +fn sparse_inserts() { + let cache: VecCache<u32, u8, u32> = VecCache::default(); + let end = if cfg!(target_pointer_width = "64") && cfg!(target_os = "linux") { + // For paged memory, 64-bit systems we should be able to sparsely allocate all of the pages + // needed for these inserts cheaply (without needing to actually have gigabytes of resident + // memory). + 31 + } else { + // Otherwise, still run the test but scaled back: + // + // Each slot is 5 bytes, so 2^25 entries (on non-virtual memory systems, like e.g. Windows) will + // mean 160 megabytes of allocated memory. Going beyond that is probably not reasonable for + // tests. + 25 + }; + for shift in 0..end { + let key = 1u32 << shift; + cache.complete(key, shift, key); + assert_eq!(cache.lookup(&key), Some((shift, key))); + } +} + +#[test] +fn concurrent_stress_check() { + let cache: VecCache<u32, u32, u32> = VecCache::default(); + std::thread::scope(|s| { + for idx in 0..100 { + let cache = &cache; + s.spawn(move || { + cache.complete(idx, idx, idx); + }); + } + }); + + for idx in 0..100 { + assert_eq!(cache.lookup(&idx), Some((idx, idx))); + } +} + +#[test] +fn slot_entries_table() { + assert_eq!(ENTRIES_BY_BUCKET, [ + 4096, 4096, 8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576, 2097152, 4194304, + 8388608, 16777216, 33554432, 67108864, 134217728, 268435456, 536870912, 1073741824, + 2147483648 + ]); +} + +#[test] +#[cfg(not(miri))] +fn slot_index_exhaustive() { + let mut buckets = [0u32; 21]; + for idx in 0..=u32::MAX { + buckets[SlotIndex::from_index(idx).bucket_idx] += 1; + } + let mut prev = None::<SlotIndex>; + for idx in 0..=u32::MAX { + let slot_idx = SlotIndex::from_index(idx); + if let Some(p) = prev { + if p.bucket_idx == slot_idx.bucket_idx { + assert_eq!(p.index_in_bucket + 1, slot_idx.index_in_bucket); + } else { + assert_eq!(slot_idx.index_in_bucket, 0); + } + } else { + assert_eq!(idx, 0); + assert_eq!(slot_idx.index_in_bucket, 0); + assert_eq!(slot_idx.bucket_idx, 0); + } + + assert_eq!(buckets[slot_idx.bucket_idx], slot_idx.entries as u32); + assert_eq!(ENTRIES_BY_BUCKET[slot_idx.bucket_idx], slot_idx.entries, "{}", idx); + + prev = Some(slot_idx); + } +} |
