diff options
| author | bors <bors@rust-lang.org> | 2023-11-25 02:01:39 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2023-11-25 02:01:39 +0000 |
| commit | 34c5ab9aac327a8a18e18ea37a2468a320d82fb0 (patch) | |
| tree | 6b568c081d23279b1c6f151f6147890465405bad /compiler/rustc_data_structures/src | |
| parent | 2a48155aec535e140b10cb2553673ced43c6e148 (diff) | |
| parent | 107ea5d3bcc154b4c1dd31e646bb8fc41d4e7531 (diff) | |
| download | rust-34c5ab9aac327a8a18e18ea37a2468a320d82fb0.tar.gz rust-34c5ab9aac327a8a18e18ea37a2468a320d82fb0.zip | |
Auto merge of #118227 - Mark-Simulacrum:worker-local-outline, r=cjgillot
Optimize QueryArena allocation This shifts the WorkerLocal wrapper to be outside the QueryArena, meaning that instead of having each query allocate distinct arenas per-worker we allocate the full set of arenas per-worker. This is primarily a code size optimization (locally, ~85 kilobytes, [perf is reporting >100 kilobytes](https://perf.rust-lang.org/compare.html?start=1fd418f92ed13db88a21865ba5d909abcf16b6cc&end=884c95a3f1fe8d28630ec3cdb0c8f95b2e539fde&stat=instructions%3Au&tab=artifact-size)), saving a bunch of code in the initialization of the arenas which was previously duplicated lots of times (per arena type). Additionally this tells LLVM that the thread count can't be zero in this code (I believe this is true?) which shaves some small amount of bytes off as well since we eliminate checks for zero in the vec allocations.
Diffstat (limited to 'compiler/rustc_data_structures/src')
| -rw-r--r-- | compiler/rustc_data_structures/src/sync/worker_local.rs | 11 |
1 files changed, 7 insertions, 4 deletions
diff --git a/compiler/rustc_data_structures/src/sync/worker_local.rs b/compiler/rustc_data_structures/src/sync/worker_local.rs index ffafdba13ce..b34d3dd9044 100644 --- a/compiler/rustc_data_structures/src/sync/worker_local.rs +++ b/compiler/rustc_data_structures/src/sync/worker_local.rs @@ -1,6 +1,7 @@ use parking_lot::Mutex; use std::cell::Cell; use std::cell::OnceCell; +use std::num::NonZeroUsize; use std::ops::Deref; use std::ptr; use std::sync::Arc; @@ -30,7 +31,7 @@ impl RegistryId { } struct RegistryData { - thread_limit: usize, + thread_limit: NonZeroUsize, threads: Mutex<usize>, } @@ -60,7 +61,7 @@ thread_local! { impl Registry { /// Creates a registry which can hold up to `thread_limit` threads. - pub fn new(thread_limit: usize) -> Self { + pub fn new(thread_limit: NonZeroUsize) -> Self { Registry(Arc::new(RegistryData { thread_limit, threads: Mutex::new(0) })) } @@ -73,7 +74,7 @@ impl Registry { /// Panics if the thread limit is hit or if the thread already has an associated registry. pub fn register(&self) { let mut threads = self.0.threads.lock(); - if *threads < self.0.thread_limit { + if *threads < self.0.thread_limit.get() { REGISTRY.with(|registry| { if registry.get().is_some() { drop(threads); @@ -126,7 +127,9 @@ impl<T> WorkerLocal<T> { { let registry = Registry::current(); WorkerLocal { - locals: (0..registry.0.thread_limit).map(|i| CacheAligned(initial(i))).collect(), + locals: (0..registry.0.thread_limit.get()) + .map(|i| CacheAligned(initial(i))) + .collect(), registry, } } |
