Diffstat (limited to 'compiler/rustc_data_structures/src')
-rw-r--r--  compiler/rustc_data_structures/src/aligned.rs   |   3
-rw-r--r--  compiler/rustc_data_structures/src/flock.rs     |  13
-rw-r--r--  compiler/rustc_data_structures/src/lib.rs       |   8
-rw-r--r--  compiler/rustc_data_structures/src/marker.rs    |   3
-rw-r--r--  compiler/rustc_data_structures/src/profiling.rs | 113
-rw-r--r--  compiler/rustc_data_structures/src/vec_cache.rs |   2
6 files changed, 100 insertions, 42 deletions
diff --git a/compiler/rustc_data_structures/src/aligned.rs b/compiler/rustc_data_structures/src/aligned.rs
index 111740e5509..bfc7556faf6 100644
--- a/compiler/rustc_data_structures/src/aligned.rs
+++ b/compiler/rustc_data_structures/src/aligned.rs
@@ -1,7 +1,6 @@
+use std::marker::PointeeSized;
 use std::ptr::Alignment;
 
-use rustc_serialize::PointeeSized;
-
 /// Returns the ABI-required minimum alignment of a type in bytes.
 ///
 /// This is equivalent to [`align_of`], but also works for some unsized
diff --git a/compiler/rustc_data_structures/src/flock.rs b/compiler/rustc_data_structures/src/flock.rs
index 60ae7ad115a..f33f6b7cac1 100644
--- a/compiler/rustc_data_structures/src/flock.rs
+++ b/compiler/rustc_data_structures/src/flock.rs
@@ -4,18 +4,7 @@
 //! green/native threading. This is just a bare-bones enough solution for
 //! librustdoc, it is not production quality at all.
 
-// cfg(bootstrap)
-macro_rules! cfg_select_dispatch {
-    ($($tokens:tt)*) => {
-        #[cfg(bootstrap)]
-        cfg_match! { $($tokens)* }
-
-        #[cfg(not(bootstrap))]
-        cfg_select! { $($tokens)* }
-    };
-}
-
-cfg_select_dispatch! {
+cfg_select! {
     target_os = "linux" => {
         mod linux;
         use linux as imp;
diff --git a/compiler/rustc_data_structures/src/lib.rs b/compiler/rustc_data_structures/src/lib.rs
index 0431182e9e2..53178d09348 100644
--- a/compiler/rustc_data_structures/src/lib.rs
+++ b/compiler/rustc_data_structures/src/lib.rs
@@ -10,9 +10,6 @@
 #![allow(internal_features)]
 #![allow(rustc::default_hash_types)]
 #![allow(rustc::potential_query_instability)]
-#![cfg_attr(bootstrap, feature(cfg_match))]
-#![cfg_attr(not(bootstrap), feature(cfg_select))]
-#![cfg_attr(not(bootstrap), feature(sized_hierarchy))]
 #![deny(unsafe_op_in_unsafe_fn)]
 #![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")]
 #![doc(rust_logo)]
@@ -22,6 +19,7 @@
 #![feature(ascii_char_variants)]
 #![feature(assert_matches)]
 #![feature(auto_traits)]
+#![feature(cfg_select)]
 #![feature(core_intrinsics)]
 #![feature(dropck_eyepatch)]
 #![feature(extend_one)]
@@ -33,6 +31,7 @@
 #![feature(ptr_alignment_type)]
 #![feature(rustc_attrs)]
 #![feature(rustdoc_internals)]
+#![feature(sized_hierarchy)]
 #![feature(test)]
 #![feature(thread_id_value)]
 #![feature(type_alias_impl_trait)]
@@ -44,9 +43,6 @@ use std::fmt;
 pub use atomic_ref::AtomicRef;
 pub use ena::{snapshot_vec, undo_log, unify};
 pub use rustc_index::static_assert_size;
-// re-exported for `rustc_smir`
-// FIXME(sized_hierarchy): remove with `cfg(bootstrap)`, see `rustc_serialize/src/lib.rs`
-pub use rustc_serialize::PointeeSized;
 
 pub mod aligned;
 pub mod base_n;
diff --git a/compiler/rustc_data_structures/src/marker.rs b/compiler/rustc_data_structures/src/marker.rs
index 4846bc997f1..2be9ba292f9 100644
--- a/compiler/rustc_data_structures/src/marker.rs
+++ b/compiler/rustc_data_structures/src/marker.rs
@@ -1,6 +1,5 @@
 use std::alloc::Allocator;
-
-use rustc_serialize::PointeeSized;
+use std::marker::PointeeSized;
 
 #[diagnostic::on_unimplemented(message = "`{Self}` doesn't implement `DynSend`. \
 Add it to `rustc_data_structures::marker` or use `IntoDynSyncSend` if it's already `Send`")]
diff --git a/compiler/rustc_data_structures/src/profiling.rs b/compiler/rustc_data_structures/src/profiling.rs
index e3a01e4035c..1b4db7adc27 100644
--- a/compiler/rustc_data_structures/src/profiling.rs
+++ b/compiler/rustc_data_structures/src/profiling.rs
@@ -88,6 +88,7 @@ use std::fmt::Display;
 use std::intrinsics::unlikely;
 use std::path::Path;
 use std::sync::Arc;
+use std::sync::atomic::Ordering;
 use std::time::{Duration, Instant};
 use std::{fs, process};
 
@@ -99,12 +100,15 @@ use tracing::warn;
 
 use crate::fx::FxHashMap;
 use crate::outline;
+use crate::sync::AtomicU64;
 
 bitflags::bitflags! {
     #[derive(Clone, Copy)]
     struct EventFilter: u16 {
         const GENERIC_ACTIVITIES = 1 << 0;
         const QUERY_PROVIDERS = 1 << 1;
+        /// Store detailed instant events, including timestamp and thread ID,
+        /// per each query cache hit. Note that this is quite expensive.
         const QUERY_CACHE_HITS = 1 << 2;
         const QUERY_BLOCKED = 1 << 3;
         const INCR_CACHE_LOADS = 1 << 4;
@@ -113,16 +117,20 @@ bitflags::bitflags! {
         const FUNCTION_ARGS = 1 << 6;
         const LLVM = 1 << 7;
         const INCR_RESULT_HASHING = 1 << 8;
-        const ARTIFACT_SIZES = 1 << 9;
+        const ARTIFACT_SIZES = 1 << 9;
+        /// Store aggregated counts of cache hits per query invocation.
+        const QUERY_CACHE_HIT_COUNTS = 1 << 10;
 
         const DEFAULT = Self::GENERIC_ACTIVITIES.bits() |
             Self::QUERY_PROVIDERS.bits() |
             Self::QUERY_BLOCKED.bits() |
             Self::INCR_CACHE_LOADS.bits() |
             Self::INCR_RESULT_HASHING.bits() |
-            Self::ARTIFACT_SIZES.bits();
+            Self::ARTIFACT_SIZES.bits() |
+            Self::QUERY_CACHE_HIT_COUNTS.bits();
 
         const ARGS = Self::QUERY_KEYS.bits() | Self::FUNCTION_ARGS.bits();
+        const QUERY_CACHE_HIT_COMBINED = Self::QUERY_CACHE_HITS.bits() | Self::QUERY_CACHE_HIT_COUNTS.bits();
     }
 }
 
@@ -134,6 +142,7 @@ const EVENT_FILTERS_BY_NAME: &[(&str, EventFilter)] = &[
     ("generic-activity", EventFilter::GENERIC_ACTIVITIES),
     ("query-provider", EventFilter::QUERY_PROVIDERS),
     ("query-cache-hit", EventFilter::QUERY_CACHE_HITS),
+    ("query-cache-hit-count", EventFilter::QUERY_CACHE_HIT_COUNTS),
    ("query-blocked", EventFilter::QUERY_BLOCKED),
     ("incr-cache-load", EventFilter::INCR_CACHE_LOADS),
     ("query-keys", EventFilter::QUERY_KEYS),
@@ -411,13 +420,24 @@ impl SelfProfilerRef {
         #[inline(never)]
         #[cold]
         fn cold_call(profiler_ref: &SelfProfilerRef, query_invocation_id: QueryInvocationId) {
-            profiler_ref.instant_query_event(
-                |profiler| profiler.query_cache_hit_event_kind,
-                query_invocation_id,
-            );
+            if profiler_ref.event_filter_mask.contains(EventFilter::QUERY_CACHE_HIT_COUNTS) {
+                profiler_ref
+                    .profiler
+                    .as_ref()
+                    .unwrap()
+                    .increment_query_cache_hit_counters(QueryInvocationId(query_invocation_id.0));
+            }
+            if unlikely(profiler_ref.event_filter_mask.contains(EventFilter::QUERY_CACHE_HITS)) {
+                profiler_ref.instant_query_event(
+                    |profiler| profiler.query_cache_hit_event_kind,
+                    query_invocation_id,
+                );
+            }
         }
 
-        if unlikely(self.event_filter_mask.contains(EventFilter::QUERY_CACHE_HITS)) {
+        // We check both kinds of query cache hit events at once, to reduce overhead in the
+        // common case (with self-profile disabled).
+        if unlikely(self.event_filter_mask.intersects(EventFilter::QUERY_CACHE_HIT_COMBINED)) {
             cold_call(self, query_invocation_id);
         }
     }
@@ -489,6 +509,35 @@ impl SelfProfilerRef {
         self.profiler.as_ref().map(|p| p.get_or_alloc_cached_string(s))
     }
 
+    /// Store query cache hits to the self-profile log.
+    /// Should be called once at the end of the compilation session.
+    ///
+    /// The cache hits are stored per **query invocation**, not **per query kind/type**.
+    /// `analyzeme` can later deduplicate individual query labels from the QueryInvocationId event
+    /// IDs.
+    pub fn store_query_cache_hits(&self) {
+        if self.event_filter_mask.contains(EventFilter::QUERY_CACHE_HIT_COUNTS) {
+            let profiler = self.profiler.as_ref().unwrap();
+            let query_hits = profiler.query_hits.read();
+            let builder = EventIdBuilder::new(&profiler.profiler);
+            let thread_id = get_thread_id();
+            for (query_invocation, hit_count) in query_hits.iter().enumerate() {
+                let hit_count = hit_count.load(Ordering::Relaxed);
+                // No need to record empty cache hit counts
+                if hit_count > 0 {
+                    let event_id =
+                        builder.from_label(StringId::new_virtual(query_invocation as u64));
+                    profiler.profiler.record_integer_event(
+                        profiler.query_cache_hit_count_event_kind,
+                        event_id,
+                        thread_id,
+                        hit_count,
+                    );
+                }
+            }
+        }
+    }
+
     #[inline]
     pub fn enabled(&self) -> bool {
         self.profiler.is_some()
@@ -537,6 +586,19 @@ pub struct SelfProfiler {
 
     string_cache: RwLock<FxHashMap<String, StringId>>,
 
+    /// Recording individual query cache hits as "instant" measureme events
+    /// is incredibly expensive. Instead of doing that, we simply aggregate
+    /// cache hit *counts* per query invocation, and then store the final count
+    /// of cache hits per invocation at the end of the compilation session.
+    ///
+    /// With this approach, we don't know the individual thread IDs and timestamps
+    /// of cache hits, but it has very little overhead on top of `-Zself-profile`.
+    /// Recording the cache hits as individual events made compilation 3-5x slower.
+    ///
+    /// Query invocation IDs should be monotonic integers, so we can store them in a vec,
+    /// rather than using a hashmap.
+    query_hits: RwLock<Vec<AtomicU64>>,
+
     query_event_kind: StringId,
     generic_activity_event_kind: StringId,
     incremental_load_result_event_kind: StringId,
@@ -544,6 +606,8 @@ pub struct SelfProfiler {
     query_blocked_event_kind: StringId,
     query_cache_hit_event_kind: StringId,
     artifact_size_event_kind: StringId,
+    /// Total cache hits per query invocation
+    query_cache_hit_count_event_kind: StringId,
 }
 
 impl SelfProfiler {
@@ -573,6 +637,7 @@ impl SelfProfiler {
         let query_blocked_event_kind = profiler.alloc_string("QueryBlocked");
         let query_cache_hit_event_kind = profiler.alloc_string("QueryCacheHit");
         let artifact_size_event_kind = profiler.alloc_string("ArtifactSize");
+        let query_cache_hit_count_event_kind = profiler.alloc_string("QueryCacheHitCount");
 
         let mut event_filter_mask = EventFilter::empty();
 
@@ -618,6 +683,8 @@ impl SelfProfiler {
             query_blocked_event_kind,
             query_cache_hit_event_kind,
             artifact_size_event_kind,
+            query_cache_hit_count_event_kind,
+            query_hits: Default::default(),
         })
     }
 
@@ -627,6 +694,25 @@ impl SelfProfiler {
         self.profiler.alloc_string(s)
     }
 
+    /// Store a cache hit of a query invocation
+    pub fn increment_query_cache_hit_counters(&self, id: QueryInvocationId) {
+        // Fast path: assume that the query was already encountered before, and just record
+        // a cache hit.
+        let mut guard = self.query_hits.upgradable_read();
+        let query_hits = &guard;
+        let index = id.0 as usize;
+        if index < query_hits.len() {
+            // We only want to increment the count, no other synchronization is required
+            query_hits[index].fetch_add(1, Ordering::Relaxed);
+        } else {
+            // If not, we need to extend the query hit map to the highest observed ID
+            guard.with_upgraded(|vec| {
+                vec.resize_with(index + 1, || AtomicU64::new(0));
+                vec[index] = AtomicU64::from(1);
+            });
+        }
+    }
+
     /// Gets a `StringId` for the given string. This method makes sure that
     /// any strings going through it will only be allocated once in the
     /// profiling data.
@@ -859,19 +945,8 @@ fn get_thread_id() -> u32 {
     std::thread::current().id().as_u64().get() as u32
 }
 
-// cfg(bootstrap)
-macro_rules! cfg_select_dispatch {
-    ($($tokens:tt)*) => {
-        #[cfg(bootstrap)]
-        cfg_match! { $($tokens)* }
-
-        #[cfg(not(bootstrap))]
-        cfg_select! { $($tokens)* }
-    };
-}
-
 // Memory reporting
-cfg_select_dispatch! {
+cfg_select! {
     windows => {
         pub fn get_resident_set_size() -> Option<usize> {
             use windows::{
diff --git a/compiler/rustc_data_structures/src/vec_cache.rs b/compiler/rustc_data_structures/src/vec_cache.rs
index df83d15b5f9..599970663db 100644
--- a/compiler/rustc_data_structures/src/vec_cache.rs
+++ b/compiler/rustc_data_structures/src/vec_cache.rs
@@ -257,7 +257,7 @@ unsafe impl<K: Idx, #[may_dangle] V, I> Drop for VecCache<K, V, I> {
         // we are also guaranteed to just need to deallocate any large arrays (not iterate over
         // contents).
         //
-        // Confirm no need to deallocate invidual entries. Note that `V: Copy` is asserted on
+        // Confirm no need to deallocate individual entries. Note that `V: Copy` is asserted on
         // insert/lookup but not necessarily construction, primarily to avoid annoyingly propagating
        // the bounds into struct definitions everywhere.
         assert!(!std::mem::needs_drop::<K>());
