Diffstat (limited to 'src')
-rw-r--r--  src/librustc/session/mod.rs                24
-rw-r--r--  src/librustc/ty/context.rs                  4
-rw-r--r--  src/librustc/ty/query/plumbing.rs          26
-rw-r--r--  src/librustc/util/profiling.rs            326
-rw-r--r--  src/librustc_codegen_llvm/back/lto.rs     105
-rw-r--r--  src/librustc_codegen_llvm/back/write.rs    25
-rw-r--r--  src/librustc_codegen_llvm/base.rs           2
-rw-r--r--  src/librustc_codegen_llvm/lib.rs            4
-rw-r--r--  src/librustc_codegen_ssa/back/write.rs     78
-rw-r--r--  src/librustc_codegen_ssa/base.rs            5
-rw-r--r--  src/librustc_codegen_ssa/lib.rs             1
-rw-r--r--  src/librustc_incremental/persist/save.rs    6
-rw-r--r--  src/librustc_interface/passes.rs           24
-rw-r--r--  src/librustc_typeck/lib.rs                  4
14 files changed, 366 insertions, 268 deletions
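
At a high level, this patch replaces the closure-based `Session::profiler(|p| ...)` and `CodegenContext::profile(...)` APIs with a cloneable `SelfProfilerRef` whose methods either record an instant event or return a `TimingGuard` that records the matching end event when it is dropped. A minimal, self-contained sketch of that guard pattern follows; the types here are toy stand-ins, not the ones added in `librustc/util/profiling.rs`:

    // Toy stand-in for the TimingGuard introduced by this patch: the start
    // event is recorded on construction and the end event when the guard drops.
    use std::time::Instant;

    struct TimingGuard {
        label: &'static str,
        start: Instant,
    }

    impl TimingGuard {
        fn start(label: &'static str) -> TimingGuard {
            println!("start: {}", label);
            TimingGuard { label, start: Instant::now() }
        }
    }

    impl Drop for TimingGuard {
        fn drop(&mut self) {
            // The real guard emits a TimestampKind::End event; here we just print.
            println!("end:   {} ({:?})", self.label, self.start.elapsed());
        }
    }

    fn main() {
        let _timer = TimingGuard::start("codegen_crate");
        // ... timed work; the end event fires when `_timer` goes out of scope ...
    }
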
diff --git a/src/librustc/session/mod.rs b/src/librustc/session/mod.rs
index 9f57f8dfd9a..f22445f5d47 100644
--- a/src/librustc/session/mod.rs
+++ b/src/librustc/session/mod.rs
@@ -32,7 +32,7 @@ use syntax::source_map;
 use syntax::parse::{self, ParseSess};
 use syntax::symbol::Symbol;
 use syntax_pos::{MultiSpan, Span};
-use crate::util::profiling::SelfProfiler;
+use crate::util::profiling::{SelfProfiler, SelfProfilerRef};
 
 use rustc_target::spec::{PanicStrategy, RelroLevel, Target, TargetTriple};
 use rustc_data_structures::flock;
@@ -129,7 +129,7 @@ pub struct Session {
     pub profile_channel: Lock<Option<mpsc::Sender<ProfileQueriesMsg>>>,
 
     /// Used by `-Z self-profile`.
-    pub self_profiling: Option<Arc<SelfProfiler>>,
+    pub prof: SelfProfilerRef,
 
     /// Some measurements that are being gathered during compilation.
     pub perf_stats: PerfStats,
@@ -835,24 +835,6 @@ impl Session {
         }
     }
 
-    #[inline(never)]
-    #[cold]
-    fn profiler_active<F: FnOnce(&SelfProfiler) -> ()>(&self, f: F) {
-        match &self.self_profiling {
-            None => bug!("profiler_active() called but there was no profiler active"),
-            Some(profiler) => {
-                f(&profiler);
-            }
-        }
-    }
-
-    #[inline(always)]
-    pub fn profiler<F: FnOnce(&SelfProfiler) -> ()>(&self, f: F) {
-        if unlikely!(self.self_profiling.is_some()) {
-            self.profiler_active(f)
-        }
-    }
-
     pub fn print_perf_stats(&self) {
         println!(
             "Total time spent computing symbol hashes:      {}",
@@ -1251,7 +1233,7 @@ fn build_session_(
         imported_macro_spans: OneThread::new(RefCell::new(FxHashMap::default())),
         incr_comp_session: OneThread::new(RefCell::new(IncrCompSession::NotInitialized)),
         cgu_reuse_tracker,
-        self_profiling: self_profiler,
+        prof: SelfProfilerRef::new(self_profiler),
         profile_channel: Lock::new(None),
         perf_stats: PerfStats {
             symbol_hash_time: Lock::new(Duration::from_secs(0)),
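
The `Session` field goes from `Option<Arc<SelfProfiler>>` to a plain `SelfProfilerRef`: the option moves inside the reference type, and when it is `None` the filter mask is set to `EventFilter::NONE` (see the `profiling.rs` hunk below), so every call short-circuits to a no-op. A simplified sketch of that idea, with stand-in names and a bool in place of the `EventFilter` mask:

    use std::sync::Arc;

    struct SelfProfiler; // stand-in for the real profiler state

    #[derive(Clone)]
    struct ProfilerRef {
        profiler: Option<Arc<SelfProfiler>>,
        enabled: bool, // stand-in for the EventFilter mask
    }

    impl ProfilerRef {
        fn new(profiler: Option<Arc<SelfProfiler>>) -> ProfilerRef {
            // With no profiler the flag stays false, so nothing below ever
            // touches the (absent) Arc.
            let enabled = profiler.is_some();
            ProfilerRef { profiler, enabled }
        }

        fn generic_activity(&self, label: &str) {
            if self.enabled {
                println!("recording {}", label);
            }
            // otherwise: no-op, the common case when -Z self-profile is not passed
        }
    }

    fn main() {
        let prof = ProfilerRef::new(None);
        prof.generic_activity("link_crate"); // callers look the same either way
    }
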
diff --git a/src/librustc/ty/context.rs b/src/librustc/ty/context.rs
index 87b9917f340..5f1a17e4a95 100644
--- a/src/librustc/ty/context.rs
+++ b/src/librustc/ty/context.rs
@@ -45,6 +45,7 @@ use crate::ty::CanonicalPolyFnSig;
 use crate::util::common::ErrorReported;
 use crate::util::nodemap::{DefIdMap, DefIdSet, ItemLocalMap, ItemLocalSet};
 use crate::util::nodemap::{FxHashMap, FxHashSet};
+use crate::util::profiling::SelfProfilerRef;
 
 use errors::DiagnosticBuilder;
 use arena::SyncDroplessArena;
@@ -1030,6 +1031,8 @@ pub struct GlobalCtxt<'tcx> {
 
     pub dep_graph: DepGraph,
 
+    pub prof: SelfProfilerRef,
+
     /// Common objects.
     pub common: Common<'tcx>,
 
@@ -1260,6 +1263,7 @@ impl<'tcx> TyCtxt<'tcx> {
             arena: WorkerLocal::new(|_| Arena::default()),
             interners,
             dep_graph,
+            prof: s.prof.clone(),
             common,
             types: common_types,
             lifetimes: common_lifetimes,
diff --git a/src/librustc/ty/query/plumbing.rs b/src/librustc/ty/query/plumbing.rs
index 32858d30b0c..955f1447c55 100644
--- a/src/librustc/ty/query/plumbing.rs
+++ b/src/librustc/ty/query/plumbing.rs
@@ -112,7 +112,7 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
             let mut lock = cache.get_shard_by_value(key).lock();
             if let Some(value) = lock.results.get(key) {
                 profq_msg!(tcx, ProfileQueriesMsg::CacheHit);
-                tcx.sess.profiler(|p| p.record_query_hit(Q::NAME));
+                tcx.prof.query_cache_hit(Q::NAME);
                 let result = (value.value.clone(), value.index);
                 #[cfg(debug_assertions)]
                 {
@@ -128,7 +128,7 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
                             // in another thread has completed. Record how long we wait in the
                             // self-profiler.
                             #[cfg(parallel_compiler)]
-                            tcx.sess.profiler(|p| p.query_blocked_start(Q::NAME));
+                            tcx.prof.query_blocked_start(Q::NAME);
 
                             job.clone()
                         },
@@ -170,7 +170,7 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
             #[cfg(parallel_compiler)]
             {
                 let result = job.r#await(tcx, span);
-                tcx.sess.profiler(|p| p.query_blocked_end(Q::NAME));
+                tcx.prof.query_blocked_end(Q::NAME);
 
                 if let Err(cycle) = result {
                     return TryGetJob::Cycle(Q::handle_cycle_error(tcx, cycle));
@@ -382,8 +382,9 @@ impl<'tcx> TyCtxt<'tcx> {
         }
 
         if Q::ANON {
+
             profq_msg!(self, ProfileQueriesMsg::ProviderBegin);
-            self.sess.profiler(|p| p.start_query(Q::NAME));
+            let prof_timer = self.prof.query_provider(Q::NAME);
 
             let ((result, dep_node_index), diagnostics) = with_diagnostics(|diagnostics| {
                 self.start_query(job.job.clone(), diagnostics, |tcx| {
@@ -393,7 +394,7 @@ impl<'tcx> TyCtxt<'tcx> {
                 })
             });
 
-            self.sess.profiler(|p| p.end_query(Q::NAME));
+            drop(prof_timer);
             profq_msg!(self, ProfileQueriesMsg::ProviderEnd);
 
             self.dep_graph.read_index(dep_node_index);
@@ -451,9 +452,8 @@ impl<'tcx> TyCtxt<'tcx> {
         // First we try to load the result from the on-disk cache.
         let result = if Q::cache_on_disk(self, key.clone(), None) &&
                         self.sess.opts.debugging_opts.incremental_queries {
-            self.sess.profiler(|p| p.incremental_load_result_start(Q::NAME));
+            let _prof_timer = self.prof.incr_cache_loading(Q::NAME);
             let result = Q::try_load_from_disk(self, prev_dep_node_index);
-            self.sess.profiler(|p| p.incremental_load_result_end(Q::NAME));
 
             // We always expect to find a cached result for things that
             // can be forced from `DepNode`.
@@ -469,21 +469,17 @@ impl<'tcx> TyCtxt<'tcx> {
 
         let result = if let Some(result) = result {
             profq_msg!(self, ProfileQueriesMsg::CacheHit);
-            self.sess.profiler(|p| p.record_query_hit(Q::NAME));
-
             result
         } else {
             // We could not load a result from the on-disk cache, so
             // recompute.
-
-            self.sess.profiler(|p| p.start_query(Q::NAME));
+            let _prof_timer = self.prof.query_provider(Q::NAME);
 
             // The dep-graph for this computation is already in-place.
             let result = self.dep_graph.with_ignore(|| {
                 Q::compute(self, key)
             });
 
-            self.sess.profiler(|p| p.end_query(Q::NAME));
             result
         };
 
@@ -551,7 +547,7 @@ impl<'tcx> TyCtxt<'tcx> {
                 key, dep_node);
 
         profq_msg!(self, ProfileQueriesMsg::ProviderBegin);
-        self.sess.profiler(|p| p.start_query(Q::NAME));
+        let prof_timer = self.prof.query_provider(Q::NAME);
 
         let ((result, dep_node_index), diagnostics) = with_diagnostics(|diagnostics| {
             self.start_query(job.job.clone(), diagnostics, |tcx| {
@@ -571,7 +567,7 @@ impl<'tcx> TyCtxt<'tcx> {
             })
         });
 
-        self.sess.profiler(|p| p.end_query(Q::NAME));
+        drop(prof_timer);
         profq_msg!(self, ProfileQueriesMsg::ProviderEnd);
 
         if unlikely!(self.sess.opts.debugging_opts.query_dep_graph) {
@@ -619,7 +615,7 @@ impl<'tcx> TyCtxt<'tcx> {
             let _ = self.get_query::<Q>(DUMMY_SP, key);
         } else {
             profq_msg!(self, ProfileQueriesMsg::CacheHit);
-            self.sess.profiler(|p| p.record_query_hit(Q::NAME));
+            self.prof.query_cache_hit(Q::NAME);
         }
     }
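
Two idioms appear in the hunks above: `let _prof_timer = ...` ends the measurement when the enclosing scope exits, while `let prof_timer = ...; ...; drop(prof_timer)` ends it explicitly so that the bookkeeping that follows (dep-graph reads, debug output) is not attributed to the query provider. A small sketch of the difference, using a toy guard rather than the real `TimingGuard`:

    struct Guard(&'static str);

    impl Drop for Guard {
        fn drop(&mut self) {
            println!("end: {}", self.0);
        }
    }

    fn main() {
        {
            // Scope-based: the guard ends when the block exits.
            let _timer = Guard("query_provider (scope)");
            println!("provider runs");
        }

        // Explicit: drop() ends the measurement before the remaining work.
        let timer = Guard("query_provider (explicit)");
        println!("provider runs");
        drop(timer);
        println!("dep-graph bookkeeping, not included in the timing");
    }
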
 
diff --git a/src/librustc/util/profiling.rs b/src/librustc/util/profiling.rs
index 8624856a4f5..bd02e7f5a14 100644
--- a/src/librustc/util/profiling.rs
+++ b/src/librustc/util/profiling.rs
@@ -1,9 +1,9 @@
-use std::borrow::Cow;
 use std::error::Error;
 use std::fs;
 use std::mem::{self, Discriminant};
 use std::path::Path;
 use std::process;
+use std::sync::Arc;
 use std::thread::ThreadId;
 use std::u32;
 
@@ -62,6 +62,206 @@ fn thread_id_to_u64(tid: ThreadId) -> u64 {
     unsafe { mem::transmute::<ThreadId, u64>(tid) }
 }
 
+
+/// A reference to the SelfProfiler. It can be cloned and sent across thread
+/// boundaries at will.
+#[derive(Clone)]
+pub struct SelfProfilerRef {
+    // This field is `None` if self-profiling is disabled for the current
+    // compilation session.
+    profiler: Option<Arc<SelfProfiler>>,
+
+    // We store the filter mask directly in the reference because that doesn't
+    // cost anything and allows for filtering without having to check whether
+    // the profiler is actually enabled.
+    event_filter_mask: EventFilter,
+}
+
+impl SelfProfilerRef {
+
+    pub fn new(profiler: Option<Arc<SelfProfiler>>) -> SelfProfilerRef {
+        // If there is no SelfProfiler then the filter mask is set to NONE,
+        // ensuring that nothing ever tries to actually access it.
+        let event_filter_mask = profiler
+            .as_ref()
+            .map(|p| p.event_filter_mask)
+            .unwrap_or(EventFilter::NONE);
+
+        SelfProfilerRef {
+            profiler,
+            event_filter_mask,
+        }
+    }
+
+    // This shim makes sure that calls are only executed if the filter mask
+    // lets them pass. It also makes sure that the code stays well optimized
+    // for non-profiling compilation sessions: anything past the filter check
+    // is never inlined, so it doesn't clutter the fast path of the common
+    // case.
+    #[inline(always)]
+    fn exec<F>(&self, event_filter: EventFilter, f: F) -> TimingGuard<'_>
+        where F: for<'a> FnOnce(&'a SelfProfiler) -> TimingGuard<'a>
+    {
+        #[inline(never)]
+        fn cold_call<F>(profiler_ref: &SelfProfilerRef, f: F) -> TimingGuard<'_>
+            where F: for<'a> FnOnce(&'a SelfProfiler) -> TimingGuard<'a>
+        {
+            let profiler = profiler_ref.profiler.as_ref().unwrap();
+            f(&**profiler)
+        }
+
+        if unlikely!(self.event_filter_mask.contains(event_filter)) {
+            cold_call(self, f)
+        } else {
+            TimingGuard::none()
+        }
+    }
+
+    /// Start profiling a generic activity. Profiling continues until the
+    /// TimingGuard returned from this call is dropped.
+    #[inline(always)]
+    pub fn generic_activity(&self, event_id: &str) -> TimingGuard<'_> {
+        self.exec(EventFilter::GENERIC_ACTIVITIES, |profiler| {
+            let event_id = profiler.profiler.alloc_string(event_id);
+            TimingGuard::start(
+                profiler,
+                profiler.generic_activity_event_kind,
+                event_id
+            )
+        })
+    }
+
+    /// Start profiling a generic activity. Profiling continues until
+    /// `generic_activity_end` is called. The RAII-based `generic_activity`
+    /// is usually the better alternative.
+    #[inline(always)]
+    pub fn generic_activity_start(&self, event_id: &str) {
+        self.non_guard_generic_event(
+            |profiler| profiler.generic_activity_event_kind,
+            |profiler| profiler.profiler.alloc_string(event_id),
+            EventFilter::GENERIC_ACTIVITIES,
+            TimestampKind::Start,
+        );
+    }
+
+    /// End profiling a generic activity that was started with
+    /// `generic_activity_start`. The RAII-based `generic_activity` is usually
+    /// the better alternative.
+    #[inline(always)]
+    pub fn generic_activity_end(&self, event_id: &str) {
+        self.non_guard_generic_event(
+            |profiler| profiler.generic_activity_event_kind,
+            |profiler| profiler.profiler.alloc_string(event_id),
+            EventFilter::GENERIC_ACTIVITIES,
+            TimestampKind::End,
+        );
+    }
+
+    /// Start profiling a query provider. Profiling continues until the
+    /// TimingGuard returned from this call is dropped.
+    #[inline(always)]
+    pub fn query_provider(&self, query_name: QueryName) -> TimingGuard<'_> {
+        self.exec(EventFilter::QUERY_PROVIDERS, |profiler| {
+            let event_id = SelfProfiler::get_query_name_string_id(query_name);
+            TimingGuard::start(profiler, profiler.query_event_kind, event_id)
+        })
+    }
+
+    /// Record a query in-memory cache hit.
+    #[inline(always)]
+    pub fn query_cache_hit(&self, query_name: QueryName) {
+        self.non_guard_query_event(
+            |profiler| profiler.query_cache_hit_event_kind,
+            query_name,
+            EventFilter::QUERY_CACHE_HITS,
+            TimestampKind::Instant,
+        );
+    }
+
+    /// Start profiling a query being blocked on a concurrent execution.
+    /// Profiling continues until `query_blocked_end` is called.
+    #[inline(always)]
+    pub fn query_blocked_start(&self, query_name: QueryName) {
+        self.non_guard_query_event(
+            |profiler| profiler.query_blocked_event_kind,
+            query_name,
+            EventFilter::QUERY_BLOCKED,
+            TimestampKind::Start,
+        );
+    }
+
+    /// End profiling a query being blocked on a concurrent execution.
+    #[inline(always)]
+    pub fn query_blocked_end(&self, query_name: QueryName) {
+        self.non_guard_query_event(
+            |profiler| profiler.query_blocked_event_kind,
+            query_name,
+            EventFilter::QUERY_BLOCKED,
+            TimestampKind::End,
+        );
+    }
+
+    /// Start profiling how long it takes to load a query result from the
+    /// incremental compilation on-disk cache. Profiling continues until the
+    /// TimingGuard returned from this call is dropped.
+    #[inline(always)]
+    pub fn incr_cache_loading(&self, query_name: QueryName) -> TimingGuard<'_> {
+        self.exec(EventFilter::INCR_CACHE_LOADS, |profiler| {
+            let event_id = SelfProfiler::get_query_name_string_id(query_name);
+            TimingGuard::start(
+                profiler,
+                profiler.incremental_load_result_event_kind,
+                event_id
+            )
+        })
+    }
+
+    #[inline(always)]
+    fn non_guard_query_event(
+        &self,
+        event_kind: fn(&SelfProfiler) -> StringId,
+        query_name: QueryName,
+        event_filter: EventFilter,
+        timestamp_kind: TimestampKind
+    ) {
+        drop(self.exec(event_filter, |profiler| {
+            let event_id = SelfProfiler::get_query_name_string_id(query_name);
+            let thread_id = thread_id_to_u64(std::thread::current().id());
+
+            profiler.profiler.record_event(
+                event_kind(profiler),
+                event_id,
+                thread_id,
+                timestamp_kind,
+            );
+
+            TimingGuard::none()
+        }));
+    }
+
+    #[inline(always)]
+    fn non_guard_generic_event<F: FnOnce(&SelfProfiler) -> StringId>(
+        &self,
+        event_kind: fn(&SelfProfiler) -> StringId,
+        event_id: F,
+        event_filter: EventFilter,
+        timestamp_kind: TimestampKind
+    ) {
+        drop(self.exec(event_filter, |profiler| {
+            let thread_id = thread_id_to_u64(std::thread::current().id());
+
+            profiler.profiler.record_event(
+                event_kind(profiler),
+                event_id(profiler),
+                thread_id,
+                timestamp_kind,
+            );
+
+            TimingGuard::none()
+        }));
+    }
+}
+
 pub struct SelfProfiler {
     profiler: Profiler,
     event_filter_mask: EventFilter,
@@ -143,103 +343,51 @@ impl SelfProfiler {
         let id = SelfProfiler::get_query_name_string_id(query_name);
         self.profiler.alloc_string_with_reserved_id(id, query_name.as_str());
     }
+}
 
-    #[inline]
-    pub fn start_activity(
-        &self,
-        label: impl Into<Cow<'static, str>>,
-    ) {
-        if self.event_filter_mask.contains(EventFilter::GENERIC_ACTIVITIES) {
-            self.record(&label.into(), self.generic_activity_event_kind, TimestampKind::Start);
-        }
-    }
-
-    #[inline]
-    pub fn end_activity(
-        &self,
-        label: impl Into<Cow<'static, str>>,
-    ) {
-        if self.event_filter_mask.contains(EventFilter::GENERIC_ACTIVITIES) {
-            self.record(&label.into(), self.generic_activity_event_kind, TimestampKind::End);
-        }
-    }
-
-    #[inline]
-    pub fn record_query_hit(&self, query_name: QueryName) {
-        if self.event_filter_mask.contains(EventFilter::QUERY_CACHE_HITS) {
-            self.record_query(query_name, self.query_cache_hit_event_kind, TimestampKind::Instant);
-        }
-    }
-
-    #[inline]
-    pub fn start_query(&self, query_name: QueryName) {
-        if self.event_filter_mask.contains(EventFilter::QUERY_PROVIDERS) {
-            self.record_query(query_name, self.query_event_kind, TimestampKind::Start);
-        }
-    }
-
-    #[inline]
-    pub fn end_query(&self, query_name: QueryName) {
-        if self.event_filter_mask.contains(EventFilter::QUERY_PROVIDERS) {
-            self.record_query(query_name, self.query_event_kind, TimestampKind::End);
-        }
-    }
-
-    #[inline]
-    pub fn incremental_load_result_start(&self, query_name: QueryName) {
-        if self.event_filter_mask.contains(EventFilter::INCR_CACHE_LOADS) {
-            self.record_query(
-                query_name,
-                self.incremental_load_result_event_kind,
-                TimestampKind::Start
-            );
-        }
-    }
-
-    #[inline]
-    pub fn incremental_load_result_end(&self, query_name: QueryName) {
-        if self.event_filter_mask.contains(EventFilter::INCR_CACHE_LOADS) {
-            self.record_query(
-                query_name,
-                self.incremental_load_result_event_kind,
-                TimestampKind::End
-            );
-        }
-    }
+#[must_use]
+pub struct TimingGuard<'a>(Option<TimingGuardInternal<'a>>);
 
-    #[inline]
-    pub fn query_blocked_start(&self, query_name: QueryName) {
-        if self.event_filter_mask.contains(EventFilter::QUERY_BLOCKED) {
-            self.record_query(query_name, self.query_blocked_event_kind, TimestampKind::Start);
-        }
-    }
+struct TimingGuardInternal<'a> {
+    raw_profiler: &'a Profiler,
+    event_id: StringId,
+    event_kind: StringId,
+    thread_id: u64,
+}
 
+impl<'a> TimingGuard<'a> {
     #[inline]
-    pub fn query_blocked_end(&self, query_name: QueryName) {
-        if self.event_filter_mask.contains(EventFilter::QUERY_BLOCKED) {
-            self.record_query(query_name, self.query_blocked_event_kind, TimestampKind::End);
-        }
+    pub fn start(
+        profiler: &'a SelfProfiler,
+        event_kind: StringId,
+        event_id: StringId,
+    ) -> TimingGuard<'a> {
+        let thread_id = thread_id_to_u64(std::thread::current().id());
+        let raw_profiler = &profiler.profiler;
+        raw_profiler.record_event(event_kind, event_id, thread_id, TimestampKind::Start);
+
+        TimingGuard(Some(TimingGuardInternal {
+            raw_profiler,
+            event_kind,
+            event_id,
+            thread_id,
+        }))
     }
 
     #[inline]
-    fn record(&self, event_id: &str, event_kind: StringId, timestamp_kind: TimestampKind) {
-        let thread_id = thread_id_to_u64(std::thread::current().id());
-
-        let event_id = self.profiler.alloc_string(event_id);
-        self.profiler.record_event(event_kind, event_id, thread_id, timestamp_kind);
+    pub fn none() -> TimingGuard<'a> {
+        TimingGuard(None)
     }
+}
 
+impl<'a> Drop for TimingGuardInternal<'a> {
     #[inline]
-    fn record_query(
-        &self,
-        query_name: QueryName,
-        event_kind: StringId,
-        timestamp_kind: TimestampKind,
-    ) {
-        let dep_node_name = SelfProfiler::get_query_name_string_id(query_name);
-
-        let thread_id = thread_id_to_u64(std::thread::current().id());
-
-        self.profiler.record_event(event_kind, dep_node_name, thread_id, timestamp_kind);
+    fn drop(&mut self) {
+        self.raw_profiler.record_event(
+            self.event_kind,
+            self.event_id,
+            self.thread_id,
+            TimestampKind::End
+        );
     }
 }
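
The new `SelfProfilerRef::exec` keeps only the filter-mask test on the fast path: the check itself is `#[inline(always)]`, while everything that actually touches the profiler lives in a nested `#[inline(never)]` function, so call sites in a non-profiling session should compile down to little more than a predicted-not-taken branch. A standalone sketch of that split, with a plain bool in place of the `EventFilter` bitflags and a string in place of the real profiler:

    struct ProfilerRef {
        enabled: bool,            // stand-in for the EventFilter mask
        profiler: Option<String>, // stand-in for Option<Arc<SelfProfiler>>
    }

    impl ProfilerRef {
        #[inline(always)]
        fn exec(&self, f: impl FnOnce(&str)) {
            // Only reached when profiling is enabled, so the unwrap cannot fail.
            #[inline(never)]
            fn cold_call(this: &ProfilerRef, f: impl FnOnce(&str)) {
                f(this.profiler.as_deref().unwrap());
            }

            // Only this cheap check is inlined into every call site.
            if self.enabled {
                cold_call(self, f);
            }
        }
    }

    fn main() {
        let prof = ProfilerRef { enabled: true, profiler: Some("self-profile".to_string()) };
        prof.exec(|p| println!("recording an event via {}", p));

        let off = ProfilerRef { enabled: false, profiler: None };
        off.exec(|_| unreachable!()); // never called: the mask check fails first
    }
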
diff --git a/src/librustc_codegen_llvm/back/lto.rs b/src/librustc_codegen_llvm/back/lto.rs
index a43fbb68dba..c4368d2cb8b 100644
--- a/src/librustc_codegen_llvm/back/lto.rs
+++ b/src/librustc_codegen_llvm/back/lto.rs
@@ -62,11 +62,13 @@ fn prepare_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
     };
     let exported_symbols = cgcx.exported_symbols
         .as_ref().expect("needs exported symbols for LTO");
-    let mut symbol_white_list = exported_symbols[&LOCAL_CRATE]
-        .iter()
-        .filter_map(symbol_filter)
-        .collect::<Vec<CString>>();
-    let _timer = cgcx.profile_activity("generate_symbol_white_list_for_thinlto");
+    let mut symbol_white_list = {
+        let _timer = cgcx.prof.generic_activity("LLVM_lto_generate_symbol_white_list");
+        exported_symbols[&LOCAL_CRATE]
+            .iter()
+            .filter_map(symbol_filter)
+            .collect::<Vec<CString>>()
+    };
     info!("{} symbols to preserve in this crate", symbol_white_list.len());
 
     // If we're performing LTO for the entire crate graph, then for each of our
@@ -95,14 +97,17 @@ fn prepare_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
         }
 
         for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() {
-            let _timer = cgcx.profile_activity(format!("load: {}", path.display()));
             let exported_symbols = cgcx.exported_symbols
                 .as_ref().expect("needs exported symbols for LTO");
-            symbol_white_list.extend(
-                exported_symbols[&cnum]
-                    .iter()
-                    .filter_map(symbol_filter));
+            {
+                let _timer = cgcx.prof.generic_activity("LLVM_lto_generate_symbol_white_list");
+                symbol_white_list.extend(
+                    exported_symbols[&cnum]
+                        .iter()
+                        .filter_map(symbol_filter));
+            }
 
+            let _timer = cgcx.prof.generic_activity("LLVM_lto_load_upstream_bitcode");
             let archive = ArchiveRO::open(&path).expect("wanted an rlib");
             let bytecodes = archive.iter().filter_map(|child| {
                 child.ok().and_then(|c| c.name().map(|name| (name, c)))
@@ -189,6 +194,7 @@ fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
            symbol_white_list: &[*const libc::c_char])
     -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError>
 {
+    let _timer = cgcx.prof.generic_activity("LLVM_fat_lto_build_monolithic_module");
     info!("going for a fat lto");
 
     // Sort out all our lists of incoming modules into two lists.
@@ -287,6 +293,7 @@ fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
         // save and persist everything with the original module.
         let mut linker = Linker::new(llmod);
         for (bc_decoded, name) in serialized_modules {
+            let _timer = cgcx.prof.generic_activity("LLVM_fat_lto_link_module");
             info!("linking {:?}", name);
             time_ext(cgcx.time_passes, None, &format!("ll link {:?}", name), || {
                 let data = bc_decoded.data();
@@ -388,6 +395,7 @@ fn thin_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
             symbol_white_list: &[*const libc::c_char])
     -> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError>
 {
+    let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_global_analysis");
     unsafe {
         info!("going for that thin, thin LTO");
 
@@ -601,16 +609,6 @@ impl ModuleBuffer {
             llvm::LLVMRustModuleBufferCreate(m)
         })
     }
-
-    pub fn parse<'a>(
-        &self,
-        name: &str,
-        cx: &'a llvm::Context,
-        handler: &Handler,
-    ) -> Result<&'a llvm::Module, FatalError> {
-        let name = CString::new(name).unwrap();
-        parse_module(cx, &name, self.data(), handler)
-    }
 }
 
 impl ModuleBufferMethods for ModuleBuffer {
@@ -723,7 +721,7 @@ pub unsafe fn optimize_thin_module(
         // Like with "fat" LTO, get some better optimizations if landing pads
         // are disabled by removing all landing pads.
         if cgcx.no_landing_pads {
-            let _timer = cgcx.profile_activity("LLVM_remove_landing_pads");
+            let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_remove_landing_pads");
             llvm::LLVMRustMarkAllFunctionsNounwind(llmod);
             save_temp_bitcode(&cgcx, &module, "thin-lto-after-nounwind");
         }
@@ -736,26 +734,41 @@ pub unsafe fn optimize_thin_module(
         //
         // You can find some more comments about these functions in the LLVM
         // bindings we've got (currently `PassWrapper.cpp`)
-        if !llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod) {
-            let msg = "failed to prepare thin LTO module";
-            return Err(write::llvm_err(&diag_handler, msg))
+        {
+            let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_rename");
+            if !llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod) {
+                let msg = "failed to prepare thin LTO module";
+                return Err(write::llvm_err(&diag_handler, msg))
+            }
+            save_temp_bitcode(cgcx, &module, "thin-lto-after-rename");
         }
-        save_temp_bitcode(cgcx, &module, "thin-lto-after-rename");
-        if !llvm::LLVMRustPrepareThinLTOResolveWeak(thin_module.shared.data.0, llmod) {
-            let msg = "failed to prepare thin LTO module";
-            return Err(write::llvm_err(&diag_handler, msg))
+
+        {
+            let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_resolve_weak");
+            if !llvm::LLVMRustPrepareThinLTOResolveWeak(thin_module.shared.data.0, llmod) {
+                let msg = "failed to prepare thin LTO module";
+                return Err(write::llvm_err(&diag_handler, msg))
+            }
+            save_temp_bitcode(cgcx, &module, "thin-lto-after-resolve");
         }
-        save_temp_bitcode(cgcx, &module, "thin-lto-after-resolve");
-        if !llvm::LLVMRustPrepareThinLTOInternalize(thin_module.shared.data.0, llmod) {
-            let msg = "failed to prepare thin LTO module";
-            return Err(write::llvm_err(&diag_handler, msg))
+
+        {
+            let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_internalize");
+            if !llvm::LLVMRustPrepareThinLTOInternalize(thin_module.shared.data.0, llmod) {
+                let msg = "failed to prepare thin LTO module";
+                return Err(write::llvm_err(&diag_handler, msg))
+            }
+            save_temp_bitcode(cgcx, &module, "thin-lto-after-internalize");
         }
-        save_temp_bitcode(cgcx, &module, "thin-lto-after-internalize");
-        if !llvm::LLVMRustPrepareThinLTOImport(thin_module.shared.data.0, llmod) {
-            let msg = "failed to prepare thin LTO module";
-            return Err(write::llvm_err(&diag_handler, msg))
+
+        {
+            let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_import");
+            if !llvm::LLVMRustPrepareThinLTOImport(thin_module.shared.data.0, llmod) {
+                let msg = "failed to prepare thin LTO module";
+                return Err(write::llvm_err(&diag_handler, msg))
+            }
+            save_temp_bitcode(cgcx, &module, "thin-lto-after-import");
         }
-        save_temp_bitcode(cgcx, &module, "thin-lto-after-import");
 
         // Ok now this is a bit unfortunate. This is also something you won't
         // find upstream in LLVM's ThinLTO passes! This is a hack for now to
@@ -786,18 +799,24 @@ pub unsafe fn optimize_thin_module(
         // not too much) but for now at least gets LLVM to emit valid DWARF (or
         // so it appears). Hopefully we can remove this once upstream bugs are
         // fixed in LLVM.
-        llvm::LLVMRustThinLTOPatchDICompileUnit(llmod, cu1);
-        save_temp_bitcode(cgcx, &module, "thin-lto-after-patch");
+        {
+            let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_patch_debuginfo");
+            llvm::LLVMRustThinLTOPatchDICompileUnit(llmod, cu1);
+            save_temp_bitcode(cgcx, &module, "thin-lto-after-patch");
+        }
 
         // Alright now that we've done everything related to the ThinLTO
         // analysis it's time to run some optimizations! Here we use the same
         // `run_pass_manager` as the "fat" LTO above except that we tell it to
         // populate a thin-specific pass manager, which presumably LLVM treats a
         // little differently.
-        info!("running thin lto passes over {}", module.name);
-        let config = cgcx.config(module.kind);
-        run_pass_manager(cgcx, &module, config, true);
-        save_temp_bitcode(cgcx, &module, "thin-lto-after-pm");
+        {
+            let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_optimize");
+            info!("running thin lto passes over {}", module.name);
+            let config = cgcx.config(module.kind);
+            run_pass_manager(cgcx, &module, config, true);
+            save_temp_bitcode(cgcx, &module, "thin-lto-after-pm");
+        }
     }
     Ok(module)
 }
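
Each ThinLTO phase in `optimize_thin_module` now sits in its own block with a `_timer` guard, so the end event fires at the phase boundary, and it also fires on the early `return Err(...)` paths because the guard is dropped as part of the return. A small sketch of that behaviour with a toy guard:

    struct Guard(&'static str);

    impl Drop for Guard {
        fn drop(&mut self) {
            println!("end: {}", self.0);
        }
    }

    fn prepare(fail: bool) -> Result<(), String> {
        {
            let _timer = Guard("LLVM_thin_lto_rename");
            if fail {
                // The guard is dropped here too, so the phase still gets an end event.
                return Err("failed to prepare thin LTO module".to_string());
            }
            // ... successful rename work ...
        }

        {
            let _timer = Guard("LLVM_thin_lto_resolve_weak");
            // ... next phase ...
        }

        Ok(())
    }

    fn main() {
        println!("{:?}", prepare(true));  // end event, then Err(...)
        println!("{:?}", prepare(false)); // both phases emit end events
    }
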
diff --git a/src/librustc_codegen_llvm/back/write.rs b/src/librustc_codegen_llvm/back/write.rs
index 253110dcb34..78db90b57b5 100644
--- a/src/librustc_codegen_llvm/back/write.rs
+++ b/src/librustc_codegen_llvm/back/write.rs
@@ -306,6 +306,8 @@ pub(crate) unsafe fn optimize(cgcx: &CodegenContext<LlvmCodegenBackend>,
                    config: &ModuleConfig)
     -> Result<(), FatalError>
 {
+    let _timer = cgcx.prof.generic_activity("LLVM_module_optimize");
+
     let llmod = module.module_llvm.llmod();
     let llcx = &*module.module_llvm.llcx;
     let tm = &*module.module_llvm.tm;
@@ -423,7 +425,7 @@ pub(crate) unsafe fn optimize(cgcx: &CodegenContext<LlvmCodegenBackend>,
 
         // Finally, run the actual optimization passes
         {
-            let _timer = cgcx.profile_activity("LLVM_function_passes");
+            let _timer = cgcx.prof.generic_activity("LLVM_module_optimize_function_passes");
             time_ext(config.time_passes,
                         None,
                         &format!("llvm function passes [{}]", module_name.unwrap()),
@@ -432,7 +434,7 @@ pub(crate) unsafe fn optimize(cgcx: &CodegenContext<LlvmCodegenBackend>,
             });
         }
         {
-            let _timer = cgcx.profile_activity("LLVM_module_passes");
+            let _timer = cgcx.prof.generic_activity("LLVM_module_optimize_module_passes");
             time_ext(config.time_passes,
                     None,
                     &format!("llvm module passes [{}]", module_name.unwrap()),
@@ -454,7 +456,7 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<LlvmCodegenBackend>,
                   config: &ModuleConfig)
     -> Result<CompiledModule, FatalError>
 {
-    let _timer = cgcx.profile_activity("codegen");
+    let _timer = cgcx.prof.generic_activity("LLVM_module_codegen");
     {
         let llmod = module.module_llvm.llmod();
         let llcx = &*module.module_llvm.llcx;
@@ -505,12 +507,12 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<LlvmCodegenBackend>,
 
 
         if write_bc || config.emit_bc_compressed || config.embed_bitcode {
-            let _timer = cgcx.profile_activity("LLVM_make_bitcode");
+            let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_make_bitcode");
             let thin = ThinBuffer::new(llmod);
             let data = thin.data();
 
             if write_bc {
-                let _timer = cgcx.profile_activity("LLVM_emit_bitcode");
+                let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_emit_bitcode");
                 if let Err(e) = fs::write(&bc_out, data) {
                     let msg = format!("failed to write bytecode to {}: {}", bc_out.display(), e);
                     diag_handler.err(&msg);
@@ -518,12 +520,13 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<LlvmCodegenBackend>,
             }
 
             if config.embed_bitcode {
-                let _timer = cgcx.profile_activity("LLVM_embed_bitcode");
+                let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_embed_bitcode");
                 embed_bitcode(cgcx, llcx, llmod, Some(data));
             }
 
             if config.emit_bc_compressed {
-                let _timer = cgcx.profile_activity("LLVM_compress_bitcode");
+                let _timer =
+                    cgcx.prof.generic_activity("LLVM_module_codegen_emit_compressed_bitcode");
                 let dst = bc_out.with_extension(RLIB_BYTECODE_EXTENSION);
                 let data = bytecode::encode(&module.name, data);
                 if let Err(e) = fs::write(&dst, data) {
@@ -538,7 +541,7 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<LlvmCodegenBackend>,
         time_ext(config.time_passes, None, &format!("codegen passes [{}]", module_name.unwrap()),
             || -> Result<(), FatalError> {
             if config.emit_ir {
-                let _timer = cgcx.profile_activity("LLVM_emit_ir");
+                let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_emit_ir");
                 let out = cgcx.output_filenames.temp_path(OutputType::LlvmAssembly, module_name);
                 let out_c = path_to_c_string(&out);
 
@@ -585,7 +588,7 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<LlvmCodegenBackend>,
             }
 
             if config.emit_asm || asm_to_obj {
-                let _timer = cgcx.profile_activity("LLVM_emit_asm");
+                let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_emit_asm");
                 let path = cgcx.output_filenames.temp_path(OutputType::Assembly, module_name);
 
                 // We can't use the same module for asm and binary output, because that triggers
@@ -603,13 +606,13 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<LlvmCodegenBackend>,
             }
 
             if write_obj {
-                let _timer = cgcx.profile_activity("LLVM_emit_obj");
+                let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_emit_obj");
                 with_codegen(tm, llmod, config.no_builtins, |cpm| {
                     write_output_file(diag_handler, tm, cpm, llmod, &obj_out,
                                       llvm::FileType::ObjectFile)
                 })?;
             } else if asm_to_obj {
-                let _timer = cgcx.profile_activity("LLVM_asm_to_obj");
+                let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_asm_to_obj");
                 let assembly = cgcx.output_filenames.temp_path(OutputType::Assembly, module_name);
                 run_assembler(cgcx, diag_handler, &assembly, &obj_out);
 
diff --git a/src/librustc_codegen_llvm/base.rs b/src/librustc_codegen_llvm/base.rs
index 5758cdbebf7..bd7d0d4017d 100644
--- a/src/librustc_codegen_llvm/base.rs
+++ b/src/librustc_codegen_llvm/base.rs
@@ -108,6 +108,7 @@ pub fn compile_codegen_unit(
     cgu_name: InternedString,
     tx_to_llvm_workers: &std::sync::mpsc::Sender<Box<dyn std::any::Any + Send>>,
 ) {
+    let prof_timer = tcx.prof.generic_activity("codegen_module");
     let start_time = Instant::now();
 
     let dep_node = tcx.codegen_unit(cgu_name).codegen_dep_node(tcx);
@@ -119,6 +120,7 @@ pub fn compile_codegen_unit(
         dep_graph::hash_result,
     );
     let time_to_codegen = start_time.elapsed();
+    drop(prof_timer);
 
     // We assume that the cost to run LLVM on a CGU is proportional to
     // the time we needed for codegenning it.
diff --git a/src/librustc_codegen_llvm/lib.rs b/src/librustc_codegen_llvm/lib.rs
index 309a17a01e3..87eab484faf 100644
--- a/src/librustc_codegen_llvm/lib.rs
+++ b/src/librustc_codegen_llvm/lib.rs
@@ -324,8 +324,9 @@ impl CodegenBackend for LlvmCodegenBackend {
 
         // Run the linker on any artifacts that resulted from the LLVM run.
         // This should produce either a finished executable or library.
-        sess.profiler(|p| p.start_activity("link_crate"));
         time(sess, "linking", || {
+            let _prof_timer = sess.prof.generic_activity("link_crate");
+
             use rustc_codegen_ssa::back::link::link_binary;
             use crate::back::archive::LlvmArchiveBuilder;
 
@@ -338,7 +339,6 @@ impl CodegenBackend for LlvmCodegenBackend {
                 target_cpu,
             );
         });
-        sess.profiler(|p| p.end_activity("link_crate"));
 
         // Now that we won't touch anything in the incremental compilation directory
         // any more, we can finalize it (which involves renaming it)
diff --git a/src/librustc_codegen_ssa/back/write.rs b/src/librustc_codegen_ssa/back/write.rs
index 3c5fbfd0f86..f1cfac27033 100644
--- a/src/librustc_codegen_ssa/back/write.rs
+++ b/src/librustc_codegen_ssa/back/write.rs
@@ -19,7 +19,7 @@ use rustc::util::nodemap::FxHashMap;
 use rustc::hir::def_id::{CrateNum, LOCAL_CRATE};
 use rustc::ty::TyCtxt;
 use rustc::util::common::{time_depth, set_time_depth, print_time_passes_entry};
-use rustc::util::profiling::SelfProfiler;
+use rustc::util::profiling::SelfProfilerRef;
 use rustc_fs_util::link_or_copy;
 use rustc_data_structures::svh::Svh;
 use rustc_errors::{Handler, Level, FatalError, DiagnosticId};
@@ -31,7 +31,6 @@ use syntax_pos::symbol::{Symbol, sym};
 use jobserver::{Client, Acquired};
 
 use std::any::Any;
-use std::borrow::Cow;
 use std::fs;
 use std::io;
 use std::mem;
@@ -196,42 +195,13 @@ impl<B: WriteBackendMethods> Clone for TargetMachineFactory<B> {
     }
 }
 
-pub struct ProfileGenericActivityTimer {
-    profiler: Option<Arc<SelfProfiler>>,
-    label: Cow<'static, str>,
-}
-
-impl ProfileGenericActivityTimer {
-    pub fn start(
-        profiler: Option<Arc<SelfProfiler>>,
-        label: Cow<'static, str>,
-    ) -> ProfileGenericActivityTimer {
-        if let Some(profiler) = &profiler {
-            profiler.start_activity(label.clone());
-        }
-
-        ProfileGenericActivityTimer {
-            profiler,
-            label,
-        }
-    }
-}
-
-impl Drop for ProfileGenericActivityTimer {
-    fn drop(&mut self) {
-        if let Some(profiler) = &self.profiler {
-            profiler.end_activity(self.label.clone());
-        }
-    }
-}
-
 /// Additional resources used by optimize_and_codegen (not module specific)
 #[derive(Clone)]
 pub struct CodegenContext<B: WriteBackendMethods> {
     // Resources needed when running LTO
     pub backend: B,
     pub time_passes: bool,
-    pub profiler: Option<Arc<SelfProfiler>>,
+    pub prof: SelfProfilerRef,
     pub lto: Lto,
     pub no_landing_pads: bool,
     pub save_temps: bool,
@@ -283,31 +253,6 @@ impl<B: WriteBackendMethods> CodegenContext<B> {
             ModuleKind::Allocator => &self.allocator_module_config,
         }
     }
-
-    #[inline(never)]
-    #[cold]
-    fn profiler_active<F: FnOnce(&SelfProfiler) -> ()>(&self, f: F) {
-        match &self.profiler {
-            None => bug!("profiler_active() called but there was no profiler active"),
-            Some(profiler) => {
-                f(&*profiler);
-            }
-        }
-    }
-
-    #[inline(always)]
-    pub fn profile<F: FnOnce(&SelfProfiler) -> ()>(&self, f: F) {
-        if unlikely!(self.profiler.is_some()) {
-            self.profiler_active(f)
-        }
-    }
-
-    pub fn profile_activity(
-        &self,
-        label: impl Into<Cow<'static, str>>,
-    ) -> ProfileGenericActivityTimer {
-        ProfileGenericActivityTimer::start(self.profiler.clone(), label.into())
-    }
 }
 
 fn generate_lto_work<B: ExtraBackendMethods>(
@@ -316,7 +261,7 @@ fn generate_lto_work<B: ExtraBackendMethods>(
     needs_thin_lto: Vec<(String, B::ThinBuffer)>,
     import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>
 ) -> Vec<(WorkItem<B>, u64)> {
-    cgcx.profile(|p| p.start_activity("codegen_run_lto"));
+    let _prof_timer = cgcx.prof.generic_activity("codegen_run_lto");
 
     let (lto_modules, copy_jobs) = if !needs_fat_lto.is_empty() {
         assert!(needs_thin_lto.is_empty());
@@ -343,8 +288,6 @@ fn generate_lto_work<B: ExtraBackendMethods>(
         }), 0)
     })).collect();
 
-    cgcx.profile(|p| p.end_activity("codegen_run_lto"));
-
     result
 }
 
@@ -380,6 +323,9 @@ pub fn start_async_codegen<B: ExtraBackendMethods>(
 ) -> OngoingCodegen<B> {
     let (coordinator_send, coordinator_receive) = channel();
     let sess = tcx.sess;
+
+    sess.prof.generic_activity_start("codegen_and_optimize_crate");
+
     let crate_name = tcx.crate_name(LOCAL_CRATE);
     let crate_hash = tcx.crate_hash(LOCAL_CRATE);
     let no_builtins = attr::contains_name(&tcx.hir().krate().attrs, sym::no_builtins);
@@ -1088,7 +1034,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
         save_temps: sess.opts.cg.save_temps,
         opts: Arc::new(sess.opts.clone()),
         time_passes: sess.time_extended(),
-        profiler: sess.self_profiling.clone(),
+        prof: sess.prof.clone(),
         exported_symbols,
         plugin_passes: sess.plugin_llvm_passes.borrow().clone(),
         remark: sess.opts.cg.remark.clone(),
@@ -1645,12 +1591,8 @@ fn spawn_work<B: ExtraBackendMethods>(
         // as a diagnostic was already sent off to the main thread - just
         // surface that there was an error in this worker.
         bomb.result = {
-            let label = work.name();
-            cgcx.profile(|p| p.start_activity(label.clone()));
-            let result = execute_work_item(&cgcx, work).ok();
-            cgcx.profile(|p| p.end_activity(label));
-
-            result
+            let _prof_timer = cgcx.prof.generic_activity(&work.name());
+            execute_work_item(&cgcx, work).ok()
         };
     });
 }
@@ -1835,6 +1777,8 @@ impl<B: ExtraBackendMethods> OngoingCodegen<B> {
             self.backend.print_pass_timings()
         }
 
+        sess.prof.generic_activity_end("codegen_and_optimize_crate");
+
         (CodegenResults {
             crate_name: self.crate_name,
             crate_hash: self.crate_hash,
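
The crate-wide "codegen_and_optimize_crate" interval uses the non-RAII start/end pair instead of a guard: it opens in `start_async_codegen` and closes when the ongoing codegen is joined (the hunk above), with worker threads running in between, so there is no single scope for a `TimingGuard` to span. A toy sketch of that shape, with stand-in types rather than the real `SelfProfilerRef` and `OngoingCodegen`:

    struct Prof;

    impl Prof {
        fn generic_activity_start(&self, label: &str) {
            println!("start: {}", label);
        }
        fn generic_activity_end(&self, label: &str) {
            println!("end: {}", label);
        }
    }

    struct OngoingWork<'a> {
        prof: &'a Prof,
    }

    fn start_async_work(prof: &Prof) -> OngoingWork<'_> {
        prof.generic_activity_start("codegen_and_optimize_crate");
        OngoingWork { prof }
    }

    impl<'a> OngoingWork<'a> {
        fn join(self) {
            // ... wait for the worker threads to finish ...
            self.prof.generic_activity_end("codegen_and_optimize_crate");
        }
    }

    fn main() {
        let prof = Prof;
        let work = start_async_work(&prof);
        // ... other compilation stages run while codegen proceeds ...
        work.join();
    }
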
diff --git a/src/librustc_codegen_ssa/base.rs b/src/librustc_codegen_ssa/base.rs
index d11d8911a93..935087714a7 100644
--- a/src/librustc_codegen_ssa/base.rs
+++ b/src/librustc_codegen_ssa/base.rs
@@ -559,7 +559,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
 
     if need_metadata_module {
         // Codegen the encoded metadata.
-        tcx.sess.profiler(|p| p.start_activity("codegen crate metadata"));
+        let _prof_timer = tcx.prof.generic_activity("codegen_crate_metadata");
 
         let metadata_cgu_name = cgu_name_builder.build_cgu_name(LOCAL_CRATE,
                                                                 &["crate"],
@@ -570,7 +570,6 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
             backend.write_compressed_metadata(tcx, &ongoing_codegen.metadata,
                                               &mut metadata_llvm_module);
         });
-        tcx.sess.profiler(|p| p.end_activity("codegen crate metadata"));
 
         let metadata_module = ModuleCodegen {
             name: metadata_cgu_name,
@@ -599,11 +598,9 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
 
         match cgu_reuse {
             CguReuse::No => {
-                tcx.sess.profiler(|p| p.start_activity(format!("codegen {}", cgu.name())));
                 let start_time = Instant::now();
                 backend.compile_codegen_unit(tcx, *cgu.name(), &ongoing_codegen.coordinator_send);
                 total_codegen_time += start_time.elapsed();
-                tcx.sess.profiler(|p| p.end_activity(format!("codegen {}", cgu.name())));
                 false
             }
             CguReuse::PreLto => {
diff --git a/src/librustc_codegen_ssa/lib.rs b/src/librustc_codegen_ssa/lib.rs
index 161d3ce61f0..5017a60ca69 100644
--- a/src/librustc_codegen_ssa/lib.rs
+++ b/src/librustc_codegen_ssa/lib.rs
@@ -21,7 +21,6 @@
 
 #[macro_use] extern crate log;
 #[macro_use] extern crate rustc;
-#[macro_use] extern crate rustc_data_structures;
 #[macro_use] extern crate syntax;
 
 use std::path::PathBuf;
diff --git a/src/librustc_incremental/persist/save.rs b/src/librustc_incremental/persist/save.rs
index 13e2c5d1c57..6af065513ee 100644
--- a/src/librustc_incremental/persist/save.rs
+++ b/src/librustc_incremental/persist/save.rs
@@ -28,6 +28,8 @@ pub fn save_dep_graph(tcx: TyCtxt<'_>) {
 
         join(move || {
             if tcx.sess.opts.debugging_opts.incremental_queries {
+                let _timer = tcx.prof.generic_activity("incr_comp_persist_result_cache");
+
                 time(sess, "persist query result cache", || {
                     save_in(sess,
                             query_cache_path,
@@ -36,6 +38,8 @@ pub fn save_dep_graph(tcx: TyCtxt<'_>) {
             }
         }, || {
             time(sess, "persist dep-graph", || {
+                let _timer = tcx.prof.generic_activity("incr_comp_persist_dep_graph");
+
                 save_in(sess,
                         dep_graph_path,
                         |e| {
@@ -135,6 +139,7 @@ fn encode_dep_graph(tcx: TyCtxt<'_>, encoder: &mut Encoder) {
 
     // Encode the graph data.
     let serialized_graph = time(tcx.sess, "getting serialized graph", || {
+        let _timer = tcx.prof.generic_activity("incr_comp_serialize_dep_graph");
         tcx.dep_graph.serialize()
     });
 
@@ -214,6 +219,7 @@ fn encode_dep_graph(tcx: TyCtxt<'_>, encoder: &mut Encoder) {
     }
 
     time(tcx.sess, "encoding serialized graph", || {
+        let _timer = tcx.prof.generic_activity("incr_comp_encode_serialized_dep_graph");
         serialized_graph.encode(encoder).unwrap();
     });
 }
diff --git a/src/librustc_interface/passes.rs b/src/librustc_interface/passes.rs
index 9874c0673cf..0055e0a8b2e 100644
--- a/src/librustc_interface/passes.rs
+++ b/src/librustc_interface/passes.rs
@@ -59,15 +59,17 @@ use std::rc::Rc;
 pub fn parse<'a>(sess: &'a Session, input: &Input) -> PResult<'a, ast::Crate> {
     sess.diagnostic()
         .set_continue_after_error(sess.opts.debugging_opts.continue_parse_after_error);
-    sess.profiler(|p| p.start_activity("parsing"));
-    let krate = time(sess, "parsing", || match *input {
-        Input::File(ref file) => parse::parse_crate_from_file(file, &sess.parse_sess),
-        Input::Str {
-            ref input,
-            ref name,
-        } => parse::parse_crate_from_source_str(name.clone(), input.clone(), &sess.parse_sess),
+    let krate = time(sess, "parsing", || {
+        let _prof_timer = sess.prof.generic_activity("parse_crate");
+
+        match *input {
+            Input::File(ref file) => parse::parse_crate_from_file(file, &sess.parse_sess),
+            Input::Str {
+                ref input,
+                ref name,
+            } => parse::parse_crate_from_source_str(name.clone(), input.clone(), &sess.parse_sess),
+        }
     })?;
-    sess.profiler(|p| p.end_activity("parsing"));
 
     sess.diagnostic().set_continue_after_error(true);
 
@@ -355,8 +357,8 @@ fn configure_and_expand_inner<'a>(
     );
 
     // Expand all macros
-    sess.profiler(|p| p.start_activity("macro expansion"));
     krate = time(sess, "expansion", || {
+        let _prof_timer = sess.prof.generic_activity("macro_expand_crate");
         // Windows dlls do not have rpaths, so they don't know how to find their
         // dependencies. It's up to us to tell the system where to find all the
         // dependent dlls. Note that this uses cfg!(windows) as opposed to
@@ -430,7 +432,6 @@ fn configure_and_expand_inner<'a>(
         }
         krate
     });
-    sess.profiler(|p| p.end_activity("macro expansion"));
 
     time(sess, "maybe building test harness", || {
         syntax_ext::test_harness::inject(
@@ -1071,11 +1072,10 @@ pub fn start_codegen<'tcx>(
         encode_and_write_metadata(tcx, outputs)
     });
 
-    tcx.sess.profiler(|p| p.start_activity("codegen crate"));
     let codegen = time(tcx.sess, "codegen", move || {
+        let _prof_timer = tcx.prof.generic_activity("codegen_crate");
         codegen_backend.codegen_crate(tcx, metadata, need_metadata_module)
     });
-    tcx.sess.profiler(|p| p.end_activity("codegen crate"));
 
     if log_enabled!(::log::Level::Info) {
         println!("Post-codegen");
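
In `passes.rs` the guard is now created inside the `time(...)` closure rather than around it, so the self-profile event and the `-Z time-passes` entry bracket exactly the same work. A small sketch of that nesting; the `time` helper below is a stand-in for `rustc::util::common::time`, not its real implementation:

    use std::time::Instant;

    struct Guard(&'static str);

    impl Drop for Guard {
        fn drop(&mut self) {
            println!("self-profile end: {}", self.0);
        }
    }

    // Stand-in for the -Z time-passes helper: runs `f` and prints how long it took.
    fn time<T>(label: &str, f: impl FnOnce() -> T) -> T {
        let start = Instant::now();
        let result = f();
        println!("time: {} took {:?}", label, start.elapsed());
        result
    }

    fn main() {
        let _krate = time("parsing", || {
            let _prof_timer = Guard("parse_crate");
            // ... parse the crate; both measurements cover this closure ...
            42
        });
    }
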
diff --git a/src/librustc_typeck/lib.rs b/src/librustc_typeck/lib.rs
index 00be1c84599..26a8f79b8d8 100644
--- a/src/librustc_typeck/lib.rs
+++ b/src/librustc_typeck/lib.rs
@@ -295,7 +295,7 @@ pub fn provide(providers: &mut Providers<'_>) {
 }
 
 pub fn check_crate(tcx: TyCtxt<'_>) -> Result<(), ErrorReported> {
-    tcx.sess.profiler(|p| p.start_activity("type-check crate"));
+    let _prof_timer = tcx.prof.generic_activity("type_check_crate");
 
     // this ensures that later parts of type checking can assume that items
     // have valid types and not error
@@ -347,8 +347,6 @@ pub fn check_crate(tcx: TyCtxt<'_>) -> Result<(), ErrorReported> {
     check_unused::check_crate(tcx);
     check_for_entry_fn(tcx);
 
-    tcx.sess.profiler(|p| p.end_activity("type-check crate"));
-
     if tcx.sess.err_count() == 0 {
         Ok(())
     } else {