author     John Kåre Alsaker <john.kare.alsaker@gmail.com>  2020-01-01 02:24:05 +0100
committer  John Kåre Alsaker <john.kare.alsaker@gmail.com>  2020-01-05 02:57:14 +0100
commit     5a485ce4a3767bcac6bcc7bf66867c59c30eaeb1 (patch)
tree       1ea3869a0eb039709e0bd528f6c9c63f1f54daf7
parent     7494250106003d698579edef215d0c01810b5156 (diff)
download   rust-5a485ce4a3767bcac6bcc7bf66867c59c30eaeb1.tar.gz
           rust-5a485ce4a3767bcac6bcc7bf66867c59c30eaeb1.zip
Use self profile infrastructure for -Z time and -Z time-passes
-rw-r--r--  src/librustc/hir/map/mod.rs                    |   4
-rw-r--r--  src/librustc/lib.rs                            |   2
-rw-r--r--  src/librustc/ty/query/on_disk_cache.rs         |  30
-rw-r--r--  src/librustc/util/common.rs                    | 133
-rw-r--r--  src/librustc_codegen_llvm/back/lto.rs          |  12
-rw-r--r--  src/librustc_codegen_llvm/back/write.rs        | 195
-rw-r--r--  src/librustc_codegen_llvm/lib.rs               |   7
-rw-r--r--  src/librustc_codegen_ssa/back/link.rs          |   5
-rw-r--r--  src/librustc_codegen_ssa/back/write.rs         |  51
-rw-r--r--  src/librustc_codegen_ssa/base.rs               |  16
-rw-r--r--  src/librustc_data_structures/lib.rs            |   3
-rw-r--r--  src/librustc_data_structures/profiling.rs      | 229
-rw-r--r--  src/librustc_driver/lib.rs                     |   6
-rw-r--r--  src/librustc_incremental/persist/load.rs       |  54
-rw-r--r--  src/librustc_incremental/persist/save.rs       |  23
-rw-r--r--  src/librustc_interface/passes.rs               |  99
-rw-r--r--  src/librustc_interface/queries.rs              |   4
-rw-r--r--  src/librustc_lint/early.rs                     |   3
-rw-r--r--  src/librustc_lint/late.rs                      |   9
-rw-r--r--  src/librustc_mir/monomorphize/collector.rs     |   7
-rw-r--r--  src/librustc_mir/monomorphize/partitioning.rs  |   5
-rw-r--r--  src/librustc_session/session.rs                |  13
-rw-r--r--  src/librustc_session/utils.rs                  |  15
-rw-r--r--  src/librustc_typeck/coherence/mod.rs           |   5
-rw-r--r--  src/librustc_typeck/lib.rs                     |  19
25 files changed, 472 insertions, 477 deletions
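
This commit retires the closure-based `time`/`time_ext` helpers in `rustc::util::common` and routes `-Z time`/`-Z time-passes` output through the self-profiling infrastructure: call sites switch to `Session::time`, `SelfProfilerRef::generic_pass`, and the RAII `VerboseTimingGuard` added in `librustc_data_structures/profiling.rs`. Below is a rough standalone model of the resulting call-site pattern; `Prof`, `PassTimer`, and `validate_hir_map` are illustrative stand-ins, not rustc APIs.

    use std::time::Instant;

    // Simplified stand-in for SelfProfilerRef + VerboseTimingGuard: a pass is
    // timed from guard construction until the guard is dropped.
    struct Prof;

    struct PassTimer {
        what: &'static str,
        start: Instant,
    }

    impl Prof {
        fn generic_pass(&self, what: &'static str) -> PassTimer {
            PassTimer { what, start: Instant::now() }
        }
    }

    impl PassTimer {
        // Closure form, mirroring `prof.generic_pass(..).run(|| ..)` in the diff.
        fn run<R>(self, f: impl FnOnce() -> R) -> R {
            let _timer = self; // dropped (and printed) when `f` returns
            f()
        }
    }

    impl Drop for PassTimer {
        fn drop(&mut self) {
            println!("time: {:.3}\t{}", self.start.elapsed().as_secs_f64(), self.what);
        }
    }

    fn validate_hir_map() { /* placeholder workload */ }

    fn main() {
        let prof = Prof;

        // Closure style, as in `sess.time("validate HIR map", || ...)`.
        prof.generic_pass("validate HIR map").run(validate_hir_map);

        // RAII style, as in `let _timer = tcx.sess.prof.generic_pass(desc);`.
        // The measurement ends when `_timer` goes out of scope.
        let _timer = prof.generic_pass("encode query results");
    }

Both forms appear throughout the diff; which one a call site uses mostly depends on whether the timed code was already wrapped in a closure.
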
diff --git a/src/librustc/hir/map/mod.rs b/src/librustc/hir/map/mod.rs
index b499ba20b8c..fc234829583 100644
--- a/src/librustc/hir/map/mod.rs
+++ b/src/librustc/hir/map/mod.rs
@@ -11,8 +11,6 @@ use crate::hir::print::Nested;
 use crate::hir::*;
 use crate::middle::cstore::CrateStoreDyn;
 use crate::ty::query::Providers;
-use crate::util::common::time;
-
 use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::svh::Svh;
 use rustc_index::vec::IndexVec;
@@ -1245,7 +1243,7 @@ pub fn map_crate<'hir>(
         definitions,
     };
 
-    time(sess, "validate HIR map", || {
+    sess.time("validate HIR map", || {
         hir_id_validator::check_crate(&map);
     });
 
diff --git a/src/librustc/lib.rs b/src/librustc/lib.rs
index 9882b021ff3..ce8263f81a7 100644
--- a/src/librustc/lib.rs
+++ b/src/librustc/lib.rs
@@ -66,8 +66,6 @@
 extern crate bitflags;
 #[macro_use]
 extern crate scoped_tls;
-#[cfg(windows)]
-extern crate libc;
 #[macro_use]
 extern crate rustc_macros;
 #[macro_use]
diff --git a/src/librustc/ty/query/on_disk_cache.rs b/src/librustc/ty/query/on_disk_cache.rs
index c29c68d2148..6758aedfabc 100644
--- a/src/librustc/ty/query/on_disk_cache.rs
+++ b/src/librustc/ty/query/on_disk_cache.rs
@@ -9,7 +9,6 @@ use crate::session::{CrateDisambiguator, Session};
 use crate::ty::codec::{self as ty_codec, TyDecoder, TyEncoder};
 use crate::ty::context::TyCtxt;
 use crate::ty::{self, Ty};
-use crate::util::common::{time, time_ext};
 
 use errors::Diagnostic;
 use rustc_data_structures::fx::FxHashMap;
@@ -200,7 +199,7 @@ impl<'sess> OnDiskCache<'sess> {
             // Encode query results.
             let mut query_result_index = EncodedQueryResultIndex::new();
 
-            time(tcx.sess, "encode query results", || {
+            tcx.sess.time("encode query results", || {
                 let enc = &mut encoder;
                 let qri = &mut query_result_index;
 
@@ -1056,23 +1055,22 @@ where
     E: 'a + TyEncoder,
 {
     let desc = &format!("encode_query_results for {}", ::std::any::type_name::<Q>());
+    let _timer = tcx.sess.prof.generic_pass(desc);
 
-    time_ext(tcx.sess.time_extended(), desc, || {
-        let shards = Q::query_cache(tcx).lock_shards();
-        assert!(shards.iter().all(|shard| shard.active.is_empty()));
-        for (key, entry) in shards.iter().flat_map(|shard| shard.results.iter()) {
-            if Q::cache_on_disk(tcx, key.clone(), Some(&entry.value)) {
-                let dep_node = SerializedDepNodeIndex::new(entry.index.index());
+    let shards = Q::query_cache(tcx).lock_shards();
+    assert!(shards.iter().all(|shard| shard.active.is_empty()));
+    for (key, entry) in shards.iter().flat_map(|shard| shard.results.iter()) {
+        if Q::cache_on_disk(tcx, key.clone(), Some(&entry.value)) {
+            let dep_node = SerializedDepNodeIndex::new(entry.index.index());
 
-                // Record position of the cache entry.
-                query_result_index.push((dep_node, AbsoluteBytePos::new(encoder.position())));
+            // Record position of the cache entry.
+            query_result_index.push((dep_node, AbsoluteBytePos::new(encoder.position())));
 
-                // Encode the type check tables with the `SerializedDepNodeIndex`
-                // as tag.
-                encoder.encode_tagged(dep_node, &entry.value)?;
-            }
+            // Encode the type check tables with the `SerializedDepNodeIndex`
+            // as tag.
+            encoder.encode_tagged(dep_node, &entry.value)?;
         }
+    }
 
-        Ok(())
-    })
+    Ok(())
 }
diff --git a/src/librustc/util/common.rs b/src/librustc/util/common.rs
index 011b70c04d5..95746852157 100644
--- a/src/librustc/util/common.rs
+++ b/src/librustc/util/common.rs
@@ -2,11 +2,9 @@
 
 use rustc_data_structures::sync::Lock;
 
-use std::cell::Cell;
 use std::fmt::Debug;
 use std::time::{Duration, Instant};
 
-use crate::session::Session;
 use rustc_span::symbol::{sym, Symbol};
 
 #[cfg(test)]
@@ -17,85 +15,6 @@ pub const FN_OUTPUT_NAME: Symbol = sym::Output;
 
 pub use errors::ErrorReported;
 
-thread_local!(static TIME_DEPTH: Cell<usize> = Cell::new(0));
-
-#[allow(nonstandard_style)]
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub struct QueryMsg {
-    pub query: &'static str,
-    pub msg: Option<String>,
-}
-
-/// Read the current depth of `time()` calls. This is used to
-/// encourage indentation across threads.
-pub fn time_depth() -> usize {
-    TIME_DEPTH.with(|slot| slot.get())
-}
-
-/// Sets the current depth of `time()` calls. The idea is to call
-/// `set_time_depth()` with the result from `time_depth()` in the
-/// parent thread.
-pub fn set_time_depth(depth: usize) {
-    TIME_DEPTH.with(|slot| slot.set(depth));
-}
-
-pub fn time<T, F>(sess: &Session, what: &str, f: F) -> T
-where
-    F: FnOnce() -> T,
-{
-    time_ext(sess.time_passes(), what, f)
-}
-
-pub fn time_ext<T, F>(do_it: bool, what: &str, f: F) -> T
-where
-    F: FnOnce() -> T,
-{
-    if !do_it {
-        return f();
-    }
-
-    let old = TIME_DEPTH.with(|slot| {
-        let r = slot.get();
-        slot.set(r + 1);
-        r
-    });
-
-    let start = Instant::now();
-    let rv = f();
-    let dur = start.elapsed();
-
-    print_time_passes_entry(true, what, dur);
-
-    TIME_DEPTH.with(|slot| slot.set(old));
-
-    rv
-}
-
-pub fn print_time_passes_entry(do_it: bool, what: &str, dur: Duration) {
-    if !do_it {
-        return;
-    }
-
-    let indentation = TIME_DEPTH.with(|slot| slot.get());
-
-    let mem_string = match get_resident() {
-        Some(n) => {
-            let mb = n as f64 / 1_000_000.0;
-            format!("; rss: {}MB", mb.round() as usize)
-        }
-        None => String::new(),
-    };
-    println!(
-        "{}time: {}{}\t{}",
-        "  ".repeat(indentation),
-        duration_to_secs_str(dur),
-        mem_string,
-        what
-    );
-}
-
-pub use rustc_session::utils::duration_to_secs_str;
-
 pub fn to_readable_str(mut val: usize) -> String {
     let mut groups = vec![];
     loop {
@@ -128,58 +47,6 @@ where
     rv
 }
 
-// Memory reporting
-#[cfg(unix)]
-fn get_resident() -> Option<usize> {
-    use std::fs;
-
-    let field = 1;
-    let contents = fs::read("/proc/self/statm").ok()?;
-    let contents = String::from_utf8(contents).ok()?;
-    let s = contents.split_whitespace().nth(field)?;
-    let npages = s.parse::<usize>().ok()?;
-    Some(npages * 4096)
-}
-
-#[cfg(windows)]
-fn get_resident() -> Option<usize> {
-    type BOOL = i32;
-    type DWORD = u32;
-    type HANDLE = *mut u8;
-    use libc::size_t;
-    use std::mem;
-    #[repr(C)]
-    #[allow(non_snake_case)]
-    struct PROCESS_MEMORY_COUNTERS {
-        cb: DWORD,
-        PageFaultCount: DWORD,
-        PeakWorkingSetSize: size_t,
-        WorkingSetSize: size_t,
-        QuotaPeakPagedPoolUsage: size_t,
-        QuotaPagedPoolUsage: size_t,
-        QuotaPeakNonPagedPoolUsage: size_t,
-        QuotaNonPagedPoolUsage: size_t,
-        PagefileUsage: size_t,
-        PeakPagefileUsage: size_t,
-    }
-    type PPROCESS_MEMORY_COUNTERS = *mut PROCESS_MEMORY_COUNTERS;
-    #[link(name = "psapi")]
-    extern "system" {
-        fn GetCurrentProcess() -> HANDLE;
-        fn GetProcessMemoryInfo(
-            Process: HANDLE,
-            ppsmemCounters: PPROCESS_MEMORY_COUNTERS,
-            cb: DWORD,
-        ) -> BOOL;
-    }
-    let mut pmc: PROCESS_MEMORY_COUNTERS = unsafe { mem::zeroed() };
-    pmc.cb = mem::size_of_val(&pmc) as DWORD;
-    match unsafe { GetProcessMemoryInfo(GetCurrentProcess(), &mut pmc, pmc.cb) } {
-        0 => None,
-        _ => Some(pmc.WorkingSetSize as usize),
-    }
-}
-
 pub fn indent<R, F>(op: F) -> R
 where
     R: Debug,
diff --git a/src/librustc_codegen_llvm/back/lto.rs b/src/librustc_codegen_llvm/back/lto.rs
index 629d303ffec..ca95a316349 100644
--- a/src/librustc_codegen_llvm/back/lto.rs
+++ b/src/librustc_codegen_llvm/back/lto.rs
@@ -11,7 +11,6 @@ use rustc::dep_graph::WorkProduct;
 use rustc::hir::def_id::LOCAL_CRATE;
 use rustc::middle::exported_symbols::SymbolExportLevel;
 use rustc::session::config::{self, Lto};
-use rustc::util::common::time_ext;
 use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule, ThinShared};
 use rustc_codegen_ssa::back::symbol_export;
 use rustc_codegen_ssa::back::write::{CodegenContext, FatLTOInput, ModuleConfig};
@@ -121,7 +120,7 @@ fn prepare_lto(
                 info!("adding bytecode {}", name);
                 let bc_encoded = data.data();
 
-                let (bc, id) = time_ext(cgcx.time_passes, &format!("decode {}", name), || {
+                let (bc, id) = cgcx.prof.generic_pass(&format!("decode {}", name)).run(|| {
                     match DecodedBytecode::new(bc_encoded) {
                         Ok(b) => Ok((b.bytecode(), b.identifier().to_string())),
                         Err(e) => Err(diag_handler.fatal(&e)),
@@ -281,9 +280,8 @@ fn fat_lto(
         // save and persist everything with the original module.
         let mut linker = Linker::new(llmod);
         for (bc_decoded, name) in serialized_modules {
-            let _timer = cgcx.prof.generic_activity("LLVM_fat_lto_link_module");
             info!("linking {:?}", name);
-            time_ext(cgcx.time_passes, &format!("ll link {:?}", name), || {
+            cgcx.prof.generic_pass(&format!("ll link {:?}", name)).run(|| {
                 let data = bc_decoded.data();
                 linker.add(&data).map_err(|()| {
                     let msg = format!("failed to load bc of {:?}", name);
@@ -634,9 +632,9 @@ pub(crate) fn run_pass_manager(
             llvm::LLVMRustAddPass(pm, pass.unwrap());
         }
 
-        time_ext(cgcx.time_passes, "LTO passes", || {
-            llvm::LLVMRunPassManager(pm, module.module_llvm.llmod())
-        });
+        cgcx.prof
+            .generic_pass("LTO passes")
+            .run(|| llvm::LLVMRunPassManager(pm, module.module_llvm.llmod()));
 
         llvm::LLVMDisposePassManager(pm);
     }
diff --git a/src/librustc_codegen_llvm/back/write.rs b/src/librustc_codegen_llvm/back/write.rs
index 818d09ce691..fa4d8a8d847 100644
--- a/src/librustc_codegen_llvm/back/write.rs
+++ b/src/librustc_codegen_llvm/back/write.rs
@@ -16,7 +16,6 @@ use rustc::hir::def_id::LOCAL_CRATE;
 use rustc::session::config::{self, Lto, OutputType, Passes, Sanitizer, SwitchWithOptPath};
 use rustc::session::Session;
 use rustc::ty::TyCtxt;
-use rustc::util::common::time_ext;
 use rustc_codegen_ssa::back::write::{run_assembler, CodegenContext, ModuleConfig};
 use rustc_codegen_ssa::traits::*;
 use rustc_codegen_ssa::{CompiledModule, ModuleCodegen, RLIB_BYTECODE_EXTENSION};
@@ -425,20 +424,14 @@ pub(crate) unsafe fn optimize(
 
         // Finally, run the actual optimization passes
         {
-            let _timer = cgcx.prof.generic_activity("LLVM_module_optimize_function_passes");
-            time_ext(
-                config.time_passes,
-                &format!("llvm function passes [{}]", module_name.unwrap()),
-                || llvm::LLVMRustRunFunctionPassManager(fpm, llmod),
-            );
+            let desc = &format!("llvm function passes [{}]", module_name.unwrap());
+            let _timer = if config.time_module { Some(cgcx.prof.generic_pass(desc)) } else { None };
+            llvm::LLVMRustRunFunctionPassManager(fpm, llmod);
         }
         {
-            let _timer = cgcx.prof.generic_activity("LLVM_module_optimize_module_passes");
-            time_ext(
-                config.time_passes,
-                &format!("llvm module passes [{}]", module_name.unwrap()),
-                || llvm::LLVMRunPassManager(mpm, llmod),
-            );
+            let desc = &format!("llvm module passes [{}]", module_name.unwrap());
+            let _timer = if config.time_module { Some(cgcx.prof.generic_pass(desc)) } else { None };
+            llvm::LLVMRunPassManager(mpm, llmod);
         }
 
         // Deallocate managers that we're now done with
@@ -561,103 +554,97 @@ pub(crate) unsafe fn codegen(
             embed_bitcode(cgcx, llcx, llmod, None);
         }
 
-        time_ext(
-            config.time_passes,
-            &format!("codegen passes [{}]", module_name.unwrap()),
-            || -> Result<(), FatalError> {
-                if config.emit_ir {
-                    let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_emit_ir");
-                    let out =
-                        cgcx.output_filenames.temp_path(OutputType::LlvmAssembly, module_name);
-                    let out_c = path_to_c_string(&out);
-
-                    extern "C" fn demangle_callback(
-                        input_ptr: *const c_char,
-                        input_len: size_t,
-                        output_ptr: *mut c_char,
-                        output_len: size_t,
-                    ) -> size_t {
-                        let input = unsafe {
-                            slice::from_raw_parts(input_ptr as *const u8, input_len as usize)
-                        };
-
-                        let input = match str::from_utf8(input) {
-                            Ok(s) => s,
-                            Err(_) => return 0,
-                        };
-
-                        let output = unsafe {
-                            slice::from_raw_parts_mut(output_ptr as *mut u8, output_len as usize)
-                        };
-                        let mut cursor = io::Cursor::new(output);
-
-                        let demangled = match rustc_demangle::try_demangle(input) {
-                            Ok(d) => d,
-                            Err(_) => return 0,
-                        };
-
-                        if let Err(_) = write!(cursor, "{:#}", demangled) {
-                            // Possible only if provided buffer is not big enough
-                            return 0;
-                        }
-
-                        cursor.position() as size_t
+        {
+            let desc = &format!("codegen passes [{}]", module_name.unwrap());
+            let _timer = if config.time_module { Some(cgcx.prof.generic_pass(desc)) } else { None };
+
+            if config.emit_ir {
+                let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_emit_ir");
+                let out = cgcx.output_filenames.temp_path(OutputType::LlvmAssembly, module_name);
+                let out_c = path_to_c_string(&out);
+
+                extern "C" fn demangle_callback(
+                    input_ptr: *const c_char,
+                    input_len: size_t,
+                    output_ptr: *mut c_char,
+                    output_len: size_t,
+                ) -> size_t {
+                    let input = unsafe {
+                        slice::from_raw_parts(input_ptr as *const u8, input_len as usize)
+                    };
+
+                    let input = match str::from_utf8(input) {
+                        Ok(s) => s,
+                        Err(_) => return 0,
+                    };
+
+                    let output = unsafe {
+                        slice::from_raw_parts_mut(output_ptr as *mut u8, output_len as usize)
+                    };
+                    let mut cursor = io::Cursor::new(output);
+
+                    let demangled = match rustc_demangle::try_demangle(input) {
+                        Ok(d) => d,
+                        Err(_) => return 0,
+                    };
+
+                    if let Err(_) = write!(cursor, "{:#}", demangled) {
+                        // Possible only if provided buffer is not big enough
+                        return 0;
                     }
 
-                    let result =
-                        llvm::LLVMRustPrintModule(llmod, out_c.as_ptr(), demangle_callback);
-                    result.into_result().map_err(|()| {
-                        let msg = format!("failed to write LLVM IR to {}", out.display());
-                        llvm_err(diag_handler, &msg)
-                    })?;
+                    cursor.position() as size_t
                 }
 
-                if config.emit_asm || asm_to_obj {
-                    let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_emit_asm");
-                    let path = cgcx.output_filenames.temp_path(OutputType::Assembly, module_name);
-
-                    // We can't use the same module for asm and binary output, because that triggers
-                    // various errors like invalid IR or broken binaries, so we might have to clone the
-                    // module to produce the asm output
-                    let llmod = if config.emit_obj { llvm::LLVMCloneModule(llmod) } else { llmod };
-                    with_codegen(tm, llmod, config.no_builtins, |cpm| {
-                        write_output_file(
-                            diag_handler,
-                            tm,
-                            cpm,
-                            llmod,
-                            &path,
-                            llvm::FileType::AssemblyFile,
-                        )
-                    })?;
-                }
+                let result = llvm::LLVMRustPrintModule(llmod, out_c.as_ptr(), demangle_callback);
+                result.into_result().map_err(|()| {
+                    let msg = format!("failed to write LLVM IR to {}", out.display());
+                    llvm_err(diag_handler, &msg)
+                })?;
+            }
 
-                if write_obj {
-                    let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_emit_obj");
-                    with_codegen(tm, llmod, config.no_builtins, |cpm| {
-                        write_output_file(
-                            diag_handler,
-                            tm,
-                            cpm,
-                            llmod,
-                            &obj_out,
-                            llvm::FileType::ObjectFile,
-                        )
-                    })?;
-                } else if asm_to_obj {
-                    let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_asm_to_obj");
-                    let assembly =
-                        cgcx.output_filenames.temp_path(OutputType::Assembly, module_name);
-                    run_assembler(cgcx, diag_handler, &assembly, &obj_out);
-
-                    if !config.emit_asm && !cgcx.save_temps {
-                        drop(fs::remove_file(&assembly));
-                    }
-                }
+            if config.emit_asm || asm_to_obj {
+                let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_emit_asm");
+                let path = cgcx.output_filenames.temp_path(OutputType::Assembly, module_name);
+
+                // We can't use the same module for asm and binary output, because that triggers
+                // various errors like invalid IR or broken binaries, so we might have to clone the
+                // module to produce the asm output
+                let llmod = if config.emit_obj { llvm::LLVMCloneModule(llmod) } else { llmod };
+                with_codegen(tm, llmod, config.no_builtins, |cpm| {
+                    write_output_file(
+                        diag_handler,
+                        tm,
+                        cpm,
+                        llmod,
+                        &path,
+                        llvm::FileType::AssemblyFile,
+                    )
+                })?;
+            }
 
-                Ok(())
-            },
-        )?;
+            if write_obj {
+                let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_emit_obj");
+                with_codegen(tm, llmod, config.no_builtins, |cpm| {
+                    write_output_file(
+                        diag_handler,
+                        tm,
+                        cpm,
+                        llmod,
+                        &obj_out,
+                        llvm::FileType::ObjectFile,
+                    )
+                })?;
+            } else if asm_to_obj {
+                let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_asm_to_obj");
+                let assembly = cgcx.output_filenames.temp_path(OutputType::Assembly, module_name);
+                run_assembler(cgcx, diag_handler, &assembly, &obj_out);
+
+                if !config.emit_asm && !cgcx.save_temps {
+                    drop(fs::remove_file(&assembly));
+                }
+            }
+        }
 
         if copy_bc_to_obj {
             debug!("copying bitcode {:?} to obj {:?}", bc_out, obj_out);
diff --git a/src/librustc_codegen_llvm/lib.rs b/src/librustc_codegen_llvm/lib.rs
index 0b6a2861c42..349cff79c78 100644
--- a/src/librustc_codegen_llvm/lib.rs
+++ b/src/librustc_codegen_llvm/lib.rs
@@ -275,7 +275,6 @@ impl CodegenBackend for LlvmCodegenBackend {
         dep_graph: &DepGraph,
         outputs: &OutputFilenames,
     ) -> Result<(), ErrorReported> {
-        use rustc::util::common::time;
         let (codegen_results, work_products) = ongoing_codegen
             .downcast::<rustc_codegen_ssa::back::write::OngoingCodegen<LlvmCodegenBackend>>()
             .expect("Expected LlvmCodegenBackend's OngoingCodegen, found Box<Any>")
@@ -284,7 +283,7 @@ impl CodegenBackend for LlvmCodegenBackend {
             rustc_codegen_ssa::back::write::dump_incremental_data(&codegen_results);
         }
 
-        time(sess, "serialize work products", move || {
+        sess.time("serialize work products", move || {
             rustc_incremental::save_work_product_index(sess, &dep_graph, work_products)
         });
 
@@ -301,9 +300,7 @@ impl CodegenBackend for LlvmCodegenBackend {
 
         // Run the linker on any artifacts that resulted from the LLVM run.
         // This should produce either a finished executable or library.
-        time(sess, "linking", || {
-            let _prof_timer = sess.prof.generic_activity("link_crate");
-
+        sess.time("linking", || {
             use crate::back::archive::LlvmArchiveBuilder;
             use rustc_codegen_ssa::back::link::link_binary;
 
diff --git a/src/librustc_codegen_ssa/back/link.rs b/src/librustc_codegen_ssa/back/link.rs
index 8cdda5630ea..2151c058042 100644
--- a/src/librustc_codegen_ssa/back/link.rs
+++ b/src/librustc_codegen_ssa/back/link.rs
@@ -8,7 +8,6 @@ use rustc::session::search_paths::PathKind;
 /// For all the linkers we support, and information they might
 /// need out of the shared crate context before we get rid of it.
 use rustc::session::{filesearch, Session};
-use rustc::util::common::{time, time_ext};
 use rustc_data_structures::fx::FxHashSet;
 use rustc_fs_util::fix_windows_verbatim_for_gcc;
 use rustc_span::symbol::Symbol;
@@ -578,7 +577,7 @@ fn link_natively<'a, B: ArchiveBuilder<'a>>(
     let mut i = 0;
     loop {
         i += 1;
-        prog = time(sess, "running linker", || exec_linker(sess, &mut cmd, out_filename, tmpdir));
+        prog = sess.time("running linker", || exec_linker(sess, &mut cmd, out_filename, tmpdir));
         let output = match prog {
             Ok(ref output) => output,
             Err(_) => break,
@@ -1563,7 +1562,7 @@ fn add_upstream_rust_crates<'a, B: ArchiveBuilder<'a>>(
         let name = cratepath.file_name().unwrap().to_str().unwrap();
         let name = &name[3..name.len() - 5]; // chop off lib/.rlib
 
-        time_ext(sess.time_extended(), &format!("altering {}.rlib", name), || {
+        sess.prof.generic_pass(&format!("altering {}.rlib", name)).run(|| {
             let mut archive = <B as ArchiveBuilder>::new(sess, &dst, Some(cratepath));
             archive.update_symbols();
 
diff --git a/src/librustc_codegen_ssa/back/write.rs b/src/librustc_codegen_ssa/back/write.rs
index f901a51ada8..44ed297ea83 100644
--- a/src/librustc_codegen_ssa/back/write.rs
+++ b/src/librustc_codegen_ssa/back/write.rs
@@ -18,9 +18,9 @@ use rustc::session::config::{
 };
 use rustc::session::Session;
 use rustc::ty::TyCtxt;
-use rustc::util::common::{print_time_passes_entry, set_time_depth, time_depth};
 use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::profiling::SelfProfilerRef;
+use rustc_data_structures::profiling::VerboseTimingGuard;
 use rustc_data_structures::svh::Svh;
 use rustc_data_structures::sync::Lrc;
 use rustc_errors::emitter::Emitter;
@@ -45,7 +45,6 @@ use std::str;
 use std::sync::mpsc::{channel, Receiver, Sender};
 use std::sync::Arc;
 use std::thread;
-use std::time::Instant;
 
 const PRE_LTO_BC_EXT: &str = "pre-lto.bc";
 
@@ -81,7 +80,7 @@ pub struct ModuleConfig {
     pub verify_llvm_ir: bool,
     pub no_prepopulate_passes: bool,
     pub no_builtins: bool,
-    pub time_passes: bool,
+    pub time_module: bool,
     pub vectorize_loop: bool,
     pub vectorize_slp: bool,
     pub merge_functions: bool,
@@ -125,7 +124,7 @@ impl ModuleConfig {
             verify_llvm_ir: false,
             no_prepopulate_passes: false,
             no_builtins: false,
-            time_passes: false,
+            time_module: true,
             vectorize_loop: false,
             vectorize_slp: false,
             merge_functions: false,
@@ -137,7 +136,6 @@ impl ModuleConfig {
         self.verify_llvm_ir = sess.verify_llvm_ir();
         self.no_prepopulate_passes = sess.opts.cg.no_prepopulate_passes;
         self.no_builtins = no_builtins || sess.target.target.options.no_builtins;
-        self.time_passes = sess.time_extended();
         self.inline_threshold = sess.opts.cg.inline_threshold;
         self.obj_is_bitcode =
             sess.target.target.options.obj_is_bitcode || sess.opts.cg.linker_plugin_lto.enabled();
@@ -212,7 +210,6 @@ impl<B: WriteBackendMethods> Clone for TargetMachineFactory<B> {
 pub struct CodegenContext<B: WriteBackendMethods> {
     // Resources needed when running LTO
     pub backend: B,
-    pub time_passes: bool,
     pub prof: SelfProfilerRef,
     pub lto: Lto,
     pub no_landing_pads: bool,
@@ -434,8 +431,8 @@ pub fn start_async_codegen<B: ExtraBackendMethods>(
 
     // Exclude metadata and allocator modules from time_passes output, since
     // they throw off the "LLVM passes" measurement.
-    metadata_config.time_passes = false;
-    allocator_config.time_passes = false;
+    metadata_config.time_module = false;
+    allocator_config.time_module = false;
 
     let (shared_emitter, shared_emitter_main) = SharedEmitter::new();
     let (codegen_worker_send, codegen_worker_receive) = channel();
@@ -1026,7 +1023,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
         fewer_names: sess.fewer_names(),
         save_temps: sess.opts.cg.save_temps,
         opts: Arc::new(sess.opts.clone()),
-        time_passes: sess.time_extended(),
         prof: sess.prof.clone(),
         exported_symbols,
         remark: sess.opts.cg.remark.clone(),
@@ -1184,9 +1180,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
     // necessary. There's already optimizations in place to avoid sending work
     // back to the coordinator if LTO isn't requested.
     return thread::spawn(move || {
-        // We pretend to be within the top-level LLVM time-passes task here:
-        set_time_depth(1);
-
         let max_workers = ::num_cpus::get();
         let mut worker_id_counter = 0;
         let mut free_worker_ids = Vec::new();
@@ -1224,7 +1217,8 @@ fn start_executing_work<B: ExtraBackendMethods>(
         let mut main_thread_worker_state = MainThreadWorkerState::Idle;
         let mut running = 0;
 
-        let mut llvm_start_time = None;
+        let prof = &cgcx.prof;
+        let mut llvm_start_time: Option<VerboseTimingGuard<'_>> = None;
 
         // Run the message loop while there's still anything that needs message
         // processing. Note that as soon as codegen is aborted we simply want to
@@ -1262,6 +1256,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
                             ..cgcx.clone()
                         };
                         maybe_start_llvm_timer(
+                            prof,
                             cgcx.config(item.module_kind()),
                             &mut llvm_start_time,
                         );
@@ -1313,6 +1308,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
                                 ..cgcx.clone()
                             };
                             maybe_start_llvm_timer(
+                                prof,
                                 cgcx.config(item.module_kind()),
                                 &mut llvm_start_time,
                             );
@@ -1345,7 +1341,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
             while !codegen_aborted && work_items.len() > 0 && running < tokens.len() {
                 let (item, _) = work_items.pop().unwrap();
 
-                maybe_start_llvm_timer(cgcx.config(item.module_kind()), &mut llvm_start_time);
+                maybe_start_llvm_timer(prof, cgcx.config(item.module_kind()), &mut llvm_start_time);
 
                 let cgcx =
                     CodegenContext { worker: get_worker_id(&mut free_worker_ids), ..cgcx.clone() };
@@ -1483,13 +1479,8 @@ fn start_executing_work<B: ExtraBackendMethods>(
             }
         }
 
-        if let Some(llvm_start_time) = llvm_start_time {
-            let total_llvm_time = Instant::now().duration_since(llvm_start_time);
-            // This is the top-level timing for all of LLVM, set the time-depth
-            // to zero.
-            set_time_depth(1);
-            print_time_passes_entry(cgcx.time_passes, "LLVM passes", total_llvm_time);
-        }
+        // Drop to print timings
+        drop(llvm_start_time);
 
         // Regardless of what order these modules completed in, report them to
         // the backend in the same order every time to ensure that we're handing
@@ -1514,13 +1505,13 @@ fn start_executing_work<B: ExtraBackendMethods>(
         items_in_queue > 0 && items_in_queue >= max_workers.saturating_sub(workers_running / 2)
     }
 
-    fn maybe_start_llvm_timer(config: &ModuleConfig, llvm_start_time: &mut Option<Instant>) {
-        // We keep track of the -Ztime-passes output manually,
-        // since the closure-based interface does not fit well here.
-        if config.time_passes {
-            if llvm_start_time.is_none() {
-                *llvm_start_time = Some(Instant::now());
-            }
+    fn maybe_start_llvm_timer<'a>(
+        prof: &'a SelfProfilerRef,
+        config: &ModuleConfig,
+        llvm_start_time: &mut Option<VerboseTimingGuard<'a>>,
+    ) {
+        if config.time_module && llvm_start_time.is_none() {
+            *llvm_start_time = Some(prof.generic_pass("LLVM passes"));
         }
     }
 }
@@ -1528,11 +1519,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
 pub const CODEGEN_WORKER_ID: usize = ::std::usize::MAX;
 
 fn spawn_work<B: ExtraBackendMethods>(cgcx: CodegenContext<B>, work: WorkItem<B>) {
-    let depth = time_depth();
-
     thread::spawn(move || {
-        set_time_depth(depth);
-
         // Set up a destructor which will fire off a message that we're done as
         // we exit.
         struct Bomb<B: ExtraBackendMethods> {
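
In the `start_executing_work` hunks above, the coordinator's hand-rolled `Instant` bookkeeping for the aggregate "LLVM passes" entry is replaced by a `VerboseTimingGuard` stored in an `Option`: `maybe_start_llvm_timer` lazily starts it when the first work item is scheduled, and the explicit `drop(llvm_start_time)` ends the measurement and prints the entry. A minimal standalone sketch of that lazy-start/explicit-drop pattern (the toy `PassTimer` type and module names are illustrative, not rustc's):

    use std::time::Instant;

    // Toy RAII timer standing in for VerboseTimingGuard.
    struct PassTimer { what: &'static str, start: Instant }

    impl PassTimer {
        fn new(what: &'static str) -> Self {
            PassTimer { what, start: Instant::now() }
        }
    }

    impl Drop for PassTimer {
        fn drop(&mut self) {
            println!("time: {:.3}\t{}", self.start.elapsed().as_secs_f64(), self.what);
        }
    }

    fn maybe_start_llvm_timer(llvm_start_time: &mut Option<PassTimer>) {
        // Start the aggregate timer only once, for the first module scheduled.
        if llvm_start_time.is_none() {
            *llvm_start_time = Some(PassTimer::new("LLVM passes"));
        }
    }

    fn main() {
        let mut llvm_start_time: Option<PassTimer> = None;
        for _module in ["cgu.0", "cgu.1", "cgu.2"] {
            maybe_start_llvm_timer(&mut llvm_start_time);
            // ... hand the module to a worker ...
        }
        // Dropping the guard ends the aggregate timing and prints one entry.
        drop(llvm_start_time);
    }
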
diff --git a/src/librustc_codegen_ssa/base.rs b/src/librustc_codegen_ssa/base.rs
index b964fa1c182..6750b7e65ba 100644
--- a/src/librustc_codegen_ssa/base.rs
+++ b/src/librustc_codegen_ssa/base.rs
@@ -39,9 +39,9 @@ use rustc::ty::layout::{self, Align, HasTyCtxt, LayoutOf, TyLayout, VariantIdx};
 use rustc::ty::layout::{FAT_PTR_ADDR, FAT_PTR_EXTRA};
 use rustc::ty::query::Providers;
 use rustc::ty::{self, Instance, Ty, TyCtxt};
-use rustc::util::common::{print_time_passes_entry, set_time_depth, time, time_depth};
 use rustc_codegen_utils::{check_for_rustc_errors_attr, symbol_names_test};
 use rustc_data_structures::fx::FxHashMap;
+use rustc_data_structures::profiling::print_time_passes_entry;
 use rustc_index::vec::Idx;
 use rustc_session::cgu_reuse_tracker::CguReuse;
 use rustc_span::Span;
@@ -565,9 +565,8 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
         let llmod_id =
             cgu_name_builder.build_cgu_name(LOCAL_CRATE, &["crate"], Some("allocator")).to_string();
         let mut modules = backend.new_metadata(tcx, &llmod_id);
-        time(tcx.sess, "write allocator module", || {
-            backend.codegen_allocator(tcx, &mut modules, kind)
-        });
+        tcx.sess
+            .time("write allocator module", || backend.codegen_allocator(tcx, &mut modules, kind));
 
         Some(ModuleCodegen { name: llmod_id, module_llvm: modules, kind: ModuleKind::Allocator })
     } else {
@@ -583,7 +582,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
         let metadata_cgu_name =
             cgu_name_builder.build_cgu_name(LOCAL_CRATE, &["crate"], Some("metadata")).to_string();
         let mut metadata_llvm_module = backend.new_metadata(tcx, &metadata_cgu_name);
-        time(tcx.sess, "write compressed metadata", || {
+        tcx.sess.time("write compressed metadata", || {
             backend.write_compressed_metadata(
                 tcx,
                 &ongoing_codegen.metadata,
@@ -653,10 +652,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
 
     // Since the main thread is sometimes blocked during codegen, we keep track
     // -Ztime-passes output manually.
-    let time_depth = time_depth();
-    set_time_depth(time_depth + 1);
     print_time_passes_entry(tcx.sess.time_passes(), "codegen to LLVM IR", total_codegen_time);
-    set_time_depth(time_depth);
 
     ::rustc_incremental::assert_module_sources::assert_module_sources(tcx);
 
@@ -716,9 +712,9 @@ impl<B: ExtraBackendMethods> Drop for AbortCodegenOnDrop<B> {
 }
 
 fn assert_and_save_dep_graph(tcx: TyCtxt<'_>) {
-    time(tcx.sess, "assert dep graph", || ::rustc_incremental::assert_dep_graph(tcx));
+    tcx.sess.time("assert dep graph", || ::rustc_incremental::assert_dep_graph(tcx));
 
-    time(tcx.sess, "serialize dep graph", || ::rustc_incremental::save_dep_graph(tcx));
+    tcx.sess.time("serialize dep graph", || ::rustc_incremental::save_dep_graph(tcx));
 }
 
 impl CrateInfo {
diff --git a/src/librustc_data_structures/lib.rs b/src/librustc_data_structures/lib.rs
index 25bb8f6afae..d1b7ee9e83e 100644
--- a/src/librustc_data_structures/lib.rs
+++ b/src/librustc_data_structures/lib.rs
@@ -33,6 +33,9 @@ extern crate libc;
 #[macro_use]
 extern crate cfg_if;
 
+#[cfg(windows)]
+extern crate libc;
+
 pub use rustc_serialize::hex::ToHex;
 
 #[inline(never)]
diff --git a/src/librustc_data_structures/profiling.rs b/src/librustc_data_structures/profiling.rs
index 73dd3b0b3f3..840a2603deb 100644
--- a/src/librustc_data_structures/profiling.rs
+++ b/src/librustc_data_structures/profiling.rs
@@ -5,8 +5,11 @@ use std::path::Path;
 use std::process;
 use std::sync::Arc;
 use std::thread::ThreadId;
+use std::time::{Duration, Instant};
 use std::u32;
 
+use crate::cold_path;
+
 use measureme::StringId;
 
 /// MmapSerializatioSink is faster on macOS and Linux
@@ -41,11 +44,15 @@ bitflags::bitflags! {
         const QUERY_CACHE_HITS   = 1 << 2;
         const QUERY_BLOCKED      = 1 << 3;
         const INCR_CACHE_LOADS   = 1 << 4;
+        const SPARSE_PASS   = 1 << 5;
+        const GENERIC_PASS   = 1 << 6;
 
         const DEFAULT = Self::GENERIC_ACTIVITIES.bits |
                         Self::QUERY_PROVIDERS.bits |
                         Self::QUERY_BLOCKED.bits |
-                        Self::INCR_CACHE_LOADS.bits;
+                        Self::INCR_CACHE_LOADS.bits |
+                        Self::SPARSE_PASS.bits |
+                        Self::GENERIC_PASS.bits;
 
         // empty() and none() aren't const-fns unfortunately
         const NONE = 0;
@@ -56,6 +63,8 @@ bitflags::bitflags! {
 const EVENT_FILTERS_BY_NAME: &[(&str, EventFilter)] = &[
     ("none", EventFilter::NONE),
     ("all", EventFilter::ALL),
+    ("sparse-pass", EventFilter::SPARSE_PASS),
+    ("generic-pass", EventFilter::GENERIC_PASS),
     ("generic-activity", EventFilter::GENERIC_ACTIVITIES),
     ("query-provider", EventFilter::QUERY_PROVIDERS),
     ("query-cache-hit", EventFilter::QUERY_CACHE_HITS),
@@ -79,44 +88,105 @@ pub struct SelfProfilerRef {
     // cost anything and allows for filtering with checking if the profiler is
     // actually enabled.
     event_filter_mask: EventFilter,
+
+    // Print sparse passes to stdout
+    verbose_sparse: bool,
+
+    // Print generic passes to stdout
+    verbose_generic: bool,
 }
 
 impl SelfProfilerRef {
-    pub fn new(profiler: Option<Arc<SelfProfiler>>) -> SelfProfilerRef {
+    pub fn new(
+        profiler: Option<Arc<SelfProfiler>>,
+        verbose_sparse: bool,
+        verbose_generic: bool,
+    ) -> SelfProfilerRef {
         // If there is no SelfProfiler then the filter mask is set to NONE,
         // ensuring that nothing ever tries to actually access it.
-        let event_filter_mask =
+        let mut event_filter_mask =
             profiler.as_ref().map(|p| p.event_filter_mask).unwrap_or(EventFilter::NONE);
 
-        SelfProfilerRef { profiler, event_filter_mask }
+        if verbose_sparse {
+            event_filter_mask |= EventFilter::SPARSE_PASS;
+        }
+
+        if verbose_generic {
+            event_filter_mask |= EventFilter::GENERIC_PASS;
+        }
+
+        SelfProfilerRef { profiler, event_filter_mask, verbose_sparse, verbose_generic }
     }
 
-    // This shim makes sure that calls only get executed if the filter mask
-    // lets them pass. It also contains some trickery to make sure that
-    // code is optimized for non-profiling compilation sessions, i.e. anything
-    // past the filter check is never inlined so it doesn't clutter the fast
-    // path.
     #[inline(always)]
     fn exec<F>(&self, event_filter: EventFilter, f: F) -> TimingGuard<'_>
     where
         F: for<'a> FnOnce(&'a SelfProfiler) -> TimingGuard<'a>,
     {
-        #[inline(never)]
-        fn cold_call<F>(profiler_ref: &SelfProfilerRef, f: F) -> TimingGuard<'_>
-        where
-            F: for<'a> FnOnce(&'a SelfProfiler) -> TimingGuard<'a>,
-        {
-            let profiler = profiler_ref.profiler.as_ref().unwrap();
-            f(&**profiler)
-        }
+        self.handle_event(
+            event_filter,
+            || f(self.profiler.as_ref().unwrap()),
+            || TimingGuard::none(),
+        )
+    }
 
+    // This shim makes sure that cold calls only get executed if the filter mask
+    // lets them pass. It also contains some trickery to make sure that
+    // code is optimized for non-profiling compilation sessions, i.e. anything
+    // past the filter check is never inlined so it doesn't clutter the fast
+    // path.
+    #[inline(always)]
+    fn handle_event<R>(
+        &self,
+        event_filter: EventFilter,
+        cold: impl FnOnce() -> R,
+        hot: impl FnOnce() -> R,
+    ) -> R {
         if unlikely!(self.event_filter_mask.contains(event_filter)) {
-            cold_call(self, f)
+            cold_path(|| cold())
         } else {
-            TimingGuard::none()
+            hot()
         }
     }
 
+    /// Start profiling a sparse pass. Profiling continues until the
+    /// VerboseTimingGuard returned from this call is dropped.
+    #[inline(always)]
+    pub fn sparse_pass<'a>(&'a self, event_id: &'a str) -> VerboseTimingGuard<'a> {
+        self.handle_event(
+            EventFilter::SPARSE_PASS,
+            || {
+                VerboseTimingGuard::start(
+                    self.profiler
+                        .as_ref()
+                        .map(|profiler| (&**profiler, profiler.sparse_pass_event_kind)),
+                    event_id,
+                    self.verbose_sparse,
+                )
+            },
+            || VerboseTimingGuard::none(),
+        )
+    }
+
+    /// Start profiling a generic pass. Profiling continues until the
+    /// VerboseTimingGuard returned from this call is dropped.
+    #[inline(always)]
+    pub fn generic_pass<'a>(&'a self, event_id: &'a str) -> VerboseTimingGuard<'a> {
+        self.handle_event(
+            EventFilter::GENERIC_PASS,
+            || {
+                VerboseTimingGuard::start(
+                    self.profiler
+                        .as_ref()
+                        .map(|profiler| (&**profiler, profiler.generic_pass_event_kind)),
+                    event_id,
+                    self.verbose_generic,
+                )
+            },
+            || VerboseTimingGuard::none(),
+        )
+    }
+
     /// Start profiling a generic activity. Profiling continues until the
     /// TimingGuard returned from this call is dropped.
     #[inline(always)]
@@ -197,6 +267,8 @@ pub struct SelfProfiler {
     profiler: Profiler,
     event_filter_mask: EventFilter,
     query_event_kind: StringId,
+    sparse_pass_event_kind: StringId,
+    generic_pass_event_kind: StringId,
     generic_activity_event_kind: StringId,
     incremental_load_result_event_kind: StringId,
     query_blocked_event_kind: StringId,
@@ -217,6 +289,8 @@ impl SelfProfiler {
         let profiler = Profiler::new(&path)?;
 
         let query_event_kind = profiler.alloc_string("Query");
+        let sparse_pass_event_kind = profiler.alloc_string("SparsePass");
+        let generic_pass_event_kind = profiler.alloc_string("GenericPass");
         let generic_activity_event_kind = profiler.alloc_string("GenericActivity");
         let incremental_load_result_event_kind = profiler.alloc_string("IncrementalLoadResult");
         let query_blocked_event_kind = profiler.alloc_string("QueryBlocked");
@@ -259,6 +333,8 @@ impl SelfProfiler {
             profiler,
             event_filter_mask,
             query_event_kind,
+            sparse_pass_event_kind,
+            generic_pass_event_kind,
             generic_activity_event_kind,
             incremental_load_result_event_kind,
             query_blocked_event_kind,
@@ -301,3 +377,118 @@ impl<'a> TimingGuard<'a> {
         TimingGuard(None)
     }
 }
+
+#[must_use]
+pub struct VerboseTimingGuard<'a> {
+    event_id: &'a str,
+    start: Option<Instant>,
+    _guard: TimingGuard<'a>,
+}
+
+impl<'a> VerboseTimingGuard<'a> {
+    pub fn start(
+        profiler: Option<(&'a SelfProfiler, StringId)>,
+        event_id: &'a str,
+        verbose: bool,
+    ) -> Self {
+        let _guard = profiler.map_or(TimingGuard::none(), |(profiler, event_kind)| {
+            let event = profiler.profiler.alloc_string(event_id);
+            TimingGuard::start(profiler, event_kind, event)
+        });
+        VerboseTimingGuard {
+            event_id,
+            _guard,
+            start: if verbose { Some(Instant::now()) } else { None },
+        }
+    }
+
+    #[inline(always)]
+    pub fn run<R>(self, f: impl FnOnce() -> R) -> R {
+        let _timer = self;
+        f()
+    }
+
+    fn none() -> Self {
+        VerboseTimingGuard { event_id: "", start: None, _guard: TimingGuard::none() }
+    }
+}
+
+impl Drop for VerboseTimingGuard<'_> {
+    fn drop(&mut self) {
+        self.start.map(|start| print_time_passes_entry(true, self.event_id, start.elapsed()));
+    }
+}
+
+pub fn print_time_passes_entry(do_it: bool, what: &str, dur: Duration) {
+    if !do_it {
+        return;
+    }
+
+    let mem_string = match get_resident() {
+        Some(n) => {
+            let mb = n as f64 / 1_000_000.0;
+            format!("; rss: {}MB", mb.round() as usize)
+        }
+        None => String::new(),
+    };
+    println!("time: {}{}\t{}", duration_to_secs_str(dur), mem_string, what);
+}
+
+// Hack up our own formatting for the duration to make it easier for scripts
+// to parse (always use the same number of decimal places and the same unit).
+pub fn duration_to_secs_str(dur: std::time::Duration) -> String {
+    const NANOS_PER_SEC: f64 = 1_000_000_000.0;
+    let secs = dur.as_secs() as f64 + dur.subsec_nanos() as f64 / NANOS_PER_SEC;
+
+    format!("{:.3}", secs)
+}
+
+// Memory reporting
+#[cfg(unix)]
+fn get_resident() -> Option<usize> {
+    let field = 1;
+    let contents = fs::read("/proc/self/statm").ok()?;
+    let contents = String::from_utf8(contents).ok()?;
+    let s = contents.split_whitespace().nth(field)?;
+    let npages = s.parse::<usize>().ok()?;
+    Some(npages * 4096)
+}
+
+#[cfg(windows)]
+fn get_resident() -> Option<usize> {
+    type BOOL = i32;
+    type DWORD = u32;
+    type HANDLE = *mut u8;
+    use libc::size_t;
+    #[repr(C)]
+    #[allow(non_snake_case)]
+    struct PROCESS_MEMORY_COUNTERS {
+        cb: DWORD,
+        PageFaultCount: DWORD,
+        PeakWorkingSetSize: size_t,
+        WorkingSetSize: size_t,
+        QuotaPeakPagedPoolUsage: size_t,
+        QuotaPagedPoolUsage: size_t,
+        QuotaPeakNonPagedPoolUsage: size_t,
+        QuotaNonPagedPoolUsage: size_t,
+        PagefileUsage: size_t,
+        PeakPagefileUsage: size_t,
+    }
+    #[allow(non_camel_case_types)]
+    type PPROCESS_MEMORY_COUNTERS = *mut PROCESS_MEMORY_COUNTERS;
+    #[link(name = "psapi")]
+    extern "system" {
+        fn GetCurrentProcess() -> HANDLE;
+        fn GetProcessMemoryInfo(
+            Process: HANDLE,
+            ppsmemCounters: PPROCESS_MEMORY_COUNTERS,
+            cb: DWORD,
+        ) -> BOOL;
+    }
+    let mut pmc: PROCESS_MEMORY_COUNTERS = unsafe { mem::zeroed() };
+    pmc.cb = mem::size_of_val(&pmc) as DWORD;
+    match unsafe { GetProcessMemoryInfo(GetCurrentProcess(), &mut pmc, pmc.cb) } {
+        0 => None,
+        _ => Some(pmc.WorkingSetSize as usize),
+    }
+}
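
`print_time_passes_entry`, `duration_to_secs_str`, and the `get_resident` memory probes move into `rustc_data_structures::profiling` largely as-is; per the comment above, durations stay in a fixed "seconds with three decimal places" shape so scripts can parse `-Z time-passes` output. A small runnable check of that format (the helper body mirrors the code above; `main` is illustrative):

    use std::time::Duration;

    // Same formatting as duration_to_secs_str above: always seconds,
    // always three decimal places.
    fn duration_to_secs_str(dur: Duration) -> String {
        const NANOS_PER_SEC: f64 = 1_000_000_000.0;
        let secs = dur.as_secs() as f64 + dur.subsec_nanos() as f64 / NANOS_PER_SEC;
        format!("{:.3}", secs)
    }

    fn main() {
        assert_eq!(duration_to_secs_str(Duration::from_millis(1500)), "1.500");
        assert_eq!(duration_to_secs_str(Duration::from_micros(250)), "0.000");
        // Shape of a -Z time-passes line: "time: <secs>[; rss: <n>MB]\t<what>".
        println!("time: {}\texample pass", duration_to_secs_str(Duration::from_millis(42)));
    }
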
diff --git a/src/librustc_driver/lib.rs b/src/librustc_driver/lib.rs
index 94636725646..379e9bdfa87 100644
--- a/src/librustc_driver/lib.rs
+++ b/src/librustc_driver/lib.rs
@@ -34,8 +34,9 @@ use rustc::session::config::{ErrorOutputType, Input, OutputType, PrintRequest};
 use rustc::session::{config, DiagnosticOutput, Session};
 use rustc::session::{early_error, early_warn};
 use rustc::ty::TyCtxt;
-use rustc::util::common::{print_time_passes_entry, set_time_depth, time, ErrorReported};
+use rustc::util::common::ErrorReported;
 use rustc_codegen_utils::codegen_backend::CodegenBackend;
+use rustc_data_structures::profiling::print_time_passes_entry;
 use rustc_data_structures::sync::SeqCst;
 use rustc_feature::{find_gated_cfg, UnstableFeatures};
 use rustc_interface::util::get_builtin_codegen_backend;
@@ -368,7 +369,7 @@ pub fn run_compiler(
                 queries.global_ctxt()?.peek_mut().enter(|tcx| {
                     let result = tcx.analysis(LOCAL_CRATE);
 
-                    time(sess, "save analysis", || {
+                    sess.time("save analysis", || {
                         save::process_crate(
                             tcx,
                             &expanded_crate,
@@ -1260,7 +1261,6 @@ pub fn main() {
         Err(_) => EXIT_FAILURE,
     };
     // The extra `\t` is necessary to align this label with the others.
-    set_time_depth(0);
     print_time_passes_entry(callbacks.time_passes, "\ttotal", start.elapsed());
     process::exit(exit_code);
 }
diff --git a/src/librustc_incremental/persist/load.rs b/src/librustc_incremental/persist/load.rs
index 770137863bd..cb06dae6ac9 100644
--- a/src/librustc_incremental/persist/load.rs
+++ b/src/librustc_incremental/persist/load.rs
@@ -4,7 +4,6 @@ use rustc::dep_graph::{PreviousDepGraph, SerializedDepGraph, WorkProduct, WorkPr
 use rustc::session::Session;
 use rustc::ty::query::OnDiskCache;
 use rustc::ty::TyCtxt;
-use rustc::util::common::time_ext;
 use rustc_data_structures::fx::FxHashMap;
 use rustc_serialize::opaque::Decoder;
 use rustc_serialize::Decodable as RustcDecodable;
@@ -96,7 +95,6 @@ pub fn load_dep_graph(sess: &Session) -> DepGraphFuture {
     // Since `sess` isn't `Sync`, we perform all accesses to `sess`
     // before we fire the background thread.
 
-    let time_passes = sess.time_passes();
     let prof = sess.prof.clone();
 
     if sess.opts.incremental.is_none() {
@@ -161,38 +159,36 @@ pub fn load_dep_graph(sess: &Session) -> DepGraphFuture {
     }
 
     MaybeAsync::Async(std::thread::spawn(move || {
-        time_ext(time_passes, "background load prev dep-graph", move || {
-            let _prof_timer = prof.generic_activity("incr_comp_load_dep_graph");
-
-            match load_data(report_incremental_info, &path) {
-                LoadResult::DataOutOfDate => LoadResult::DataOutOfDate,
-                LoadResult::Error { message } => LoadResult::Error { message },
-                LoadResult::Ok { data: (bytes, start_pos) } => {
-                    let mut decoder = Decoder::new(&bytes, start_pos);
-                    let prev_commandline_args_hash = u64::decode(&mut decoder)
-                        .expect("Error reading commandline arg hash from cached dep-graph");
-
-                    if prev_commandline_args_hash != expected_hash {
-                        if report_incremental_info {
-                            println!(
-                                "[incremental] completely ignoring cache because of \
+        let _prof_timer = prof.generic_pass("background load prev dep-graph");
+
+        match load_data(report_incremental_info, &path) {
+            LoadResult::DataOutOfDate => LoadResult::DataOutOfDate,
+            LoadResult::Error { message } => LoadResult::Error { message },
+            LoadResult::Ok { data: (bytes, start_pos) } => {
+                let mut decoder = Decoder::new(&bytes, start_pos);
+                let prev_commandline_args_hash = u64::decode(&mut decoder)
+                    .expect("Error reading commandline arg hash from cached dep-graph");
+
+                if prev_commandline_args_hash != expected_hash {
+                    if report_incremental_info {
+                        println!(
+                            "[incremental] completely ignoring cache because of \
                                     differing commandline arguments"
-                            );
-                        }
-                        // We can't reuse the cache, purge it.
-                        debug!("load_dep_graph_new: differing commandline arg hashes");
-
-                        // No need to do any further work
-                        return LoadResult::DataOutOfDate;
+                        );
                     }
+                    // We can't reuse the cache, purge it.
+                    debug!("load_dep_graph_new: differing commandline arg hashes");
 
-                    let dep_graph = SerializedDepGraph::decode(&mut decoder)
-                        .expect("Error reading cached dep-graph");
-
-                    LoadResult::Ok { data: (PreviousDepGraph::new(dep_graph), prev_work_products) }
+                    // No need to do any further work
+                    return LoadResult::DataOutOfDate;
                 }
+
+                let dep_graph = SerializedDepGraph::decode(&mut decoder)
+                    .expect("Error reading cached dep-graph");
+
+                LoadResult::Ok { data: (PreviousDepGraph::new(dep_graph), prev_work_products) }
             }
-        })
+        }
     }))
 }
 
diff --git a/src/librustc_incremental/persist/save.rs b/src/librustc_incremental/persist/save.rs
index 3abda32b3ea..9487f02aa8e 100644
--- a/src/librustc_incremental/persist/save.rs
+++ b/src/librustc_incremental/persist/save.rs
@@ -1,7 +1,6 @@
 use rustc::dep_graph::{DepGraph, DepKind, WorkProduct, WorkProductId};
 use rustc::session::Session;
 use rustc::ty::TyCtxt;
-use rustc::util::common::time;
 use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::sync::join;
 use rustc_serialize::opaque::Encoder;
@@ -33,19 +32,15 @@ pub fn save_dep_graph(tcx: TyCtxt<'_>) {
         join(
             move || {
                 if tcx.sess.opts.debugging_opts.incremental_queries {
-                    let _timer = tcx.prof.generic_activity("incr_comp_persist_result_cache");
-
-                    time(sess, "persist query result cache", || {
+                    sess.time("persist query result cache", || {
                         save_in(sess, query_cache_path, |e| encode_query_cache(tcx, e));
                     });
                 }
             },
             || {
-                time(sess, "persist dep-graph", || {
-                    let _timer = tcx.prof.generic_activity("incr_comp_persist_dep_graph");
-
+                sess.time("persist dep-graph", || {
                     save_in(sess, dep_graph_path, |e| {
-                        time(sess, "encode dep-graph", || encode_dep_graph(tcx, e))
+                        sess.time("encode dep-graph", || encode_dep_graph(tcx, e))
                     });
                 });
             },
@@ -147,10 +142,7 @@ fn encode_dep_graph(tcx: TyCtxt<'_>, encoder: &mut Encoder) {
     tcx.sess.opts.dep_tracking_hash().encode(encoder).unwrap();
 
     // Encode the graph data.
-    let serialized_graph = time(tcx.sess, "getting serialized graph", || {
-        let _timer = tcx.prof.generic_activity("incr_comp_serialize_dep_graph");
-        tcx.dep_graph.serialize()
-    });
+    let serialized_graph = tcx.sess.time("getting serialized graph", || tcx.dep_graph.serialize());
 
     if tcx.sess.opts.debugging_opts.incremental_info {
         #[derive(Clone)]
@@ -231,8 +223,7 @@ fn encode_dep_graph(tcx: TyCtxt<'_>, encoder: &mut Encoder) {
         println!("[incremental]");
     }
 
-    time(tcx.sess, "encoding serialized graph", || {
-        let _timer = tcx.prof.generic_activity("incr_comp_encode_serialized_dep_graph");
+    tcx.sess.time("encoding serialized graph", || {
         serialized_graph.encode(encoder).unwrap();
     });
 }
@@ -253,9 +244,7 @@ fn encode_work_product_index(
 }
 
 fn encode_query_cache(tcx: TyCtxt<'_>, encoder: &mut Encoder) {
-    time(tcx.sess, "serialize query result cache", || {
-        let _timer = tcx.prof.generic_activity("incr_comp_serialize_result_cache");
-
+    tcx.sess.time("serialize query result cache", || {
         tcx.serialize_query_result_cache(encoder).unwrap();
     })
 }
diff --git a/src/librustc_interface/passes.rs b/src/librustc_interface/passes.rs
index 4de0d358afb..cd632c310f0 100644
--- a/src/librustc_interface/passes.rs
+++ b/src/librustc_interface/passes.rs
@@ -17,7 +17,7 @@ use rustc::session::Session;
 use rustc::traits;
 use rustc::ty::steal::Steal;
 use rustc::ty::{self, AllArenas, GlobalCtxt, ResolverOutputs, TyCtxt};
-use rustc::util::common::{time, ErrorReported};
+use rustc::util::common::ErrorReported;
 use rustc_builtin_macros;
 use rustc_codegen_ssa::back::link::emit_metadata;
 use rustc_codegen_utils::codegen_backend::CodegenBackend;
@@ -55,14 +55,10 @@ use std::{env, fs, iter, mem};
 
 pub fn parse<'a>(sess: &'a Session, input: &Input) -> PResult<'a, ast::Crate> {
     sess.diagnostic().set_continue_after_error(sess.opts.debugging_opts.continue_parse_after_error);
-    let krate = time(sess, "parsing", || {
-        let _prof_timer = sess.prof.generic_activity("parse_crate");
-
-        match input {
-            Input::File(file) => parse_crate_from_file(file, &sess.parse_sess),
-            Input::Str { input, name } => {
-                parse_crate_from_source_str(name.clone(), input.clone(), &sess.parse_sess)
-            }
+    let krate = sess.time("parsing", || match input {
+        Input::File(file) => parse_crate_from_file(file, &sess.parse_sess),
+        Input::Str { input, name } => {
+            parse_crate_from_source_str(name.clone(), input.clone(), &sess.parse_sess)
         }
     })?;
 
@@ -162,7 +158,7 @@ pub fn register_plugins<'a>(
     mut krate: ast::Crate,
     crate_name: &str,
 ) -> Result<(ast::Crate, Lrc<lint::LintStore>)> {
-    krate = time(sess, "attributes injection", || {
+    krate = sess.time("attributes injection", || {
         rustc_builtin_macros::cmdline_attrs::inject(
             krate,
             &sess.parse_sess,
@@ -187,7 +183,7 @@ pub fn register_plugins<'a>(
     rustc_incremental::prepare_session_directory(sess, &crate_name, disambiguator);
 
     if sess.opts.incremental.is_some() {
-        time(sess, "garbage-collect incremental cache directory", || {
+        sess.time("garbage-collect incremental cache directory", || {
             let _prof_timer =
                 sess.prof.generic_activity("incr_comp_garbage_collect_session_directories");
             if let Err(e) = rustc_incremental::garbage_collect_session_directories(sess) {
@@ -200,7 +196,7 @@ pub fn register_plugins<'a>(
         });
     }
 
-    time(sess, "recursion limit", || {
+    sess.time("recursion limit", || {
         middle::recursion_limit::update_limits(sess, &krate);
     });
 
@@ -211,8 +207,8 @@ pub fn register_plugins<'a>(
     register_lints(&sess, &mut lint_store);
 
     let registrars =
-        time(sess, "plugin loading", || plugin::load::load_plugins(sess, metadata_loader, &krate));
-    time(sess, "plugin registration", || {
+        sess.time("plugin loading", || plugin::load::load_plugins(sess, metadata_loader, &krate));
+    sess.time("plugin registration", || {
         let mut registry = plugin::Registry { lint_store: &mut lint_store };
         for registrar in registrars {
             registrar(&mut registry);
@@ -230,7 +226,7 @@ fn configure_and_expand_inner<'a>(
     resolver_arenas: &'a ResolverArenas<'a>,
     metadata_loader: &'a MetadataLoaderDyn,
 ) -> Result<(ast::Crate, Resolver<'a>)> {
-    time(sess, "pre-AST-expansion lint checks", || {
+    sess.time("pre-AST-expansion lint checks", || {
         rustc_lint::check_ast_crate(
             sess,
             lint_store,
@@ -244,7 +240,7 @@ fn configure_and_expand_inner<'a>(
     let mut resolver = Resolver::new(sess, &krate, crate_name, metadata_loader, &resolver_arenas);
     rustc_builtin_macros::register_builtin_macros(&mut resolver, sess.edition());
 
-    krate = time(sess, "crate injection", || {
+    krate = sess.time("crate injection", || {
         let alt_std_name = sess.opts.alt_std_name.as_ref().map(|s| Symbol::intern(s));
         let (krate, name) = rustc_builtin_macros::standard_library_imports::inject(
             krate,
@@ -261,8 +257,7 @@ fn configure_and_expand_inner<'a>(
     util::check_attr_crate_type(&krate.attrs, &mut resolver.lint_buffer());
 
     // Expand all macros
-    krate = time(sess, "expansion", || {
-        let _prof_timer = sess.prof.generic_activity("macro_expand_crate");
+    krate = sess.time("expansion", || {
         // Windows dlls do not have rpaths, so they don't know how to find their
         // dependencies. It's up to us to tell the system where to find all the
         // dependent dlls. Note that this uses cfg!(windows) as opposed to
@@ -307,11 +302,11 @@ fn configure_and_expand_inner<'a>(
         let mut ecx = ExtCtxt::new(&sess.parse_sess, cfg, &mut resolver);
 
         // Expand macros now!
-        let krate = time(sess, "expand crate", || ecx.monotonic_expander().expand_crate(krate));
+        let krate = sess.time("expand crate", || ecx.monotonic_expander().expand_crate(krate));
 
         // The rest is error reporting
 
-        time(sess, "check unused macros", || {
+        sess.time("check unused macros", || {
             ecx.check_unused_macros();
         });
 
@@ -330,7 +325,7 @@ fn configure_and_expand_inner<'a>(
         krate
     });
 
-    time(sess, "maybe building test harness", || {
+    sess.time("maybe building test harness", || {
         rustc_builtin_macros::test_harness::inject(
             &sess.parse_sess,
             &mut resolver,
@@ -354,7 +349,7 @@ fn configure_and_expand_inner<'a>(
         util::ReplaceBodyWithLoop::new(&mut resolver).visit_crate(&mut krate);
     }
 
-    let has_proc_macro_decls = time(sess, "AST validation", || {
+    let has_proc_macro_decls = sess.time("AST validation", || {
         ast_validation::check_crate(sess, &krate, &mut resolver.lint_buffer())
     });
 
@@ -376,7 +371,7 @@ fn configure_and_expand_inner<'a>(
         msg.warn("The generated documentation may be incorrect");
         msg.emit()
     } else {
-        krate = time(sess, "maybe creating a macro crate", || {
+        krate = sess.time("maybe creating a macro crate", || {
             let num_crate_types = crate_types.len();
             let is_test_crate = sess.opts.test;
             rustc_builtin_macros::proc_macro_harness::inject(
@@ -406,12 +401,12 @@ fn configure_and_expand_inner<'a>(
         println!("{}", json::as_json(&krate));
     }
 
-    time(sess, "name resolution", || {
+    sess.time("name resolution", || {
         resolver.resolve_crate(&krate);
     });
 
     // Needs to go *after* expansion to be able to check the results of macro expansion.
-    time(sess, "complete gated feature checking", || {
+    sess.time("complete gated feature checking", || {
         syntax::feature_gate::check_crate(
             &krate,
             &sess.parse_sess,
@@ -440,7 +435,7 @@ pub fn lower_to_hir<'res, 'tcx>(
     arena: &'tcx Arena<'tcx>,
 ) -> Result<hir::map::Forest<'tcx>> {
     // Lower AST to HIR.
-    let hir_forest = time(sess, "lowering AST -> HIR", || {
+    let hir_forest = sess.time("lowering AST -> HIR", || {
         let hir_crate = rustc_ast_lowering::lower_crate(
             sess,
             &dep_graph,
@@ -457,7 +452,7 @@ pub fn lower_to_hir<'res, 'tcx>(
         hir::map::Forest::new(hir_crate, &dep_graph)
     });
 
-    time(sess, "early lint checks", || {
+    sess.time("early lint checks", || {
         rustc_lint::check_ast_crate(
             sess,
             lint_store,
@@ -731,12 +726,12 @@ pub fn create_global_ctxt<'tcx>(
     let defs = mem::take(&mut resolver_outputs.definitions);
 
     // Construct the HIR map.
-    let hir_map = time(sess, "indexing HIR", || {
+    let hir_map = sess.time("indexing HIR", || {
         hir::map::map_crate(sess, &*resolver_outputs.cstore, &hir_forest, defs)
     });
 
     let query_result_on_disk_cache =
-        time(sess, "load query result cache", || rustc_incremental::load_query_result_cache(sess));
+        sess.time("load query result cache", || rustc_incremental::load_query_result_cache(sess));
 
     let codegen_backend = compiler.codegen_backend();
     let mut local_providers = ty::query::Providers::default();
@@ -769,7 +764,7 @@ pub fn create_global_ctxt<'tcx>(
 
     // Do some initialization of the DepGraph that can only be done with the tcx available.
     ty::tls::enter_global(&gcx, |tcx| {
-        time(tcx.sess, "dep graph tcx init", || rustc_incremental::dep_graph_tcx_init(tcx));
+        tcx.sess.time("dep graph tcx init", || rustc_incremental::dep_graph_tcx_init(tcx));
     });
 
     QueryContext(gcx)
@@ -783,18 +778,17 @@ fn analysis(tcx: TyCtxt<'_>, cnum: CrateNum) -> Result<()> {
     let sess = tcx.sess;
     let mut entry_point = None;
 
-    time(sess, "misc checking 1", || {
+    sess.time("misc checking 1", || {
         parallel!(
             {
-                entry_point = time(sess, "looking for entry point", || {
-                    rustc_passes::entry::find_entry_point(tcx)
-                });
+                entry_point = sess
+                    .time("looking for entry point", || rustc_passes::entry::find_entry_point(tcx));
 
-                time(sess, "looking for plugin registrar", || {
+                sess.time("looking for plugin registrar", || {
                     plugin::build::find_plugin_registrar(tcx)
                 });
 
-                time(sess, "looking for derive registrar", || proc_macro_decls::find(tcx));
+                sess.time("looking for derive registrar", || proc_macro_decls::find(tcx));
             },
             {
                 par_iter(&tcx.hir().krate().modules).for_each(|(&module, _)| {
@@ -811,17 +805,17 @@ fn analysis(tcx: TyCtxt<'_>, cnum: CrateNum) -> Result<()> {
     // passes are timed inside typeck
     typeck::check_crate(tcx)?;
 
-    time(sess, "misc checking 2", || {
+    sess.time("misc checking 2", || {
         parallel!(
             {
-                time(sess, "match checking", || {
+                sess.time("match checking", || {
                     tcx.par_body_owners(|def_id| {
                         tcx.ensure().check_match(def_id);
                     });
                 });
             },
             {
-                time(sess, "liveness checking + intrinsic checking", || {
+                sess.time("liveness checking + intrinsic checking", || {
                     par_iter(&tcx.hir().krate().modules).for_each(|(&module, _)| {
                         // this must run before MIR dump, because
                         // "not all control paths return a value" is reported here.
@@ -837,21 +831,21 @@ fn analysis(tcx: TyCtxt<'_>, cnum: CrateNum) -> Result<()> {
         );
     });
 
-    time(sess, "MIR borrow checking", || {
+    sess.time("MIR borrow checking", || {
         tcx.par_body_owners(|def_id| tcx.ensure().mir_borrowck(def_id));
     });
 
-    time(sess, "dumping Chalk-like clauses", || {
+    sess.time("dumping Chalk-like clauses", || {
         rustc_traits::lowering::dump_program_clauses(tcx);
     });
 
-    time(sess, "MIR effect checking", || {
+    sess.time("MIR effect checking", || {
         for def_id in tcx.body_owners() {
             mir::transform::check_unsafety::check_unsafety(tcx, def_id)
         }
     });
 
-    time(sess, "layout testing", || layout_test::test_layout(tcx));
+    sess.time("layout testing", || layout_test::test_layout(tcx));
 
     // Avoid overwhelming user with errors if borrow checking failed.
     // I'm not sure how helpful this is, to be honest, but it avoids a
@@ -862,28 +856,28 @@ fn analysis(tcx: TyCtxt<'_>, cnum: CrateNum) -> Result<()> {
         return Err(ErrorReported);
     }
 
-    time(sess, "misc checking 3", || {
+    sess.time("misc checking 3", || {
         parallel!(
             {
-                time(sess, "privacy access levels", || {
+                sess.time("privacy access levels", || {
                     tcx.ensure().privacy_access_levels(LOCAL_CRATE);
                 });
                 parallel!(
                     {
-                        time(sess, "private in public", || {
+                        sess.time("private in public", || {
                             tcx.ensure().check_private_in_public(LOCAL_CRATE);
                         });
                     },
                     {
-                        time(sess, "death checking", || rustc_passes::dead::check_crate(tcx));
+                        sess.time("death checking", || rustc_passes::dead::check_crate(tcx));
                     },
                     {
-                        time(sess, "unused lib feature checking", || {
+                        sess.time("unused lib feature checking", || {
                             rustc_passes::stability::check_unused_or_stable_features(tcx)
                         });
                     },
                     {
-                        time(sess, "lint checking", || {
+                        sess.time("lint checking", || {
                             rustc_lint::check_crate(tcx, || {
                                 rustc_lint::BuiltinCombinedLateLintPass::new()
                             });
@@ -892,7 +886,7 @@ fn analysis(tcx: TyCtxt<'_>, cnum: CrateNum) -> Result<()> {
                 );
             },
             {
-                time(sess, "privacy checking modules", || {
+                sess.time("privacy checking modules", || {
                     par_iter(&tcx.hir().krate().modules).for_each(|(&module, _)| {
                         tcx.ensure().check_mod_privacy(tcx.hir().local_def_id(module));
                     });
@@ -978,10 +972,9 @@ pub fn start_codegen<'tcx>(
     }
 
     let (metadata, need_metadata_module) =
-        time(tcx.sess, "metadata encoding and writing", || encode_and_write_metadata(tcx, outputs));
+        tcx.sess.time("metadata encoding and writing", || encode_and_write_metadata(tcx, outputs));
 
-    let codegen = time(tcx.sess, "codegen", move || {
-        let _prof_timer = tcx.prof.generic_activity("codegen_crate");
+    let codegen = tcx.sess.time("codegen", move || {
         codegen_backend.codegen_crate(tcx, metadata, need_metadata_module)
     });
 
diff --git a/src/librustc_interface/queries.rs b/src/librustc_interface/queries.rs
index 4318f99e690..c0a75e7efbf 100644
--- a/src/librustc_interface/queries.rs
+++ b/src/librustc_interface/queries.rs
@@ -11,7 +11,7 @@ use rustc::session::config::{OutputFilenames, OutputType};
 use rustc::session::Session;
 use rustc::ty::steal::Steal;
 use rustc::ty::{AllArenas, GlobalCtxt, ResolverOutputs};
-use rustc::util::common::{time, ErrorReported};
+use rustc::util::common::ErrorReported;
 use rustc_codegen_utils::codegen_backend::CodegenBackend;
 use rustc_data_structures::sync::{Lrc, Once, WorkerLocal};
 use rustc_incremental::DepGraphFuture;
@@ -195,7 +195,7 @@ impl<'tcx> Queries<'tcx> {
                 None => DepGraph::new_disabled(),
                 Some(future) => {
                     let (prev_graph, prev_work_products) =
-                        time(self.session(), "blocked while dep-graph loading finishes", || {
+                        self.session().time("blocked while dep-graph loading finishes", || {
                             future
                                 .open()
                                 .unwrap_or_else(|e| rustc_incremental::LoadResult::Error {
diff --git a/src/librustc_lint/early.rs b/src/librustc_lint/early.rs
index 482a7822b76..a0995685459 100644
--- a/src/librustc_lint/early.rs
+++ b/src/librustc_lint/early.rs
@@ -18,7 +18,6 @@ use rustc::lint::{EarlyContext, LintStore};
 use rustc::lint::{EarlyLintPass, EarlyLintPassObject};
 use rustc::lint::{LintBuffer, LintContext, LintPass};
 use rustc::session::Session;
-use rustc::util::common::time;
 
 use rustc_span::Span;
 use std::slice;
@@ -351,7 +350,7 @@ pub fn check_ast_crate<T: EarlyLintPass>(
         }
     } else {
         for pass in &mut passes {
-            buffered = time(sess, &format!("running lint: {}", pass.name()), || {
+            buffered = sess.time(&format!("running lint: {}", pass.name()), || {
                 early_lint_crate(
                     sess,
                     lint_store,
diff --git a/src/librustc_lint/late.rs b/src/librustc_lint/late.rs
index 1248cfd3bc2..e66de9cc1bb 100644
--- a/src/librustc_lint/late.rs
+++ b/src/librustc_lint/late.rs
@@ -22,7 +22,6 @@ use rustc::lint::LateContext;
 use rustc::lint::LintPass;
 use rustc::lint::{LateLintPass, LateLintPassObject};
 use rustc::ty::{self, TyCtxt};
-use rustc::util::common::time;
 
 use rustc_data_structures::sync::{join, par_iter, ParallelIterator};
 use rustc_span::Span;
@@ -433,7 +432,7 @@ fn late_lint_crate<'tcx, T: for<'a> LateLintPass<'a, 'tcx>>(tcx: TyCtxt<'tcx>, b
         late_lint_pass_crate(tcx, builtin_lints);
     } else {
         for pass in &mut passes {
-            time(tcx.sess, &format!("running late lint: {}", pass.name()), || {
+            tcx.sess.time(&format!("running late lint: {}", pass.name()), || {
                 late_lint_pass_crate(tcx, LateLintPassObjects { lints: slice::from_mut(pass) });
             });
         }
@@ -442,7 +441,7 @@ fn late_lint_crate<'tcx, T: for<'a> LateLintPass<'a, 'tcx>>(tcx: TyCtxt<'tcx>, b
             tcx.lint_store.late_module_passes.iter().map(|pass| (pass)()).collect();
 
         for pass in &mut passes {
-            time(tcx.sess, &format!("running late module lint: {}", pass.name()), || {
+            tcx.sess.time(&format!("running late module lint: {}", pass.name()), || {
                 late_lint_pass_crate(tcx, LateLintPassObjects { lints: slice::from_mut(pass) });
             });
         }
@@ -456,13 +455,13 @@ pub fn check_crate<'tcx, T: for<'a> LateLintPass<'a, 'tcx>>(
 ) {
     join(
         || {
-            time(tcx.sess, "crate lints", || {
+            tcx.sess.time("crate lints", || {
                 // Run whole crate non-incremental lints
                 late_lint_crate(tcx, builtin_lints());
             });
         },
         || {
-            time(tcx.sess, "module lints", || {
+            tcx.sess.time("module lints", || {
                 // Run per-module lints
                 par_iter(&tcx.hir().krate().modules).for_each(|(&module, _)| {
                     tcx.ensure().lint_mod(tcx.hir().local_def_id(module));
diff --git a/src/librustc_mir/monomorphize/collector.rs b/src/librustc_mir/monomorphize/collector.rs
index 43d57e0e85f..231ee792918 100644
--- a/src/librustc_mir/monomorphize/collector.rs
+++ b/src/librustc_mir/monomorphize/collector.rs
@@ -191,7 +191,6 @@ use rustc::ty::adjustment::{CustomCoerceUnsized, PointerCast};
 use rustc::ty::print::obsolete::DefPathBasedNames;
 use rustc::ty::subst::{InternalSubsts, Subst, SubstsRef};
 use rustc::ty::{self, GenericParamDefKind, Instance, Ty, TyCtxt, TypeFoldable};
-use rustc::util::common::time;
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
 use rustc_data_structures::sync::{par_iter, MTLock, MTRef, ParallelIterator};
 use rustc_index::bit_set::GrowableBitSet;
@@ -284,7 +283,7 @@ pub fn collect_crate_mono_items(
 ) -> (FxHashSet<MonoItem<'_>>, InliningMap<'_>) {
     let _prof_timer = tcx.prof.generic_activity("monomorphization_collector");
 
-    let roots = time(tcx.sess, "collecting roots", || {
+    let roots = tcx.sess.time("collecting roots", || {
         let _prof_timer = tcx.prof.generic_activity("monomorphization_collector_root_collections");
         collect_roots(tcx, mode)
     });
@@ -295,12 +294,10 @@ pub fn collect_crate_mono_items(
     let mut inlining_map = MTLock::new(InliningMap::new());
 
     {
-        let _prof_timer = tcx.prof.generic_activity("monomorphization_collector_graph_walk");
-
         let visited: MTRef<'_, _> = &mut visited;
         let inlining_map: MTRef<'_, _> = &mut inlining_map;
 
-        time(tcx.sess, "collecting mono items", || {
+        tcx.sess.time("collecting mono items", || {
             par_iter(roots).for_each(|root| {
                 let mut recursion_depths = DefIdMap::default();
                 collect_items_rec(tcx, root, visited, &mut recursion_depths, inlining_map);
diff --git a/src/librustc_mir/monomorphize/partitioning.rs b/src/librustc_mir/monomorphize/partitioning.rs
index 9c8bcfad6da..766d6bbe517 100644
--- a/src/librustc_mir/monomorphize/partitioning.rs
+++ b/src/librustc_mir/monomorphize/partitioning.rs
@@ -105,7 +105,6 @@ use rustc::mir::mono::{InstantiationMode, MonoItem};
 use rustc::ty::print::characteristic_def_id_of_type;
 use rustc::ty::query::Providers;
 use rustc::ty::{self, DefIdTree, InstanceDef, TyCtxt};
-use rustc::util::common::time;
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
 use rustc_span::symbol::Symbol;
 
@@ -866,7 +865,7 @@ fn collect_and_partition_mono_items(
         }
     };
 
-    let (items, inlining_map) = time(tcx.sess, "monomorphization collection", || {
+    let (items, inlining_map) = tcx.sess.time("monomorphization collection", || {
         collector::collect_crate_mono_items(tcx, collection_mode)
     });
 
@@ -880,7 +879,7 @@ fn collect_and_partition_mono_items(
         PartitioningStrategy::FixedUnitCount(tcx.sess.codegen_units())
     };
 
-    let codegen_units = time(tcx.sess, "codegen unit partitioning", || {
+    let codegen_units = tcx.sess.time("codegen unit partitioning", || {
         partition(tcx, items.iter().cloned(), strategy, &inlining_map)
             .into_iter()
             .map(Arc::new)
diff --git a/src/librustc_session/session.rs b/src/librustc_session/session.rs
index 15911db46fe..dba5b9f3f14 100644
--- a/src/librustc_session/session.rs
+++ b/src/librustc_session/session.rs
@@ -9,7 +9,7 @@ use crate::config::{self, OutputType, PrintRequest, Sanitizer, SwitchWithOptPath
 use crate::filesearch;
 use crate::lint;
 use crate::search_paths::{PathKind, SearchPath};
-use crate::utils::duration_to_secs_str;
+use rustc_data_structures::profiling::duration_to_secs_str;
 use rustc_errors::ErrorReported;
 
 use rustc_data_structures::base_n;
@@ -398,9 +398,6 @@ impl Session {
     pub fn time_passes(&self) -> bool {
         self.opts.debugging_opts.time_passes || self.opts.debugging_opts.time
     }
-    pub fn time_extended(&self) -> bool {
-        self.opts.debugging_opts.time_passes
-    }
     pub fn instrument_mcount(&self) -> bool {
         self.opts.debugging_opts.instrument_mcount
     }
@@ -1030,6 +1027,12 @@ fn build_session_(
         CguReuseTracker::new_disabled()
     };
 
+    let prof = SelfProfilerRef::new(
+        self_profiler,
+        sopts.debugging_opts.time_passes || sopts.debugging_opts.time,
+        sopts.debugging_opts.time_passes,
+    );
+
     let sess = Session {
         target: target_cfg,
         host,
@@ -1049,7 +1052,7 @@ fn build_session_(
         imported_macro_spans: OneThread::new(RefCell::new(FxHashMap::default())),
         incr_comp_session: OneThread::new(RefCell::new(IncrCompSession::NotInitialized)),
         cgu_reuse_tracker,
-        prof: SelfProfilerRef::new(self_profiler),
+        prof,
         perf_stats: PerfStats {
             symbol_hash_time: Lock::new(Duration::from_secs(0)),
             decode_def_path_tables_time: Lock::new(Duration::from_secs(0)),
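Aside on the two new boolean arguments above (not part of the patch): judging from the values passed and the `time_passes()` accessor earlier in this file, the first flag presumably enables verbose timing output when either -Z time or -Z time-passes is set, while the second presumably enables the extra per-pass detail only for -Z time-passes. A hypothetical standalone sketch of that mapping, with made-up names (the real options live in rustc_session's debugging options, and the real constructor is SelfProfilerRef::new):

    // Illustration only: how the two verbosity flags are derived.
    struct DebugOpts {
        time: bool,        // -Z time
        time_passes: bool, // -Z time-passes
    }

    // Returns (verbose timing enabled, extra-verbose per-pass timing enabled).
    fn profiling_flags(opts: &DebugOpts) -> (bool, bool) {
        (opts.time_passes || opts.time, opts.time_passes)
    }

    fn main() {
        assert_eq!(profiling_flags(&DebugOpts { time: true, time_passes: false }), (true, false));
        assert_eq!(profiling_flags(&DebugOpts { time: false, time_passes: true }), (true, true));
    }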
diff --git a/src/librustc_session/utils.rs b/src/librustc_session/utils.rs
index 298727d07e4..7806f5e8753 100644
--- a/src/librustc_session/utils.rs
+++ b/src/librustc_session/utils.rs
@@ -1,10 +1,13 @@
-// Hack up our own formatting for the duration to make it easier for scripts
-// to parse (always use the same number of decimal places and the same unit).
-pub fn duration_to_secs_str(dur: std::time::Duration) -> String {
-    const NANOS_PER_SEC: f64 = 1_000_000_000.0;
-    let secs = dur.as_secs() as f64 + dur.subsec_nanos() as f64 / NANOS_PER_SEC;
+use crate::session::Session;
+use rustc_data_structures::profiling::VerboseTimingGuard;
 
-    format!("{:.3}", secs)
+impl Session {
+    pub fn timer<'a>(&'a self, what: &'a str) -> VerboseTimingGuard<'a> {
+        self.prof.sparse_pass(what)
+    }
+    pub fn time<R>(&self, what: &str, f: impl FnOnce() -> R) -> R {
+        self.prof.sparse_pass(what).run(f)
+    }
 }
 
 #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, RustcEncodable, RustcDecodable)]
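The hunk above is the heart of the patch: `Session::time` and `Session::timer` replace the old free `time` function by delegating to the self-profiler's VerboseTimingGuard. As a rough, standalone illustration of the pattern these helpers follow (run a closure, report the elapsed time in the fixed three-decimal-seconds format that `duration_to_secs_str` used, and return the closure's result) — this is a sketch only, not rustc's actual implementation, which lives in rustc_data_structures::profiling:

    use std::time::Instant;

    // Minimal stand-in for Session::time: times `f`, prints the elapsed
    // seconds with three decimal places, and passes the result through.
    fn time<R>(what: &str, f: impl FnOnce() -> R) -> R {
        let start = Instant::now();
        let result = f();
        println!("time: {:>7.3}s\t{}", start.elapsed().as_secs_f64(), what);
        result
    }

    fn main() {
        let total: u64 = time("summing", || (0..1_000_000u64).sum());
        println!("total = {}", total);
    }

Call sites look the same as in the rest of the diff: the closure's return value flows out of `time`, which is why expressions like `let krate = sess.time("parsing", || ...)?` keep working unchanged.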
diff --git a/src/librustc_typeck/coherence/mod.rs b/src/librustc_typeck/coherence/mod.rs
index cba0436774d..2d19b73eac3 100644
--- a/src/librustc_typeck/coherence/mod.rs
+++ b/src/librustc_typeck/coherence/mod.rs
@@ -10,7 +10,6 @@ use crate::hir::HirId;
 use rustc::traits;
 use rustc::ty::query::Providers;
 use rustc::ty::{self, TyCtxt, TypeFoldable};
-use rustc::util::common::time;
 
 use rustc_error_codes::*;
 
@@ -146,8 +145,8 @@ pub fn check_coherence(tcx: TyCtxt<'_>) {
         tcx.ensure().coherent_trait(trait_def_id);
     }
 
-    time(tcx.sess, "unsafety checking", || unsafety::check(tcx));
-    time(tcx.sess, "orphan checking", || orphan::check(tcx));
+    tcx.sess.time("unsafety checking", || unsafety::check(tcx));
+    tcx.sess.time("orphan checking", || orphan::check(tcx));
 
     // these queries are executed for side-effects (error reporting):
     tcx.ensure().crate_inherent_impls(LOCAL_CRATE);
diff --git a/src/librustc_typeck/lib.rs b/src/librustc_typeck/lib.rs
index 289a4dc3ecb..dbf4c7655c1 100644
--- a/src/librustc_typeck/lib.rs
+++ b/src/librustc_typeck/lib.rs
@@ -108,7 +108,6 @@ use rustc::util;
 use rustc::util::common::ErrorReported;
 use rustc_span::{Span, DUMMY_SP};
 use rustc_target::spec::abi::Abi;
-use util::common::time;
 
 use rustc_error_codes::*;
 
@@ -304,13 +303,13 @@ pub fn provide(providers: &mut Providers<'_>) {
 }
 
 pub fn check_crate(tcx: TyCtxt<'_>) -> Result<(), ErrorReported> {
-    let _prof_timer = tcx.prof.generic_activity("type_check_crate");
+    let _prof_timer = tcx.sess.timer("type_check_crate");
 
     // this ensures that later parts of type checking can assume that items
     // have valid types and not error
     // FIXME(matthewjasper) We shouldn't need to do this.
     tcx.sess.track_errors(|| {
-        time(tcx.sess, "type collecting", || {
+        tcx.sess.time("type collecting", || {
             for &module in tcx.hir().krate().modules.keys() {
                 tcx.ensure().collect_mod_item_types(tcx.hir().local_def_id(module));
             }
@@ -319,35 +318,35 @@ pub fn check_crate(tcx: TyCtxt<'_>) -> Result<(), ErrorReported> {
 
     if tcx.features().rustc_attrs {
         tcx.sess.track_errors(|| {
-            time(tcx.sess, "outlives testing", || outlives::test::test_inferred_outlives(tcx));
+            tcx.sess.time("outlives testing", || outlives::test::test_inferred_outlives(tcx));
         })?;
     }
 
     tcx.sess.track_errors(|| {
-        time(tcx.sess, "impl wf inference", || impl_wf_check::impl_wf_check(tcx));
+        tcx.sess.time("impl wf inference", || impl_wf_check::impl_wf_check(tcx));
     })?;
 
     tcx.sess.track_errors(|| {
-        time(tcx.sess, "coherence checking", || coherence::check_coherence(tcx));
+        tcx.sess.time("coherence checking", || coherence::check_coherence(tcx));
     })?;
 
     if tcx.features().rustc_attrs {
         tcx.sess.track_errors(|| {
-            time(tcx.sess, "variance testing", || variance::test::test_variance(tcx));
+            tcx.sess.time("variance testing", || variance::test::test_variance(tcx));
         })?;
     }
 
     tcx.sess.track_errors(|| {
-        time(tcx.sess, "wf checking", || check::check_wf_new(tcx));
+        tcx.sess.time("wf checking", || check::check_wf_new(tcx));
     })?;
 
-    time(tcx.sess, "item-types checking", || {
+    tcx.sess.time("item-types checking", || {
         for &module in tcx.hir().krate().modules.keys() {
             tcx.ensure().check_mod_item_types(tcx.hir().local_def_id(module));
         }
     });
 
-    time(tcx.sess, "item-bodies checking", || tcx.typeck_item_bodies(LOCAL_CRATE));
+    tcx.sess.time("item-bodies checking", || tcx.typeck_item_bodies(LOCAL_CRATE));
 
     check_unused::check_crate(tcx);
     check_for_entry_fn(tcx);