about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--src/librustc/session/config.rs20
-rw-r--r--src/librustc_llvm/build.rs1
-rw-r--r--src/librustc_llvm/ffi.rs56
-rw-r--r--src/librustc_trans/back/link.rs43
-rw-r--r--src/librustc_trans/back/lto.rs465
-rw-r--r--src/librustc_trans/back/write.rs88
-rw-r--r--src/librustc_trans/base.rs15
-rw-r--r--src/librustc_trans/partitioning.rs3
-rw-r--r--src/librustc_trans/time_graph.rs152
-rw-r--r--src/rustllvm/PassWrapper.cpp461
-rw-r--r--src/test/codegen-units/item-collection/drop_in_place_intrinsic.rs6
-rw-r--r--src/test/codegen-units/item-collection/generic-drop-glue.rs12
-rw-r--r--src/test/codegen-units/item-collection/instantiation-through-vtable.rs4
-rw-r--r--src/test/codegen-units/item-collection/non-generic-drop-glue.rs4
-rw-r--r--src/test/codegen-units/item-collection/transitive-drop-glue.rs18
-rw-r--r--src/test/codegen-units/item-collection/tuple-drop-glue.rs8
-rw-r--r--src/test/codegen-units/item-collection/unsizing.rs8
-rw-r--r--src/test/run-make/extra-filename-with-temp-outputs/Makefile2
-rw-r--r--src/test/run-make/sepcomp-cci-copies/Makefile2
-rw-r--r--src/test/run-make/sepcomp-inlining/Makefile8
-rw-r--r--src/test/run-make/sepcomp-separate/Makefile2
-rw-r--r--src/test/run-pass/auxiliary/thin-lto-inlines-aux.rs17
-rw-r--r--src/test/run-pass/thin-lto-inlines.rs39
-rw-r--r--src/test/run-pass/thin-lto-inlines2.rs38
24 files changed, 1289 insertions, 183 deletions
diff --git a/src/librustc/session/config.rs b/src/librustc/session/config.rs
index 7c1d457a6ee..6ef64ef3d96 100644
--- a/src/librustc/session/config.rs
+++ b/src/librustc/session/config.rs
@@ -409,9 +409,7 @@ impl_stable_hash_for!(struct self::OutputFilenames {
     outputs
 });
 
-/// Codegen unit names generated by the numbered naming scheme will contain this
-/// marker right before the index of the codegen unit.
-pub const NUMBERED_CODEGEN_UNIT_MARKER: &'static str = ".cgu-";
+pub const RUST_CGU_EXT: &str = "rust-cgu";
 
 impl OutputFilenames {
     pub fn path(&self, flavor: OutputType) -> PathBuf {
@@ -442,22 +440,14 @@ impl OutputFilenames {
         let mut extension = String::new();
 
         if let Some(codegen_unit_name) = codegen_unit_name {
-            if codegen_unit_name.contains(NUMBERED_CODEGEN_UNIT_MARKER) {
-                // If we use the numbered naming scheme for modules, we don't want
-                // the files to look like <crate-name><extra>.<crate-name>.<index>.<ext>
-                // but simply <crate-name><extra>.<index>.<ext>
-                let marker_offset = codegen_unit_name.rfind(NUMBERED_CODEGEN_UNIT_MARKER)
-                                                     .unwrap();
-                let index_offset = marker_offset + NUMBERED_CODEGEN_UNIT_MARKER.len();
-                extension.push_str(&codegen_unit_name[index_offset .. ]);
-            } else {
-                extension.push_str(codegen_unit_name);
-            };
+            extension.push_str(codegen_unit_name);
         }
 
         if !ext.is_empty() {
             if !extension.is_empty() {
                 extension.push_str(".");
+                extension.push_str(RUST_CGU_EXT);
+                extension.push_str(".");
             }
 
             extension.push_str(ext);
@@ -1105,6 +1095,8 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options,
                  "run the non-lexical lifetimes MIR pass"),
     trans_time_graph: bool = (false, parse_bool, [UNTRACKED],
         "generate a graphical HTML report of time spent in trans and LLVM"),
+    thinlto: bool = (false, parse_bool, [TRACKED],
+        "enable ThinLTO when possible"),
 }
 
 pub fn default_lib_output() -> CrateType {
diff --git a/src/librustc_llvm/build.rs b/src/librustc_llvm/build.rs
index dde7a38efc7..676bbc87b6c 100644
--- a/src/librustc_llvm/build.rs
+++ b/src/librustc_llvm/build.rs
@@ -115,6 +115,7 @@ fn main() {
                                 "linker",
                                 "asmparser",
                                 "mcjit",
+                                "lto",
                                 "interpreter",
                                 "instrumentation"];
 
diff --git a/src/librustc_llvm/ffi.rs b/src/librustc_llvm/ffi.rs
index fd4a136f50b..3399bf2acd8 100644
--- a/src/librustc_llvm/ffi.rs
+++ b/src/librustc_llvm/ffi.rs
@@ -345,6 +345,20 @@ pub enum PassKind {
     Module,
 }
 
+/// LLVMRustThinLTOData
+pub enum ThinLTOData {}
+
+/// LLVMRustThinLTOBuffer
+pub enum ThinLTOBuffer {}
+
+/// LLVMRustThinLTOModule
+#[repr(C)]
+pub struct ThinLTOModule {
+    pub identifier: *const c_char,
+    pub data: *const u8,
+    pub len: usize,
+}
+
 // Opaque pointer types
 #[allow(missing_copy_implementations)]
 pub enum Module_opaque {}
@@ -1271,6 +1285,9 @@ extern "C" {
                                                         PM: PassManagerRef,
                                                         Internalize: Bool,
                                                         RunInliner: Bool);
+    pub fn LLVMRustPassManagerBuilderPopulateThinLTOPassManager(
+        PMB: PassManagerBuilderRef,
+        PM: PassManagerRef) -> bool;
 
     // Stuff that's in rustllvm/ because it's not upstream yet.
 
@@ -1685,4 +1702,43 @@ extern "C" {
     pub fn LLVMRustModuleBufferLen(p: *const ModuleBuffer) -> usize;
     pub fn LLVMRustModuleBufferFree(p: *mut ModuleBuffer);
     pub fn LLVMRustModuleCost(M: ModuleRef) -> u64;
+
+    pub fn LLVMRustThinLTOAvailable() -> bool;
+    pub fn LLVMRustWriteThinBitcodeToFile(PMR: PassManagerRef,
+                                          M: ModuleRef,
+                                          BC: *const c_char) -> bool;
+    pub fn LLVMRustThinLTOBufferCreate(M: ModuleRef) -> *mut ThinLTOBuffer;
+    pub fn LLVMRustThinLTOBufferFree(M: *mut ThinLTOBuffer);
+    pub fn LLVMRustThinLTOBufferPtr(M: *const ThinLTOBuffer) -> *const c_char;
+    pub fn LLVMRustThinLTOBufferLen(M: *const ThinLTOBuffer) -> size_t;
+    pub fn LLVMRustCreateThinLTOData(
+        Modules: *const ThinLTOModule,
+        NumModules: c_uint,
+        PreservedSymbols: *const *const c_char,
+        PreservedSymbolsLen: c_uint,
+    ) -> *mut ThinLTOData;
+    pub fn LLVMRustPrepareThinLTORename(
+        Data: *const ThinLTOData,
+        Module: ModuleRef,
+    ) -> bool;
+    pub fn LLVMRustPrepareThinLTOResolveWeak(
+        Data: *const ThinLTOData,
+        Module: ModuleRef,
+    ) -> bool;
+    pub fn LLVMRustPrepareThinLTOInternalize(
+        Data: *const ThinLTOData,
+        Module: ModuleRef,
+    ) -> bool;
+    pub fn LLVMRustPrepareThinLTOImport(
+        Data: *const ThinLTOData,
+        Module: ModuleRef,
+    ) -> bool;
+    pub fn LLVMRustFreeThinLTOData(Data: *mut ThinLTOData);
+    pub fn LLVMRustParseBitcodeForThinLTO(
+        Context: ContextRef,
+        Data: *const u8,
+        len: usize,
+        Identifier: *const c_char,
+    ) -> ModuleRef;
+    pub fn LLVMGetModuleIdentifier(M: ModuleRef, size: *mut usize) -> *const c_char;
 }
diff --git a/src/librustc_trans/back/link.rs b/src/librustc_trans/back/link.rs
index 3badc1b9a69..3f25c182fa2 100644
--- a/src/librustc_trans/back/link.rs
+++ b/src/librustc_trans/back/link.rs
@@ -16,6 +16,7 @@ use super::rpath::RPathConfig;
 use super::rpath;
 use metadata::METADATA_FILENAME;
 use rustc::session::config::{self, NoDebugInfo, OutputFilenames, OutputType, PrintRequest};
+use rustc::session::config::RUST_CGU_EXT;
 use rustc::session::filesearch;
 use rustc::session::search_paths::PathKind;
 use rustc::session::Session;
@@ -45,13 +46,9 @@ use syntax::attr;
 /// The LLVM module name containing crate-metadata. This includes a `.` on
 /// purpose, so it cannot clash with the name of a user-defined module.
 pub const METADATA_MODULE_NAME: &'static str = "crate.metadata";
-/// The name of the crate-metadata object file the compiler generates. Must
-/// match up with `METADATA_MODULE_NAME`.
-pub const METADATA_OBJ_NAME: &'static str = "crate.metadata.o";
 
 // same as for metadata above, but for allocator shim
 pub const ALLOCATOR_MODULE_NAME: &'static str = "crate.allocator";
-pub const ALLOCATOR_OBJ_NAME: &'static str = "crate.allocator.o";
 
 pub use rustc_trans_utils::link::{find_crate_name, filename_for_input, default_output_for_target,
                                   invalid_output_for_target, build_link_meta, out_filename,
@@ -129,6 +126,14 @@ fn command_path(sess: &Session) -> OsString {
     env::join_paths(new_path).unwrap()
 }
 
+fn metadata_obj(outputs: &OutputFilenames) -> PathBuf {
+    outputs.temp_path(OutputType::Object, Some(METADATA_MODULE_NAME))
+}
+
+fn allocator_obj(outputs: &OutputFilenames) -> PathBuf {
+    outputs.temp_path(OutputType::Object, Some(ALLOCATOR_MODULE_NAME))
+}
+
 pub fn remove(sess: &Session, path: &Path) {
     match fs::remove_file(path) {
         Ok(..) => {}
@@ -174,9 +179,9 @@ pub fn link_binary(sess: &Session,
                 remove(sess, &obj.object);
             }
         }
-        remove(sess, &outputs.with_extension(METADATA_OBJ_NAME));
+        remove(sess, &metadata_obj(outputs));
         if trans.allocator_module.is_some() {
-            remove(sess, &outputs.with_extension(ALLOCATOR_OBJ_NAME));
+            remove(sess, &allocator_obj(outputs));
         }
     }
 
@@ -478,7 +483,7 @@ fn link_rlib<'a>(sess: &'a Session,
 
         RlibFlavor::StaticlibBase => {
             if trans.allocator_module.is_some() {
-                ab.add_file(&outputs.with_extension(ALLOCATOR_OBJ_NAME));
+                ab.add_file(&allocator_obj(outputs));
             }
         }
     }
@@ -908,11 +913,11 @@ fn link_args(cmd: &mut Linker,
     // object file, so we link that in here.
     if crate_type == config::CrateTypeDylib ||
        crate_type == config::CrateTypeProcMacro {
-        cmd.add_object(&outputs.with_extension(METADATA_OBJ_NAME));
+        cmd.add_object(&metadata_obj(outputs));
     }
 
     if trans.allocator_module.is_some() {
-        cmd.add_object(&outputs.with_extension(ALLOCATOR_OBJ_NAME));
+        cmd.add_object(&allocator_obj(outputs));
     }
 
     // Try to strip as much out of the generated object by removing unused
@@ -1265,11 +1270,23 @@ fn add_upstream_rust_crates(cmd: &mut Linker,
                 let canonical = f.replace("-", "_");
                 let canonical_name = name.replace("-", "_");
 
+                // Look for `.rust-cgu.o` at the end of the filename to conclude
+                // that this is a Rust-related object file.
+                fn looks_like_rust(s: &str) -> bool {
+                    let path = Path::new(s);
+                    let ext = path.extension().and_then(|s| s.to_str());
+                    if ext != Some(OutputType::Object.extension()) {
+                        return false
+                    }
+                    let ext2 = path.file_stem()
+                        .and_then(|s| Path::new(s).extension())
+                        .and_then(|s| s.to_str());
+                    ext2 == Some(RUST_CGU_EXT)
+                }
+
                 let is_rust_object =
-                    canonical.starts_with(&canonical_name) && {
-                        let num = &f[name.len()..f.len() - 2];
-                        num.len() > 0 && num[1..].parse::<u32>().is_ok()
-                    };
+                    canonical.starts_with(&canonical_name) &&
+                    looks_like_rust(&f);
 
                 // If we've been requested to skip all native object files
                 // (those not generated by the rust compiler) then we can skip
diff --git a/src/librustc_trans/back/lto.rs b/src/librustc_trans/back/lto.rs
index 8651b95b12a..8f75b891a30 100644
--- a/src/librustc_trans/back/lto.rs
+++ b/src/librustc_trans/back/lto.rs
@@ -9,23 +9,25 @@
 // except according to those terms.
 
 use back::bytecode::{DecodedBytecode, RLIB_BYTECODE_EXTENSION};
-use back::write;
 use back::symbol_export;
-use rustc::session::config;
+use back::write::{ModuleConfig, with_llvm_pmb, CodegenContext};
+use back::write;
 use errors::{FatalError, Handler};
-use llvm;
 use llvm::archive_ro::ArchiveRO;
 use llvm::{ModuleRef, TargetMachineRef, True, False};
+use llvm;
+use rustc::hir::def_id::LOCAL_CRATE;
 use rustc::middle::exported_symbols::SymbolExportLevel;
+use rustc::session::config;
 use rustc::util::common::time;
-use rustc::hir::def_id::LOCAL_CRATE;
-use back::write::{ModuleConfig, with_llvm_pmb, CodegenContext};
-use {ModuleTranslation, ModuleKind};
+use time_graph::Timeline;
+use {ModuleTranslation, ModuleLlvm, ModuleKind, ModuleSource};
 
 use libc;
 
 use std::ffi::CString;
 use std::slice;
+use std::sync::Arc;
 
 pub fn crate_type_allows_lto(crate_type: config::CrateType) -> bool {
     match crate_type {
@@ -45,14 +47,14 @@ pub enum LtoModuleTranslation {
         _serialized_bitcode: Vec<SerializedModule>,
     },
 
-    // Note the lack of other entries in this enum! Ideally one day this gap is
-    // intended to be filled with a "Thin" LTO variant.
+    Thin(ThinModule),
 }
 
 impl LtoModuleTranslation {
     pub fn name(&self) -> &str {
         match *self {
             LtoModuleTranslation::Fat { .. } => "everything",
+            LtoModuleTranslation::Thin(ref m) => m.name(),
         }
     }
 
@@ -62,7 +64,9 @@ impl LtoModuleTranslation {
     /// points to LLVM data structures owned by this `LtoModuleTranslation`.
     /// It's intended that the module returned is immediately code generated and
     /// dropped, and then this LTO module is dropped.
-    pub unsafe fn optimize(&mut self, cgcx: &CodegenContext)
+    pub unsafe fn optimize(&mut self,
+                           cgcx: &CodegenContext,
+                           timeline: &mut Timeline)
         -> Result<ModuleTranslation, FatalError>
     {
         match *self {
@@ -71,9 +75,11 @@ impl LtoModuleTranslation {
                 let config = cgcx.config(trans.kind);
                 let llmod = trans.llvm().unwrap().llmod;
                 let tm = trans.llvm().unwrap().tm;
-                run_pass_manager(cgcx, tm, llmod, config);
+                run_pass_manager(cgcx, tm, llmod, config, false);
+                timeline.record("fat-done");
                 Ok(trans)
             }
+            LtoModuleTranslation::Thin(ref mut thin) => thin.optimize(cgcx, timeline),
         }
     }
 
@@ -83,33 +89,31 @@ impl LtoModuleTranslation {
         match *self {
             // Only one module with fat LTO, so the cost doesn't matter.
             LtoModuleTranslation::Fat { .. } => 0,
+            LtoModuleTranslation::Thin(ref m) => m.cost(),
         }
     }
 }
 
-pub fn run(cgcx: &CodegenContext, modules: Vec<ModuleTranslation>)
+pub enum LTOMode {
+    WholeCrateGraph,
+    JustThisCrate,
+}
+
+pub fn run(cgcx: &CodegenContext,
+           modules: Vec<ModuleTranslation>,
+           mode: LTOMode,
+           timeline: &mut Timeline)
     -> Result<Vec<LtoModuleTranslation>, FatalError>
 {
     let diag_handler = cgcx.create_diag_handler();
-    if cgcx.opts.cg.prefer_dynamic {
-        diag_handler.struct_err("cannot prefer dynamic linking when performing LTO")
-                    .note("only 'staticlib', 'bin', and 'cdylib' outputs are \
-                           supported with LTO")
-                    .emit();
-        return Err(FatalError)
-    }
-
-    // Make sure we actually can run LTO
-    for crate_type in cgcx.crate_types.iter() {
-        if !crate_type_allows_lto(*crate_type) {
-            let e = diag_handler.fatal("lto can only be run for executables, cdylibs and \
-                                        static library outputs");
-            return Err(e)
+    let export_threshold = match mode {
+        LTOMode::WholeCrateGraph => {
+            symbol_export::crates_export_threshold(&cgcx.crate_types)
         }
-    }
-
-    let export_threshold =
-        symbol_export::crates_export_threshold(&cgcx.crate_types);
+        LTOMode::JustThisCrate => {
+            SymbolExportLevel::Rust
+        }
+    };
 
     let symbol_filter = &|&(ref name, _, level): &(String, _, SymbolExportLevel)| {
         if level.is_below_threshold(export_threshold) {
@@ -121,55 +125,81 @@ pub fn run(cgcx: &CodegenContext, modules: Vec<ModuleTranslation>)
         }
     };
 
-    let mut symbol_white_list: Vec<CString> = cgcx.exported_symbols[&LOCAL_CRATE]
+    let mut symbol_white_list = cgcx.exported_symbols[&LOCAL_CRATE]
         .iter()
         .filter_map(symbol_filter)
-        .collect();
-    info!("{} symbols in whitelist", symbol_white_list.len());
+        .collect::<Vec<CString>>();
+    timeline.record("whitelist");
 
-    // For each of our upstream dependencies, find the corresponding rlib and
-    // load the bitcode from the archive. Then merge it into the current LLVM
-    // module that we've got.
+    // If we're performing LTO for the entire crate graph, then for each of our
+    // upstream dependencies, find the corresponding rlib and load the bitcode
+    // from the archive.
+    //
+    // We save off all the bytecode and LLVM module ids for later processing
+    // with either fat or thin LTO
     let mut upstream_modules = Vec::new();
-    for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() {
-        symbol_white_list.extend(
-            cgcx.exported_symbols[&cnum]
-                .iter()
-                .filter_map(symbol_filter));
-        info!("{} symbols in whitelist after {}", symbol_white_list.len(), cnum);
-
-        let archive = ArchiveRO::open(&path).expect("wanted an rlib");
-        let bytecodes = archive.iter().filter_map(|child| {
-            child.ok().and_then(|c| c.name().map(|name| (name, c)))
-        }).filter(|&(name, _)| name.ends_with(RLIB_BYTECODE_EXTENSION));
-        for (name, data) in bytecodes {
-            info!("adding bytecode {}", name);
-            let bc_encoded = data.data();
-
-            let (bc, id) = time(cgcx.time_passes, &format!("decode {}", name), || {
-                match DecodedBytecode::new(bc_encoded) {
-                    Ok(b) => Ok((b.bytecode(), b.identifier().to_string())),
-                    Err(e) => Err(diag_handler.fatal(&e)),
-                }
-            })?;
-            let bc = SerializedModule::FromRlib(bc);
-            upstream_modules.push((bc, CString::new(id).unwrap()));
+    if let LTOMode::WholeCrateGraph = mode {
+        if cgcx.opts.cg.prefer_dynamic {
+            diag_handler.struct_err("cannot prefer dynamic linking when performing LTO")
+                        .note("only 'staticlib', 'bin', and 'cdylib' outputs are \
+                               supported with LTO")
+                        .emit();
+            return Err(FatalError)
+        }
+
+        // Make sure we actually can run LTO
+        for crate_type in cgcx.crate_types.iter() {
+            if !crate_type_allows_lto(*crate_type) {
+                let e = diag_handler.fatal("lto can only be run for executables, cdylibs and \
+                                            static library outputs");
+                return Err(e)
+            }
         }
-    }
 
-    // Internalize everything but the exported symbols of the current module
-    let arr: Vec<*const libc::c_char> = symbol_white_list.iter()
-                                                         .map(|c| c.as_ptr())
-                                                         .collect();
+        for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() {
+            symbol_white_list.extend(
+                cgcx.exported_symbols[&cnum]
+                    .iter()
+                    .filter_map(symbol_filter));
+
+            let archive = ArchiveRO::open(&path).expect("wanted an rlib");
+            let bytecodes = archive.iter().filter_map(|child| {
+                child.ok().and_then(|c| c.name().map(|name| (name, c)))
+            }).filter(|&(name, _)| name.ends_with(RLIB_BYTECODE_EXTENSION));
+            for (name, data) in bytecodes {
+                info!("adding bytecode {}", name);
+                let bc_encoded = data.data();
+
+                let (bc, id) = time(cgcx.time_passes, &format!("decode {}", name), || {
+                    match DecodedBytecode::new(bc_encoded) {
+                        Ok(b) => Ok((b.bytecode(), b.identifier().to_string())),
+                        Err(e) => Err(diag_handler.fatal(&e)),
+                    }
+                })?;
+                let bc = SerializedModule::FromRlib(bc);
+                upstream_modules.push((bc, CString::new(id).unwrap()));
+            }
+            timeline.record(&format!("load: {}", path.display()));
+        }
+    }
 
-    fat_lto(cgcx, &diag_handler, modules, upstream_modules, &arr)
+    let arr = symbol_white_list.iter().map(|c| c.as_ptr()).collect::<Vec<_>>();
+    match mode {
+        LTOMode::WholeCrateGraph if !cgcx.thinlto => {
+            fat_lto(cgcx, &diag_handler, modules, upstream_modules, &arr, timeline)
+        }
+        _ => {
+            thin_lto(&diag_handler, modules, upstream_modules, &arr, timeline)
+        }
+    }
 }
 
 fn fat_lto(cgcx: &CodegenContext,
            diag_handler: &Handler,
            mut modules: Vec<ModuleTranslation>,
            mut serialized_modules: Vec<(SerializedModule, CString)>,
-           symbol_white_list: &[*const libc::c_char])
+           symbol_white_list: &[*const libc::c_char],
+           timeline: &mut Timeline)
     -> Result<Vec<LtoModuleTranslation>, FatalError>
 {
     info!("going for a fat lto");
@@ -228,6 +258,7 @@ fn fat_lto(cgcx: &CodegenContext,
                 Err(write::llvm_err(&diag_handler, msg))
             }
         })?;
+        timeline.record(&format!("link {:?}", name));
         serialized_bitcode.push(bc_decoded);
     }
     cgcx.save_temp_bitcode(&module, "lto.input");
@@ -248,6 +279,7 @@ fn fat_lto(cgcx: &CodegenContext,
         }
         cgcx.save_temp_bitcode(&module, "lto.after-nounwind");
     }
+    timeline.record("passes");
 
     Ok(vec![LtoModuleTranslation::Fat {
         module: Some(module),
@@ -255,11 +287,143 @@ fn fat_lto(cgcx: &CodegenContext,
     }])
 }
 
+/// Prepare "thin" LTO to get run on these modules.
+///
+/// The general structure of ThinLTO is quite different from the structure of
+/// "fat" LTO above. With "fat" LTO all LLVM modules in question are merged into
+/// one giant LLVM module, and then we run more optimization passes over this
+/// big module after internalizing most symbols. Thin LTO, on the other hand,
+/// avoid this large bottleneck through more targeted optimization.
+///
+/// At a high level Thin LTO looks like:
+///
+///     1. Prepare a "summary" of each LLVM module in question which describes
+///        the values inside, cost of the values, etc.
+///     2. Merge the summaries of all modules in question into one "index"
+///     3. Perform some global analysis on this index
+///     4. For each module, use the index and analysis calculated previously to
+///        perform local transformations on the module, for example inlining
+///        small functions from other modules.
+///     5. Run thin-specific optimization passes over each module, and then code
+///        generate everything at the end.
+///
+/// The summary for each module is intended to be quite cheap, and the global
+/// index is relatively quite cheap to create as well. As a result, the goal of
+/// ThinLTO is to reduce the bottleneck on LTO and enable LTO to be used in more
+/// situations. For example one cheap optimization is that we can parallelize
+/// all codegen modules, easily making use of all the cores on a machine.
+///
+/// With all that in mind, the function here is designed at specifically just
+/// calculating the *index* for ThinLTO. This index will then be shared amongst
+/// all of the `LtoModuleTranslation` units returned below and destroyed once
+/// they all go out of scope.
+fn thin_lto(diag_handler: &Handler,
+            modules: Vec<ModuleTranslation>,
+            serialized_modules: Vec<(SerializedModule, CString)>,
+            symbol_white_list: &[*const libc::c_char],
+            timeline: &mut Timeline)
+    -> Result<Vec<LtoModuleTranslation>, FatalError>
+{
+    unsafe {
+        info!("going for that thin, thin LTO");
+
+        let mut thin_buffers = Vec::new();
+        let mut module_names = Vec::new();
+        let mut thin_modules = Vec::new();
+
+        // FIXME: right now, like with fat LTO, we serialize all in-memory
+        //        modules before working with them and ThinLTO. We really
+        //        shouldn't do this, however, and instead figure out how to
+        //        extract a summary from an in-memory module and then merge that
+        //        into the global index. It turns out that this loop is by far
+        //        the most expensive portion of this small bit of global
+        //        analysis!
+        for (i, module) in modules.iter().enumerate() {
+            info!("local module: {} - {}", i, module.llmod_id);
+            let llvm = module.llvm().expect("can't lto pretranslated module");
+            let name = CString::new(module.llmod_id.clone()).unwrap();
+            let buffer = llvm::LLVMRustThinLTOBufferCreate(llvm.llmod);
+            let buffer = ThinBuffer(buffer);
+            thin_modules.push(llvm::ThinLTOModule {
+                identifier: name.as_ptr(),
+                data: buffer.data().as_ptr(),
+                len: buffer.data().len(),
+            });
+            thin_buffers.push(buffer);
+            module_names.push(name);
+            timeline.record(&module.llmod_id);
+        }
+
+        // FIXME: All upstream crates are deserialized internally in the
+        //        function below to extract their summary and modules. Note that
+        //        unlike the loop above we *must* decode and/or read something
+        //        here as these are all just serialized files on disk. An
+        //        improvement, however, to make here would be to store the
+        //        module summary separately from the actual module itself. Right
+        //        now this is store in one large bitcode file, and the entire
+        //        file is deflate-compressed. We could try to bypass some of the
+        //        decompression by storing the index uncompressed and only
+        //        lazily decompressing the bytecode if necessary.
+        //
+        //        Note that truly taking advantage of this optimization will
+        //        likely be further down the road. We'd have to implement
+        //        incremental ThinLTO first where we could actually avoid
+        //        looking at upstream modules entirely sometimes (the contents,
+        //        we must always unconditionally look at the index).
+        let mut serialized = Vec::new();
+        for (module, name) in serialized_modules {
+            info!("foreign module {:?}", name);
+            thin_modules.push(llvm::ThinLTOModule {
+                identifier: name.as_ptr(),
+                data: module.data().as_ptr(),
+                len: module.data().len(),
+            });
+            serialized.push(module);
+            module_names.push(name);
+        }
+
+        // Delegate to the C++ bindings to create some data here. Once this is a
+        // tried-and-true interface we may wish to try to upstream some of this
+        // to LLVM itself, right now we reimplement a lot of what they do
+        // upstream...
+        let data = llvm::LLVMRustCreateThinLTOData(
+            thin_modules.as_ptr(),
+            thin_modules.len() as u32,
+            symbol_white_list.as_ptr(),
+            symbol_white_list.len() as u32,
+        );
+        if data.is_null() {
+            let msg = format!("failed to prepare thin LTO context");
+            return Err(write::llvm_err(&diag_handler, msg))
+        }
+        let data = ThinData(data);
+        info!("thin LTO data created");
+        timeline.record("data");
+
+        // Throw our data in an `Arc` as we'll be sharing it across threads. We
+        // also put all memory referenced by the C++ data (buffers, ids, etc)
+        // into the arc as well. After this we'll create a thin module
+        // translation per module in this data.
+        let shared = Arc::new(ThinShared {
+            data,
+            thin_buffers,
+            serialized_modules: serialized,
+            module_names,
+        });
+        Ok((0..shared.module_names.len()).map(|i| {
+            LtoModuleTranslation::Thin(ThinModule {
+                shared: shared.clone(),
+                idx: i,
+            })
+        }).collect())
+    }
+}
+
 fn run_pass_manager(cgcx: &CodegenContext,
                     tm: TargetMachineRef,
                     llmod: ModuleRef,
-                    config: &ModuleConfig) {
-
+                    config: &ModuleConfig,
+                    thin: bool) {
     // Now we have one massive module inside of llmod. Time to run the
     // LTO-specific optimization passes that LLVM provides.
     //
@@ -274,9 +438,15 @@ fn run_pass_manager(cgcx: &CodegenContext,
         llvm::LLVMRustAddPass(pm, pass);
 
         with_llvm_pmb(llmod, config, &mut |b| {
-            llvm::LLVMPassManagerBuilderPopulateLTOPassManager(b, pm,
-                /* Internalize = */ False,
-                /* RunInliner = */ True);
+            if thin {
+                if !llvm::LLVMRustPassManagerBuilderPopulateThinLTOPassManager(b, pm) {
+                    panic!("this version of LLVM does not support ThinLTO");
+                }
+            } else {
+                llvm::LLVMPassManagerBuilderPopulateLTOPassManager(b, pm,
+                    /* Internalize = */ False,
+                    /* RunInliner = */ True);
+            }
         });
 
         let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr() as *const _);
@@ -331,3 +501,158 @@ impl Drop for ModuleBuffer {
         unsafe { llvm::LLVMRustModuleBufferFree(self.0); }
     }
 }
+
+pub struct ThinModule {
+    shared: Arc<ThinShared>,
+    idx: usize,
+}
+
+struct ThinShared {
+    data: ThinData,
+    thin_buffers: Vec<ThinBuffer>,
+    serialized_modules: Vec<SerializedModule>,
+    module_names: Vec<CString>,
+}
+
+struct ThinData(*mut llvm::ThinLTOData);
+
+unsafe impl Send for ThinData {}
+unsafe impl Sync for ThinData {}
+
+impl Drop for ThinData {
+    fn drop(&mut self) {
+        unsafe {
+            llvm::LLVMRustFreeThinLTOData(self.0);
+        }
+    }
+}
+
+struct ThinBuffer(*mut llvm::ThinLTOBuffer);
+
+unsafe impl Send for ThinBuffer {}
+unsafe impl Sync for ThinBuffer {}
+
+impl ThinBuffer {
+    fn data(&self) -> &[u8] {
+        unsafe {
+            let ptr = llvm::LLVMRustThinLTOBufferPtr(self.0) as *const _;
+            let len = llvm::LLVMRustThinLTOBufferLen(self.0);
+            slice::from_raw_parts(ptr, len)
+        }
+    }
+}
+
+impl Drop for ThinBuffer {
+    fn drop(&mut self) {
+        unsafe {
+            llvm::LLVMRustThinLTOBufferFree(self.0);
+        }
+    }
+}
+
+impl ThinModule {
+    fn name(&self) -> &str {
+        self.shared.module_names[self.idx].to_str().unwrap()
+    }
+
+    fn cost(&self) -> u64 {
+        // Yes, that's correct, we're using the size of the bytecode as an
+        // indicator for how costly this codegen unit is.
+        self.data().len() as u64
+    }
+
+    fn data(&self) -> &[u8] {
+        let a = self.shared.thin_buffers.get(self.idx).map(|b| b.data());
+        a.unwrap_or_else(|| {
+            let len = self.shared.thin_buffers.len();
+            self.shared.serialized_modules[self.idx - len].data()
+        })
+    }
+
+    unsafe fn optimize(&mut self, cgcx: &CodegenContext, timeline: &mut Timeline)
+        -> Result<ModuleTranslation, FatalError>
+    {
+        let diag_handler = cgcx.create_diag_handler();
+        let tm = (cgcx.tm_factory)().map_err(|e| {
+            write::llvm_err(&diag_handler, e)
+        })?;
+
+        // Right now the implementation we've got only works over serialized
+        // modules, so we create a fresh new LLVM context and parse the module
+        // into that context. One day, however, we may do this for upstream
+        // crates but for locally translated modules we may be able to reuse
+        // that LLVM Context and Module.
+        let llcx = llvm::LLVMContextCreate();
+        let llmod = llvm::LLVMRustParseBitcodeForThinLTO(
+            llcx,
+            self.data().as_ptr(),
+            self.data().len(),
+            self.shared.module_names[self.idx].as_ptr(),
+        );
+        assert!(!llmod.is_null());
+        let mtrans = ModuleTranslation {
+            source: ModuleSource::Translated(ModuleLlvm {
+                llmod,
+                llcx,
+                tm,
+            }),
+            llmod_id: self.name().to_string(),
+            name: self.name().to_string(),
+            kind: ModuleKind::Regular,
+        };
+        cgcx.save_temp_bitcode(&mtrans, "thin-lto-input");
+
+        // Like with "fat" LTO, get some better optimizations if landing pads
+        // are disabled by removing all landing pads.
+        if cgcx.no_landing_pads {
+            llvm::LLVMRustMarkAllFunctionsNounwind(llmod);
+            cgcx.save_temp_bitcode(&mtrans, "thin-lto-after-nounwind");
+            timeline.record("nounwind");
+        }
+
+        // Up next comes the per-module local analyses that we do for Thin LTO.
+        // Each of these functions is basically copied from the LLVM
+        // implementation and then tailored to suit this implementation. Ideally
+        // each of these would be supported by upstream LLVM but that's perhaps
+        // a patch for another day!
+        //
+        // You can find some more comments about these functions in the LLVM
+        // bindings we've got (currently `PassWrapper.cpp`)
+        if !llvm::LLVMRustPrepareThinLTORename(self.shared.data.0, llmod) {
+            let msg = format!("failed to prepare thin LTO module");
+            return Err(write::llvm_err(&diag_handler, msg))
+        }
+        cgcx.save_temp_bitcode(&mtrans, "thin-lto-after-rename");
+        timeline.record("rename");
+        if !llvm::LLVMRustPrepareThinLTOResolveWeak(self.shared.data.0, llmod) {
+            let msg = format!("failed to prepare thin LTO module");
+            return Err(write::llvm_err(&diag_handler, msg))
+        }
+        cgcx.save_temp_bitcode(&mtrans, "thin-lto-after-resolve");
+        timeline.record("resolve");
+        if !llvm::LLVMRustPrepareThinLTOInternalize(self.shared.data.0, llmod) {
+            let msg = format!("failed to prepare thin LTO module");
+            return Err(write::llvm_err(&diag_handler, msg))
+        }
+        cgcx.save_temp_bitcode(&mtrans, "thin-lto-after-internalize");
+        timeline.record("internalize");
+        if !llvm::LLVMRustPrepareThinLTOImport(self.shared.data.0, llmod) {
+            let msg = format!("failed to prepare thin LTO module");
+            return Err(write::llvm_err(&diag_handler, msg))
+        }
+        cgcx.save_temp_bitcode(&mtrans, "thin-lto-after-import");
+        timeline.record("import");
+
+        // Alright now that we've done everything related to the ThinLTO
+        // analysis it's time to run some optimizations! Here we use the same
+        // `run_pass_manager` as the "fat" LTO above except that we tell it to
+        // populate a thin-specific pass manager, which presumably LLVM treats a
+        // little differently.
+        info!("running thin lto passes over {}", mtrans.name);
+        let config = cgcx.config(mtrans.kind);
+        run_pass_manager(cgcx, tm, llmod, config, true);
+        cgcx.save_temp_bitcode(&mtrans, "thin-lto-after-pm");
+        timeline.record("thin-done");
+        Ok(mtrans)
+    }
+}
diff --git a/src/librustc_trans/back/write.rs b/src/librustc_trans/back/write.rs
index 1988b9f76e4..b3b2384a027 100644
--- a/src/librustc_trans/back/write.rs
+++ b/src/librustc_trans/back/write.rs
@@ -19,7 +19,7 @@ use rustc::session::config::{self, OutputFilenames, OutputType, OutputTypes, Pas
                              AllPasses, Sanitizer};
 use rustc::session::Session;
 use rustc::util::nodemap::FxHashMap;
-use time_graph::{self, TimeGraph};
+use time_graph::{self, TimeGraph, Timeline};
 use llvm;
 use llvm::{ModuleRef, TargetMachineRef, PassManagerRef, DiagnosticInfoRef};
 use llvm::{SMDiagnosticRef, ContextRef};
@@ -303,6 +303,7 @@ pub struct CodegenContext {
     // Resouces needed when running LTO
     pub time_passes: bool,
     pub lto: bool,
+    pub thinlto: bool,
     pub no_landing_pads: bool,
     pub save_temps: bool,
     pub exported_symbols: Arc<ExportedSymbols>,
@@ -315,6 +316,8 @@ pub struct CodegenContext {
     allocator_module_config: Arc<ModuleConfig>,
     pub tm_factory: Arc<Fn() -> Result<TargetMachineRef, String> + Send + Sync>,
 
+    // Number of cgus excluding the allocator/metadata modules
+    pub total_cgus: usize,
     // Handler to use for diagnostics produced during codegen.
     pub diag_emitter: SharedEmitter,
     // LLVM passes added by plugins.
@@ -450,7 +453,8 @@ unsafe extern "C" fn diagnostic_handler(info: DiagnosticInfoRef, user: *mut c_vo
 unsafe fn optimize(cgcx: &CodegenContext,
                    diag_handler: &Handler,
                    mtrans: &ModuleTranslation,
-                   config: &ModuleConfig)
+                   config: &ModuleConfig,
+                   timeline: &mut Timeline)
     -> Result<(), FatalError>
 {
     let (llmod, llcx, tm) = match mtrans.source {
@@ -529,6 +533,7 @@ unsafe fn optimize(cgcx: &CodegenContext,
         // Finally, run the actual optimization passes
         time(config.time_passes, &format!("llvm function passes [{}]", module_name.unwrap()), ||
              llvm::LLVMRustRunFunctionPassManager(fpm, llmod));
+        timeline.record("fpm");
         time(config.time_passes, &format!("llvm module passes [{}]", module_name.unwrap()), ||
              llvm::LLVMRunPassManager(mpm, llmod));
 
@@ -543,7 +548,18 @@ fn generate_lto_work(cgcx: &CodegenContext,
                      modules: Vec<ModuleTranslation>)
     -> Vec<(WorkItem, u64)>
 {
-    let lto_modules = lto::run(cgcx, modules).unwrap_or_else(|e| panic!(e));
+    let mut timeline = cgcx.time_graph.as_ref().map(|tg| {
+        tg.start(TRANS_WORKER_TIMELINE,
+                 TRANS_WORK_PACKAGE_KIND,
+                 "generate lto")
+    }).unwrap_or(Timeline::noop());
+    let mode = if cgcx.lto {
+        lto::LTOMode::WholeCrateGraph
+    } else {
+        lto::LTOMode::JustThisCrate
+    };
+    let lto_modules = lto::run(cgcx, modules, mode, &mut timeline)
+        .unwrap_or_else(|e| panic!(e));
 
     lto_modules.into_iter().map(|module| {
         let cost = module.cost();
@@ -554,9 +570,11 @@ fn generate_lto_work(cgcx: &CodegenContext,
 unsafe fn codegen(cgcx: &CodegenContext,
                   diag_handler: &Handler,
                   mtrans: ModuleTranslation,
-                  config: &ModuleConfig)
+                  config: &ModuleConfig,
+                  timeline: &mut Timeline)
     -> Result<CompiledModule, FatalError>
 {
+    timeline.record("codegen");
     let (llmod, llcx, tm) = match mtrans.source {
         ModuleSource::Translated(ref llvm) => (llvm.llmod, llvm.llcx, llvm.tm),
         ModuleSource::Preexisting(_) => {
@@ -601,7 +619,18 @@ unsafe fn codegen(cgcx: &CodegenContext,
 
     if write_bc {
         let bc_out_c = path2cstr(&bc_out);
-        llvm::LLVMWriteBitcodeToFile(llmod, bc_out_c.as_ptr());
+        if llvm::LLVMRustThinLTOAvailable() {
+            with_codegen(tm, llmod, config.no_builtins, |cpm| {
+                llvm::LLVMRustWriteThinBitcodeToFile(
+                    cpm,
+                    llmod,
+                    bc_out_c.as_ptr(),
+                )
+            });
+        } else {
+            llvm::LLVMWriteBitcodeToFile(llmod, bc_out_c.as_ptr());
+        }
+        timeline.record("bc");
     }
 
     time(config.time_passes, &format!("codegen passes [{}]", module_name.unwrap()),
@@ -644,7 +673,8 @@ unsafe fn codegen(cgcx: &CodegenContext,
             with_codegen(tm, llmod, config.no_builtins, |cpm| {
                 llvm::LLVMRustPrintModule(cpm, llmod, out.as_ptr(), demangle_callback);
                 llvm::LLVMDisposePassManager(cpm);
-            })
+            });
+            timeline.record("ir");
         }
 
         if config.emit_asm {
@@ -665,6 +695,7 @@ unsafe fn codegen(cgcx: &CodegenContext,
             if config.emit_obj {
                 llvm::LLVMDisposeModule(llmod);
             }
+            timeline.record("asm");
         }
 
         if write_obj {
@@ -672,6 +703,7 @@ unsafe fn codegen(cgcx: &CodegenContext,
                 write_output_file(diag_handler, tm, cpm, llmod, &obj_out,
                                   llvm::FileType::ObjectFile)
             })?;
+            timeline.record("obj");
         }
 
         Ok(())
@@ -712,7 +744,8 @@ pub fn start_async_translation(tcx: TyCtxt,
                                time_graph: Option<TimeGraph>,
                                link: LinkMeta,
                                metadata: EncodedMetadata,
-                               coordinator_receive: Receiver<Box<Any + Send>>)
+                               coordinator_receive: Receiver<Box<Any + Send>>,
+                               total_cgus: usize)
                                -> OngoingCrateTranslation {
     let sess = tcx.sess;
     let crate_output = tcx.output_filenames(LOCAL_CRATE);
@@ -836,6 +869,7 @@ pub fn start_async_translation(tcx: TyCtxt,
                                                   shared_emitter,
                                                   trans_worker_send,
                                                   coordinator_receive,
+                                                  total_cgus,
                                                   client,
                                                   time_graph.clone(),
                                                   Arc::new(modules_config),
@@ -1080,7 +1114,9 @@ enum WorkItemResult {
     NeedsLTO(ModuleTranslation),
 }
 
-fn execute_work_item(cgcx: &CodegenContext, work_item: WorkItem)
+fn execute_work_item(cgcx: &CodegenContext,
+                     work_item: WorkItem,
+                     timeline: &mut Timeline)
     -> Result<WorkItemResult, FatalError>
 {
     let diag_handler = cgcx.create_diag_handler();
@@ -1089,8 +1125,8 @@ fn execute_work_item(cgcx: &CodegenContext, work_item: WorkItem)
         WorkItem::Optimize(mtrans) => mtrans,
         WorkItem::LTO(mut lto) => {
             unsafe {
-                let module = lto.optimize(cgcx)?;
-                let module = codegen(cgcx, &diag_handler, module, config)?;
+                let module = lto.optimize(cgcx, timeline)?;
+                let module = codegen(cgcx, &diag_handler, module, config, timeline)?;
                 return Ok(WorkItemResult::Compiled(module))
             }
         }
@@ -1140,9 +1176,27 @@ fn execute_work_item(cgcx: &CodegenContext, work_item: WorkItem)
         debug!("llvm-optimizing {:?}", module_name);
 
         unsafe {
-            optimize(cgcx, &diag_handler, &mtrans, config)?;
-            if !cgcx.lto || mtrans.kind == ModuleKind::Metadata {
-                let module = codegen(cgcx, &diag_handler, mtrans, config)?;
+            optimize(cgcx, &diag_handler, &mtrans, config, timeline)?;
+
+            let lto = cgcx.lto;
+
+            let auto_thin_lto =
+                cgcx.thinlto &&
+                cgcx.total_cgus > 1 &&
+                mtrans.kind != ModuleKind::Allocator;
+
+            // If we're a metadata module we never participate in LTO.
+            //
+            // If LTO was explicitly requested on the command line, we always
+            // LTO everything else.
+            //
+            // If LTO *wasn't* explicitly requested and we're not a metdata
+            // module, then we may automatically do ThinLTO if we've got
+            // multiple codegen units. Note, however, that the allocator module
+            // doesn't participate here automatically because of linker
+            // shenanigans later on.
+            if mtrans.kind == ModuleKind::Metadata || (!lto && !auto_thin_lto) {
+                let module = codegen(cgcx, &diag_handler, mtrans, config, timeline)?;
                 Ok(WorkItemResult::Compiled(module))
             } else {
                 Ok(WorkItemResult::NeedsLTO(mtrans))
@@ -1187,6 +1241,7 @@ fn start_executing_work(tcx: TyCtxt,
                         shared_emitter: SharedEmitter,
                         trans_worker_send: Sender<Message>,
                         coordinator_receive: Receiver<Box<Any + Send>>,
+                        total_cgus: usize,
                         jobserver: Client,
                         time_graph: Option<TimeGraph>,
                         modules_config: Arc<ModuleConfig>,
@@ -1229,6 +1284,7 @@ fn start_executing_work(tcx: TyCtxt,
         crate_types: sess.crate_types.borrow().clone(),
         each_linked_rlib_for_lto,
         lto: sess.lto(),
+        thinlto: sess.opts.debugging_opts.thinlto,
         no_landing_pads: sess.no_landing_pads(),
         save_temps: sess.opts.cg.save_temps,
         opts: Arc::new(sess.opts.clone()),
@@ -1246,6 +1302,7 @@ fn start_executing_work(tcx: TyCtxt,
         metadata_module_config: metadata_config,
         allocator_module_config: allocator_config,
         tm_factory: target_machine_factory(tcx.sess),
+        total_cgus,
     };
 
     // This is the "main loop" of parallel work happening for parallel codegen.
@@ -1743,12 +1800,13 @@ fn spawn_work(cgcx: CodegenContext, work: WorkItem) {
         // as a diagnostic was already sent off to the main thread - just
         // surface that there was an error in this worker.
         bomb.result = {
-            let _timing_guard = cgcx.time_graph.as_ref().map(|tg| {
+            let timeline = cgcx.time_graph.as_ref().map(|tg| {
                 tg.start(time_graph::TimelineId(cgcx.worker),
                          LLVM_WORK_PACKAGE_KIND,
                          &work.name())
             });
-            execute_work_item(&cgcx, work).ok()
+            let mut timeline = timeline.unwrap_or(Timeline::noop());
+            execute_work_item(&cgcx, work, &mut timeline).ok()
         };
     });
 }
diff --git a/src/librustc_trans/base.rs b/src/librustc_trans/base.rs
index 8c39f579b3e..5764b0b34d1 100644
--- a/src/librustc_trans/base.rs
+++ b/src/librustc_trans/base.rs
@@ -886,6 +886,11 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
 
     check_for_rustc_errors_attr(tcx);
 
+    if tcx.sess.opts.debugging_opts.thinlto {
+        if unsafe { !llvm::LLVMRustThinLTOAvailable() } {
+            tcx.sess.fatal("this compiler's LLVM does not support ThinLTO");
+        }
+    }
 
     let crate_hash = tcx.dep_graph
                         .fingerprint_of(&DepNode::new_no_params(DepKind::Krate));
@@ -925,7 +930,8 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
             time_graph.clone(),
             link_meta,
             metadata,
-            rx);
+            rx,
+            1);
 
         ongoing_translation.submit_pre_translated_module_to_llvm(tcx, metadata_module);
         ongoing_translation.translation_finished(tcx);
@@ -961,7 +967,8 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
         time_graph.clone(),
         link_meta,
         metadata,
-        rx);
+        rx,
+        codegen_units.len());
 
     // Translate an allocator shim, if any
     let allocator_module = if let Some(kind) = tcx.sess.allocator_kind.get() {
@@ -1372,7 +1379,9 @@ fn compile_codegen_unit<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
         // crashes if the module identifier is same as other symbols
         // such as a function name in the module.
         // 1. http://llvm.org/bugs/show_bug.cgi?id=11479
-        let llmod_id = format!("{}.rs", cgu.name());
+        let llmod_id = format!("{}-{}.rs",
+                               cgu.name(),
+                               tcx.crate_disambiguator(LOCAL_CRATE));
 
         // Instantiate translation items without filling out definitions yet...
         let scx = SharedCrateContext::new(tcx);
diff --git a/src/librustc_trans/partitioning.rs b/src/librustc_trans/partitioning.rs
index 7b6daa7d133..82ec1aaa413 100644
--- a/src/librustc_trans/partitioning.rs
+++ b/src/librustc_trans/partitioning.rs
@@ -108,7 +108,6 @@ use rustc::dep_graph::{DepNode, WorkProductId};
 use rustc::hir::def_id::DefId;
 use rustc::hir::map::DefPathData;
 use rustc::middle::trans::{Linkage, Visibility};
-use rustc::session::config::NUMBERED_CODEGEN_UNIT_MARKER;
 use rustc::ty::{self, TyCtxt, InstanceDef};
 use rustc::ty::item_path::characteristic_def_id_of_type;
 use rustc::util::nodemap::{FxHashMap, FxHashSet};
@@ -627,7 +626,7 @@ fn compute_codegen_unit_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
 }
 
 fn numbered_codegen_unit_name(crate_name: &str, index: usize) -> InternedString {
-    Symbol::intern(&format!("{}{}{}", crate_name, NUMBERED_CODEGEN_UNIT_MARKER, index)).as_str()
+    Symbol::intern(&format!("{}{}", crate_name, index)).as_str()
 }
 
 fn debug_dump<'a, 'b, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
diff --git a/src/librustc_trans/time_graph.rs b/src/librustc_trans/time_graph.rs
index ec57af888e5..a8502682a80 100644
--- a/src/librustc_trans/time_graph.rs
+++ b/src/librustc_trans/time_graph.rs
@@ -9,11 +9,12 @@
 // except according to those terms.
 
 use std::collections::HashMap;
+use std::fs::File;
+use std::io::prelude::*;
 use std::marker::PhantomData;
+use std::mem;
 use std::sync::{Arc, Mutex};
 use std::time::Instant;
-use std::io::prelude::*;
-use std::fs::File;
 
 const OUTPUT_WIDTH_IN_PX: u64 = 1000;
 const TIME_LINE_HEIGHT_IN_PX: u64 = 20;
@@ -25,6 +26,7 @@ struct Timing {
     end: Instant,
     work_package_kind: WorkPackageKind,
     name: String,
+    events: Vec<(String, Instant)>,
 }
 
 #[derive(Clone, Copy, Hash, Eq, PartialEq, Debug)]
@@ -44,9 +46,14 @@ pub struct TimeGraph {
 #[derive(Clone, Copy)]
 pub struct WorkPackageKind(pub &'static [&'static str]);
 
-pub struct RaiiToken {
+pub struct Timeline {
+    token: Option<RaiiToken>,
+}
+
+struct RaiiToken {
     graph: TimeGraph,
     timeline: TimelineId,
+    events: Vec<(String, Instant)>,
     // The token must not be Send:
     _marker: PhantomData<*const ()>
 }
@@ -54,7 +61,7 @@ pub struct RaiiToken {
 
 impl Drop for RaiiToken {
     fn drop(&mut self) {
-        self.graph.end(self.timeline);
+        self.graph.end(self.timeline, mem::replace(&mut self.events, Vec::new()));
     }
 }
 
@@ -68,7 +75,7 @@ impl TimeGraph {
     pub fn start(&self,
                  timeline: TimelineId,
                  work_package_kind: WorkPackageKind,
-                 name: &str) -> RaiiToken {
+                 name: &str) -> Timeline {
         {
             let mut table = self.data.lock().unwrap();
 
@@ -81,14 +88,17 @@ impl TimeGraph {
             data.open_work_package = Some((Instant::now(), work_package_kind, name.to_string()));
         }
 
-        RaiiToken {
-            graph: self.clone(),
-            timeline,
-            _marker: PhantomData,
+        Timeline {
+            token: Some(RaiiToken {
+                graph: self.clone(),
+                timeline,
+                events: Vec::new(),
+                _marker: PhantomData,
+            }),
         }
     }
 
-    fn end(&self, timeline: TimelineId) {
+    fn end(&self, timeline: TimelineId, events: Vec<(String, Instant)>) {
         let end = Instant::now();
 
         let mut table = self.data.lock().unwrap();
@@ -100,6 +110,7 @@ impl TimeGraph {
                 end,
                 work_package_kind,
                 name,
+                events,
             });
         } else {
             bug!("end timing without start?")
@@ -113,13 +124,13 @@ impl TimeGraph {
             assert!(data.open_work_package.is_none());
         }
 
-        let mut timelines: Vec<PerThread> =
+        let mut threads: Vec<PerThread> =
             table.values().map(|data| data.clone()).collect();
 
-        timelines.sort_by_key(|timeline| timeline.timings[0].start);
+        threads.sort_by_key(|timeline| timeline.timings[0].start);
 
-        let earliest_instant = timelines[0].timings[0].start;
-        let latest_instant = timelines.iter()
+        let earliest_instant = threads[0].timings[0].start;
+        let latest_instant = threads.iter()
                                        .map(|timeline| timeline.timings
                                                                .last()
                                                                .unwrap()
@@ -130,16 +141,46 @@ impl TimeGraph {
 
         let mut file = File::create(format!("{}.html", output_filename)).unwrap();
 
-        writeln!(file, "<html>").unwrap();
-        writeln!(file, "<head></head>").unwrap();
-        writeln!(file, "<body>").unwrap();
+        writeln!(file, "
+            <html>
+            <head>
+                <style>
+                    #threads a {{
+                        position: absolute;
+                        overflow: hidden;
+                    }}
+                    #threads {{
+                        height: {total_height}px;
+                        width: {width}px;
+                    }}
+
+                    .timeline {{
+                        display: none;
+                        width: {width}px;
+                        position: relative;
+                    }}
+
+                    .timeline:target {{
+                        display: block;
+                    }}
+
+                    .event {{
+                        position: absolute;
+                    }}
+                </style>
+            </head>
+            <body>
+                <div id='threads'>
+        ",
+            total_height = threads.len() * TIME_LINE_HEIGHT_STRIDE_IN_PX,
+            width = OUTPUT_WIDTH_IN_PX,
+        ).unwrap();
 
         let mut color = 0;
-
-        for (line_index, timeline) in timelines.iter().enumerate() {
+        for (line_index, thread) in threads.iter().enumerate() {
             let line_top = line_index * TIME_LINE_HEIGHT_STRIDE_IN_PX;
 
-            for span in &timeline.timings {
+            for span in &thread.timings {
                 let start = distance(earliest_instant, span.start);
                 let end = distance(earliest_instant, span.end);
 
@@ -148,13 +189,13 @@ impl TimeGraph {
 
                 let colors = span.work_package_kind.0;
 
-                writeln!(file, "<div style='position:absolute; \
-                                            overflow:hidden; \
-                                            top:{}px; \
-                                            left:{}px; \
-                                            width:{}px; \
-                                            height:{}px; \
-                                            background:{};'>{}</div>",
+                writeln!(file, "<a href='#timing{}'
+                                   style='top:{}px; \
+                                          left:{}px; \
+                                          width:{}px; \
+                                          height:{}px; \
+                                          background:{};'>{}</a>",
+                    color,
                     line_top,
                     start,
                     end - start,
@@ -167,8 +208,61 @@ impl TimeGraph {
             }
         }
 
-        writeln!(file, "</body>").unwrap();
-        writeln!(file, "</html>").unwrap();
+        writeln!(file, "
+            </div>
+        ").unwrap();
+
+        let mut idx = 0;
+        for thread in threads.iter() {
+            for timing in &thread.timings {
+                let colors = timing.work_package_kind.0;
+                let height = TIME_LINE_HEIGHT_STRIDE_IN_PX * timing.events.len();
+                writeln!(file, "<div class='timeline'
+                                     id='timing{}'
+                                     style='background:{};height:{}px;'>",
+                         idx,
+                         colors[idx % colors.len()],
+                         height).unwrap();
+                idx += 1;
+                let max = distance(timing.start, timing.end);
+                for (i, &(ref event, time)) in timing.events.iter().enumerate() {
+                    let i = i as u64;
+                    let time = distance(timing.start, time);
+                    let at = normalize(time, max, OUTPUT_WIDTH_IN_PX);
+                    writeln!(file, "<span class='event'
+                                          style='left:{}px;\
+                                                 top:{}px;'>{}</span>",
+                             at,
+                             TIME_LINE_HEIGHT_IN_PX * i,
+                             event).unwrap();
+                }
+                writeln!(file, "</div>").unwrap();
+            }
+        }
+
+        writeln!(file, "
+            </body>
+            </html>
+        ").unwrap();
+    }
+}
+
+impl Timeline {
+    pub fn noop() -> Timeline {
+        Timeline { token: None }
+    }
+
+    /// Record an event which happened at this moment on this timeline.
+    ///
+    /// Events are displayed in the eventual HTML output where you can click on
+    /// a particular timeline and it'll expand to all of the events that
+    /// happened on that timeline. This can then be used to drill into a
+    /// particular timeline and see what events are happening and taking the
+    /// most time.
+    pub fn record(&mut self, name: &str) {
+        if let Some(ref mut token) = self.token {
+            token.events.push((name.to_string(), Instant::now()));
+        }
     }
 }
 
diff --git a/src/rustllvm/PassWrapper.cpp b/src/rustllvm/PassWrapper.cpp
index 2f966e5a1c5..e37f048dd47 100644
--- a/src/rustllvm/PassWrapper.cpp
+++ b/src/rustllvm/PassWrapper.cpp
@@ -26,7 +26,11 @@
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
 
 #if LLVM_VERSION_GE(4, 0)
+#include "llvm/Object/ModuleSummaryIndexObjectFile.h"
 #include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/FunctionImport.h"
+#include "llvm/Transforms/Utils/FunctionImportUtils.h"
+#include "llvm/LTO/LTO.h"
 #endif
 
 #include "llvm-c/Transforms/PassManagerBuilder.h"
@@ -102,6 +106,19 @@ extern "C" void LLVMRustAddPass(LLVMPassManagerRef PMR, LLVMPassRef RustPass) {
   PMB->add(Pass);
 }
 
+extern "C"
+bool LLVMRustPassManagerBuilderPopulateThinLTOPassManager(
+  LLVMPassManagerBuilderRef PMBR,
+  LLVMPassManagerRef PMR
+) {
+#if LLVM_VERSION_GE(4, 0)
+  unwrap(PMBR)->populateThinLTOPassManager(*unwrap(PMR));
+  return true;
+#else
+  return false;
+#endif
+}
+
 #ifdef LLVM_COMPONENT_X86
 #define SUBTARGET_X86 SUBTARGET(X86)
 #else
@@ -740,3 +757,447 @@ extern "C" void LLVMRustSetModulePIELevel(LLVMModuleRef M) {
   unwrap(M)->setPIELevel(PIELevel::Level::Large);
 #endif
 }
+
+extern "C" bool
+LLVMRustThinLTOAvailable() {
+#if LLVM_VERSION_GE(4, 0)
+  return true;
+#else
+  return false;
+#endif
+}
+
+#if LLVM_VERSION_GE(4, 0)
+
+// Here you'll find an implementation of ThinLTO as used by the Rust compiler
+// right now. This ThinLTO support is only enabled on "recent ish" versions of
+// LLVM, and otherwise it's just blanket rejected from other compilers.
+//
+// Most of this implementation is straight copied from LLVM. At the time of
+// this writing it wasn't *quite* suitable to reuse more code from upstream
+// for our purposes, but we should strive to upstream this support once it's
+// ready to go! I figure we may want a bit of testing locally first before
+// sending this upstream to LLVM. I hear though they're quite eager to receive
+// feedback like this!
+//
+// If you're reading this code and wondering "what in the world" or you're
+// working "good lord by LLVM upgrade is *still* failing due to these bindings"
+// then fear not! (ok maybe fear a little). All code here is mostly based
+// on `lib/LTO/ThinLTOCodeGenerator.cpp` in LLVM.
+//
+// You'll find that the general layout here roughly corresponds to the `run`
+// method in that file as well as `ProcessThinLTOModule`. Functions are
+// specifically commented below as well, but if you're updating this code
+// or otherwise trying to understand it, the LLVM source will be useful in
+// interpreting the mysteries within.
+//
+// Otherwise I'll apologize in advance, it probably requires a relatively
+// significant investment on your part to "truly understand" what's going on
+// here. Not saying I do myself, but it took me awhile staring at LLVM's source
+// and various online resources about ThinLTO to make heads or tails of all
+// this.
+
+extern "C" bool
+LLVMRustWriteThinBitcodeToFile(LLVMPassManagerRef PMR,
+                               LLVMModuleRef M,
+                               const char *BcFile) {
+  llvm::legacy::PassManager *PM = unwrap<llvm::legacy::PassManager>(PMR);
+  std::error_code EC;
+  llvm::raw_fd_ostream bc(BcFile, EC, llvm::sys::fs::F_None);
+  if (EC) {
+    LLVMRustSetLastError(EC.message().c_str());
+    return false;
+  }
+  PM->add(createWriteThinLTOBitcodePass(bc));
+  PM->run(*unwrap(M));
+  delete PM;
+  return true;
+}
+
+// This is a shared data structure which *must* be threadsafe to share
+// read-only amongst threads. This also corresponds basically to the arguments
+// of the `ProcessThinLTOModule` function in the LLVM source.
+struct LLVMRustThinLTOData {
+  // The combined index that is the global analysis over all modules we're
+  // performing ThinLTO for. This is mostly managed by LLVM.
+  ModuleSummaryIndex Index;
+
+  // All modules we may look at, stored as in-memory serialized versions. This
+  // is later used when inlining to ensure we can extract any module to inline
+  // from.
+  StringMap<MemoryBufferRef> ModuleMap;
+
+  // A set that we manage of everything we *don't* want internalized. Note that
+  // this includes all transitive references right now as well, but it may not
+  // always!
+  DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
+
+  // Not 100% sure what these are, but they impact what's internalized and
+  // what's inlined across modules, I believe.
+  StringMap<FunctionImporter::ImportMapTy> ImportLists;
+  StringMap<FunctionImporter::ExportSetTy> ExportLists;
+  StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries;
+};
+
+// Just an argument to the `LLVMRustCreateThinLTOData` function below.
+struct LLVMRustThinLTOModule {
+  const char *identifier;
+  const char *data;
+  size_t len;
+};
+
+// This is copied from `lib/LTO/ThinLTOCodeGenerator.cpp`, not sure what it
+// does.
+static const GlobalValueSummary *
+getFirstDefinitionForLinker(const GlobalValueSummaryList &GVSummaryList) {
+  auto StrongDefForLinker = llvm::find_if(
+      GVSummaryList, [](const std::unique_ptr<GlobalValueSummary> &Summary) {
+        auto Linkage = Summary->linkage();
+        return !GlobalValue::isAvailableExternallyLinkage(Linkage) &&
+               !GlobalValue::isWeakForLinker(Linkage);
+      });
+  if (StrongDefForLinker != GVSummaryList.end())
+    return StrongDefForLinker->get();
+
+  auto FirstDefForLinker = llvm::find_if(
+      GVSummaryList, [](const std::unique_ptr<GlobalValueSummary> &Summary) {
+        auto Linkage = Summary->linkage();
+        return !GlobalValue::isAvailableExternallyLinkage(Linkage);
+      });
+  if (FirstDefForLinker == GVSummaryList.end())
+    return nullptr;
+  return FirstDefForLinker->get();
+}
+
+// This is a helper function we added that isn't present in LLVM's source.
+//
+// The way LTO works in Rust is that we typically have a number of symbols that
+// we know ahead of time need to be preserved. We want to ensure that ThinLTO
+// doesn't accidentally internalize any of these and otherwise is always
+// ready to keep them linking correctly.
+//
+// This function will recursively walk the `GUID` provided and all of its
+// references, as specified in the `Index`. In other words, we're taking a
+// `GUID` as input, adding it to `Preserved`, and then taking all `GUID`
+// items that the input references and recursing.
+static void
+addPreservedGUID(const ModuleSummaryIndex &Index,
+                 DenseSet<GlobalValue::GUID> &Preserved,
+                 GlobalValue::GUID GUID) {
+  if (Preserved.count(GUID))
+    return;
+  Preserved.insert(GUID);
+
+  auto SummaryList = Index.findGlobalValueSummaryList(GUID);
+  if (SummaryList == Index.end())
+    return;
+  for (auto &Summary : SummaryList->second) {
+    for (auto &Ref : Summary->refs()) {
+      if (Ref.isGUID()) {
+        addPreservedGUID(Index, Preserved, Ref.getGUID());
+      } else {
+        auto Value = Ref.getValue();
+        addPreservedGUID(Index, Preserved, Value->getGUID());
+      }
+    }
+
+    GlobalValueSummary *GVSummary = Summary.get();
+    if (isa<FunctionSummary>(GVSummary)) {
+      FunctionSummary *FS = cast<FunctionSummary>(GVSummary);
+      for (auto &Call: FS->calls()) {
+        if (Call.first.isGUID()) {
+          addPreservedGUID(Index, Preserved, Call.first.getGUID());
+        } else {
+          auto Value = Call.first.getValue();
+          addPreservedGUID(Index, Preserved, Value->getGUID());
+        }
+      }
+      for (auto &GUID: FS->type_tests()) {
+        addPreservedGUID(Index, Preserved, GUID);
+      }
+    }
+  }
+}
+
+// The main entry point for creating the global ThinLTO analysis. The structure
+// here is basically the same as before threads are spawned in the `run`
+// function of `lib/LTO/ThinLTOCodeGenerator.cpp`.
+extern "C" LLVMRustThinLTOData*
+LLVMRustCreateThinLTOData(LLVMRustThinLTOModule *modules,
+                          int num_modules,
+                          const char **preserved_symbols,
+                          int num_symbols) {
+  auto Ret = llvm::make_unique<LLVMRustThinLTOData>();
+
+  // Load each module's summary and merge it into one combined index
+  for (int i = 0; i < num_modules; i++) {
+    auto module = &modules[i];
+    StringRef buffer(module->data, module->len);
+    MemoryBufferRef mem_buffer(buffer, module->identifier);
+
+    Ret->ModuleMap[module->identifier] = mem_buffer;
+
+    Expected<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr =
+      object::ModuleSummaryIndexObjectFile::create(mem_buffer);
+    if (!ObjOrErr) {
+      LLVMRustSetLastError(toString(ObjOrErr.takeError()).c_str());
+      return nullptr;
+    }
+    auto Index = (*ObjOrErr)->takeIndex();
+    Ret->Index.mergeFrom(std::move(Index), i);
+  }
+
+  // Collect for each module the list of function it defines (GUID -> Summary)
+  Ret->Index.collectDefinedGVSummariesPerModule(Ret->ModuleToDefinedGVSummaries);
+
+  // Convert the preserved symbols set from string to GUID, this is then needed
+  // for internalization. We use `addPreservedGUID` to include any transitively
+  // used symbol as well.
+  for (int i = 0; i < num_symbols; i++) {
+    addPreservedGUID(Ret->Index,
+                     Ret->GUIDPreservedSymbols,
+                     GlobalValue::getGUID(preserved_symbols[i]));
+  }
+
+  // Collect the import/export lists for all modules from the call-graph in the
+  // combined index
+  //
+  // This is copied from `lib/LTO/ThinLTOCodeGenerator.cpp`
+  computeDeadSymbols(Ret->Index, Ret->GUIDPreservedSymbols);
+  ComputeCrossModuleImport(
+    Ret->Index,
+    Ret->ModuleToDefinedGVSummaries,
+    Ret->ImportLists,
+    Ret->ExportLists
+  );
+
+  // Resolve LinkOnce/Weak symbols, this has to be computed early be cause it
+  // impacts the caching.
+  //
+  // This is copied from `lib/LTO/ThinLTOCodeGenerator.cpp`
+  StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
+  DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
+  for (auto &I : Ret->Index) {
+    if (I.second.size() > 1)
+      PrevailingCopy[I.first] = getFirstDefinitionForLinker(I.second);
+  }
+  auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) {
+    const auto &Prevailing = PrevailingCopy.find(GUID);
+    if (Prevailing == PrevailingCopy.end())
+      return true;
+    return Prevailing->second == S;
+  };
+  auto recordNewLinkage = [&](StringRef ModuleIdentifier,
+                              GlobalValue::GUID GUID,
+                              GlobalValue::LinkageTypes NewLinkage) {
+    ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
+  };
+  thinLTOResolveWeakForLinkerInIndex(Ret->Index, isPrevailing, recordNewLinkage);
+  auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
+    const auto &ExportList = Ret->ExportLists.find(ModuleIdentifier);
+    return (ExportList != Ret->ExportLists.end() &&
+      ExportList->second.count(GUID)) ||
+      Ret->GUIDPreservedSymbols.count(GUID);
+  };
+  thinLTOInternalizeAndPromoteInIndex(Ret->Index, isExported);
+
+  return Ret.release();
+}
+
+extern "C" void
+LLVMRustFreeThinLTOData(LLVMRustThinLTOData *Data) {
+  delete Data;
+}
+
+// Below are the various passes that happen *per module* when doing ThinLTO.
+//
+// In other words, these are the functions that are all run concurrently
+// with one another, one per module. The passes here correspond to the analysis
+// passes in `lib/LTO/ThinLTOCodeGenerator.cpp`, currently found in the
+// `ProcessThinLTOModule` function. Here they're split up into separate steps
+// so rustc can save off the intermediate bytecode between each step.
+
+extern "C" bool
+LLVMRustPrepareThinLTORename(const LLVMRustThinLTOData *Data, LLVMModuleRef M) {
+  Module &Mod = *unwrap(M);
+  if (renameModuleForThinLTO(Mod, Data->Index)) {
+    LLVMRustSetLastError("renameModuleForThinLTO failed");
+    return false;
+  }
+  return true;
+}
+
+extern "C" bool
+LLVMRustPrepareThinLTOResolveWeak(const LLVMRustThinLTOData *Data, LLVMModuleRef M) {
+  Module &Mod = *unwrap(M);
+  const auto &DefinedGlobals = Data->ModuleToDefinedGVSummaries.lookup(Mod.getModuleIdentifier());
+  thinLTOResolveWeakForLinkerModule(Mod, DefinedGlobals);
+  return true;
+}
+
+extern "C" bool
+LLVMRustPrepareThinLTOInternalize(const LLVMRustThinLTOData *Data, LLVMModuleRef M) {
+  Module &Mod = *unwrap(M);
+  const auto &DefinedGlobals = Data->ModuleToDefinedGVSummaries.lookup(Mod.getModuleIdentifier());
+  thinLTOInternalizeModule(Mod, DefinedGlobals);
+  return true;
+}
+
+extern "C" bool
+LLVMRustPrepareThinLTOImport(const LLVMRustThinLTOData *Data, LLVMModuleRef M) {
+  Module &Mod = *unwrap(M);
+  const auto &ImportList = Data->ImportLists.lookup(Mod.getModuleIdentifier());
+  auto Loader = [&](StringRef Identifier) {
+    const auto &Memory = Data->ModuleMap.lookup(Identifier);
+    auto &Context = Mod.getContext();
+    return getLazyBitcodeModule(Memory, Context, true, true);
+  };
+  FunctionImporter Importer(Data->Index, Loader);
+  Expected<bool> Result = Importer.importFunctions(Mod, ImportList);
+  if (!Result) {
+    LLVMRustSetLastError(toString(Result.takeError()).c_str());
+    return false;
+  }
+  return true;
+}
+
+// This struct and various functions are sort of a hack right now, but the
+// problem is that we've got in-memory LLVM modules after we generate and
+// optimize all codegen-units for one compilation in rustc. To be compatible
+// with the LTO support above we need to serialize the modules plus their
+// ThinLTO summary into memory.
+//
+// This structure is basically an owned version of a serialize module, with
+// a ThinLTO summary attached.
+struct LLVMRustThinLTOBuffer {
+  std::string data;
+};
+
+extern "C" LLVMRustThinLTOBuffer*
+LLVMRustThinLTOBufferCreate(LLVMModuleRef M) {
+  auto Ret = llvm::make_unique<LLVMRustThinLTOBuffer>();
+  {
+    raw_string_ostream OS(Ret->data);
+    {
+      legacy::PassManager PM;
+      PM.add(createWriteThinLTOBitcodePass(OS));
+      PM.run(*unwrap(M));
+    }
+  }
+  return Ret.release();
+}
+
+extern "C" void
+LLVMRustThinLTOBufferFree(LLVMRustThinLTOBuffer *Buffer) {
+  delete Buffer;
+}
+
+extern "C" const void*
+LLVMRustThinLTOBufferPtr(const LLVMRustThinLTOBuffer *Buffer) {
+  return Buffer->data.data();
+}
+
+extern "C" size_t
+LLVMRustThinLTOBufferLen(const LLVMRustThinLTOBuffer *Buffer) {
+  return Buffer->data.length();
+}
+
+// This is what we used to parse upstream bitcode for actual ThinLTO
+// processing.  We'll call this once per module optimized through ThinLTO, and
+// it'll be called concurrently on many threads.
+extern "C" LLVMModuleRef
+LLVMRustParseBitcodeForThinLTO(LLVMContextRef Context,
+                               const char *data,
+                               size_t len,
+                               const char *identifier) {
+  StringRef Data(data, len);
+  MemoryBufferRef Buffer(Data, identifier);
+  unwrap(Context)->enableDebugTypeODRUniquing();
+  Expected<std::unique_ptr<Module>> SrcOrError =
+      parseBitcodeFile(Buffer, *unwrap(Context));
+  if (!SrcOrError) {
+    LLVMRustSetLastError(toString(SrcOrError.takeError()).c_str());
+    return nullptr;
+  }
+  return wrap(std::move(*SrcOrError).release());
+}
+
+#else
+
+extern "C" bool
+LLVMRustWriteThinBitcodeToFile(LLVMPassManagerRef PMR,
+                               LLVMModuleRef M,
+                               const char *BcFile) {
+  llvm_unreachable("ThinLTO not available");
+}
+
+struct LLVMRustThinLTOData {
+};
+
+struct LLVMRustThinLTOModule {
+};
+
+extern "C" LLVMRustThinLTOData*
+LLVMRustCreateThinLTOData(LLVMRustThinLTOModule *modules,
+                          int num_modules,
+                          const char **preserved_symbols,
+                          int num_symbols) {
+  llvm_unreachable("ThinLTO not available");
+}
+
+extern "C" bool
+LLVMRustPrepareThinLTORename(const LLVMRustThinLTOData *Data, LLVMModuleRef M) {
+  llvm_unreachable("ThinLTO not available");
+}
+
+extern "C" bool
+LLVMRustPrepareThinLTOResolveWeak(const LLVMRustThinLTOData *Data, LLVMModuleRef M) {
+  llvm_unreachable("ThinLTO not available");
+}
+
+extern "C" bool
+LLVMRustPrepareThinLTOInternalize(const LLVMRustThinLTOData *Data, LLVMModuleRef M) {
+  llvm_unreachable("ThinLTO not available");
+}
+
+extern "C" bool
+LLVMRustPrepareThinLTOImport(const LLVMRustThinLTOData *Data, LLVMModuleRef M) {
+  llvm_unreachable("ThinLTO not available");
+}
+
+extern "C" void
+LLVMRustFreeThinLTOData(LLVMRustThinLTOData *Data) {
+  llvm_unreachable("ThinLTO not available");
+}
+
+struct LLVMRustThinLTOBuffer {
+};
+
+extern "C" LLVMRustThinLTOBuffer*
+LLVMRustThinLTOBufferCreate(LLVMModuleRef M) {
+  llvm_unreachable("ThinLTO not available");
+}
+
+extern "C" void
+LLVMRustThinLTOBufferFree(LLVMRustThinLTOBuffer *Buffer) {
+  llvm_unreachable("ThinLTO not available");
+}
+
+extern "C" const void*
+LLVMRustThinLTOBufferPtr(const LLVMRustThinLTOBuffer *Buffer) {
+  llvm_unreachable("ThinLTO not available");
+}
+
+extern "C" size_t
+LLVMRustThinLTOBufferLen(const LLVMRustThinLTOBuffer *Buffer) {
+  llvm_unreachable("ThinLTO not available");
+}
+
+extern "C" LLVMModuleRef
+LLVMRustParseBitcodeForThinLTO(LLVMContextRef Context,
+                               const char *data,
+                               size_t len,
+                               const char *identifier) {
+  llvm_unreachable("ThinLTO not available");
+}
+#endif // LLVM_VERSION_GE(4, 0)
diff --git a/src/test/codegen-units/item-collection/drop_in_place_intrinsic.rs b/src/test/codegen-units/item-collection/drop_in_place_intrinsic.rs
index d8e6028b799..bd1325e7501 100644
--- a/src/test/codegen-units/item-collection/drop_in_place_intrinsic.rs
+++ b/src/test/codegen-units/item-collection/drop_in_place_intrinsic.rs
@@ -11,7 +11,7 @@
 // ignore-tidy-linelength
 // compile-flags:-Zprint-trans-items=eager
 
-//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<drop_in_place_intrinsic::StructWithDtor[0]> @@ drop_in_place_intrinsic.cgu-0[Internal]
+//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<drop_in_place_intrinsic::StructWithDtor[0]> @@ drop_in_place_intrinsic0[Internal]
 struct StructWithDtor(u32);
 
 impl Drop for StructWithDtor {
@@ -22,7 +22,7 @@ impl Drop for StructWithDtor {
 //~ TRANS_ITEM fn drop_in_place_intrinsic::main[0]
 fn main() {
 
-    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<[drop_in_place_intrinsic::StructWithDtor[0]; 2]> @@ drop_in_place_intrinsic.cgu-0[Internal]
+    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<[drop_in_place_intrinsic::StructWithDtor[0]; 2]> @@ drop_in_place_intrinsic0[Internal]
     let x = [StructWithDtor(0), StructWithDtor(1)];
 
     drop_slice_in_place(&x);
@@ -34,7 +34,7 @@ fn drop_slice_in_place(x: &[StructWithDtor]) {
         // This is the interesting thing in this test case: Normally we would
         // not have drop-glue for the unsized [StructWithDtor]. This has to be
         // generated though when the drop_in_place() intrinsic is used.
-        //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<[drop_in_place_intrinsic::StructWithDtor[0]]> @@ drop_in_place_intrinsic.cgu-0[Internal]
+        //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<[drop_in_place_intrinsic::StructWithDtor[0]]> @@ drop_in_place_intrinsic0[Internal]
         ::std::ptr::drop_in_place(x as *const _ as *mut [StructWithDtor]);
     }
 }
diff --git a/src/test/codegen-units/item-collection/generic-drop-glue.rs b/src/test/codegen-units/item-collection/generic-drop-glue.rs
index 06e02b10015..108a8b570de 100644
--- a/src/test/codegen-units/item-collection/generic-drop-glue.rs
+++ b/src/test/codegen-units/item-collection/generic-drop-glue.rs
@@ -45,7 +45,7 @@ enum EnumNoDrop<T1, T2> {
 struct NonGenericNoDrop(i32);
 
 struct NonGenericWithDrop(i32);
-//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<generic_drop_glue::NonGenericWithDrop[0]> @@ generic_drop_glue.cgu-0[Internal]
+//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<generic_drop_glue::NonGenericWithDrop[0]> @@ generic_drop_glue0[Internal]
 
 impl Drop for NonGenericWithDrop {
     //~ TRANS_ITEM fn generic_drop_glue::{{impl}}[2]::drop[0]
@@ -54,11 +54,11 @@ impl Drop for NonGenericWithDrop {
 
 //~ TRANS_ITEM fn generic_drop_glue::main[0]
 fn main() {
-    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<generic_drop_glue::StructWithDrop[0]<i8, char>> @@ generic_drop_glue.cgu-0[Internal]
+    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<generic_drop_glue::StructWithDrop[0]<i8, char>> @@ generic_drop_glue0[Internal]
     //~ TRANS_ITEM fn generic_drop_glue::{{impl}}[0]::drop[0]<i8, char>
     let _ = StructWithDrop { x: 0i8, y: 'a' }.x;
 
-    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<generic_drop_glue::StructWithDrop[0]<&str, generic_drop_glue::NonGenericNoDrop[0]>> @@ generic_drop_glue.cgu-0[Internal]
+    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<generic_drop_glue::StructWithDrop[0]<&str, generic_drop_glue::NonGenericNoDrop[0]>> @@ generic_drop_glue0[Internal]
     //~ TRANS_ITEM fn generic_drop_glue::{{impl}}[0]::drop[0]<&str, generic_drop_glue::NonGenericNoDrop[0]>
     let _ = StructWithDrop { x: "&str", y: NonGenericNoDrop(0) }.y;
 
@@ -67,17 +67,17 @@ fn main() {
 
     // This is supposed to generate drop-glue because it contains a field that
     // needs to be dropped.
-    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<generic_drop_glue::StructNoDrop[0]<generic_drop_glue::NonGenericWithDrop[0], f64>> @@ generic_drop_glue.cgu-0[Internal]
+    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<generic_drop_glue::StructNoDrop[0]<generic_drop_glue::NonGenericWithDrop[0], f64>> @@ generic_drop_glue0[Internal]
     let _ = StructNoDrop { x: NonGenericWithDrop(0), y: 0f64 }.y;
 
-    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<generic_drop_glue::EnumWithDrop[0]<i32, i64>> @@ generic_drop_glue.cgu-0[Internal]
+    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<generic_drop_glue::EnumWithDrop[0]<i32, i64>> @@ generic_drop_glue0[Internal]
     //~ TRANS_ITEM fn generic_drop_glue::{{impl}}[1]::drop[0]<i32, i64>
     let _ = match EnumWithDrop::A::<i32, i64>(0) {
         EnumWithDrop::A(x) => x,
         EnumWithDrop::B(x) => x as i32
     };
 
-    //~TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<generic_drop_glue::EnumWithDrop[0]<f64, f32>> @@ generic_drop_glue.cgu-0[Internal]
+    //~TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<generic_drop_glue::EnumWithDrop[0]<f64, f32>> @@ generic_drop_glue0[Internal]
     //~ TRANS_ITEM fn generic_drop_glue::{{impl}}[1]::drop[0]<f64, f32>
     let _ = match EnumWithDrop::B::<f64, f32>(1.0) {
         EnumWithDrop::A(x) => x,
diff --git a/src/test/codegen-units/item-collection/instantiation-through-vtable.rs b/src/test/codegen-units/item-collection/instantiation-through-vtable.rs
index 9c6bdb6624e..875cacb3907 100644
--- a/src/test/codegen-units/item-collection/instantiation-through-vtable.rs
+++ b/src/test/codegen-units/item-collection/instantiation-through-vtable.rs
@@ -31,13 +31,13 @@ impl<T> Trait for Struct<T> {
 fn main() {
     let s1 = Struct { _a: 0u32 };
 
-    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<instantiation_through_vtable::Struct[0]<u32>> @@ instantiation_through_vtable.cgu-0[Internal]
+    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<instantiation_through_vtable::Struct[0]<u32>> @@ instantiation_through_vtable0[Internal]
     //~ TRANS_ITEM fn instantiation_through_vtable::{{impl}}[0]::foo[0]<u32>
     //~ TRANS_ITEM fn instantiation_through_vtable::{{impl}}[0]::bar[0]<u32>
     let _ = &s1 as &Trait;
 
     let s1 = Struct { _a: 0u64 };
-    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<instantiation_through_vtable::Struct[0]<u64>> @@ instantiation_through_vtable.cgu-0[Internal]
+    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<instantiation_through_vtable::Struct[0]<u64>> @@ instantiation_through_vtable0[Internal]
     //~ TRANS_ITEM fn instantiation_through_vtable::{{impl}}[0]::foo[0]<u64>
     //~ TRANS_ITEM fn instantiation_through_vtable::{{impl}}[0]::bar[0]<u64>
     let _ = &s1 as &Trait;
diff --git a/src/test/codegen-units/item-collection/non-generic-drop-glue.rs b/src/test/codegen-units/item-collection/non-generic-drop-glue.rs
index 5f70ff396dd..a6081b2c5eb 100644
--- a/src/test/codegen-units/item-collection/non-generic-drop-glue.rs
+++ b/src/test/codegen-units/item-collection/non-generic-drop-glue.rs
@@ -13,7 +13,7 @@
 
 #![deny(dead_code)]
 
-//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<non_generic_drop_glue::StructWithDrop[0]> @@ non_generic_drop_glue.cgu-0[Internal]
+//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<non_generic_drop_glue::StructWithDrop[0]> @@ non_generic_drop_glue0[Internal]
 struct StructWithDrop {
     x: i32
 }
@@ -27,7 +27,7 @@ struct StructNoDrop {
     x: i32
 }
 
-//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<non_generic_drop_glue::EnumWithDrop[0]> @@ non_generic_drop_glue.cgu-0[Internal]
+//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<non_generic_drop_glue::EnumWithDrop[0]> @@ non_generic_drop_glue0[Internal]
 enum EnumWithDrop {
     A(i32)
 }
diff --git a/src/test/codegen-units/item-collection/transitive-drop-glue.rs b/src/test/codegen-units/item-collection/transitive-drop-glue.rs
index e41cb34eec6..8eef5f00f2a 100644
--- a/src/test/codegen-units/item-collection/transitive-drop-glue.rs
+++ b/src/test/codegen-units/item-collection/transitive-drop-glue.rs
@@ -13,11 +13,11 @@
 
 #![deny(dead_code)]
 
-//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::Root[0]> @@ transitive_drop_glue.cgu-0[Internal]
+//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::Root[0]> @@ transitive_drop_glue0[Internal]
 struct Root(Intermediate);
-//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::Intermediate[0]> @@ transitive_drop_glue.cgu-0[Internal]
+//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::Intermediate[0]> @@ transitive_drop_glue0[Internal]
 struct Intermediate(Leaf);
-//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::Leaf[0]> @@ transitive_drop_glue.cgu-0[Internal]
+//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::Leaf[0]> @@ transitive_drop_glue0[Internal]
 struct Leaf;
 
 impl Drop for Leaf {
@@ -38,15 +38,15 @@ fn main() {
 
     let _ = Root(Intermediate(Leaf));
 
-    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::RootGen[0]<u32>> @@ transitive_drop_glue.cgu-0[Internal]
-    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::IntermediateGen[0]<u32>> @@ transitive_drop_glue.cgu-0[Internal]
-    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::LeafGen[0]<u32>> @@ transitive_drop_glue.cgu-0[Internal]
+    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::RootGen[0]<u32>> @@ transitive_drop_glue0[Internal]
+    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::IntermediateGen[0]<u32>> @@ transitive_drop_glue0[Internal]
+    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::LeafGen[0]<u32>> @@ transitive_drop_glue0[Internal]
     //~ TRANS_ITEM fn transitive_drop_glue::{{impl}}[1]::drop[0]<u32>
     let _ = RootGen(IntermediateGen(LeafGen(0u32)));
 
-    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::RootGen[0]<i16>> @@ transitive_drop_glue.cgu-0[Internal]
-    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::IntermediateGen[0]<i16>> @@ transitive_drop_glue.cgu-0[Internal]
-    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::LeafGen[0]<i16>> @@ transitive_drop_glue.cgu-0[Internal]
+    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::RootGen[0]<i16>> @@ transitive_drop_glue0[Internal]
+    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::IntermediateGen[0]<i16>> @@ transitive_drop_glue0[Internal]
+    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::LeafGen[0]<i16>> @@ transitive_drop_glue0[Internal]
     //~ TRANS_ITEM fn transitive_drop_glue::{{impl}}[1]::drop[0]<i16>
     let _ = RootGen(IntermediateGen(LeafGen(0i16)));
 }
diff --git a/src/test/codegen-units/item-collection/tuple-drop-glue.rs b/src/test/codegen-units/item-collection/tuple-drop-glue.rs
index 39043cf87cb..7edb1c14525 100644
--- a/src/test/codegen-units/item-collection/tuple-drop-glue.rs
+++ b/src/test/codegen-units/item-collection/tuple-drop-glue.rs
@@ -13,7 +13,7 @@
 
 #![deny(dead_code)]
 
-//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<tuple_drop_glue::Dropped[0]> @@ tuple_drop_glue.cgu-0[Internal]
+//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<tuple_drop_glue::Dropped[0]> @@ tuple_drop_glue0[Internal]
 struct Dropped;
 
 impl Drop for Dropped {
@@ -23,10 +23,10 @@ impl Drop for Dropped {
 
 //~ TRANS_ITEM fn tuple_drop_glue::main[0]
 fn main() {
-    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<(u32, tuple_drop_glue::Dropped[0])> @@ tuple_drop_glue.cgu-0[Internal]
+    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<(u32, tuple_drop_glue::Dropped[0])> @@ tuple_drop_glue0[Internal]
     let x = (0u32, Dropped);
 
-    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<(i16, (tuple_drop_glue::Dropped[0], bool))> @@ tuple_drop_glue.cgu-0[Internal]
-    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<(tuple_drop_glue::Dropped[0], bool)> @@ tuple_drop_glue.cgu-0[Internal]
+    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<(i16, (tuple_drop_glue::Dropped[0], bool))> @@ tuple_drop_glue0[Internal]
+    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<(tuple_drop_glue::Dropped[0], bool)> @@ tuple_drop_glue0[Internal]
     let x = (0i16, (Dropped, true));
 }
diff --git a/src/test/codegen-units/item-collection/unsizing.rs b/src/test/codegen-units/item-collection/unsizing.rs
index de7613741b2..cf0c6643238 100644
--- a/src/test/codegen-units/item-collection/unsizing.rs
+++ b/src/test/codegen-units/item-collection/unsizing.rs
@@ -57,13 +57,13 @@ fn main()
 {
     // simple case
     let bool_sized = &true;
-    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<bool> @@ unsizing.cgu-0[Internal]
+    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<bool> @@ unsizing0[Internal]
     //~ TRANS_ITEM fn unsizing::{{impl}}[0]::foo[0]
     let _bool_unsized = bool_sized as &Trait;
 
     let char_sized = &'a';
 
-    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<char> @@ unsizing.cgu-0[Internal]
+    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<char> @@ unsizing0[Internal]
     //~ TRANS_ITEM fn unsizing::{{impl}}[1]::foo[0]
     let _char_unsized = char_sized as &Trait;
 
@@ -73,13 +73,13 @@ fn main()
         _b: 2,
         _c: 3.0f64
     };
-    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<f64> @@ unsizing.cgu-0[Internal]
+    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<f64> @@ unsizing0[Internal]
     //~ TRANS_ITEM fn unsizing::{{impl}}[2]::foo[0]
     let _struct_unsized = struct_sized as &Struct<Trait>;
 
     // custom coercion
     let wrapper_sized = Wrapper(&0u32);
-    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<u32> @@ unsizing.cgu-0[Internal]
+    //~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<u32> @@ unsizing0[Internal]
     //~ TRANS_ITEM fn unsizing::{{impl}}[3]::foo[0]
     let _wrapper_sized = wrapper_sized as Wrapper<Trait>;
 }
diff --git a/src/test/run-make/extra-filename-with-temp-outputs/Makefile b/src/test/run-make/extra-filename-with-temp-outputs/Makefile
index d33c18a6f3c..13ca397eaf2 100644
--- a/src/test/run-make/extra-filename-with-temp-outputs/Makefile
+++ b/src/test/run-make/extra-filename-with-temp-outputs/Makefile
@@ -2,5 +2,5 @@
 
 all:
 	$(RUSTC) -C extra-filename=bar foo.rs -C save-temps
-	rm $(TMPDIR)/foobar.0.o
+	rm $(TMPDIR)/foobar.foo0.rust-cgu.o
 	rm $(TMPDIR)/$(call BIN,foobar)
diff --git a/src/test/run-make/sepcomp-cci-copies/Makefile b/src/test/run-make/sepcomp-cci-copies/Makefile
index 189088219d5..8324a074d6c 100644
--- a/src/test/run-make/sepcomp-cci-copies/Makefile
+++ b/src/test/run-make/sepcomp-cci-copies/Makefile
@@ -6,4 +6,4 @@
 all:
 	$(RUSTC) cci_lib.rs
 	$(RUSTC) foo.rs --emit=llvm-ir -C codegen-units=3
-	[ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c define\ .*cci_fn)" -eq "2" ]
+	[ "$$(cat "$(TMPDIR)"/foo.*.ll | grep -c define\ .*cci_fn)" -eq "2" ]
diff --git a/src/test/run-make/sepcomp-inlining/Makefile b/src/test/run-make/sepcomp-inlining/Makefile
index 720dfff2c04..6dc837b8a78 100644
--- a/src/test/run-make/sepcomp-inlining/Makefile
+++ b/src/test/run-make/sepcomp-inlining/Makefile
@@ -8,7 +8,7 @@
 
 all:
 	$(RUSTC) foo.rs --emit=llvm-ir -C codegen-units=3
-	[ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c define\ i32\ .*inlined)" -eq "0" ]
-	[ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c define\ internal\ i32\ .*inlined)" -eq "2" ]
-	[ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c define\ hidden\ i32\ .*normal)" -eq "1" ]
-	[ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c declare\ hidden\ i32\ .*normal)" -eq "2" ]
+	[ "$$(cat "$(TMPDIR)"/foo.*.ll | grep -c define\ i32\ .*inlined)" -eq "0" ]
+	[ "$$(cat "$(TMPDIR)"/foo.*.ll | grep -c define\ internal\ i32\ .*inlined)" -eq "2" ]
+	[ "$$(cat "$(TMPDIR)"/foo.*.ll | grep -c define\ hidden\ i32\ .*normal)" -eq "1" ]
+	[ "$$(cat "$(TMPDIR)"/foo.*.ll | grep -c declare\ hidden\ i32\ .*normal)" -eq "2" ]
diff --git a/src/test/run-make/sepcomp-separate/Makefile b/src/test/run-make/sepcomp-separate/Makefile
index a475bdfd74a..5b8bdb0fad8 100644
--- a/src/test/run-make/sepcomp-separate/Makefile
+++ b/src/test/run-make/sepcomp-separate/Makefile
@@ -6,4 +6,4 @@
 
 all:
 	$(RUSTC) foo.rs --emit=llvm-ir -C codegen-units=3
-	[ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c define\ .*magic_fn)" -eq "3" ]
+	[ "$$(cat "$(TMPDIR)"/foo.*.ll | grep -c define\ .*magic_fn)" -eq "3" ]
diff --git a/src/test/run-pass/auxiliary/thin-lto-inlines-aux.rs b/src/test/run-pass/auxiliary/thin-lto-inlines-aux.rs
new file mode 100644
index 00000000000..ccbb0e7a718
--- /dev/null
+++ b/src/test/run-pass/auxiliary/thin-lto-inlines-aux.rs
@@ -0,0 +1,17 @@
+// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// no-prefer-dynamic
+
+#![crate_type = "rlib"]
+
+pub fn bar() -> u32 {
+    3
+}
diff --git a/src/test/run-pass/thin-lto-inlines.rs b/src/test/run-pass/thin-lto-inlines.rs
new file mode 100644
index 00000000000..3135a682d86
--- /dev/null
+++ b/src/test/run-pass/thin-lto-inlines.rs
@@ -0,0 +1,39 @@
+// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// compile-flags: -Z thinlto -C codegen-units=8 -O
+// min-llvm-version 4.0
+// ignore-emscripten
+
+// We want to assert here that ThinLTO will inline across codegen units. There's
+// not really a great way to do that in general so we sort of hack around it by
+// praying two functions go into separate codegen units and then assuming that
+// if inlining *doesn't* happen the first byte of the functions will differ.
+
+pub fn foo() -> u32 {
+    bar::bar()
+}
+
+mod bar {
+    pub fn bar() -> u32 {
+        3
+    }
+}
+
+fn main() {
+    println!("{} {}", foo(), bar::bar());
+
+    unsafe {
+        let foo = foo as usize as *const u8;
+        let bar = bar::bar as usize as *const u8;
+
+        assert_eq!(*foo, *bar);
+    }
+}
diff --git a/src/test/run-pass/thin-lto-inlines2.rs b/src/test/run-pass/thin-lto-inlines2.rs
new file mode 100644
index 00000000000..ed899d2b115
--- /dev/null
+++ b/src/test/run-pass/thin-lto-inlines2.rs
@@ -0,0 +1,38 @@
+// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// compile-flags: -Z thinlto -C codegen-units=8 -O -C lto
+// aux-build:thin-lto-inlines-aux.rs
+// min-llvm-version 4.0
+// no-prefer-dynamic
+// ignore-emscripten
+
+// We want to assert here that ThinLTO will inline across codegen units. There's
+// not really a great way to do that in general so we sort of hack around it by
+// praying two functions go into separate codegen units and then assuming that
+// if inlining *doesn't* happen the first byte of the functions will differ.
+
+extern crate thin_lto_inlines_aux as bar;
+
+pub fn foo() -> u32 {
+    bar::bar()
+}
+
+fn main() {
+    println!("{} {}", foo(), bar::bar());
+
+    unsafe {
+        let foo = foo as usize as *const u8;
+        let bar = bar::bar as usize as *const u8;
+
+        assert_eq!(*foo, *bar);
+    }
+}
+