diff options
| author | Mazdak Farrokhzad <twingoow@gmail.com> | 2019-02-14 08:24:15 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-02-14 08:24:15 +0100 |
| commit | 2a539a1b91c5ac17e6445d23e49c3f69d50d1ab6 (patch) | |
| tree | 190f31969952487440ba3fdc4f9e54e03b6137e2 /src/librustc_codegen_llvm | |
| parent | 0dbb867beb187119bd0bca1f05de6b00cd72f141 (diff) | |
| parent | e983b4f64ee6d919a60938b6e7371a66877f4a23 (diff) | |
| download | rust-2a539a1b91c5ac17e6445d23e49c3f69d50d1ab6.tar.gz rust-2a539a1b91c5ac17e6445d23e49c3f69d50d1ab6.zip | |
Rollup merge of #58378 - alexcrichton:incremental-lto, r=michaelwoerister
rustc: Implement incremental "fat" LTO Currently the compiler will produce an error if both incremental compilation and full fat LTO is requested. With recent changes and the advent of incremental ThinLTO, however, all the hard work is already done for us and it's actually not too bad to remove this error! This commit updates the codegen backend to allow incremental full fat LTO. The semantics are that the input modules to LTO are all produced incrementally, but the final LTO step is always done unconditionally regardless of whether the inputs changed or not. The only real incremental win we could have here is if zero of the input modules changed, but that's so rare it's unlikely to be worthwhile to implement such a code path. cc #57968 cc rust-lang/cargo#6643
Diffstat (limited to 'src/librustc_codegen_llvm')
| -rw-r--r-- | src/librustc_codegen_llvm/back/lto.rs | 139 | ||||
| -rw-r--r-- | src/librustc_codegen_llvm/lib.rs | 40 | ||||
| -rw-r--r-- | src/librustc_codegen_llvm/llvm/ffi.rs | 2 |
3 files changed, 137 insertions, 44 deletions
diff --git a/src/librustc_codegen_llvm/back/lto.rs b/src/librustc_codegen_llvm/back/lto.rs index ac55244d8d9..fc3ab7f734a 100644 --- a/src/librustc_codegen_llvm/back/lto.rs +++ b/src/librustc_codegen_llvm/back/lto.rs @@ -1,6 +1,6 @@ use back::bytecode::{DecodedBytecode, RLIB_BYTECODE_EXTENSION}; use rustc_codegen_ssa::back::symbol_export; -use rustc_codegen_ssa::back::write::{ModuleConfig, CodegenContext, pre_lto_bitcode_filename}; +use rustc_codegen_ssa::back::write::{ModuleConfig, CodegenContext, FatLTOInput}; use rustc_codegen_ssa::back::lto::{SerializedModule, LtoModuleCodegen, ThinShared, ThinModule}; use rustc_codegen_ssa::traits::*; use back::write::{self, DiagnosticHandlers, with_llvm_pmb, save_temp_bitcode, to_llvm_opt_settings}; @@ -21,7 +21,6 @@ use rustc_codegen_ssa::{ModuleCodegen, ModuleKind}; use libc; use std::ffi::{CStr, CString}; -use std::fs; use std::ptr; use std::slice; use std::sync::Arc; @@ -133,7 +132,8 @@ fn prepare_lto(cgcx: &CodegenContext<LlvmCodegenBackend>, /// Performs fat LTO by merging all modules into a single one and returning it /// for further optimization. 
pub(crate) fn run_fat(cgcx: &CodegenContext<LlvmCodegenBackend>, - modules: Vec<ModuleCodegen<ModuleLlvm>>, + modules: Vec<FatLTOInput<LlvmCodegenBackend>>, + cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>, timeline: &mut Timeline) -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError> { @@ -142,7 +142,15 @@ pub(crate) fn run_fat(cgcx: &CodegenContext<LlvmCodegenBackend>, let symbol_white_list = symbol_white_list.iter() .map(|c| c.as_ptr()) .collect::<Vec<_>>(); - fat_lto(cgcx, &diag_handler, modules, upstream_modules, &symbol_white_list, timeline) + fat_lto( + cgcx, + &diag_handler, + modules, + cached_modules, + upstream_modules, + &symbol_white_list, + timeline, + ) } /// Performs thin LTO by performing necessary global analysis and returning two @@ -173,33 +181,17 @@ pub(crate) fn run_thin(cgcx: &CodegenContext<LlvmCodegenBackend>, } pub(crate) fn prepare_thin( - cgcx: &CodegenContext<LlvmCodegenBackend>, module: ModuleCodegen<ModuleLlvm> ) -> (String, ThinBuffer) { let name = module.name.clone(); let buffer = ThinBuffer::new(module.module_llvm.llmod()); - - // We emit the module after having serialized it into a ThinBuffer - // because only then it will contain the ThinLTO module summary. 
- if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir { - if cgcx.config(module.kind).emit_pre_thin_lto_bc { - let path = incr_comp_session_dir - .join(pre_lto_bitcode_filename(&name)); - - fs::write(&path, buffer.data()).unwrap_or_else(|e| { - panic!("Error writing pre-lto-bitcode file `{}`: {}", - path.display(), - e); - }); - } - } - (name, buffer) } fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>, diag_handler: &Handler, - mut modules: Vec<ModuleCodegen<ModuleLlvm>>, + mut modules: Vec<FatLTOInput<LlvmCodegenBackend>>, + cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>, mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>, symbol_white_list: &[*const libc::c_char], timeline: &mut Timeline) @@ -216,8 +208,14 @@ fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>, // file copy operations in the backend work correctly. The only other kind // of module here should be an allocator one, and if your crate is smaller // than the allocator module then the size doesn't really matter anyway. - let (_, costliest_module) = modules.iter() + let costliest_module = modules.iter() .enumerate() + .filter_map(|(i, module)| { + match module { + FatLTOInput::InMemory(m) => Some((i, m)), + FatLTOInput::Serialized { .. } => None, + } + }) .filter(|&(_, module)| module.kind == ModuleKind::Regular) .map(|(i, module)| { let cost = unsafe { @@ -225,9 +223,38 @@ fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>, }; (cost, i) }) - .max() - .expect("must be codegen'ing at least one module"); - let module = modules.remove(costliest_module); + .max(); + + // If we found a costliest module, we're good to go. Otherwise all our + // inputs were serialized which could happen in the case, for example, that + // all our inputs were incrementally reread from the cache and we're just + // re-executing the LTO passes. If that's the case deserialize the first + // module and create a linker with it. 
+ let module: ModuleCodegen<ModuleLlvm> = match costliest_module { + Some((_cost, i)) => { + match modules.remove(i) { + FatLTOInput::InMemory(m) => m, + FatLTOInput::Serialized { .. } => unreachable!(), + } + } + None => { + let pos = modules.iter().position(|m| { + match m { + FatLTOInput::InMemory(_) => false, + FatLTOInput::Serialized { .. } => true, + } + }).expect("must have at least one serialized module"); + let (name, buffer) = match modules.remove(pos) { + FatLTOInput::Serialized { name, buffer } => (name, buffer), + FatLTOInput::InMemory(_) => unreachable!(), + }; + ModuleCodegen { + module_llvm: ModuleLlvm::parse(cgcx, &name, &buffer, diag_handler)?, + name, + kind: ModuleKind::Regular, + } + } + }; let mut serialized_bitcode = Vec::new(); { let (llcx, llmod) = { @@ -247,10 +274,20 @@ fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>, // way we know of to do that is to serialize them to a string and them parse // them later. Not great but hey, that's why it's "fat" LTO, right? 
serialized_modules.extend(modules.into_iter().map(|module| { - let buffer = ModuleBuffer::new(module.module_llvm.llmod()); - let llmod_id = CString::new(&module.name[..]).unwrap(); - - (SerializedModule::Local(buffer), llmod_id) + match module { + FatLTOInput::InMemory(module) => { + let buffer = ModuleBuffer::new(module.module_llvm.llmod()); + let llmod_id = CString::new(&module.name[..]).unwrap(); + (SerializedModule::Local(buffer), llmod_id) + } + FatLTOInput::Serialized { name, buffer } => { + let llmod_id = CString::new(name).unwrap(); + (SerializedModule::Local(buffer), llmod_id) + } + } + })); + serialized_modules.extend(cached_modules.into_iter().map(|(buffer, wp)| { + (buffer, CString::new(wp.cgu_name.clone()).unwrap()) })); // For all serialized bitcode files we parse them and link them in as we did @@ -579,6 +616,16 @@ impl ModuleBuffer { llvm::LLVMRustModuleBufferCreate(m) }) } + + pub fn parse<'a>( + &self, + name: &str, + cx: &'a llvm::Context, + handler: &Handler, + ) -> Result<&'a llvm::Module, FatalError> { + let name = CString::new(name).unwrap(); + parse_module(cx, &name, self.data(), handler) + } } impl ModuleBufferMethods for ModuleBuffer { @@ -658,15 +705,12 @@ pub unsafe fn optimize_thin_module( // crates but for locally codegened modules we may be able to reuse // that LLVM Context and Module. let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names); - let llmod_raw = llvm::LLVMRustParseBitcodeForThinLTO( + let llmod_raw = parse_module( llcx, - thin_module.data().as_ptr(), - thin_module.data().len(), - thin_module.shared.module_names[thin_module.idx].as_ptr(), - ).ok_or_else(|| { - let msg = "failed to parse bitcode for thin LTO module"; - write::llvm_err(&diag_handler, msg) - })? as *const _; + &thin_module.shared.module_names[thin_module.idx], + thin_module.data(), + &diag_handler, + )? 
as *const _; let module = ModuleCodegen { module_llvm: ModuleLlvm { llmod_raw, @@ -823,3 +867,22 @@ fn module_name_to_str(c_str: &CStr) -> &str { c_str.to_str().unwrap_or_else(|e| bug!("Encountered non-utf8 LLVM module name `{}`: {}", c_str.to_string_lossy(), e)) } + +fn parse_module<'a>( + cx: &'a llvm::Context, + name: &CStr, + data: &[u8], + diag_handler: &Handler, +) -> Result<&'a llvm::Module, FatalError> { + unsafe { + llvm::LLVMRustParseBitcodeForLTO( + cx, + data.as_ptr(), + data.len(), + name.as_ptr(), + ).ok_or_else(|| { + let msg = "failed to parse bitcode for LTO module"; + write::llvm_err(&diag_handler, msg) + }) + } +} diff --git a/src/librustc_codegen_llvm/lib.rs b/src/librustc_codegen_llvm/lib.rs index ad8db25ee95..b605badc153 100644 --- a/src/librustc_codegen_llvm/lib.rs +++ b/src/librustc_codegen_llvm/lib.rs @@ -54,7 +54,7 @@ extern crate tempfile; extern crate memmap; use rustc_codegen_ssa::traits::*; -use rustc_codegen_ssa::back::write::{CodegenContext, ModuleConfig}; +use rustc_codegen_ssa::back::write::{CodegenContext, ModuleConfig, FatLTOInput}; use rustc_codegen_ssa::back::lto::{SerializedModule, LtoModuleCodegen, ThinModule}; use rustc_codegen_ssa::CompiledModule; use errors::{FatalError, Handler}; @@ -165,10 +165,11 @@ impl WriteBackendMethods for LlvmCodegenBackend { } fn run_fat_lto( cgcx: &CodegenContext<Self>, - modules: Vec<ModuleCodegen<Self::Module>>, + modules: Vec<FatLTOInput<Self>>, + cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>, timeline: &mut Timeline ) -> Result<LtoModuleCodegen<Self>, FatalError> { - back::lto::run_fat(cgcx, modules, timeline) + back::lto::run_fat(cgcx, modules, cached_modules, timeline) } fn run_thin_lto( cgcx: &CodegenContext<Self>, @@ -204,10 +205,14 @@ impl WriteBackendMethods for LlvmCodegenBackend { back::write::codegen(cgcx, diag_handler, module, config, timeline) } fn prepare_thin( - cgcx: &CodegenContext<Self>, module: ModuleCodegen<Self::Module> ) -> (String, 
Self::ThinBuffer) { - back::lto::prepare_thin(cgcx, module) + back::lto::prepare_thin(module) + } + fn serialize_module( + module: ModuleCodegen<Self::Module> + ) -> (String, Self::ModuleBuffer) { + (module.name, back::lto::ModuleBuffer::new(module.module_llvm.llmod())) } fn run_lto_pass_manager( cgcx: &CodegenContext<Self>, @@ -375,6 +380,31 @@ impl ModuleLlvm { } } + fn parse( + cgcx: &CodegenContext<LlvmCodegenBackend>, + name: &str, + buffer: &back::lto::ModuleBuffer, + handler: &Handler, + ) -> Result<Self, FatalError> { + unsafe { + let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names); + let llmod_raw = buffer.parse(name, llcx, handler)?; + let tm = match (cgcx.tm_factory.0)() { + Ok(m) => m, + Err(e) => { + handler.struct_err(&e).emit(); + return Err(FatalError) + } + }; + + Ok(ModuleLlvm { + llmod_raw, + llcx, + tm, + }) + } + } + fn llmod(&self) -> &llvm::Module { unsafe { &*self.llmod_raw diff --git a/src/librustc_codegen_llvm/llvm/ffi.rs b/src/librustc_codegen_llvm/llvm/ffi.rs index 3232f4e8f51..52292f67b33 100644 --- a/src/librustc_codegen_llvm/llvm/ffi.rs +++ b/src/librustc_codegen_llvm/llvm/ffi.rs @@ -1804,7 +1804,7 @@ extern "C" { CallbackPayload: *mut c_void, ); pub fn LLVMRustFreeThinLTOData(Data: &'static mut ThinLTOData); - pub fn LLVMRustParseBitcodeForThinLTO( + pub fn LLVMRustParseBitcodeForLTO( Context: &Context, Data: *const u8, len: usize, |
