Auto merge of #56487 - nikic:discard-modules-earlier, r=alexcrichton

Discard LLVM modules earlier when performing ThinLTO

Currently ThinLTO is performed by first compiling all modules (and keeping them in memory), and then serializing them into ThinLTO buffers in a separate, synchronized step. Modules are later read back from ThinLTO buffers when running the ThinLTO optimization pipeline.

We can also find the following comment in `lto.rs`:

    // FIXME: right now, like with fat LTO, we serialize all in-memory
    // modules before working with them and ThinLTO. We really
    // shouldn't do this, however, and instead figure out how to
    // extract a summary from an in-memory module and then merge that
    // into the global index. It turns out that this loop is by far
    // the most expensive portion of this small bit of global
    // analysis!

I don't think that what is suggested here is the right approach: One of the primary benefits of using ThinLTO over ordinary LTO is that it's not necessary to keep all the modules (merged or not) in memory for the duration of the linking step.

However, we currently don't really make use of this (at least for crate-local ThinLTO), because we keep all modules in memory until the start of the LTO step. This PR changes the implementation to instead perform the serialization into ThinLTO buffers directly after the initial optimization step.

Most of the changes here are plumbing to separate out fat and thin lto handling in `write.rs`, as these now use different intermediate artifacts. For fat lto this will be in-memory modules, for thin lto it will be ThinLTO buffers.

r? @alexcrichton
author: bors <bors@rust-lang.org> 2018-12-07 12:18:19 +0000
committer: bors <bors@rust-lang.org> 2018-12-07 12:18:19 +0000
commit: f504d3f05985f158c4845816c3d5f7b7a8afeb13 (patch)
tree: 93f41c10caba38d981162a3e02347f8f66512f8b /src/librustc_codegen_ssa
parent: fc84f5f837a3e1b9b9bc992dd603d3d968502288 (diff)
parent: 8128d0d1a95f5d77b672558c202cbf37d595fba3 (diff)
download: rust-f504d3f05985f158c4845816c3d5f7b7a8afeb13.tar.gz
rust-f504d3f05985f158c4845816c3d5f7b7a8afeb13.zip
2 files changed, 139 insertions, 94 deletions
diff --git a/src/librustc_codegen_ssa/back/write.rs b/src/librustc_codegen_ssa/back/write.rs
index 46aee5339ba..59955ce77cd 100644
--- a/src/librustc_codegen_ssa/back/write.rs
+++ b/src/librustc_codegen_ssa/back/write.rs
@@ -252,7 +252,8 @@ impl<B: WriteBackendMethods> CodegenContext<B> {
 
 fn generate_lto_work<B: ExtraBackendMethods>(
     cgcx: &CodegenContext<B>,
-    modules: Vec<ModuleCodegen<B::Module>>,
+    needs_fat_lto: Vec<ModuleCodegen<B::Module>>,
+    needs_thin_lto: Vec<(String, B::ThinBuffer)>,
     import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>
 ) -> Vec<(WorkItem<B>, u64)> {
     let mut timeline = cgcx.time_graph.as_ref().map(|tg| {
@@ -260,22 +261,28 @@ fn generate_lto_work<B: ExtraBackendMethods>(
                  CODEGEN_WORK_PACKAGE_KIND,
                  "generate lto")
     }).unwrap_or(Timeline::noop());
-    let (lto_modules, copy_jobs) = B::run_lto(cgcx, modules, import_only_modules, &mut timeline)
-        .unwrap_or_else(|e| e.raise());
 
-    let lto_modules = lto_modules.into_iter().map(|module| {
+    let (lto_modules, copy_jobs) = if !needs_fat_lto.is_empty() {
+        assert!(needs_thin_lto.is_empty());
+        assert!(import_only_modules.is_empty());
+        let lto_module = B::run_fat_lto(cgcx, needs_fat_lto, &mut timeline)
+            .unwrap_or_else(|e| e.raise());
+        (vec![lto_module], vec![])
+    } else {
+        assert!(needs_fat_lto.is_empty());
+        B::run_thin_lto(cgcx, needs_thin_lto, import_only_modules, &mut timeline)
+            .unwrap_or_else(|e| e.raise())
+    };
+
+    lto_modules.into_iter().map(|module| {
         let cost = module.cost();
         (WorkItem::LTO(module), cost)
-    });
-
-    let copy_jobs = copy_jobs.into_iter().map(|wp| {
+    }).chain(copy_jobs.into_iter().map(|wp| {
         (WorkItem::CopyPostLtoArtifacts(CachedModuleCodegen {
             name: wp.cgu_name.clone(),
             source: wp,
         }), 0)
-    });
-
-    lto_modules.chain(copy_jobs).collect()
+    })).collect()
 }
 
 pub struct CompiledModules {
@@ -671,16 +678,17 @@ impl<B: WriteBackendMethods> WorkItem<B> {
     }
 }
 
-enum WorkItemResult<M> {
+enum WorkItemResult<B: WriteBackendMethods> {
     Compiled(CompiledModule),
-    NeedsLTO(ModuleCodegen<M>),
+    NeedsFatLTO(ModuleCodegen<B::Module>),
+    NeedsThinLTO(String, B::ThinBuffer),
 }
 
 fn execute_work_item<B: ExtraBackendMethods>(
     cgcx: &CodegenContext<B>,
     work_item: WorkItem<B>,
     timeline: &mut Timeline
-) -> Result<WorkItemResult<B::Module>, FatalError> {
+) -> Result<WorkItemResult<B>, FatalError> {
     let module_config = cgcx.config(work_item.module_kind());
 
     match work_item {
@@ -696,67 +704,80 @@ fn execute_work_item<B: ExtraBackendMethods>(
     }
 }
 
+// Actual LTO type we end up choosing based on multiple factors.
+enum ComputedLtoType {
+    No,
+    Thin,
+    Fat,
+}
+
 fn execute_optimize_work_item<B: ExtraBackendMethods>(
     cgcx: &CodegenContext<B>,
     module: ModuleCodegen<B::Module>,
     module_config: &ModuleConfig,
     timeline: &mut Timeline
-) -> Result<WorkItemResult<B::Module>, FatalError> {
+) -> Result<WorkItemResult<B>, FatalError> {
     let diag_handler = cgcx.create_diag_handler();
 
     unsafe {
         B::optimize(cgcx, &diag_handler, &module, module_config, timeline)?;
     }
 
-    let linker_does_lto = cgcx.opts.debugging_opts.cross_lang_lto.enabled();
-
     // After we've done the initial round of optimizations we need to
     // decide whether to synchronously codegen this module or ship it
     // back to the coordinator thread for further LTO processing (which
     // has to wait for all the initial modules to be optimized).
-    //
-    // Here we dispatch based on the `cgcx.lto` and kind of module we're
-    // codegenning...
-    let needs_lto = match cgcx.lto {
-        Lto::No => false,
 
-        // If the linker does LTO, we don't have to do it. Note that we
-        // keep doing full LTO, if it is requested, as not to break the
-        // assumption that the output will be a single module.
-        Lto::Thin | Lto::ThinLocal if linker_does_lto => false,
+    // If the linker does LTO, we don't have to do it. Note that we
+    // keep doing full LTO, if it is requested, as not to break the
+    // assumption that the output will be a single module.
+    let linker_does_lto = cgcx.opts.debugging_opts.cross_lang_lto.enabled();
 
-        // Here we've got a full crate graph LTO requested. We ignore
-        // this, however, if the crate type is only an rlib as there's
-        // no full crate graph to process, that'll happen later.
-        //
-        // This use case currently comes up primarily for targets that
-        // require LTO so the request for LTO is always unconditionally
-        // passed down to the backend, but we don't actually want to do
-        // anything about it yet until we've got a final product.
-        Lto::Fat | Lto::Thin => {
-            cgcx.crate_types.len() != 1 ||
-                cgcx.crate_types[0] != config::CrateType::Rlib
-        }
+    // When we're automatically doing ThinLTO for multi-codegen-unit
+    // builds we don't actually want to LTO the allocator modules if
+    // it shows up. This is due to various linker shenanigans that
+    // we'll encounter later.
+    let is_allocator = module.kind == ModuleKind::Allocator;
 
-        // When we're automatically doing ThinLTO for multi-codegen-unit
-        // builds we don't actually want to LTO the allocator modules if
-        // it shows up. This is due to various linker shenanigans that
-        // we'll encounter later.
-        Lto::ThinLocal => {
-            module.kind != ModuleKind::Allocator
-        }
-    };
+    // We ignore a request for full crate graph LTO if the crate type
+    // is only an rlib, as there is no full crate graph to process,
+    // that'll happen later.
+    //
+    // This use case currently comes up primarily for targets that
+    // require LTO so the request for LTO is always unconditionally
+    // passed down to the backend, but we don't actually want to do
+    // anything about it yet until we've got a final product.
+    let is_rlib = cgcx.crate_types.len() == 1
+        && cgcx.crate_types[0] == config::CrateType::Rlib;
 
     // Metadata modules never participate in LTO regardless of the lto
     // settings.
-    let needs_lto = needs_lto && module.kind != ModuleKind::Metadata;
-
-    if needs_lto {
-        Ok(WorkItemResult::NeedsLTO(module))
+    let lto_type = if module.kind == ModuleKind::Metadata {
+        ComputedLtoType::No
     } else {
-        let module = unsafe { B::codegen(cgcx, &diag_handler, module, module_config, timeline)? };
-        Ok(WorkItemResult::Compiled(module))
-    }
+        match cgcx.lto {
+            Lto::ThinLocal if !linker_does_lto && !is_allocator
+                => ComputedLtoType::Thin,
+            Lto::Thin if !linker_does_lto && !is_rlib
+                => ComputedLtoType::Thin,
+            Lto::Fat if !is_rlib => ComputedLtoType::Fat,
+            _ => ComputedLtoType::No,
+        }
+    };
+
+    Ok(match lto_type {
+        ComputedLtoType::No => {
+            let module = unsafe {
+                B::codegen(cgcx, &diag_handler, module, module_config, timeline)?
+            };
+            WorkItemResult::Compiled(module)
+        }
+        ComputedLtoType::Thin => {
+            let (name, thin_buffer) = B::prepare_thin(cgcx, module);
+            WorkItemResult::NeedsThinLTO(name, thin_buffer)
+        }
+        ComputedLtoType::Fat => WorkItemResult::NeedsFatLTO(module),
+    })
 }
 
 fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
@@ -764,7 +785,7 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
     module: CachedModuleCodegen,
     module_config: &ModuleConfig,
     _: &mut Timeline
-) -> Result<WorkItemResult<B::Module>, FatalError> {
+) -> Result<WorkItemResult<B>, FatalError> {
     let incr_comp_session_dir = cgcx.incr_comp_session_dir
                                     .as_ref()
                                     .unwrap();
@@ -826,7 +847,7 @@ fn execute_lto_work_item<B: ExtraBackendMethods>(
     mut module: lto::LtoModuleCodegen<B>,
     module_config: &ModuleConfig,
     timeline: &mut Timeline
-) -> Result<WorkItemResult<B::Module>, FatalError> {
+) -> Result<WorkItemResult<B>, FatalError> {
     let diag_handler = cgcx.create_diag_handler();
 
     unsafe {
@@ -838,10 +859,15 @@ fn execute_lto_work_item<B: ExtraBackendMethods>(
 
 pub enum Message<B: WriteBackendMethods> {
     Token(io::Result<Acquired>),
-    NeedsLTO {
+    NeedsFatLTO {
         result: ModuleCodegen<B::Module>,
         worker_id: usize,
     },
+    NeedsThinLTO {
+        name: String,
+        thin_buffer: B::ThinBuffer,
+        worker_id: usize,
+    },
     Done {
         result: Result<CompiledModule, ()>,
         worker_id: usize,
@@ -1137,7 +1163,8 @@ fn start_executing_work<B: ExtraBackendMethods>(
         let mut compiled_modules = vec![];
         let mut compiled_metadata_module = None;
         let mut compiled_allocator_module = None;
-        let mut needs_lto = Vec::new();
+        let mut needs_fat_lto = Vec::new();
+        let mut needs_thin_lto = Vec::new();
         let mut lto_import_only_modules = Vec::new();
         let mut started_lto = false;
         let mut codegen_aborted = false;
@@ -1166,7 +1193,8 @@ fn start_executing_work<B: ExtraBackendMethods>(
               running > 0 ||
               (!codegen_aborted && (
                   work_items.len() > 0 ||
-                  needs_lto.len() > 0 ||
+                  needs_fat_lto.len() > 0 ||
+                  needs_thin_lto.len() > 0 ||
                   lto_import_only_modules.len() > 0 ||
                   main_thread_worker_state != MainThreadWorkerState::Idle
               ))
@@ -1212,12 +1240,17 @@ fn start_executing_work<B: ExtraBackendMethods>(
                    running == 0 &&
                    main_thread_worker_state == MainThreadWorkerState::Idle {
                     assert!(!started_lto);
-                    assert!(needs_lto.len() + lto_import_only_modules.len() > 0);
                     started_lto = true;
-                    let modules = mem::replace(&mut needs_lto, Vec::new());
+
+                    let needs_fat_lto =
+                        mem::replace(&mut needs_fat_lto, Vec::new());
+                    let needs_thin_lto =
+                        mem::replace(&mut needs_thin_lto, Vec::new());
                     let import_only_modules =
                         mem::replace(&mut lto_import_only_modules, Vec::new());
-                    for (work, cost) in generate_lto_work(&cgcx, modules, import_only_modules) {
+
+                    for (work, cost) in generate_lto_work(&cgcx, needs_fat_lto,
+                                                          needs_thin_lto, import_only_modules) {
                         let insertion_index = work_items
                             .binary_search_by_key(&cost, |&(_, cost)| cost)
                             .unwrap_or_else(|e| e);
@@ -1284,6 +1317,21 @@ fn start_executing_work<B: ExtraBackendMethods>(
             // Relinquish accidentally acquired extra tokens
             tokens.truncate(running);
 
+            // If a thread exits successfully then we drop a token associated
+            // with that worker and update our `running` count. We may later
+            // re-acquire a token to continue running more work. We may also not
+            // actually drop a token here if the worker was running with an
+            // "ephemeral token"
+            let mut free_worker = |worker_id| {
+                if main_thread_worker_state == MainThreadWorkerState::LLVMing {
+                    main_thread_worker_state = MainThreadWorkerState::Idle;
+                } else {
+                    running -= 1;
+                }
+
+                free_worker_ids.push(worker_id);
+            };
+
             let msg = coordinator_receive.recv().unwrap();
             match *msg.downcast::<Message<B>>().ok().unwrap() {
                 // Save the token locally and the next turn of the loop will use
@@ -1358,24 +1406,8 @@ fn start_executing_work<B: ExtraBackendMethods>(
                     assert_eq!(main_thread_worker_state,
                                MainThreadWorkerState::Codegenning);
                 }
-
-                // If a thread exits successfully then we drop a token associated
-                // with that worker and update our `running` count. We may later
-                // re-acquire a token to continue running more work. We may also not
-                // actually drop a token here if the worker was running with an
-                // "ephemeral token"
-                //
-                // Note that if the thread failed that means it panicked, so we
-                // abort immediately.
                 Message::Done { result: Ok(compiled_module), worker_id } => {
-                    if main_thread_worker_state == MainThreadWorkerState::LLVMing {
-                        main_thread_worker_state = MainThreadWorkerState::Idle;
-                    } else {
-                        running -= 1;
-                    }
-
-                    free_worker_ids.push(worker_id);
-
+                    free_worker(worker_id);
                     match compiled_module.kind {
                         ModuleKind::Regular => {
                             compiled_modules.push(compiled_module);
@@ -1390,15 +1422,15 @@ fn start_executing_work<B: ExtraBackendMethods>(
                         }
                     }
                 }
-                Message::NeedsLTO { result, worker_id } => {
+                Message::NeedsFatLTO { result, worker_id } => {
                     assert!(!started_lto);
-                    if main_thread_worker_state == MainThreadWorkerState::LLVMing {
-                        main_thread_worker_state = MainThreadWorkerState::Idle;
-                    } else {
-                        running -= 1;
-                    }
-                    free_worker_ids.push(worker_id);
-                    needs_lto.push(result);
+                    free_worker(worker_id);
+                    needs_fat_lto.push(result);
+                }
+                Message::NeedsThinLTO { name, thin_buffer, worker_id } => {
+                    assert!(!started_lto);
+                    free_worker(worker_id);
+                    needs_thin_lto.push((name, thin_buffer));
                 }
                 Message::AddImportOnlyModule { module_data, work_product } => {
                     assert!(!started_lto);
@@ -1408,6 +1440,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
                     lto_import_only_modules.push((module_data, work_product));
                     main_thread_worker_state = MainThreadWorkerState::Idle;
                 }
+                // If the thread failed that means it panicked, so we abort immediately.
                 Message::Done { result: Err(()), worker_id: _ } => {
                     bug!("worker thread panicked");
                 }
@@ -1485,7 +1518,7 @@ fn spawn_work<B: ExtraBackendMethods>(
         // we exit.
         struct Bomb<B: ExtraBackendMethods> {
             coordinator_send: Sender<Box<dyn Any + Send>>,
-            result: Option<WorkItemResult<B::Module>>,
+            result: Option<WorkItemResult<B>>,
             worker_id: usize,
         }
         impl<B: ExtraBackendMethods> Drop for Bomb<B> {
@@ -1495,8 +1528,11 @@ fn spawn_work<B: ExtraBackendMethods>(
                     Some(WorkItemResult::Compiled(m)) => {
                         Message::Done::<B> { result: Ok(m), worker_id }
                     }
-                    Some(WorkItemResult::NeedsLTO(m)) => {
-                        Message::NeedsLTO::<B> { result: m, worker_id }
+                    Some(WorkItemResult::NeedsFatLTO(m)) => {
+                        Message::NeedsFatLTO::<B> { result: m, worker_id }
+                    }
+                    Some(WorkItemResult::NeedsThinLTO(name, thin_buffer)) => {
+                        Message::NeedsThinLTO::<B> { name, thin_buffer, worker_id }
                     }
                     None => Message::Done::<B> { result: Err(()), worker_id }
                 };
diff --git a/src/librustc_codegen_ssa/traits/write.rs b/src/librustc_codegen_ssa/traits/write.rs
index 72522e19af2..edc5c2717bc 100644
--- a/src/librustc_codegen_ssa/traits/write.rs
+++ b/src/librustc_codegen_ssa/traits/write.rs
@@ -24,14 +24,19 @@ pub trait WriteBackendMethods: 'static + Sized + Clone {
     type ThinData: Send + Sync;
     type ThinBuffer: ThinBufferMethods;
 
-    /// Performs LTO, which in the case of full LTO means merging all modules into
-    /// a single one and returning it for further optimizing. For ThinLTO, it will
-    /// do the global analysis necessary and return two lists, one of the modules
-    /// the need optimization and another for modules that can simply be copied over
-    /// from the incr. comp. cache.
-    fn run_lto(
+    /// Performs fat LTO by merging all modules into a single one and returning it
+    /// for further optimization.
+    fn run_fat_lto(
         cgcx: &CodegenContext<Self>,
         modules: Vec<ModuleCodegen<Self::Module>>,
+        timeline: &mut Timeline,
+    ) -> Result<LtoModuleCodegen<Self>, FatalError>;
+    /// Performs thin LTO by performing necessary global analysis and returning two
+    /// lists, one of the modules that need optimization and another for modules that
+    /// can simply be copied over from the incr. comp. cache.
+    fn run_thin_lto(
+        cgcx: &CodegenContext<Self>,
+        modules: Vec<(String, Self::ThinBuffer)>,
         cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
         timeline: &mut Timeline,
     ) -> Result<(Vec<LtoModuleCodegen<Self>>, Vec<WorkProduct>), FatalError>;
@@ -55,6 +60,10 @@ pub trait WriteBackendMethods: 'static + Sized + Clone {
         config: &ModuleConfig,
         timeline: &mut Timeline,
     ) -> Result<CompiledModule, FatalError>;
+    fn prepare_thin(
+        cgcx: &CodegenContext<Self>,
+        module: ModuleCodegen<Self::Module>
+    ) -> (String, Self::ThinBuffer);
     fn run_lto_pass_manager(
         cgcx: &CodegenContext<Self>,
         llmod: &ModuleCodegen<Self::Module>,
author	bors <bors@rust-lang.org>	2018-12-07 12:18:19 +0000
committer	bors <bors@rust-lang.org>	2018-12-07 12:18:19 +0000
commit	f504d3f05985f158c4845816c3d5f7b7a8afeb13 (patch)
tree	93f41c10caba38d981162a3e02347f8f66512f8b /src/librustc_codegen_ssa
parent	fc84f5f837a3e1b9b9bc992dd603d3d968502288 (diff)
parent	8128d0d1a95f5d77b672558c202cbf37d595fba3 (diff)
download	rust-f504d3f05985f158c4845816c3d5f7b7a8afeb13.tar.gz rust-f504d3f05985f158c4845816c3d5f7b7a8afeb13.zip