4 files changed, 231 insertions, 166 deletions
diff --git a/src/librustc_codegen_llvm/back/lto.rs b/src/librustc_codegen_llvm/back/lto.rs
index 99828e5b7fb..bddb45da10b 100644
--- a/src/librustc_codegen_llvm/back/lto.rs
+++ b/src/librustc_codegen_llvm/back/lto.rs
@@ -48,18 +48,11 @@ pub fn crate_type_allows_lto(crate_type: config::CrateType) -> bool {
     }
 }
 
-/// Performs LTO, which in the case of full LTO means merging all modules into
-/// a single one and returning it for further optimizing. For ThinLTO, it will
-/// do the global analysis necessary and return two lists, one of the modules
-/// the need optimization and another for modules that can simply be copied over
-/// from the incr. comp. cache.
-pub(crate) fn run(cgcx: &CodegenContext<LlvmCodegenBackend>,
-                  modules: Vec<ModuleCodegen<ModuleLlvm>>,
-                  cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
-                  timeline: &mut Timeline)
-    -> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError>
+fn prepare_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
+               timeline: &mut Timeline,
+               diag_handler: &Handler)
+    -> Result<(Vec<CString>, Vec<(SerializedModule<ModuleBuffer>, CString)>), FatalError>
 {
-    let diag_handler = cgcx.create_diag_handler();
     let export_threshold = match cgcx.lto {
         // We're just doing LTO for our one crate
         Lto::ThinLocal => SymbolExportLevel::Rust,
@@ -144,36 +137,74 @@ pub(crate) fn run(cgcx: &CodegenContext<LlvmCodegenBackend>,
         }
     }
 
+    Ok((symbol_white_list, upstream_modules))
+}
+
+/// Performs fat LTO by merging all modules into a single one and returning it
+/// for further optimization.
+pub(crate) fn run_fat(cgcx: &CodegenContext<LlvmCodegenBackend>,
+                      modules: Vec<ModuleCodegen<ModuleLlvm>>,
+                      timeline: &mut Timeline)
+    -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError>
+{
+    let diag_handler = cgcx.create_diag_handler();
+    let (symbol_white_list, upstream_modules) = prepare_lto(cgcx, timeline, &diag_handler)?;
     let symbol_white_list = symbol_white_list.iter()
                                              .map(|c| c.as_ptr())
                                              .collect::<Vec<_>>();
-    match cgcx.lto {
-        Lto::Fat => {
-            assert!(cached_modules.is_empty());
-            let opt_jobs = fat_lto(cgcx,
-                                   &diag_handler,
-                                   modules,
-                                   upstream_modules,
-                                   &symbol_white_list,
-                                   timeline);
-            opt_jobs.map(|opt_jobs| (opt_jobs, vec![]))
-        }
-        Lto::Thin |
-        Lto::ThinLocal => {
-            if cgcx.opts.debugging_opts.cross_lang_lto.enabled() {
-                unreachable!("We should never reach this case if the LTO step \
-                              is deferred to the linker");
-            }
-            thin_lto(cgcx,
-                     &diag_handler,
-                     modules,
-                     upstream_modules,
-                     cached_modules,
-                     &symbol_white_list,
-                     timeline)
+    fat_lto(cgcx, &diag_handler, modules, upstream_modules, &symbol_white_list, timeline)
+}
+
+/// Performs thin LTO by performing necessary global analysis and returning two
+/// lists, one of the modules that need optimization and another for modules that
+/// can simply be copied over from the incr. comp. cache.
+pub(crate) fn run_thin(cgcx: &CodegenContext<LlvmCodegenBackend>,
+                       modules: Vec<(String, ThinBuffer)>,
+                       cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
+                       timeline: &mut Timeline)
+    -> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError>
+{
+    let diag_handler = cgcx.create_diag_handler();
+    let (symbol_white_list, upstream_modules) = prepare_lto(cgcx, timeline, &diag_handler)?;
+    let symbol_white_list = symbol_white_list.iter()
+                                             .map(|c| c.as_ptr())
+                                             .collect::<Vec<_>>();
+    if cgcx.opts.debugging_opts.cross_lang_lto.enabled() {
+        unreachable!("We should never reach this case if the LTO step \
+                      is deferred to the linker");
+    }
+    thin_lto(cgcx,
+             &diag_handler,
+             modules,
+             upstream_modules,
+             cached_modules,
+             &symbol_white_list,
+             timeline)
+}
+
+pub(crate) fn prepare_thin(
+    cgcx: &CodegenContext<LlvmCodegenBackend>,
+    module: ModuleCodegen<ModuleLlvm>
+) -> (String, ThinBuffer) {
+    let name = module.name.clone();
+    let buffer = ThinBuffer::new(module.module_llvm.llmod());
+
+    // We emit the module after having serialized it into a ThinBuffer
+    // because only then it will contain the ThinLTO module summary.
+    if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir {
+        if cgcx.config(module.kind).emit_pre_thin_lto_bc {
+            let path = incr_comp_session_dir
+                .join(pre_lto_bitcode_filename(&name));
+
+            fs::write(&path, buffer.data()).unwrap_or_else(|e| {
+                panic!("Error writing pre-lto-bitcode file `{}`: {}",
+                       path.display(),
+                       e);
+            });
         }
-        Lto::No => unreachable!(),
     }
+
+    (name, buffer)
 }
 
 fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
@@ -182,7 +213,7 @@ fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
            mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
            symbol_white_list: &[*const libc::c_char],
            timeline: &mut Timeline)
-    -> Result<Vec<LtoModuleCodegen<LlvmCodegenBackend>>, FatalError>
+    -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError>
 {
     info!("going for a fat lto");
 
@@ -271,10 +302,10 @@ fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
         timeline.record("passes");
     }
 
-    Ok(vec![LtoModuleCodegen::Fat {
+    Ok(LtoModuleCodegen::Fat {
         module: Some(module),
         _serialized_bitcode: serialized_bitcode,
-    }])
+    })
 }
 
 struct Linker<'a>(&'a mut llvm::Linker<'a>);
@@ -335,7 +366,7 @@ impl Drop for Linker<'a> {
 /// they all go out of scope.
 fn thin_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
             diag_handler: &Handler,
-            modules: Vec<ModuleCodegen<ModuleLlvm>>,
+            modules: Vec<(String, ThinBuffer)>,
             serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
             cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
             symbol_white_list: &[*const libc::c_char],
@@ -355,41 +386,17 @@ fn thin_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
         let mut module_names = Vec::with_capacity(full_scope_len);
         let mut thin_modules = Vec::with_capacity(full_scope_len);
 
-        // FIXME: right now, like with fat LTO, we serialize all in-memory
-        //        modules before working with them and ThinLTO. We really
-        //        shouldn't do this, however, and instead figure out how to
-        //        extract a summary from an in-memory module and then merge that
-        //        into the global index. It turns out that this loop is by far
-        //        the most expensive portion of this small bit of global
-        //        analysis!
-        for (i, module) in modules.into_iter().enumerate() {
-            info!("local module: {} - {}", i, module.name);
-            let name = CString::new(module.name.clone()).unwrap();
-            let buffer = ThinBuffer::new(module.module_llvm.llmod());
-
-            // We emit the module after having serialized it into a ThinBuffer
-            // because only then it will contain the ThinLTO module summary.
-            if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir {
-                if cgcx.config(module.kind).emit_pre_thin_lto_bc {
-                    let path = incr_comp_session_dir
-                        .join(pre_lto_bitcode_filename(&module.name));
-
-                    fs::write(&path, buffer.data()).unwrap_or_else(|e| {
-                        panic!("Error writing pre-lto-bitcode file `{}`: {}",
-                               path.display(),
-                               e);
-                    });
-                }
-            }
-
+        for (i, (name, buffer)) in modules.into_iter().enumerate() {
+            info!("local module: {} - {}", i, name);
+            let cname = CString::new(name.clone()).unwrap();
             thin_modules.push(llvm::ThinLTOModule {
-                identifier: name.as_ptr(),
+                identifier: cname.as_ptr(),
                 data: buffer.data().as_ptr(),
                 len: buffer.data().len(),
             });
             thin_buffers.push(buffer);
-            module_names.push(name);
-            timeline.record(&module.name);
+            module_names.push(cname);
+            timeline.record(&name);
         }
 
         // FIXME: All upstream crates are deserialized internally in the
diff --git a/src/librustc_codegen_llvm/lib.rs b/src/librustc_codegen_llvm/lib.rs
index 4f90cb793b6..ff06d3759bd 100644
--- a/src/librustc_codegen_llvm/lib.rs
+++ b/src/librustc_codegen_llvm/lib.rs
@@ -176,13 +176,20 @@ impl WriteBackendMethods for LlvmCodegenBackend {
     fn print_pass_timings(&self) {
             unsafe { llvm::LLVMRustPrintPassTimings(); }
     }
-    fn run_lto(
+    fn run_fat_lto(
         cgcx: &CodegenContext<Self>,
         modules: Vec<ModuleCodegen<Self::Module>>,
+        timeline: &mut Timeline
+    ) -> Result<LtoModuleCodegen<Self>, FatalError> {
+        back::lto::run_fat(cgcx, modules, timeline)
+    }
+    fn run_thin_lto(
+        cgcx: &CodegenContext<Self>,
+        modules: Vec<(String, Self::ThinBuffer)>,
         cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
         timeline: &mut Timeline
     ) -> Result<(Vec<LtoModuleCodegen<Self>>, Vec<WorkProduct>), FatalError> {
-        back::lto::run(cgcx, modules, cached_modules, timeline)
+        back::lto::run_thin(cgcx, modules, cached_modules, timeline)
     }
     unsafe fn optimize(
         cgcx: &CodegenContext<Self>,
@@ -209,6 +216,12 @@ impl WriteBackendMethods for LlvmCodegenBackend {
     ) -> Result<CompiledModule, FatalError> {
         back::write::codegen(cgcx, diag_handler, module, config, timeline)
     }
+    fn prepare_thin(
+        cgcx: &CodegenContext<Self>,
+        module: ModuleCodegen<Self::Module>
+    ) -> (String, Self::ThinBuffer) {
+        back::lto::prepare_thin(cgcx, module)
+    }
     fn run_lto_pass_manager(
         cgcx: &CodegenContext<Self>,
         module: &ModuleCodegen<Self::Module>,
diff --git a/src/librustc_codegen_ssa/back/write.rs b/src/librustc_codegen_ssa/back/write.rs
index 46aee5339ba..59955ce77cd 100644
--- a/src/librustc_codegen_ssa/back/write.rs
+++ b/src/librustc_codegen_ssa/back/write.rs
@@ -252,7 +252,8 @@ impl<B: WriteBackendMethods> CodegenContext<B> {
 
 fn generate_lto_work<B: ExtraBackendMethods>(
     cgcx: &CodegenContext<B>,
-    modules: Vec<ModuleCodegen<B::Module>>,
+    needs_fat_lto: Vec<ModuleCodegen<B::Module>>,
+    needs_thin_lto: Vec<(String, B::ThinBuffer)>,
     import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>
 ) -> Vec<(WorkItem<B>, u64)> {
     let mut timeline = cgcx.time_graph.as_ref().map(|tg| {
@@ -260,22 +261,28 @@ fn generate_lto_work<B: ExtraBackendMethods>(
                  CODEGEN_WORK_PACKAGE_KIND,
                  "generate lto")
     }).unwrap_or(Timeline::noop());
-    let (lto_modules, copy_jobs) = B::run_lto(cgcx, modules, import_only_modules, &mut timeline)
-        .unwrap_or_else(|e| e.raise());
 
-    let lto_modules = lto_modules.into_iter().map(|module| {
+    let (lto_modules, copy_jobs) = if !needs_fat_lto.is_empty() {
+        assert!(needs_thin_lto.is_empty());
+        assert!(import_only_modules.is_empty());
+        let lto_module = B::run_fat_lto(cgcx, needs_fat_lto, &mut timeline)
+            .unwrap_or_else(|e| e.raise());
+        (vec![lto_module], vec![])
+    } else {
+        assert!(needs_fat_lto.is_empty());
+        B::run_thin_lto(cgcx, needs_thin_lto, import_only_modules, &mut timeline)
+            .unwrap_or_else(|e| e.raise())
+    };
+
+    lto_modules.into_iter().map(|module| {
         let cost = module.cost();
         (WorkItem::LTO(module), cost)
-    });
-
-    let copy_jobs = copy_jobs.into_iter().map(|wp| {
+    }).chain(copy_jobs.into_iter().map(|wp| {
         (WorkItem::CopyPostLtoArtifacts(CachedModuleCodegen {
             name: wp.cgu_name.clone(),
             source: wp,
         }), 0)
-    });
-
-    lto_modules.chain(copy_jobs).collect()
+    })).collect()
 }
 
 pub struct CompiledModules {
@@ -671,16 +678,17 @@ impl<B: WriteBackendMethods> WorkItem<B> {
     }
 }
 
-enum WorkItemResult<M> {
+enum WorkItemResult<B: WriteBackendMethods> {
     Compiled(CompiledModule),
-    NeedsLTO(ModuleCodegen<M>),
+    NeedsFatLTO(ModuleCodegen<B::Module>),
+    NeedsThinLTO(String, B::ThinBuffer),
 }
 
 fn execute_work_item<B: ExtraBackendMethods>(
     cgcx: &CodegenContext<B>,
     work_item: WorkItem<B>,
     timeline: &mut Timeline
-) -> Result<WorkItemResult<B::Module>, FatalError> {
+) -> Result<WorkItemResult<B>, FatalError> {
     let module_config = cgcx.config(work_item.module_kind());
 
     match work_item {
@@ -696,67 +704,80 @@ fn execute_work_item<B: ExtraBackendMethods>(
     }
 }
 
+// Actual LTO type we end up chosing based on multiple factors.
+enum ComputedLtoType {
+    No,
+    Thin,
+    Fat,
+}
+
 fn execute_optimize_work_item<B: ExtraBackendMethods>(
     cgcx: &CodegenContext<B>,
     module: ModuleCodegen<B::Module>,
     module_config: &ModuleConfig,
     timeline: &mut Timeline
-) -> Result<WorkItemResult<B::Module>, FatalError> {
+) -> Result<WorkItemResult<B>, FatalError> {
     let diag_handler = cgcx.create_diag_handler();
 
     unsafe {
         B::optimize(cgcx, &diag_handler, &module, module_config, timeline)?;
     }
 
-    let linker_does_lto = cgcx.opts.debugging_opts.cross_lang_lto.enabled();
-
     // After we've done the initial round of optimizations we need to
     // decide whether to synchronously codegen this module or ship it
     // back to the coordinator thread for further LTO processing (which
     // has to wait for all the initial modules to be optimized).
-    //
-    // Here we dispatch based on the `cgcx.lto` and kind of module we're
-    // codegenning...
-    let needs_lto = match cgcx.lto {
-        Lto::No => false,
 
-        // If the linker does LTO, we don't have to do it. Note that we
-        // keep doing full LTO, if it is requested, as not to break the
-        // assumption that the output will be a single module.
-        Lto::Thin | Lto::ThinLocal if linker_does_lto => false,
+    // If the linker does LTO, we don't have to do it. Note that we
+    // keep doing full LTO, if it is requested, as not to break the
+    // assumption that the output will be a single module.
+    let linker_does_lto = cgcx.opts.debugging_opts.cross_lang_lto.enabled();
 
-        // Here we've got a full crate graph LTO requested. We ignore
-        // this, however, if the crate type is only an rlib as there's
-        // no full crate graph to process, that'll happen later.
-        //
-        // This use case currently comes up primarily for targets that
-        // require LTO so the request for LTO is always unconditionally
-        // passed down to the backend, but we don't actually want to do
-        // anything about it yet until we've got a final product.
-        Lto::Fat | Lto::Thin => {
-            cgcx.crate_types.len() != 1 ||
-                cgcx.crate_types[0] != config::CrateType::Rlib
-        }
+    // When we're automatically doing ThinLTO for multi-codegen-unit
+    // builds we don't actually want to LTO the allocator modules if
+    // it shows up. This is due to various linker shenanigans that
+    // we'll encounter later.
+    let is_allocator = module.kind == ModuleKind::Allocator;
 
-        // When we're automatically doing ThinLTO for multi-codegen-unit
-        // builds we don't actually want to LTO the allocator modules if
-        // it shows up. This is due to various linker shenanigans that
-        // we'll encounter later.
-        Lto::ThinLocal => {
-            module.kind != ModuleKind::Allocator
-        }
-    };
+    // We ignore a request for full crate grath LTO if the cate type
+    // is only an rlib, as there is no full crate graph to process,
+    // that'll happen later.
+    //
+    // This use case currently comes up primarily for targets that
+    // require LTO so the request for LTO is always unconditionally
+    // passed down to the backend, but we don't actually want to do
+    // anything about it yet until we've got a final product.
+    let is_rlib = cgcx.crate_types.len() == 1
+        && cgcx.crate_types[0] == config::CrateType::Rlib;
 
     // Metadata modules never participate in LTO regardless of the lto
     // settings.
-    let needs_lto = needs_lto && module.kind != ModuleKind::Metadata;
-
-    if needs_lto {
-        Ok(WorkItemResult::NeedsLTO(module))
+    let lto_type = if module.kind == ModuleKind::Metadata {
+        ComputedLtoType::No
     } else {
-        let module = unsafe { B::codegen(cgcx, &diag_handler, module, module_config, timeline)? };
-        Ok(WorkItemResult::Compiled(module))
-    }
+        match cgcx.lto {
+            Lto::ThinLocal if !linker_does_lto && !is_allocator
+                => ComputedLtoType::Thin,
+            Lto::Thin if !linker_does_lto && !is_rlib
+                => ComputedLtoType::Thin,
+            Lto::Fat if !is_rlib => ComputedLtoType::Fat,
+            _ => ComputedLtoType::No,
+        }
+    };
+
+    Ok(match lto_type {
+        ComputedLtoType::No => {
+            let module = unsafe {
+                B::codegen(cgcx, &diag_handler, module, module_config, timeline)?
+            };
+            WorkItemResult::Compiled(module)
+        }
+        ComputedLtoType::Thin => {
+            let (name, thin_buffer) = B::prepare_thin(cgcx, module);
+            WorkItemResult::NeedsThinLTO(name, thin_buffer)
+        }
+        ComputedLtoType::Fat => WorkItemResult::NeedsFatLTO(module),
+    })
 }
 
 fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
@@ -764,7 +785,7 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
     module: CachedModuleCodegen,
     module_config: &ModuleConfig,
     _: &mut Timeline
-) -> Result<WorkItemResult<B::Module>, FatalError> {
+) -> Result<WorkItemResult<B>, FatalError> {
     let incr_comp_session_dir = cgcx.incr_comp_session_dir
                                     .as_ref()
                                     .unwrap();
@@ -826,7 +847,7 @@ fn execute_lto_work_item<B: ExtraBackendMethods>(
     mut module: lto::LtoModuleCodegen<B>,
     module_config: &ModuleConfig,
     timeline: &mut Timeline
-) -> Result<WorkItemResult<B::Module>, FatalError> {
+) -> Result<WorkItemResult<B>, FatalError> {
     let diag_handler = cgcx.create_diag_handler();
 
     unsafe {
@@ -838,10 +859,15 @@ fn execute_lto_work_item<B: ExtraBackendMethods>(
 
 pub enum Message<B: WriteBackendMethods> {
     Token(io::Result<Acquired>),
-    NeedsLTO {
+    NeedsFatLTO {
         result: ModuleCodegen<B::Module>,
         worker_id: usize,
     },
+    NeedsThinLTO {
+        name: String,
+        thin_buffer: B::ThinBuffer,
+        worker_id: usize,
+    },
     Done {
         result: Result<CompiledModule, ()>,
         worker_id: usize,
@@ -1137,7 +1163,8 @@ fn start_executing_work<B: ExtraBackendMethods>(
         let mut compiled_modules = vec![];
         let mut compiled_metadata_module = None;
         let mut compiled_allocator_module = None;
-        let mut needs_lto = Vec::new();
+        let mut needs_fat_lto = Vec::new();
+        let mut needs_thin_lto = Vec::new();
         let mut lto_import_only_modules = Vec::new();
         let mut started_lto = false;
         let mut codegen_aborted = false;
@@ -1166,7 +1193,8 @@ fn start_executing_work<B: ExtraBackendMethods>(
               running > 0 ||
               (!codegen_aborted && (
                   work_items.len() > 0 ||
-                  needs_lto.len() > 0 ||
+                  needs_fat_lto.len() > 0 ||
+                  needs_thin_lto.len() > 0 ||
                   lto_import_only_modules.len() > 0 ||
                   main_thread_worker_state != MainThreadWorkerState::Idle
               ))
@@ -1212,12 +1240,17 @@ fn start_executing_work<B: ExtraBackendMethods>(
                    running == 0 &&
                    main_thread_worker_state == MainThreadWorkerState::Idle {
                     assert!(!started_lto);
-                    assert!(needs_lto.len() + lto_import_only_modules.len() > 0);
                     started_lto = true;
-                    let modules = mem::replace(&mut needs_lto, Vec::new());
+
+                    let needs_fat_lto =
+                        mem::replace(&mut needs_fat_lto, Vec::new());
+                    let needs_thin_lto =
+                        mem::replace(&mut needs_thin_lto, Vec::new());
                     let import_only_modules =
                         mem::replace(&mut lto_import_only_modules, Vec::new());
-                    for (work, cost) in generate_lto_work(&cgcx, modules, import_only_modules) {
+
+                    for (work, cost) in generate_lto_work(&cgcx, needs_fat_lto,
+                                                          needs_thin_lto, import_only_modules) {
                         let insertion_index = work_items
                             .binary_search_by_key(&cost, |&(_, cost)| cost)
                             .unwrap_or_else(|e| e);
@@ -1284,6 +1317,21 @@ fn start_executing_work<B: ExtraBackendMethods>(
             // Relinquish accidentally acquired extra tokens
             tokens.truncate(running);
 
+            // If a thread exits successfully then we drop a token associated
+            // with that worker and update our `running` count. We may later
+            // re-acquire a token to continue running more work. We may also not
+            // actually drop a token here if the worker was running with an
+            // "ephemeral token"
+            let mut free_worker = |worker_id| {
+                if main_thread_worker_state == MainThreadWorkerState::LLVMing {
+                    main_thread_worker_state = MainThreadWorkerState::Idle;
+                } else {
+                    running -= 1;
+                }
+
+                free_worker_ids.push(worker_id);
+            };
+
             let msg = coordinator_receive.recv().unwrap();
             match *msg.downcast::<Message<B>>().ok().unwrap() {
                 // Save the token locally and the next turn of the loop will use
@@ -1358,24 +1406,8 @@ fn start_executing_work<B: ExtraBackendMethods>(
                     assert_eq!(main_thread_worker_state,
                                MainThreadWorkerState::Codegenning);
                 }
-
-                // If a thread exits successfully then we drop a token associated
-                // with that worker and update our `running` count. We may later
-                // re-acquire a token to continue running more work. We may also not
-                // actually drop a token here if the worker was running with an
-                // "ephemeral token"
-                //
-                // Note that if the thread failed that means it panicked, so we
-                // abort immediately.
                 Message::Done { result: Ok(compiled_module), worker_id } => {
-                    if main_thread_worker_state == MainThreadWorkerState::LLVMing {
-                        main_thread_worker_state = MainThreadWorkerState::Idle;
-                    } else {
-                        running -= 1;
-                    }
-
-                    free_worker_ids.push(worker_id);
-
+                    free_worker(worker_id);
                     match compiled_module.kind {
                         ModuleKind::Regular => {
                             compiled_modules.push(compiled_module);
@@ -1390,15 +1422,15 @@ fn start_executing_work<B: ExtraBackendMethods>(
                         }
                     }
                 }
-                Message::NeedsLTO { result, worker_id } => {
+                Message::NeedsFatLTO { result, worker_id } => {
                     assert!(!started_lto);
-                    if main_thread_worker_state == MainThreadWorkerState::LLVMing {
-                        main_thread_worker_state = MainThreadWorkerState::Idle;
-                    } else {
-                        running -= 1;
-                    }
-                    free_worker_ids.push(worker_id);
-                    needs_lto.push(result);
+                    free_worker(worker_id);
+                    needs_fat_lto.push(result);
+                }
+                Message::NeedsThinLTO { name, thin_buffer, worker_id } => {
+                    assert!(!started_lto);
+                    free_worker(worker_id);
+                    needs_thin_lto.push((name, thin_buffer));
                 }
                 Message::AddImportOnlyModule { module_data, work_product } => {
                     assert!(!started_lto);
@@ -1408,6 +1440,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
                     lto_import_only_modules.push((module_data, work_product));
                     main_thread_worker_state = MainThreadWorkerState::Idle;
                 }
+                // If the thread failed that means it panicked, so we abort immediately.
                 Message::Done { result: Err(()), worker_id: _ } => {
                     bug!("worker thread panicked");
                 }
@@ -1485,7 +1518,7 @@ fn spawn_work<B: ExtraBackendMethods>(
         // we exit.
         struct Bomb<B: ExtraBackendMethods> {
             coordinator_send: Sender<Box<dyn Any + Send>>,
-            result: Option<WorkItemResult<B::Module>>,
+            result: Option<WorkItemResult<B>>,
             worker_id: usize,
         }
         impl<B: ExtraBackendMethods> Drop for Bomb<B> {
@@ -1495,8 +1528,11 @@ fn spawn_work<B: ExtraBackendMethods>(
                     Some(WorkItemResult::Compiled(m)) => {
                         Message::Done::<B> { result: Ok(m), worker_id }
                     }
-                    Some(WorkItemResult::NeedsLTO(m)) => {
-                        Message::NeedsLTO::<B> { result: m, worker_id }
+                    Some(WorkItemResult::NeedsFatLTO(m)) => {
+                        Message::NeedsFatLTO::<B> { result: m, worker_id }
+                    }
+                    Some(WorkItemResult::NeedsThinLTO(name, thin_buffer)) => {
+                        Message::NeedsThinLTO::<B> { name, thin_buffer, worker_id }
                     }
                     None => Message::Done::<B> { result: Err(()), worker_id }
                 };
diff --git a/src/librustc_codegen_ssa/traits/write.rs b/src/librustc_codegen_ssa/traits/write.rs
index 72522e19af2..edc5c2717bc 100644
--- a/src/librustc_codegen_ssa/traits/write.rs
+++ b/src/librustc_codegen_ssa/traits/write.rs
@@ -24,14 +24,19 @@ pub trait WriteBackendMethods: 'static + Sized + Clone {
     type ThinData: Send + Sync;
     type ThinBuffer: ThinBufferMethods;
 
-    /// Performs LTO, which in the case of full LTO means merging all modules into
-    /// a single one and returning it for further optimizing. For ThinLTO, it will
-    /// do the global analysis necessary and return two lists, one of the modules
-    /// the need optimization and another for modules that can simply be copied over
-    /// from the incr. comp. cache.
-    fn run_lto(
+    /// Performs fat LTO by merging all modules into a single one and returning it
+    /// for further optimization.
+    fn run_fat_lto(
         cgcx: &CodegenContext<Self>,
         modules: Vec<ModuleCodegen<Self::Module>>,
+        timeline: &mut Timeline,
+    ) -> Result<LtoModuleCodegen<Self>, FatalError>;
+    /// Performs thin LTO by performing necessary global analysis and returning two
+    /// lists, one of the modules that need optimization and another for modules that
+    /// can simply be copied over from the incr. comp. cache.
+    fn run_thin_lto(
+        cgcx: &CodegenContext<Self>,
+        modules: Vec<(String, Self::ThinBuffer)>,
         cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
         timeline: &mut Timeline,
     ) -> Result<(Vec<LtoModuleCodegen<Self>>, Vec<WorkProduct>), FatalError>;
@@ -55,6 +60,10 @@ pub trait WriteBackendMethods: 'static + Sized + Clone {
         config: &ModuleConfig,
         timeline: &mut Timeline,
     ) -> Result<CompiledModule, FatalError>;
+    fn prepare_thin(
+        cgcx: &CodegenContext<Self>,
+        module: ModuleCodegen<Self::Module>
+    ) -> (String, Self::ThinBuffer);
     fn run_lto_pass_manager(
         cgcx: &CodegenContext<Self>,
         llmod: &ModuleCodegen<Self::Module>,