about summary refs log tree commit diff
path: root/compiler
diff options
context:
space:
mode:
authorbjorn3 <17426603+bjorn3@users.noreply.github.com>2025-08-28 10:26:29 +0000
committerbjorn3 <17426603+bjorn3@users.noreply.github.com>2025-09-04 08:21:10 +0000
commit319fe230f0d960b343be31a1182dc0f10753156c (patch)
treef52a6178641551c257596ea56fc9cedfb6c6ceac /compiler
parenteea81b5d752f5310fcbd5ddae722afc7de0b8fa1 (diff)
downloadrust-319fe230f0d960b343be31a1182dc0f10753156c.tar.gz
rust-319fe230f0d960b343be31a1182dc0f10753156c.zip
Special case allocator module submission to avoid special casing it elsewhere
A lot of places had special handling just in case they would get an
allocator module even though most of these places could never get one or
would have a trivial implementation for the allocator module. Moving all
handling of the allocator module to a single place simplifies things a
fair bit.
Diffstat (limited to 'compiler')
-rw-r--r--compiler/rustc_codegen_cranelift/src/driver/aot.rs11
-rw-r--r--compiler/rustc_codegen_gcc/src/back/lto.rs2
-rw-r--r--compiler/rustc_codegen_llvm/src/back/lto.rs22
-rw-r--r--compiler/rustc_codegen_ssa/src/back/write.rs135
-rw-r--r--compiler/rustc_codegen_ssa/src/base.rs53
-rw-r--r--compiler/rustc_codegen_ssa/src/lib.rs2
6 files changed, 79 insertions, 146 deletions
diff --git a/compiler/rustc_codegen_cranelift/src/driver/aot.rs b/compiler/rustc_codegen_cranelift/src/driver/aot.rs
index 7e77781dc2f..c3adb5e767e 100644
--- a/compiler/rustc_codegen_cranelift/src/driver/aot.rs
+++ b/compiler/rustc_codegen_cranelift/src/driver/aot.rs
@@ -12,9 +12,7 @@ use cranelift_object::{ObjectBuilder, ObjectModule};
 use rustc_codegen_ssa::assert_module_sources::CguReuse;
 use rustc_codegen_ssa::back::link::ensure_removed;
 use rustc_codegen_ssa::base::determine_cgu_reuse;
-use rustc_codegen_ssa::{
-    CodegenResults, CompiledModule, CrateInfo, ModuleKind, errors as ssa_errors,
-};
+use rustc_codegen_ssa::{CodegenResults, CompiledModule, CrateInfo, errors as ssa_errors};
 use rustc_data_structures::profiling::SelfProfilerRef;
 use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
 use rustc_data_structures::sync::{IntoDynSyncSend, par_map};
@@ -363,7 +361,6 @@ fn emit_cgu(
         invocation_temp,
         prof,
         product.object,
-        ModuleKind::Regular,
         name.clone(),
         producer,
     )?;
@@ -372,7 +369,6 @@ fn emit_cgu(
         module_regular,
         module_global_asm: global_asm_object_file.map(|global_asm_object_file| CompiledModule {
             name: format!("{name}.asm"),
-            kind: ModuleKind::Regular,
             object: Some(global_asm_object_file),
             dwarf_object: None,
             bytecode: None,
@@ -389,7 +385,6 @@ fn emit_module(
     invocation_temp: Option<&str>,
     prof: &SelfProfilerRef,
     mut object: cranelift_object::object::write::Object<'_>,
-    kind: ModuleKind,
     name: String,
     producer_str: &str,
 ) -> Result<CompiledModule, String> {
@@ -430,7 +425,6 @@ fn emit_module(
 
     Ok(CompiledModule {
         name,
-        kind,
         object: Some(tmp_file),
         dwarf_object: None,
         bytecode: None,
@@ -485,7 +479,6 @@ fn reuse_workproduct_for_cgu(
     Ok(ModuleCodegenResult {
         module_regular: CompiledModule {
             name: cgu.name().to_string(),
-            kind: ModuleKind::Regular,
             object: Some(obj_out_regular),
             dwarf_object: None,
             bytecode: None,
@@ -495,7 +488,6 @@ fn reuse_workproduct_for_cgu(
         },
         module_global_asm: source_file_global_asm.map(|source_file| CompiledModule {
             name: cgu.name().to_string(),
-            kind: ModuleKind::Regular,
             object: Some(obj_out_global_asm),
             dwarf_object: None,
             bytecode: None,
@@ -651,7 +643,6 @@ fn emit_allocator_module(tcx: TyCtxt<'_>) -> Option<CompiledModule> {
             tcx.sess.invocation_temp.as_deref(),
             &tcx.sess.prof,
             product.object,
-            ModuleKind::Allocator,
             "allocator_shim".to_owned(),
             &crate::debuginfo::producer(tcx.sess),
         ) {
diff --git a/compiler/rustc_codegen_gcc/src/back/lto.rs b/compiler/rustc_codegen_gcc/src/back/lto.rs
index fcee6b6df62..9d8ce2383f2 100644
--- a/compiler/rustc_codegen_gcc/src/back/lto.rs
+++ b/compiler/rustc_codegen_gcc/src/back/lto.rs
@@ -204,7 +204,7 @@ fn fat_lto(
             let path = tmp_path.path().to_path_buf().join(&module.name);
             let path = path.to_str().expect("path");
             let context = &module.module_llvm.context;
-            let config = cgcx.config(module.kind);
+            let config = &cgcx.module_config;
             // NOTE: we need to set the optimization level here in order for LTO to do its job.
             context.set_optimization_level(to_gcc_opt_level(config.opt_level));
             context.add_command_line_option("-flto=auto");
diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs
index fc38c4f3e51..326b876e7e6 100644
--- a/compiler/rustc_codegen_llvm/src/back/lto.rs
+++ b/compiler/rustc_codegen_llvm/src/back/lto.rs
@@ -11,7 +11,7 @@ use object::{Object, ObjectSection};
 use rustc_codegen_ssa::back::lto::{SerializedModule, ThinModule, ThinShared};
 use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput};
 use rustc_codegen_ssa::traits::*;
-use rustc_codegen_ssa::{ModuleCodegen, ModuleKind, looks_like_rust_object_file};
+use rustc_codegen_ssa::{ModuleCodegen, looks_like_rust_object_file};
 use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::memmap::Mmap;
 use rustc_errors::DiagCtxtHandle;
@@ -43,9 +43,7 @@ fn prepare_lto(
         .map(|symbol| CString::new(symbol.to_owned()).unwrap())
         .collect::<Vec<CString>>();
 
-    if cgcx.regular_module_config.instrument_coverage
-        || cgcx.regular_module_config.pgo_gen.enabled()
-    {
+    if cgcx.module_config.instrument_coverage || cgcx.module_config.pgo_gen.enabled() {
         // These are weak symbols that point to the profile version and the
         // profile name, which need to be treated as exported so LTO doesn't nix
         // them.
@@ -55,15 +53,15 @@ fn prepare_lto(
         symbols_below_threshold.extend(PROFILER_WEAK_SYMBOLS.iter().map(|&sym| sym.to_owned()));
     }
 
-    if cgcx.regular_module_config.sanitizer.contains(SanitizerSet::MEMORY) {
+    if cgcx.module_config.sanitizer.contains(SanitizerSet::MEMORY) {
         let mut msan_weak_symbols = Vec::new();
 
         // Similar to profiling, preserve weak msan symbol during LTO.
-        if cgcx.regular_module_config.sanitizer_recover.contains(SanitizerSet::MEMORY) {
+        if cgcx.module_config.sanitizer_recover.contains(SanitizerSet::MEMORY) {
             msan_weak_symbols.push(c"__msan_keep_going");
         }
 
-        if cgcx.regular_module_config.sanitizer_memory_track_origins != 0 {
+        if cgcx.module_config.sanitizer_memory_track_origins != 0 {
             msan_weak_symbols.push(c"__msan_track_origins");
         }
 
@@ -227,15 +225,9 @@ fn fat_lto(
     // All the other modules will be serialized and reparsed into the new
     // context, so this hopefully avoids serializing and parsing the largest
     // codegen unit.
-    //
-    // Additionally use a regular module as the base here to ensure that various
-    // file copy operations in the backend work correctly. The only other kind
-    // of module here should be an allocator one, and if your crate is smaller
-    // than the allocator module then the size doesn't really matter anyway.
     let costliest_module = in_memory
         .iter()
         .enumerate()
-        .filter(|&(_, module)| module.kind == ModuleKind::Regular)
         .map(|(i, module)| {
             let cost = unsafe { llvm::LLVMRustModuleCost(module.module_llvm.llmod()) };
             (cost, i)
@@ -583,7 +575,7 @@ pub(crate) fn run_pass_manager(
     thin: bool,
 ) {
     let _timer = cgcx.prof.generic_activity_with_arg("LLVM_lto_optimize", &*module.name);
-    let config = cgcx.config(module.kind);
+    let config = &cgcx.module_config;
 
     // Now we have one massive module inside of llmod. Time to run the
     // LTO-specific optimization passes that LLVM provides.
@@ -745,7 +737,7 @@ pub(crate) fn optimize_thin_module(
     let module_llvm = ModuleLlvm::parse(cgcx, module_name, thin_module.data(), dcx);
     let mut module = ModuleCodegen::new_regular(thin_module.name(), module_llvm);
     // Given that the newly created module lacks a thinlto buffer for embedding, we need to re-add it here.
-    if cgcx.config(ModuleKind::Regular).embed_bitcode() {
+    if cgcx.module_config.embed_bitcode() {
         module.thin_lto_buffer = Some(thin_module.data().to_vec());
     }
     {
diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index d6f289b3eee..f637e7f58db 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -333,8 +333,7 @@ pub struct CodegenContext<B: WriteBackendMethods> {
     pub crate_types: Vec<CrateType>,
     pub output_filenames: Arc<OutputFilenames>,
     pub invocation_temp: Option<String>,
-    pub regular_module_config: Arc<ModuleConfig>,
-    pub allocator_module_config: Arc<ModuleConfig>,
+    pub module_config: Arc<ModuleConfig>,
     pub tm_factory: TargetMachineFactoryFn<B>,
     pub msvc_imps_needed: bool,
     pub is_pe_coff: bool,
@@ -372,13 +371,6 @@ impl<B: WriteBackendMethods> CodegenContext<B> {
     pub fn create_dcx(&self) -> DiagCtxt {
         DiagCtxt::new(Box::new(self.diag_emitter.clone()))
     }
-
-    pub fn config(&self, kind: ModuleKind) -> &ModuleConfig {
-        match kind {
-            ModuleKind::Regular => &self.regular_module_config,
-            ModuleKind::Allocator => &self.allocator_module_config,
-        }
-    }
 }
 
 fn generate_thin_lto_work<B: ExtraBackendMethods>(
@@ -442,6 +434,7 @@ pub(crate) fn start_async_codegen<B: ExtraBackendMethods>(
     backend: B,
     tcx: TyCtxt<'_>,
     target_cpu: String,
+    allocator_module: Option<ModuleCodegen<B::Module>>,
 ) -> OngoingCodegen<B> {
     let (coordinator_send, coordinator_receive) = channel();
 
@@ -465,6 +458,7 @@ pub(crate) fn start_async_codegen<B: ExtraBackendMethods>(
         coordinator_receive,
         Arc::new(regular_config),
         Arc::new(allocator_config),
+        allocator_module,
         coordinator_send.clone(),
     );
 
@@ -495,7 +489,7 @@ fn copy_all_cgu_workproducts_to_incr_comp_cache_dir(
 
     let _timer = sess.timer("copy_all_cgu_workproducts_to_incr_comp_cache_dir");
 
-    for module in compiled_modules.modules.iter().filter(|m| m.kind == ModuleKind::Regular) {
+    for module in &compiled_modules.modules {
         let mut files = Vec::new();
         if let Some(object_file_path) = &module.object {
             files.push((OutputType::Object.extension(), object_file_path.as_path()));
@@ -720,15 +714,6 @@ pub(crate) enum WorkItem<B: WriteBackendMethods> {
 }
 
 impl<B: WriteBackendMethods> WorkItem<B> {
-    fn module_kind(&self) -> ModuleKind {
-        match *self {
-            WorkItem::Optimize(ref m) => m.kind,
-            WorkItem::CopyPostLtoArtifacts(_) | WorkItem::FatLto { .. } | WorkItem::ThinLto(_) => {
-                ModuleKind::Regular
-            }
-        }
-    }
-
     /// Generate a short description of this work item suitable for use as a thread name.
     fn short_description(&self) -> String {
         // `pthread_setname()` on *nix ignores anything beyond the first 15
@@ -836,32 +821,22 @@ pub(crate) fn compute_per_cgu_lto_type(
 fn execute_optimize_work_item<B: ExtraBackendMethods>(
     cgcx: &CodegenContext<B>,
     mut module: ModuleCodegen<B::Module>,
-    module_config: &ModuleConfig,
 ) -> WorkItemResult<B> {
     let dcx = cgcx.create_dcx();
     let dcx = dcx.handle();
 
-    B::optimize(cgcx, dcx, &mut module, module_config);
+    B::optimize(cgcx, dcx, &mut module, &cgcx.module_config);
 
     // After we've done the initial round of optimizations we need to
     // decide whether to synchronously codegen this module or ship it
     // back to the coordinator thread for further LTO processing (which
     // has to wait for all the initial modules to be optimized).
 
-    // When we're automatically doing ThinLTO for multi-codegen-unit
-    // builds we don't actually want to LTO the allocator modules if
-    // it shows up. This is due to various linker shenanigans that
-    // we'll encounter later.
-    if module.kind == ModuleKind::Allocator {
-        let module = B::codegen(cgcx, module, module_config);
-        return WorkItemResult::Finished(module);
-    }
-
     let lto_type = compute_per_cgu_lto_type(&cgcx.lto, &cgcx.opts, &cgcx.crate_types);
 
     // If we're doing some form of incremental LTO then we need to be sure to
     // save our module to disk first.
-    let bitcode = if cgcx.config(module.kind).emit_pre_lto_bc {
+    let bitcode = if cgcx.module_config.emit_pre_lto_bc {
         let filename = pre_lto_bitcode_filename(&module.name);
         cgcx.incr_comp_session_dir.as_ref().map(|path| path.join(&filename))
     } else {
@@ -870,7 +845,7 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>(
 
     match lto_type {
         ComputedLtoType::No => {
-            let module = B::codegen(cgcx, module, module_config);
+            let module = B::codegen(cgcx, module, &cgcx.module_config);
             WorkItemResult::Finished(module)
         }
         ComputedLtoType::Thin => {
@@ -901,7 +876,6 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>(
 fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
     cgcx: &CodegenContext<B>,
     module: CachedModuleCodegen,
-    module_config: &ModuleConfig,
 ) -> WorkItemResult<B> {
     let incr_comp_session_dir = cgcx.incr_comp_session_dir.as_ref().unwrap();
 
@@ -961,6 +935,7 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
         }
     };
 
+    let module_config = &cgcx.module_config;
     let should_emit_obj = module_config.emit_obj != EmitObj::None;
     let assembly = load_from_incr_cache(module_config.emit_asm, OutputType::Assembly);
     let llvm_ir = load_from_incr_cache(module_config.emit_ir, OutputType::LlvmAssembly);
@@ -973,7 +948,6 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
     WorkItemResult::Finished(CompiledModule {
         links_from_incr_cache,
         name: module.name,
-        kind: ModuleKind::Regular,
         object,
         dwarf_object,
         bytecode,
@@ -988,7 +962,6 @@ fn execute_fat_lto_work_item<B: ExtraBackendMethods>(
     each_linked_rlib_for_lto: &[PathBuf],
     mut needs_fat_lto: Vec<FatLtoInput<B>>,
     import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
-    module_config: &ModuleConfig,
 ) -> WorkItemResult<B> {
     for (module, wp) in import_only_modules {
         needs_fat_lto.push(FatLtoInput::Serialized { name: wp.cgu_name, buffer: module })
@@ -1000,17 +973,16 @@ fn execute_fat_lto_work_item<B: ExtraBackendMethods>(
         each_linked_rlib_for_lto,
         needs_fat_lto,
     );
-    let module = B::codegen(cgcx, module, module_config);
+    let module = B::codegen(cgcx, module, &cgcx.module_config);
     WorkItemResult::Finished(module)
 }
 
 fn execute_thin_lto_work_item<B: ExtraBackendMethods>(
     cgcx: &CodegenContext<B>,
     module: lto::ThinModule<B>,
-    module_config: &ModuleConfig,
 ) -> WorkItemResult<B> {
     let module = B::optimize_thin(cgcx, module);
-    let module = B::codegen(cgcx, module, module_config);
+    let module = B::codegen(cgcx, module, &cgcx.module_config);
     WorkItemResult::Finished(module)
 }
 
@@ -1095,6 +1067,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
     coordinator_receive: Receiver<Message<B>>,
     regular_config: Arc<ModuleConfig>,
     allocator_config: Arc<ModuleConfig>,
+    allocator_module: Option<ModuleCodegen<B::Module>>,
     tx_to_llvm_workers: Sender<Message<B>>,
 ) -> thread::JoinHandle<Result<CompiledModules, ()>> {
     let coordinator_send = tx_to_llvm_workers;
@@ -1159,8 +1132,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
         expanded_args: tcx.sess.expanded_args.clone(),
         diag_emitter: shared_emitter.clone(),
         output_filenames: Arc::clone(tcx.output_filenames(())),
-        regular_module_config: regular_config,
-        allocator_module_config: allocator_config,
+        module_config: regular_config,
         tm_factory: backend.target_machine_factory(tcx.sess, ol, backend_features),
         msvc_imps_needed: msvc_imps_needed(tcx),
         is_pe_coff: tcx.sess.target.is_like_windows,
@@ -1175,6 +1147,11 @@ fn start_executing_work<B: ExtraBackendMethods>(
         invocation_temp: sess.invocation_temp.clone(),
     };
 
+    let compiled_allocator_module = allocator_module.map(|mut allocator_module| {
+        B::optimize(&cgcx, tcx.sess.dcx(), &mut allocator_module, &allocator_config);
+        B::codegen(&cgcx, allocator_module, &allocator_config)
+    });
+
     // This is the "main loop" of parallel work happening for parallel codegen.
     // It's here that we manage parallelism, schedule work, and work with
     // messages coming from clients.
@@ -1314,7 +1291,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
         // This is where we collect codegen units that have gone all the way
         // through codegen and LLVM.
         let mut compiled_modules = vec![];
-        let mut compiled_allocator_module = None;
         let mut needs_fat_lto = Vec::new();
         let mut needs_thin_lto = Vec::new();
         let mut lto_import_only_modules = Vec::new();
@@ -1588,15 +1564,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
 
                     match result {
                         Ok(WorkItemResult::Finished(compiled_module)) => {
-                            match compiled_module.kind {
-                                ModuleKind::Regular => {
-                                    compiled_modules.push(compiled_module);
-                                }
-                                ModuleKind::Allocator => {
-                                    assert!(compiled_allocator_module.is_none());
-                                    compiled_allocator_module = Some(compiled_module);
-                                }
-                            }
+                            compiled_modules.push(compiled_module);
                         }
                         Ok(WorkItemResult::NeedsFatLto(fat_lto_input)) => {
                             assert!(!started_lto);
@@ -1724,45 +1692,38 @@ fn spawn_work<'a, B: ExtraBackendMethods>(
     let cgcx = cgcx.clone();
 
     B::spawn_named_thread(cgcx.time_trace, work.short_description(), move || {
-        let result = std::panic::catch_unwind(AssertUnwindSafe(|| {
-            let module_config = cgcx.config(work.module_kind());
-
-            match work {
-                WorkItem::Optimize(m) => {
-                    let _timer =
-                        cgcx.prof.generic_activity_with_arg("codegen_module_optimize", &*m.name);
-                    execute_optimize_work_item(&cgcx, m, module_config)
-                }
-                WorkItem::CopyPostLtoArtifacts(m) => {
-                    let _timer = cgcx.prof.generic_activity_with_arg(
-                        "codegen_copy_artifacts_from_incr_cache",
-                        &*m.name,
-                    );
-                    execute_copy_from_cache_work_item(&cgcx, m, module_config)
-                }
-                WorkItem::FatLto {
-                    exported_symbols_for_lto,
-                    each_linked_rlib_for_lto,
+        let result = std::panic::catch_unwind(AssertUnwindSafe(|| match work {
+            WorkItem::Optimize(m) => {
+                let _timer =
+                    cgcx.prof.generic_activity_with_arg("codegen_module_optimize", &*m.name);
+                execute_optimize_work_item(&cgcx, m)
+            }
+            WorkItem::CopyPostLtoArtifacts(m) => {
+                let _timer = cgcx
+                    .prof
+                    .generic_activity_with_arg("codegen_copy_artifacts_from_incr_cache", &*m.name);
+                execute_copy_from_cache_work_item(&cgcx, m)
+            }
+            WorkItem::FatLto {
+                exported_symbols_for_lto,
+                each_linked_rlib_for_lto,
+                needs_fat_lto,
+                import_only_modules,
+            } => {
+                let _timer =
+                    cgcx.prof.generic_activity_with_arg("codegen_module_perform_lto", "everything");
+                execute_fat_lto_work_item(
+                    &cgcx,
+                    &exported_symbols_for_lto,
+                    &each_linked_rlib_for_lto,
                     needs_fat_lto,
                     import_only_modules,
-                } => {
-                    let _timer = cgcx
-                        .prof
-                        .generic_activity_with_arg("codegen_module_perform_lto", "everything");
-                    execute_fat_lto_work_item(
-                        &cgcx,
-                        &exported_symbols_for_lto,
-                        &each_linked_rlib_for_lto,
-                        needs_fat_lto,
-                        import_only_modules,
-                        module_config,
-                    )
-                }
-                WorkItem::ThinLto(m) => {
-                    let _timer =
-                        cgcx.prof.generic_activity_with_arg("codegen_module_perform_lto", m.name());
-                    execute_thin_lto_work_item(&cgcx, m, module_config)
-                }
+                )
+            }
+            WorkItem::ThinLto(m) => {
+                let _timer =
+                    cgcx.prof.generic_activity_with_arg("codegen_module_perform_lto", m.name());
+                execute_thin_lto_work_item(&cgcx, m)
             }
         }));
 
diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs
index 071bd09249a..a9a2ae1b3db 100644
--- a/compiler/rustc_codegen_ssa/src/base.rs
+++ b/compiler/rustc_codegen_ssa/src/base.rs
@@ -662,7 +662,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
 ) -> OngoingCodegen<B> {
     // Skip crate items and just output metadata in -Z no-codegen mode.
     if tcx.sess.opts.unstable_opts.no_codegen || !tcx.sess.opts.output_types.should_codegen() {
-        let ongoing_codegen = start_async_codegen(backend, tcx, target_cpu);
+        let ongoing_codegen = start_async_codegen(backend, tcx, target_cpu, None);
 
         ongoing_codegen.codegen_finished(tcx);
 
@@ -693,7 +693,27 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
         }
     }
 
-    let ongoing_codegen = start_async_codegen(backend.clone(), tcx, target_cpu);
+    // Codegen an allocator shim, if necessary.
+    let allocator_module = if let Some(kind) = allocator_kind_for_codegen(tcx) {
+        let llmod_id =
+            cgu_name_builder.build_cgu_name(LOCAL_CRATE, &["crate"], Some("allocator")).to_string();
+
+        tcx.sess.time("write_allocator_module", || {
+            let module = backend.codegen_allocator(
+                tcx,
+                &llmod_id,
+                kind,
+                // If allocator_kind is Some then alloc_error_handler_kind must
+                // also be Some.
+                tcx.alloc_error_handler_kind(()).unwrap(),
+            );
+            Some(ModuleCodegen::new_allocator(llmod_id, module))
+        })
+    } else {
+        None
+    };
+
+    let ongoing_codegen = start_async_codegen(backend.clone(), tcx, target_cpu, allocator_module);
 
     // For better throughput during parallel processing by LLVM, we used to sort
     // CGUs largest to smallest. This would lead to better thread utilization
@@ -810,35 +830,6 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
         }
     }
 
-    // Codegen an allocator shim, if necessary.
-    // Do this last to ensure the LLVM_passes timer doesn't start while no CGUs have been codegened
-    // yet for the backend to optimize.
-    if let Some(kind) = allocator_kind_for_codegen(tcx) {
-        let llmod_id =
-            cgu_name_builder.build_cgu_name(LOCAL_CRATE, &["crate"], Some("allocator")).to_string();
-        let module_llvm = tcx.sess.time("write_allocator_module", || {
-            backend.codegen_allocator(
-                tcx,
-                &llmod_id,
-                kind,
-                // If allocator_kind is Some then alloc_error_handler_kind must
-                // also be Some.
-                tcx.alloc_error_handler_kind(()).unwrap(),
-            )
-        });
-
-        ongoing_codegen.wait_for_signal_to_codegen_item();
-        ongoing_codegen.check_for_errors(tcx.sess);
-
-        // These modules are generally cheap and won't throw off scheduling.
-        let cost = 0;
-        submit_codegened_module_to_llvm(
-            &ongoing_codegen.coordinator,
-            ModuleCodegen::new_allocator(llmod_id, module_llvm),
-            cost,
-        );
-    }
-
     ongoing_codegen.codegen_finished(tcx);
 
     // Since the main thread is sometimes blocked during codegen, we keep track
diff --git a/compiler/rustc_codegen_ssa/src/lib.rs b/compiler/rustc_codegen_ssa/src/lib.rs
index fe0500a5d4c..5b90ffaa056 100644
--- a/compiler/rustc_codegen_ssa/src/lib.rs
+++ b/compiler/rustc_codegen_ssa/src/lib.rs
@@ -120,7 +120,6 @@ impl<M> ModuleCodegen<M> {
 
         CompiledModule {
             name: self.name.clone(),
-            kind: self.kind,
             object,
             dwarf_object,
             bytecode,
@@ -134,7 +133,6 @@ impl<M> ModuleCodegen<M> {
 #[derive(Debug, Encodable, Decodable)]
 pub struct CompiledModule {
     pub name: String,
-    pub kind: ModuleKind,
     pub object: Option<PathBuf>,
     pub dwarf_object: Option<PathBuf>,
     pub bytecode: Option<PathBuf>,