diff options
| author | bjorn3 <17426603+bjorn3@users.noreply.github.com> | 2025-09-05 09:32:56 +0000 |
|---|---|---|
| committer | bjorn3 <17426603+bjorn3@users.noreply.github.com> | 2025-09-06 08:35:55 +0000 |
| commit | 027135976849da34250d7638502bcc2a24f741d7 (patch) | |
| tree | 4d9901e14fd37162728290dfebb59e2055e93130 | |
| parent | 397f93362974d298b79e0e0cd43677014aa7b722 (diff) | |
| download | rust-027135976849da34250d7638502bcc2a24f741d7.tar.gz rust-027135976849da34250d7638502bcc2a24f741d7.zip | |
Make the allocator shim participate in LTO again
| -rw-r--r-- | compiler/rustc_codegen_ssa/src/back/lto.rs | 14 | ||||
| -rw-r--r-- | compiler/rustc_codegen_ssa/src/back/write.rs | 40 | ||||
| -rw-r--r-- | compiler/rustc_codegen_ssa/src/base.rs | 11 |
3 files changed, 39 insertions, 26 deletions
diff --git a/compiler/rustc_codegen_ssa/src/back/lto.rs b/compiler/rustc_codegen_ssa/src/back/lto.rs index f4a9037940a..e6df6a2469f 100644 --- a/compiler/rustc_codegen_ssa/src/back/lto.rs +++ b/compiler/rustc_codegen_ssa/src/back/lto.rs @@ -1,7 +1,6 @@ use std::ffi::CString; use std::sync::Arc; -use rustc_ast::expand::allocator::AllocatorKind; use rustc_data_structures::memmap::Mmap; use rustc_hir::def_id::{CrateNum, LOCAL_CRATE}; use rustc_middle::middle::exported_symbols::{ExportedSymbol, SymbolExportInfo, SymbolExportLevel}; @@ -96,19 +95,6 @@ pub(super) fn exported_symbols_for_lto( .filter_map(|&(s, info): &(ExportedSymbol<'_>, SymbolExportInfo)| { if info.level.is_below_threshold(export_threshold) || info.used { Some(symbol_name_for_instance_in_crate(tcx, s, cnum)) - } else if export_threshold == SymbolExportLevel::C - && info.rustc_std_internal_symbol - && let Some(AllocatorKind::Default) = allocator_kind_for_codegen(tcx) - { - // Export the __rdl_* exports for usage by the allocator shim when not using - // #[global_allocator]. Most of the conditions above are only used to avoid - // unnecessary expensive symbol_name_for_instance_in_crate calls. - let sym = symbol_name_for_instance_in_crate(tcx, s, cnum); - if sym.contains("__rdl_") || sym.contains("__rg_oom") { - Some(sym) - } else { - None - } } else { None } diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs index f637e7f58db..afda7226fdd 100644 --- a/compiler/rustc_codegen_ssa/src/back/write.rs +++ b/compiler/rustc_codegen_ssa/src/back/write.rs @@ -334,6 +334,7 @@ pub struct CodegenContext<B: WriteBackendMethods> { pub output_filenames: Arc<OutputFilenames>, pub invocation_temp: Option<String>, pub module_config: Arc<ModuleConfig>, + pub allocator_config: Arc<ModuleConfig>, pub tm_factory: TargetMachineFactoryFn<B>, pub msvc_imps_needed: bool, pub is_pe_coff: bool, @@ -794,12 +795,19 @@ pub(crate) fn compute_per_cgu_lto_type( sess_lto: &Lto, opts: &config::Options, sess_crate_types: &[CrateType], + module_kind: ModuleKind, ) -> ComputedLtoType { // If the linker does LTO, we don't have to do it. Note that we // keep doing full LTO, if it is requested, as not to break the // assumption that the output will be a single module. let linker_does_lto = opts.cg.linker_plugin_lto.enabled(); + // When we're automatically doing ThinLTO for multi-codegen-unit + // builds we don't actually want to LTO the allocator module if + // it shows up. This is due to various linker shenanigans that + // we'll encounter later. + let is_allocator = module_kind == ModuleKind::Allocator; + // We ignore a request for full crate graph LTO if the crate type // is only an rlib, as there is no full crate graph to process, // that'll happen later. @@ -811,7 +819,7 @@ pub(crate) fn compute_per_cgu_lto_type( let is_rlib = matches!(sess_crate_types, [CrateType::Rlib]); match sess_lto { - Lto::ThinLocal if !linker_does_lto => ComputedLtoType::Thin, + Lto::ThinLocal if !linker_does_lto && !is_allocator => ComputedLtoType::Thin, Lto::Thin if !linker_does_lto && !is_rlib => ComputedLtoType::Thin, Lto::Fat if !is_rlib => ComputedLtoType::Fat, _ => ComputedLtoType::No, @@ -825,18 +833,23 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>( let dcx = cgcx.create_dcx(); let dcx = dcx.handle(); - B::optimize(cgcx, dcx, &mut module, &cgcx.module_config); + let module_config = match module.kind { + ModuleKind::Regular => &cgcx.module_config, + ModuleKind::Allocator => &cgcx.allocator_config, + }; + + B::optimize(cgcx, dcx, &mut module, module_config); // After we've done the initial round of optimizations we need to // decide whether to synchronously codegen this module or ship it // back to the coordinator thread for further LTO processing (which // has to wait for all the initial modules to be optimized). - let lto_type = compute_per_cgu_lto_type(&cgcx.lto, &cgcx.opts, &cgcx.crate_types); + let lto_type = compute_per_cgu_lto_type(&cgcx.lto, &cgcx.opts, &cgcx.crate_types, module.kind); // If we're doing some form of incremental LTO then we need to be sure to // save our module to disk first. - let bitcode = if cgcx.module_config.emit_pre_lto_bc { + let bitcode = if module_config.emit_pre_lto_bc { let filename = pre_lto_bitcode_filename(&module.name); cgcx.incr_comp_session_dir.as_ref().map(|path| path.join(&filename)) } else { @@ -845,7 +858,7 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>( match lto_type { ComputedLtoType::No => { - let module = B::codegen(cgcx, module, &cgcx.module_config); + let module = B::codegen(cgcx, module, module_config); WorkItemResult::Finished(module) } ComputedLtoType::Thin => { @@ -1133,6 +1146,7 @@ fn start_executing_work<B: ExtraBackendMethods>( diag_emitter: shared_emitter.clone(), output_filenames: Arc::clone(tcx.output_filenames(())), module_config: regular_config, + allocator_config, tm_factory: backend.target_machine_factory(tcx.sess, ol, backend_features), msvc_imps_needed: msvc_imps_needed(tcx), is_pe_coff: tcx.sess.target.is_like_windows, @@ -1147,11 +1161,6 @@ fn start_executing_work<B: ExtraBackendMethods>( invocation_temp: sess.invocation_temp.clone(), }; - let compiled_allocator_module = allocator_module.map(|mut allocator_module| { - B::optimize(&cgcx, tcx.sess.dcx(), &mut allocator_module, &allocator_config); - B::codegen(&cgcx, allocator_module, &allocator_config) - }); - // This is the "main loop" of parallel work happening for parallel codegen. // It's here that we manage parallelism, schedule work, and work with // messages coming from clients. @@ -1331,6 +1340,17 @@ fn start_executing_work<B: ExtraBackendMethods>( let mut llvm_start_time: Option<VerboseTimingGuard<'_>> = None; + let compiled_allocator_module = allocator_module.and_then(|allocator_module| { + match execute_optimize_work_item(&cgcx, allocator_module) { + WorkItemResult::Finished(compiled_module) => return Some(compiled_module), + WorkItemResult::NeedsFatLto(fat_lto_input) => needs_fat_lto.push(fat_lto_input), + WorkItemResult::NeedsThinLto(name, thin_buffer) => { + needs_thin_lto.push((name, thin_buffer)) + } + } + None + }); + // Run the message loop while there's still anything that needs message // processing. Note that as soon as codegen is aborted we simply want to // wait for all existing work to finish, so many of the conditions here diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs index a9a2ae1b3db..45b028aa8ef 100644 --- a/compiler/rustc_codegen_ssa/src/base.rs +++ b/compiler/rustc_codegen_ssa/src/base.rs @@ -46,7 +46,9 @@ use crate::meth::load_vtable; use crate::mir::operand::OperandValue; use crate::mir::place::PlaceRef; use crate::traits::*; -use crate::{CachedModuleCodegen, CodegenLintLevels, CrateInfo, ModuleCodegen, errors, meth, mir}; +use crate::{ + CachedModuleCodegen, CodegenLintLevels, CrateInfo, ModuleCodegen, ModuleKind, errors, meth, mir, +}; pub(crate) fn bin_op_to_icmp_predicate(op: BinOp, signed: bool) -> IntPredicate { match (op, signed) { @@ -1124,7 +1126,12 @@ pub fn determine_cgu_reuse<'tcx>(tcx: TyCtxt<'tcx>, cgu: &CodegenUnit<'tcx>) -> // We can re-use either the pre- or the post-thinlto state. If no LTO is // being performed then we can use post-LTO artifacts, otherwise we must // reuse pre-LTO artifacts - match compute_per_cgu_lto_type(&tcx.sess.lto(), &tcx.sess.opts, tcx.crate_types()) { + match compute_per_cgu_lto_type( + &tcx.sess.lto(), + &tcx.sess.opts, + tcx.crate_types(), + ModuleKind::Regular, + ) { ComputedLtoType::No => CguReuse::PostLto, _ => CguReuse::PreLto, } |
