diff options
| author | bors <bors@rust-lang.org> | 2025-09-06 15:21:16 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2025-09-06 15:21:16 +0000 |
| commit | bea625f3275e3c897dc965ed97a1d19ef7831f01 (patch) | |
| tree | f32cdc8fb33ca70d76fade0ba3a5af10a2c7d6bf /compiler/rustc_codegen_ssa/src | |
| parent | 6d5caf3a4a358c80f09ba51d71db0dbc41381992 (diff) | |
| parent | 2cf94b92ca852924ad90943a0c469f01742216a6 (diff) | |
| download | rust-bea625f3275e3c897dc965ed97a1d19ef7831f01.tar.gz rust-bea625f3275e3c897dc965ed97a1d19ef7831f01.zip | |
Auto merge of #146232 - bjorn3:lto_allocator_shim, r=lqd
Make the allocator shim participate in LTO again. The shim no longer participating in LTO is likely the cause of the perf regression in https://github.com/rust-lang/rust/pull/145955. It also caused some functional regressions. Fixes https://github.com/rust-lang/rust/issues/146235 Fixes https://github.com/rust-lang/rust/issues/146239
Diffstat (limited to 'compiler/rustc_codegen_ssa/src')
| -rw-r--r-- | compiler/rustc_codegen_ssa/src/back/lto.rs | 14 | ||||
| -rw-r--r-- | compiler/rustc_codegen_ssa/src/back/write.rs | 43 | ||||
| -rw-r--r-- | compiler/rustc_codegen_ssa/src/base.rs | 11 | ||||
| -rw-r--r-- | compiler/rustc_codegen_ssa/src/lib.rs | 2 |
4 files changed, 43 insertions, 27 deletions
diff --git a/compiler/rustc_codegen_ssa/src/back/lto.rs b/compiler/rustc_codegen_ssa/src/back/lto.rs index f4a9037940a..e6df6a2469f 100644 --- a/compiler/rustc_codegen_ssa/src/back/lto.rs +++ b/compiler/rustc_codegen_ssa/src/back/lto.rs @@ -1,7 +1,6 @@ use std::ffi::CString; use std::sync::Arc; -use rustc_ast::expand::allocator::AllocatorKind; use rustc_data_structures::memmap::Mmap; use rustc_hir::def_id::{CrateNum, LOCAL_CRATE}; use rustc_middle::middle::exported_symbols::{ExportedSymbol, SymbolExportInfo, SymbolExportLevel}; @@ -96,19 +95,6 @@ pub(super) fn exported_symbols_for_lto( .filter_map(|&(s, info): &(ExportedSymbol<'_>, SymbolExportInfo)| { if info.level.is_below_threshold(export_threshold) || info.used { Some(symbol_name_for_instance_in_crate(tcx, s, cnum)) - } else if export_threshold == SymbolExportLevel::C - && info.rustc_std_internal_symbol - && let Some(AllocatorKind::Default) = allocator_kind_for_codegen(tcx) - { - // Export the __rdl_* exports for usage by the allocator shim when not using - // #[global_allocator]. Most of the conditions above are only used to avoid - // unnecessary expensive symbol_name_for_instance_in_crate calls. 
- let sym = symbol_name_for_instance_in_crate(tcx, s, cnum); - if sym.contains("__rdl_") || sym.contains("__rg_oom") { - Some(sym) - } else { - None - } } else { None } diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs index f637e7f58db..95e02a7c6db 100644 --- a/compiler/rustc_codegen_ssa/src/back/write.rs +++ b/compiler/rustc_codegen_ssa/src/back/write.rs @@ -334,6 +334,7 @@ pub struct CodegenContext<B: WriteBackendMethods> { pub output_filenames: Arc<OutputFilenames>, pub invocation_temp: Option<String>, pub module_config: Arc<ModuleConfig>, + pub allocator_config: Arc<ModuleConfig>, pub tm_factory: TargetMachineFactoryFn<B>, pub msvc_imps_needed: bool, pub is_pe_coff: bool, @@ -489,7 +490,7 @@ fn copy_all_cgu_workproducts_to_incr_comp_cache_dir( let _timer = sess.timer("copy_all_cgu_workproducts_to_incr_comp_cache_dir"); - for module in &compiled_modules.modules { + for module in compiled_modules.modules.iter().filter(|m| m.kind == ModuleKind::Regular) { let mut files = Vec::new(); if let Some(object_file_path) = &module.object { files.push((OutputType::Object.extension(), object_file_path.as_path())); @@ -794,12 +795,19 @@ pub(crate) fn compute_per_cgu_lto_type( sess_lto: &Lto, opts: &config::Options, sess_crate_types: &[CrateType], + module_kind: ModuleKind, ) -> ComputedLtoType { // If the linker does LTO, we don't have to do it. Note that we // keep doing full LTO, if it is requested, as not to break the // assumption that the output will be a single module. let linker_does_lto = opts.cg.linker_plugin_lto.enabled(); + // When we're automatically doing ThinLTO for multi-codegen-unit + // builds we don't actually want to LTO the allocator module if + // it shows up. This is due to various linker shenanigans that + // we'll encounter later. 
+ let is_allocator = module_kind == ModuleKind::Allocator; + // We ignore a request for full crate graph LTO if the crate type // is only an rlib, as there is no full crate graph to process, // that'll happen later. @@ -811,7 +819,7 @@ pub(crate) fn compute_per_cgu_lto_type( let is_rlib = matches!(sess_crate_types, [CrateType::Rlib]); match sess_lto { - Lto::ThinLocal if !linker_does_lto => ComputedLtoType::Thin, + Lto::ThinLocal if !linker_does_lto && !is_allocator => ComputedLtoType::Thin, Lto::Thin if !linker_does_lto && !is_rlib => ComputedLtoType::Thin, Lto::Fat if !is_rlib => ComputedLtoType::Fat, _ => ComputedLtoType::No, @@ -825,18 +833,23 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>( let dcx = cgcx.create_dcx(); let dcx = dcx.handle(); - B::optimize(cgcx, dcx, &mut module, &cgcx.module_config); + let module_config = match module.kind { + ModuleKind::Regular => &cgcx.module_config, + ModuleKind::Allocator => &cgcx.allocator_config, + }; + + B::optimize(cgcx, dcx, &mut module, module_config); // After we've done the initial round of optimizations we need to // decide whether to synchronously codegen this module or ship it // back to the coordinator thread for further LTO processing (which // has to wait for all the initial modules to be optimized). - let lto_type = compute_per_cgu_lto_type(&cgcx.lto, &cgcx.opts, &cgcx.crate_types); + let lto_type = compute_per_cgu_lto_type(&cgcx.lto, &cgcx.opts, &cgcx.crate_types, module.kind); // If we're doing some form of incremental LTO then we need to be sure to // save our module to disk first. 
- let bitcode = if cgcx.module_config.emit_pre_lto_bc { + let bitcode = if module_config.emit_pre_lto_bc { let filename = pre_lto_bitcode_filename(&module.name); cgcx.incr_comp_session_dir.as_ref().map(|path| path.join(&filename)) } else { @@ -845,7 +858,7 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>( match lto_type { ComputedLtoType::No => { - let module = B::codegen(cgcx, module, &cgcx.module_config); + let module = B::codegen(cgcx, module, module_config); WorkItemResult::Finished(module) } ComputedLtoType::Thin => { @@ -947,6 +960,7 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>( WorkItemResult::Finished(CompiledModule { links_from_incr_cache, + kind: ModuleKind::Regular, name: module.name, object, dwarf_object, @@ -1133,6 +1147,7 @@ fn start_executing_work<B: ExtraBackendMethods>( diag_emitter: shared_emitter.clone(), output_filenames: Arc::clone(tcx.output_filenames(())), module_config: regular_config, + allocator_config, tm_factory: backend.target_machine_factory(tcx.sess, ol, backend_features), msvc_imps_needed: msvc_imps_needed(tcx), is_pe_coff: tcx.sess.target.is_like_windows, @@ -1147,11 +1162,6 @@ fn start_executing_work<B: ExtraBackendMethods>( invocation_temp: sess.invocation_temp.clone(), }; - let compiled_allocator_module = allocator_module.map(|mut allocator_module| { - B::optimize(&cgcx, tcx.sess.dcx(), &mut allocator_module, &allocator_config); - B::codegen(&cgcx, allocator_module, &allocator_config) - }); - // This is the "main loop" of parallel work happening for parallel codegen. // It's here that we manage parallelism, schedule work, and work with // messages coming from clients. 
@@ -1331,6 +1341,17 @@ fn start_executing_work<B: ExtraBackendMethods>( let mut llvm_start_time: Option<VerboseTimingGuard<'_>> = None; + let compiled_allocator_module = allocator_module.and_then(|allocator_module| { + match execute_optimize_work_item(&cgcx, allocator_module) { + WorkItemResult::Finished(compiled_module) => return Some(compiled_module), + WorkItemResult::NeedsFatLto(fat_lto_input) => needs_fat_lto.push(fat_lto_input), + WorkItemResult::NeedsThinLto(name, thin_buffer) => { + needs_thin_lto.push((name, thin_buffer)) + } + } + None + }); + // Run the message loop while there's still anything that needs message // processing. Note that as soon as codegen is aborted we simply want to // wait for all existing work to finish, so many of the conditions here diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs index a9a2ae1b3db..45b028aa8ef 100644 --- a/compiler/rustc_codegen_ssa/src/base.rs +++ b/compiler/rustc_codegen_ssa/src/base.rs @@ -46,7 +46,9 @@ use crate::meth::load_vtable; use crate::mir::operand::OperandValue; use crate::mir::place::PlaceRef; use crate::traits::*; -use crate::{CachedModuleCodegen, CodegenLintLevels, CrateInfo, ModuleCodegen, errors, meth, mir}; +use crate::{ + CachedModuleCodegen, CodegenLintLevels, CrateInfo, ModuleCodegen, ModuleKind, errors, meth, mir, +}; pub(crate) fn bin_op_to_icmp_predicate(op: BinOp, signed: bool) -> IntPredicate { match (op, signed) { @@ -1124,7 +1126,12 @@ pub fn determine_cgu_reuse<'tcx>(tcx: TyCtxt<'tcx>, cgu: &CodegenUnit<'tcx>) -> // We can re-use either the pre- or the post-thinlto state. 
If no LTO is // being performed then we can use post-LTO artifacts, otherwise we must // reuse pre-LTO artifacts - match compute_per_cgu_lto_type(&tcx.sess.lto(), &tcx.sess.opts, tcx.crate_types()) { + match compute_per_cgu_lto_type( + &tcx.sess.lto(), + &tcx.sess.opts, + tcx.crate_types(), + ModuleKind::Regular, + ) { ComputedLtoType::No => CguReuse::PostLto, _ => CguReuse::PreLto, } diff --git a/compiler/rustc_codegen_ssa/src/lib.rs b/compiler/rustc_codegen_ssa/src/lib.rs index 23146661f27..baba8f9ca3e 100644 --- a/compiler/rustc_codegen_ssa/src/lib.rs +++ b/compiler/rustc_codegen_ssa/src/lib.rs @@ -120,6 +120,7 @@ impl<M> ModuleCodegen<M> { CompiledModule { name: self.name, + kind: self.kind, object, dwarf_object, bytecode, @@ -133,6 +134,7 @@ impl<M> ModuleCodegen<M> { #[derive(Debug, Encodable, Decodable)] pub struct CompiledModule { pub name: String, + pub kind: ModuleKind, pub object: Option<PathBuf>, pub dwarf_object: Option<PathBuf>, pub bytecode: Option<PathBuf>, |
