From fe2eeabe27ce3d5b871ab903e65b4707ad015764 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Fri, 25 Jul 2025 09:42:18 +0000 Subject: Use the object crate rather than LLVM for extracting bitcode sections --- compiler/rustc_codegen_llvm/src/back/lto.rs | 31 ++++++++--------------------- compiler/rustc_codegen_llvm/src/errors.rs | 2 +- compiler/rustc_codegen_llvm/src/llvm/ffi.rs | 7 ------- 3 files changed, 9 insertions(+), 31 deletions(-) (limited to 'compiler/rustc_codegen_llvm/src') diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs index 767835c34f0..cac7d49b74a 100644 --- a/compiler/rustc_codegen_llvm/src/back/lto.rs +++ b/compiler/rustc_codegen_llvm/src/back/lto.rs @@ -7,6 +7,7 @@ use std::sync::Arc; use std::{io, iter, slice}; use object::read::archive::ArchiveFile; +use object::{Object, ObjectSection}; use rustc_codegen_ssa::back::lto::{SerializedModule, ThinModule, ThinShared}; use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput}; use rustc_codegen_ssa::traits::*; @@ -105,31 +106,15 @@ fn get_bitcode_slice_from_object_data<'a>( // name" which in the public API for sections gets treated as part of the section name, but // internally in MachOObjectFile.cpp gets treated separately. let section_name = bitcode_section_name(cgcx).to_str().unwrap().trim_start_matches("__LLVM,"); - let mut len = 0; - let data = unsafe { - llvm::LLVMRustGetSliceFromObjectDataByName( - obj.as_ptr(), - obj.len(), - section_name.as_ptr(), - section_name.len(), - &mut len, - ) - }; - if !data.is_null() { - assert!(len != 0); - let bc = unsafe { slice::from_raw_parts(data, len) }; - // `bc` must be a sub-slice of `obj`. - assert!(obj.as_ptr() <= bc.as_ptr()); - assert!(bc[bc.len()..bc.len()].as_ptr() <= obj[obj.len()..obj.len()].as_ptr()); + let obj = + object::File::parse(obj).map_err(|err| LtoBitcodeFromRlib { err: err.to_string() })?; - Ok(bc) - } else { - assert!(len == 0); - Err(LtoBitcodeFromRlib { - llvm_err: llvm::last_error().unwrap_or_else(|| "unknown LLVM error".to_string()), - }) - } + let section = obj + .section_by_name(section_name) + .ok_or_else(|| LtoBitcodeFromRlib { err: format!("Can't find section {section_name}") })?; + + section.data().map_err(|err| LtoBitcodeFromRlib { err: err.to_string() }) } /// Performs fat LTO by merging all modules into a single one and returning it diff --git a/compiler/rustc_codegen_llvm/src/errors.rs b/compiler/rustc_codegen_llvm/src/errors.rs index 2a889888a39..627b0c9ff3b 100644 --- a/compiler/rustc_codegen_llvm/src/errors.rs +++ b/compiler/rustc_codegen_llvm/src/errors.rs @@ -39,7 +39,7 @@ pub(crate) struct AutoDiffWithoutEnable; #[derive(Diagnostic)] #[diag(codegen_llvm_lto_bitcode_from_rlib)] pub(crate) struct LtoBitcodeFromRlib { - pub llvm_err: String, + pub err: String, } #[derive(Diagnostic)] diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index edfb29dd1be..0d0cb5f139e 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -2612,13 +2612,6 @@ unsafe extern "C" { len: usize, Identifier: *const c_char, ) -> Option<&Module>; - pub(crate) fn LLVMRustGetSliceFromObjectDataByName( - data: *const u8, - len: usize, - name: *const u8, - name_len: usize, - out_len: &mut usize, - ) -> *const u8; pub(crate) fn LLVMRustLinkerNew(M: &Module) -> &mut Linker<'_>; pub(crate) fn LLVMRustLinkerAdd( -- cgit 1.4.1-3-g733a5 From 948f7798d70b6adb6a490b8867e155dadebd3f0e Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Sun, 6 Jul 2025 18:03:07 +0000 Subject: Remove support for -Zcombine-cgu Nobody seems to actually use this, while still adding some extra complexity to the already rather complex codegen coordinator code. It is also not supported by any backend other than the LLVM backend. --- compiler/rustc_codegen_gcc/src/back/write.rs | 9 ------ compiler/rustc_codegen_gcc/src/lib.rs | 8 ----- compiler/rustc_codegen_llvm/src/back/write.rs | 23 -------------- compiler/rustc_codegen_llvm/src/lib.rs | 7 ----- compiler/rustc_codegen_ssa/src/back/write.rs | 42 ++++---------------------- compiler/rustc_codegen_ssa/src/traits/write.rs | 6 ---- compiler/rustc_session/src/options.rs | 2 -- 7 files changed, 6 insertions(+), 91 deletions(-) (limited to 'compiler/rustc_codegen_llvm/src') diff --git a/compiler/rustc_codegen_gcc/src/back/write.rs b/compiler/rustc_codegen_gcc/src/back/write.rs index 113abe70805..c1231142c65 100644 --- a/compiler/rustc_codegen_gcc/src/back/write.rs +++ b/compiler/rustc_codegen_gcc/src/back/write.rs @@ -4,7 +4,6 @@ use gccjit::{Context, OutputKind}; use rustc_codegen_ssa::back::link::ensure_removed; use rustc_codegen_ssa::back::write::{BitcodeSection, CodegenContext, EmitObj, ModuleConfig}; use rustc_codegen_ssa::{CompiledModule, ModuleCodegen}; -use rustc_errors::DiagCtxtHandle; use rustc_fs_util::link_or_copy; use rustc_session::config::OutputType; use rustc_span::fatal_error::FatalError; @@ -258,14 +257,6 @@ pub(crate) fn codegen( )) } -pub(crate) fn link( - _cgcx: &CodegenContext, - _dcx: DiagCtxtHandle<'_>, - mut _modules: Vec>, -) -> Result, FatalError> { - unimplemented!(); -} - pub(crate) fn save_temp_bitcode( cgcx: &CodegenContext, _module: &ModuleCodegen, diff --git a/compiler/rustc_codegen_gcc/src/lib.rs b/compiler/rustc_codegen_gcc/src/lib.rs index 71765c51138..a3120682500 100644 --- a/compiler/rustc_codegen_gcc/src/lib.rs +++ b/compiler/rustc_codegen_gcc/src/lib.rs @@ -426,14 +426,6 @@ impl WriteBackendMethods for GccCodegenBackend { fn serialize_module(_module: ModuleCodegen) -> (String, Self::ModuleBuffer) { unimplemented!(); } - - fn run_link( - cgcx: &CodegenContext, - dcx: DiagCtxtHandle<'_>, - modules: Vec>, - ) -> Result, FatalError> { - back::write::link(cgcx, dcx, modules) - } } /// This is the entrypoint for a hot plugged rustc_codegen_gccjit diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs index 6f8fba2a30d..85a06f457eb 100644 --- a/compiler/rustc_codegen_llvm/src/back/write.rs +++ b/compiler/rustc_codegen_llvm/src/back/write.rs @@ -796,29 +796,6 @@ pub(crate) fn optimize( Ok(()) } -pub(crate) fn link( - cgcx: &CodegenContext, - dcx: DiagCtxtHandle<'_>, - mut modules: Vec>, -) -> Result, FatalError> { - use super::lto::{Linker, ModuleBuffer}; - // Sort the modules by name to ensure deterministic behavior. - modules.sort_by(|a, b| a.name.cmp(&b.name)); - let (first, elements) = - modules.split_first().expect("Bug! modules must contain at least one module."); - - let mut linker = Linker::new(first.module_llvm.llmod()); - for module in elements { - let _timer = cgcx.prof.generic_activity_with_arg("LLVM_link_module", &*module.name); - let buffer = ModuleBuffer::new(module.module_llvm.llmod()); - linker - .add(buffer.data()) - .map_err(|()| llvm_err(dcx, LlvmError::SerializeModule { name: &module.name }))?; - } - drop(linker); - Ok(modules.remove(0)) -} - pub(crate) fn codegen( cgcx: &CodegenContext, module: ModuleCodegen, diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs index 8b1913cfa75..ca84b6de8b1 100644 --- a/compiler/rustc_codegen_llvm/src/lib.rs +++ b/compiler/rustc_codegen_llvm/src/lib.rs @@ -168,13 +168,6 @@ impl WriteBackendMethods for LlvmCodegenBackend { let stats = llvm::build_string(|s| unsafe { llvm::LLVMRustPrintStatistics(s) }).unwrap(); print!("{stats}"); } - fn run_link( - cgcx: &CodegenContext, - dcx: DiagCtxtHandle<'_>, - modules: Vec>, - ) -> Result, FatalError> { - back::write::link(cgcx, dcx, modules) - } fn run_and_optimize_fat_lto( cgcx: &CodegenContext, exported_symbols_for_lto: &[String], diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs index a41cbd306d0..6773d3e24e9 100644 --- a/compiler/rustc_codegen_ssa/src/back/write.rs +++ b/compiler/rustc_codegen_ssa/src/back/write.rs @@ -797,10 +797,6 @@ pub(crate) enum WorkItemResult { /// The backend has finished compiling a CGU, nothing more required. Finished(CompiledModule), - /// The backend has finished compiling a CGU, which now needs linking - /// because `-Zcombine-cgu` was specified. - NeedsLink(ModuleCodegen), - /// The backend has finished compiling a CGU, which now needs to go through /// fat LTO. NeedsFatLto(FatLtoInput), @@ -884,7 +880,10 @@ fn execute_optimize_work_item( }; match lto_type { - ComputedLtoType::No => finish_intra_module_work(cgcx, module, module_config), + ComputedLtoType::No => { + let module = B::codegen(cgcx, module, module_config)?; + Ok(WorkItemResult::Finished(module)) + } ComputedLtoType::Thin => { let (name, thin_buffer) = B::prepare_thin(module, false); if let Some(path) = bitcode { @@ -1024,20 +1023,8 @@ fn execute_thin_lto_work_item( module_config: &ModuleConfig, ) -> Result, FatalError> { let module = B::optimize_thin(cgcx, module)?; - finish_intra_module_work(cgcx, module, module_config) -} - -fn finish_intra_module_work( - cgcx: &CodegenContext, - module: ModuleCodegen, - module_config: &ModuleConfig, -) -> Result, FatalError> { - if !cgcx.opts.unstable_opts.combine_cgu || module.kind == ModuleKind::Allocator { - let module = B::codegen(cgcx, module, module_config)?; - Ok(WorkItemResult::Finished(module)) - } else { - Ok(WorkItemResult::NeedsLink(module)) - } + let module = B::codegen(cgcx, module, module_config)?; + Ok(WorkItemResult::Finished(module)) } /// Messages sent to the coordinator. @@ -1343,7 +1330,6 @@ fn start_executing_work( // through codegen and LLVM. let mut compiled_modules = vec![]; let mut compiled_allocator_module = None; - let mut needs_link = Vec::new(); let mut needs_fat_lto = Vec::new(); let mut needs_thin_lto = Vec::new(); let mut lto_import_only_modules = Vec::new(); @@ -1625,7 +1611,6 @@ fn start_executing_work( Ok(WorkItemResult::Finished(compiled_module)) => { match compiled_module.kind { ModuleKind::Regular => { - assert!(needs_link.is_empty()); compiled_modules.push(compiled_module); } ModuleKind::Allocator => { @@ -1634,10 +1619,6 @@ fn start_executing_work( } } } - Ok(WorkItemResult::NeedsLink(module)) => { - assert!(compiled_modules.is_empty()); - needs_link.push(module); - } Ok(WorkItemResult::NeedsFatLto(fat_lto_input)) => { assert!(!started_lto); assert!(needs_thin_lto.is_empty()); @@ -1674,17 +1655,6 @@ fn start_executing_work( return Err(()); } - let needs_link = mem::take(&mut needs_link); - if !needs_link.is_empty() { - assert!(compiled_modules.is_empty()); - let dcx = cgcx.create_dcx(); - let dcx = dcx.handle(); - let module = B::run_link(&cgcx, dcx, needs_link).map_err(|_| ())?; - let module = - B::codegen(&cgcx, module, cgcx.config(ModuleKind::Regular)).map_err(|_| ())?; - compiled_modules.push(module); - } - // Drop to print timings drop(llvm_start_time); diff --git a/compiler/rustc_codegen_ssa/src/traits/write.rs b/compiler/rustc_codegen_ssa/src/traits/write.rs index 8e78cbe1963..f391c198e1a 100644 --- a/compiler/rustc_codegen_ssa/src/traits/write.rs +++ b/compiler/rustc_codegen_ssa/src/traits/write.rs @@ -16,12 +16,6 @@ pub trait WriteBackendMethods: Clone + 'static { type ThinData: Send + Sync; type ThinBuffer: ThinBufferMethods; - /// Merge all modules into main_module and returning it - fn run_link( - cgcx: &CodegenContext, - dcx: DiagCtxtHandle<'_>, - modules: Vec>, - ) -> Result, FatalError>; /// Performs fat LTO by merging all modules into a single one, running autodiff /// if necessary and running any further optimizations fn run_and_optimize_fat_lto( diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs index 5f1973b31a1..44b35e8921e 100644 --- a/compiler/rustc_session/src/options.rs +++ b/compiler/rustc_session/src/options.rs @@ -2168,8 +2168,6 @@ options! { "hash algorithm of source files used to check freshness in cargo (`blake3` or `sha256`)"), codegen_backend: Option = (None, parse_opt_string, [TRACKED], "the backend to use"), - combine_cgu: bool = (false, parse_bool, [TRACKED], - "combine CGUs into a single one"), contract_checks: Option = (None, parse_opt_bool, [TRACKED], "emit runtime checks for contract pre- and post-conditions (default: no)"), coverage_options: CoverageOptions = (CoverageOptions::default(), parse_coverage_options, [TRACKED], -- cgit 1.4.1-3-g733a5 From 24e2b4832bfa0bf1b60159af0ae11b15de55590e Mon Sep 17 00:00:00 2001 From: Zalathar Date: Sun, 27 Jul 2025 21:19:07 +1000 Subject: coverage: Infer `instances_used` from `pgo_func_name_var_map` In obscure circumstances, we would sometimes emit a covfun record for a function with no physical coverage counters, causing `llvm-cov` to fail with the cryptic error message: malformed instrumentation profile data: function name is empty We can eliminate this mismatch by removing `instances_used` entirely, and instead inferring its contents from the keys of `pgo_func_name_var_map`. This makes it impossible for a "used" function to lack a PGO name entry. --- .../rustc_codegen_llvm/src/coverageinfo/mapgen.rs | 12 ++++------ .../rustc_codegen_llvm/src/coverageinfo/mod.rs | 27 ++++++++++++++-------- 2 files changed, 21 insertions(+), 18 deletions(-) (limited to 'compiler/rustc_codegen_llvm/src') diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs index a9be833a643..8c9dfcfd18c 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs @@ -46,21 +46,17 @@ pub(crate) fn finalize(cx: &mut CodegenCx<'_, '_>) { debug!("Generating coverage map for CodegenUnit: `{}`", cx.codegen_unit.name()); // FIXME(#132395): Can this be none even when coverage is enabled? - let instances_used = match cx.coverage_cx { - Some(ref cx) => cx.instances_used.borrow(), - None => return, - }; + let Some(ref coverage_cx) = cx.coverage_cx else { return }; - let mut covfun_records = instances_used - .iter() - .copied() + let mut covfun_records = coverage_cx + .instances_used() + .into_iter() // Sort by symbol name, so that the global file table is built in an // order that doesn't depend on the stable-hash-based order in which // instances were visited during codegen. .sorted_by_cached_key(|&instance| tcx.symbol_name(instance).name) .filter_map(|instance| prepare_covfun_record(tcx, instance, true)) .collect::>(); - drop(instances_used); // In a single designated CGU, also prepare covfun records for functions // in this crate that were instrumented for coverage, but are unused. diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs index eefbd7cf6c4..35c322326f8 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs @@ -5,7 +5,7 @@ use rustc_abi::Size; use rustc_codegen_ssa::traits::{ BuilderMethods, ConstCodegenMethods, CoverageInfoBuilderMethods, MiscCodegenMethods, }; -use rustc_data_structures::fx::{FxHashMap, FxIndexSet}; +use rustc_data_structures::fx::{FxHashMap, FxIndexMap}; use rustc_middle::mir::coverage::CoverageKind; use rustc_middle::ty::Instance; use tracing::{debug, instrument}; @@ -20,9 +20,14 @@ mod mapgen; /// Extra per-CGU context/state needed for coverage instrumentation. pub(crate) struct CguCoverageContext<'ll, 'tcx> { - /// Coverage data for each instrumented function identified by DefId. - pub(crate) instances_used: RefCell>>, - pub(crate) pgo_func_name_var_map: RefCell, &'ll llvm::Value>>, + /// Associates function instances with an LLVM global that holds the + /// function's symbol name, as needed by LLVM coverage intrinsics. + /// + /// Instances in this map are also considered "used" for the purposes of + /// emitting covfun records. Every covfun record holds a hash of its + /// symbol name, and `llvm-cov` will exit fatally if it can't resolve that + /// hash back to an entry in the binary's `__llvm_prf_names` linker section. + pub(crate) pgo_func_name_var_map: RefCell, &'ll llvm::Value>>, pub(crate) mcdc_condition_bitmap_map: RefCell, Vec<&'ll llvm::Value>>>, covfun_section_name: OnceCell, @@ -31,7 +36,6 @@ pub(crate) struct CguCoverageContext<'ll, 'tcx> { impl<'ll, 'tcx> CguCoverageContext<'ll, 'tcx> { pub(crate) fn new() -> Self { Self { - instances_used: RefCell::>::default(), pgo_func_name_var_map: Default::default(), mcdc_condition_bitmap_map: Default::default(), covfun_section_name: Default::default(), @@ -53,6 +57,14 @@ impl<'ll, 'tcx> CguCoverageContext<'ll, 'tcx> { .and_then(|bitmap_map| bitmap_map.get(decision_depth as usize)) .copied() // Dereference Option<&&Value> to Option<&Value> } + + /// Returns the list of instances considered "used" in this CGU, as + /// inferred from the keys of `pgo_func_name_var_map`. + pub(crate) fn instances_used(&self) -> Vec> { + // Collecting into a Vec is way easier than trying to juggle RefCell + // projections, and this should only run once per CGU anyway. + self.pgo_func_name_var_map.borrow().keys().copied().collect::>() + } } impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { @@ -151,11 +163,6 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> { return; }; - // Mark the instance as used in this CGU, for coverage purposes. - // This includes functions that were not partitioned into this CGU, - // but were MIR-inlined into one of this CGU's functions. - coverage_cx.instances_used.borrow_mut().insert(instance); - match *kind { CoverageKind::SpanMarker | CoverageKind::BlockMarker { .. } => unreachable!( "marker statement {kind:?} should have been removed by CleanupPostBorrowck" -- cgit 1.4.1-3-g733a5 From 89b6b0b6a482cb0d632f54f14936def9c034570d Mon Sep 17 00:00:00 2001 From: Zalathar Date: Sun, 27 Jul 2025 21:29:48 +1000 Subject: coverage: Clarify that getting a PGO name also makes a function "used" --- compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'compiler/rustc_codegen_llvm/src') diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs index 35c322326f8..119237abd6b 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs @@ -90,7 +90,10 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { /// string, to hold the function name passed to LLVM intrinsic /// `instrprof.increment()`. The `Value` is only created once per instance. /// Multiple invocations with the same instance return the same `Value`. - fn get_pgo_func_name_var(&self, instance: Instance<'tcx>) -> &'ll llvm::Value { + /// + /// This has the side-effect of causing coverage codegen to consider this + /// function "used", making it eligible to emit an associated covfun record. + fn ensure_pgo_func_name_var(&self, instance: Instance<'tcx>) -> &'ll llvm::Value { debug!("getting pgo_func_name_var for instance={:?}", instance); let mut pgo_func_name_var_map = self.coverage_cx().pgo_func_name_var_map.borrow_mut(); pgo_func_name_var_map.entry(instance).or_insert_with(|| { @@ -114,7 +117,7 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> { return; } - let fn_name = self.get_pgo_func_name_var(instance); + let fn_name = self.ensure_pgo_func_name_var(instance); let hash = self.const_u64(function_coverage_info.function_source_hash); let bitmap_bits = self.const_u32(function_coverage_info.mcdc_bitmap_bits as u32); self.mcdc_parameters(fn_name, hash, bitmap_bits); @@ -170,7 +173,7 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> { CoverageKind::VirtualCounter { bcb } if let Some(&id) = ids_info.phys_counter_for_node.get(&bcb) => { - let fn_name = bx.get_pgo_func_name_var(instance); + let fn_name = bx.ensure_pgo_func_name_var(instance); let hash = bx.const_u64(function_coverage_info.function_source_hash); let num_counters = bx.const_u32(ids_info.num_counters); let index = bx.const_u32(id.as_u32()); @@ -200,7 +203,7 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> { "bitmap index of the decision out of range" ); - let fn_name = bx.get_pgo_func_name_var(instance); + let fn_name = bx.ensure_pgo_func_name_var(instance); let hash = bx.const_u64(function_coverage_info.function_source_hash); let bitmap_index = bx.const_u32(bitmap_idx); bx.mcdc_tvbitmap_update(fn_name, hash, bitmap_index, cond_bitmap); -- cgit 1.4.1-3-g733a5