Auto merge of #133250 - DianQK:embed-bitcode-pgo, r=nikic

The embedded bitcode should always be prepared for LTO/ThinLTO Fixes #115344. Fixes #117220. There are currently two methods for generating bitcode that used for LTO. One method involves using `-C linker-plugin-lto` to emit object files as bitcode, which is the typical setting used by cargo. The other method is through `-C embed-bitcode=yes`. When using with `-C embed-bitcode=yes -C lto=no`, we run a complete non-LTO LLVM pipeline to obtain bitcode, then the bitcode is used for LTO. We run the Call Graph Profile Pass twice on the same module. This PR is doing something similar to LLVM's `buildFatLTODefaultPipeline`, obtaining the bitcode for embedding after running `buildThinLTOPreLinkDefaultPipeline`. r? nikic
author: bors <bors@rust-lang.org> 2025-03-01 08:22:18 +0000
committer: bors <bors@rust-lang.org> 2025-03-01 08:22:18 +0000
commit: 0c72c0d11adeba449886089c6bd5d48363f7a2cd (patch)
tree: 53caa967ed8e67b4eb3f41714c99a9ee76d7a043
parent: 002da76821d32c8807dc47da16660925d8cc9b62 (diff)
parent: a897cc03519c83e1c426be491c9a1bea63609e16 (diff)
download: rust-0c72c0d11adeba449886089c6bd5d48363f7a2cd.tar.gz
rust-0c72c0d11adeba449886089c6bd5d48363f7a2cd.zip
20 files changed, 294 insertions, 101 deletions
diff --git a/compiler/rustc_codegen_gcc/src/back/lto.rs b/compiler/rustc_codegen_gcc/src/back/lto.rs
index cb4caec8c32..e5221c7da31 100644
--- a/compiler/rustc_codegen_gcc/src/back/lto.rs
+++ b/compiler/rustc_codegen_gcc/src/back/lto.rs
@@ -632,17 +632,16 @@ pub unsafe fn optimize_thin_module(
             Arc::new(SyncContext::new(context))
         }
     };
-    let module = ModuleCodegen {
-        module_llvm: GccContext {
+    let module = ModuleCodegen::new_regular(
+        thin_module.name().to_string(),
+        GccContext {
             context,
             should_combine_object_files,
             // TODO(antoyo): use the correct relocation model here.
             relocation_model: RelocModel::Pic,
             temp_dir: None,
         },
-        name: thin_module.name().to_string(),
-        kind: ModuleKind::Regular,
-    };
+    );
     /*{
         let target = &*module.module_llvm.tm;
         let llmod = module.module_llvm.llmod();
diff --git a/compiler/rustc_codegen_gcc/src/base.rs b/compiler/rustc_codegen_gcc/src/base.rs
index 962f4b161d7..9b495174a3f 100644
--- a/compiler/rustc_codegen_gcc/src/base.rs
+++ b/compiler/rustc_codegen_gcc/src/base.rs
@@ -4,10 +4,10 @@ use std::sync::Arc;
 use std::time::Instant;
 
 use gccjit::{CType, Context, FunctionType, GlobalKind};
+use rustc_codegen_ssa::ModuleCodegen;
 use rustc_codegen_ssa::base::maybe_create_entry_wrapper;
 use rustc_codegen_ssa::mono_item::MonoItemExt;
 use rustc_codegen_ssa::traits::DebugInfoCodegenMethods;
-use rustc_codegen_ssa::{ModuleCodegen, ModuleKind};
 use rustc_middle::dep_graph;
 use rustc_middle::mir::mono::Linkage;
 #[cfg(feature = "master")]
@@ -237,16 +237,15 @@ pub fn compile_codegen_unit(
             }
         }
 
-        ModuleCodegen {
-            name: cgu_name.to_string(),
-            module_llvm: GccContext {
+        ModuleCodegen::new_regular(
+            cgu_name.to_string(),
+            GccContext {
                 context: Arc::new(SyncContext::new(context)),
                 relocation_model: tcx.sess.relocation_model(),
                 should_combine_object_files: false,
                 temp_dir: None,
             },
-            kind: ModuleKind::Regular,
-        }
+        )
     }
 
     (module, cost)
diff --git a/compiler/rustc_codegen_gcc/src/lib.rs b/compiler/rustc_codegen_gcc/src/lib.rs
index 6455bcec685..9d91aab72ab 100644
--- a/compiler/rustc_codegen_gcc/src/lib.rs
+++ b/compiler/rustc_codegen_gcc/src/lib.rs
@@ -393,7 +393,7 @@ impl WriteBackendMethods for GccCodegenBackend {
     unsafe fn optimize(
         _cgcx: &CodegenContext<Self>,
         _dcx: DiagCtxtHandle<'_>,
-        module: &ModuleCodegen<Self::Module>,
+        module: &mut ModuleCodegen<Self::Module>,
         config: &ModuleConfig,
     ) -> Result<(), FatalError> {
         module.module_llvm.context.set_optimization_level(to_gcc_opt_level(config.opt_level));
diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs
index 05a2cd1c5a3..668795191a2 100644
--- a/compiler/rustc_codegen_llvm/src/back/lto.rs
+++ b/compiler/rustc_codegen_llvm/src/back/lto.rs
@@ -2,6 +2,7 @@ use std::collections::BTreeMap;
 use std::ffi::{CStr, CString};
 use std::fs::File;
 use std::path::Path;
+use std::ptr::NonNull;
 use std::sync::Arc;
 use std::{io, iter, slice};
 
@@ -305,11 +306,8 @@ fn fat_lto(
             assert!(!serialized_modules.is_empty(), "must have at least one serialized module");
             let (buffer, name) = serialized_modules.remove(0);
             info!("no in-memory regular modules to choose from, parsing {:?}", name);
-            ModuleCodegen {
-                module_llvm: ModuleLlvm::parse(cgcx, &name, buffer.data(), dcx)?,
-                name: name.into_string().unwrap(),
-                kind: ModuleKind::Regular,
-            }
+            let llvm_module = ModuleLlvm::parse(cgcx, &name, buffer.data(), dcx)?;
+            ModuleCodegen::new_regular(name.into_string().unwrap(), llvm_module)
         }
     };
     {
@@ -655,14 +653,14 @@ pub(crate) fn run_pass_manager(
     }
 
     unsafe {
-        write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, stage)?;
+        write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?;
     }
 
     if cfg!(llvm_enzyme) && enable_ad {
         let opt_stage = llvm::OptStage::FatLTO;
         let stage = write::AutodiffStage::PostAD;
         unsafe {
-            write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, stage)?;
+            write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?;
         }
 
         // This is the final IR, so people should be able to inspect the optimized autodiff output.
@@ -729,6 +727,11 @@ impl ThinBuffer {
             ThinBuffer(buffer)
         }
     }
+
+    pub unsafe fn from_raw_ptr(ptr: *mut llvm::ThinLTOBuffer) -> ThinBuffer {
+        let mut ptr = NonNull::new(ptr).unwrap();
+        ThinBuffer(unsafe { ptr.as_mut() })
+    }
 }
 
 impl ThinBufferMethods for ThinBuffer {
@@ -772,11 +775,11 @@ pub(crate) unsafe fn optimize_thin_module(
     // crates but for locally codegened modules we may be able to reuse
     // that LLVM Context and Module.
     let module_llvm = ModuleLlvm::parse(cgcx, module_name, thin_module.data(), dcx)?;
-    let mut module = ModuleCodegen {
-        module_llvm,
-        name: thin_module.name().to_string(),
-        kind: ModuleKind::Regular,
-    };
+    let mut module = ModuleCodegen::new_regular(thin_module.name(), module_llvm);
+    // Given that the newly created module lacks a thinlto buffer for embedding, we need to re-add it here.
+    if cgcx.config(ModuleKind::Regular).embed_bitcode() {
+        module.thin_lto_buffer = Some(thin_module.data().to_vec());
+    }
     {
         let target = &*module.module_llvm.tm;
         let llmod = module.module_llvm.llmod();
diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs
index 29d6121844f..bead4c82a81 100644
--- a/compiler/rustc_codegen_llvm/src/back/write.rs
+++ b/compiler/rustc_codegen_llvm/src/back/write.rs
@@ -1,6 +1,7 @@
 use std::ffi::{CStr, CString};
 use std::io::{self, Write};
 use std::path::{Path, PathBuf};
+use std::ptr::null_mut;
 use std::sync::Arc;
 use std::{fs, slice, str};
 
@@ -15,7 +16,7 @@ use rustc_codegen_ssa::back::write::{
     TargetMachineFactoryFn,
 };
 use rustc_codegen_ssa::traits::*;
-use rustc_codegen_ssa::{CompiledModule, ModuleCodegen};
+use rustc_codegen_ssa::{CompiledModule, ModuleCodegen, ModuleKind};
 use rustc_data_structures::profiling::SelfProfilerRef;
 use rustc_data_structures::small_c_str::SmallCStr;
 use rustc_errors::{DiagCtxtHandle, FatalError, Level};
@@ -551,6 +552,7 @@ pub(crate) unsafe fn llvm_optimize(
     cgcx: &CodegenContext<LlvmCodegenBackend>,
     dcx: DiagCtxtHandle<'_>,
     module: &ModuleCodegen<ModuleLlvm>,
+    thin_lto_buffer: Option<&mut *mut llvm::ThinLTOBuffer>,
     config: &ModuleConfig,
     opt_level: config::OptLevel,
     opt_stage: llvm::OptStage,
@@ -584,7 +586,17 @@ pub(crate) unsafe fn llvm_optimize(
         vectorize_loop = config.vectorize_loop;
     }
     trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop, ?run_enzyme);
-    let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
+    if thin_lto_buffer.is_some() {
+        assert!(
+            matches!(
+                opt_stage,
+                llvm::OptStage::PreLinkNoLTO
+                    | llvm::OptStage::PreLinkFatLTO
+                    | llvm::OptStage::PreLinkThinLTO
+            ),
+            "the bitcode for LTO can only be obtained at the pre-link stage"
+        );
+    }
     let pgo_gen_path = get_pgo_gen_path(config);
     let pgo_use_path = get_pgo_use_path(config);
     let pgo_sample_use_path = get_pgo_sample_use_path(config);
@@ -644,7 +656,9 @@ pub(crate) unsafe fn llvm_optimize(
             config.no_prepopulate_passes,
             config.verify_llvm_ir,
             config.lint_llvm_ir,
-            using_thin_buffers,
+            thin_lto_buffer,
+            config.emit_thin_lto,
+            config.emit_thin_lto_summary,
             config.merge_functions,
             unroll_loops,
             vectorize_slp,
@@ -675,7 +689,7 @@ pub(crate) unsafe fn llvm_optimize(
 pub(crate) unsafe fn optimize(
     cgcx: &CodegenContext<LlvmCodegenBackend>,
     dcx: DiagCtxtHandle<'_>,
-    module: &ModuleCodegen<ModuleLlvm>,
+    module: &mut ModuleCodegen<ModuleLlvm>,
     config: &ModuleConfig,
 ) -> Result<(), FatalError> {
     let _timer = cgcx.prof.generic_activity_with_arg("LLVM_module_optimize", &*module.name);
@@ -705,9 +719,53 @@ pub(crate) unsafe fn optimize(
         // Otherwise we pretend AD is already done and run the normal opt pipeline (=PostAD).
         let consider_ad = cfg!(llvm_enzyme) && config.autodiff.contains(&config::AutoDiff::Enable);
         let autodiff_stage = if consider_ad { AutodiffStage::PreAD } else { AutodiffStage::PostAD };
-        return unsafe {
-            llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, autodiff_stage)
+        // The embedded bitcode is used to run LTO/ThinLTO.
+        // The bitcode obtained during the `codegen` phase is no longer suitable for performing LTO.
+        // It may have undergone LTO due to ThinLocal, so we need to obtain the embedded bitcode at
+        // this point.
+        let mut thin_lto_buffer = if (module.kind == ModuleKind::Regular
+            && config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full))
+            || config.emit_thin_lto_summary
+        {
+            Some(null_mut())
+        } else {
+            None
         };
+        unsafe {
+            llvm_optimize(
+                cgcx,
+                dcx,
+                module,
+                thin_lto_buffer.as_mut(),
+                config,
+                opt_level,
+                opt_stage,
+                autodiff_stage,
+            )
+        }?;
+        if let Some(thin_lto_buffer) = thin_lto_buffer {
+            let thin_lto_buffer = unsafe { ThinBuffer::from_raw_ptr(thin_lto_buffer) };
+            module.thin_lto_buffer = Some(thin_lto_buffer.data().to_vec());
+            let bc_summary_out =
+                cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
+            if config.emit_thin_lto_summary
+                && let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
+            {
+                let summary_data = thin_lto_buffer.thin_link_data();
+                cgcx.prof.artifact_size(
+                    "llvm_bitcode_summary",
+                    thin_link_bitcode_filename.to_string_lossy(),
+                    summary_data.len() as u64,
+                );
+                let _timer = cgcx.prof.generic_activity_with_arg(
+                    "LLVM_module_codegen_emit_bitcode_summary",
+                    &*module.name,
+                );
+                if let Err(err) = fs::write(&bc_summary_out, summary_data) {
+                    dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
+                }
+            }
+        }
     }
     Ok(())
 }
@@ -760,59 +818,41 @@ pub(crate) unsafe fn codegen(
         // otherwise requested.
 
         let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
-        let bc_summary_out =
-            cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
         let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name);
 
         if config.bitcode_needed() {
-            let _timer = cgcx
-                .prof
-                .generic_activity_with_arg("LLVM_module_codegen_make_bitcode", &*module.name);
-            let thin = ThinBuffer::new(llmod, config.emit_thin_lto, config.emit_thin_lto_summary);
-            let data = thin.data();
-
-            if let Some(bitcode_filename) = bc_out.file_name() {
-                cgcx.prof.artifact_size(
-                    "llvm_bitcode",
-                    bitcode_filename.to_string_lossy(),
-                    data.len() as u64,
-                );
-            }
-
-            if config.emit_thin_lto_summary
-                && let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
-            {
-                let summary_data = thin.thin_link_data();
-                cgcx.prof.artifact_size(
-                    "llvm_bitcode_summary",
-                    thin_link_bitcode_filename.to_string_lossy(),
-                    summary_data.len() as u64,
-                );
-
-                let _timer = cgcx.prof.generic_activity_with_arg(
-                    "LLVM_module_codegen_emit_bitcode_summary",
-                    &*module.name,
-                );
-                if let Err(err) = fs::write(&bc_summary_out, summary_data) {
-                    dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
-                }
-            }
-
             if config.emit_bc || config.emit_obj == EmitObj::Bitcode {
+                let thin = {
+                    let _timer = cgcx.prof.generic_activity_with_arg(
+                        "LLVM_module_codegen_make_bitcode",
+                        &*module.name,
+                    );
+                    ThinBuffer::new(llmod, config.emit_thin_lto, false)
+                };
+                let data = thin.data();
                 let _timer = cgcx
                     .prof
                     .generic_activity_with_arg("LLVM_module_codegen_emit_bitcode", &*module.name);
+                if let Some(bitcode_filename) = bc_out.file_name() {
+                    cgcx.prof.artifact_size(
+                        "llvm_bitcode",
+                        bitcode_filename.to_string_lossy(),
+                        data.len() as u64,
+                    );
+                }
                 if let Err(err) = fs::write(&bc_out, data) {
                     dcx.emit_err(WriteBytecode { path: &bc_out, err });
                 }
             }
 
-            if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full) {
+            if config.embed_bitcode() && module.kind == ModuleKind::Regular {
                 let _timer = cgcx
                     .prof
                     .generic_activity_with_arg("LLVM_module_codegen_embed_bitcode", &*module.name);
+                let thin_bc =
+                    module.thin_lto_buffer.as_deref().expect("cannot find embedded bitcode");
                 unsafe {
-                    embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, data);
+                    embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, &thin_bc);
                 }
             }
         }
diff --git a/compiler/rustc_codegen_llvm/src/base.rs b/compiler/rustc_codegen_llvm/src/base.rs
index d35c7945bae..6bd27914dbd 100644
--- a/compiler/rustc_codegen_llvm/src/base.rs
+++ b/compiler/rustc_codegen_llvm/src/base.rs
@@ -13,10 +13,10 @@
 
 use std::time::Instant;
 
+use rustc_codegen_ssa::ModuleCodegen;
 use rustc_codegen_ssa::base::maybe_create_entry_wrapper;
 use rustc_codegen_ssa::mono_item::MonoItemExt;
 use rustc_codegen_ssa::traits::*;
-use rustc_codegen_ssa::{ModuleCodegen, ModuleKind};
 use rustc_data_structures::small_c_str::SmallCStr;
 use rustc_middle::dep_graph;
 use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
@@ -133,11 +133,7 @@ pub(crate) fn compile_codegen_unit(
             }
         }
 
-        ModuleCodegen {
-            name: cgu_name.to_string(),
-            module_llvm: llvm_module,
-            kind: ModuleKind::Regular,
-        }
+        ModuleCodegen::new_regular(cgu_name.to_string(), llvm_module)
     }
 
     (module, cost)
diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs
index e9e1b644f18..c88372db491 100644
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@@ -194,7 +194,7 @@ impl WriteBackendMethods for LlvmCodegenBackend {
     unsafe fn optimize(
         cgcx: &CodegenContext<Self>,
         dcx: DiagCtxtHandle<'_>,
-        module: &ModuleCodegen<Self::Module>,
+        module: &mut ModuleCodegen<Self::Module>,
         config: &ModuleConfig,
     ) -> Result<(), FatalError> {
         unsafe { back::write::optimize(cgcx, dcx, module, config) }
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index da91e6edbcf..2dc14e4613d 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -2425,7 +2425,9 @@ unsafe extern "C" {
         NoPrepopulatePasses: bool,
         VerifyIR: bool,
         LintIR: bool,
-        UseThinLTOBuffers: bool,
+        ThinLTOBuffer: Option<&mut *mut ThinLTOBuffer>,
+        EmitThinLTO: bool,
+        EmitThinLTOSummary: bool,
         MergeFunctions: bool,
         UnrollLoops: bool,
         SLPVectorize: bool,
diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index f008bd12ed8..c8bb229998e 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -278,6 +278,10 @@ impl ModuleConfig {
             || self.emit_obj == EmitObj::Bitcode
             || self.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full)
     }
+
+    pub fn embed_bitcode(&self) -> bool {
+        self.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full)
+    }
 }
 
 /// Configuration passed to the function returned by the `target_machine_factory`.
@@ -877,14 +881,14 @@ pub(crate) fn compute_per_cgu_lto_type(
 
 fn execute_optimize_work_item<B: ExtraBackendMethods>(
     cgcx: &CodegenContext<B>,
-    module: ModuleCodegen<B::Module>,
+    mut module: ModuleCodegen<B::Module>,
     module_config: &ModuleConfig,
 ) -> Result<WorkItemResult<B>, FatalError> {
     let dcx = cgcx.create_dcx();
     let dcx = dcx.handle();
 
     unsafe {
-        B::optimize(cgcx, dcx, &module, module_config)?;
+        B::optimize(cgcx, dcx, &mut module, module_config)?;
     }
 
     // After we've done the initial round of optimizations we need to
diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs
index 73a97d32c2d..63f2f8fa3d1 100644
--- a/compiler/rustc_codegen_ssa/src/base.rs
+++ b/compiler/rustc_codegen_ssa/src/base.rs
@@ -687,7 +687,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
         submit_codegened_module_to_llvm(
             &backend,
             &ongoing_codegen.coordinator.sender,
-            ModuleCodegen { name: llmod_id, module_llvm, kind: ModuleKind::Allocator },
+            ModuleCodegen::new_allocator(llmod_id, module_llvm),
             cost,
         );
     }
diff --git a/compiler/rustc_codegen_ssa/src/lib.rs b/compiler/rustc_codegen_ssa/src/lib.rs
index 9d2ac219d59..4e758bfdec3 100644
--- a/compiler/rustc_codegen_ssa/src/lib.rs
+++ b/compiler/rustc_codegen_ssa/src/lib.rs
@@ -75,9 +75,29 @@ pub struct ModuleCodegen<M> {
     pub name: String,
     pub module_llvm: M,
     pub kind: ModuleKind,
+    /// Saving the ThinLTO buffer for embedding in the object file.
+    pub thin_lto_buffer: Option<Vec<u8>>,
 }
 
 impl<M> ModuleCodegen<M> {
+    pub fn new_regular(name: impl Into<String>, module: M) -> Self {
+        Self {
+            name: name.into(),
+            module_llvm: module,
+            kind: ModuleKind::Regular,
+            thin_lto_buffer: None,
+        }
+    }
+
+    pub fn new_allocator(name: impl Into<String>, module: M) -> Self {
+        Self {
+            name: name.into(),
+            module_llvm: module,
+            kind: ModuleKind::Allocator,
+            thin_lto_buffer: None,
+        }
+    }
+
     pub fn into_compiled_module(
         self,
         emit_obj: bool,
diff --git a/compiler/rustc_codegen_ssa/src/traits/write.rs b/compiler/rustc_codegen_ssa/src/traits/write.rs
index 97fe614aa10..c77efdd1728 100644
--- a/compiler/rustc_codegen_ssa/src/traits/write.rs
+++ b/compiler/rustc_codegen_ssa/src/traits/write.rs
@@ -40,7 +40,7 @@ pub trait WriteBackendMethods: 'static + Sized + Clone {
     unsafe fn optimize(
         cgcx: &CodegenContext<Self>,
         dcx: DiagCtxtHandle<'_>,
-        module: &ModuleCodegen<Self::Module>,
+        module: &mut ModuleCodegen<Self::Module>,
         config: &ModuleConfig,
     ) -> Result<(), FatalError>;
     fn optimize_fat(
diff --git a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp
index 9ce4abdb432..6e607baebb5 100644
--- a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp
+++ b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp
@@ -7,6 +7,7 @@
 #include "llvm/Analysis/Lint.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/Bitcode/BitcodeWriterPass.h"
 #include "llvm/CodeGen/CommandFlags.h"
 #include "llvm/IR/AssemblyAnnotationWriter.h"
 #include "llvm/IR/AutoUpgrade.h"
@@ -37,6 +38,7 @@
 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
 #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
+#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
 #include "llvm/Transforms/Utils/CanonicalizeAliases.h"
 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
 #include "llvm/Transforms/Utils/NameAnonGlobals.h"
@@ -195,6 +197,19 @@ extern "C" void LLVMRustTimeTraceProfilerFinish(const char *FileName) {
 GEN_SUBTARGETS
 #undef SUBTARGET
 
+// This struct and various functions are sort of a hack right now, but the
+// problem is that we've got in-memory LLVM modules after we generate and
+// optimize all codegen-units for one compilation in rustc. To be compatible
+// with the LTO support above we need to serialize the modules plus their
+// ThinLTO summary into memory.
+//
+// This structure is basically an owned version of a serialize module, with
+// a ThinLTO summary attached.
+struct LLVMRustThinLTOBuffer {
+  std::string data;
+  std::string thin_link_data;
+};
+
 extern "C" bool LLVMRustHasFeature(LLVMTargetMachineRef TM,
                                    const char *Feature) {
   TargetMachine *Target = unwrap(TM);
@@ -704,7 +719,8 @@ extern "C" LLVMRustResult LLVMRustOptimize(
     LLVMModuleRef ModuleRef, LLVMTargetMachineRef TMRef,
     LLVMRustPassBuilderOptLevel OptLevelRust, LLVMRustOptStage OptStage,
     bool IsLinkerPluginLTO, bool NoPrepopulatePasses, bool VerifyIR,
-    bool LintIR, bool UseThinLTOBuffers, bool MergeFunctions, bool UnrollLoops,
+    bool LintIR, LLVMRustThinLTOBuffer **ThinLTOBufferRef, bool EmitThinLTO,
+    bool EmitThinLTOSummary, bool MergeFunctions, bool UnrollLoops,
     bool SLPVectorize, bool LoopVectorize, bool DisableSimplifyLibCalls,
     bool EmitLifetimeMarkers, bool RunEnzyme,
     LLVMRustSanitizerOptions *SanitizerOptions, const char *PGOGenPath,
@@ -952,7 +968,10 @@ extern "C" LLVMRustResult LLVMRustOptimize(
   }
 
   ModulePassManager MPM;
-  bool NeedThinLTOBufferPasses = UseThinLTOBuffers;
+  bool NeedThinLTOBufferPasses = EmitThinLTO;
+  auto ThinLTOBuffer = std::make_unique<LLVMRustThinLTOBuffer>();
+  raw_string_ostream ThinLTODataOS(ThinLTOBuffer->data);
+  raw_string_ostream ThinLinkDataOS(ThinLTOBuffer->thin_link_data);
   if (!NoPrepopulatePasses) {
     // The pre-link pipelines don't support O0 and require using
     // buildO0DefaultPipeline() instead. At the same time, the LTO pipelines do
@@ -976,7 +995,25 @@ extern "C" LLVMRustResult LLVMRustOptimize(
 
       switch (OptStage) {
       case LLVMRustOptStage::PreLinkNoLTO:
-        MPM = PB.buildPerModuleDefaultPipeline(OptLevel);
+        if (ThinLTOBufferRef) {
+          // This is similar to LLVM's `buildFatLTODefaultPipeline`, where the
+          // bitcode for embedding is obtained after performing
+          // `ThinLTOPreLinkDefaultPipeline`.
+          MPM.addPass(PB.buildThinLTOPreLinkDefaultPipeline(OptLevel));
+          if (EmitThinLTO) {
+            MPM.addPass(ThinLTOBitcodeWriterPass(
+                ThinLTODataOS, EmitThinLTOSummary ? &ThinLinkDataOS : nullptr));
+          } else {
+            MPM.addPass(BitcodeWriterPass(ThinLTODataOS));
+          }
+          *ThinLTOBufferRef = ThinLTOBuffer.release();
+          MPM.addPass(PB.buildModuleOptimizationPipeline(
+              OptLevel, ThinOrFullLTOPhase::None));
+          MPM.addPass(
+              createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
+        } else {
+          MPM = PB.buildPerModuleDefaultPipeline(OptLevel);
+        }
         break;
       case LLVMRustOptStage::PreLinkThinLTO:
         MPM = PB.buildThinLTOPreLinkDefaultPipeline(OptLevel);
@@ -1022,6 +1059,16 @@ extern "C" LLVMRustResult LLVMRustOptimize(
     MPM.addPass(CanonicalizeAliasesPass());
     MPM.addPass(NameAnonGlobalPass());
   }
+  // For `-Copt-level=0`, ThinLTO, or LTO.
+  if (ThinLTOBufferRef && *ThinLTOBufferRef == nullptr) {
+    if (EmitThinLTO) {
+      MPM.addPass(ThinLTOBitcodeWriterPass(
+          ThinLTODataOS, EmitThinLTOSummary ? &ThinLinkDataOS : nullptr));
+    } else {
+      MPM.addPass(BitcodeWriterPass(ThinLTODataOS));
+    }
+    *ThinLTOBufferRef = ThinLTOBuffer.release();
+  }
 
   // now load "-enzyme" pass:
 #ifdef ENZYME
@@ -1500,19 +1547,6 @@ extern "C" bool LLVMRustPrepareThinLTOImport(const LLVMRustThinLTOData *Data,
   return true;
 }
 
-// This struct and various functions are sort of a hack right now, but the
-// problem is that we've got in-memory LLVM modules after we generate and
-// optimize all codegen-units for one compilation in rustc. To be compatible
-// with the LTO support above we need to serialize the modules plus their
-// ThinLTO summary into memory.
-//
-// This structure is basically an owned version of a serialize module, with
-// a ThinLTO summary attached.
-struct LLVMRustThinLTOBuffer {
-  std::string data;
-  std::string thin_link_data;
-};
-
 extern "C" LLVMRustThinLTOBuffer *
 LLVMRustThinLTOBufferCreate(LLVMModuleRef M, bool is_thin, bool emit_summary) {
   auto Ret = std::make_unique<LLVMRustThinLTOBuffer>();
diff --git a/compiler/rustc_session/src/config.rs b/compiler/rustc_session/src/config.rs
index ba7ccd6a02d..7586c5766b5 100644
--- a/compiler/rustc_session/src/config.rs
+++ b/compiler/rustc_session/src/config.rs
@@ -539,7 +539,10 @@ impl FromStr for SplitDwarfKind {
 #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, PartialOrd, Ord, HashStable_Generic)]
 #[derive(Encodable, Decodable)]
 pub enum OutputType {
+    /// This is the optimized bitcode, which could be either pre-LTO or non-LTO bitcode,
+    /// depending on the specific request type.
     Bitcode,
+    /// This is the summary or index data part of the ThinLTO bitcode.
     ThinLinkBitcode,
     Assembly,
     LlvmAssembly,
diff --git a/tests/codegen/overaligned-constant.rs b/tests/codegen/overaligned-constant.rs
index e5540aca387..0f5977880f2 100644
--- a/tests/codegen/overaligned-constant.rs
+++ b/tests/codegen/overaligned-constant.rs
@@ -9,14 +9,14 @@ struct S(i32);
 
 struct SmallStruct(f32, Option<S>, &'static [f32]);
 
-// CHECK: @0 = private unnamed_addr constant
+// CHECK: [[const:@.*]] = private unnamed_addr constant
 // CHECK-SAME: , align 8
 
 #[no_mangle]
 pub fn overaligned_constant() {
     // CHECK-LABEL: @overaligned_constant
     // CHECK: [[full:%_.*]] = alloca [32 x i8], align 8
-    // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[full]], ptr align 8 @0, i64 32, i1 false)
+    // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[full]], ptr align 8 [[const]], i64 32, i1 false)
     let mut s = S(1);
 
     s.0 = 3;
diff --git a/tests/codegen/uninit-consts.rs b/tests/codegen/uninit-consts.rs
index 649927b87b4..a58008e171e 100644
--- a/tests/codegen/uninit-consts.rs
+++ b/tests/codegen/uninit-consts.rs
@@ -11,15 +11,15 @@ pub struct PartiallyUninit {
     y: MaybeUninit<[u8; 10]>,
 }
 
-// CHECK: [[FULLY_UNINIT:@[0-9]+]] = private unnamed_addr constant <{ [10 x i8] }> undef
+// CHECK: [[FULLY_UNINIT:@.*]] = private unnamed_addr constant <{ [10 x i8] }> undef
 
-// CHECK: [[PARTIALLY_UNINIT:@[0-9]+]] = private unnamed_addr constant <{ [4 x i8], [12 x i8] }> <{ [4 x i8] c"{{\\EF\\BE\\AD\\DE|\\DE\\AD\\BE\\EF}}", [12 x i8] undef }>, align 4
+// CHECK: [[PARTIALLY_UNINIT:@.*]] = private unnamed_addr constant <{ [4 x i8], [12 x i8] }> <{ [4 x i8] c"{{\\EF\\BE\\AD\\DE|\\DE\\AD\\BE\\EF}}", [12 x i8] undef }>, align 4
 
 // This shouldn't contain undef, since it contains more chunks
 // than the default value of uninit_const_chunk_threshold.
-// CHECK: [[UNINIT_PADDING_HUGE:@[0-9]+]] = private unnamed_addr constant <{ [32768 x i8] }> <{ [32768 x i8] c"{{.+}}" }>, align 4
+// CHECK: [[UNINIT_PADDING_HUGE:@.*]] = private unnamed_addr constant <{ [32768 x i8] }> <{ [32768 x i8] c"{{.+}}" }>, align 4
 
-// CHECK: [[FULLY_UNINIT_HUGE:@[0-9]+]] = private unnamed_addr constant <{ [16384 x i8] }> undef
+// CHECK: [[FULLY_UNINIT_HUGE:@.*]] = private unnamed_addr constant <{ [16384 x i8] }> undef
 
 // CHECK-LABEL: @fully_uninit
 #[no_mangle]
diff --git a/tests/run-make/pgo-embed-bc-lto/interesting.rs b/tests/run-make/pgo-embed-bc-lto/interesting.rs
new file mode 100644
index 00000000000..13105c17e12
--- /dev/null
+++ b/tests/run-make/pgo-embed-bc-lto/interesting.rs
@@ -0,0 +1,16 @@
+#![crate_name = "interesting"]
+#![crate_type = "rlib"]
+
+extern crate opaque;
+
+#[no_mangle]
+#[inline(never)]
+pub fn function_called_once() {
+    opaque::foo();
+}
+
+// CHECK-LABEL: @function_called_once
+// CHECK-SAME: !prof [[function_called_once_id:![0-9]+]] {
+// CHECK: "CG Profile"
+// CHECK-NOT: "CG Profile"
+// CHECK-DAG: [[function_called_once_id]] = !{!"function_entry_count", i64 1}
diff --git a/tests/run-make/pgo-embed-bc-lto/main.rs b/tests/run-make/pgo-embed-bc-lto/main.rs
new file mode 100644
index 00000000000..ce8747bef3c
--- /dev/null
+++ b/tests/run-make/pgo-embed-bc-lto/main.rs
@@ -0,0 +1,5 @@
+extern crate interesting;
+
+fn main() {
+    interesting::function_called_once();
+}
diff --git a/tests/run-make/pgo-embed-bc-lto/opaque.rs b/tests/run-make/pgo-embed-bc-lto/opaque.rs
new file mode 100644
index 00000000000..b4467dc7796
--- /dev/null
+++ b/tests/run-make/pgo-embed-bc-lto/opaque.rs
@@ -0,0 +1,5 @@
+#![crate_name = "opaque"]
+#![crate_type = "rlib"]
+
+#[inline(never)]
+pub fn foo() {}
diff --git a/tests/run-make/pgo-embed-bc-lto/rmake.rs b/tests/run-make/pgo-embed-bc-lto/rmake.rs
new file mode 100644
index 00000000000..b7eba0c68e3
--- /dev/null
+++ b/tests/run-make/pgo-embed-bc-lto/rmake.rs
@@ -0,0 +1,67 @@
+// This test case verifies that we successfully complete an LTO build with PGO
+// using the embedded bitcode.
+// It also ensures that the generated IR correctly includes the call results.
+
+//@ needs-profiler-runtime
+//@ ignore-cross-compile
+
+use std::path::Path;
+
+use run_make_support::{
+    has_extension, has_prefix, llvm_filecheck, llvm_profdata, rfs, run, rustc, shallow_find_files,
+};
+
+fn run_test(cg_units: usize) {
+    let path_prof_data_dir = Path::new("prof_data_dir");
+    if path_prof_data_dir.exists() {
+        rfs::remove_dir_all(path_prof_data_dir);
+    }
+    rfs::create_dir_all(&path_prof_data_dir);
+    let path_merged_profdata = path_prof_data_dir.join("merged.profdata");
+    rustc().input("opaque.rs").codegen_units(1).run();
+    rustc()
+        .input("interesting.rs")
+        .profile_generate(&path_prof_data_dir)
+        .opt()
+        .crate_type("lib,cdylib")
+        .codegen_units(cg_units)
+        .run();
+    rustc()
+        .input("main.rs")
+        .arg("-Clto=thin")
+        .opt()
+        .codegen_units(cg_units)
+        .profile_generate(&path_prof_data_dir)
+        .opt()
+        .run();
+    run("main");
+    llvm_profdata().merge().output(&path_merged_profdata).input(path_prof_data_dir).run();
+    rustc()
+        .input("interesting.rs")
+        .profile_use(&path_merged_profdata)
+        .opt()
+        .crate_type("lib,cdylib")
+        .codegen_units(cg_units)
+        .emit("link")
+        .run();
+    rustc()
+        .input("main.rs")
+        .arg("-Clto=thin")
+        .opt()
+        .codegen_units(cg_units)
+        .profile_use(&path_merged_profdata)
+        .emit("llvm-ir,link")
+        .opt()
+        .run();
+    let files = shallow_find_files(".", |path| {
+        has_prefix(path, "main.interesting.interesting") && has_extension(path, "ll")
+    });
+    assert_eq!(files.len(), 1);
+    let llvm_ir = &files[0];
+    llvm_filecheck().patterns("interesting.rs").stdin_buf(rfs::read(llvm_ir)).run();
+}
+
+fn main() {
+    run_test(1);
+    run_test(16);
+}
author	bors <bors@rust-lang.org>	2025-03-01 08:22:18 +0000
committer	bors <bors@rust-lang.org>	2025-03-01 08:22:18 +0000
commit	0c72c0d11adeba449886089c6bd5d48363f7a2cd (patch)
tree	53caa967ed8e67b4eb3f41714c99a9ee76d7a043
parent	002da76821d32c8807dc47da16660925d8cc9b62 (diff)
parent	a897cc03519c83e1c426be491c9a1bea63609e16 (diff)
download	rust-0c72c0d11adeba449886089c6bd5d48363f7a2cd.tar.gz rust-0c72c0d11adeba449886089c6bd5d48363f7a2cd.zip