about summary refs log tree commit diff
diff options
context:
space:
mode:
authorGuillaume Gomez <guillaume1.gomez@gmail.com>2024-05-23 23:39:26 +0200
committerGitHub <noreply@github.com>2024-05-23 23:39:26 +0200
commit4ee97fc3dbcddc9d69701e346a1be9e0b66855a7 (patch)
tree7d713b1ab239668be85249ea07a6585fca7ca347
parentd6a1f1d3fc519af29a27123fb94a4b6934e95de3 (diff)
parentcfe3f77f9dd83bceb35e9685c4690b7b1a5be1b0 (diff)
downloadrust-4ee97fc3dbcddc9d69701e346a1be9e0b66855a7.tar.gz
rust-4ee97fc3dbcddc9d69701e346a1be9e0b66855a7.zip
Rollup merge of #125345 - durin42:thin-link-bitcode, r=bjorn3
rustc_codegen_llvm: add support for writing summary bitcode

Typical uses of ThinLTO don't have any use for this as a standalone file, but distributed ThinLTO uses this to make the linker phase more efficient. With clang you'd do something like `clang -flto=thin -fthin-link-bitcode=foo.indexing.o -c foo.c` and then get both foo.o (full of bitcode) and foo.indexing.o (just the summary or index part of the bitcode). That's then usable by a two-stage linking process that's more friendly to distributed build systems like bazel, which is why I'm working on this area.

I talked some to `@teresajohnson` about naming in this area, as things seem to be a little confused between various blog posts and build systems. "bitcode index" and "bitcode summary" tend to be a little too ambiguous, and she tends to use "thin link bitcode" and "minimized bitcode" (which matches the descriptions in LLVM). Since the clang option is thin-link-bitcode, I went with that to try and not add a new spelling in the world.

Per `@dtolnay,` you can work around the lack of this by using `lld --thinlto-index-only` to do the indexing on regular .o files of bitcode, but that is a bit wasteful on actions when we already have all the information in rustc and could just write out the matching minimized bitcode. I didn't test that at all in our infrastructure, because by the time I learned that I already had this patch largely written.
-rw-r--r--compiler/rustc_codegen_cranelift/src/driver/aot.rs2
-rw-r--r--compiler/rustc_codegen_gcc/src/lib.rs6
-rw-r--r--compiler/rustc_codegen_llvm/src/back/lto.rs19
-rw-r--r--compiler/rustc_codegen_llvm/src/back/write.rs23
-rw-r--r--compiler/rustc_codegen_llvm/src/lib.rs7
-rw-r--r--compiler/rustc_codegen_llvm/src/llvm/ffi.rs8
-rw-r--r--compiler/rustc_codegen_ssa/src/back/write.rs11
-rw-r--r--compiler/rustc_codegen_ssa/src/traits/write.rs6
-rw-r--r--compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp19
-rw-r--r--compiler/rustc_session/src/config.rs24
10 files changed, 107 insertions, 18 deletions
diff --git a/compiler/rustc_codegen_cranelift/src/driver/aot.rs b/compiler/rustc_codegen_cranelift/src/driver/aot.rs
index fce4690f97d..394c810176a 100644
--- a/compiler/rustc_codegen_cranelift/src/driver/aot.rs
+++ b/compiler/rustc_codegen_cranelift/src/driver/aot.rs
@@ -200,7 +200,7 @@ fn produce_final_output_artifacts(
     // to get rid of it.
     for output_type in crate_output.outputs.keys() {
         match *output_type {
-            OutputType::Bitcode => {
+            OutputType::Bitcode | OutputType::ThinLinkBitcode => {
                 // Cranelift doesn't have bitcode
                 // user_wants_bitcode = true;
                 // // Copy to .bc, but always keep the .0.bc. There is a later
diff --git a/compiler/rustc_codegen_gcc/src/lib.rs b/compiler/rustc_codegen_gcc/src/lib.rs
index 3f2fadce9e4..b1785e150ad 100644
--- a/compiler/rustc_codegen_gcc/src/lib.rs
+++ b/compiler/rustc_codegen_gcc/src/lib.rs
@@ -335,6 +335,10 @@ impl ThinBufferMethods for ThinBuffer {
     fn data(&self) -> &[u8] {
         unimplemented!();
     }
+
+    fn thin_link_data(&self) -> &[u8] {
+        unimplemented!();
+    }
 }
 
 pub struct GccContext {
@@ -414,7 +418,7 @@ impl WriteBackendMethods for GccCodegenBackend {
         back::write::codegen(cgcx, dcx, module, config)
     }
 
-    fn prepare_thin(_module: ModuleCodegen<Self::Module>) -> (String, Self::ThinBuffer) {
+    fn prepare_thin(_module: ModuleCodegen<Self::Module>, _emit_summary: bool) -> (String, Self::ThinBuffer) {
         unimplemented!();
     }
 
diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs
index e61af863dc0..c4bcf16bef3 100644
--- a/compiler/rustc_codegen_llvm/src/back/lto.rs
+++ b/compiler/rustc_codegen_llvm/src/back/lto.rs
@@ -229,9 +229,12 @@ pub(crate) fn run_thin(
     thin_lto(cgcx, &dcx, modules, upstream_modules, cached_modules, &symbols_below_threshold)
 }
 
-pub(crate) fn prepare_thin(module: ModuleCodegen<ModuleLlvm>) -> (String, ThinBuffer) {
+pub(crate) fn prepare_thin(
+    module: ModuleCodegen<ModuleLlvm>,
+    emit_summary: bool,
+) -> (String, ThinBuffer) {
     let name = module.name;
-    let buffer = ThinBuffer::new(module.module_llvm.llmod(), true);
+    let buffer = ThinBuffer::new(module.module_llvm.llmod(), true, emit_summary);
     (name, buffer)
 }
 
@@ -671,9 +674,9 @@ unsafe impl Send for ThinBuffer {}
 unsafe impl Sync for ThinBuffer {}
 
 impl ThinBuffer {
-    pub fn new(m: &llvm::Module, is_thin: bool) -> ThinBuffer {
+    pub fn new(m: &llvm::Module, is_thin: bool, emit_summary: bool) -> ThinBuffer {
         unsafe {
-            let buffer = llvm::LLVMRustThinLTOBufferCreate(m, is_thin);
+            let buffer = llvm::LLVMRustThinLTOBufferCreate(m, is_thin, emit_summary);
             ThinBuffer(buffer)
         }
     }
@@ -687,6 +690,14 @@ impl ThinBufferMethods for ThinBuffer {
             slice::from_raw_parts(ptr, len)
         }
     }
+
+    fn thin_link_data(&self) -> &[u8] {
+        unsafe {
+            let ptr = llvm::LLVMRustThinLTOBufferThinLinkDataPtr(self.0) as *const _;
+            let len = llvm::LLVMRustThinLTOBufferThinLinkDataLen(self.0);
+            slice::from_raw_parts(ptr, len)
+        }
+    }
 }
 
 impl Drop for ThinBuffer {
diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs
index 49f9d7ddab6..08cfda9e8ce 100644
--- a/compiler/rustc_codegen_llvm/src/back/write.rs
+++ b/compiler/rustc_codegen_llvm/src/back/write.rs
@@ -708,13 +708,15 @@ pub(crate) unsafe fn codegen(
         //   asm from LLVM and use `gcc` to create the object file.
 
         let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
+        let bc_summary_out =
+            cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
         let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name);
 
         if config.bitcode_needed() {
             let _timer = cgcx
                 .prof
                 .generic_activity_with_arg("LLVM_module_codegen_make_bitcode", &*module.name);
-            let thin = ThinBuffer::new(llmod, config.emit_thin_lto);
+            let thin = ThinBuffer::new(llmod, config.emit_thin_lto, config.emit_thin_lto_summary);
             let data = thin.data();
 
             if let Some(bitcode_filename) = bc_out.file_name() {
@@ -725,6 +727,25 @@ pub(crate) unsafe fn codegen(
                 );
             }
 
+            if config.emit_thin_lto_summary
+                && let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
+            {
+                let summary_data = thin.thin_link_data();
+                cgcx.prof.artifact_size(
+                    "llvm_bitcode_summary",
+                    thin_link_bitcode_filename.to_string_lossy(),
+                    summary_data.len() as u64,
+                );
+
+                let _timer = cgcx.prof.generic_activity_with_arg(
+                    "LLVM_module_codegen_emit_bitcode_summary",
+                    &*module.name,
+                );
+                if let Err(err) = fs::write(&bc_summary_out, summary_data) {
+                    dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
+                }
+            }
+
             if config.emit_bc || config.emit_obj == EmitObj::Bitcode {
                 let _timer = cgcx
                     .prof
diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs
index 1cecf682e5d..1e4f4c01b1c 100644
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@@ -240,8 +240,11 @@ impl WriteBackendMethods for LlvmCodegenBackend {
     ) -> Result<CompiledModule, FatalError> {
         back::write::codegen(cgcx, dcx, module, config)
     }
-    fn prepare_thin(module: ModuleCodegen<Self::Module>) -> (String, Self::ThinBuffer) {
-        back::lto::prepare_thin(module)
+    fn prepare_thin(
+        module: ModuleCodegen<Self::Module>,
+        emit_summary: bool,
+    ) -> (String, Self::ThinBuffer) {
+        back::lto::prepare_thin(module, emit_summary)
     }
     fn serialize_module(module: ModuleCodegen<Self::Module>) -> (String, Self::ModuleBuffer) {
         (module.name, back::lto::ModuleBuffer::new(module.module_llvm.llmod()))
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index 7a34e21628d..132e1f9e8fd 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -2350,10 +2350,16 @@ extern "C" {
     #[allow(improper_ctypes)]
     pub fn LLVMRustModuleInstructionStats(M: &Module, Str: &RustString);
 
-    pub fn LLVMRustThinLTOBufferCreate(M: &Module, is_thin: bool) -> &'static mut ThinLTOBuffer;
+    pub fn LLVMRustThinLTOBufferCreate(
+        M: &Module,
+        is_thin: bool,
+        emit_summary: bool,
+    ) -> &'static mut ThinLTOBuffer;
     pub fn LLVMRustThinLTOBufferFree(M: &'static mut ThinLTOBuffer);
     pub fn LLVMRustThinLTOBufferPtr(M: &ThinLTOBuffer) -> *const c_char;
     pub fn LLVMRustThinLTOBufferLen(M: &ThinLTOBuffer) -> size_t;
+    pub fn LLVMRustThinLTOBufferThinLinkDataPtr(M: &ThinLTOBuffer) -> *const c_char;
+    pub fn LLVMRustThinLTOBufferThinLinkDataLen(M: &ThinLTOBuffer) -> size_t;
     pub fn LLVMRustCreateThinLTOData(
         Modules: *const ThinLTOModule,
         NumModules: c_uint,
diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index 53ba0da7d04..f112cea6c82 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -107,6 +107,7 @@ pub struct ModuleConfig {
     pub emit_asm: bool,
     pub emit_obj: EmitObj,
     pub emit_thin_lto: bool,
+    pub emit_thin_lto_summary: bool,
     pub bc_cmdline: String,
 
     // Miscellaneous flags. These are mostly copied from command-line
@@ -231,6 +232,10 @@ impl ModuleConfig {
             ),
             emit_obj,
             emit_thin_lto: sess.opts.unstable_opts.emit_thin_lto,
+            emit_thin_lto_summary: if_regular!(
+                sess.opts.output_types.contains_key(&OutputType::ThinLinkBitcode),
+                false
+            ),
             bc_cmdline: sess.target.bitcode_llvm_cmdline.to_string(),
 
             verify_llvm_ir: sess.verify_llvm_ir(),
@@ -282,6 +287,7 @@ impl ModuleConfig {
 
     pub fn bitcode_needed(&self) -> bool {
         self.emit_bc
+            || self.emit_thin_lto_summary
             || self.emit_obj == EmitObj::Bitcode
             || self.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full)
     }
@@ -629,6 +635,9 @@ fn produce_final_output_artifacts(
                 // them for making an rlib.
                 copy_if_one_unit(OutputType::Bitcode, true);
             }
+            OutputType::ThinLinkBitcode => {
+                copy_if_one_unit(OutputType::ThinLinkBitcode, false);
+            }
             OutputType::LlvmAssembly => {
                 copy_if_one_unit(OutputType::LlvmAssembly, false);
             }
@@ -882,7 +891,7 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>(
     match lto_type {
         ComputedLtoType::No => finish_intra_module_work(cgcx, module, module_config),
         ComputedLtoType::Thin => {
-            let (name, thin_buffer) = B::prepare_thin(module);
+            let (name, thin_buffer) = B::prepare_thin(module, false);
             if let Some(path) = bitcode {
                 fs::write(&path, thin_buffer.data()).unwrap_or_else(|e| {
                     panic!("Error writing pre-lto-bitcode file `{}`: {}", path.display(), e);
diff --git a/compiler/rustc_codegen_ssa/src/traits/write.rs b/compiler/rustc_codegen_ssa/src/traits/write.rs
index 048540894ac..f83e34ab01b 100644
--- a/compiler/rustc_codegen_ssa/src/traits/write.rs
+++ b/compiler/rustc_codegen_ssa/src/traits/write.rs
@@ -56,12 +56,16 @@ pub trait WriteBackendMethods: 'static + Sized + Clone {
         module: ModuleCodegen<Self::Module>,
         config: &ModuleConfig,
     ) -> Result<CompiledModule, FatalError>;
-    fn prepare_thin(module: ModuleCodegen<Self::Module>) -> (String, Self::ThinBuffer);
+    fn prepare_thin(
+        module: ModuleCodegen<Self::Module>,
+        want_summary: bool,
+    ) -> (String, Self::ThinBuffer);
     fn serialize_module(module: ModuleCodegen<Self::Module>) -> (String, Self::ModuleBuffer);
 }
 
 pub trait ThinBufferMethods: Send + Sync {
     fn data(&self) -> &[u8];
+    fn thin_link_data(&self) -> &[u8];
 }
 
 pub trait ModuleBufferMethods: Send + Sync {
diff --git a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp
index 067374c0261..3fcf3aca8af 100644
--- a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp
+++ b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp
@@ -1488,13 +1488,15 @@ LLVMRustPrepareThinLTOImport(const LLVMRustThinLTOData *Data, LLVMModuleRef M,
 // a ThinLTO summary attached.
 struct LLVMRustThinLTOBuffer {
   std::string data;
+  std::string thin_link_data;
 };
 
 extern "C" LLVMRustThinLTOBuffer*
-LLVMRustThinLTOBufferCreate(LLVMModuleRef M, bool is_thin) {
+LLVMRustThinLTOBufferCreate(LLVMModuleRef M, bool is_thin, bool emit_summary) {
   auto Ret = std::make_unique<LLVMRustThinLTOBuffer>();
   {
     auto OS = raw_string_ostream(Ret->data);
+    auto ThinLinkOS = raw_string_ostream(Ret->thin_link_data);
     {
       if (is_thin) {
         PassBuilder PB;
@@ -1508,7 +1510,10 @@ LLVMRustThinLTOBufferCreate(LLVMModuleRef M, bool is_thin) {
         PB.registerLoopAnalyses(LAM);
         PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
         ModulePassManager MPM;
-        MPM.addPass(ThinLTOBitcodeWriterPass(OS, nullptr));
+        // We only pass ThinLinkOS to be filled in if we want the summary,
+        // because otherwise LLVM does extra work and may double-emit some
+        // errors or warnings.
+        MPM.addPass(ThinLTOBitcodeWriterPass(OS, emit_summary ? &ThinLinkOS : nullptr));
         MPM.run(*unwrap(M), MAM);
       } else {
         WriteBitcodeToFile(*unwrap(M), OS);
@@ -1533,6 +1538,16 @@ LLVMRustThinLTOBufferLen(const LLVMRustThinLTOBuffer *Buffer) {
   return Buffer->data.length();
 }
 
+extern "C" const void*
+LLVMRustThinLTOBufferThinLinkDataPtr(const LLVMRustThinLTOBuffer *Buffer) {
+  return Buffer->thin_link_data.data();
+}
+
+extern "C" size_t
+LLVMRustThinLTOBufferThinLinkDataLen(const LLVMRustThinLTOBuffer *Buffer) {
+  return Buffer->thin_link_data.length();
+}
+
 // This is what we used to parse upstream bitcode for actual ThinLTO
 // processing. We'll call this once per module optimized through ThinLTO, and
 // it'll be called concurrently on many threads.
diff --git a/compiler/rustc_session/src/config.rs b/compiler/rustc_session/src/config.rs
index 7dd9fdf60f9..5ac4d194e87 100644
--- a/compiler/rustc_session/src/config.rs
+++ b/compiler/rustc_session/src/config.rs
@@ -465,6 +465,7 @@ impl FromStr for SplitDwarfKind {
 #[derive(Encodable, Decodable)]
 pub enum OutputType {
     Bitcode,
+    ThinLinkBitcode,
     Assembly,
     LlvmAssembly,
     Mir,
@@ -492,6 +493,7 @@ impl OutputType {
         match *self {
             OutputType::Exe | OutputType::DepInfo | OutputType::Metadata => true,
             OutputType::Bitcode
+            | OutputType::ThinLinkBitcode
             | OutputType::Assembly
             | OutputType::LlvmAssembly
             | OutputType::Mir
@@ -502,6 +504,7 @@ impl OutputType {
     pub fn shorthand(&self) -> &'static str {
         match *self {
             OutputType::Bitcode => "llvm-bc",
+            OutputType::ThinLinkBitcode => "thin-link-bitcode",
             OutputType::Assembly => "asm",
             OutputType::LlvmAssembly => "llvm-ir",
             OutputType::Mir => "mir",
@@ -518,6 +521,7 @@ impl OutputType {
             "llvm-ir" => OutputType::LlvmAssembly,
             "mir" => OutputType::Mir,
             "llvm-bc" => OutputType::Bitcode,
+            "thin-link-bitcode" => OutputType::ThinLinkBitcode,
             "obj" => OutputType::Object,
             "metadata" => OutputType::Metadata,
             "link" => OutputType::Exe,
@@ -528,8 +532,9 @@ impl OutputType {
 
     fn shorthands_display() -> String {
         format!(
-            "`{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`",
+            "`{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`",
             OutputType::Bitcode.shorthand(),
+            OutputType::ThinLinkBitcode.shorthand(),
             OutputType::Assembly.shorthand(),
             OutputType::LlvmAssembly.shorthand(),
             OutputType::Mir.shorthand(),
@@ -543,6 +548,7 @@ impl OutputType {
     pub fn extension(&self) -> &'static str {
         match *self {
             OutputType::Bitcode => "bc",
+            OutputType::ThinLinkBitcode => "indexing.o",
             OutputType::Assembly => "s",
             OutputType::LlvmAssembly => "ll",
             OutputType::Mir => "mir",
@@ -559,9 +565,11 @@ impl OutputType {
             | OutputType::LlvmAssembly
             | OutputType::Mir
             | OutputType::DepInfo => true,
-            OutputType::Bitcode | OutputType::Object | OutputType::Metadata | OutputType::Exe => {
-                false
-            }
+            OutputType::Bitcode
+            | OutputType::ThinLinkBitcode
+            | OutputType::Object
+            | OutputType::Metadata
+            | OutputType::Exe => false,
         }
     }
 }
@@ -644,6 +652,7 @@ impl OutputTypes {
     pub fn should_codegen(&self) -> bool {
         self.0.keys().any(|k| match *k {
             OutputType::Bitcode
+            | OutputType::ThinLinkBitcode
             | OutputType::Assembly
             | OutputType::LlvmAssembly
             | OutputType::Mir
@@ -657,6 +666,7 @@ impl OutputTypes {
     pub fn should_link(&self) -> bool {
         self.0.keys().any(|k| match *k {
             OutputType::Bitcode
+            | OutputType::ThinLinkBitcode
             | OutputType::Assembly
             | OutputType::LlvmAssembly
             | OutputType::Mir
@@ -1769,6 +1779,12 @@ fn parse_output_types(
                         display = OutputType::shorthands_display(),
                     ))
                 });
+                if output_type == OutputType::ThinLinkBitcode && !unstable_opts.unstable_options {
+                    early_dcx.early_fatal(format!(
+                        "{} requested but -Zunstable-options not specified",
+                        OutputType::ThinLinkBitcode.shorthand()
+                    ));
+                }
                 output_types.insert(output_type, path);
             }
         }