about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2025-03-21 21:03:49 +0000
committerbors <bors@rust-lang.org>2025-03-21 21:03:49 +0000
commit48b36c9d5943a4aef49aa04b8059b92d65ff31a9 (patch)
tree5c0c4be8b5a7dc98914aea80c6cffcb18e3f3d58
parentbe73c1f4617c97bce81b2694a767353300a75072 (diff)
parente21502cf9ed247bbf879132b8c62e423d959bd38 (diff)
downloadrust-48b36c9d5943a4aef49aa04b8059b92d65ff31a9.tar.gz
rust-48b36c9d5943a4aef49aa04b8059b92d65ff31a9.zip
Auto merge of #128320 - saethlin:link-me-maybe, r=compiler-errors
Avoid no-op unlink+link dances in incr comp

Incremental compilation scales quite poorly with the number of CGUs. This PR improves one reason for that.

The incr comp process hard-links all the files from an old session into a new one, then it runs the backend, which may just hard-link the new session files into the output directory. Then codegen hard-links all the output files back to the new session directory.

This PR (perhaps unimaginatively) fixes the silliness that ensues in the last step. The old `link_or_copy` implementation would be passed pairs of paths which are already the same inode, then it would blindly delete the destination and re-create the hard-link that it just deleted. This PR lets us skip both those operations. We don't skip the other two hard-links.

`cargo +stage1 b && touch crates/core/main.rs && strace -cfw -elink,linkat,unlink,unlinkat cargo +stage1 b` before and then after on `ripgrep-13.0.0`:
```
% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 52.56    0.024950          25       978       485 unlink
 34.38    0.016318          22       727           linkat
 13.06    0.006200          24       249           unlinkat
------ ----------- ----------- --------- --------- ----------------
100.00    0.047467          24      1954       485 total
```
```
% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 42.83    0.014521          57       252           unlink
 38.41    0.013021          26       486           linkat
 18.77    0.006362          25       249           unlinkat
------ ----------- ----------- --------- --------- ----------------
100.00    0.033904          34       987           total
```

This reduces the number of hard-links that are causing perf troubles, noted in https://github.com/rust-lang/rust/issues/64291 and https://github.com/rust-lang/rust/issues/137560
-rw-r--r--compiler/rustc_codegen_cranelift/src/driver/aot.rs20
-rw-r--r--compiler/rustc_codegen_ssa/src/back/write.rs21
-rw-r--r--compiler/rustc_codegen_ssa/src/base.rs1
-rw-r--r--compiler/rustc_codegen_ssa/src/lib.rs2
-rw-r--r--compiler/rustc_fs_util/src/lib.rs32
-rw-r--r--compiler/rustc_incremental/src/persist/work_product.rs7
6 files changed, 57 insertions, 26 deletions
diff --git a/compiler/rustc_codegen_cranelift/src/driver/aot.rs b/compiler/rustc_codegen_cranelift/src/driver/aot.rs
index a52b18573b1..fb7864ae612 100644
--- a/compiler/rustc_codegen_cranelift/src/driver/aot.rs
+++ b/compiler/rustc_codegen_cranelift/src/driver/aot.rs
@@ -103,12 +103,14 @@ impl OngoingCodegen {
                             ("o", &module_regular.object.as_ref().unwrap()),
                             ("asm.o", &module_global_asm.object.as_ref().unwrap()),
                         ],
+                        &[],
                     )
                 } else {
                     rustc_incremental::copy_cgu_workproduct_to_incr_comp_cache_dir(
                         sess,
                         &module_regular.name,
                         &[("o", &module_regular.object.as_ref().unwrap())],
+                        &[],
                     )
                 };
                 if let Some((work_product_id, work_product)) = work_product {
@@ -381,6 +383,7 @@ fn emit_cgu(
             bytecode: None,
             assembly: None,
             llvm_ir: None,
+            links_from_incr_cache: Vec::new(),
         }),
         existing_work_product: None,
     })
@@ -437,6 +440,7 @@ fn emit_module(
         bytecode: None,
         assembly: None,
         llvm_ir: None,
+        links_from_incr_cache: Vec::new(),
     })
 }
 
@@ -460,22 +464,23 @@ fn reuse_workproduct_for_cgu(
             err
         ));
     }
+
     let obj_out_global_asm =
         crate::global_asm::add_file_stem_postfix(obj_out_regular.clone(), ".asm");
-    let has_global_asm = if let Some(asm_o) = work_product.saved_files.get("asm.o") {
+    let source_file_global_asm = if let Some(asm_o) = work_product.saved_files.get("asm.o") {
         let source_file_global_asm = rustc_incremental::in_incr_comp_dir_sess(&tcx.sess, asm_o);
         if let Err(err) = rustc_fs_util::link_or_copy(&source_file_global_asm, &obj_out_global_asm)
         {
             return Err(format!(
                 "unable to copy {} to {}: {}",
-                source_file_regular.display(),
-                obj_out_regular.display(),
+                source_file_global_asm.display(),
+                obj_out_global_asm.display(),
                 err
             ));
         }
-        true
+        Some(source_file_global_asm)
     } else {
-        false
+        None
     };
 
     Ok(ModuleCodegenResult {
@@ -487,8 +492,9 @@ fn reuse_workproduct_for_cgu(
             bytecode: None,
             assembly: None,
             llvm_ir: None,
+            links_from_incr_cache: vec![source_file_regular],
         },
-        module_global_asm: has_global_asm.then(|| CompiledModule {
+        module_global_asm: source_file_global_asm.map(|source_file| CompiledModule {
             name: cgu.name().to_string(),
             kind: ModuleKind::Regular,
             object: Some(obj_out_global_asm),
@@ -496,6 +502,7 @@ fn reuse_workproduct_for_cgu(
             bytecode: None,
             assembly: None,
             llvm_ir: None,
+            links_from_incr_cache: vec![source_file],
         }),
         existing_work_product: Some((cgu.work_product_id(), work_product)),
     })
@@ -637,6 +644,7 @@ fn emit_metadata_module(tcx: TyCtxt<'_>, metadata: &EncodedMetadata) -> Compiled
         bytecode: None,
         assembly: None,
         llvm_ir: None,
+        links_from_incr_cache: Vec::new(),
     }
 }
 
diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index 2ec203458a3..216a18e72ed 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -546,9 +546,12 @@ fn copy_all_cgu_workproducts_to_incr_comp_cache_dir(
         if let Some(path) = &module.bytecode {
             files.push((OutputType::Bitcode.extension(), path.as_path()));
         }
-        if let Some((id, product)) =
-            copy_cgu_workproduct_to_incr_comp_cache_dir(sess, &module.name, files.as_slice())
-        {
+        if let Some((id, product)) = copy_cgu_workproduct_to_incr_comp_cache_dir(
+            sess,
+            &module.name,
+            files.as_slice(),
+            &module.links_from_incr_cache,
+        ) {
             work_products.insert(id, product);
         }
     }
@@ -934,7 +937,9 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
 ) -> WorkItemResult<B> {
     let incr_comp_session_dir = cgcx.incr_comp_session_dir.as_ref().unwrap();
 
-    let load_from_incr_comp_dir = |output_path: PathBuf, saved_path: &str| {
+    let mut links_from_incr_cache = Vec::new();
+
+    let mut load_from_incr_comp_dir = |output_path: PathBuf, saved_path: &str| {
         let source_file = in_incr_comp_dir(incr_comp_session_dir, saved_path);
         debug!(
             "copying preexisting module `{}` from {:?} to {}",
@@ -943,7 +948,10 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
             output_path.display()
         );
         match link_or_copy(&source_file, &output_path) {
-            Ok(_) => Some(output_path),
+            Ok(_) => {
+                links_from_incr_cache.push(source_file);
+                Some(output_path)
+            }
             Err(error) => {
                 cgcx.create_dcx().handle().emit_err(errors::CopyPathBuf {
                     source_file,
@@ -966,7 +974,7 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
             load_from_incr_comp_dir(dwarf_obj_out, saved_dwarf_object_file)
         });
 
-    let load_from_incr_cache = |perform, output_type: OutputType| {
+    let mut load_from_incr_cache = |perform, output_type: OutputType| {
         if perform {
             let saved_file = module.source.saved_files.get(output_type.extension())?;
             let output_path = cgcx.output_filenames.temp_path(output_type, Some(&module.name));
@@ -986,6 +994,7 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
     }
 
     WorkItemResult::Finished(CompiledModule {
+        links_from_incr_cache,
         name: module.name,
         kind: ModuleKind::Regular,
         object,
diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs
index 396febcc637..a739743be8c 100644
--- a/compiler/rustc_codegen_ssa/src/base.rs
+++ b/compiler/rustc_codegen_ssa/src/base.rs
@@ -658,6 +658,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
                 bytecode: None,
                 assembly: None,
                 llvm_ir: None,
+                links_from_incr_cache: Vec::new(),
             }
         })
     });
diff --git a/compiler/rustc_codegen_ssa/src/lib.rs b/compiler/rustc_codegen_ssa/src/lib.rs
index 44b8cc459cf..93c34a2f576 100644
--- a/compiler/rustc_codegen_ssa/src/lib.rs
+++ b/compiler/rustc_codegen_ssa/src/lib.rs
@@ -122,6 +122,7 @@ impl<M> ModuleCodegen<M> {
             bytecode,
             assembly,
             llvm_ir,
+            links_from_incr_cache: Vec::new(),
         }
     }
 }
@@ -135,6 +136,7 @@ pub struct CompiledModule {
     pub bytecode: Option<PathBuf>,
     pub assembly: Option<PathBuf>, // --emit=asm
     pub llvm_ir: Option<PathBuf>,  // --emit=llvm-ir, llvm-bc is in bytecode
+    pub links_from_incr_cache: Vec<PathBuf>,
 }
 
 impl CompiledModule {
diff --git a/compiler/rustc_fs_util/src/lib.rs b/compiler/rustc_fs_util/src/lib.rs
index 4e9d21c900d..0df1b243d69 100644
--- a/compiler/rustc_fs_util/src/lib.rs
+++ b/compiler/rustc_fs_util/src/lib.rs
@@ -55,25 +55,31 @@ pub enum LinkOrCopy {
     Copy,
 }
 
-/// Copies `p` into `q`, preferring to use hard-linking if possible. If
-/// `q` already exists, it is removed first.
+/// Copies `p` into `q`, preferring to use hard-linking if possible.
 /// The result indicates which of the two operations has been performed.
 pub fn link_or_copy<P: AsRef<Path>, Q: AsRef<Path>>(p: P, q: Q) -> io::Result<LinkOrCopy> {
+    // Creating a hard-link will fail if the destination path already exists. We could defensively
+    // call remove_file in this function, but that pessimizes callers who can avoid such calls.
+    // Incremental compilation calls this function a lot, and is able to avoid calls that
+    // would fail the first hard_link attempt.
+
     let p = p.as_ref();
     let q = q.as_ref();
-    match fs::remove_file(q) {
-        Ok(()) => (),
-        Err(err) if err.kind() == io::ErrorKind::NotFound => (),
-        Err(err) => return Err(err),
-    }
 
-    match fs::hard_link(p, q) {
-        Ok(()) => Ok(LinkOrCopy::Link),
-        Err(_) => match fs::copy(p, q) {
-            Ok(_) => Ok(LinkOrCopy::Copy),
-            Err(e) => Err(e),
-        },
+    let err = match fs::hard_link(p, q) {
+        Ok(()) => return Ok(LinkOrCopy::Link),
+        Err(err) => err,
+    };
+
+    if err.kind() == io::ErrorKind::AlreadyExists {
+        fs::remove_file(q)?;
+        if fs::hard_link(p, q).is_ok() {
+            return Ok(LinkOrCopy::Link);
+        }
     }
+
+    // Hard linking failed, fall back to copying.
+    fs::copy(p, q).map(|_| LinkOrCopy::Copy)
 }
 
 #[cfg(any(unix, all(target_os = "wasi", target_env = "p1")))]
diff --git a/compiler/rustc_incremental/src/persist/work_product.rs b/compiler/rustc_incremental/src/persist/work_product.rs
index 048981f0d5c..7b1eb0a82e3 100644
--- a/compiler/rustc_incremental/src/persist/work_product.rs
+++ b/compiler/rustc_incremental/src/persist/work_product.rs
@@ -3,7 +3,7 @@
 //! [work products]: WorkProduct
 
 use std::fs as std_fs;
-use std::path::Path;
+use std::path::{Path, PathBuf};
 
 use rustc_data_structures::unord::UnordMap;
 use rustc_fs_util::link_or_copy;
@@ -20,6 +20,7 @@ pub fn copy_cgu_workproduct_to_incr_comp_cache_dir(
     sess: &Session,
     cgu_name: &str,
     files: &[(&'static str, &Path)],
+    known_links: &[PathBuf],
 ) -> Option<(WorkProductId, WorkProduct)> {
     debug!(?cgu_name, ?files);
     sess.opts.incremental.as_ref()?;
@@ -28,6 +29,10 @@ pub fn copy_cgu_workproduct_to_incr_comp_cache_dir(
     for (ext, path) in files {
         let file_name = format!("{cgu_name}.{ext}");
         let path_in_incr_dir = in_incr_comp_dir_sess(sess, &file_name);
+        if known_links.contains(&path_in_incr_dir) {
+            let _ = saved_files.insert(ext.to_string(), file_name);
+            continue;
+        }
         match link_or_copy(path, &path_in_incr_dir) {
             Ok(_) => {
                 let _ = saved_files.insert(ext.to_string(), file_name);