about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Jakub Beránek <berykubik@gmail.com> 2023-10-02 21:11:15 +0200
committer: Jakub Beránek <berykubik@gmail.com> 2023-10-09 21:55:00 +0200
commit: dd7c5a00cb1cc5dbf5f661bb72c8e7d237031953 (patch)
tree: 49adeff82d4b577c7b13cd8185990585b7d2d11d
parent: 589e38a413f13ed66019d9d9907ad48895b037dc (diff)
download: rust-dd7c5a00cb1cc5dbf5f661bb72c8e7d237031953.tar.gz
download: rust-dd7c5a00cb1cc5dbf5f661bb72c8e7d237031953.zip
Optimize `librustc_driver.so` with BOLT
-rw-r--r-- src/tools/opt-dist/src/bolt.rs 14
-rw-r--r-- src/tools/opt-dist/src/exec.rs 4
-rw-r--r-- src/tools/opt-dist/src/main.rs 56
-rw-r--r-- src/tools/opt-dist/src/training.rs 53
4 files changed, 84 insertions, 43 deletions
diff --git a/src/tools/opt-dist/src/bolt.rs b/src/tools/opt-dist/src/bolt.rs
index cf9f4fabcec..f694c08f9b9 100644
--- a/src/tools/opt-dist/src/bolt.rs
+++ b/src/tools/opt-dist/src/bolt.rs
@@ -1,14 +1,14 @@
 use anyhow::Context;
 
 use crate::exec::cmd;
-use crate::training::LlvmBoltProfile;
+use crate::training::BoltProfile;
 use camino::{Utf8Path, Utf8PathBuf};
 
 use crate::utils::io::copy_file;
 
 /// Instruments an artifact at the given `path` (in-place) with BOLT and then calls `func`.
 /// After this function finishes, the original file will be restored.
-pub fn with_bolt_instrumented<F: FnOnce() -> anyhow::Result<R>, R>(
+pub fn with_bolt_instrumented<F: FnOnce(&Utf8Path) -> anyhow::Result<R>, R>(
     path: &Utf8Path,
     func: F,
 ) -> anyhow::Result<R> {
@@ -20,10 +20,16 @@ pub fn with_bolt_instrumented<F: FnOnce() -> anyhow::Result<R>, R>(
 
     let instrumented_path = tempfile::NamedTempFile::new()?.into_temp_path();
 
+    let profile_dir =
+        tempfile::TempDir::new().context("Could not create directory for BOLT profiles")?;
+    let profile_prefix = profile_dir.path().join("prof.fdata");
+    let profile_prefix = Utf8Path::from_path(&profile_prefix).unwrap();
+
     // Instrument the original file with BOLT, saving the result into `instrumented_path`
     cmd(&["llvm-bolt"])
         .arg("-instrument")
         .arg(path)
+        .arg(&format!("--instrumentation-file={profile_prefix}"))
         // Make sure that each process will write its profiles into a separate file
         .arg("--instrumentation-file-append-pid")
         .arg("-o")
@@ -36,11 +42,11 @@ pub fn with_bolt_instrumented<F: FnOnce() -> anyhow::Result<R>, R>(
 
     // Run the function that will make use of the instrumented artifact.
     // The original file will be restored when `_backup_file` is dropped.
-    func()
+    func(profile_prefix)
 }
 
 /// Optimizes the file at `path` with BOLT in-place using the given `profile`.
-pub fn bolt_optimize(path: &Utf8Path, profile: &LlvmBoltProfile) -> anyhow::Result<()> {
+pub fn bolt_optimize(path: &Utf8Path, profile: &BoltProfile) -> anyhow::Result<()> {
     // Copy the artifact to a new location, so that we do not use the same input and output file.
     // BOLT cannot handle optimizing when the input and output is the same file, because it performs
     // in-place patching.
diff --git a/src/tools/opt-dist/src/exec.rs b/src/tools/opt-dist/src/exec.rs
index 04e0184528a..f07bd5f9fce 100644
--- a/src/tools/opt-dist/src/exec.rs
+++ b/src/tools/opt-dist/src/exec.rs
@@ -1,7 +1,7 @@
 use crate::environment::Environment;
 use crate::metrics::{load_metrics, record_metrics};
 use crate::timer::TimerSection;
-use crate::training::{LlvmBoltProfile, LlvmPGOProfile, RustcPGOProfile};
+use crate::training::{BoltProfile, LlvmPGOProfile, RustcPGOProfile};
 use camino::{Utf8Path, Utf8PathBuf};
 use std::collections::BTreeMap;
 use std::fs::File;
@@ -159,7 +159,7 @@ impl Bootstrap {
         self
     }
 
-    pub fn with_bolt_profile(mut self, profile: LlvmBoltProfile) -> Self {
+    pub fn with_bolt_profile(mut self, profile: BoltProfile) -> Self {
         self.cmd = self.cmd.arg("--reproducible-artifact").arg(profile.0.as_str());
         self
     }
diff --git a/src/tools/opt-dist/src/main.rs b/src/tools/opt-dist/src/main.rs
index 03a1912f5ce..e6829b79f87 100644
--- a/src/tools/opt-dist/src/main.rs
+++ b/src/tools/opt-dist/src/main.rs
@@ -12,7 +12,10 @@ use crate::environment::{Environment, EnvironmentBuilder};
 use crate::exec::{cmd, Bootstrap};
 use crate::tests::run_tests;
 use crate::timer::Timer;
-use crate::training::{gather_llvm_bolt_profiles, gather_llvm_profiles, gather_rustc_profiles};
+use crate::training::{
+    gather_bolt_profiles, gather_llvm_profiles, gather_rustc_profiles, llvm_benchmarks,
+    rustc_benchmarks,
+};
 use crate::utils::artifact_size::print_binary_sizes;
 use crate::utils::io::{copy_directory, move_directory, reset_directory};
 use crate::utils::{
@@ -246,13 +249,13 @@ fn execute_pipeline(
         Ok(profile)
     })?;
 
-    let llvm_bolt_profile = if env.use_bolt() {
+    let bolt_profiles = if env.use_bolt() {
         // Stage 3: Build BOLT instrumented LLVM
         // We build a PGO optimized LLVM in this step, then instrument it with BOLT and gather BOLT profiles.
         // Note that we don't remove LLVM artifacts after this step, so that they are reused in the final dist build.
         // BOLT instrumentation is performed "on-the-fly" when the LLVM library is copied to the sysroot of rustc,
         // therefore the LLVM artifacts on disk are not "tainted" with BOLT instrumentation and they can be reused.
-        timer.section("Stage 3 (LLVM BOLT)", |stage| {
+        timer.section("Stage 3 (BOLT)", |stage| {
             stage.section("Build PGO optimized LLVM", |stage| {
                 Bootstrap::build(env)
                     .with_llvm_bolt_ldflags()
@@ -261,16 +264,17 @@ fn execute_pipeline(
                     .run(stage)
             })?;
 
-            // Find the path to the `libLLVM.so` file
-            let llvm_lib = io::find_file_in_dir(
-                &env.build_artifacts().join("stage2").join("lib"),
-                "libLLVM",
-                ".so",
-            )?;
+            let libdir = env.build_artifacts().join("stage2").join("lib");
+            let llvm_lib = io::find_file_in_dir(&libdir, "libLLVM", ".so")?;
 
-            // Instrument it and gather profiles
-            let profile = with_bolt_instrumented(&llvm_lib, || {
-                stage.section("Gather profiles", |_| gather_llvm_bolt_profiles(env))
+            log::info!("Optimizing {llvm_lib} with BOLT");
+
+            // FIXME(kobzol): try to gather profiles together, at once for LLVM and rustc
+            // Instrument the libraries and gather profiles
+            let llvm_profile = with_bolt_instrumented(&llvm_lib, |llvm_profile_dir| {
+                stage.section("Gather profiles", |_| {
+                    gather_bolt_profiles(env, "LLVM", llvm_benchmarks(env), llvm_profile_dir)
+                })
             })?;
             print_free_disk_space()?;
 
@@ -279,13 +283,29 @@ fn execute_pipeline(
             // the final dist build. However, when BOLT optimizes an artifact, it does so *in-place*,
             // therefore it will actually optimize all the hard links, which means that the final
             // packaged `libLLVM.so` file *will* be BOLT optimized.
-            bolt_optimize(&llvm_lib, &profile).context("Could not optimize LLVM with BOLT")?;
+            bolt_optimize(&llvm_lib, &llvm_profile).context("Could not optimize LLVM with BOLT")?;
+
+            let rustc_lib = io::find_file_in_dir(&libdir, "librustc_driver", ".so")?;
+
+            log::info!("Optimizing {rustc_lib} with BOLT");
+
+            // Instrument it and gather profiles
+            let rustc_profile = with_bolt_instrumented(&rustc_lib, |rustc_profile_dir| {
+                stage.section("Gather profiles", |_| {
+                    gather_bolt_profiles(env, "rustc", rustc_benchmarks(env), rustc_profile_dir)
+                })
+            })?;
+            print_free_disk_space()?;
+
+            // Now optimize the library with BOLT.
+            bolt_optimize(&rustc_lib, &rustc_profile)
+                .context("Could not optimize rustc with BOLT")?;
 
             // LLVM is not being cleared here, we want to use the BOLT-optimized LLVM
-            Ok(Some(profile))
+            Ok(vec![llvm_profile, rustc_profile])
         })?
     } else {
-        None
+        vec![]
     };
 
     let mut dist = Bootstrap::dist(env, &dist_args)
@@ -293,13 +313,13 @@ fn execute_pipeline(
         .rustc_pgo_optimize(&rustc_pgo_profile)
         .avoid_rustc_rebuild();
 
-    if let Some(llvm_bolt_profile) = llvm_bolt_profile {
-        dist = dist.with_bolt_profile(llvm_bolt_profile);
+    for bolt_profile in bolt_profiles {
+        dist = dist.with_bolt_profile(bolt_profile);
     }
 
     // Final stage: Assemble the dist artifacts
     // The previous PGO optimized rustc build and PGO optimized LLVM builds should be reused.
-    timer.section("Stage 4 (final build)", |stage| dist.run(stage))?;
+    timer.section("Stage 5 (final build)", |stage| dist.run(stage))?;
 
     // After dist has finished, run a subset of the test suite on the optimized artifacts to discover
     // possible regressions.
diff --git a/src/tools/opt-dist/src/training.rs b/src/tools/opt-dist/src/training.rs
index 274f4cea0ab..46040e32a03 100644
--- a/src/tools/opt-dist/src/training.rs
+++ b/src/tools/opt-dist/src/training.rs
@@ -27,8 +27,6 @@ const RUSTC_PGO_CRATES: &[&str] = &[
     "bitmaps-3.1.0",
 ];
 
-const LLVM_BOLT_CRATES: &[&str] = LLVM_PGO_CRATES;
-
 fn init_compiler_benchmarks(
     env: &Environment,
     profiles: &[&str],
@@ -113,6 +111,14 @@ fn log_profile_stats(
     Ok(())
 }
 
+pub fn llvm_benchmarks(env: &Environment) -> CmdBuilder {
+    init_compiler_benchmarks(env, &["Debug", "Opt"], &["Full"], LLVM_PGO_CRATES)
+}
+
+pub fn rustc_benchmarks(env: &Environment) -> CmdBuilder {
+    init_compiler_benchmarks(env, &["Check", "Debug", "Opt"], &["All"], RUSTC_PGO_CRATES)
+}
+
 pub struct LlvmPGOProfile(pub Utf8PathBuf);
 
 pub fn gather_llvm_profiles(
@@ -122,9 +128,7 @@ pub fn gather_llvm_profiles(
     log::info!("Running benchmarks with PGO instrumented LLVM");
 
     with_log_group("Running benchmarks", || {
-        init_compiler_benchmarks(env, &["Debug", "Opt"], &["Full"], LLVM_PGO_CRATES)
-            .run()
-            .context("Cannot gather LLVM PGO profiles")
+        llvm_benchmarks(env).run().context("Cannot gather LLVM PGO profiles")
     })?;
 
     let merged_profile = env.artifact_dir().join("llvm-pgo.profdata");
@@ -157,7 +161,7 @@ pub fn gather_rustc_profiles(
     // Here we're profiling the `rustc` frontend, so we also include `Check`.
     // The benchmark set includes various stress tests that put the frontend under pressure.
     with_log_group("Running benchmarks", || {
-        init_compiler_benchmarks(env, &["Check", "Debug", "Opt"], &["All"], RUSTC_PGO_CRATES)
+        rustc_benchmarks(env)
             .env("LLVM_PROFILE_FILE", profile_template.as_str())
             .run()
             .context("Cannot gather rustc PGO profiles")
@@ -176,23 +180,25 @@ pub fn gather_rustc_profiles(
     Ok(RustcPGOProfile(merged_profile))
 }
 
-pub struct LlvmBoltProfile(pub Utf8PathBuf);
+pub struct BoltProfile(pub Utf8PathBuf);
 
-pub fn gather_llvm_bolt_profiles(env: &Environment) -> anyhow::Result<LlvmBoltProfile> {
-    log::info!("Running benchmarks with BOLT instrumented LLVM");
+pub fn gather_bolt_profiles(
+    env: &Environment,
+    name: &str,
+    benchmarks: CmdBuilder,
+    profile_prefix: &Utf8Path,
+) -> anyhow::Result<BoltProfile> {
+    log::info!("Running benchmarks with BOLT instrumented {name}");
 
     with_log_group("Running benchmarks", || {
-        init_compiler_benchmarks(env, &["Check", "Debug", "Opt"], &["Full"], LLVM_BOLT_CRATES)
-            .run()
-            .context("Cannot gather LLVM BOLT profiles")
+        benchmarks.run().with_context(|| "Cannot gather {name} BOLT profiles")
     })?;
 
-    let merged_profile = env.artifact_dir().join("llvm-bolt.profdata");
-    let profile_root = Utf8PathBuf::from("/tmp/prof.fdata");
-    log::info!("Merging LLVM BOLT profiles to {merged_profile}");
+    let merged_profile = env.artifact_dir().join(format!("{name}-bolt.profdata"));
+    log::info!("Merging {name} BOLT profiles from {profile_prefix} to {merged_profile}");
 
     let profiles: Vec<_> =
-        glob::glob(&format!("{profile_root}*"))?.collect::<Result<Vec<_>, _>>()?;
+        glob::glob(&format!("{profile_prefix}*"))?.collect::<Result<Vec<_>, _>>()?;
 
     let mut merge_args = vec!["merge-fdata"];
     merge_args.extend(profiles.iter().map(|p| p.to_str().unwrap()));
@@ -204,7 +210,7 @@ pub fn gather_llvm_bolt_profiles(env: &Environment) -> anyhow::Result<LlvmBoltPr
             .context("Cannot merge BOLT profiles")
     })?;
 
-    log::info!("LLVM BOLT statistics");
+    log::info!("{name} BOLT statistics");
     log::info!(
         "{merged_profile}: {}",
         humansize::format_size(std::fs::metadata(merged_profile.as_std_path())?.len(), BINARY)
@@ -216,8 +222,17 @@ pub fn gather_llvm_bolt_profiles(env: &Environment) -> anyhow::Result<LlvmBoltPr
         .collect::<Result<Vec<_>, _>>()?
         .into_iter()
         .sum::<u64>();
-    log::info!("{profile_root}: {}", humansize::format_size(size, BINARY));
+    log::info!("{profile_prefix}: {}", humansize::format_size(size, BINARY));
     log::info!("Profile file count: {}", profiles.len());
 
-    Ok(LlvmBoltProfile(merged_profile))
+    // Delete the gathered profiles
+    for profile in glob::glob(&format!("{profile_prefix}*"))?.into_iter() {
+        if let Ok(profile) = profile {
+            if let Err(error) = std::fs::remove_file(&profile) {
+                log::error!("Cannot delete BOLT profile {}: {error:?}", profile.display());
+            }
+        }
+    }
+
+    Ok(BoltProfile(merged_profile))
 }