about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMichael Benfield <mbenfield@google.com>2021-05-07 07:41:37 +0000
committerMichael Benfield <mbenfield@google.com>2021-10-06 19:36:52 +0000
commita17193dbb931ea0c8b66d82f640385bce8b4929a (patch)
tree5649047fed7b0c037e06ba0cfc05bd70028428e1
parentd7539a6af09e5889ed9bcb8b49571b7a59c32e65 (diff)
downloadrust-a17193dbb931ea0c8b66d82f640385bce8b4929a.tar.gz
rust-a17193dbb931ea0c8b66d82f640385bce8b4929a.zip
Enable AutoFDO.
This largely involves implementing the options debug-info-for-profiling
and profile-sample-use and forwarding them on to LLVM.

AutoFDO can be used on x86-64 Linux like this:
rustc -O -Cdebug-info-for-profiling main.rs -o main
perf record -b ./main
create_llvm_prof --binary=main --out=code.prof
rustc -O -Cprofile-sample-use=code.prof main.rs -o main2

Now `main2` will have feedback directed optimization applied to it.

The create_llvm_prof tool can be obtained from this github repository:
https://github.com/google/autofdo

Fixes #64892.
-rw-r--r--compiler/rustc_codegen_llvm/src/attributes.rs4
-rw-r--r--compiler/rustc_codegen_llvm/src/back/write.rs15
-rw-r--r--compiler/rustc_codegen_llvm/src/llvm/ffi.rs3
-rw-r--r--compiler/rustc_codegen_ssa/src/back/linker.rs3
-rw-r--r--compiler/rustc_codegen_ssa/src/back/write.rs4
-rw-r--r--compiler/rustc_interface/src/tests.rs2
-rw-r--r--compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp30
-rw-r--r--compiler/rustc_session/src/config.rs9
-rw-r--r--compiler/rustc_session/src/options.rs4
-rw-r--r--compiler/rustc_session/src/session.rs10
-rw-r--r--src/doc/unstable-book/src/compiler-flags/debug_info_for_profiling.md35
-rw-r--r--src/doc/unstable-book/src/compiler-flags/profile_sample_use.md10
12 files changed, 120 insertions, 9 deletions
diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs
index 51c70f0868f..659cf9ea070 100644
--- a/compiler/rustc_codegen_llvm/src/attributes.rs
+++ b/compiler/rustc_codegen_llvm/src/attributes.rs
@@ -263,6 +263,10 @@ pub fn from_fn_attrs(cx: &CodegenCx<'ll, 'tcx>, llfn: &'ll Value, instance: ty::
         attributes::emit_uwtable(llfn, true);
     }
 
+    if cx.sess().opts.debugging_opts.profile_sample_use.is_some() {
+        llvm::AddFunctionAttrString(llfn, Function, cstr!("use-sample-profile"));
+    }
+
     // FIXME: none of these three functions interact with source level attributes.
     set_frame_pointer_type(cx, llfn);
     set_instrument_function(cx, llfn);
diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs
index ab48c56a626..ca78254f0c8 100644
--- a/compiler/rustc_codegen_llvm/src/back/write.rs
+++ b/compiler/rustc_codegen_llvm/src/back/write.rs
@@ -370,6 +370,13 @@ fn get_pgo_use_path(config: &ModuleConfig) -> Option<CString> {
         .map(|path_buf| CString::new(path_buf.to_string_lossy().as_bytes()).unwrap())
 }
 
+fn get_pgo_sample_use_path(config: &ModuleConfig) -> Option<CString> {
+    config
+        .pgo_sample_use
+        .as_ref()
+        .map(|path_buf| CString::new(path_buf.to_string_lossy().as_bytes()).unwrap())
+}
+
 pub(crate) fn should_use_new_llvm_pass_manager(config: &ModuleConfig) -> bool {
     // The new pass manager is enabled by default for LLVM >= 13.
     // This matches Clang, which also enables it since Clang 13.
@@ -389,6 +396,7 @@ pub(crate) unsafe fn optimize_with_new_llvm_pass_manager(
     let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
     let pgo_gen_path = get_pgo_gen_path(config);
     let pgo_use_path = get_pgo_use_path(config);
+    let pgo_sample_use_path = get_pgo_sample_use_path(config);
     let is_lto = opt_stage == llvm::OptStage::ThinLTO || opt_stage == llvm::OptStage::FatLTO;
     // Sanitizer instrumentation is only inserted during the pre-link optimization stage.
     let sanitizer_options = if !is_lto {
@@ -439,6 +447,8 @@ pub(crate) unsafe fn optimize_with_new_llvm_pass_manager(
         pgo_use_path.as_ref().map_or(std::ptr::null(), |s| s.as_ptr()),
         config.instrument_coverage,
         config.instrument_gcov,
+        pgo_sample_use_path.as_ref().map_or(std::ptr::null(), |s| s.as_ptr()),
+        config.debug_info_for_profiling,
         llvm_selfprofiler,
         selfprofile_before_pass_callback,
         selfprofile_after_pass_callback,
@@ -544,6 +554,9 @@ pub(crate) unsafe fn optimize(
             if config.instrument_coverage {
                 llvm::LLVMRustAddPass(mpm, find_pass("instrprof").unwrap());
             }
+            if config.debug_info_for_profiling {
+                llvm::LLVMRustAddPass(mpm, find_pass("add-discriminators").unwrap());
+            }
 
             add_sanitizer_passes(config, &mut extra_passes);
 
@@ -1001,6 +1014,7 @@ pub unsafe fn with_llvm_pmb(
     let inline_threshold = config.inline_threshold;
     let pgo_gen_path = get_pgo_gen_path(config);
     let pgo_use_path = get_pgo_use_path(config);
+    let pgo_sample_use_path = get_pgo_sample_use_path(config);
 
     llvm::LLVMRustConfigurePassManagerBuilder(
         builder,
@@ -1011,6 +1025,7 @@ pub unsafe fn with_llvm_pmb(
         prepare_for_thin_lto,
         pgo_gen_path.as_ref().map_or(ptr::null(), |s| s.as_ptr()),
         pgo_use_path.as_ref().map_or(ptr::null(), |s| s.as_ptr()),
+        pgo_sample_use_path.as_ref().map_or(ptr::null(), |s| s.as_ptr()),
     );
 
     llvm::LLVMPassManagerBuilderSetSizeLevel(builder, opt_size as u32);
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index 436d906827b..5d437b2a068 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -2176,6 +2176,7 @@ extern "C" {
         PrepareForThinLTO: bool,
         PGOGenPath: *const c_char,
         PGOUsePath: *const c_char,
+        PGOSampleUsePath: *const c_char,
     );
     pub fn LLVMRustAddLibraryInfo(
         PM: &PassManager<'a>,
@@ -2210,6 +2211,8 @@ extern "C" {
         PGOUsePath: *const c_char,
         InstrumentCoverage: bool,
         InstrumentGCOV: bool,
+        PGOSampleUsePath: *const c_char,
+        DebugInfoForProfiling: bool,
         llvm_selfprofiler: *mut c_void,
         begin_callback: SelfProfileBeforePassCallback,
         end_callback: SelfProfileAfterPassCallback,
diff --git a/compiler/rustc_codegen_ssa/src/back/linker.rs b/compiler/rustc_codegen_ssa/src/back/linker.rs
index e3b0eea0d89..429dc45d6a4 100644
--- a/compiler/rustc_codegen_ssa/src/back/linker.rs
+++ b/compiler/rustc_codegen_ssa/src/back/linker.rs
@@ -286,6 +286,9 @@ impl<'a> GccLinker<'a> {
             config::OptLevel::Aggressive => "O3",
         };
 
+        if let Some(path) = &self.sess.opts.debugging_opts.profile_sample_use {
+            self.linker_arg(&format!("-plugin-opt=sample-profile={}", path.display()));
+        };
         self.linker_arg(&format!("-plugin-opt={}", opt_level));
         self.linker_arg(&format!("-plugin-opt=mcpu={}", self.target_cpu));
     }
diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index 31722d07414..da34612ce76 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -83,6 +83,8 @@ pub struct ModuleConfig {
 
     pub pgo_gen: SwitchWithOptPath,
     pub pgo_use: Option<PathBuf>,
+    pub pgo_sample_use: Option<PathBuf>,
+    pub debug_info_for_profiling: bool,
     pub instrument_coverage: bool,
     pub instrument_gcov: bool,
 
@@ -176,6 +178,8 @@ impl ModuleConfig {
                 SwitchWithOptPath::Disabled
             ),
             pgo_use: if_regular!(sess.opts.cg.profile_use.clone(), None),
+            pgo_sample_use: if_regular!(sess.opts.debugging_opts.profile_sample_use.clone(), None),
+            debug_info_for_profiling: sess.opts.debugging_opts.debug_info_for_profiling,
             instrument_coverage: if_regular!(sess.instrument_coverage(), false),
             instrument_gcov: if_regular!(
                 // compiler_builtins overrides the codegen-units settings,
diff --git a/compiler/rustc_interface/src/tests.rs b/compiler/rustc_interface/src/tests.rs
index cfe13b1fd4e..844e5ab56a4 100644
--- a/compiler/rustc_interface/src/tests.rs
+++ b/compiler/rustc_interface/src/tests.rs
@@ -715,6 +715,7 @@ fn test_debugging_options_tracking_hash() {
     tracked!(chalk, true);
     tracked!(codegen_backend, Some("abc".to_string()));
     tracked!(crate_attr, vec!["abc".to_string()]);
+    tracked!(debug_info_for_profiling, true);
     tracked!(debug_macros, true);
     tracked!(dep_info_omit_d_target, true);
     tracked!(dual_proc_macros, true);
@@ -752,6 +753,7 @@ fn test_debugging_options_tracking_hash() {
     tracked!(profile, true);
     tracked!(profile_emit, Some(PathBuf::from("abc")));
     tracked!(profiler_runtime, "abc".to_string());
+    tracked!(profile_sample_use, Some(PathBuf::from("abc")));
     tracked!(relax_elf_relocations, Some(true));
     tracked!(relro_level, Some(RelroLevel::Full));
     tracked!(remap_cwd_prefix, Some(PathBuf::from("abc")));
diff --git a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp
index ddb5f7dcebf..87f423fb2d5 100644
--- a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp
+++ b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp
@@ -25,6 +25,7 @@
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
 #include "llvm/Transforms/IPO/AlwaysInliner.h"
 #include "llvm/Transforms/IPO/FunctionImport.h"
+#include "llvm/Transforms/Utils/AddDiscriminators.h"
 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
 #include "llvm/LTO/LTO.h"
 #include "llvm-c/Transforms/PassManagerBuilder.h"
@@ -39,6 +40,7 @@
 #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
 #include "llvm/Transforms/Utils/CanonicalizeAliases.h"
 #include "llvm/Transforms/Utils/NameAnonGlobals.h"
+#include "llvm/Transforms/Utils.h"
 
 using namespace llvm;
 
@@ -523,7 +525,7 @@ extern "C" void LLVMRustDisposeTargetMachine(LLVMTargetMachineRef TM) {
 extern "C" void LLVMRustConfigurePassManagerBuilder(
     LLVMPassManagerBuilderRef PMBR, LLVMRustCodeGenOptLevel OptLevel,
     bool MergeFunctions, bool SLPVectorize, bool LoopVectorize, bool PrepareForThinLTO,
-    const char* PGOGenPath, const char* PGOUsePath) {
+    const char* PGOGenPath, const char* PGOUsePath, const char* PGOSampleUsePath) {
   unwrap(PMBR)->MergeFunctions = MergeFunctions;
   unwrap(PMBR)->SLPVectorize = SLPVectorize;
   unwrap(PMBR)->OptLevel = fromRust(OptLevel);
@@ -531,13 +533,14 @@ extern "C" void LLVMRustConfigurePassManagerBuilder(
   unwrap(PMBR)->PrepareForThinLTO = PrepareForThinLTO;
 
   if (PGOGenPath) {
-    assert(!PGOUsePath);
+    assert(!PGOUsePath && !PGOSampleUsePath);
     unwrap(PMBR)->EnablePGOInstrGen = true;
     unwrap(PMBR)->PGOInstrGen = PGOGenPath;
-  }
-  if (PGOUsePath) {
-    assert(!PGOGenPath);
+  } else if (PGOUsePath) {
+    assert(!PGOSampleUsePath);
     unwrap(PMBR)->PGOInstrUse = PGOUsePath;
+  } else if (PGOSampleUsePath) {
+    unwrap(PMBR)->PGOSampleUse = PGOSampleUsePath;
   }
 }
 
@@ -759,6 +762,7 @@ LLVMRustOptimizeWithNewPassManager(
     LLVMRustSanitizerOptions *SanitizerOptions,
     const char *PGOGenPath, const char *PGOUsePath,
     bool InstrumentCoverage, bool InstrumentGCOV,
+    const char *PGOSampleUsePath, bool DebugInfoForProfiling,
     void* LlvmSelfProfiler,
     LLVMRustSelfProfileBeforePassCallback BeforePassCallback,
     LLVMRustSelfProfileAfterPassCallback AfterPassCallback,
@@ -797,11 +801,19 @@ LLVMRustOptimizeWithNewPassManager(
 
   Optional<PGOOptions> PGOOpt;
   if (PGOGenPath) {
-    assert(!PGOUsePath);
-    PGOOpt = PGOOptions(PGOGenPath, "", "", PGOOptions::IRInstr);
+    assert(!PGOUsePath && !PGOSampleUsePath);
+    PGOOpt = PGOOptions(PGOGenPath, "", "", PGOOptions::IRInstr,
+                        PGOOptions::NoCSAction, DebugInfoForProfiling);
   } else if (PGOUsePath) {
-    assert(!PGOGenPath);
-    PGOOpt = PGOOptions(PGOUsePath, "", "", PGOOptions::IRUse);
+    assert(!PGOSampleUsePath);
+    PGOOpt = PGOOptions(PGOUsePath, "", "", PGOOptions::IRUse,
+                        PGOOptions::NoCSAction, DebugInfoForProfiling);
+  } else if (PGOSampleUsePath) {
+    PGOOpt = PGOOptions(PGOSampleUsePath, "", "", PGOOptions::SampleUse,
+                        PGOOptions::NoCSAction, DebugInfoForProfiling);
+  } else if (DebugInfoForProfiling) {
+    PGOOpt = PGOOptions("", "", "", PGOOptions::NoAction,
+                        PGOOptions::NoCSAction, DebugInfoForProfiling);
   }
 
 #if LLVM_VERSION_GE(12, 0) && !LLVM_VERSION_GE(13,0)
diff --git a/compiler/rustc_session/src/config.rs b/compiler/rustc_session/src/config.rs
index 32aa035e1cd..ac4bce7350b 100644
--- a/compiler/rustc_session/src/config.rs
+++ b/compiler/rustc_session/src/config.rs
@@ -2009,6 +2009,15 @@ pub fn build_session_options(matches: &getopts::Matches) -> Options {
         );
     }
 
+    if debugging_opts.profile_sample_use.is_some()
+        && (cg.profile_generate.enabled() || cg.profile_use.is_some())
+    {
+        early_error(
+            error_format,
+            "option `-Z profile-sample-use` cannot be used with `-C profile-generate` or `-C profile-use`",
+        );
+    }
+
     if debugging_opts.instrument_coverage.is_some()
         && debugging_opts.instrument_coverage != Some(InstrumentCoverage::Off)
     {
diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs
index 8ecb7a031ad..b3d36b396c5 100644
--- a/compiler/rustc_session/src/options.rs
+++ b/compiler/rustc_session/src/options.rs
@@ -1040,6 +1040,8 @@ options! {
         "combine CGUs into a single one"),
     crate_attr: Vec<String> = (Vec::new(), parse_string_push, [TRACKED],
         "inject the given attribute in the crate"),
+    debug_info_for_profiling: bool = (false, parse_bool, [TRACKED],
+        "emit discriminators and other data necessary for AutoFDO"),
     debug_macros: bool = (false, parse_bool, [TRACKED],
         "emit line numbers debug info inside macros (default: no)"),
     deduplicate_diagnostics: bool = (true, parse_bool, [UNTRACKED],
@@ -1242,6 +1244,8 @@ options! {
         (default based on relative source path)"),
     profiler_runtime: String = (String::from("profiler_builtins"), parse_string, [TRACKED],
         "name of the profiler runtime crate to automatically inject (default: `profiler_builtins`)"),
+    profile_sample_use: Option<PathBuf> = (None, parse_opt_pathbuf, [TRACKED],
+        "use the given `.prof` file for sampled profile-guided optimization (also known as AutoFDO)"),
     query_dep_graph: bool = (false, parse_bool, [UNTRACKED],
         "enable queries of the dependency graph for regression testing (default: no)"),
     query_stats: bool = (false, parse_bool, [UNTRACKED],
diff --git a/compiler/rustc_session/src/session.rs b/compiler/rustc_session/src/session.rs
index bf04154a3da..b6ba6cc1dd6 100644
--- a/compiler/rustc_session/src/session.rs
+++ b/compiler/rustc_session/src/session.rs
@@ -1353,6 +1353,16 @@ fn validate_commandline_args_with_session_available(sess: &Session) {
         }
     }
 
+    // Do the same for sample profile data.
+    if let Some(ref path) = sess.opts.debugging_opts.profile_sample_use {
+        if !path.exists() {
+            sess.err(&format!(
+                "File `{}` passed to `-C profile-sample-use` does not exist.",
+                path.display()
+            ));
+        }
+    }
+
     // Unwind tables cannot be disabled if the target requires them.
     if let Some(include_uwtables) = sess.opts.cg.force_unwind_tables {
         if sess.target.requires_uwtable && !include_uwtables {
diff --git a/src/doc/unstable-book/src/compiler-flags/debug_info_for_profiling.md b/src/doc/unstable-book/src/compiler-flags/debug_info_for_profiling.md
new file mode 100644
index 00000000000..44bd3baeeed
--- /dev/null
+++ b/src/doc/unstable-book/src/compiler-flags/debug_info_for_profiling.md
@@ -0,0 +1,35 @@
+# `debug-info-for-profiling
+
+---
+
+## Introduction
+
+Automatic Feedback Directed Optimization (AFDO) is a method for using sampling
+based profiles to guide optimizations. This is contrasted with other methods of
+FDO or profile-guided optimization (PGO) which use instrumented profiling.
+
+Unlike PGO (controlled by the `rustc` flags `-Cprofile-generate` and
+`-Cprofile-use`), a binary being profiled does not perform significantly worse,
+and thus it's possible to profile binaries used in real workflows and not
+necessary to construct artificial workflows.
+
+## Use
+
+In order to use AFDO, the target platform must be Linux running on an `x86_64`
+architecture with the performance profiler `perf` available. In addition, the
+external tool `create_llvm_prof` from [this repository] must be used.
+
+Given a Rust file `main.rs`, we can produce an optimized binary as follows:
+
+```shell
+rustc -O -Zdebug-info-for-profiling main.rs -o main
+perf record -b ./main
+create_llvm_prof --binary=main --out=code.prof
+rustc -O -Zprofile-sample-use=code.prof main.rs -o main2
+```
+
+The `perf` command produces a profile `perf.data`, which is then used by the
+`create_llvm_prof` command to create `code.prof`. This final profile is then
+used by `rustc` to guide optimizations in producing the binary `main2`.
+
+[this repository]: https://github.com/google/autofdo
diff --git a/src/doc/unstable-book/src/compiler-flags/profile_sample_use.md b/src/doc/unstable-book/src/compiler-flags/profile_sample_use.md
new file mode 100644
index 00000000000..ce894ce6ac7
--- /dev/null
+++ b/src/doc/unstable-book/src/compiler-flags/profile_sample_use.md
@@ -0,0 +1,10 @@
+# `profile-sample-use
+
+---
+
+`-Zprofile-sample-use=code.prof` directs `rustc` to use the profile
+`code.prof` as a source for Automatic Feedback Directed Optimization (AFDO).
+See the documentation of [`-Zdebug-info-for-profiling`] for more information
+on using AFDO.
+
+[`-Zdebug-info-for-profiling`]: debug_info_for_profiling.html