From 335151f8bbadf31c2d8dae7d2a25dbcdab45a3b6 Mon Sep 17 00:00:00 2001 From: Manuel Drehwald Date: Sat, 1 Feb 2025 21:51:21 -0500 Subject: adding first autodiff test --- tests/codegen/autodiff.rs | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 tests/codegen/autodiff.rs (limited to 'tests/codegen') diff --git a/tests/codegen/autodiff.rs b/tests/codegen/autodiff.rs new file mode 100644 index 00000000000..96983fecf38 --- /dev/null +++ b/tests/codegen/autodiff.rs @@ -0,0 +1,33 @@ +//@ compile-flags: -C opt-level=3 -Clto=fat -Zllvm-plugins=/home/manuel/prog/rust-middle/build/x86_64-unknown-linux-gnu/enzyme/build/Enzyme/libEnzyme-19.so -Cpasses=enzyme +//@ no-prefer-dynamic +//@ needs-enzyme +#![feature(autodiff)] + +use std::autodiff::autodiff; + +#[autodiff(d_square, Reverse, Duplicated, Active)] +#[no_mangle] +fn square(x: &f64) -> f64 { + x * x +} + +// CHECK:define internal fastcc double @diffesquare(double %x.0.val, ptr nocapture align 8 %"x'" +// CHECK-NEXT:invertstart: +// CHECK-NEXT: %_0 = fmul double %x.0.val, %x.0.val +// CHECK-NEXT: %0 = fadd fast double %x.0.val, %x.0.val +// CHECK-NEXT: %1 = load double, ptr %"x'", align 8, !alias.scope !17816, !noalias !17819 +// CHECK-NEXT: %2 = fadd fast double %1, %0 +// CHECK-NEXT: store double %2, ptr %"x'", align 8, !alias.scope !17816, !noalias !17819 +// CHECK-NEXT: ret double %_0 +// CHECK-NEXT:} + +fn main() { + let x = 3.0; + let output = square(&x); + assert_eq!(9.0, output); + + let mut df_dx = 0.0; + let output_ = d_square(&x, &mut df_dx, 1.0); + assert_eq!(output, output_); + assert_eq!(6.0, df_dx); +} -- cgit 1.4.1-3-g733a5 From 6345b59e741d3fac0a54b18ba1db816062841602 Mon Sep 17 00:00:00 2001 From: Manuel Drehwald Date: Fri, 7 Feb 2025 17:31:19 -0500 Subject: simplify autodiff tests to run without local path --- tests/codegen/autodiff.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tests/codegen') diff --git a/tests/codegen/autodiff.rs 
b/tests/codegen/autodiff.rs index 96983fecf38..efc5ac77f1b 100644 --- a/tests/codegen/autodiff.rs +++ b/tests/codegen/autodiff.rs @@ -1,4 +1,4 @@ -//@ compile-flags: -C opt-level=3 -Clto=fat -Zllvm-plugins=/home/manuel/prog/rust-middle/build/x86_64-unknown-linux-gnu/enzyme/build/Enzyme/libEnzyme-19.so -Cpasses=enzyme +//@ compile-flags: -C opt-level=3 -Clto=fat //@ no-prefer-dynamic //@ needs-enzyme #![feature(autodiff)] -- cgit 1.4.1-3-g733a5 From 1221cff55149d2dbbf8761345799ef06f8099b97 Mon Sep 17 00:00:00 2001 From: Manuel Drehwald Date: Mon, 10 Feb 2025 01:35:22 -0500 Subject: move second opt run to lto phase and cleanup code --- compiler/rustc_codegen_llvm/src/back/lto.rs | 25 +++++++++++-- compiler/rustc_codegen_llvm/src/back/write.rs | 41 +++++++++++++--------- .../rustc_codegen_llvm/src/builder/autodiff.rs | 35 ++++-------------- compiler/rustc_llvm/build.rs | 4 +++ compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp | 11 ++++-- src/bootstrap/src/core/build_steps/compile.rs | 9 +++-- tests/codegen/autodiff.rs | 4 +-- 7 files changed, 75 insertions(+), 54 deletions(-) (limited to 'tests/codegen') diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs index 78c759bbe8c..8bad437eeb7 100644 --- a/compiler/rustc_codegen_llvm/src/back/lto.rs +++ b/compiler/rustc_codegen_llvm/src/back/lto.rs @@ -606,10 +606,31 @@ pub(crate) fn run_pass_manager( // If this rustc version was build with enzyme/autodiff enabled, and if users applied the // `#[autodiff]` macro at least once, then we will later call llvm_optimize a second time. 
- let first_run = true; debug!("running llvm pm opt pipeline"); unsafe { - write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, first_run)?; + write::llvm_optimize( + cgcx, + dcx, + module, + config, + opt_level, + opt_stage, + write::AutodiffStage::DuringAD, + )?; + } + // FIXME(ZuseZ4): Make this more granular + if cfg!(llvm_enzyme) && !thin { + unsafe { + write::llvm_optimize( + cgcx, + dcx, + module, + config, + opt_level, + llvm::OptStage::FatLTO, + write::AutodiffStage::PostAD, + )?; + } } debug!("lto done"); Ok(()) diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs index 155c07ff635..ae4c4d5876e 100644 --- a/compiler/rustc_codegen_llvm/src/back/write.rs +++ b/compiler/rustc_codegen_llvm/src/back/write.rs @@ -530,6 +530,16 @@ fn get_instr_profile_output_path(config: &ModuleConfig) -> Option<CString> { config.instrument_coverage.then(|| c"default_%m_%p.profraw".to_owned()) } +// PreAD will run llvm opts but disable size increasing opts (vectorization, loop unrolling) +// DuringAD is the same as above, but also runs the enzyme opt and autodiff passes. +// PostAD will run all opts, including size increasing opts.
+#[derive(Debug, Eq, PartialEq)] +pub(crate) enum AutodiffStage { + PreAD, + DuringAD, + PostAD, +} + pub(crate) unsafe fn llvm_optimize( cgcx: &CodegenContext<LlvmCodegenBackend>, dcx: DiagCtxtHandle<'_>, @@ -537,7 +547,7 @@ pub(crate) unsafe fn llvm_optimize( config: &ModuleConfig, opt_level: config::OptLevel, opt_stage: llvm::OptStage, - skip_size_increasing_opts: bool, + autodiff_stage: AutodiffStage, ) -> Result<(), FatalError> { // Enzyme: // The whole point of compiler based AD is to differentiate optimized IR instead of unoptimized @@ -550,13 +560,16 @@ pub(crate) unsafe fn llvm_optimize( let unroll_loops; let vectorize_slp; let vectorize_loop; + let run_enzyme = cfg!(llvm_enzyme) && autodiff_stage == AutodiffStage::DuringAD; - let run_enzyme = cfg!(llvm_enzyme); // When we build rustc with enzyme/autodiff support, we want to postpone size-increasing - // optimizations until after differentiation. FIXME(ZuseZ4): Before shipping on nightly, + // optimizations until after differentiation. Our pipeline is thus: (opt + enzyme), (full opt). + // We therefore have two calls to llvm_optimize, if autodiff is used. + // + // FIXME(ZuseZ4): Before shipping on nightly, // we should make this more granular, or at least check that the user has at least one autodiff // call in their code, to justify altering the compilation pipeline.
- if skip_size_increasing_opts && run_enzyme { + if cfg!(llvm_enzyme) && autodiff_stage != AutodiffStage::PostAD { unroll_loops = false; vectorize_slp = false; vectorize_loop = false; @@ -566,7 +579,7 @@ pub(crate) unsafe fn llvm_optimize( vectorize_slp = config.vectorize_slp; vectorize_loop = config.vectorize_loop; } - trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop); + trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop, ?run_enzyme); let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed(); let pgo_gen_path = get_pgo_gen_path(config); let pgo_use_path = get_pgo_use_path(config); @@ -686,18 +699,14 @@ pub(crate) unsafe fn optimize( _ => llvm::OptStage::PreLinkNoLTO, }; - // If we know that we will later run AD, then we disable vectorization and loop unrolling - let skip_size_increasing_opts = cfg!(llvm_enzyme); + // If we know that we will later run AD, then we disable vectorization and loop unrolling. + // Otherwise we pretend AD is already done and run the normal opt pipeline (=PostAD). + // FIXME(ZuseZ4): Make this more granular, only set PreAD if we actually have autodiff + // usages, not just if we build rustc with autodiff support. 
+ let autodiff_stage = + if cfg!(llvm_enzyme) { AutodiffStage::PreAD } else { AutodiffStage::PostAD }; return unsafe { - llvm_optimize( - cgcx, - dcx, - module, - config, - opt_level, - opt_stage, - skip_size_increasing_opts, - ) + llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, autodiff_stage) }; } Ok(()) diff --git a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs index 474b0940203..f82449479a8 100644 --- a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs +++ b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs @@ -4,10 +4,9 @@ use rustc_ast::expand::autodiff_attrs::{AutoDiffAttrs, AutoDiffItem, DiffActivit use rustc_codegen_ssa::ModuleCodegen; use rustc_codegen_ssa::back::write::ModuleConfig; use rustc_errors::FatalError; -use rustc_session::config::Lto; use tracing::{debug, trace}; -use crate::back::write::{llvm_err, llvm_optimize}; +use crate::back::write::llvm_err; use crate::builder::SBuilder; use crate::context::SimpleCx; use crate::declare::declare_simple_fn; @@ -153,7 +152,7 @@ fn generate_enzyme_call<'ll>( _ => {} } - trace!("matching autodiff arguments"); + debug!("matching autodiff arguments"); // We now handle the issue that Rust level arguments not always match the llvm-ir level // arguments. A slice, `&[f32]`, for example, is represented as a pointer and a length on // llvm-ir level. The number of activities matches the number of Rust level arguments, so we @@ -222,7 +221,10 @@ fn generate_enzyme_call<'ll>( // A duplicated pointer will have the following two outer_fn arguments: // (..., ptr, ptr, ...). We add the following llvm-ir to our __enzyme call: // (..., metadata! enzyme_dup, ptr, ptr, ...). 
- if matches!(diff_activity, DiffActivity::Duplicated | DiffActivity::DuplicatedOnly) { + if matches!( + diff_activity, + DiffActivity::Duplicated | DiffActivity::DuplicatedOnly + ) { assert!( llvm::LLVMRustGetTypeKind(next_outer_ty) == llvm::TypeKind::Pointer ); @@ -282,7 +284,7 @@ pub(crate) fn differentiate<'ll>( module: &'ll ModuleCodegen<ModuleLlvm>, cgcx: &CodegenContext<LlvmCodegenBackend>, diff_items: Vec<AutoDiffItem>, - config: &ModuleConfig, + _config: &ModuleConfig, ) -> Result<(), FatalError> { for item in &diff_items { trace!("{}", item); @@ -317,29 +319,6 @@ pub(crate) fn differentiate<'ll>( // FIXME(ZuseZ4): support SanitizeHWAddress and prevent illegal/unsupported opts - if let Some(opt_level) = config.opt_level { - let opt_stage = match cgcx.lto { - Lto::Fat => llvm::OptStage::PreLinkFatLTO, - Lto::Thin | Lto::ThinLocal => llvm::OptStage::PreLinkThinLTO, - _ if cgcx.opts.cg.linker_plugin_lto.enabled() => llvm::OptStage::PreLinkThinLTO, - _ => llvm::OptStage::PreLinkNoLTO, - }; - // This is our second opt call, so now we run all opts, - // to make sure we get the best performance. - let skip_size_increasing_opts = false; - trace!("running Module Optimization after differentiation"); - unsafe { - llvm_optimize( - cgcx, - diag_handler.handle(), - module, - config, - opt_level, - opt_stage, - skip_size_increasing_opts, - )?
- }; - } trace!("done with differentiate()"); Ok(()) diff --git a/compiler/rustc_llvm/build.rs b/compiler/rustc_llvm/build.rs index d9d28299413..48806888b43 100644 --- a/compiler/rustc_llvm/build.rs +++ b/compiler/rustc_llvm/build.rs @@ -193,6 +193,10 @@ fn main() { cfg.define(&flag, None); } + if tracked_env_var_os("LLVM_ENZYME").is_some() { + cfg.define("ENZYME", None); + } + if tracked_env_var_os("LLVM_RUSTLLVM").is_some() { cfg.define("LLVM_RUSTLLVM", None); } diff --git a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp index 0e591786d91..a6b2384f2d7 100644 --- a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp +++ b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp @@ -689,7 +689,9 @@ struct LLVMRustSanitizerOptions { }; // This symbol won't be available or used when Enzyme is not enabled -extern "C" void registerEnzyme(llvm::PassBuilder &PB) __attribute__((weak)); +#ifdef ENZYME +extern "C" void registerEnzyme(llvm::PassBuilder &PB); +#endif extern "C" LLVMRustResult LLVMRustOptimize( LLVMModuleRef ModuleRef, LLVMTargetMachineRef TMRef, @@ -697,8 +699,9 @@ extern "C" LLVMRustResult LLVMRustOptimize( bool IsLinkerPluginLTO, bool NoPrepopulatePasses, bool VerifyIR, bool LintIR, bool UseThinLTOBuffers, bool MergeFunctions, bool UnrollLoops, bool SLPVectorize, bool LoopVectorize, bool DisableSimplifyLibCalls, - bool EmitLifetimeMarkers, bool RunEnzyme, LLVMRustSanitizerOptions *SanitizerOptions, - const char *PGOGenPath, const char *PGOUsePath, bool InstrumentCoverage, + bool EmitLifetimeMarkers, bool RunEnzyme, + LLVMRustSanitizerOptions *SanitizerOptions, const char *PGOGenPath, + const char *PGOUsePath, bool InstrumentCoverage, const char *InstrProfileOutput, const char *PGOSampleUsePath, bool DebugInfoForProfiling, void *LlvmSelfProfiler, LLVMRustSelfProfileBeforePassCallback BeforePassCallback, @@ -1014,6 +1017,7 @@ extern "C" LLVMRustResult LLVMRustOptimize( } // now load "-enzyme" pass: +#ifdef ENZYME if 
(RunEnzyme) { registerEnzyme(PB); if (auto Err = PB.parsePassPipeline(MPM, "enzyme")) { @@ -1022,6 +1026,7 @@ extern "C" LLVMRustResult LLVMRustOptimize( return LLVMRustResult::Failure; } } +#endif // Upgrade all calls to old intrinsics first. for (Module::iterator I = TheModule->begin(), E = TheModule->end(); I != E;) diff --git a/src/bootstrap/src/core/build_steps/compile.rs b/src/bootstrap/src/core/build_steps/compile.rs index 4e360e94fd6..308d4723d98 100644 --- a/src/bootstrap/src/core/build_steps/compile.rs +++ b/src/bootstrap/src/core/build_steps/compile.rs @@ -1049,9 +1049,9 @@ pub fn rustc_cargo( // . cargo.rustflag("-Zon-broken-pipe=kill"); - // We temporarily disable linking here as part of some refactoring. - // This way, people can manually use -Z llvm-plugins and -C passes=enzyme for now. - // In a follow-up PR, we will re-enable linking here and load the pass for them. + // We want to link against registerEnzyme and in the future we want to use additional + // functionality from Enzyme core. For that we need to link against Enzyme. + // FIXME(ZuseZ4): Get the LLVM version number automatically instead of hardcoding it. if builder.config.llvm_enzyme { cargo.rustflag("-l").rustflag("Enzyme-19"); } @@ -1234,6 +1234,9 @@ fn rustc_llvm_env(builder: &Builder<'_>, cargo: &mut Cargo, target: TargetSelect if builder.is_rust_llvm(target) { cargo.env("LLVM_RUSTLLVM", "1"); } + if builder.config.llvm_enzyme { + cargo.env("LLVM_ENZYME", "1"); + } let llvm::LlvmResult { llvm_config, .. 
} = builder.ensure(llvm::Llvm { target }); cargo.env("LLVM_CONFIG", &llvm_config); diff --git a/tests/codegen/autodiff.rs b/tests/codegen/autodiff.rs index efc5ac77f1b..abf7fcf3e4b 100644 --- a/tests/codegen/autodiff.rs +++ b/tests/codegen/autodiff.rs @@ -15,9 +15,9 @@ fn square(x: &f64) -> f64 { // CHECK-NEXT:invertstart: // CHECK-NEXT: %_0 = fmul double %x.0.val, %x.0.val // CHECK-NEXT: %0 = fadd fast double %x.0.val, %x.0.val -// CHECK-NEXT: %1 = load double, ptr %"x'", align 8, !alias.scope !17816, !noalias !17819 +// CHECK-NEXT: %1 = load double, ptr %"x'", align 8 // CHECK-NEXT: %2 = fadd fast double %1, %0 -// CHECK-NEXT: store double %2, ptr %"x'", align 8, !alias.scope !17816, !noalias !17819 +// CHECK-NEXT: store double %2, ptr %"x'", align 8 // CHECK-NEXT: ret double %_0 // CHECK-NEXT:} -- cgit 1.4.1-3-g733a5