diff options
| author | Ralf Jung <post@ralfj.de> | 2025-04-07 07:18:31 +0000 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-04-07 07:18:31 +0000 |
| commit | f6e4d3bc5a22268292b620a570d47df2c204f9c6 (patch) | |
| tree | b451ad6d5c40298bbbf4b6bcd2c15e8776c1ccf8 /compiler/rustc_codegen_llvm/src | |
| parent | 1a4e85a4e6ce99e4adf8f043fb9f2aa6b2e7ca94 (diff) | |
| parent | 6ac0120a60d543d9557d2affd334ba02704c9fb8 (diff) | |
| download | rust-f6e4d3bc5a22268292b620a570d47df2c204f9c6.tar.gz rust-f6e4d3bc5a22268292b620a570d47df2c204f9c6.zip | |
Merge pull request #4252 from rust-lang/rustup-2025-04-07
Automatic Rustup
Diffstat (limited to 'compiler/rustc_codegen_llvm/src')
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/abi.rs | 17 | ||||
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/attributes.rs | 50 | ||||
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/back/lto.rs | 12 | ||||
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/back/write.rs | 6 | ||||
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/builder.rs | 30 | ||||
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/builder/autodiff.rs | 199 | ||||
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/consts.rs | 2 | ||||
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/context.rs | 62 | ||||
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/errors.rs | 4 | ||||
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs | 4 | ||||
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/llvm/ffi.rs | 2 | ||||
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/llvm_util.rs | 17 | ||||
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/mono_item.rs | 2 | ||||
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/va_arg.rs | 2 |
14 files changed, 254 insertions, 155 deletions
diff --git a/compiler/rustc_codegen_llvm/src/abi.rs b/compiler/rustc_codegen_llvm/src/abi.rs index 71059338151..8294e29d07d 100644 --- a/compiler/rustc_codegen_llvm/src/abi.rs +++ b/compiler/rustc_codegen_llvm/src/abi.rs @@ -17,14 +17,13 @@ use rustc_target::callconv::{ use rustc_target::spec::SanitizerSet; use smallvec::SmallVec; -use crate::attributes::llfn_attrs_from_instance; +use crate::attributes::{self, llfn_attrs_from_instance}; use crate::builder::Builder; use crate::context::CodegenCx; use crate::llvm::{self, Attribute, AttributePlace}; use crate::type_::Type; use crate::type_of::LayoutLlvmExt; use crate::value::Value; -use crate::{attributes, llvm_util}; trait ArgAttributesExt { fn apply_attrs_to_llfn(&self, idx: AttributePlace, cx: &CodegenCx<'_, '_>, llfn: &Value); @@ -437,7 +436,6 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> { let apply_range_attr = |idx: AttributePlace, scalar: rustc_abi::Scalar| { if cx.sess().opts.optimize != config::OptLevel::No - && llvm_util::get_version() >= (19, 0, 0) && matches!(scalar.primitive(), Primitive::Int(..)) // If the value is a boolean, the range is 0..2 and that ultimately // become 0..0 when the type becomes i1, which would be rejected @@ -571,19 +569,6 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> { } _ => {} } - if bx.cx.sess().opts.optimize != config::OptLevel::No - && llvm_util::get_version() < (19, 0, 0) - && let BackendRepr::Scalar(scalar) = self.ret.layout.backend_repr - && matches!(scalar.primitive(), Primitive::Int(..)) - // If the value is a boolean, the range is 0..2 and that ultimately - // become 0..0 when the type becomes i1, which would be rejected - // by the LLVM verifier. - && !scalar.is_bool() - // LLVM also rejects full range. - && !scalar.is_always_valid(bx) - { - bx.range_metadata(callsite, scalar.valid_range(bx)); - } for arg in self.args.iter() { match &arg.mode { PassMode::Ignore => {} diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs index 3d7afa17bdf..e8c42d16733 100644 --- a/compiler/rustc_codegen_llvm/src/attributes.rs +++ b/compiler/rustc_codegen_llvm/src/attributes.rs @@ -407,30 +407,28 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>( // Do not set sanitizer attributes for naked functions. to_add.extend(sanitize_attrs(cx, codegen_fn_attrs.no_sanitize)); - if llvm_util::get_version() >= (19, 0, 0) { - // For non-naked functions, set branch protection attributes on aarch64. - if let Some(BranchProtection { bti, pac_ret }) = - cx.sess().opts.unstable_opts.branch_protection - { - assert!(cx.sess().target.arch == "aarch64"); - if bti { - to_add.push(llvm::CreateAttrString(cx.llcx, "branch-target-enforcement")); - } - if let Some(PacRet { leaf, pc, key }) = pac_ret { - if pc { - to_add.push(llvm::CreateAttrString(cx.llcx, "branch-protection-pauth-lr")); - } - to_add.push(llvm::CreateAttrStringValue( - cx.llcx, - "sign-return-address", - if leaf { "all" } else { "non-leaf" }, - )); - to_add.push(llvm::CreateAttrStringValue( - cx.llcx, - "sign-return-address-key", - if key == PAuthKey::A { "a_key" } else { "b_key" }, - )); + // For non-naked functions, set branch protection attributes on aarch64. + if let Some(BranchProtection { bti, pac_ret }) = + cx.sess().opts.unstable_opts.branch_protection + { + assert!(cx.sess().target.arch == "aarch64"); + if bti { + to_add.push(llvm::CreateAttrString(cx.llcx, "branch-target-enforcement")); + } + if let Some(PacRet { leaf, pc, key }) = pac_ret { + if pc { + to_add.push(llvm::CreateAttrString(cx.llcx, "branch-protection-pauth-lr")); } + to_add.push(llvm::CreateAttrStringValue( + cx.llcx, + "sign-return-address", + if leaf { "all" } else { "non-leaf" }, + )); + to_add.push(llvm::CreateAttrStringValue( + cx.llcx, + "sign-return-address-key", + if key == PAuthKey::A { "a_key" } else { "b_key" }, + )); } } } @@ -510,12 +508,6 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>( InstructionSetAttr::ArmA32 => "-thumb-mode".to_string(), InstructionSetAttr::ArmT32 => "+thumb-mode".to_string(), })) - // HACK: LLVM versions 19+ do not have the FPMR feature and treat it as always enabled - // It only exists as a feature in LLVM 18, cannot be passed down for any other version - .chain(match &*cx.tcx.sess.target.arch { - "aarch64" if llvm_util::get_version().0 == 18 => vec!["+fpmr".to_string()], - _ => vec![], - }) .collect::<Vec<String>>(); if cx.tcx.sess.target.is_like_wasm { diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs index f083cfbd7d3..a8b49e9552c 100644 --- a/compiler/rustc_codegen_llvm/src/back/lto.rs +++ b/compiler/rustc_codegen_llvm/src/back/lto.rs @@ -610,6 +610,8 @@ fn enable_autodiff_settings(ad: &[config::AutoDiff], module: &mut ModuleCodegen< } // We handle this below config::AutoDiff::PrintModAfter => {} + // We handle this below + config::AutoDiff::PrintModFinal => {} // This is required and already checked config::AutoDiff::Enable => {} } @@ -657,14 +659,20 @@ pub(crate) fn run_pass_manager( } if cfg!(llvm_enzyme) && enable_ad { + // This is the post-autodiff IR, mainly used for testing and educational purposes. + if config.autodiff.contains(&config::AutoDiff::PrintModAfter) { + unsafe { llvm::LLVMDumpModule(module.module_llvm.llmod()) }; + } + let opt_stage = llvm::OptStage::FatLTO; let stage = write::AutodiffStage::PostAD; unsafe { write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?; } - // This is the final IR, so people should be able to inspect the optimized autodiff output. - if config.autodiff.contains(&config::AutoDiff::PrintModAfter) { + // This is the final IR, so people should be able to inspect the optimized autodiff output, + // for manual inspection. + if config.autodiff.contains(&config::AutoDiff::PrintModFinal) { unsafe { llvm::LLVMDumpModule(module.module_llvm.llmod()) }; } } diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs index bead4c82a81..bf6138142b6 100644 --- a/compiler/rustc_codegen_llvm/src/back/write.rs +++ b/compiler/rustc_codegen_llvm/src/back/write.rs @@ -1024,7 +1024,7 @@ fn create_section_with_flags_asm(section_name: &str, section_flags: &str, data: } pub(crate) fn bitcode_section_name(cgcx: &CodegenContext<LlvmCodegenBackend>) -> &'static CStr { - if cgcx.target_is_like_osx { + if cgcx.target_is_like_darwin { c"__LLVM,__bitcode" } else if cgcx.target_is_like_aix { c".ipa" @@ -1077,7 +1077,7 @@ unsafe fn embed_bitcode( // and COFF we emit the sections using module level inline assembly for that // reason (see issue #90326 for historical background). unsafe { - if cgcx.target_is_like_osx + if cgcx.target_is_like_darwin || cgcx.target_is_like_aix || cgcx.target_arch == "wasm32" || cgcx.target_arch == "wasm64" @@ -1096,7 +1096,7 @@ unsafe fn embed_bitcode( let llglobal = llvm::add_global(llmod, common::val_ty(llconst), c"rustc.embedded.cmdline"); llvm::set_initializer(llglobal, llconst); - let section = if cgcx.target_is_like_osx { + let section = if cgcx.target_is_like_darwin { c"__LLVM,__cmdline" } else if cgcx.target_is_like_aix { c".info" diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs index 297f104d124..35134e9f5a0 100644 --- a/compiler/rustc_codegen_llvm/src/builder.rs +++ b/compiler/rustc_codegen_llvm/src/builder.rs @@ -30,6 +30,7 @@ use smallvec::SmallVec; use tracing::{debug, instrument}; use crate::abi::FnAbiLlvmExt; +use crate::attributes; use crate::common::Funclet; use crate::context::{CodegenCx, FullCx, GenericCx, SCx}; use crate::llvm::{ @@ -38,7 +39,6 @@ use crate::llvm::{ use crate::type_::Type; use crate::type_of::LayoutLlvmExt; use crate::value::Value; -use crate::{attributes, llvm_util}; #[must_use] pub(crate) struct GenericBuilder<'a, 'll, CX: Borrow<SCx<'ll>>> { @@ -927,11 +927,9 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { debug_assert_ne!(self.val_ty(val), dest_ty); let trunc = self.trunc(val, dest_ty); - if llvm_util::get_version() >= (19, 0, 0) { - unsafe { - if llvm::LLVMIsAInstruction(trunc).is_some() { - llvm::LLVMSetNUW(trunc, True); - } + unsafe { + if llvm::LLVMIsAInstruction(trunc).is_some() { + llvm::LLVMSetNUW(trunc, True); } } trunc @@ -941,11 +939,9 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { debug_assert_ne!(self.val_ty(val), dest_ty); let trunc = self.trunc(val, dest_ty); - if llvm_util::get_version() >= (19, 0, 0) { - unsafe { - if llvm::LLVMIsAInstruction(trunc).is_some() { - llvm::LLVMSetNSW(trunc, True); - } + unsafe { + if llvm::LLVMIsAInstruction(trunc).is_some() { + llvm::LLVMSetNSW(trunc, True); } } trunc @@ -1899,10 +1895,6 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> { hash: &'ll Value, bitmap_bits: &'ll Value, ) { - assert!( - crate::llvm_util::get_version() >= (19, 0, 0), - "MCDC intrinsics require LLVM 19 or later" - ); self.call_intrinsic("llvm.instrprof.mcdc.parameters", &[fn_name, hash, bitmap_bits]); } @@ -1914,10 +1906,6 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> { bitmap_index: &'ll Value, mcdc_temp: &'ll Value, ) { - assert!( - crate::llvm_util::get_version() >= (19, 0, 0), - "MCDC intrinsics require LLVM 19 or later" - ); let args = &[fn_name, hash, bitmap_index, mcdc_temp]; self.call_intrinsic("llvm.instrprof.mcdc.tvbitmap.update", args); } @@ -1929,10 +1917,6 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> { #[instrument(level = "debug", skip(self))] pub(crate) fn mcdc_condbitmap_update(&mut self, cond_index: &'ll Value, mcdc_temp: &'ll Value) { - assert!( - crate::llvm_util::get_version() >= (19, 0, 0), - "MCDC intrinsics require LLVM 19 or later" - ); let align = self.tcx.data_layout.i32_align.abi; let current_tv_index = self.load(self.cx.type_i32(), mcdc_temp, align); let new_tv_index = self.add(current_tv_index, cond_index); diff --git a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs index 7cd4ee539d8..7d264ba4d00 100644 --- a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs +++ b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs @@ -3,8 +3,10 @@ use std::ptr; use rustc_ast::expand::autodiff_attrs::{AutoDiffAttrs, AutoDiffItem, DiffActivity, DiffMode}; use rustc_codegen_ssa::ModuleCodegen; use rustc_codegen_ssa::back::write::ModuleConfig; -use rustc_codegen_ssa::traits::BaseTypeCodegenMethods as _; +use rustc_codegen_ssa::common::TypeKind; +use rustc_codegen_ssa::traits::BaseTypeCodegenMethods; use rustc_errors::FatalError; +use rustc_middle::bug; use tracing::{debug, trace}; use crate::back::write::llvm_err; @@ -18,21 +20,42 @@ use crate::value::Value; use crate::{CodegenContext, LlvmCodegenBackend, ModuleLlvm, attributes, llvm}; fn get_params(fnc: &Value) -> Vec<&Value> { + let param_num = llvm::LLVMCountParams(fnc) as usize; + let mut fnc_args: Vec<&Value> = vec![]; + fnc_args.reserve(param_num); unsafe { - let param_num = llvm::LLVMCountParams(fnc) as usize; - let mut fnc_args: Vec<&Value> = vec![]; - fnc_args.reserve(param_num); llvm::LLVMGetParams(fnc, fnc_args.as_mut_ptr()); fnc_args.set_len(param_num); - fnc_args } + fnc_args } +fn has_sret(fnc: &Value) -> bool { + let num_args = llvm::LLVMCountParams(fnc) as usize; + if num_args == 0 { + false + } else { + unsafe { llvm::LLVMRustHasAttributeAtIndex(fnc, 0, llvm::AttributeKind::StructRet) } + } +} + +// When we call the `__enzyme_autodiff` or `__enzyme_fwddiff` function, we need to pass all the +// original inputs, as well as metadata and the additional shadow arguments. +// This function matches the arguments from the outer function to the inner enzyme call. +// +// This function also considers that Rust level arguments not always match the llvm-ir level +// arguments. A slice, `&[f32]`, for example, is represented as a pointer and a length on +// llvm-ir level. The number of activities matches the number of Rust level arguments, so we +// need to match those. +// FIXME(ZuseZ4): This logic is a bit more complicated than it should be, can we simplify it +// using iterators and peek()? fn match_args_from_caller_to_enzyme<'ll>( cx: &SimpleCx<'ll>, + width: u32, args: &mut Vec<&'ll llvm::Value>, inputs: &[DiffActivity], outer_args: &[&'ll llvm::Value], + has_sret: bool, ) { debug!("matching autodiff arguments"); // We now handle the issue that Rust level arguments not always match the llvm-ir level @@ -44,6 +67,14 @@ fn match_args_from_caller_to_enzyme<'ll>( let mut outer_pos: usize = 0; let mut activity_pos = 0; + if has_sret { + // Then the first outer arg is the sret pointer. Enzyme doesn't know about sret, so the + // inner function will still return something. We increase our outer_pos by one, + // and once we're done with all other args we will take the return of the inner call and + // update the sret pointer with it + outer_pos = 1; + } + let enzyme_const = cx.create_metadata("enzyme_const".to_string()).unwrap(); let enzyme_out = cx.create_metadata("enzyme_out".to_string()).unwrap(); let enzyme_dup = cx.create_metadata("enzyme_dup".to_string()).unwrap(); @@ -92,23 +123,20 @@ fn match_args_from_caller_to_enzyme<'ll>( // (..., metadata! enzyme_dup, ptr, ptr, int1, ...). // FIXME(ZuseZ4): We will upstream a safety check later which asserts that // int2 >= int1, which means the shadow vector is large enough to store the gradient. - assert!(unsafe { - llvm::LLVMRustGetTypeKind(next_outer_ty) == llvm::TypeKind::Integer - }); - let next_outer_arg2 = outer_args[outer_pos + 2]; - let next_outer_ty2 = cx.val_ty(next_outer_arg2); - assert!(unsafe { - llvm::LLVMRustGetTypeKind(next_outer_ty2) == llvm::TypeKind::Pointer - }); - let next_outer_arg3 = outer_args[outer_pos + 3]; - let next_outer_ty3 = cx.val_ty(next_outer_arg3); - assert!(unsafe { - llvm::LLVMRustGetTypeKind(next_outer_ty3) == llvm::TypeKind::Integer - }); - args.push(next_outer_arg2); + assert_eq!(cx.type_kind(next_outer_ty), TypeKind::Integer); + + for i in 0..(width as usize) { + let next_outer_arg2 = outer_args[outer_pos + 2 * (i + 1)]; + let next_outer_ty2 = cx.val_ty(next_outer_arg2); + assert_eq!(cx.type_kind(next_outer_ty2), TypeKind::Pointer); + let next_outer_arg3 = outer_args[outer_pos + 2 * (i + 1) + 1]; + let next_outer_ty3 = cx.val_ty(next_outer_arg3); + assert_eq!(cx.type_kind(next_outer_ty3), TypeKind::Integer); + args.push(next_outer_arg2); + } args.push(cx.get_metadata_value(enzyme_const)); args.push(next_outer_arg); - outer_pos += 4; + outer_pos += 2 + 2 * width as usize; activity_pos += 2; } else { // A duplicated pointer will have the following two outer_fn arguments: @@ -116,15 +144,19 @@ fn match_args_from_caller_to_enzyme<'ll>( // (..., metadata! enzyme_dup, ptr, ptr, ...). if matches!(diff_activity, DiffActivity::Duplicated | DiffActivity::DuplicatedOnly) { - assert!( - unsafe { llvm::LLVMRustGetTypeKind(next_outer_ty) } - == llvm::TypeKind::Pointer - ); + assert_eq!(cx.type_kind(next_outer_ty), TypeKind::Pointer); } // In the case of Dual we don't have assumptions, e.g. f32 would be valid. args.push(next_outer_arg); outer_pos += 2; activity_pos += 1; + + // Now, if width > 1, we need to account for that + for _ in 1..width { + let next_outer_arg = outer_args[outer_pos]; + args.push(next_outer_arg); + outer_pos += 1; + } } } else { // We do not differentiate with resprect to this argument. @@ -135,6 +167,76 @@ fn match_args_from_caller_to_enzyme<'ll>( } } +// On LLVM-IR, we can luckily declare __enzyme_ functions without specifying the input +// arguments. We do however need to declare them with their correct return type. +// We already figured the correct return type out in our frontend, when generating the outer_fn, +// so we can now just go ahead and use that. This is not always trivial, e.g. because sret. +// Beyond sret, this article describes our challenges nicely: +// <https://yorickpeterse.com/articles/the-mess-that-is-handling-structure-arguments-and-returns-in-llvm/> +// I.e. (i32, f32) will get merged into i64, but we don't handle that yet. +fn compute_enzyme_fn_ty<'ll>( + cx: &SimpleCx<'ll>, + attrs: &AutoDiffAttrs, + fn_to_diff: &'ll Value, + outer_fn: &'ll Value, +) -> &'ll llvm::Type { + let fn_ty = cx.get_type_of_global(outer_fn); + let mut ret_ty = cx.get_return_type(fn_ty); + + let has_sret = has_sret(outer_fn); + + if has_sret { + // Now we don't just forward the return type, so we have to figure it out based on the + // primal return type, in combination with the autodiff settings. + let fn_ty = cx.get_type_of_global(fn_to_diff); + let inner_ret_ty = cx.get_return_type(fn_ty); + + let void_ty = unsafe { llvm::LLVMVoidTypeInContext(cx.llcx) }; + if inner_ret_ty == void_ty { + // This indicates that even the inner function has an sret. + // Right now I only look for an sret in the outer function. + // This *probably* needs some extra handling, but I never ran + // into such a case. So I'll wait for user reports to have a test case. + bug!("sret in inner function"); + } + + if attrs.width == 1 { + todo!("Handle sret for scalar ad"); + } else { + // First we check if we also have to deal with the primal return. + match attrs.mode { + DiffMode::Forward => match attrs.ret_activity { + DiffActivity::Dual => { + let arr_ty = + unsafe { llvm::LLVMArrayType2(inner_ret_ty, attrs.width as u64 + 1) }; + ret_ty = arr_ty; + } + DiffActivity::DualOnly => { + let arr_ty = + unsafe { llvm::LLVMArrayType2(inner_ret_ty, attrs.width as u64) }; + ret_ty = arr_ty; + } + DiffActivity::Const => { + todo!("Not sure, do we need to do something here?"); + } + _ => { + bug!("unreachable"); + } + }, + DiffMode::Reverse => { + todo!("Handle sret for reverse mode"); + } + _ => { + bug!("unreachable"); + } + } + } + } + + // LLVM can figure out the input types on it's own, so we take a shortcut here. + unsafe { llvm::LLVMFunctionType(ret_ty, ptr::null(), 0, True) } +} + /// When differentiating `fn_to_diff`, take a `outer_fn` and generate another /// function with expected naming and calling conventions[^1] which will be /// discovered by the enzyme LLVM pass and its body populated with the differentiated @@ -197,17 +299,9 @@ fn generate_enzyme_call<'ll>( // } // ``` unsafe { - // On LLVM-IR, we can luckily declare __enzyme_ functions without specifying the input - // arguments. We do however need to declare them with their correct return type. - // We already figured the correct return type out in our frontend, when generating the outer_fn, - // so we can now just go ahead and use that. FIXME(ZuseZ4): This doesn't handle sret yet. - let fn_ty = llvm::LLVMGlobalGetValueType(outer_fn); - let ret_ty = llvm::LLVMGetReturnType(fn_ty); - - // LLVM can figure out the input types on it's own, so we take a shortcut here. - let enzyme_ty = llvm::LLVMFunctionType(ret_ty, ptr::null(), 0, True); + let enzyme_ty = compute_enzyme_fn_ty(cx, &attrs, fn_to_diff, outer_fn); - //FIXME(ZuseZ4): the CC/Addr/Vis values are best effort guesses, we should look at tests and + // FIXME(ZuseZ4): the CC/Addr/Vis values are best effort guesses, we should look at tests and // think a bit more about what should go here. let cc = llvm::LLVMGetFunctionCallConv(outer_fn); let ad_fn = declare_simple_fn( @@ -240,14 +334,27 @@ fn generate_enzyme_call<'ll>( if matches!(attrs.ret_activity, DiffActivity::Dual | DiffActivity::Active) { args.push(cx.get_metadata_value(enzyme_primal_ret)); } + if attrs.width > 1 { + let enzyme_width = cx.create_metadata("enzyme_width".to_string()).unwrap(); + args.push(cx.get_metadata_value(enzyme_width)); + args.push(cx.get_const_i64(attrs.width as u64)); + } + let has_sret = has_sret(outer_fn); let outer_args: Vec<&llvm::Value> = get_params(outer_fn); - match_args_from_caller_to_enzyme(&cx, &mut args, &attrs.input_activity, &outer_args); + match_args_from_caller_to_enzyme( + &cx, + attrs.width, + &mut args, + &attrs.input_activity, + &outer_args, + has_sret, + ); let call = builder.call(enzyme_ty, ad_fn, &args, None); // This part is a bit iffy. LLVM requires that a call to an inlineable function has some - // metadata attachted to it, but we just created this code oota. Given that the + // metadata attached to it, but we just created this code oota. Given that the // differentiated function already has partly confusing metadata, and given that this // affects nothing but the auttodiff IR, we take a shortcut and just steal metadata from the // dummy code which we inserted at a higher level. @@ -268,7 +375,22 @@ fn generate_enzyme_call<'ll>( // Now that we copied the metadata, get rid of dummy code. llvm::LLVMRustEraseInstUntilInclusive(entry, last_inst); - if cx.val_ty(call) == cx.type_void() { + if cx.val_ty(call) == cx.type_void() || has_sret { + if has_sret { + // This is what we already have in our outer_fn (shortened): + // define void @_foo(ptr <..> sret([32 x i8]) initializes((0, 32)) %0, <...>) { + // %7 = call [4 x double] (...) @__enzyme_fwddiff_foo(ptr @square, metadata !"enzyme_width", i64 4, <...>) + // <Here we are, we want to add the following two lines> + // store [4 x double] %7, ptr %0, align 8 + // ret void + // } + + // now store the result of the enzyme call into the sret pointer. + let sret_ptr = outer_args[0]; + let call_ty = cx.val_ty(call); + assert_eq!(cx.type_kind(call_ty), TypeKind::Array); + llvm::LLVMBuildStore(&builder.llbuilder, call, sret_ptr); + } builder.ret_void(); } else { builder.ret(call); @@ -300,8 +422,7 @@ pub(crate) fn differentiate<'ll>( if !diff_items.is_empty() && !cgcx.opts.unstable_opts.autodiff.contains(&rustc_session::config::AutoDiff::Enable) { - let dcx = cgcx.create_dcx(); - return Err(dcx.handle().emit_almost_fatal(AutoDiffWithoutEnable)); + return Err(diag_handler.handle().emit_almost_fatal(AutoDiffWithoutEnable)); } // Before dumping the module, we want all the TypeTrees to become part of the module. diff --git a/compiler/rustc_codegen_llvm/src/consts.rs b/compiler/rustc_codegen_llvm/src/consts.rs index 7675e75338a..bf81eb648f8 100644 --- a/compiler/rustc_codegen_llvm/src/consts.rs +++ b/compiler/rustc_codegen_llvm/src/consts.rs @@ -430,7 +430,7 @@ impl<'ll> CodegenCx<'ll, '_> { let val_llty = self.val_ty(v); let g = self.get_static_inner(def_id, val_llty); - let llty = llvm::LLVMGlobalGetValueType(g); + let llty = self.get_type_of_global(g); let g = if val_llty == llty { g diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs index f7b096ff976..4ec69995518 100644 --- a/compiler/rustc_codegen_llvm/src/context.rs +++ b/compiler/rustc_codegen_llvm/src/context.rs @@ -8,6 +8,7 @@ use std::str; use rustc_abi::{HasDataLayout, Size, TargetDataLayout, VariantIdx}; use rustc_codegen_ssa::back::versioned_llvm_target; use rustc_codegen_ssa::base::{wants_msvc_seh, wants_wasm_eh}; +use rustc_codegen_ssa::common::TypeKind; use rustc_codegen_ssa::errors as ssa_errors; use rustc_codegen_ssa::traits::*; use rustc_data_structures::base_n::{ALPHANUMERIC_ONLY, ToBaseN}; @@ -38,7 +39,7 @@ use crate::debuginfo::metadata::apply_vcall_visibility_metadata; use crate::llvm::Metadata; use crate::type_::Type; use crate::value::Value; -use crate::{attributes, coverageinfo, debuginfo, llvm, llvm_util}; +use crate::{attributes, common, coverageinfo, debuginfo, llvm, llvm_util}; /// `TyCtxt` (and related cache datastructures) can't be move between threads. /// However, there are various cx related functions which we want to be available to the builder and @@ -163,23 +164,6 @@ pub(crate) unsafe fn create_module<'ll>( let mut target_data_layout = sess.target.data_layout.to_string(); let llvm_version = llvm_util::get_version(); - if llvm_version < (19, 0, 0) { - if sess.target.arch == "aarch64" || sess.target.arch.starts_with("arm64") { - // LLVM 19 sets -Fn32 in its data layout string for 64-bit ARM - // Earlier LLVMs leave this default, so remove it. - // See https://github.com/llvm/llvm-project/pull/90702 - target_data_layout = target_data_layout.replace("-Fn32", ""); - } - } - - if llvm_version < (19, 0, 0) { - if sess.target.arch == "loongarch64" { - // LLVM 19 updates the LoongArch64 data layout. - // See https://github.com/llvm/llvm-project/pull/93814 - target_data_layout = target_data_layout.replace("-n32:64", "-n64"); - } - } - if llvm_version < (20, 0, 0) { if sess.target.arch == "aarch64" || sess.target.arch.starts_with("arm64") { // LLVM 20 defines three additional address spaces for alternate @@ -327,6 +311,22 @@ pub(crate) unsafe fn create_module<'ll>( pfe.prefix().into(), ); } + + // Add "kcfi-arity" module flag if KCFI arity indicator is enabled. (See + // https://github.com/llvm/llvm-project/pull/117121.) + if sess.is_sanitizer_kcfi_arity_enabled() { + // KCFI arity indicator requires LLVM 21.0.0 or later. + if llvm_version < (21, 0, 0) { + tcx.dcx().emit_err(crate::errors::SanitizerKcfiArityRequiresLLVM2100); + } + + llvm::add_module_flag_u32( + llmod, + llvm::ModuleFlagMergeBehavior::Override, + "kcfi-arity", + 1, + ); + } } // Control Flow Guard is currently only supported by MSVC and LLVM on Windows. @@ -643,7 +643,18 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { llvm::set_section(g, c"llvm.metadata"); } } - +impl<'ll> SimpleCx<'ll> { + pub(crate) fn get_return_type(&self, ty: &'ll Type) -> &'ll Type { + assert_eq!(self.type_kind(ty), TypeKind::Function); + unsafe { llvm::LLVMGetReturnType(ty) } + } + pub(crate) fn get_type_of_global(&self, val: &'ll Value) -> &'ll Type { + unsafe { llvm::LLVMGlobalGetValueType(val) } + } + pub(crate) fn val_ty(&self, v: &'ll Value) -> &'ll Type { + common::val_ty(v) + } +} impl<'ll> SimpleCx<'ll> { pub(crate) fn new( llmod: &'ll llvm::Module, @@ -660,6 +671,13 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> { llvm::LLVMMetadataAsValue(self.llcx(), metadata) } + // FIXME(autodiff): We should split `ConstCodegenMethods` to pull the reusable parts + // onto a trait that is also implemented for GenericCx. + pub(crate) fn get_const_i64(&self, n: u64) -> &'ll Value { + let ty = unsafe { llvm::LLVMInt64TypeInContext(self.llcx()) }; + unsafe { llvm::LLVMConstInt(ty, n, llvm::False) } + } + pub(crate) fn get_function(&self, name: &str) -> Option<&'ll Value> { let name = SmallCStr::new(name); unsafe { llvm::LLVMGetNamedFunction((**self).borrow().llmod, name.as_ptr()) } @@ -1183,10 +1201,8 @@ impl<'ll> CodegenCx<'ll, '_> { if self.sess().instrument_coverage() { ifn!("llvm.instrprof.increment", fn(ptr, t_i64, t_i32, t_i32) -> void); - if crate::llvm_util::get_version() >= (19, 0, 0) { - ifn!("llvm.instrprof.mcdc.parameters", fn(ptr, t_i64, t_i32) -> void); - ifn!("llvm.instrprof.mcdc.tvbitmap.update", fn(ptr, t_i64, t_i32, ptr) -> void); - } + ifn!("llvm.instrprof.mcdc.parameters", fn(ptr, t_i64, t_i32) -> void); + ifn!("llvm.instrprof.mcdc.tvbitmap.update", fn(ptr, t_i64, t_i32, ptr) -> void); } ifn!("llvm.type.test", fn(ptr, t_metadata) -> i1); diff --git a/compiler/rustc_codegen_llvm/src/errors.rs b/compiler/rustc_codegen_llvm/src/errors.rs index 4c5a78ca74f..ecf108f988f 100644 --- a/compiler/rustc_codegen_llvm/src/errors.rs +++ b/compiler/rustc_codegen_llvm/src/errors.rs @@ -217,3 +217,7 @@ pub(crate) struct MismatchedDataLayout<'a> { pub(crate) struct FixedX18InvalidArch<'a> { pub arch: &'a str, } + +#[derive(Diagnostic)] +#[diag(codegen_llvm_sanitizer_kcfi_arity_requires_llvm_21_0_0)] +pub(crate) struct SanitizerKcfiArityRequiresLLVM2100; diff --git a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs index 79e4cc8aa77..a9b3bdf7344 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs @@ -4,7 +4,7 @@ use libc::{c_char, c_uint}; use super::MetadataKindId; -use super::ffi::{BasicBlock, Metadata, Module, Type, Value}; +use super::ffi::{AttributeKind, BasicBlock, Metadata, Module, Type, Value}; use crate::llvm::Bool; #[link(name = "llvm-wrapper", kind = "static")] @@ -17,6 +17,8 @@ unsafe extern "C" { pub(crate) fn LLVMRustEraseInstFromParent(V: &Value); pub(crate) fn LLVMRustGetTerminator<'a>(B: &BasicBlock) -> &'a Value; pub(crate) fn LLVMRustVerifyFunction(V: &Value, action: LLVMRustVerifierFailureAction) -> Bool; + pub(crate) fn LLVMRustHasAttributeAtIndex(V: &Value, i: c_uint, Kind: AttributeKind) -> bool; + pub(crate) fn LLVMRustGetArrayNumElements(Ty: &Type) -> u64; } unsafe extern "C" { diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index 3ce3761944b..9ff04f72903 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -1180,7 +1180,7 @@ unsafe extern "C" { // Operations on parameters pub(crate) fn LLVMIsAArgument(Val: &Value) -> Option<&Value>; - pub(crate) fn LLVMCountParams(Fn: &Value) -> c_uint; + pub(crate) safe fn LLVMCountParams(Fn: &Value) -> c_uint; pub(crate) fn LLVMGetParam(Fn: &Value, Index: c_uint) -> &Value; // Operations on basic blocks diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs index 4e85286ed55..36e35f81392 100644 --- a/compiler/rustc_codegen_llvm/src/llvm_util.rs +++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs @@ -256,7 +256,6 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea ("aarch64", "pmuv3") => Some(LLVMFeature::new("perfmon")), ("aarch64", "paca") => Some(LLVMFeature::new("pauth")), ("aarch64", "pacg") => Some(LLVMFeature::new("pauth")), - ("aarch64", "pauth-lr") if get_version().0 < 19 => None, // Before LLVM 20 those two features were packaged together as b16b16 ("aarch64", "sve-b16b16") if get_version().0 < 20 => Some(LLVMFeature::new("b16b16")), ("aarch64", "sme-b16b16") if get_version().0 < 20 => Some(LLVMFeature::new("b16b16")), @@ -270,20 +269,9 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea ("aarch64", "fhm") => Some(LLVMFeature::new("fp16fml")), ("aarch64", "fp16") => Some(LLVMFeature::new("fullfp16")), // Filter out features that are not supported by the current LLVM version - ("aarch64", "fpmr") if get_version().0 != 18 => None, + ("aarch64", "fpmr") => None, // only existed in 18 ("arm", "fp16") => Some(LLVMFeature::new("fullfp16")), - // In LLVM 18, `unaligned-scalar-mem` was merged with `unaligned-vector-mem` into a single - // feature called `fast-unaligned-access`. In LLVM 19, it was split back out. - ("riscv32" | "riscv64", "unaligned-scalar-mem" | "unaligned-vector-mem") - if get_version().0 == 18 => - { - Some(LLVMFeature::new("fast-unaligned-access")) - } // Filter out features that are not supported by the current LLVM version - ("riscv32" | "riscv64", "zaamo") if get_version().0 < 19 => None, - ("riscv32" | "riscv64", "zabha") if get_version().0 < 19 => None, - ("riscv32" | "riscv64", "zalrsc") if get_version().0 < 19 => None, - ("riscv32" | "riscv64", "zama16b") if get_version().0 < 19 => None, ("riscv32" | "riscv64", "zacas") if get_version().0 < 20 => None, // Enable the evex512 target feature if an avx512 target feature is enabled. ("x86", s) if s.starts_with("avx512") => { @@ -295,10 +283,9 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea ("sparc", "leoncasa") => Some(LLVMFeature::new("hasleoncasa")), // In LLVM 19, there is no `v8plus` feature and `v9` means "SPARC-V9 instruction available and SPARC-V8+ ABI used". // https://github.com/llvm/llvm-project/blob/llvmorg-19.1.0/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp#L27-L28 - // Before LLVM 19, there is no `v8plus` feature and `v9` means "SPARC-V9 instruction available". + // Before LLVM 19, there was no `v8plus` feature and `v9` means "SPARC-V9 instruction available". // https://github.com/llvm/llvm-project/blob/llvmorg-18.1.0/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp#L26 ("sparc", "v8plus") if get_version().0 == 19 => Some(LLVMFeature::new("v9")), - ("sparc", "v8plus") if get_version().0 < 19 => None, ("powerpc", "power8-crypto") => Some(LLVMFeature::new("crypto")), // These new `amx` variants and `movrs` were introduced in LLVM20 ("x86", "amx-avx512" | "amx-fp8" | "amx-movrs" | "amx-tf32" | "amx-transpose") diff --git a/compiler/rustc_codegen_llvm/src/mono_item.rs b/compiler/rustc_codegen_llvm/src/mono_item.rs index a64627eaf59..fdf62a08065 100644 --- a/compiler/rustc_codegen_llvm/src/mono_item.rs +++ b/compiler/rustc_codegen_llvm/src/mono_item.rs @@ -120,7 +120,7 @@ impl CodegenCx<'_, '_> { } // Match clang by only supporting COFF and ELF for now. - if self.tcx.sess.target.is_like_osx { + if self.tcx.sess.target.is_like_darwin { return false; } diff --git a/compiler/rustc_codegen_llvm/src/va_arg.rs b/compiler/rustc_codegen_llvm/src/va_arg.rs index 8baa69cefe1..c216f0f4a09 100644 --- a/compiler/rustc_codegen_llvm/src/va_arg.rs +++ b/compiler/rustc_codegen_llvm/src/va_arg.rs @@ -399,7 +399,7 @@ pub(super) fn emit_va_arg<'ll, 'tcx>( emit_ptr_va_arg(bx, addr, target_ty, false, Align::from_bytes(8).unwrap(), false) } // macOS / iOS AArch64 - "aarch64" if target.is_like_osx => { + "aarch64" if target.is_like_darwin => { emit_ptr_va_arg(bx, addr, target_ty, false, Align::from_bytes(8).unwrap(), true) } "aarch64" => emit_aapcs_va_arg(bx, addr, target_ty), |
