about summary refs log tree commit diff
path: root/compiler/rustc_codegen_llvm/src
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/rustc_codegen_llvm/src')
-rw-r--r--compiler/rustc_codegen_llvm/src/abi.rs19
-rw-r--r--compiler/rustc_codegen_llvm/src/back/lto.rs93
-rw-r--r--compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs11
-rw-r--r--compiler/rustc_codegen_llvm/src/back/write.rs21
-rw-r--r--compiler/rustc_codegen_llvm/src/builder.rs150
-rw-r--r--compiler/rustc_codegen_llvm/src/builder/autodiff.rs13
-rw-r--r--compiler/rustc_codegen_llvm/src/callee.rs106
-rw-r--r--compiler/rustc_codegen_llvm/src/common.rs5
-rw-r--r--compiler/rustc_codegen_llvm/src/consts.rs19
-rw-r--r--compiler/rustc_codegen_llvm/src/context.rs8
-rw-r--r--compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs2
-rw-r--r--compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs18
-rw-r--r--compiler/rustc_codegen_llvm/src/debuginfo/mod.rs2
-rw-r--r--compiler/rustc_codegen_llvm/src/declare.rs3
-rw-r--r--compiler/rustc_codegen_llvm/src/errors.rs5
-rw-r--r--compiler/rustc_codegen_llvm/src/intrinsic.rs42
-rw-r--r--compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs98
-rw-r--r--compiler/rustc_codegen_llvm/src/llvm/ffi.rs24
-rw-r--r--compiler/rustc_codegen_llvm/src/llvm/mod.rs33
-rw-r--r--compiler/rustc_codegen_llvm/src/llvm_util.rs9
-rw-r--r--compiler/rustc_codegen_llvm/src/mono_item.rs25
-rw-r--r--compiler/rustc_codegen_llvm/src/type_of.rs13
22 files changed, 490 insertions, 229 deletions
diff --git a/compiler/rustc_codegen_llvm/src/abi.rs b/compiler/rustc_codegen_llvm/src/abi.rs
index 28f423efc21..8c75125e009 100644
--- a/compiler/rustc_codegen_llvm/src/abi.rs
+++ b/compiler/rustc_codegen_llvm/src/abi.rs
@@ -2,19 +2,18 @@ use std::borrow::Borrow;
 use std::cmp;
 
 use libc::c_uint;
-use rustc_abi as abi;
-pub(crate) use rustc_abi::ExternAbi;
-use rustc_abi::{HasDataLayout, Primitive, Reg, RegKind, Size};
+use rustc_abi::{BackendRepr, HasDataLayout, Primitive, Reg, RegKind, Size};
 use rustc_codegen_ssa::MemFlags;
 use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue};
 use rustc_codegen_ssa::mir::place::{PlaceRef, PlaceValue};
 use rustc_codegen_ssa::traits::*;
 use rustc_middle::ty::Ty;
 use rustc_middle::ty::layout::LayoutOf;
-pub(crate) use rustc_middle::ty::layout::{WIDE_PTR_ADDR, WIDE_PTR_EXTRA};
 use rustc_middle::{bug, ty};
 use rustc_session::config;
-pub(crate) use rustc_target::callconv::*;
+use rustc_target::callconv::{
+    ArgAbi, ArgAttribute, ArgAttributes, ArgExtension, CastTarget, Conv, FnAbi, PassMode,
+};
 use rustc_target::spec::SanitizerSet;
 use smallvec::SmallVec;
 
@@ -458,7 +457,7 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
         match &self.ret.mode {
             PassMode::Direct(attrs) => {
                 attrs.apply_attrs_to_llfn(llvm::AttributePlace::ReturnValue, cx, llfn);
-                if let abi::BackendRepr::Scalar(scalar) = self.ret.layout.backend_repr {
+                if let BackendRepr::Scalar(scalar) = self.ret.layout.backend_repr {
                     apply_range_attr(llvm::AttributePlace::ReturnValue, scalar);
                 }
             }
@@ -499,7 +498,7 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
                 }
                 PassMode::Direct(attrs) => {
                     let i = apply(attrs);
-                    if let abi::BackendRepr::Scalar(scalar) = arg.layout.backend_repr {
+                    if let BackendRepr::Scalar(scalar) = arg.layout.backend_repr {
                         apply_range_attr(llvm::AttributePlace::Argument(i), scalar);
                     }
                 }
@@ -514,9 +513,7 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
                 PassMode::Pair(a, b) => {
                     let i = apply(a);
                     let ii = apply(b);
-                    if let abi::BackendRepr::ScalarPair(scalar_a, scalar_b) =
-                        arg.layout.backend_repr
-                    {
+                    if let BackendRepr::ScalarPair(scalar_a, scalar_b) = arg.layout.backend_repr {
                         apply_range_attr(llvm::AttributePlace::Argument(i), scalar_a);
                         apply_range_attr(llvm::AttributePlace::Argument(ii), scalar_b);
                     }
@@ -576,7 +573,7 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
         }
         if bx.cx.sess().opts.optimize != config::OptLevel::No
                 && llvm_util::get_version() < (19, 0, 0)
-                && let abi::BackendRepr::Scalar(scalar) = self.ret.layout.backend_repr
+                && let BackendRepr::Scalar(scalar) = self.ret.layout.backend_repr
                 && matches!(scalar.primitive(), Primitive::Int(..))
                 // If the value is a boolean, the range is 0..2 and that ultimately
                 // become 0..0 when the type becomes i1, which would be rejected
diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs
index 7262fce4911..05a2cd1c5a3 100644
--- a/compiler/rustc_codegen_llvm/src/back/lto.rs
+++ b/compiler/rustc_codegen_llvm/src/back/lto.rs
@@ -362,8 +362,8 @@ fn fat_lto(
                 ptr as *const *const libc::c_char,
                 symbols_below_threshold.len() as libc::size_t,
             );
-            save_temp_bitcode(cgcx, &module, "lto.after-restriction");
         }
+        save_temp_bitcode(cgcx, &module, "lto.after-restriction");
     }
 
     Ok(LtoModuleCodegen::Fat(module))
@@ -586,6 +586,42 @@ fn thin_lto(
     }
 }
 
+fn enable_autodiff_settings(ad: &[config::AutoDiff], module: &mut ModuleCodegen<ModuleLlvm>) {
+    for &val in ad {
+        match val {
+            config::AutoDiff::PrintModBefore => {
+                unsafe { llvm::LLVMDumpModule(module.module_llvm.llmod()) };
+            }
+            config::AutoDiff::PrintPerf => {
+                llvm::set_print_perf(true);
+            }
+            config::AutoDiff::PrintAA => {
+                llvm::set_print_activity(true);
+            }
+            config::AutoDiff::PrintTA => {
+                llvm::set_print_type(true);
+            }
+            config::AutoDiff::Inline => {
+                llvm::set_inline(true);
+            }
+            config::AutoDiff::LooseTypes => {
+                llvm::set_loose_types(false);
+            }
+            config::AutoDiff::PrintSteps => {
+                llvm::set_print(true);
+            }
+            // We handle this below
+            config::AutoDiff::PrintModAfter => {}
+            // This is required and already checked
+            config::AutoDiff::Enable => {}
+        }
+    }
+    // This helps with handling enums for now.
+    llvm::set_strict_aliasing(false);
+    // FIXME(ZuseZ4): Test this, since it was added a long time ago.
+    llvm::set_rust_rules(true);
+}
+
 pub(crate) fn run_pass_manager(
     cgcx: &CodegenContext<LlvmCodegenBackend>,
     dcx: DiagCtxtHandle<'_>,
@@ -604,34 +640,37 @@ pub(crate) fn run_pass_manager(
     let opt_stage = if thin { llvm::OptStage::ThinLTO } else { llvm::OptStage::FatLTO };
     let opt_level = config.opt_level.unwrap_or(config::OptLevel::No);
 
-    // If this rustc version was build with enzyme/autodiff enabled, and if users applied the
-    // `#[autodiff]` macro at least once, then we will later call llvm_optimize a second time.
-    debug!("running llvm pm opt pipeline");
+    // The PostAD behavior is the same that we would have if no autodiff was used.
+    // It will run the default optimization pipeline. If AD is enabled we select
+    // the DuringAD stage, which will disable vectorization and loop unrolling, and
+    // schedule two autodiff optimization + differentiation passes.
+    // We then run the llvm_optimize function a second time, to optimize the code which we generated
+    // in the enzyme differentiation pass.
+    let enable_ad = config.autodiff.contains(&config::AutoDiff::Enable);
+    let stage =
+        if enable_ad { write::AutodiffStage::DuringAD } else { write::AutodiffStage::PostAD };
+
+    if enable_ad {
+        enable_autodiff_settings(&config.autodiff, module);
+    }
+
     unsafe {
-        write::llvm_optimize(
-            cgcx,
-            dcx,
-            module,
-            config,
-            opt_level,
-            opt_stage,
-            write::AutodiffStage::DuringAD,
-        )?;
+        write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, stage)?;
     }
-    // FIXME(ZuseZ4): Make this more granular
-    if cfg!(llvm_enzyme) && !thin {
+
+    if cfg!(llvm_enzyme) && enable_ad {
+        let opt_stage = llvm::OptStage::FatLTO;
+        let stage = write::AutodiffStage::PostAD;
         unsafe {
-            write::llvm_optimize(
-                cgcx,
-                dcx,
-                module,
-                config,
-                opt_level,
-                llvm::OptStage::FatLTO,
-                write::AutodiffStage::PostAD,
-            )?;
+            write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, stage)?;
+        }
+
+        // This is the final IR, so people should be able to inspect the optimized autodiff output.
+        if config.autodiff.contains(&config::AutoDiff::PrintModAfter) {
+            unsafe { llvm::LLVMDumpModule(module.module_llvm.llmod()) };
         }
     }
+
     debug!("lto done");
     Ok(())
 }
@@ -754,7 +793,9 @@ pub(crate) unsafe fn optimize_thin_module(
         {
             let _timer =
                 cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_rename", thin_module.name());
-            unsafe { llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod, target) };
+            unsafe {
+                llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod, target.raw())
+            };
             save_temp_bitcode(cgcx, &module, "thin-lto-after-rename");
         }
 
@@ -784,7 +825,7 @@ pub(crate) unsafe fn optimize_thin_module(
             let _timer =
                 cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_import", thin_module.name());
             if unsafe {
-                !llvm::LLVMRustPrepareThinLTOImport(thin_module.shared.data.0, llmod, target)
+                !llvm::LLVMRustPrepareThinLTOImport(thin_module.shared.data.0, llmod, target.raw())
             } {
                 return Err(write::llvm_err(dcx, LlvmError::PrepareThinLtoModule));
             }
diff --git a/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs b/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs
index f075f332462..dfde4595590 100644
--- a/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs
+++ b/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs
@@ -1,6 +1,5 @@
 use std::ffi::{CStr, c_char};
 use std::marker::PhantomData;
-use std::ops::Deref;
 use std::ptr::NonNull;
 
 use rustc_data_structures::small_c_str::SmallCStr;
@@ -80,12 +79,12 @@ impl OwnedTargetMachine {
             .map(|tm_unique| Self { tm_unique, phantom: PhantomData })
             .ok_or_else(|| LlvmError::CreateTargetMachine { triple: SmallCStr::from(triple) })
     }
-}
-
-impl Deref for OwnedTargetMachine {
-    type Target = llvm::TargetMachine;
 
-    fn deref(&self) -> &Self::Target {
+    /// Returns inner `llvm::TargetMachine` type.
+    ///
+    /// This could be a `Deref` implementation, but `llvm::TargetMachine` is an extern type and
+    /// `Deref::Target: ?Sized`.
+    pub fn raw(&self) -> &llvm::TargetMachine {
         // SAFETY: constructing ensures we have a valid pointer created by
         // llvm::LLVMRustCreateTargetMachine.
         unsafe { self.tm_unique.as_ref() }
diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs
index 9fa10e96068..29d6121844f 100644
--- a/compiler/rustc_codegen_llvm/src/back/write.rs
+++ b/compiler/rustc_codegen_llvm/src/back/write.rs
@@ -564,19 +564,16 @@ pub(crate) unsafe fn llvm_optimize(
     // FIXME(ZuseZ4): In a future update we could figure out how to only optimize individual functions getting
     // differentiated.
 
+    let consider_ad = cfg!(llvm_enzyme) && config.autodiff.contains(&config::AutoDiff::Enable);
+    let run_enzyme = autodiff_stage == AutodiffStage::DuringAD;
     let unroll_loops;
     let vectorize_slp;
     let vectorize_loop;
-    let run_enzyme = cfg!(llvm_enzyme) && autodiff_stage == AutodiffStage::DuringAD;
 
     // When we build rustc with enzyme/autodiff support, we want to postpone size-increasing
     // optimizations until after differentiation. Our pipeline is thus: (opt + enzyme), (full opt).
     // We therefore have two calls to llvm_optimize, if autodiff is used.
-    //
-    // FIXME(ZuseZ4): Before shipping on nightly,
-    // we should make this more granular, or at least check that the user has at least one autodiff
-    // call in their code, to justify altering the compilation pipeline.
-    if cfg!(llvm_enzyme) && autodiff_stage != AutodiffStage::PostAD {
+    if consider_ad && autodiff_stage != AutodiffStage::PostAD {
         unroll_loops = false;
         vectorize_slp = false;
         vectorize_loop = false;
@@ -640,7 +637,7 @@ pub(crate) unsafe fn llvm_optimize(
     let result = unsafe {
         llvm::LLVMRustOptimize(
             module.module_llvm.llmod(),
-            &*module.module_llvm.tm,
+            &*module.module_llvm.tm.raw(),
             to_pass_builder_opt_level(opt_level),
             opt_stage,
             cgcx.opts.cg.linker_plugin_lto.enabled(),
@@ -706,10 +703,8 @@ pub(crate) unsafe fn optimize(
 
         // If we know that we will later run AD, then we disable vectorization and loop unrolling.
         // Otherwise we pretend AD is already done and run the normal opt pipeline (=PostAD).
-        // FIXME(ZuseZ4): Make this more granular, only set PreAD if we actually have autodiff
-        // usages, not just if we build rustc with autodiff support.
-        let autodiff_stage =
-            if cfg!(llvm_enzyme) { AutodiffStage::PreAD } else { AutodiffStage::PostAD };
+        let consider_ad = cfg!(llvm_enzyme) && config.autodiff.contains(&config::AutoDiff::Enable);
+        let autodiff_stage = if consider_ad { AutodiffStage::PreAD } else { AutodiffStage::PostAD };
         return unsafe {
             llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, autodiff_stage)
         };
@@ -880,7 +875,7 @@ pub(crate) unsafe fn codegen(
             };
             write_output_file(
                 dcx,
-                tm,
+                tm.raw(),
                 config.no_builtins,
                 llmod,
                 &path,
@@ -914,7 +909,7 @@ pub(crate) unsafe fn codegen(
 
                 write_output_file(
                     dcx,
-                    tm,
+                    tm.raw(),
                     config.no_builtins,
                     llmod,
                     &obj_out,
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index acae0b444c0..3f20350d0ef 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -4,7 +4,7 @@ use std::{iter, ptr};
 
 pub(crate) mod autodiff;
 
-use libc::{c_char, c_uint};
+use libc::{c_char, c_uint, size_t};
 use rustc_abi as abi;
 use rustc_abi::{Align, Size, WrappingRange};
 use rustc_codegen_ssa::MemFlags;
@@ -29,13 +29,15 @@ use smallvec::SmallVec;
 use tracing::{debug, instrument};
 
 use crate::abi::FnAbiLlvmExt;
-use crate::attributes;
 use crate::common::Funclet;
 use crate::context::{CodegenCx, SimpleCx};
-use crate::llvm::{self, AtomicOrdering, AtomicRmwBinOp, BasicBlock, False, True};
+use crate::llvm::{
+    self, AtomicOrdering, AtomicRmwBinOp, BasicBlock, False, GEPNoWrapFlags, Metadata, True,
+};
 use crate::type_::Type;
 use crate::type_of::LayoutLlvmExt;
 use crate::value::Value;
+use crate::{attributes, llvm_util};
 
 #[must_use]
 pub(crate) struct GenericBuilder<'a, 'll, CX: Borrow<SimpleCx<'ll>>> {
@@ -333,6 +335,50 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         }
     }
 
+    fn switch_with_weights(
+        &mut self,
+        v: Self::Value,
+        else_llbb: Self::BasicBlock,
+        else_is_cold: bool,
+        cases: impl ExactSizeIterator<Item = (u128, Self::BasicBlock, bool)>,
+    ) {
+        if self.cx.sess().opts.optimize == rustc_session::config::OptLevel::No {
+            self.switch(v, else_llbb, cases.map(|(val, dest, _)| (val, dest)));
+            return;
+        }
+
+        let id_str = "branch_weights";
+        let id = unsafe {
+            llvm::LLVMMDStringInContext2(self.cx.llcx, id_str.as_ptr().cast(), id_str.len())
+        };
+
+        // For switch instructions with 2 targets, the `llvm.expect` intrinsic is used.
+        // This function handles switch instructions with more than 2 targets and it needs to
+        // emit branch weights metadata instead of using the intrinsic.
+        // The values 1 and 2000 are the same as the values used by the `llvm.expect` intrinsic.
+        let cold_weight = unsafe { llvm::LLVMValueAsMetadata(self.cx.const_u32(1)) };
+        let hot_weight = unsafe { llvm::LLVMValueAsMetadata(self.cx.const_u32(2000)) };
+        let weight =
+            |is_cold: bool| -> &Metadata { if is_cold { cold_weight } else { hot_weight } };
+
+        let mut md: SmallVec<[&Metadata; 16]> = SmallVec::with_capacity(cases.len() + 2);
+        md.push(id);
+        md.push(weight(else_is_cold));
+
+        let switch =
+            unsafe { llvm::LLVMBuildSwitch(self.llbuilder, v, else_llbb, cases.len() as c_uint) };
+        for (on_val, dest, is_cold) in cases {
+            let on_val = self.const_uint_big(self.val_ty(v), on_val);
+            unsafe { llvm::LLVMAddCase(switch, on_val, dest) }
+            md.push(weight(is_cold));
+        }
+
+        unsafe {
+            let md_node = llvm::LLVMMDNodeInContext2(self.cx.llcx, md.as_ptr(), md.len() as size_t);
+            self.cx.set_metadata(switch, llvm::MD_prof, md_node);
+        }
+    }
+
     fn invoke(
         &mut self,
         llty: &'ll Type,
@@ -359,7 +405,7 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
 
         // Emit KCFI operand bundle
         let kcfi_bundle = self.kcfi_operand_bundle(fn_attrs, fn_abi, instance, llfn);
-        if let Some(kcfi_bundle) = kcfi_bundle.as_deref() {
+        if let Some(kcfi_bundle) = kcfi_bundle.as_ref().map(|b| b.raw()) {
             bundles.push(kcfi_bundle);
         }
 
@@ -421,6 +467,37 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         unchecked_umul(x, y) => LLVMBuildNUWMul,
     }
 
+    fn unchecked_suadd(&mut self, a: &'ll Value, b: &'ll Value) -> &'ll Value {
+        unsafe {
+            let add = llvm::LLVMBuildAdd(self.llbuilder, a, b, UNNAMED);
+            if llvm::LLVMIsAInstruction(add).is_some() {
+                llvm::LLVMSetNUW(add, True);
+                llvm::LLVMSetNSW(add, True);
+            }
+            add
+        }
+    }
+    fn unchecked_susub(&mut self, a: &'ll Value, b: &'ll Value) -> &'ll Value {
+        unsafe {
+            let sub = llvm::LLVMBuildSub(self.llbuilder, a, b, UNNAMED);
+            if llvm::LLVMIsAInstruction(sub).is_some() {
+                llvm::LLVMSetNUW(sub, True);
+                llvm::LLVMSetNSW(sub, True);
+            }
+            sub
+        }
+    }
+    fn unchecked_sumul(&mut self, a: &'ll Value, b: &'ll Value) -> &'ll Value {
+        unsafe {
+            let mul = llvm::LLVMBuildMul(self.llbuilder, a, b, UNNAMED);
+            if llvm::LLVMIsAInstruction(mul).is_some() {
+                llvm::LLVMSetNUW(mul, True);
+                llvm::LLVMSetNSW(mul, True);
+            }
+            mul
+        }
+    }
+
     fn or_disjoint(&mut self, a: &'ll Value, b: &'ll Value) -> &'ll Value {
         unsafe {
             let or = llvm::LLVMBuildOr(self.llbuilder, a, b, UNNAMED);
@@ -531,7 +608,7 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
 
     fn to_immediate_scalar(&mut self, val: Self::Value, scalar: abi::Scalar) -> Self::Value {
         if scalar.is_bool() {
-            return self.trunc(val, self.cx().type_i1());
+            return self.unchecked_utrunc(val, self.cx().type_i1());
         }
         val
     }
@@ -671,10 +748,12 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
                 let load = self.load(llty, place.val.llval, place.val.align);
                 if let abi::BackendRepr::Scalar(scalar) = place.layout.backend_repr {
                     scalar_load_metadata(self, load, scalar, place.layout, Size::ZERO);
+                    self.to_immediate_scalar(load, scalar)
+                } else {
+                    load
                 }
-                load
             });
-            OperandValue::Immediate(self.to_immediate(llval, place.layout))
+            OperandValue::Immediate(llval)
         } else if let abi::BackendRepr::ScalarPair(a, b) = place.layout.backend_repr {
             let b_offset = a.size(self).align_to(b.align(self).abi);
 
@@ -833,13 +912,14 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
 
     fn gep(&mut self, ty: &'ll Type, ptr: &'ll Value, indices: &[&'ll Value]) -> &'ll Value {
         unsafe {
-            llvm::LLVMBuildGEP2(
+            llvm::LLVMBuildGEPWithNoWrapFlags(
                 self.llbuilder,
                 ty,
                 ptr,
                 indices.as_ptr(),
                 indices.len() as c_uint,
                 UNNAMED,
+                GEPNoWrapFlags::default(),
             )
         }
     }
@@ -851,13 +931,33 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         indices: &[&'ll Value],
     ) -> &'ll Value {
         unsafe {
-            llvm::LLVMBuildInBoundsGEP2(
+            llvm::LLVMBuildGEPWithNoWrapFlags(
+                self.llbuilder,
+                ty,
+                ptr,
+                indices.as_ptr(),
+                indices.len() as c_uint,
+                UNNAMED,
+                GEPNoWrapFlags::InBounds,
+            )
+        }
+    }
+
+    fn inbounds_nuw_gep(
+        &mut self,
+        ty: &'ll Type,
+        ptr: &'ll Value,
+        indices: &[&'ll Value],
+    ) -> &'ll Value {
+        unsafe {
+            llvm::LLVMBuildGEPWithNoWrapFlags(
                 self.llbuilder,
                 ty,
                 ptr,
                 indices.as_ptr(),
                 indices.len() as c_uint,
                 UNNAMED,
+                GEPNoWrapFlags::InBounds | GEPNoWrapFlags::NUW,
             )
         }
     }
@@ -867,6 +967,34 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         unsafe { llvm::LLVMBuildTrunc(self.llbuilder, val, dest_ty, UNNAMED) }
     }
 
+    fn unchecked_utrunc(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
+        debug_assert_ne!(self.val_ty(val), dest_ty);
+
+        let trunc = self.trunc(val, dest_ty);
+        if llvm_util::get_version() >= (19, 0, 0) {
+            unsafe {
+                if llvm::LLVMIsAInstruction(trunc).is_some() {
+                    llvm::LLVMSetNUW(trunc, True);
+                }
+            }
+        }
+        trunc
+    }
+
+    fn unchecked_strunc(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
+        debug_assert_ne!(self.val_ty(val), dest_ty);
+
+        let trunc = self.trunc(val, dest_ty);
+        if llvm_util::get_version() >= (19, 0, 0) {
+            unsafe {
+                if llvm::LLVMIsAInstruction(trunc).is_some() {
+                    llvm::LLVMSetNSW(trunc, True);
+                }
+            }
+        }
+        trunc
+    }
+
     fn sext(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
         unsafe { llvm::LLVMBuildSExt(self.llbuilder, val, dest_ty, UNNAMED) }
     }
@@ -1305,7 +1433,7 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
 
         // Emit KCFI operand bundle
         let kcfi_bundle = self.kcfi_operand_bundle(fn_attrs, fn_abi, instance, llfn);
-        if let Some(kcfi_bundle) = kcfi_bundle.as_deref() {
+        if let Some(kcfi_bundle) = kcfi_bundle.as_ref().map(|b| b.raw()) {
             bundles.push(kcfi_bundle);
         }
 
@@ -1654,7 +1782,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
 
         // Emit KCFI operand bundle
         let kcfi_bundle = self.kcfi_operand_bundle(fn_attrs, fn_abi, instance, llfn);
-        if let Some(kcfi_bundle) = kcfi_bundle.as_deref() {
+        if let Some(kcfi_bundle) = kcfi_bundle.as_ref().map(|b| b.raw()) {
             bundles.push(kcfi_bundle);
         }
 
diff --git a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
index b2c1088e3fc..2c7899975e3 100644
--- a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
+++ b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
@@ -10,7 +10,7 @@ use crate::back::write::llvm_err;
 use crate::builder::SBuilder;
 use crate::context::SimpleCx;
 use crate::declare::declare_simple_fn;
-use crate::errors::LlvmError;
+use crate::errors::{AutoDiffWithoutEnable, LlvmError};
 use crate::llvm::AttributePlace::Function;
 use crate::llvm::{Metadata, True};
 use crate::value::Value;
@@ -46,9 +46,6 @@ fn generate_enzyme_call<'ll>(
     let output = attrs.ret_activity;
 
     // We have to pick the name depending on whether we want forward or reverse mode autodiff.
-    // FIXME(ZuseZ4): The new pass based approach should not need the {Forward/Reverse}First method anymore, since
-    // it will handle higher-order derivatives correctly automatically (in theory). Currently
-    // higher-order derivatives fail, so we should debug that before adjusting this code.
     let mut ad_name: String = match attrs.mode {
         DiffMode::Forward => "__enzyme_fwddiff",
         DiffMode::Reverse => "__enzyme_autodiff",
@@ -291,6 +288,14 @@ pub(crate) fn differentiate<'ll>(
     let diag_handler = cgcx.create_dcx();
     let cx = SimpleCx { llmod: module.module_llvm.llmod(), llcx: module.module_llvm.llcx };
 
+    // First of all, did the user try to use autodiff without using the -Zautodiff=Enable flag?
+    if !diff_items.is_empty()
+        && !cgcx.opts.unstable_opts.autodiff.contains(&rustc_session::config::AutoDiff::Enable)
+    {
+        let dcx = cgcx.create_dcx();
+        return Err(dcx.handle().emit_almost_fatal(AutoDiffWithoutEnable));
+    }
+
     // Before dumping the module, we want all the TypeTrees to become part of the module.
     for item in diff_items.iter() {
         let name = item.source.clone();
diff --git a/compiler/rustc_codegen_llvm/src/callee.rs b/compiler/rustc_codegen_llvm/src/callee.rs
index aa9a0f34f55..ea9ab5c02bd 100644
--- a/compiler/rustc_codegen_llvm/src/callee.rs
+++ b/compiler/rustc_codegen_llvm/src/callee.rs
@@ -66,9 +66,7 @@ pub(crate) fn get_fn<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>, instance: Instance<'t
             // LLVM will prefix the name with `__imp_`. Ideally, we'd like the
             // existing logic below to set the Storage Class, but it has an
             // exemption for MinGW for backwards compatibility.
-            unsafe {
-                llvm::LLVMSetDLLStorageClass(llfn, llvm::DLLStorageClass::DllImport);
-            }
+            llvm::set_dllimport_storage_class(llfn);
             llfn
         } else {
             cx.declare_fn(sym, fn_abi, Some(instance))
@@ -99,65 +97,61 @@ pub(crate) fn get_fn<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>, instance: Instance<'t
         // has been applied to the definition (wherever that definition may be).
 
         llvm::set_linkage(llfn, llvm::Linkage::ExternalLinkage);
-        unsafe {
-            let is_generic = instance.args.non_erasable_generics().next().is_some();
-
-            let is_hidden = if is_generic {
-                // This is a monomorphization of a generic function.
-                if !(cx.tcx.sess.opts.share_generics()
-                    || tcx.codegen_fn_attrs(instance_def_id).inline
-                        == rustc_attr_parsing::InlineAttr::Never)
-                {
-                    // When not sharing generics, all instances are in the same
-                    // crate and have hidden visibility.
-                    true
-                } else {
-                    if let Some(instance_def_id) = instance_def_id.as_local() {
-                        // This is a monomorphization of a generic function
-                        // defined in the current crate. It is hidden if:
-                        // - the definition is unreachable for downstream
-                        //   crates, or
-                        // - the current crate does not re-export generics
-                        //   (because the crate is a C library or executable)
-                        cx.tcx.is_unreachable_local_definition(instance_def_id)
-                            || !cx.tcx.local_crate_exports_generics()
-                    } else {
-                        // This is a monomorphization of a generic function
-                        // defined in an upstream crate. It is hidden if:
-                        // - it is instantiated in this crate, and
-                        // - the current crate does not re-export generics
-                        instance.upstream_monomorphization(tcx).is_none()
-                            && !cx.tcx.local_crate_exports_generics()
-                    }
-                }
-            } else {
-                // This is a non-generic function. It is hidden if:
-                // - it is instantiated in the local crate, and
-                //   - it is defined an upstream crate (non-local), or
-                //   - it is not reachable
-                cx.tcx.is_codegened_item(instance_def_id)
-                    && (!instance_def_id.is_local()
-                        || !cx.tcx.is_reachable_non_generic(instance_def_id))
-            };
-            if is_hidden {
-                llvm::set_visibility(llfn, llvm::Visibility::Hidden);
-            }
+        let is_generic = instance.args.non_erasable_generics().next().is_some();
 
-            // MinGW: For backward compatibility we rely on the linker to decide whether it
-            // should use dllimport for functions.
-            if cx.use_dll_storage_attrs
-                && let Some(library) = tcx.native_library(instance_def_id)
-                && library.kind.is_dllimport()
-                && !matches!(tcx.sess.target.env.as_ref(), "gnu" | "uclibc")
+        let is_hidden = if is_generic {
+            // This is a monomorphization of a generic function.
+            if !(cx.tcx.sess.opts.share_generics()
+                || tcx.codegen_fn_attrs(instance_def_id).inline
+                    == rustc_attr_parsing::InlineAttr::Never)
             {
-                llvm::LLVMSetDLLStorageClass(llfn, llvm::DLLStorageClass::DllImport);
+                // When not sharing generics, all instances are in the same
+                // crate and have hidden visibility.
+                true
+            } else {
+                if let Some(instance_def_id) = instance_def_id.as_local() {
+                    // This is a monomorphization of a generic function
+                    // defined in the current crate. It is hidden if:
+                    // - the definition is unreachable for downstream
+                    //   crates, or
+                    // - the current crate does not re-export generics
+                    //   (because the crate is a C library or executable)
+                    cx.tcx.is_unreachable_local_definition(instance_def_id)
+                        || !cx.tcx.local_crate_exports_generics()
+                } else {
+                    // This is a monomorphization of a generic function
+                    // defined in an upstream crate. It is hidden if:
+                    // - it is instantiated in this crate, and
+                    // - the current crate does not re-export generics
+                    instance.upstream_monomorphization(tcx).is_none()
+                        && !cx.tcx.local_crate_exports_generics()
+                }
             }
+        } else {
+            // This is a non-generic function. It is hidden if:
+            // - it is instantiated in the local crate, and
+            //   - it is defined an upstream crate (non-local), or
+            //   - it is not reachable
+            cx.tcx.is_codegened_item(instance_def_id)
+                && (!instance_def_id.is_local()
+                    || !cx.tcx.is_reachable_non_generic(instance_def_id))
+        };
+        if is_hidden {
+            llvm::set_visibility(llfn, llvm::Visibility::Hidden);
+        }
 
-            if cx.should_assume_dso_local(llfn, true) {
-                llvm::LLVMRustSetDSOLocal(llfn, true);
-            }
+        // MinGW: For backward compatibility we rely on the linker to decide whether it
+        // should use dllimport for functions.
+        if cx.use_dll_storage_attrs
+            && let Some(library) = tcx.native_library(instance_def_id)
+            && library.kind.is_dllimport()
+            && !matches!(tcx.sess.target.env.as_ref(), "gnu" | "uclibc")
+        {
+            llvm::set_dllimport_storage_class(llfn);
         }
 
+        cx.assume_dso_local(llfn, true);
+
         llfn
     };
 
diff --git a/compiler/rustc_codegen_llvm/src/common.rs b/compiler/rustc_codegen_llvm/src/common.rs
index 78b3a7f8541..0621b893e75 100644
--- a/compiler/rustc_codegen_llvm/src/common.rs
+++ b/compiler/rustc_codegen_llvm/src/common.rs
@@ -7,7 +7,8 @@ use rustc_abi::{AddressSpace, HasDataLayout};
 use rustc_ast::Mutability;
 use rustc_codegen_ssa::common::TypeKind;
 use rustc_codegen_ssa::traits::*;
-use rustc_data_structures::stable_hasher::{Hash128, HashStable, StableHasher};
+use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
+use rustc_hashes::Hash128;
 use rustc_hir::def_id::DefId;
 use rustc_middle::bug;
 use rustc_middle::mir::interpret::{ConstAllocation, GlobalAlloc, Scalar};
@@ -76,7 +77,7 @@ impl<'ll> Funclet<'ll> {
     }
 
     pub(crate) fn bundle(&self) -> &llvm::OperandBundle<'ll> {
-        &self.operand
+        self.operand.raw()
     }
 }
 
diff --git a/compiler/rustc_codegen_llvm/src/consts.rs b/compiler/rustc_codegen_llvm/src/consts.rs
index 4a5491ec7a1..330e8a8f406 100644
--- a/compiler/rustc_codegen_llvm/src/consts.rs
+++ b/compiler/rustc_codegen_llvm/src/consts.rs
@@ -336,12 +336,7 @@ impl<'ll> CodegenCx<'ll, '_> {
             llvm::set_thread_local_mode(g, self.tls_model);
         }
 
-        let dso_local = self.should_assume_dso_local(g, true);
-        if dso_local {
-            unsafe {
-                llvm::LLVMRustSetDSOLocal(g, true);
-            }
-        }
+        let dso_local = self.assume_dso_local(g, true);
 
         if !def_id.is_local() {
             let needs_dll_storage_attr = self.use_dll_storage_attrs
@@ -375,9 +370,7 @@ impl<'ll> CodegenCx<'ll, '_> {
                 // is actually present in the current crate. We can find out via the
                 // is_codegened_item query.
                 if !self.tcx.is_codegened_item(def_id) {
-                    unsafe {
-                        llvm::LLVMSetDLLStorageClass(g, llvm::DLLStorageClass::DllImport);
-                    }
+                    llvm::set_dllimport_storage_class(g);
                 }
             }
         }
@@ -387,9 +380,7 @@ impl<'ll> CodegenCx<'ll, '_> {
             && library.kind.is_dllimport()
         {
             // For foreign (native) libs we know the exact storage type to use.
-            unsafe {
-                llvm::LLVMSetDLLStorageClass(g, llvm::DLLStorageClass::DllImport);
-            }
+            llvm::set_dllimport_storage_class(g);
         }
 
         self.instances.borrow_mut().insert(instance, g);
@@ -460,9 +451,7 @@ impl<'ll> CodegenCx<'ll, '_> {
             set_global_alignment(self, g, alloc.align);
             llvm::set_initializer(g, v);
 
-            if self.should_assume_dso_local(g, true) {
-                llvm::LLVMRustSetDSOLocal(g, true);
-            }
+            self.assume_dso_local(g, true);
 
             // Forward the allocation's mutability (picked by the const interner) to LLVM.
             if alloc.mutability.is_not() {
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index 7fe527a4c07..ed8426ae197 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -194,12 +194,18 @@ pub(crate) unsafe fn create_module<'ll>(
             target_data_layout = target_data_layout.replace("-i128:128", "");
         }
     }
+    if llvm_version < (21, 0, 0) {
+        if sess.target.arch == "nvptx64" {
+            // LLVM 21 updated the default layout on nvptx: https://github.com/llvm/llvm-project/pull/124961
+            target_data_layout = target_data_layout.replace("e-p6:32:32-i64", "e-i64");
+        }
+    }
 
     // Ensure the data-layout values hardcoded remain the defaults.
     {
         let tm = crate::back::write::create_informational_target_machine(tcx.sess, false);
         unsafe {
-            llvm::LLVMRustSetDataLayoutFromTargetMachine(llmod, &tm);
+            llvm::LLVMRustSetDataLayoutFromTargetMachine(llmod, tm.raw());
         }
 
         let llvm_data_layout = unsafe { llvm::LLVMGetDataLayoutStr(llmod) };
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs b/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs
index 54c5d445f66..4ffe551df09 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs
@@ -87,7 +87,7 @@ pub(crate) fn get_or_insert_gdb_debug_scripts_section_global<'ll>(
 
 pub(crate) fn needs_gdb_debug_scripts_section(cx: &CodegenCx<'_, '_>) -> bool {
     let omit_gdb_pretty_printer_section =
-        attr::contains_name(cx.tcx.hir().krate_attrs(), sym::omit_gdb_pretty_printer_section);
+        attr::contains_name(cx.tcx.hir_krate_attrs(), sym::omit_gdb_pretty_printer_section);
 
     // To ensure the section `__rustc_debug_gdb_scripts_section__` will not create
     // ODR violations at link time, this section will not be emitted for rlibs since
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
index 59c3fe635d0..98d59f5a8ae 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
@@ -11,7 +11,9 @@ use rustc_codegen_ssa::traits::*;
 use rustc_hir::def::{CtorKind, DefKind};
 use rustc_hir::def_id::{DefId, LOCAL_CRATE};
 use rustc_middle::bug;
-use rustc_middle::ty::layout::{HasTypingEnv, LayoutOf, TyAndLayout};
+use rustc_middle::ty::layout::{
+    HasTypingEnv, LayoutOf, TyAndLayout, WIDE_PTR_ADDR, WIDE_PTR_EXTRA,
+};
 use rustc_middle::ty::{
     self, AdtKind, CoroutineArgsExt, ExistentialTraitRef, Instance, Ty, TyCtxt, Visibility,
 };
@@ -34,12 +36,12 @@ use crate::common::{AsCCharPtr, CodegenCx};
 use crate::debuginfo::dwarf_const;
 use crate::debuginfo::metadata::type_map::build_type_with_children;
 use crate::debuginfo::utils::{WidePtrKind, wide_pointer_kind};
+use crate::llvm;
 use crate::llvm::debuginfo::{
     DIDescriptor, DIFile, DIFlags, DILexicalBlock, DIScope, DIType, DebugEmissionKind,
     DebugNameTableKind,
 };
 use crate::value::Value;
-use crate::{abi, llvm};
 
 impl PartialEq for llvm::Metadata {
     fn eq(&self, other: &Self) -> bool {
@@ -211,16 +213,16 @@ fn build_pointer_or_reference_di_node<'ll, 'tcx>(
                     };
 
                     let layout = cx.layout_of(layout_type);
-                    let addr_field = layout.field(cx, abi::WIDE_PTR_ADDR);
-                    let extra_field = layout.field(cx, abi::WIDE_PTR_EXTRA);
+                    let addr_field = layout.field(cx, WIDE_PTR_ADDR);
+                    let extra_field = layout.field(cx, WIDE_PTR_EXTRA);
 
                     let (addr_field_name, extra_field_name) = match wide_pointer_kind {
                         WidePtrKind::Dyn => ("pointer", "vtable"),
                         WidePtrKind::Slice => ("data_ptr", "length"),
                     };
 
-                    assert_eq!(abi::WIDE_PTR_ADDR, 0);
-                    assert_eq!(abi::WIDE_PTR_EXTRA, 1);
+                    assert_eq!(WIDE_PTR_ADDR, 0);
+                    assert_eq!(WIDE_PTR_EXTRA, 1);
 
                     // The data pointer type is a regular, thin pointer, regardless of whether this
                     // is a slice or a trait object.
@@ -242,7 +244,7 @@ fn build_pointer_or_reference_di_node<'ll, 'tcx>(
                             owner,
                             addr_field_name,
                             (addr_field.size, addr_field.align.abi),
-                            layout.fields.offset(abi::WIDE_PTR_ADDR),
+                            layout.fields.offset(WIDE_PTR_ADDR),
                             DIFlags::FlagZero,
                             data_ptr_type_di_node,
                             None,
@@ -252,7 +254,7 @@ fn build_pointer_or_reference_di_node<'ll, 'tcx>(
                             owner,
                             extra_field_name,
                             (extra_field.size, extra_field.align.abi),
-                            layout.fields.offset(abi::WIDE_PTR_EXTRA),
+                            layout.fields.offset(WIDE_PTR_EXTRA),
                             DIFlags::FlagZero,
                             type_di_node(cx, extra_field.ty),
                             None,
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
index 17f2d5f4e73..10819a53b1d 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
@@ -22,6 +22,7 @@ use rustc_session::config::{self, DebugInfo};
 use rustc_span::{
     BytePos, Pos, SourceFile, SourceFileAndLine, SourceFileHash, Span, StableSourceFileId, Symbol,
 };
+use rustc_target::callconv::FnAbi;
 use rustc_target::spec::DebuginfoKind;
 use smallvec::SmallVec;
 use tracing::debug;
@@ -29,7 +30,6 @@ use tracing::debug;
 use self::metadata::{UNKNOWN_COLUMN_NUMBER, UNKNOWN_LINE_NUMBER, file_metadata, type_di_node};
 use self::namespace::mangled_name_of_instance;
 use self::utils::{DIB, create_DIArray, is_node_local_to_unit};
-use crate::abi::FnAbi;
 use crate::builder::Builder;
 use crate::common::{AsCCharPtr, CodegenCx};
 use crate::llvm;
diff --git a/compiler/rustc_codegen_llvm/src/declare.rs b/compiler/rustc_codegen_llvm/src/declare.rs
index cebceef1b93..e79662ebc64 100644
--- a/compiler/rustc_codegen_llvm/src/declare.rs
+++ b/compiler/rustc_codegen_llvm/src/declare.rs
@@ -16,10 +16,11 @@ use rustc_codegen_ssa::traits::TypeMembershipCodegenMethods;
 use rustc_data_structures::fx::FxIndexSet;
 use rustc_middle::ty::{Instance, Ty};
 use rustc_sanitizers::{cfi, kcfi};
+use rustc_target::callconv::FnAbi;
 use smallvec::SmallVec;
 use tracing::debug;
 
-use crate::abi::{FnAbi, FnAbiLlvmExt};
+use crate::abi::FnAbiLlvmExt;
 use crate::common::AsCCharPtr;
 use crate::context::{CodegenCx, SimpleCx};
 use crate::llvm::AttributePlace::Function;
diff --git a/compiler/rustc_codegen_llvm/src/errors.rs b/compiler/rustc_codegen_llvm/src/errors.rs
index 97f49256165..4c5a78ca74f 100644
--- a/compiler/rustc_codegen_llvm/src/errors.rs
+++ b/compiler/rustc_codegen_llvm/src/errors.rs
@@ -92,10 +92,13 @@ impl<G: EmissionGuarantee> Diagnostic<'_, G> for ParseTargetMachineConfig<'_> {
 
 #[derive(Diagnostic)]
 #[diag(codegen_llvm_autodiff_without_lto)]
-#[note]
 pub(crate) struct AutoDiffWithoutLTO;
 
 #[derive(Diagnostic)]
+#[diag(codegen_llvm_autodiff_without_enable)]
+pub(crate) struct AutoDiffWithoutEnable;
+
+#[derive(Diagnostic)]
 #[diag(codegen_llvm_lto_disallowed)]
 pub(crate) struct LtoDisallowed;
 
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index 8b976885904..1169b9c067d 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -1,7 +1,7 @@
 use std::assert_matches::assert_matches;
 use std::cmp::Ordering;
 
-use rustc_abi::{self as abi, Align, Float, HasDataLayout, Primitive, Size};
+use rustc_abi::{Align, BackendRepr, ExternAbi, Float, HasDataLayout, Primitive, Size};
 use rustc_codegen_ssa::base::{compare_simd_types, wants_msvc_seh, wants_wasm_eh};
 use rustc_codegen_ssa::common::{IntPredicate, TypeKind};
 use rustc_codegen_ssa::errors::{ExpectedPointerMutability, InvalidMonomorphization};
@@ -14,10 +14,11 @@ use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt, HasTypingEnv, LayoutOf};
 use rustc_middle::ty::{self, GenericArgsRef, Ty};
 use rustc_middle::{bug, span_bug};
 use rustc_span::{Span, Symbol, sym};
+use rustc_target::callconv::{FnAbi, PassMode};
 use rustc_target::spec::{HasTargetSpec, PanicStrategy};
 use tracing::debug;
 
-use crate::abi::{ExternAbi, FnAbi, FnAbiLlvmExt, LlvmType, PassMode};
+use crate::abi::{FnAbiLlvmExt, LlvmType};
 use crate::builder::Builder;
 use crate::context::CodegenCx;
 use crate::llvm::{self, Metadata};
@@ -126,15 +127,14 @@ fn get_simple_intrinsic<'ll>(
         sym::truncf64 => "llvm.trunc.f64",
         sym::truncf128 => "llvm.trunc.f128",
 
-        sym::rintf16 => "llvm.rint.f16",
-        sym::rintf32 => "llvm.rint.f32",
-        sym::rintf64 => "llvm.rint.f64",
-        sym::rintf128 => "llvm.rint.f128",
-
-        sym::nearbyintf16 => "llvm.nearbyint.f16",
-        sym::nearbyintf32 => "llvm.nearbyint.f32",
-        sym::nearbyintf64 => "llvm.nearbyint.f64",
-        sym::nearbyintf128 => "llvm.nearbyint.f128",
+        // We could use any of `rint`, `nearbyint`, or `roundeven`
+        // for this -- they are all identical in semantics when
+        // assuming the default FP environment.
+        // `rint` is what we used for $forever.
+        sym::round_ties_even_f16 => "llvm.rint.f16",
+        sym::round_ties_even_f32 => "llvm.rint.f32",
+        sym::round_ties_even_f64 => "llvm.rint.f64",
+        sym::round_ties_even_f128 => "llvm.rint.f128",
 
         sym::roundf16 => "llvm.round.f16",
         sym::roundf32 => "llvm.round.f32",
@@ -143,11 +143,6 @@ fn get_simple_intrinsic<'ll>(
 
         sym::ptr_mask => "llvm.ptrmask",
 
-        sym::roundevenf16 => "llvm.roundeven.f16",
-        sym::roundevenf32 => "llvm.roundeven.f32",
-        sym::roundevenf64 => "llvm.roundeven.f64",
-        sym::roundevenf128 => "llvm.roundeven.f128",
-
         _ => return None,
     };
     Some(cx.get_intrinsic(llvm_name))
@@ -257,7 +252,7 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
             }
             sym::va_arg => {
                 match fn_abi.ret.layout.backend_repr {
-                    abi::BackendRepr::Scalar(scalar) => {
+                    BackendRepr::Scalar(scalar) => {
                         match scalar.primitive() {
                             Primitive::Int(..) => {
                                 if self.cx().size_of(ret_ty).bytes() < 4 {
@@ -470,12 +465,12 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
             }
 
             sym::raw_eq => {
-                use abi::BackendRepr::*;
+                use BackendRepr::*;
                 let tp_ty = fn_args.type_at(0);
                 let layout = self.layout_of(tp_ty).layout;
                 let use_integer_compare = match layout.backend_repr() {
                     Scalar(_) | ScalarPair(_, _) => true,
-                    Uninhabited | Vector { .. } => false,
+                    Vector { .. } => false,
                     Memory { .. } => {
                         // For rusty ABIs, small aggregates are actually passed
                         // as `RegKind::Integer` (see `FnAbi::adjust_for_abi`),
@@ -582,8 +577,7 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                 }
 
                 let llret_ty = if ret_ty.is_simd()
-                    && let abi::BackendRepr::Memory { .. } =
-                        self.layout_of(ret_ty).layout.backend_repr
+                    && let BackendRepr::Memory { .. } = self.layout_of(ret_ty).layout.backend_repr
                 {
                     let (size, elem_ty) = ret_ty.simd_size_and_type(self.tcx());
                     let elem_ll_ty = match elem_ty.kind() {
@@ -1335,7 +1329,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         ));
     }
 
-    if name == sym::simd_shuffle_generic {
+    if name == sym::simd_shuffle_const_generic {
         let idx = fn_args[2].expect_const().to_value().valtree.unwrap_branch();
         let n = idx.len() as u64;
 
@@ -1587,8 +1581,6 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             sym::simd_floor => ("floor", bx.type_func(&[vec_ty], vec_ty)),
             sym::simd_fma => ("fma", bx.type_func(&[vec_ty, vec_ty, vec_ty], vec_ty)),
             sym::simd_relaxed_fma => ("fmuladd", bx.type_func(&[vec_ty, vec_ty, vec_ty], vec_ty)),
-            sym::simd_fpowi => ("powi", bx.type_func(&[vec_ty, bx.type_i32()], vec_ty)),
-            sym::simd_fpow => ("pow", bx.type_func(&[vec_ty, vec_ty], vec_ty)),
             sym::simd_fsin => ("sin", bx.type_func(&[vec_ty], vec_ty)),
             sym::simd_fsqrt => ("sqrt", bx.type_func(&[vec_ty], vec_ty)),
             sym::simd_round => ("round", bx.type_func(&[vec_ty], vec_ty)),
@@ -1621,8 +1613,6 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             | sym::simd_flog
             | sym::simd_floor
             | sym::simd_fma
-            | sym::simd_fpow
-            | sym::simd_fpowi
             | sym::simd_fsin
             | sym::simd_fsqrt
             | sym::simd_relaxed_fma
diff --git a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
index 39bac13a968..3c2c6964a3d 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
@@ -7,7 +7,7 @@ use super::ffi::{BasicBlock, Metadata, Module, Type, Value};
 use crate::llvm::Bool;
 
 #[link(name = "llvm-wrapper", kind = "static")]
-extern "C" {
+unsafe extern "C" {
     // Enzyme
     pub(crate) fn LLVMRustHasMetadata(I: &Value, KindID: c_uint) -> bool;
     pub(crate) fn LLVMRustEraseInstUntilInclusive(BB: &BasicBlock, I: &Value);
@@ -18,7 +18,7 @@ extern "C" {
     pub(crate) fn LLVMRustVerifyFunction(V: &Value, action: LLVMRustVerifierFailureAction) -> Bool;
 }
 
-extern "C" {
+unsafe extern "C" {
     // Enzyme
     pub(crate) fn LLVMDumpModule(M: &Module);
     pub(crate) fn LLVMDumpValue(V: &Value);
@@ -35,3 +35,97 @@ pub enum LLVMRustVerifierFailureAction {
     LLVMPrintMessageAction = 1,
     LLVMReturnStatusAction = 2,
 }
+
+#[cfg(llvm_enzyme)]
+pub use self::Enzyme_AD::*;
+
+#[cfg(llvm_enzyme)]
+pub mod Enzyme_AD {
+    use libc::c_void;
+    extern "C" {
+        pub fn EnzymeSetCLBool(arg1: *mut ::std::os::raw::c_void, arg2: u8);
+    }
+    extern "C" {
+        static mut EnzymePrintPerf: c_void;
+        static mut EnzymePrintActivity: c_void;
+        static mut EnzymePrintType: c_void;
+        static mut EnzymePrint: c_void;
+        static mut EnzymeStrictAliasing: c_void;
+        static mut looseTypeAnalysis: c_void;
+        static mut EnzymeInline: c_void;
+        static mut RustTypeRules: c_void;
+    }
+    pub fn set_print_perf(print: bool) {
+        unsafe {
+            EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymePrintPerf), print as u8);
+        }
+    }
+    pub fn set_print_activity(print: bool) {
+        unsafe {
+            EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymePrintActivity), print as u8);
+        }
+    }
+    pub fn set_print_type(print: bool) {
+        unsafe {
+            EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymePrintType), print as u8);
+        }
+    }
+    pub fn set_print(print: bool) {
+        unsafe {
+            EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymePrint), print as u8);
+        }
+    }
+    pub fn set_strict_aliasing(strict: bool) {
+        unsafe {
+            EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymeStrictAliasing), strict as u8);
+        }
+    }
+    pub fn set_loose_types(loose: bool) {
+        unsafe {
+            EnzymeSetCLBool(std::ptr::addr_of_mut!(looseTypeAnalysis), loose as u8);
+        }
+    }
+    pub fn set_inline(val: bool) {
+        unsafe {
+            EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymeInline), val as u8);
+        }
+    }
+    pub fn set_rust_rules(val: bool) {
+        unsafe {
+            EnzymeSetCLBool(std::ptr::addr_of_mut!(RustTypeRules), val as u8);
+        }
+    }
+}
+
+#[cfg(not(llvm_enzyme))]
+pub use self::Fallback_AD::*;
+
+#[cfg(not(llvm_enzyme))]
+pub mod Fallback_AD {
+    #![allow(unused_variables)]
+
+    pub fn set_inline(val: bool) {
+        unimplemented!()
+    }
+    pub fn set_print_perf(print: bool) {
+        unimplemented!()
+    }
+    pub fn set_print_activity(print: bool) {
+        unimplemented!()
+    }
+    pub fn set_print_type(print: bool) {
+        unimplemented!()
+    }
+    pub fn set_print(print: bool) {
+        unimplemented!()
+    }
+    pub fn set_strict_aliasing(strict: bool) {
+        unimplemented!()
+    }
+    pub fn set_loose_types(loose: bool) {
+        unimplemented!()
+    }
+    pub fn set_rust_rules(val: bool) {
+        unimplemented!()
+    }
+}
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index 618fdb09d4d..da91e6edbcf 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -954,6 +954,17 @@ bitflags! {
     }
 }
 
+// These values **must** match with LLVMGEPNoWrapFlags
+bitflags! {
+    #[repr(transparent)]
+    #[derive(Default)]
+    pub struct GEPNoWrapFlags : c_uint {
+        const InBounds = 1 << 0;
+        const NUSW = 1 << 1;
+        const NUW = 1 << 2;
+    }
+}
+
 unsafe extern "C" {
     pub type ModuleBuffer;
 }
@@ -1430,6 +1441,8 @@ unsafe extern "C" {
 
     // Extra flags on arithmetic
     pub(crate) fn LLVMSetIsDisjoint(Instr: &Value, IsDisjoint: Bool);
+    pub(crate) fn LLVMSetNUW(ArithInst: &Value, HasNUW: Bool);
+    pub(crate) fn LLVMSetNSW(ArithInst: &Value, HasNSW: Bool);
 
     // Memory
     pub(crate) fn LLVMBuildAlloca<'a>(
@@ -1452,21 +1465,14 @@ unsafe extern "C" {
 
     pub(crate) fn LLVMBuildStore<'a>(B: &Builder<'a>, Val: &'a Value, Ptr: &'a Value) -> &'a Value;
 
-    pub(crate) fn LLVMBuildGEP2<'a>(
-        B: &Builder<'a>,
-        Ty: &'a Type,
-        Pointer: &'a Value,
-        Indices: *const &'a Value,
-        NumIndices: c_uint,
-        Name: *const c_char,
-    ) -> &'a Value;
-    pub(crate) fn LLVMBuildInBoundsGEP2<'a>(
+    pub(crate) fn LLVMBuildGEPWithNoWrapFlags<'a>(
         B: &Builder<'a>,
         Ty: &'a Type,
         Pointer: &'a Value,
         Indices: *const &'a Value,
         NumIndices: c_uint,
         Name: *const c_char,
+        Flags: GEPNoWrapFlags,
     ) -> &'a Value;
 
     // Casts
diff --git a/compiler/rustc_codegen_llvm/src/llvm/mod.rs b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
index efc9cf2ef69..a36226b25a2 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
@@ -1,7 +1,6 @@
 #![allow(non_snake_case)]
 
 use std::ffi::{CStr, CString};
-use std::ops::Deref;
 use std::ptr;
 use std::str::FromStr;
 use std::string::FromUtf8Error;
@@ -355,6 +354,16 @@ impl<'a> OperandBundleOwned<'a> {
         };
         OperandBundleOwned { raw: ptr::NonNull::new(raw).unwrap() }
     }
+
+    /// Returns inner `OperandBundle` type.
+    ///
+    /// This could be a `Deref` implementation, but `OperandBundle` contains an extern type and
+    /// `Deref::Target: ?Sized`.
+    pub(crate) fn raw(&self) -> &OperandBundle<'a> {
+        // SAFETY: The returned reference is opaque and can only used for FFI.
+        // It is valid for as long as `&self` is.
+        unsafe { self.raw.as_ref() }
+    }
 }
 
 impl Drop for OperandBundleOwned<'_> {
@@ -365,16 +374,6 @@ impl Drop for OperandBundleOwned<'_> {
     }
 }
 
-impl<'a> Deref for OperandBundleOwned<'a> {
-    type Target = OperandBundle<'a>;
-
-    fn deref(&self) -> &Self::Target {
-        // SAFETY: The returned reference is opaque and can only used for FFI.
-        // It is valid for as long as `&self` is.
-        unsafe { self.raw.as_ref() }
-    }
-}
-
 pub(crate) fn add_module_flag_u32(
     module: &Module,
     merge_behavior: ModuleFlagMergeBehavior,
@@ -403,3 +402,15 @@ pub(crate) fn add_module_flag_str(
         );
     }
 }
+
+pub(crate) fn set_dllimport_storage_class<'ll>(v: &'ll Value) {
+    unsafe {
+        LLVMSetDLLStorageClass(v, DLLStorageClass::DllImport);
+    }
+}
+
+pub(crate) fn set_dso_local<'ll>(v: &'ll Value) {
+    unsafe {
+        LLVMRustSetDSOLocal(v, true);
+    }
+}
diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs
index b4b5d6a5b19..5cc4f4ab9e6 100644
--- a/compiler/rustc_codegen_llvm/src/llvm_util.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs
@@ -281,6 +281,8 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
         ("riscv32" | "riscv64", "zaamo") if get_version().0 < 19 => None,
         ("riscv32" | "riscv64", "zabha") if get_version().0 < 19 => None,
         ("riscv32" | "riscv64", "zalrsc") if get_version().0 < 19 => None,
+        ("riscv32" | "riscv64", "zama16b") if get_version().0 < 19 => None,
+        ("riscv32" | "riscv64", "zacas") if get_version().0 < 20 => None,
         // Enable the evex512 target feature if an avx512 target feature is enabled.
         ("x86", s) if s.starts_with("avx512") => {
             Some(LLVMFeature::with_dependency(s, TargetFeatureFoldStrength::EnableOnly("evex512")))
@@ -329,7 +331,8 @@ pub(crate) fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<S
                 if let Some(feat) = to_llvm_features(sess, feature) {
                     for llvm_feature in feat {
                         let cstr = SmallCStr::new(llvm_feature);
-                        if !unsafe { llvm::LLVMRustHasFeature(&target_machine, cstr.as_ptr()) } {
+                        if !unsafe { llvm::LLVMRustHasFeature(target_machine.raw(), cstr.as_ptr()) }
+                        {
                             return false;
                         }
                     }
@@ -451,8 +454,8 @@ pub(crate) fn print(req: &PrintRequest, out: &mut String, sess: &Session) {
     require_inited();
     let tm = create_informational_target_machine(sess, false);
     match req.kind {
-        PrintKind::TargetCPUs => print_target_cpus(sess, &tm, out),
-        PrintKind::TargetFeatures => print_target_features(sess, &tm, out),
+        PrintKind::TargetCPUs => print_target_cpus(sess, tm.raw(), out),
+        PrintKind::TargetFeatures => print_target_features(sess, tm.raw(), out),
         _ => bug!("rustc_codegen_llvm can't handle print request: {:?}", req),
     }
 }
diff --git a/compiler/rustc_codegen_llvm/src/mono_item.rs b/compiler/rustc_codegen_llvm/src/mono_item.rs
index 70edee21bd6..a64627eaf59 100644
--- a/compiler/rustc_codegen_llvm/src/mono_item.rs
+++ b/compiler/rustc_codegen_llvm/src/mono_item.rs
@@ -38,11 +38,7 @@ impl<'tcx> PreDefineCodegenMethods<'tcx> for CodegenCx<'_, 'tcx> {
 
         llvm::set_linkage(g, base::linkage_to_llvm(linkage));
         llvm::set_visibility(g, base::visibility_to_llvm(visibility));
-        unsafe {
-            if self.should_assume_dso_local(g, false) {
-                llvm::LLVMRustSetDSOLocal(g, true);
-            }
-        }
+        self.assume_dso_local(g, false);
 
         self.instances.borrow_mut().insert(instance, g);
     }
@@ -79,9 +75,7 @@ impl<'tcx> PreDefineCodegenMethods<'tcx> for CodegenCx<'_, 'tcx> {
 
         debug!("predefine_fn: instance = {:?}", instance);
 
-        if self.should_assume_dso_local(lldecl, false) {
-            unsafe { llvm::LLVMRustSetDSOLocal(lldecl, true) };
-        }
+        self.assume_dso_local(lldecl, false);
 
         self.instances.borrow_mut().insert(instance, lldecl);
     }
@@ -90,11 +84,16 @@ impl<'tcx> PreDefineCodegenMethods<'tcx> for CodegenCx<'_, 'tcx> {
 impl CodegenCx<'_, '_> {
     /// Whether a definition or declaration can be assumed to be local to a group of
     /// libraries that form a single DSO or executable.
-    pub(crate) fn should_assume_dso_local(
-        &self,
-        llval: &llvm::Value,
-        is_declaration: bool,
-    ) -> bool {
+    /// Marks the local as DSO if so.
+    pub(crate) fn assume_dso_local(&self, llval: &llvm::Value, is_declaration: bool) -> bool {
+        let assume = self.should_assume_dso_local(llval, is_declaration);
+        if assume {
+            llvm::set_dso_local(llval);
+        }
+        assume
+    }
+
+    fn should_assume_dso_local(&self, llval: &llvm::Value, is_declaration: bool) -> bool {
         let linkage = llvm::get_linkage(llval);
         let visibility = llvm::get_visibility(llval);
 
diff --git a/compiler/rustc_codegen_llvm/src/type_of.rs b/compiler/rustc_codegen_llvm/src/type_of.rs
index b0b6da869da..ba01fbff385 100644
--- a/compiler/rustc_codegen_llvm/src/type_of.rs
+++ b/compiler/rustc_codegen_llvm/src/type_of.rs
@@ -23,7 +23,7 @@ fn uncached_llvm_type<'a, 'tcx>(
             let element = layout.scalar_llvm_type_at(cx, element);
             return cx.type_vector(element, count);
         }
-        BackendRepr::Uninhabited | BackendRepr::Memory { .. } | BackendRepr::ScalarPair(..) => {}
+        BackendRepr::Memory { .. } | BackendRepr::ScalarPair(..) => {}
     }
 
     let name = match layout.ty.kind() {
@@ -172,19 +172,16 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
     fn is_llvm_immediate(&self) -> bool {
         match self.backend_repr {
             BackendRepr::Scalar(_) | BackendRepr::Vector { .. } => true,
-            BackendRepr::ScalarPair(..) | BackendRepr::Uninhabited | BackendRepr::Memory { .. } => {
-                false
-            }
+            BackendRepr::ScalarPair(..) | BackendRepr::Memory { .. } => false,
         }
     }
 
     fn is_llvm_scalar_pair(&self) -> bool {
         match self.backend_repr {
             BackendRepr::ScalarPair(..) => true,
-            BackendRepr::Uninhabited
-            | BackendRepr::Scalar(_)
-            | BackendRepr::Vector { .. }
-            | BackendRepr::Memory { .. } => false,
+            BackendRepr::Scalar(_) | BackendRepr::Vector { .. } | BackendRepr::Memory { .. } => {
+                false
+            }
         }
     }