about summary refs log tree commit diff
path: root/compiler/rustc_codegen_llvm/src
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/rustc_codegen_llvm/src')
-rw-r--r--compiler/rustc_codegen_llvm/src/asm.rs120
-rw-r--r--compiler/rustc_codegen_llvm/src/attributes.rs17
-rw-r--r--compiler/rustc_codegen_llvm/src/back/lto.rs67
-rw-r--r--compiler/rustc_codegen_llvm/src/back/write.rs20
-rw-r--r--compiler/rustc_codegen_llvm/src/base.rs6
-rw-r--r--compiler/rustc_codegen_llvm/src/builder.rs14
-rw-r--r--compiler/rustc_codegen_llvm/src/builder/autodiff.rs45
-rw-r--r--compiler/rustc_codegen_llvm/src/common.rs6
-rw-r--r--compiler/rustc_codegen_llvm/src/consts.rs7
-rw-r--r--compiler/rustc_codegen_llvm/src/context.rs26
-rw-r--r--compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs2
-rw-r--r--compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs4
-rw-r--r--compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs6
-rw-r--r--compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs36
-rw-r--r--compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs3
-rw-r--r--compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs1
-rw-r--r--compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs6
-rw-r--r--compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs48
-rw-r--r--compiler/rustc_codegen_llvm/src/debuginfo/mod.rs36
-rw-r--r--compiler/rustc_codegen_llvm/src/intrinsic.rs70
-rw-r--r--compiler/rustc_codegen_llvm/src/lib.rs9
-rw-r--r--compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs13
-rw-r--r--compiler/rustc_codegen_llvm/src/llvm/ffi.rs46
-rw-r--r--compiler/rustc_codegen_llvm/src/llvm/mod.rs53
-rw-r--r--compiler/rustc_codegen_llvm/src/llvm_util.rs143
-rw-r--r--compiler/rustc_codegen_llvm/src/type_.rs4
26 files changed, 569 insertions, 239 deletions
diff --git a/compiler/rustc_codegen_llvm/src/asm.rs b/compiler/rustc_codegen_llvm/src/asm.rs
index 88daa025740..9e3893d5314 100644
--- a/compiler/rustc_codegen_llvm/src/asm.rs
+++ b/compiler/rustc_codegen_llvm/src/asm.rs
@@ -14,7 +14,7 @@ use smallvec::SmallVec;
 use tracing::debug;
 
 use crate::builder::Builder;
-use crate::common::{AsCCharPtr, Funclet};
+use crate::common::Funclet;
 use crate::context::CodegenCx;
 use crate::type_::Type;
 use crate::type_of::LayoutLlvmExt;
@@ -376,7 +376,7 @@ impl<'ll, 'tcx> AsmBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
 
 impl<'tcx> AsmCodegenMethods<'tcx> for CodegenCx<'_, 'tcx> {
     fn codegen_global_asm(
-        &self,
+        &mut self,
         template: &[InlineAsmTemplatePiece],
         operands: &[GlobalAsmOperandRef<'tcx>],
         options: InlineAsmOptions,
@@ -435,13 +435,7 @@ impl<'tcx> AsmCodegenMethods<'tcx> for CodegenCx<'_, 'tcx> {
             template_str.push_str("\n.att_syntax\n");
         }
 
-        unsafe {
-            llvm::LLVMAppendModuleInlineAsm(
-                self.llmod,
-                template_str.as_c_char_ptr(),
-                template_str.len(),
-            );
-        }
+        llvm::append_module_inline_asm(self.llmod, template_str.as_bytes());
     }
 
     fn mangled_name(&self, instance: Instance<'tcx>) -> String {
@@ -482,67 +476,67 @@ pub(crate) fn inline_asm_call<'ll>(
 
     debug!("Asm Output Type: {:?}", output);
     let fty = bx.cx.type_func(&argtys, output);
+
     // Ask LLVM to verify that the constraints are well-formed.
-    let constraints_ok =
-        unsafe { llvm::LLVMRustInlineAsmVerify(fty, cons.as_c_char_ptr(), cons.len()) };
+    let constraints_ok = unsafe { llvm::LLVMRustInlineAsmVerify(fty, cons.as_ptr(), cons.len()) };
     debug!("constraint verification result: {:?}", constraints_ok);
-    if constraints_ok {
-        let v = unsafe {
-            llvm::LLVMRustInlineAsm(
-                fty,
-                asm.as_c_char_ptr(),
-                asm.len(),
-                cons.as_c_char_ptr(),
-                cons.len(),
-                volatile,
-                alignstack,
-                dia,
-                can_throw,
-            )
-        };
+    if !constraints_ok {
+        // LLVM has detected an issue with our constraints, so bail out.
+        return None;
+    }
 
-        let call = if !labels.is_empty() {
-            assert!(catch_funclet.is_none());
-            bx.callbr(fty, None, None, v, inputs, dest.unwrap(), labels, None, None)
-        } else if let Some((catch, funclet)) = catch_funclet {
-            bx.invoke(fty, None, None, v, inputs, dest.unwrap(), catch, funclet, None)
-        } else {
-            bx.call(fty, None, None, v, inputs, None, None)
-        };
+    let v = unsafe {
+        llvm::LLVMGetInlineAsm(
+            fty,
+            asm.as_ptr(),
+            asm.len(),
+            cons.as_ptr(),
+            cons.len(),
+            volatile,
+            alignstack,
+            dia,
+            can_throw,
+        )
+    };
 
-        // Store mark in a metadata node so we can map LLVM errors
-        // back to source locations. See #17552.
-        let key = "srcloc";
-        let kind = bx.get_md_kind_id(key);
+    let call = if !labels.is_empty() {
+        assert!(catch_funclet.is_none());
+        bx.callbr(fty, None, None, v, inputs, dest.unwrap(), labels, None, None)
+    } else if let Some((catch, funclet)) = catch_funclet {
+        bx.invoke(fty, None, None, v, inputs, dest.unwrap(), catch, funclet, None)
+    } else {
+        bx.call(fty, None, None, v, inputs, None, None)
+    };
 
-        // `srcloc` contains one 64-bit integer for each line of assembly code,
-        // where the lower 32 bits hold the lo byte position and the upper 32 bits
-        // hold the hi byte position.
-        let mut srcloc = vec![];
-        if dia == llvm::AsmDialect::Intel && line_spans.len() > 1 {
-            // LLVM inserts an extra line to add the ".intel_syntax", so add
-            // a dummy srcloc entry for it.
-            //
-            // Don't do this if we only have 1 line span since that may be
-            // due to the asm template string coming from a macro. LLVM will
-            // default to the first srcloc for lines that don't have an
-            // associated srcloc.
-            srcloc.push(llvm::LLVMValueAsMetadata(bx.const_u64(0)));
-        }
-        srcloc.extend(line_spans.iter().map(|span| {
-            llvm::LLVMValueAsMetadata(
-                bx.const_u64(u64::from(span.lo().to_u32()) | (u64::from(span.hi().to_u32()) << 32)),
-            )
-        }));
-        let md = unsafe { llvm::LLVMMDNodeInContext2(bx.llcx, srcloc.as_ptr(), srcloc.len()) };
-        let md = bx.get_metadata_value(md);
-        llvm::LLVMSetMetadata(call, kind, md);
+    // Store mark in a metadata node so we can map LLVM errors
+    // back to source locations. See #17552.
+    let key = "srcloc";
+    let kind = bx.get_md_kind_id(key);
 
-        Some(call)
-    } else {
-        // LLVM has detected an issue with our constraints, bail out
-        None
+    // `srcloc` contains one 64-bit integer for each line of assembly code,
+    // where the lower 32 bits hold the lo byte position and the upper 32 bits
+    // hold the hi byte position.
+    let mut srcloc = vec![];
+    if dia == llvm::AsmDialect::Intel && line_spans.len() > 1 {
+        // LLVM inserts an extra line to add the ".intel_syntax", so add
+        // a dummy srcloc entry for it.
+        //
+        // Don't do this if we only have 1 line span since that may be
+        // due to the asm template string coming from a macro. LLVM will
+        // default to the first srcloc for lines that don't have an
+        // associated srcloc.
+        srcloc.push(llvm::LLVMValueAsMetadata(bx.const_u64(0)));
     }
+    srcloc.extend(line_spans.iter().map(|span| {
+        llvm::LLVMValueAsMetadata(
+            bx.const_u64(u64::from(span.lo().to_u32()) | (u64::from(span.hi().to_u32()) << 32)),
+        )
+    }));
+    let md = unsafe { llvm::LLVMMDNodeInContext2(bx.llcx, srcloc.as_ptr(), srcloc.len()) };
+    let md = bx.get_metadata_value(md);
+    llvm::LLVMSetMetadata(call, kind, md);
+
+    Some(call)
 }
 
 /// If the register is an xmm/ymm/zmm register then return its index.
diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs
index e8c42d16733..176fb72dfdc 100644
--- a/compiler/rustc_codegen_llvm/src/attributes.rs
+++ b/compiler/rustc_codegen_llvm/src/attributes.rs
@@ -1,5 +1,4 @@
 //! Set and unset common attributes on LLVM values.
-
 use rustc_attr_parsing::{InlineAttr, InstructionSetAttr, OptimizeAttr};
 use rustc_codegen_ssa::traits::*;
 use rustc_hir::def_id::DefId;
@@ -28,6 +27,22 @@ pub(crate) fn apply_to_callsite(callsite: &Value, idx: AttributePlace, attrs: &[
     }
 }
 
+pub(crate) fn has_attr(llfn: &Value, idx: AttributePlace, attr: AttributeKind) -> bool {
+    llvm::HasAttributeAtIndex(llfn, idx, attr)
+}
+
+pub(crate) fn has_string_attr(llfn: &Value, name: &str) -> bool {
+    llvm::HasStringAttribute(llfn, name)
+}
+
+pub(crate) fn remove_from_llfn(llfn: &Value, place: AttributePlace, kind: AttributeKind) {
+    llvm::RemoveRustEnumAttributeAtIndex(llfn, place, kind);
+}
+
+pub(crate) fn remove_string_attr_from_llfn(llfn: &Value, name: &str) {
+    llvm::RemoveStringAttrFromFn(llfn, name);
+}
+
 /// Get LLVM attribute for the provided inline heuristic.
 #[inline]
 fn inline_attr<'ll>(cx: &CodegenCx<'ll, '_>, inline: InlineAttr) -> Option<&'ll Attribute> {
diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs
index a8b49e9552c..cb329323f5d 100644
--- a/compiler/rustc_codegen_llvm/src/back/lto.rs
+++ b/compiler/rustc_codegen_llvm/src/back/lto.rs
@@ -28,8 +28,9 @@ use crate::back::write::{
 use crate::errors::{
     DynamicLinkingWithLTO, LlvmError, LtoBitcodeFromRlib, LtoDisallowed, LtoDylib, LtoProcMacro,
 };
+use crate::llvm::AttributePlace::Function;
 use crate::llvm::{self, build_string};
-use crate::{LlvmCodegenBackend, ModuleLlvm};
+use crate::{LlvmCodegenBackend, ModuleLlvm, SimpleCx, attributes};
 
 /// We keep track of the computed LTO cache keys from the previous
 /// session to determine which CGUs we can reuse.
@@ -41,7 +42,8 @@ fn crate_type_allows_lto(crate_type: CrateType) -> bool {
         | CrateType::Dylib
         | CrateType::Staticlib
         | CrateType::Cdylib
-        | CrateType::ProcMacro => true,
+        | CrateType::ProcMacro
+        | CrateType::Sdylib => true,
         CrateType::Rlib => false,
     }
 }
@@ -584,12 +586,10 @@ fn thin_lto(
     }
 }
 
-fn enable_autodiff_settings(ad: &[config::AutoDiff], module: &mut ModuleCodegen<ModuleLlvm>) {
+fn enable_autodiff_settings(ad: &[config::AutoDiff]) {
     for &val in ad {
+        // We intentionally don't use a wildcard, to not forget handling anything new.
         match val {
-            config::AutoDiff::PrintModBefore => {
-                unsafe { llvm::LLVMDumpModule(module.module_llvm.llmod()) };
-            }
             config::AutoDiff::PrintPerf => {
                 llvm::set_print_perf(true);
             }
@@ -603,17 +603,23 @@ fn enable_autodiff_settings(ad: &[config::AutoDiff], module: &mut ModuleCodegen<
                 llvm::set_inline(true);
             }
             config::AutoDiff::LooseTypes => {
-                llvm::set_loose_types(false);
+                llvm::set_loose_types(true);
             }
             config::AutoDiff::PrintSteps => {
                 llvm::set_print(true);
             }
-            // We handle this below
+            // We handle this in the PassWrapper.cpp
+            config::AutoDiff::PrintPasses => {}
+            // We handle this in the PassWrapper.cpp
+            config::AutoDiff::PrintModBefore => {}
+            // We handle this in the PassWrapper.cpp
             config::AutoDiff::PrintModAfter => {}
-            // We handle this below
+            // We handle this in the PassWrapper.cpp
             config::AutoDiff::PrintModFinal => {}
             // This is required and already checked
             config::AutoDiff::Enable => {}
+            // We handle this below
+            config::AutoDiff::NoPostopt => {}
         }
     }
     // This helps with handling enums for now.
@@ -647,27 +653,52 @@ pub(crate) fn run_pass_manager(
     // We then run the llvm_optimize function a second time, to optimize the code which we generated
     // in the enzyme differentiation pass.
     let enable_ad = config.autodiff.contains(&config::AutoDiff::Enable);
-    let stage =
-        if enable_ad { write::AutodiffStage::DuringAD } else { write::AutodiffStage::PostAD };
+    let stage = if thin {
+        write::AutodiffStage::PreAD
+    } else {
+        if enable_ad { write::AutodiffStage::DuringAD } else { write::AutodiffStage::PostAD }
+    };
 
     if enable_ad {
-        enable_autodiff_settings(&config.autodiff, module);
+        enable_autodiff_settings(&config.autodiff);
     }
 
     unsafe {
         write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?;
     }
 
-    if cfg!(llvm_enzyme) && enable_ad {
-        // This is the post-autodiff IR, mainly used for testing and educational purposes.
-        if config.autodiff.contains(&config::AutoDiff::PrintModAfter) {
-            unsafe { llvm::LLVMDumpModule(module.module_llvm.llmod()) };
+    if cfg!(llvm_enzyme) && enable_ad && !thin {
+        let cx =
+            SimpleCx::new(module.module_llvm.llmod(), &module.module_llvm.llcx, cgcx.pointer_size);
+
+        for function in cx.get_functions() {
+            let enzyme_marker = "enzyme_marker";
+            if attributes::has_string_attr(function, enzyme_marker) {
+                // Sanity check: 'noinline' must still be present here, since we replace it below.
+                assert!(
+                    attributes::has_attr(function, Function, llvm::AttributeKind::NoInline),
+                    "Expected __enzyme function to have 'noinline' before adding 'alwaysinline'"
+                );
+
+                attributes::remove_from_llfn(function, Function, llvm::AttributeKind::NoInline);
+                attributes::remove_string_attr_from_llfn(function, enzyme_marker);
+
+                assert!(
+                    !attributes::has_string_attr(function, enzyme_marker),
+                    "Expected function to not have 'enzyme_marker'"
+                );
+
+                let always_inline = llvm::AttributeKind::AlwaysInline.create_attr(cx.llcx);
+                attributes::apply_to_llfn(function, Function, &[always_inline]);
+            }
         }
 
         let opt_stage = llvm::OptStage::FatLTO;
         let stage = write::AutodiffStage::PostAD;
-        unsafe {
-            write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?;
+        if !config.autodiff.contains(&config::AutoDiff::NoPostopt) {
+            unsafe {
+                write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?;
+            }
         }
 
         // This is the final IR, so people should be able to inspect the optimized autodiff output,
diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs
index 18d221d232e..20721c74608 100644
--- a/compiler/rustc_codegen_llvm/src/back/write.rs
+++ b/compiler/rustc_codegen_llvm/src/back/write.rs
@@ -572,6 +572,10 @@ pub(crate) unsafe fn llvm_optimize(
 
     let consider_ad = cfg!(llvm_enzyme) && config.autodiff.contains(&config::AutoDiff::Enable);
     let run_enzyme = autodiff_stage == AutodiffStage::DuringAD;
+    let print_before_enzyme = config.autodiff.contains(&config::AutoDiff::PrintModBefore);
+    let print_after_enzyme = config.autodiff.contains(&config::AutoDiff::PrintModAfter);
+    let print_passes = config.autodiff.contains(&config::AutoDiff::PrintPasses);
+    let merge_functions;
     let unroll_loops;
     let vectorize_slp;
     let vectorize_loop;
@@ -579,13 +583,20 @@ pub(crate) unsafe fn llvm_optimize(
     // When we build rustc with enzyme/autodiff support, we want to postpone size-increasing
     // optimizations until after differentiation. Our pipeline is thus: (opt + enzyme), (full opt).
     // We therefore have two calls to llvm_optimize, if autodiff is used.
+    //
+    // We also must disable merge_functions, since autodiff placeholder/dummy bodies tend to be
+    // identical. We run opts before AD, so there is a chance that LLVM will merge our dummies.
+    // In that case, we lack some dummy bodies and can't replace them with the real AD code anymore.
+    // We then would need to abort compilation. This was especially common in test cases.
     if consider_ad && autodiff_stage != AutodiffStage::PostAD {
+        merge_functions = false;
         unroll_loops = false;
         vectorize_slp = false;
         vectorize_loop = false;
     } else {
         unroll_loops =
             opt_level != config::OptLevel::Size && opt_level != config::OptLevel::SizeMin;
+        merge_functions = config.merge_functions;
         vectorize_slp = config.vectorize_slp;
         vectorize_loop = config.vectorize_loop;
     }
@@ -663,13 +674,16 @@ pub(crate) unsafe fn llvm_optimize(
             thin_lto_buffer,
             config.emit_thin_lto,
             config.emit_thin_lto_summary,
-            config.merge_functions,
+            merge_functions,
             unroll_loops,
             vectorize_slp,
             vectorize_loop,
             config.no_builtins,
             config.emit_lifetime_markers,
             run_enzyme,
+            print_before_enzyme,
+            print_after_enzyme,
+            print_passes,
             sanitizer_options.as_ref(),
             pgo_gen_path.as_ref().map_or(std::ptr::null(), |s| s.as_ptr()),
             pgo_use_path.as_ref().map_or(std::ptr::null(), |s| s.as_ptr()),
@@ -1134,9 +1148,9 @@ unsafe fn embed_bitcode(
             // We need custom section flags, so emit module-level inline assembly.
             let section_flags = if cgcx.is_pe_coff { "n" } else { "e" };
             let asm = create_section_with_flags_asm(".llvmbc", section_flags, bitcode);
-            llvm::LLVMAppendModuleInlineAsm(llmod, asm.as_c_char_ptr(), asm.len());
+            llvm::append_module_inline_asm(llmod, &asm);
             let asm = create_section_with_flags_asm(".llvmcmd", section_flags, cmdline.as_bytes());
-            llvm::LLVMAppendModuleInlineAsm(llmod, asm.as_c_char_ptr(), asm.len());
+            llvm::append_module_inline_asm(llmod, &asm);
         }
     }
 }
diff --git a/compiler/rustc_codegen_llvm/src/base.rs b/compiler/rustc_codegen_llvm/src/base.rs
index 6bd27914dbd..e4fac35aa44 100644
--- a/compiler/rustc_codegen_llvm/src/base.rs
+++ b/compiler/rustc_codegen_llvm/src/base.rs
@@ -83,15 +83,15 @@ pub(crate) fn compile_codegen_unit(
         // Instantiate monomorphizations without filling out definitions yet...
         let llvm_module = ModuleLlvm::new(tcx, cgu_name.as_str());
         {
-            let cx = CodegenCx::new(tcx, cgu, &llvm_module);
+            let mut cx = CodegenCx::new(tcx, cgu, &llvm_module);
             let mono_items = cx.codegen_unit.items_in_deterministic_order(cx.tcx);
             for &(mono_item, data) in &mono_items {
                 mono_item.predefine::<Builder<'_, '_, '_>>(&cx, data.linkage, data.visibility);
             }
 
             // ... and now that we have everything pre-defined, fill out those definitions.
-            for &(mono_item, _) in &mono_items {
-                mono_item.define::<Builder<'_, '_, '_>>(&cx);
+            for &(mono_item, item_data) in &mono_items {
+                mono_item.define::<Builder<'_, '_, '_>>(&mut cx, item_data);
             }
 
             // If this codegen unit contains the main function, also create the
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index 35134e9f5a0..5238755c8eb 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -123,7 +123,7 @@ impl<'a, 'll, CX: Borrow<SCx<'ll>>> GenericBuilder<'a, 'll, CX> {
 /// Empty string, to be used where LLVM expects an instruction name, indicating
 /// that the instruction is to be left unnamed (i.e. numbered, in textual IR).
 // FIXME(eddyb) pass `&CStr` directly to FFI once it's a thin pointer.
-const UNNAMED: *const c_char = c"".as_ptr();
+pub(crate) const UNNAMED: *const c_char = c"".as_ptr();
 
 impl<'ll, CX: Borrow<SCx<'ll>>> BackendTypes for GenericBuilder<'_, 'll, CX> {
     type Value = <GenericCx<'ll, CX> as BackendTypes>::Value;
@@ -361,7 +361,7 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
 
         // Emit KCFI operand bundle
         let kcfi_bundle = self.kcfi_operand_bundle(fn_attrs, fn_abi, instance, llfn);
-        if let Some(kcfi_bundle) = kcfi_bundle.as_ref().map(|b| b.raw()) {
+        if let Some(kcfi_bundle) = kcfi_bundle.as_ref().map(|b| b.as_ref()) {
             bundles.push(kcfi_bundle);
         }
 
@@ -594,6 +594,7 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
     fn load(&mut self, ty: &'ll Type, ptr: &'ll Value, align: Align) -> &'ll Value {
         unsafe {
             let load = llvm::LLVMBuildLoad2(self.llbuilder, ty, ptr, UNNAMED);
+            let align = align.min(self.cx().tcx.sess.target.max_reliable_alignment());
             llvm::LLVMSetAlignment(load, align.bytes() as c_uint);
             load
         }
@@ -807,6 +808,7 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         assert_eq!(self.cx.type_kind(self.cx.val_ty(ptr)), TypeKind::Pointer);
         unsafe {
             let store = llvm::LLVMBuildStore(self.llbuilder, val, ptr);
+            let align = align.min(self.cx().tcx.sess.target.max_reliable_alignment());
             let align =
                 if flags.contains(MemFlags::UNALIGNED) { 1 } else { align.bytes() as c_uint };
             llvm::LLVMSetAlignment(store, align);
@@ -1414,7 +1416,7 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
 
         // Emit KCFI operand bundle
         let kcfi_bundle = self.kcfi_operand_bundle(fn_attrs, fn_abi, instance, llfn);
-        if let Some(kcfi_bundle) = kcfi_bundle.as_ref().map(|b| b.raw()) {
+        if let Some(kcfi_bundle) = kcfi_bundle.as_ref().map(|b| b.as_ref()) {
             bundles.push(kcfi_bundle);
         }
 
@@ -1747,7 +1749,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
 
         // Emit KCFI operand bundle
         let kcfi_bundle = self.kcfi_operand_bundle(fn_attrs, fn_abi, instance, llfn);
-        if let Some(kcfi_bundle) = kcfi_bundle.as_ref().map(|b| b.raw()) {
+        if let Some(kcfi_bundle) = kcfi_bundle.as_ref().map(|b| b.as_ref()) {
             bundles.push(kcfi_bundle);
         }
 
@@ -1834,7 +1836,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
         fn_abi: Option<&FnAbi<'tcx, Ty<'tcx>>>,
         instance: Option<Instance<'tcx>>,
         llfn: &'ll Value,
-    ) -> Option<llvm::OperandBundleOwned<'ll>> {
+    ) -> Option<llvm::OperandBundleBox<'ll>> {
         let is_indirect_call = unsafe { llvm::LLVMRustIsNonGVFunctionPointerTy(llfn) };
         let kcfi_bundle = if self.tcx.sess.is_sanitizer_kcfi_enabled()
             && let Some(fn_abi) = fn_abi
@@ -1860,7 +1862,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
                 kcfi::typeid_for_fnabi(self.tcx, fn_abi, options)
             };
 
-            Some(llvm::OperandBundleOwned::new("kcfi", &[self.const_u32(kcfi_typeid)]))
+            Some(llvm::OperandBundleBox::new("kcfi", &[self.const_u32(kcfi_typeid)]))
         } else {
             None
         };
diff --git a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
index 5e7ef27143b..c5c13ac097a 100644
--- a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
+++ b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
@@ -10,7 +10,7 @@ use rustc_middle::bug;
 use tracing::{debug, trace};
 
 use crate::back::write::llvm_err;
-use crate::builder::SBuilder;
+use crate::builder::{SBuilder, UNNAMED};
 use crate::context::SimpleCx;
 use crate::declare::declare_simple_fn;
 use crate::errors::{AutoDiffWithoutEnable, LlvmError};
@@ -51,6 +51,7 @@ fn has_sret(fnc: &Value) -> bool {
 // using iterators and peek()?
 fn match_args_from_caller_to_enzyme<'ll>(
     cx: &SimpleCx<'ll>,
+    builder: &SBuilder<'ll, 'll>,
     width: u32,
     args: &mut Vec<&'ll llvm::Value>,
     inputs: &[DiffActivity],
@@ -78,7 +79,9 @@ fn match_args_from_caller_to_enzyme<'ll>(
     let enzyme_const = cx.create_metadata("enzyme_const".to_string()).unwrap();
     let enzyme_out = cx.create_metadata("enzyme_out".to_string()).unwrap();
     let enzyme_dup = cx.create_metadata("enzyme_dup".to_string()).unwrap();
+    let enzyme_dupv = cx.create_metadata("enzyme_dupv".to_string()).unwrap();
     let enzyme_dupnoneed = cx.create_metadata("enzyme_dupnoneed".to_string()).unwrap();
+    let enzyme_dupnoneedv = cx.create_metadata("enzyme_dupnoneedv".to_string()).unwrap();
 
     while activity_pos < inputs.len() {
         let diff_activity = inputs[activity_pos as usize];
@@ -90,13 +93,34 @@ fn match_args_from_caller_to_enzyme<'ll>(
             DiffActivity::Active => (enzyme_out, false),
             DiffActivity::ActiveOnly => (enzyme_out, false),
             DiffActivity::Dual => (enzyme_dup, true),
+            DiffActivity::Dualv => (enzyme_dupv, true),
             DiffActivity::DualOnly => (enzyme_dupnoneed, true),
+            DiffActivity::DualvOnly => (enzyme_dupnoneedv, true),
             DiffActivity::Duplicated => (enzyme_dup, true),
             DiffActivity::DuplicatedOnly => (enzyme_dupnoneed, true),
-            DiffActivity::FakeActivitySize => (enzyme_const, false),
+            DiffActivity::FakeActivitySize(_) => (enzyme_const, false),
         };
         let outer_arg = outer_args[outer_pos];
         args.push(cx.get_metadata_value(activity));
+        if matches!(diff_activity, DiffActivity::Dualv) {
+            let next_outer_arg = outer_args[outer_pos + 1];
+            let elem_bytes_size: u64 = match inputs[activity_pos + 1] {
+                DiffActivity::FakeActivitySize(Some(s)) => s.into(),
+                _ => bug!("incorrect Dualv handling recognized."),
+            };
+            // stride: sizeof(T) * n_elems.
+            // n_elems is the next integer.
+            // Now we multiply `elem_bytes_size * next_outer_arg` to get the stride.
+            let mul = unsafe {
+                llvm::LLVMBuildMul(
+                    builder.llbuilder,
+                    cx.get_const_i64(elem_bytes_size),
+                    next_outer_arg,
+                    UNNAMED,
+                )
+            };
+            args.push(mul);
+        }
         args.push(outer_arg);
         if duplicated {
             // We know that duplicated args by construction have a following argument,
@@ -114,7 +138,7 @@ fn match_args_from_caller_to_enzyme<'ll>(
                 } else {
                     let next_activity = inputs[activity_pos + 1];
                     // We analyze the MIR types and add this dummy activity if we visit a slice.
-                    next_activity == DiffActivity::FakeActivitySize
+                    matches!(next_activity, DiffActivity::FakeActivitySize(_))
                 }
             };
             if slice {
@@ -125,7 +149,10 @@ fn match_args_from_caller_to_enzyme<'ll>(
                 // int2 >= int1, which means the shadow vector is large enough to store the gradient.
                 assert_eq!(cx.type_kind(next_outer_ty), TypeKind::Integer);
 
-                for i in 0..(width as usize) {
+                let iterations =
+                    if matches!(diff_activity, DiffActivity::Dualv) { 1 } else { width as usize };
+
+                for i in 0..iterations {
                     let next_outer_arg2 = outer_args[outer_pos + 2 * (i + 1)];
                     let next_outer_ty2 = cx.val_ty(next_outer_arg2);
                     assert_eq!(cx.type_kind(next_outer_ty2), TypeKind::Pointer);
@@ -136,7 +163,7 @@ fn match_args_from_caller_to_enzyme<'ll>(
                 }
                 args.push(cx.get_metadata_value(enzyme_const));
                 args.push(next_outer_arg);
-                outer_pos += 2 + 2 * width as usize;
+                outer_pos += 2 + 2 * iterations;
                 activity_pos += 2;
             } else {
                 // A duplicated pointer will have the following two outer_fn arguments:
@@ -334,6 +361,11 @@ fn generate_enzyme_call<'ll>(
         let attr = llvm::AttributeKind::NoInline.create_attr(cx.llcx);
         attributes::apply_to_llfn(ad_fn, Function, &[attr]);
 
+        // We add a made-up attribute just such that we can recognize it after AD to update
+        // (no)-inline attributes. We'll then also remove this attribute.
+        let enzyme_marker_attr = llvm::CreateAttrString(cx.llcx, "enzyme_marker");
+        attributes::apply_to_llfn(outer_fn, Function, &[enzyme_marker_attr]);
+
         // first, remove all calls from fnc
         let entry = llvm::LLVMGetFirstBasicBlock(outer_fn);
         let br = llvm::LLVMRustGetTerminator(entry);
@@ -360,6 +392,7 @@ fn generate_enzyme_call<'ll>(
         let outer_args: Vec<&llvm::Value> = get_params(outer_fn);
         match_args_from_caller_to_enzyme(
             &cx,
+            &builder,
             attrs.width,
             &mut args,
             &attrs.input_activity,
@@ -445,7 +478,7 @@ pub(crate) fn differentiate<'ll>(
         return Err(diag_handler.handle().emit_almost_fatal(AutoDiffWithoutEnable));
     }
 
-    // Before dumping the module, we want all the TypeTrees to become part of the module.
+    // Here we replace the placeholder code with the actual autodiff code, which calls Enzyme.
     for item in diff_items.iter() {
         let name = item.source.clone();
         let fn_def: Option<&llvm::Value> = cx.get_function(&name);
diff --git a/compiler/rustc_codegen_llvm/src/common.rs b/compiler/rustc_codegen_llvm/src/common.rs
index a6f277e4455..3cfa96393e9 100644
--- a/compiler/rustc_codegen_llvm/src/common.rs
+++ b/compiler/rustc_codegen_llvm/src/common.rs
@@ -67,12 +67,12 @@ use crate::value::Value;
 /// the `OperandBundleDef` value created for MSVC landing pads.
 pub(crate) struct Funclet<'ll> {
     cleanuppad: &'ll Value,
-    operand: llvm::OperandBundleOwned<'ll>,
+    operand: llvm::OperandBundleBox<'ll>,
 }
 
 impl<'ll> Funclet<'ll> {
     pub(crate) fn new(cleanuppad: &'ll Value) -> Self {
-        Funclet { cleanuppad, operand: llvm::OperandBundleOwned::new("funclet", &[cleanuppad]) }
+        Funclet { cleanuppad, operand: llvm::OperandBundleBox::new("funclet", &[cleanuppad]) }
     }
 
     pub(crate) fn cleanuppad(&self) -> &'ll Value {
@@ -80,7 +80,7 @@ impl<'ll> Funclet<'ll> {
     }
 
     pub(crate) fn bundle(&self) -> &llvm::OperandBundle<'ll> {
-        self.operand.raw()
+        self.operand.as_ref()
     }
 }
 
diff --git a/compiler/rustc_codegen_llvm/src/consts.rs b/compiler/rustc_codegen_llvm/src/consts.rs
index bf81eb648f8..cbac55c7153 100644
--- a/compiler/rustc_codegen_llvm/src/consts.rs
+++ b/compiler/rustc_codegen_llvm/src/consts.rs
@@ -364,7 +364,12 @@ impl<'ll> CodegenCx<'ll, '_> {
 
         if !def_id.is_local() {
             let needs_dll_storage_attr = self.use_dll_storage_attrs
-                && !self.tcx.is_foreign_item(def_id)
+                // If the symbol is a foreign item, then don't automatically apply DLLImport, as
+                // we'll rely on the #[link] attribute instead. BUT, if this is an internal symbol
+                // then it may be generated by the compiler in some crate, so we do need to apply
+                // DLLImport when linking with the MSVC linker.
+                && (!self.tcx.is_foreign_item(def_id)
+                    || (self.sess().target.is_like_msvc && fn_attrs.flags.contains(CodegenFnAttrFlags::RUSTC_STD_INTERNAL_SYMBOL)))
                 // Local definitions can never be imported, so we must not apply
                 // the DLLImport annotation.
                 && !dso_local
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index 4ec69995518..b0d8e11d1fb 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -698,6 +698,16 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
             llvm::LLVMMDStringInContext2(self.llcx(), name.as_ptr() as *const c_char, name.len())
         })
     }
+
+    pub(crate) fn get_functions(&self) -> Vec<&'ll Value> {
+        let mut functions = vec![];
+        let mut func = unsafe { llvm::LLVMGetFirstFunction(self.llmod()) };
+        while let Some(f) = func {
+            functions.push(f);
+            func = unsafe { llvm::LLVMGetNextFunction(f) }
+        }
+        functions
+    }
 }
 
 impl<'ll, 'tcx> MiscCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
@@ -999,11 +1009,27 @@ impl<'ll> CodegenCx<'ll, '_> {
         ifn!("llvm.minnum.f64", fn(t_f64, t_f64) -> t_f64);
         ifn!("llvm.minnum.f128", fn(t_f128, t_f128) -> t_f128);
 
+        ifn!("llvm.minimum.f16", fn(t_f16, t_f16) -> t_f16);
+        ifn!("llvm.minimum.f32", fn(t_f32, t_f32) -> t_f32);
+        ifn!("llvm.minimum.f64", fn(t_f64, t_f64) -> t_f64);
+        // There are issues on x86_64 and aarch64 with the f128 variant.
+        //  - https://github.com/llvm/llvm-project/issues/139380
+        //  - https://github.com/llvm/llvm-project/issues/139381
+        // ifn!("llvm.minimum.f128", fn(t_f128, t_f128) -> t_f128);
+
         ifn!("llvm.maxnum.f16", fn(t_f16, t_f16) -> t_f16);
         ifn!("llvm.maxnum.f32", fn(t_f32, t_f32) -> t_f32);
         ifn!("llvm.maxnum.f64", fn(t_f64, t_f64) -> t_f64);
         ifn!("llvm.maxnum.f128", fn(t_f128, t_f128) -> t_f128);
 
+        ifn!("llvm.maximum.f16", fn(t_f16, t_f16) -> t_f16);
+        ifn!("llvm.maximum.f32", fn(t_f32, t_f32) -> t_f32);
+        ifn!("llvm.maximum.f64", fn(t_f64, t_f64) -> t_f64);
+        // There are issues on x86_64 and aarch64 with the f128 variant.
+        //  - https://github.com/llvm/llvm-project/issues/139380
+        //  - https://github.com/llvm/llvm-project/issues/139381
+        // ifn!("llvm.maximum.f128", fn(t_f128, t_f128) -> t_f128);
+
         ifn!("llvm.floor.f16", fn(t_f16) -> t_f16);
         ifn!("llvm.floor.f32", fn(t_f32) -> t_f32);
         ifn!("llvm.floor.f64", fn(t_f64) -> t_f64);
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs
index 68f60f169b5..fe3a7a1580b 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs
@@ -157,7 +157,7 @@ fn make_dummy_instance<'tcx>(tcx: TyCtxt<'tcx>, local_def_id: LocalDefId) -> ty:
     let def_id = local_def_id.to_def_id();
 
     // Make a dummy instance that fills in all generics with placeholders.
-    ty::Instance::new(
+    ty::Instance::new_raw(
         def_id,
         ty::GenericArgs::for_item(tcx, def_id, |param, _| {
             if let ty::GenericParamDefKind::Lifetime = param.kind {
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs b/compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs
index f52991b3697..d2591139d6e 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs
@@ -3,7 +3,6 @@ use std::collections::hash_map::Entry;
 use rustc_codegen_ssa::mir::debuginfo::{DebugScope, FunctionDebugContext};
 use rustc_codegen_ssa::traits::*;
 use rustc_data_structures::fx::FxHashMap;
-use rustc_index::Idx;
 use rustc_index::bit_set::DenseBitSet;
 use rustc_middle::mir::{Body, SourceScope};
 use rustc_middle::ty::layout::{FnAbiOf, HasTypingEnv};
@@ -43,8 +42,7 @@ pub(crate) fn compute_mir_scopes<'ll, 'tcx>(
     let mut instantiated = DenseBitSet::new_empty(mir.source_scopes.len());
     let mut discriminators = FxHashMap::default();
     // Instantiate all scopes.
-    for idx in 0..mir.source_scopes.len() {
-        let scope = SourceScope::new(idx);
+    for scope in mir.source_scopes.indices() {
         make_mir_scope(
             cx,
             instance,
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs b/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs
index 4ffe551df09..8f0948b8183 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs
@@ -95,7 +95,11 @@ pub(crate) fn needs_gdb_debug_scripts_section(cx: &CodegenCx<'_, '_>) -> bool {
     // in the `.debug_gdb_scripts` section. For that reason, we make sure that the
     // section is only emitted for leaf crates.
     let embed_visualizers = cx.tcx.crate_types().iter().any(|&crate_type| match crate_type {
-        CrateType::Executable | CrateType::Dylib | CrateType::Cdylib | CrateType::Staticlib => {
+        CrateType::Executable
+        | CrateType::Dylib
+        | CrateType::Cdylib
+        | CrateType::Staticlib
+        | CrateType::Sdylib => {
             // These are crate types for which we will embed pretty printers since they
             // are treated as leaf crates.
             true
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
index 1eb8f367c54..7f3e486ca31 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
@@ -1315,31 +1315,21 @@ fn build_generic_type_param_di_nodes<'ll, 'tcx>(
     ty: Ty<'tcx>,
 ) -> SmallVec<Option<&'ll DIType>> {
     if let ty::Adt(def, args) = *ty.kind() {
-        let generics = cx.tcx.generics_of(def.did());
-        return get_template_parameters(cx, generics, args);
-    }
-
-    return smallvec![];
-}
-
-pub(super) fn get_template_parameters<'ll, 'tcx>(
-    cx: &CodegenCx<'ll, 'tcx>,
-    generics: &ty::Generics,
-    args: ty::GenericArgsRef<'tcx>,
-) -> SmallVec<Option<&'ll DIType>> {
-    if args.types().next().is_some() {
-        let names = get_parameter_names(cx, generics);
-        let template_params: SmallVec<_> = iter::zip(args, names)
-            .filter_map(|(kind, name)| {
-                kind.as_type().map(|ty| {
-                    let actual_type = cx.tcx.normalize_erasing_regions(cx.typing_env(), ty);
-                    let actual_type_di_node = type_di_node(cx, actual_type);
-                    Some(cx.create_template_type_parameter(name.as_str(), actual_type_di_node))
+        if args.types().next().is_some() {
+            let generics = cx.tcx.generics_of(def.did());
+            let names = get_parameter_names(cx, generics);
+            let template_params: SmallVec<_> = iter::zip(args, names)
+                .filter_map(|(kind, name)| {
+                    kind.as_type().map(|ty| {
+                        let actual_type = cx.tcx.normalize_erasing_regions(cx.typing_env(), ty);
+                        let actual_type_di_node = type_di_node(cx, actual_type);
+                        Some(cx.create_template_type_parameter(name.as_str(), actual_type_di_node))
+                    })
                 })
-            })
-            .collect();
+                .collect();
 
-        return template_params;
+            return template_params;
+        }
     }
 
     return smallvec![];
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs
index 07075be55fa..e9574108696 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs
@@ -721,8 +721,7 @@ fn build_union_fields_for_direct_tag_coroutine<'ll, 'tcx>(
         _ => unreachable!(),
     };
 
-    let coroutine_layout =
-        cx.tcx.coroutine_layout(coroutine_def_id, coroutine_args.kind_ty()).unwrap();
+    let coroutine_layout = cx.tcx.coroutine_layout(coroutine_def_id, coroutine_args.args).unwrap();
 
     let common_upvar_names = cx.tcx.closure_saved_names_of_captured_variables(coroutine_def_id);
     let variant_range = coroutine_args.variant_range(coroutine_def_id, cx.tcx);
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs
index 6792c307fdc..7c701926d2c 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs
@@ -363,7 +363,6 @@ fn build_coroutine_variant_struct_type_di_node<'ll, 'tcx>(
 
             state_specific_fields.into_iter().chain(common_fields).collect()
         },
-        // FIXME: this is a no-op. `build_generic_type_param_di_nodes` only works for Adts.
         |cx| build_generic_type_param_di_nodes(cx, coroutine_type_and_layout.ty),
     )
     .di_node
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs
index bfd131cfd3d..20a841f2287 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs
@@ -174,10 +174,8 @@ pub(super) fn build_coroutine_di_node<'ll, 'tcx>(
             DIFlags::FlagZero,
         ),
         |cx, coroutine_type_di_node| {
-            let coroutine_layout = cx
-                .tcx
-                .coroutine_layout(coroutine_def_id, coroutine_args.as_coroutine().kind_ty())
-                .unwrap();
+            let coroutine_layout =
+                cx.tcx.coroutine_layout(coroutine_def_id, coroutine_args).unwrap();
 
             let Variants::Multiple { tag_encoding: TagEncoding::Direct, ref variants, .. } =
                 coroutine_type_and_layout.variants
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs
index ae2ab32ef53..56fb12d3c22 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs
@@ -247,6 +247,16 @@ pub(super) fn stub<'ll, 'tcx>(
     StubInfo { metadata, unique_type_id }
 }
 
+struct AdtStackPopGuard<'ll, 'tcx, 'a> {
+    cx: &'a CodegenCx<'ll, 'tcx>,
+}
+
+impl<'ll, 'tcx, 'a> Drop for AdtStackPopGuard<'ll, 'tcx, 'a> {
+    fn drop(&mut self) {
+        debug_context(self.cx).adt_stack.borrow_mut().pop();
+    }
+}
+
 /// This function enables creating debuginfo nodes that can recursively refer to themselves.
 /// It will first insert the given stub into the type map and only then execute the `members`
 /// and `generics` closures passed in. These closures have access to the stub so they can
@@ -261,6 +271,44 @@ pub(super) fn build_type_with_children<'ll, 'tcx>(
 ) -> DINodeCreationResult<'ll> {
     assert_eq!(debug_context(cx).type_map.di_node_for_unique_id(stub_info.unique_type_id), None);
 
+    let mut _adt_stack_pop_guard = None;
+    if let UniqueTypeId::Ty(ty, ..) = stub_info.unique_type_id
+        && let ty::Adt(adt_def, args) = ty.kind()
+    {
+        let def_id = adt_def.did();
+        // If any sub type references the original type definition and the sub type has a type
+        // parameter that strictly contains the original parameter, the original type is a recursive
+        // type that can expand indefinitely. Example:
+        // ```
+        // enum Recursive<T> {
+        //     Recurse(*const Recursive<Wrap<T>>),
+        //     Item(T),
+        // }
+        // ```
+        let is_expanding_recursive =
+            debug_context(cx).adt_stack.borrow().iter().any(|(parent_def_id, parent_args)| {
+                if def_id == *parent_def_id {
+                    args.iter().zip(parent_args.iter()).any(|(arg, parent_arg)| {
+                        if let (Some(arg), Some(parent_arg)) = (arg.as_type(), parent_arg.as_type())
+                        {
+                            arg != parent_arg && arg.contains(parent_arg)
+                        } else {
+                            false
+                        }
+                    })
+                } else {
+                    false
+                }
+            });
+        if is_expanding_recursive {
+            // FIXME: indicate that this is an expanding recursive type in stub metadata?
+            return DINodeCreationResult::new(stub_info.metadata, false);
+        } else {
+            debug_context(cx).adt_stack.borrow_mut().push((def_id, args));
+            _adt_stack_pop_guard = Some(AdtStackPopGuard { cx });
+        }
+    }
+
     debug_context(cx).type_map.insert(stub_info.unique_type_id, stub_info.metadata);
 
     let members: SmallVec<_> =
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
index ae7d080db66..c5085927923 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
@@ -2,8 +2,8 @@
 
 use std::cell::{OnceCell, RefCell};
 use std::ops::Range;
-use std::ptr;
 use std::sync::Arc;
+use std::{iter, ptr};
 
 use libc::c_uint;
 use metadata::create_subroutine_type;
@@ -66,6 +66,7 @@ pub(crate) struct CodegenUnitDebugContext<'ll, 'tcx> {
     created_files: RefCell<UnordMap<Option<(StableSourceFileId, SourceFileHash)>, &'ll DIFile>>,
 
     type_map: metadata::TypeMap<'ll, 'tcx>,
+    adt_stack: RefCell<Vec<(DefId, GenericArgsRef<'tcx>)>>,
     namespace_map: RefCell<DefIdMap<&'ll DIScope>>,
     recursion_marker_type: OnceCell<&'ll DIType>,
 }
@@ -80,6 +81,7 @@ impl<'ll, 'tcx> CodegenUnitDebugContext<'ll, 'tcx> {
             builder,
             created_files: Default::default(),
             type_map: Default::default(),
+            adt_stack: Default::default(),
             namespace_map: RefCell::new(Default::default()),
             recursion_marker_type: OnceCell::new(),
         }
@@ -486,10 +488,40 @@ impl<'ll, 'tcx> DebugInfoCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
             generics: &ty::Generics,
             args: GenericArgsRef<'tcx>,
         ) -> &'ll DIArray {
-            let template_params = metadata::get_template_parameters(cx, generics, args);
+            if args.types().next().is_none() {
+                return create_DIArray(DIB(cx), &[]);
+            }
+
+            // Again, only create type information if full debuginfo is enabled
+            let template_params: Vec<_> = if cx.sess().opts.debuginfo == DebugInfo::Full {
+                let names = get_parameter_names(cx, generics);
+                iter::zip(args, names)
+                    .filter_map(|(kind, name)| {
+                        kind.as_type().map(|ty| {
+                            let actual_type = cx.tcx.normalize_erasing_regions(cx.typing_env(), ty);
+                            let actual_type_metadata = type_di_node(cx, actual_type);
+                            Some(cx.create_template_type_parameter(
+                                name.as_str(),
+                                actual_type_metadata,
+                            ))
+                        })
+                    })
+                    .collect()
+            } else {
+                vec![]
+            };
+
             create_DIArray(DIB(cx), &template_params)
         }
 
+        fn get_parameter_names(cx: &CodegenCx<'_, '_>, generics: &ty::Generics) -> Vec<Symbol> {
+            let mut names = generics.parent.map_or_else(Vec::new, |def_id| {
+                get_parameter_names(cx, cx.tcx.generics_of(def_id))
+            });
+            names.extend(generics.own_params.iter().map(|param| param.name));
+            names
+        }
+
         /// Returns a scope, plus `true` if that's a type scope for "class" methods,
         /// otherwise `false` for plain namespace scopes.
         fn get_containing_scope<'ll, 'tcx>(
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index d1d6bcebd33..5ca57375292 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -103,11 +103,23 @@ fn get_simple_intrinsic<'ll>(
         sym::minnumf64 => "llvm.minnum.f64",
         sym::minnumf128 => "llvm.minnum.f128",
 
+        sym::minimumf16 => "llvm.minimum.f16",
+        sym::minimumf32 => "llvm.minimum.f32",
+        sym::minimumf64 => "llvm.minimum.f64",
+        // There are issues on x86_64 and aarch64 with the f128 variant,
+        // let's instead use the intrinsic fallback body.
+        // sym::minimumf128 => "llvm.minimum.f128",
         sym::maxnumf16 => "llvm.maxnum.f16",
         sym::maxnumf32 => "llvm.maxnum.f32",
         sym::maxnumf64 => "llvm.maxnum.f64",
         sym::maxnumf128 => "llvm.maxnum.f128",
 
+        sym::maximumf16 => "llvm.maximum.f16",
+        sym::maximumf32 => "llvm.maximum.f32",
+        sym::maximumf64 => "llvm.maximum.f64",
+        // There are issues on x86_64 and aarch64 with the f128 variant,
+        // let's instead use the intrinsic fallback body.
+        // sym::maximumf128 => "llvm.maximum.f128",
         sym::copysignf16 => "llvm.copysign.f16",
         sym::copysignf32 => "llvm.copysign.f32",
         sym::copysignf64 => "llvm.copysign.f64",
@@ -613,7 +625,7 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
             _ => {
                 debug!("unknown intrinsic '{}' -- falling back to default body", name);
                 // Call the fallback body instead of generating the intrinsic code
-                return Err(ty::Instance::new(instance.def_id(), instance.args));
+                return Err(ty::Instance::new_raw(instance.def_id(), instance.args));
             }
         };
 
@@ -1184,18 +1196,6 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         }};
     }
 
-    /// Returns the bitwidth of the `$ty` argument if it is an `Int` type.
-    macro_rules! require_int_ty {
-        ($ty: expr, $diag: expr) => {
-            match $ty {
-                ty::Int(i) => i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size.bits()),
-                _ => {
-                    return_error!($diag);
-                }
-            }
-        };
-    }
-
     /// Returns the bitwidth of the `$ty` argument if it is an `Int` or `Uint` type.
     macro_rules! require_int_or_uint_ty {
         ($ty: expr, $diag: expr) => {
@@ -1485,9 +1485,9 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             m_len == v_len,
             InvalidMonomorphization::MismatchedLengths { span, name, m_len, v_len }
         );
-        let in_elem_bitwidth = require_int_ty!(
+        let in_elem_bitwidth = require_int_or_uint_ty!(
             m_elem_ty.kind(),
-            InvalidMonomorphization::MaskType { span, name, ty: m_elem_ty }
+            InvalidMonomorphization::MaskWrongElementType { span, name, ty: m_elem_ty }
         );
         let m_i1s = vector_mask_to_bitmask(bx, args[0].immediate(), in_elem_bitwidth, m_len);
         return Ok(bx.select(m_i1s, args[1].immediate(), args[2].immediate()));
@@ -1508,7 +1508,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         // Integer vector <i{in_bitwidth} x in_len>:
         let in_elem_bitwidth = require_int_or_uint_ty!(
             in_elem.kind(),
-            InvalidMonomorphization::VectorArgument { span, name, in_ty, in_elem }
+            InvalidMonomorphization::MaskWrongElementType { span, name, ty: in_elem }
         );
 
         let i1xn = vector_mask_to_bitmask(bx, args[0].immediate(), in_elem_bitwidth, in_len);
@@ -1732,14 +1732,9 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             }
         );
 
-        let mask_elem_bitwidth = require_int_ty!(
+        let mask_elem_bitwidth = require_int_or_uint_ty!(
             element_ty2.kind(),
-            InvalidMonomorphization::ThirdArgElementType {
-                span,
-                name,
-                expected_element: element_ty2,
-                third_arg: arg_tys[2]
-            }
+            InvalidMonomorphization::MaskWrongElementType { span, name, ty: element_ty2 }
         );
 
         // Alignment of T, must be a constant integer value:
@@ -1834,14 +1829,9 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             }
         );
 
-        let m_elem_bitwidth = require_int_ty!(
+        let m_elem_bitwidth = require_int_or_uint_ty!(
             mask_elem.kind(),
-            InvalidMonomorphization::ThirdArgElementType {
-                span,
-                name,
-                expected_element: values_elem,
-                third_arg: mask_ty,
-            }
+            InvalidMonomorphization::MaskWrongElementType { span, name, ty: mask_elem }
         );
 
         let mask = vector_mask_to_bitmask(bx, args[0].immediate(), m_elem_bitwidth, mask_len);
@@ -1924,14 +1914,9 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             }
         );
 
-        let m_elem_bitwidth = require_int_ty!(
+        let m_elem_bitwidth = require_int_or_uint_ty!(
             mask_elem.kind(),
-            InvalidMonomorphization::ThirdArgElementType {
-                span,
-                name,
-                expected_element: values_elem,
-                third_arg: mask_ty,
-            }
+            InvalidMonomorphization::MaskWrongElementType { span, name, ty: mask_elem }
         );
 
         let mask = vector_mask_to_bitmask(bx, args[0].immediate(), m_elem_bitwidth, mask_len);
@@ -2019,15 +2004,10 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             }
         );
 
-        // The element type of the third argument must be a signed integer type of any width:
-        let mask_elem_bitwidth = require_int_ty!(
+        // The element type of the third argument must be an integer type of any width:
+        let mask_elem_bitwidth = require_int_or_uint_ty!(
             element_ty2.kind(),
-            InvalidMonomorphization::ThirdArgElementType {
-                span,
-                name,
-                expected_element: element_ty2,
-                third_arg: arg_tys[2]
-            }
+            InvalidMonomorphization::MaskWrongElementType { span, name, ty: element_ty2 }
         );
 
         // Alignment of T, must be a constant integer value:
diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs
index 425381b0ffa..5736314b96a 100644
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@@ -15,7 +15,6 @@
 #![feature(if_let_guard)]
 #![feature(impl_trait_in_assoc_type)]
 #![feature(iter_intersperse)]
-#![feature(let_chains)]
 #![feature(rustdoc_internals)]
 #![feature(slice_as_array)]
 #![feature(try_blocks)]
@@ -29,7 +28,7 @@ use back::owned_target_machine::OwnedTargetMachine;
 use back::write::{create_informational_target_machine, create_target_machine};
 use context::SimpleCx;
 use errors::{AutoDiffWithoutLTO, ParseTargetMachineConfig};
-use llvm_util::target_features_cfg;
+use llvm_util::target_config;
 use rustc_ast::expand::allocator::AllocatorKind;
 use rustc_ast::expand::autodiff_attrs::AutoDiffItem;
 use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule};
@@ -37,7 +36,7 @@ use rustc_codegen_ssa::back::write::{
     CodegenContext, FatLtoInput, ModuleConfig, TargetMachineFactoryConfig, TargetMachineFactoryFn,
 };
 use rustc_codegen_ssa::traits::*;
-use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleCodegen};
+use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleCodegen, TargetConfig};
 use rustc_data_structures::fx::FxIndexMap;
 use rustc_errors::{DiagCtxtHandle, FatalError};
 use rustc_metadata::EncodedMetadata;
@@ -338,8 +337,8 @@ impl CodegenBackend for LlvmCodegenBackend {
         llvm_util::print_version();
     }
 
-    fn target_features_cfg(&self, sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) {
-        target_features_cfg(sess)
+    fn target_config(&self, sess: &Session) -> TargetConfig {
+        target_config(sess)
     }
 
     fn codegen_crate<'tcx>(
diff --git a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
index a9b3bdf7344..2ad39fc8538 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
@@ -19,6 +19,19 @@ unsafe extern "C" {
     pub(crate) fn LLVMRustVerifyFunction(V: &Value, action: LLVMRustVerifierFailureAction) -> Bool;
     pub(crate) fn LLVMRustHasAttributeAtIndex(V: &Value, i: c_uint, Kind: AttributeKind) -> bool;
     pub(crate) fn LLVMRustGetArrayNumElements(Ty: &Type) -> u64;
+    pub(crate) fn LLVMRustHasFnAttribute(
+        F: &Value,
+        Name: *const c_char,
+        NameLen: libc::size_t,
+    ) -> bool;
+    pub(crate) fn LLVMRustRemoveFnAttribute(F: &Value, Name: *const c_char, NameLen: libc::size_t);
+    pub(crate) fn LLVMGetFirstFunction(M: &Module) -> Option<&Value>;
+    pub(crate) fn LLVMGetNextFunction(Fn: &Value) -> Option<&Value>;
+    pub(crate) fn LLVMRustRemoveEnumAttributeAtIndex(
+        Fn: &Value,
+        index: c_uint,
+        kind: AttributeKind,
+    );
 }
 
 unsafe extern "C" {
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index 9ff04f72903..67a66e6ec79 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -1,7 +1,7 @@
 //! Bindings to the LLVM-C API (`LLVM*`), and to our own `extern "C"` wrapper
 //! functions around the unstable LLVM C++ API (`LLVMRust*`).
 //!
-//! ## Passing pointer/length strings as `*const c_uchar`
+//! ## Passing pointer/length strings as `*const c_uchar` (PTR_LEN_STR)
 //!
 //! Normally it's a good idea for Rust-side bindings to match the corresponding
 //! C-side function declarations as closely as possible. But when passing `&str`
@@ -415,6 +415,7 @@ impl AtomicRmwBinOp {
 pub(crate) enum AtomicOrdering {
     #[allow(dead_code)]
     NotAtomic = 0,
+    #[allow(dead_code)]
     Unordered = 1,
     Monotonic = 2,
     // Consume = 3,  // Not specified yet.
@@ -428,7 +429,6 @@ impl AtomicOrdering {
     pub(crate) fn from_generic(ao: rustc_codegen_ssa::common::AtomicOrdering) -> Self {
         use rustc_codegen_ssa::common::AtomicOrdering as Common;
         match ao {
-            Common::Unordered => Self::Unordered,
             Common::Relaxed => Self::Monotonic,
             Common::Acquire => Self::Acquire,
             Common::Release => Self::Release,
@@ -471,7 +471,7 @@ pub(crate) enum MetadataType {
     MD_kcfi_type = 36,
 }
 
-/// LLVMRustAsmDialect
+/// Must match the layout of `LLVMInlineAsmDialect`.
 #[derive(Copy, Clone, PartialEq)]
 #[repr(C)]
 pub(crate) enum AsmDialect {
@@ -1014,8 +1014,25 @@ unsafe extern "C" {
     pub(crate) fn LLVMGetDataLayoutStr(M: &Module) -> *const c_char;
     pub(crate) fn LLVMSetDataLayout(M: &Module, Triple: *const c_char);
 
-    /// See Module::setModuleInlineAsm.
-    pub(crate) fn LLVMAppendModuleInlineAsm(M: &Module, Asm: *const c_char, Len: size_t);
+    /// Append inline assembly to a module. See `Module::appendModuleInlineAsm`.
+    pub(crate) fn LLVMAppendModuleInlineAsm(
+        M: &Module,
+        Asm: *const c_uchar, // See "PTR_LEN_STR".
+        Len: size_t,
+    );
+
+    /// Create the specified uniqued inline asm string. See `InlineAsm::get()`.
+    pub(crate) fn LLVMGetInlineAsm<'ll>(
+        Ty: &'ll Type,
+        AsmString: *const c_uchar, // See "PTR_LEN_STR".
+        AsmStringSize: size_t,
+        Constraints: *const c_uchar, // See "PTR_LEN_STR".
+        ConstraintsSize: size_t,
+        HasSideEffects: llvm::Bool,
+        IsAlignStack: llvm::Bool,
+        Dialect: AsmDialect,
+        CanThrow: llvm::Bool,
+    ) -> &'ll Value;
 
     // Operations on integer types
     pub(crate) fn LLVMInt1TypeInContext(C: &Context) -> &Type;
@@ -1766,7 +1783,7 @@ unsafe extern "C" {
     pub(crate) fn LLVMDIBuilderCreateNameSpace<'ll>(
         Builder: &DIBuilder<'ll>,
         ParentScope: Option<&'ll Metadata>,
-        Name: *const c_uchar,
+        Name: *const c_uchar, // See "PTR_LEN_STR".
         NameLen: size_t,
         ExportSymbols: llvm::Bool,
     ) -> &'ll Metadata;
@@ -1994,21 +2011,9 @@ unsafe extern "C" {
     /// Prints the statistics collected by `-Zprint-codegen-stats`.
     pub(crate) fn LLVMRustPrintStatistics(OutStr: &RustString);
 
-    /// Prepares inline assembly.
-    pub(crate) fn LLVMRustInlineAsm(
-        Ty: &Type,
-        AsmString: *const c_char,
-        AsmStringLen: size_t,
-        Constraints: *const c_char,
-        ConstraintsLen: size_t,
-        SideEffects: Bool,
-        AlignStack: Bool,
-        Dialect: AsmDialect,
-        CanThrow: Bool,
-    ) -> &Value;
     pub(crate) fn LLVMRustInlineAsmVerify(
         Ty: &Type,
-        Constraints: *const c_char,
+        Constraints: *const c_uchar, // See "PTR_LEN_STR".
         ConstraintsLen: size_t,
     ) -> bool;
 
@@ -2454,6 +2459,9 @@ unsafe extern "C" {
         DisableSimplifyLibCalls: bool,
         EmitLifetimeMarkers: bool,
         RunEnzyme: bool,
+        PrintBeforeEnzyme: bool,
+        PrintAfterEnzyme: bool,
+        PrintPasses: bool,
         SanitizerOptions: Option<&SanitizerOptions>,
         PGOGenPath: *const c_char,
         PGOUsePath: *const c_char,
diff --git a/compiler/rustc_codegen_llvm/src/llvm/mod.rs b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
index 6ca81c651ed..ed23f911930 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
@@ -41,6 +41,32 @@ pub(crate) fn AddFunctionAttributes<'ll>(
     }
 }
 
+pub(crate) fn HasAttributeAtIndex<'ll>(
+    llfn: &'ll Value,
+    idx: AttributePlace,
+    kind: AttributeKind,
+) -> bool {
+    unsafe { LLVMRustHasAttributeAtIndex(llfn, idx.as_uint(), kind) }
+}
+
+pub(crate) fn HasStringAttribute<'ll>(llfn: &'ll Value, name: &str) -> bool {
+    unsafe { LLVMRustHasFnAttribute(llfn, name.as_c_char_ptr(), name.len()) }
+}
+
+pub(crate) fn RemoveStringAttrFromFn<'ll>(llfn: &'ll Value, name: &str) {
+    unsafe { LLVMRustRemoveFnAttribute(llfn, name.as_c_char_ptr(), name.len()) }
+}
+
+pub(crate) fn RemoveRustEnumAttributeAtIndex(
+    llfn: &Value,
+    place: AttributePlace,
+    kind: AttributeKind,
+) {
+    unsafe {
+        LLVMRustRemoveEnumAttributeAtIndex(llfn, place.as_uint(), kind);
+    }
+}
+
 pub(crate) fn AddCallSiteAttributes<'ll>(
     callsite: &'ll Value,
     idx: AttributePlace,
@@ -337,12 +363,13 @@ pub(crate) fn last_error() -> Option<String> {
     }
 }
 
-/// Owns an [`OperandBundle`], and will dispose of it when dropped.
-pub(crate) struct OperandBundleOwned<'a> {
+/// Owning pointer to an [`OperandBundle`] that will dispose of the bundle
+/// when dropped.
+pub(crate) struct OperandBundleBox<'a> {
     raw: ptr::NonNull<OperandBundle<'a>>,
 }
 
-impl<'a> OperandBundleOwned<'a> {
+impl<'a> OperandBundleBox<'a> {
     pub(crate) fn new(name: &str, vals: &[&'a Value]) -> Self {
         let raw = unsafe {
             LLVMCreateOperandBundle(
@@ -352,21 +379,21 @@ impl<'a> OperandBundleOwned<'a> {
                 vals.len() as c_uint,
             )
         };
-        OperandBundleOwned { raw: ptr::NonNull::new(raw).unwrap() }
+        Self { raw: ptr::NonNull::new(raw).unwrap() }
     }
 
-    /// Returns inner `OperandBundle` type.
+    /// Dereferences to the underlying `&OperandBundle`.
     ///
-    /// This could be a `Deref` implementation, but `OperandBundle` contains an extern type and
-    /// `Deref::Target: ?Sized`.
-    pub(crate) fn raw(&self) -> &OperandBundle<'a> {
+    /// This can't be a `Deref` implementation because `OperandBundle` transitively
+    /// contains an extern type, which is incompatible with `Deref::Target: ?Sized`.
+    pub(crate) fn as_ref(&self) -> &OperandBundle<'a> {
         // SAFETY: The returned reference is opaque and can only used for FFI.
         // It is valid for as long as `&self` is.
         unsafe { self.raw.as_ref() }
     }
 }
 
-impl Drop for OperandBundleOwned<'_> {
+impl Drop for OperandBundleBox<'_> {
     fn drop(&mut self) {
         unsafe {
             LLVMDisposeOperandBundle(self.raw);
@@ -414,3 +441,11 @@ pub(crate) fn set_dso_local<'ll>(v: &'ll Value) {
         LLVMRustSetDSOLocal(v, true);
     }
 }
+
+/// Safe wrapper for `LLVMAppendModuleInlineAsm`, which delegates to
+/// `Module::appendModuleInlineAsm`.
+pub(crate) fn append_module_inline_asm<'ll>(llmod: &'ll Module, asm: &[u8]) {
+    unsafe {
+        LLVMAppendModuleInlineAsm(llmod, asm.as_ptr(), asm.len());
+    }
+}
diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs
index 36e35f81392..8f57f0983ab 100644
--- a/compiler/rustc_codegen_llvm/src/llvm_util.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs
@@ -6,6 +6,7 @@ use std::sync::Once;
 use std::{ptr, slice, str};
 
 use libc::c_int;
+use rustc_codegen_ssa::TargetConfig;
 use rustc_codegen_ssa::base::wants_wasm_eh;
 use rustc_codegen_ssa::codegen_attrs::check_tied_features;
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
@@ -18,6 +19,7 @@ use rustc_session::config::{PrintKind, PrintRequest};
 use rustc_span::Symbol;
 use rustc_target::spec::{MergeFunctions, PanicStrategy, SmallDataThresholdSupport};
 use rustc_target::target_features::{RUSTC_SPECIAL_FEATURES, RUSTC_SPECIFIC_FEATURES};
+use smallvec::{SmallVec, smallvec};
 
 use crate::back::write::create_informational_target_machine;
 use crate::errors::{
@@ -179,27 +181,27 @@ impl<'a> TargetFeatureFoldStrength<'a> {
 
 pub(crate) struct LLVMFeature<'a> {
     llvm_feature_name: &'a str,
-    dependency: Option<TargetFeatureFoldStrength<'a>>,
+    dependencies: SmallVec<[TargetFeatureFoldStrength<'a>; 1]>,
 }
 
 impl<'a> LLVMFeature<'a> {
     fn new(llvm_feature_name: &'a str) -> Self {
-        Self { llvm_feature_name, dependency: None }
+        Self { llvm_feature_name, dependencies: SmallVec::new() }
     }
 
-    fn with_dependency(
+    fn with_dependencies(
         llvm_feature_name: &'a str,
-        dependency: TargetFeatureFoldStrength<'a>,
+        dependencies: SmallVec<[TargetFeatureFoldStrength<'a>; 1]>,
     ) -> Self {
-        Self { llvm_feature_name, dependency: Some(dependency) }
+        Self { llvm_feature_name, dependencies }
     }
 
-    fn contains(&self, feat: &str) -> bool {
+    fn contains(&'a self, feat: &str) -> bool {
         self.iter().any(|dep| dep == feat)
     }
 
     fn iter(&'a self) -> impl Iterator<Item = &'a str> {
-        let dependencies = self.dependency.iter().map(|feat| feat.as_str());
+        let dependencies = self.dependencies.iter().map(|feat| feat.as_str());
         std::iter::once(self.llvm_feature_name).chain(dependencies)
     }
 }
@@ -209,7 +211,7 @@ impl<'a> IntoIterator for LLVMFeature<'a> {
     type IntoIter = impl Iterator<Item = &'a str>;
 
     fn into_iter(self) -> Self::IntoIter {
-        let dependencies = self.dependency.into_iter().map(|feat| feat.as_str());
+        let dependencies = self.dependencies.into_iter().map(|feat| feat.as_str());
         std::iter::once(self.llvm_feature_name).chain(dependencies)
     }
 }
@@ -239,9 +241,9 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
         &*sess.target.arch
     };
     match (arch, s) {
-        ("x86", "sse4.2") => Some(LLVMFeature::with_dependency(
+        ("x86", "sse4.2") => Some(LLVMFeature::with_dependencies(
             "sse4.2",
-            TargetFeatureFoldStrength::EnableOnly("crc32"),
+            smallvec![TargetFeatureFoldStrength::EnableOnly("crc32")],
         )),
         ("x86", "pclmulqdq") => Some(LLVMFeature::new("pclmul")),
         ("x86", "rdrand") => Some(LLVMFeature::new("rdrnd")),
@@ -261,9 +263,10 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
         ("aarch64", "sme-b16b16") if get_version().0 < 20 => Some(LLVMFeature::new("b16b16")),
         ("aarch64", "flagm2") => Some(LLVMFeature::new("altnzcv")),
         // Rust ties fp and neon together.
-        ("aarch64", "neon") => {
-            Some(LLVMFeature::with_dependency("neon", TargetFeatureFoldStrength::Both("fp-armv8")))
-        }
+        ("aarch64", "neon") => Some(LLVMFeature::with_dependencies(
+            "neon",
+            smallvec![TargetFeatureFoldStrength::Both("fp-armv8")],
+        )),
         // In LLVM neon implicitly enables fp, but we manually enable
         // neon when a feature only implicitly enables fp
         ("aarch64", "fhm") => Some(LLVMFeature::new("fp16fml")),
@@ -272,11 +275,18 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
         ("aarch64", "fpmr") => None, // only existed in 18
         ("arm", "fp16") => Some(LLVMFeature::new("fullfp16")),
         // Filter out features that are not supported by the current LLVM version
+        ("loongarch64", "div32" | "lam-bh" | "lamcas" | "ld-seq-sa" | "scq")
+            if get_version().0 < 20 =>
+        {
+            None
+        }
+        // Filter out features that are not supported by the current LLVM version
         ("riscv32" | "riscv64", "zacas") if get_version().0 < 20 => None,
         // Enable the evex512 target feature if an avx512 target feature is enabled.
-        ("x86", s) if s.starts_with("avx512") => {
-            Some(LLVMFeature::with_dependency(s, TargetFeatureFoldStrength::EnableOnly("evex512")))
-        }
+        ("x86", s) if s.starts_with("avx512") => Some(LLVMFeature::with_dependencies(
+            s,
+            smallvec![TargetFeatureFoldStrength::EnableOnly("evex512")],
+        )),
         // Support for `wide-arithmetic` will first land in LLVM 20 as part of
         // llvm/llvm-project#111598
         ("wasm32" | "wasm64", "wide-arithmetic") if get_version() < (20, 0, 0) => None,
@@ -294,6 +304,21 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
             None
         }
         ("x86", "movrs") if get_version().0 < 20 => None,
+        ("x86", "avx10.1") => Some(LLVMFeature::new("avx10.1-512")),
+        ("x86", "avx10.2") if get_version().0 < 20 => None,
+        ("x86", "avx10.2") if get_version().0 >= 20 => Some(LLVMFeature::new("avx10.2-512")),
+        ("x86", "apxf") => Some(LLVMFeature::with_dependencies(
+            "egpr",
+            smallvec![
+                TargetFeatureFoldStrength::Both("push2pop2"),
+                TargetFeatureFoldStrength::Both("ppx"),
+                TargetFeatureFoldStrength::Both("ndd"),
+                TargetFeatureFoldStrength::Both("ccmp"),
+                TargetFeatureFoldStrength::Both("cf"),
+                TargetFeatureFoldStrength::Both("nf"),
+                TargetFeatureFoldStrength::Both("zu"),
+            ],
+        )),
         (_, s) => Some(LLVMFeature::new(s)),
     }
 }
@@ -302,7 +327,7 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
 /// Must express features in the way Rust understands them.
 ///
 /// We do not have to worry about RUSTC_SPECIFIC_FEATURES here, those are handled outside codegen.
-pub(crate) fn target_features_cfg(sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) {
+pub(crate) fn target_config(sess: &Session) -> TargetConfig {
     // Add base features for the target.
     // We do *not* add the -Ctarget-features there, and instead duplicate the logic for that below.
     // The reason is that if LLVM considers a feature implied but we do not, we don't want that to
@@ -402,7 +427,85 @@ pub(crate) fn target_features_cfg(sess: &Session) -> (Vec<Symbol>, Vec<Symbol>)
 
     let target_features = f(false);
     let unstable_target_features = f(true);
-    (target_features, unstable_target_features)
+    let mut cfg = TargetConfig {
+        target_features,
+        unstable_target_features,
+        has_reliable_f16: true,
+        has_reliable_f16_math: true,
+        has_reliable_f128: true,
+        has_reliable_f128_math: true,
+    };
+
+    update_target_reliable_float_cfg(sess, &mut cfg);
+    cfg
+}
+
+/// Determine whether or not experimental float types are reliable based on known bugs.
+fn update_target_reliable_float_cfg(sess: &Session, cfg: &mut TargetConfig) {
+    let target_arch = sess.target.arch.as_ref();
+    let target_os = sess.target.options.os.as_ref();
+    let target_env = sess.target.options.env.as_ref();
+    let target_abi = sess.target.options.abi.as_ref();
+    let target_pointer_width = sess.target.pointer_width;
+
+    cfg.has_reliable_f16 = match (target_arch, target_os) {
+        // Selection failure <https://github.com/llvm/llvm-project/issues/50374>
+        ("s390x", _) => false,
+        // Unsupported <https://github.com/llvm/llvm-project/issues/94434>
+        ("arm64ec", _) => false,
+        // MinGW ABI bugs <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115054>
+        ("x86_64", "windows") if target_env == "gnu" && target_abi != "llvm" => false,
+        // Infinite recursion <https://github.com/llvm/llvm-project/issues/97981>
+        ("csky", _) => false,
+        ("hexagon", _) => false,
+        ("powerpc" | "powerpc64", _) => false,
+        ("sparc" | "sparc64", _) => false,
+        ("wasm32" | "wasm64", _) => false,
+        // `f16` support only requires that symbols converting to and from `f32` are available. We
+        // provide these in `compiler-builtins`, so `f16` should be available on all platforms that
+        // do not have other ABI issues or LLVM crashes.
+        _ => true,
+    };
+
+    cfg.has_reliable_f128 = match (target_arch, target_os) {
+        // Unsupported <https://github.com/llvm/llvm-project/issues/94434>
+        ("arm64ec", _) => false,
+        // Selection bug <https://github.com/llvm/llvm-project/issues/96432>
+        ("mips64" | "mips64r6", _) => false,
+        // Selection bug <https://github.com/llvm/llvm-project/issues/95471>
+        ("nvptx64", _) => false,
+        // ABI bugs <https://github.com/rust-lang/rust/issues/125109> et al. (full
+        // list at <https://github.com/rust-lang/rust/issues/116909>)
+        ("powerpc" | "powerpc64", _) => false,
+        // ABI unsupported <https://github.com/llvm/llvm-project/issues/41838>
+        ("sparc", _) => false,
+        // Stack alignment bug <https://github.com/llvm/llvm-project/issues/77401>. NB: tests may
+        // not fail if our compiler-builtins is linked.
+        ("x86", _) => false,
+        // MinGW ABI bugs <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115054>
+        ("x86_64", "windows") if target_env == "gnu" && target_abi != "llvm" => false,
+        // There are no known problems on other platforms, so the only requirement is that symbols
+        // are available. `compiler-builtins` provides all symbols required for core `f128`
+        // support, so this should work for everything else.
+        _ => true,
+    };
+
+    // Assume that working `f16` means working `f16` math for most platforms, since
+    // operations just go through `f32`.
+    cfg.has_reliable_f16_math = cfg.has_reliable_f16;
+
+    cfg.has_reliable_f128_math = match (target_arch, target_os) {
+        // LLVM lowers `fp128` math to `long double` symbols even on platforms where
+        // `long double` is not IEEE binary128. See
+        // <https://github.com/llvm/llvm-project/issues/44744>.
+        //
+        // This rules out anything that doesn't have `long double` = `binary128`: targets with
+        // <= 32-bit pointers (ld is `f64`), anything other than Linux (Windows and macOS use
+        // `f64`), and `x86_64` (ld is 80-bit extended precision).
+        ("x86_64", _) => false,
+        (_, "linux") if target_pointer_width == 64 => true,
+        _ => false,
+    } && cfg.has_reliable_f128;
 }
 
 pub(crate) fn print_version() {
@@ -686,7 +789,7 @@ pub(crate) fn global_llvm_features(
                 )
             } else if let Some(feature) = feature.strip_prefix('-') {
                 // FIXME: Why do we not remove implied features on "-" here?
-                // We do the equivalent above in `target_features_cfg`.
+                // We do the equivalent above in `target_config`.
                 // See <https://github.com/rust-lang/rust/issues/134792>.
                 all_rust_features.push((false, feature));
             } else if !feature.is_empty() {
@@ -765,7 +868,7 @@ pub(crate) fn global_llvm_features(
                         "{}{}",
                         enable_disable, llvm_feature.llvm_feature_name
                     ))
-                    .chain(llvm_feature.dependency.into_iter().filter_map(
+                    .chain(llvm_feature.dependencies.into_iter().filter_map(
                         move |feat| match (enable, feat) {
                             (_, TargetFeatureFoldStrength::Both(f))
                             | (true, TargetFeatureFoldStrength::EnableOnly(f)) => {
diff --git a/compiler/rustc_codegen_llvm/src/type_.rs b/compiler/rustc_codegen_llvm/src/type_.rs
index b89ce90d1a1..169036f5152 100644
--- a/compiler/rustc_codegen_llvm/src/type_.rs
+++ b/compiler/rustc_codegen_llvm/src/type_.rs
@@ -128,6 +128,10 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
         (**self).borrow().llcx
     }
 
+    pub(crate) fn llmod(&self) -> &'ll llvm::Module {
+        (**self).borrow().llmod
+    }
+
     pub(crate) fn isize_ty(&self) -> &'ll Type {
         (**self).borrow().isize_ty
     }